Python sklearn.base.TransformerMixin() Examples
The following are 27 code examples of sklearn.base.TransformerMixin().
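Before the examples, a minimal sketch of what the mixin provides: define fit() and transform() yourself, and TransformerMixin supplies fit_transform() for free.

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class CenteringTransformer(BaseEstimator, TransformerMixin):
    """Subtracts the per-column mean learned during fit."""

    def fit(self, X, y=None):
        self.mean_ = np.asarray(X).mean(axis=0)
        return self  # fit must return self so fit_transform can chain

    def transform(self, X):
        return np.asarray(X) - self.mean_

X = np.array([[1.0, 2.0], [3.0, 4.0]])
# fit_transform comes from TransformerMixin, not from this class
print(CenteringTransformer().fit_transform(X))  # columns now have zero mean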
Example #1
Source File: common_utils.py From interpret-text with MIT License | 8 votes |
def create_pandas_only_svm_classifier(X, y, probability=True):
    class PandasOnlyEstimator(TransformerMixin):
        def fit(self, X, y=None, **fitparams):
            return self

        def transform(self, X, **transformparams):
            dataset_is_df = isinstance(X, pd.DataFrame)
            if not dataset_is_df:
                raise Exception("Dataset must be a pandas dataframe!")
            return X

    pandas_only = PandasOnlyEstimator()

    clf = svm.SVC(gamma=0.001, C=100.0, probability=probability, random_state=777)
    pipeline = Pipeline([("pandas_only", pandas_only), ("clf", clf)])
    return pipeline.fit(X, y)
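A hedged usage sketch, assuming the helper above is defined alongside its pandas, sklearn.svm and Pipeline imports: the PandasOnlyEstimator step rejects any input that is not a DataFrame before it reaches the SVM.

import pandas as pd
from sklearn.datasets import load_iris

data = load_iris()
X_df = pd.DataFrame(data.data, columns=data.feature_names)
model = create_pandas_only_svm_classifier(X_df, data.target)
model.predict(X_df.head())   # fine: input is a DataFrame
# model.predict(data.data)   # would raise: not a DataFrame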
Example #2
Source File: common_utils.py From interpret-community with MIT License | 6 votes |
def create_pandas_only_svm_classifier(X, y, probability=True):
    class PandasOnlyEstimator(TransformerMixin):
        def fit(self, X, y=None, **fitparams):
            return self

        def transform(self, X, **transformparams):
            dataset_is_df = isinstance(X, pd.DataFrame)
            if not dataset_is_df:
                raise Exception("Dataset must be a pandas dataframe!")
            return X

    pandas_only = PandasOnlyEstimator()

    clf = svm.SVC(gamma=0.001, C=100.0, probability=probability, random_state=777)
    pipeline = Pipeline([('pandas_only', pandas_only), ('clf', clf)])
    return pipeline.fit(X, y)
Example #3
Source File: filters.py From causallib with Apache License 2.0 | 6 votes |
def track_selected_features(pipeline_stages, num_features):
    """
    Args:
        pipeline_stages (list[tuple[str, TransformerMixin]]): list of steps.
            Each step is a tuple of Name and Transformer Object.
        num_features (int):

    Returns:
        np.ndarray:
    """
    selected_features = np.arange(num_features)
    for p_name, p in pipeline_stages:
        if not isinstance(p, BaseFeatureSelector):
            continue
        p_features = p.selected_features
        selected_features = selected_features[p_features]
    return selected_features
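A toy numpy illustration of the index composition above: each selection stage reports indices relative to its own input, so chaining stages means repeatedly indexing into the running array of original column positions.

import numpy as np

selected = np.arange(6)            # original features 0..5
stage1 = np.array([0, 2, 3, 5])    # first selector keeps these positions
selected = selected[stage1]        # -> [0, 2, 3, 5]
stage2 = np.array([1, 3])          # second selector, relative to its input
selected = selected[stage2]        # -> [2, 5]: the surviving original columns
print(selected)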
Example #4
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_4():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""
    tpot_obj = TPOTClassifier(
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
        config_dict='TPOT light',
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj.fit(pretest_X, pretest_y)

    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
    assert not (tpot_obj._start_datetime is None)
    sklearn_pipeline = tpot_obj.fitted_pipeline_
    operator_count = tpot_obj._operator_count(tpot_obj._optimized_pipeline)
    assert operator_count == 3

    assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
    assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
    assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
    assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example #5
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_3():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example #6
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_2():
    """Assert that TPOT template option generates pipeline when each step is operator type with a duplicate main type."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 4
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[3][1].__class__, ClassifierMixin)
Example #7
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_1():
    """Assert that TPOT template option generates pipeline when each step is a type of operator."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
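For context, a short sketch of the TPOT feature these four tests exercise, assuming tpot is installed: the template string pins each pipeline position to an operator type (or a specific operator), separated by '-'.

from tpot import TPOTClassifier

tpot = TPOTClassifier(
    generations=2,
    population_size=5,
    template='Selector-Transformer-Classifier',  # step types, left to right
    random_state=42,
    verbosity=0,
)
# tpot.fit(X_train, y_train) would then only explore 3-step pipelines whose
# steps are a feature selector, a transformer, and a classifier, in that order.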
Example #8
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_clone_pandas_dataframe():

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------
        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    d = np.arange(10)
    df = MockDataFrame(d)
    e = DummyEstimator(df, scalar_param=1)
    cloned_e = clone(e)

    # the test
    assert_true((e.df == cloned_e.df).values.all())
    assert_equal(e.scalar_param, cloned_e.scalar_param)
Example #9
Source File: normalization_strategy_selector.py From Auto-PyTorch with Apache License 2.0 | 5 votes |
def add_normalization_strategy(self, name, normalization_type, is_default_normalization_strategy=False):
    """Add a normalization strategy. Will be called with {pipeline_config, X, Y}

    Arguments:
        name {string} -- name of normalization strategy for definition in config
        normalization_strategy {function} -- callable with {pipeline_config, X}
        is_default_normalization_strategy {bool} -- should the given normalization_strategy be the
            default normalization_strategy if not specified in config
    """
    if (not issubclass(normalization_type, BaseEstimator)
            and not issubclass(normalization_type, TransformerMixin)):
        raise ValueError("normalization_type must be subclass of BaseEstimator")
    self.normalization_strategies[name] = normalization_type
Example #10
Source File: data_splitters.py From MAST-ML with MIT License | 5 votes |
def split(self, X, y, groups):
    n_groups = self.get_n_splits(groups=groups)
    # print('n_groups', n_groups)
    lpgo = ms.LeavePGroupsOut(n_groups=n_groups - 1)
    return lpgo.split(X, y, groups)

# class WithoutElement(BaseEstimator, TransformerMixin):
#     "Train the model without each element, then test on the rows with that element"
#     pass
Example #11
Source File: ABuML.py From abu with GNU General Public License v3.0 | 5 votes |
def fit_transform(self, **kwargs):
    """
    Decorated with @entry_wrapper(); the default arguments support both
    supervised and unsupervised learning. Internally checks
    isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform')
    to decide whether fit_transform is available.

    eg:
        input:  ttn_abu.x.shape
        output: (891, 14)

        input:  ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_PCA).shape
        output: (891, 4)

        input:  ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_KMEAN).shape
        output: (891, 2)

    :param kwargs: callers may pass x and y, which are read via
                       x = kwargs.pop('x', self.x)
                       y = kwargs.pop('y', self.y)
                   plus the fiter_type used by the decorator,
                   eg: ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_CLF)
    :return: the result matrix produced by fit_transform
    """
    fiter = self.get_fiter()

    if isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform'):
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        if self.is_supervised_learning():
            trans = fiter.fit_transform(x, y)
        else:
            trans = fiter.fit_transform(x)
        return trans
    else:
        self.log_func('{} does not support fit_transform'.format(fiter))
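The capability check in this method is plain duck typing; a small stand-alone sketch of the same test:

from sklearn.base import TransformerMixin
from sklearn.decomposition import PCA
from sklearn.svm import SVC

for fiter in (PCA(n_components=2), SVC()):
    ok = isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform')
    print(type(fiter).__name__, 'supports fit_transform:', ok)
# PCA supports fit_transform: True
# SVC supports fit_transform: False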
Example #12
Source File: base.py From smrt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, X):
    """Inherited from the ``TransformerMixin``. Pass the ``X`` array through
    the inferential MLP layers.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        The array of samples that will be encoded into the new hidden layer space.
    """
    return self.encode(X)
Example #13
Source File: test_preprocessing.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_basic():
    bags = [np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
            for _ in xrange(50)]
    feats = Features(bags, stack=True)

    stder = BagStandardizer()
    stdized = stder.fit_transform(bags)
    stdized.make_stacked()

    assert np.allclose(np.mean(stdized.stacked_features), 0)
    assert np.allclose(np.std(stdized.stacked_features), 1)

    first_five = stder.transform(bags[:5])
    assert first_five == stdized[:5]

    minmaxer = BagMinMaxScaler([3, 7])
    minmaxed = minmaxer.fit_transform(feats)
    minmaxed.make_stacked()
    assert np.allclose(np.min(minmaxed.stacked_features, 0), 3)
    assert np.allclose(np.max(minmaxed.stacked_features, 0), 7)

    normer = BagNormalizer('l1')
    normed = normer.fit_transform(Features(bags))
    normed.make_stacked()
    assert np.allclose(np.sum(np.abs(normed.stacked_features), 1), 1)

    class GetMean(BaseEstimator, TransformerMixin):
        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.mean(axis=1)[None, :]

    m = BagPreprocesser(GetMean())
    assert_raises(ValueError, lambda: m.transform(bags))
Example #14
Source File: test_step.py From baikal with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_get_params_without_init(self, teardown):
    """Test edge case where the base class does not define an __init__ method.

    get_params should resolve to object.__init__ which results in an empty dict.
    """

    class TransformerWithoutInit(TransformerMixin, BaseEstimator):
        pass

    class TransformerWithoutInitStep(Step, TransformerWithoutInit):
        pass

    step = TransformerWithoutInitStep()
    assert step.get_params() == {}
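The edge case under test is general scikit-learn behaviour: BaseEstimator.get_params() introspects the __init__ signature, so a class that never defines __init__ reports no parameters. A minimal sketch:

from sklearn.base import BaseEstimator, TransformerMixin

class NoInit(TransformerMixin, BaseEstimator):
    pass

class WithInit(TransformerMixin, BaseEstimator):
    def __init__(self, alpha=1.0):
        self.alpha = alpha

print(NoInit().get_params())    # {}
print(WithInit().get_params())  # {'alpha': 1.0}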
Example #15
Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test
Example #16
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return NMF(n_components=self.width, random_state=71)
Example #17
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return TruncatedSVD(n_components=self.width, random_state=71)
Example #18
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return LatentDirichletAllocation(n_components=self.width,
                                     learning_method='online',
                                     random_state=71)
Example #19
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return LatentDirichletAllocation(n_components=self.width,
                                     learning_method='online',
                                     random_state=71)
Example #20
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def vectorizer_factory(self) -> TransformerMixin:
    raise NotImplementedError
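Examples #16 through #20 follow one pattern: a factory method annotated to return any TransformerMixin, so the surrounding class depends only on the mixin interface. A hypothetical, self-contained sketch of that pattern (the SVDVectorizer name is illustrative, not from the source project):

import numpy as np
from sklearn.base import TransformerMixin
from sklearn.decomposition import TruncatedSVD

class SVDVectorizer:
    """Upstream code calls the factory and relies only on fit_transform."""
    width = 5

    def transformer_factory(self) -> TransformerMixin:
        return TruncatedSVD(n_components=self.width, random_state=71)

    def fit_transform(self, X):
        return self.transformer_factory().fit_transform(X)

X = np.random.rand(20, 30)
print(SVDVectorizer().fit_transform(X).shape)  # (20, 5)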
Example #21
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_clone_pandas_dataframe():

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------
        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    d = np.arange(10)
    df = MockDataFrame(d)
    e = DummyEstimator(df, scalar_param=1)
    cloned_e = clone(e)

    # the test
    assert (e.df == cloned_e.df).values.all()
    assert_equal(e.scalar_param, cloned_e.scalar_param)
Example #22
Source File: diff.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def __init__(
    self,
    base_estimator: BaseEstimator = KerasAutoEncoder(kind="feedforward_hourglass"),
    scaler: TransformerMixin = RobustScaler(),
    require_thresholds: bool = True,
    window=None,
):
    """
    Classifier which wraps a ``base_estimator`` and provides a diff error
    based approach to anomaly detection.

    It trains a ``scaler`` to the target **after** training, purely for
    error calculations. The underlying ``base_estimator`` is trained with
    the original, unscaled, ``y``.

    Parameters
    ----------
    base_estimator: sklearn.base.BaseEstimator
        The model to which normal ``.fit``, ``.predict`` methods will be used.
        Defaults to py:class:`gordo.machine.model.models.KerasAutoEncoder`
        with ``kind='feedforward_hourglass'``
    scaler: sklearn.base.TransformerMixin
        Defaults to ``sklearn.preprocessing.RobustScaler``.
        Used for transforming model output and the original ``y`` to calculate
        the difference/error in model output vs expected.
    require_thresholds: bool
        Requires calculating ``thresholds_`` via a call to
        :func:`~DiffBasedAnomalyDetector.cross_validate`. If this is set
        (default True), but :func:`~DiffBasedAnomalyDetector.cross_validate`
        was not called before calling
        :func:`~DiffBasedAnomalyDetector.anomaly`, an ``AttributeError``
        will be raised.
    window: int
        Window size for smoothed thresholds
    """
    self.base_estimator = base_estimator
    self.scaler = scaler
    self.require_thresholds = require_thresholds
    self.window = window
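The docstring above describes the idea compactly; here is a minimal numpy-only sketch of diff-based error scoring (not gordo's actual implementation): scale both the target and the model output with a scaler fitted on the target, then score each sample by the mean absolute difference.

import numpy as np
from sklearn.preprocessing import RobustScaler

y_true = np.random.normal(size=(100, 3))
y_pred = y_true + np.random.normal(scale=0.1, size=(100, 3))

scaler = RobustScaler().fit(y_true)  # fitted on the target, post-training
err = np.abs(scaler.transform(y_true) - scaler.transform(y_pred))
anomaly_score = err.mean(axis=1)     # one score per sample
print(anomaly_score[:5])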
Example #23
Source File: utils.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def metric_wrapper(metric, scaler: Optional[TransformerMixin] = None):
    """
    Ensures that a given metric works properly when the model itself returns
    a y which is shorter than the target y, and allows scaling the data
    before applying the metrics.

    Parameters
    ----------
    metric
        Metric which must accept y_true and y_pred of the same length
    scaler : Optional[TransformerMixin]
        Transformer which will be applied on y and y_pred before the metrics
        is calculated. Must have method `transform`, so for most scalers it
        must already be fitted on `y`.
    """

    @functools.wraps(metric)
    def _wrapper(y_true, y_pred, *args, **kwargs):
        if scaler:
            logger.debug(
                "Transformer provided to metrics wrapper, scaling y and y_pred before "
                "passing to metrics"
            )
            y_true = scaler.transform(y_true)
            y_pred = scaler.transform(y_pred)
        return metric(y_true[-len(y_pred):], y_pred, *args, **kwargs)

    return _wrapper
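A hedged usage sketch, assuming metric_wrapper above is in scope together with its functools and logger dependencies: the scaler must already be fitted, and the wrapped metric aligns a shorter y_pred against the tail of y_true.

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

y_true = np.arange(20, dtype=float).reshape(-1, 2)
y_pred = y_true[2:] * 1.05           # model output is shorter than the target

scaler = MinMaxScaler().fit(y_true)  # fitted on y, as the docstring requires
wrapped = metric_wrapper(mean_squared_error, scaler=scaler)
print(wrapped(y_true, y_pred))       # compares only the trailing rows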
Example #24
Source File: investigate.py From sklearn-onnx with MIT License | 4 votes |
def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs
    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
            yield couple
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for i, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            if transformers is None:
                yield (coor + (i,)), None, columns
            else:
                for couple in enumerate_pipeline_models(transformers,
                                                        coor + (i,),
                                                        columns):
                    yield couple
    elif isinstance(pipe, Pipeline):
        for i, (_, model) in enumerate(pipe.steps):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            for couple in enumerate_pipeline_models(
                    fitted_transformer, coor + (i,), column):
                yield couple
    elif isinstance(pipe, FeatureUnion):
        for i, (_, model) in enumerate(pipe.transformer_list):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
        pass
    elif isinstance(pipe, BaseEstimator):
        pass
    else:
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe))
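A usage sketch, assuming enumerate_pipeline_models above is in scope: walking a two-step pipeline yields a coordinate tuple alongside each nested model.

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
for coord, model, _ in enumerate_pipeline_models(pipe):
    print(coord, type(model).__name__)
# (0,) Pipeline
# (0, 0) StandardScaler
# (0, 1) LogisticRegression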
Example #25
Source File: test_algebra_onnx_operators.py From sklearn-onnx with MIT License | 4 votes |
def test_sub(self):

    class CustomOpTransformer(BaseEstimator, TransformerMixin):

        def __init__(self, op_version=None):
            self.op_version = op_version

        def fit(self, X, y=None):
            self.W = np.mean(X, axis=0)
            return self

        def transform(self, X):
            return X - self.W

    mat = np.array([[0., 1.], [1., 2.], [3., 4.]])
    tr = CustomOpTransformer(op_version=None)
    tr.fit(mat)
    z = tr.transform(mat)

    def conv(scope, operator, container):
        W = operator.raw_operator.W.astype(container.dtype)
        op = OnnxSub(
            operator.inputs[0], W, output_names=operator.outputs,
            op_version=TARGET_OPSET)
        op.add_to(scope, container)
        text = str(container)
        if 'name:"Su_Sub"' not in text:
            raise AssertionError(
                "Unnamed operator: '{}'".format(text))
        nin = list(op.enumerate_initial_types())
        nno = list(op.enumerate_nodes())
        nva = list(op.enumerate_variables())
        assert len(nin) == 1
        assert nin[0][0] == 'input'
        assert nin[0][1].shape == [None, 2]
        assert len(nno) == 1
        assert nno[0].output_names == ['variable']
        assert len(nva) == 1
        assert isinstance(nva[0], tuple)
        assert nva[0][1] == 0

    def shape(operator):
        N = operator.inputs[0].type.shape[0]
        W = operator.raw_operator.W
        operator.outputs[0].type.shape = [N, W.shape[0]]

    model_onnx = convert_sklearn(
        tr, 'a-sub', [('input', FloatTensorType([None, 2]))],
        custom_shape_calculators={CustomOpTransformer: shape},
        custom_conversion_functions={CustomOpTransformer: conv})

    sess = InferenceSession(model_onnx.SerializeToString())
    z2 = sess.run(None, {'input': mat.astype(np.float32)})[0]
    assert_almost_equal(z, z2)
Example #26
Source File: test_algebra_onnx_operators.py From sklearn-onnx with MIT License | 4 votes |
def test_sub_div(self):

    class CustomOpTransformer(BaseEstimator, TransformerMixin):

        def __init__(self):
            pass

        def fit(self, X, y=None):
            self.W = np.mean(X, axis=0)
            self.S = np.std(X, axis=0)
            return self

        def transform(self, X):
            return (X - self.W) / self.S

    mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
    tr = CustomOpTransformer()
    tr.fit(mat)
    z = tr.transform(mat)

    def conv(scope, operator, container):
        W = operator.raw_operator.W.astype(np.float32)
        S = operator.raw_operator.S.astype(np.float32)
        X = operator.inputs[0]
        out = operator.outputs
        op = OnnxDiv(
            OnnxSub(X, W, op_version=container.target_opset), S,
            output_names=out, op_version=container.target_opset)
        op.add_to(scope, container)

    def shape(operator):
        N = operator.inputs[0].type.shape[0]
        W = operator.raw_operator.W
        operator.outputs[0].type.shape = [N, W.shape[0]]

    model_onnx = convert_sklearn(
        tr, 'a-sub-div', [('input', FloatTensorType([None, 2]))],
        custom_shape_calculators={CustomOpTransformer: shape},
        custom_conversion_functions={CustomOpTransformer: conv},
        target_opset=None)

    try:
        sess = InferenceSession(model_onnx.SerializeToString())
    except RuntimeError as e:
        raise AssertionError(
            "Cannot load model\n---\n{}\n---".format(model_onnx)) from e
    z2 = sess.run(None, {'input': mat.astype(np.float32)})[0]
    assert_almost_equal(z, z2)
Example #27
Source File: common_tabular_tests.py From interpret-community with MIT License | 4 votes |
def verify_explain_model_categorical(self, pass_categoricals=False):
    headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
               "num_doors", "body_style", "drive_wheels", "engine_location", "wheel_base",
               "length", "width", "height", "curb_weight", "engine_type", "num_cylinders",
               "engine_size", "fuel_system", "bore", "stroke", "compression_ratio",
               "horsepower", "peak_rpm", "city_mpg", "highway_mpg", "price"]
    df = retrieve_dataset('imports-85.csv', header=None, names=headers, na_values="?")
    df_y = df['price']
    df_X = df.drop(columns='price')
    df_train_X, df_test_X, df_train_y, df_test_y = train_test_split(df_X, df_y,
                                                                    test_size=0.2,
                                                                    random_state=7)
    # Encode strings to ordinal values
    categorical_col_names = list(df_train_X.select_dtypes(include='object').columns)
    categorical_col_indices = [df_train_X.columns.get_loc(col_name)
                               for col_name in categorical_col_names]
    kwargs = {'num_leaves': 31, 'num_trees': 100, 'objective': 'regression',
              'categorical_feature': categorical_col_indices}
    lgbm_regressor = LGBMRegressor(**kwargs)
    # Impute the x and y values
    imp_X = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    imp_y = SimpleImputer(missing_values=np.nan, strategy='mean')
    # reshape to 2D array since SimpleImputer can't work on 1D array
    df_train_y = df_train_y.values.reshape(df_train_y.shape[0], 1)
    imp_y.fit(df_train_y)
    imp_df_y = imp_y.transform(df_train_y)
    imp_X.fit(df_train_X)
    imp_train_X = pd.DataFrame(imp_X.transform(df_train_X))

    class CustomTextTransformer(BaseEstimator, TransformerMixin):
        def __init__(self):
            return

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.astype('U')

    custom_text = CustomTextTransformer()
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
    ct1 = ColumnTransformer([('cu', custom_text, categorical_col_indices)],
                            remainder='passthrough')
    ct2 = ColumnTransformer([('ord', encoder, slice(0, len(categorical_col_indices)))],
                            remainder='passthrough')
    pipeline = Pipeline([('cu', ct1), ('ct', ct2), ('lgbm', lgbm_regressor)])
    pipeline.fit(imp_train_X, imp_df_y[:, 0])
    if pass_categoricals:
        explainer = self.create_explainer(pipeline, imp_train_X,
                                          categorical_features=categorical_col_indices)
    else:
        explainer = self.create_explainer(pipeline, imp_train_X)
    explanation = explainer.explain_global(imp_X.transform(df_test_X))
    verify_serialization(explanation, exist_ok=True)