Python sklearn.base.TransformerMixin() Examples
The following are 27 code examples of sklearn.base.TransformerMixin().
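Before the examples, a minimal sketch of what the mixin provides: define fit() and transform() yourself, and TransformerMixin supplies fit_transform() for free.

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class CenteringTransformer(BaseEstimator, TransformerMixin):
    """Subtracts the per-column mean learned during fit."""

    def fit(self, X, y=None):
        self.mean_ = np.asarray(X).mean(axis=0)
        return self  # fit must return self so fit_transform can chain

    def transform(self, X):
        return np.asarray(X) - self.mean_

X = np.array([[1.0, 2.0], [3.0, 4.0]])
# fit_transform comes from TransformerMixin, not from this class
print(CenteringTransformer().fit_transform(X))  # columns now have zero mean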
Example #1
Source File: common_utils.py From interpret-text with MIT License | 8 votes |
def create_pandas_only_svm_classifier(X, y, probability=True):
    class PandasOnlyEstimator(TransformerMixin):
        def fit(self, X, y=None, **fitparams):
            return self

        def transform(self, X, **transformparams):
            dataset_is_df = isinstance(X, pd.DataFrame)
            if not dataset_is_df:
                raise Exception("Dataset must be a pandas dataframe!")
            return X

    pandas_only = PandasOnlyEstimator()

    clf = svm.SVC(gamma=0.001, C=100.0, probability=probability, random_state=777)
    pipeline = Pipeline([("pandas_only", pandas_only), ("clf", clf)])
    return pipeline.fit(X, y)
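A hedged usage sketch, assuming the helper above is defined alongside its pandas, sklearn.svm and Pipeline imports: the PandasOnlyEstimator step rejects any input that is not a DataFrame before it reaches the SVM.

import pandas as pd
from sklearn.datasets import load_iris

data = load_iris()
X_df = pd.DataFrame(data.data, columns=data.feature_names)
model = create_pandas_only_svm_classifier(X_df, data.target)
model.predict(X_df.head())   # fine: input is a DataFrame
# model.predict(data.data)   # would raise: not a DataFrame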
Example #2
Source File: common_utils.py From interpret-community with MIT License | 6 votes |
def create_pandas_only_svm_classifier(X, y, probability=True):
    class PandasOnlyEstimator(TransformerMixin):
        def fit(self, X, y=None, **fitparams):
            return self

        def transform(self, X, **transformparams):
            dataset_is_df = isinstance(X, pd.DataFrame)
            if not dataset_is_df:
                raise Exception("Dataset must be a pandas dataframe!")
            return X

    pandas_only = PandasOnlyEstimator()

    clf = svm.SVC(gamma=0.001, C=100.0, probability=probability, random_state=777)
    pipeline = Pipeline([('pandas_only', pandas_only), ('clf', clf)])
    return pipeline.fit(X, y)
Example #3
Source File: filters.py From causallib with Apache License 2.0 | 6 votes |
def track_selected_features(pipeline_stages, num_features):
    """
    Args:
        pipeline_stages (list[tuple[str, TransformerMixin]]): list of steps.
            Each step is a tuple of Name and Transformer Object.
        num_features (int):

    Returns:
        np.ndarray:
    """
    selected_features = np.arange(num_features)
    for p_name, p in pipeline_stages:
        if not isinstance(p, BaseFeatureSelector):
            continue
        p_features = p.selected_features
        selected_features = selected_features[p_features]
    return selected_features
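A toy numpy illustration of the index composition above: each selection stage reports indices relative to its own input, so chaining stages means repeatedly indexing into the running array of original column positions.

import numpy as np

selected = np.arange(6)            # original features 0..5
stage1 = np.array([0, 2, 3, 5])    # first selector keeps these positions
selected = selected[stage1]        # -> [0, 2, 3, 5]
stage2 = np.array([1, 3])          # second selector, relative to its input
selected = selected[stage2]        # -> [2, 5]: the surviving original columns
print(selected)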
Example #4
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_4():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""
    tpot_obj = TPOTClassifier(
        population_size=5,
        generations=2,
        random_state=42,
        verbosity=0,
        config_dict='TPOT light',
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj.fit(pretest_X, pretest_y)

    assert isinstance(tpot_obj._optimized_pipeline, creator.Individual)
    assert not (tpot_obj._start_datetime is None)
    sklearn_pipeline = tpot_obj.fitted_pipeline_
    operator_count = tpot_obj._operator_count(tpot_obj._optimized_pipeline)
    assert operator_count == 3

    assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
    assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
    assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
    assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example #5
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_3():
    """Assert that TPOT template option generates pipeline when one of steps is a specific operator."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='SelectPercentile-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert sklearn_pipeline.steps[0][0] == 'SelectPercentile'.lower()
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
Example #6
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_2():
    """Assert that TPOT template option generates pipeline when each step is operator type with a duplicate main type."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 4
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[3][1].__class__, ClassifierMixin)
Example #7
Source File: tpot_tests.py From tpot with GNU Lesser General Public License v3.0 | 6 votes |
def test_template_1():
    """Assert that TPOT template option generates pipeline when each step is a type of operator."""
    tpot_obj = TPOTClassifier(
        random_state=42,
        verbosity=0,
        template='Selector-Transformer-Classifier'
    )
    tpot_obj._fit_init()
    pop = tpot_obj._toolbox.population(n=10)
    for deap_pipeline in pop:
        operator_count = tpot_obj._operator_count(deap_pipeline)
        sklearn_pipeline = tpot_obj._toolbox.compile(expr=deap_pipeline)
        assert operator_count == 3
        assert issubclass(sklearn_pipeline.steps[0][1].__class__, SelectorMixin)
        assert issubclass(sklearn_pipeline.steps[1][1].__class__, TransformerMixin)
        assert issubclass(sklearn_pipeline.steps[2][1].__class__, ClassifierMixin)
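For context, a short sketch of the TPOT feature these four tests exercise, assuming tpot is installed: the template string pins each pipeline position to an operator type (or a specific operator), separated by '-'.

from tpot import TPOTClassifier

tpot = TPOTClassifier(
    generations=2,
    population_size=5,
    template='Selector-Transformer-Classifier',  # step types, left to right
    random_state=42,
    verbosity=0,
)
# tpot.fit(X_train, y_train) would then only explore 3-step pipelines whose
# steps are a feature selector, a transformer, and a classifier, in that order.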
Example #8
Source File: test_base.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_clone_pandas_dataframe():

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------
        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    d = np.arange(10)
    df = MockDataFrame(d)
    e = DummyEstimator(df, scalar_param=1)
    cloned_e = clone(e)

    # the test
    assert_true((e.df == cloned_e.df).values.all())
    assert_equal(e.scalar_param, cloned_e.scalar_param)
Example #9
Source File: normalization_strategy_selector.py From Auto-PyTorch with Apache License 2.0 | 5 votes |
def add_normalization_strategy(self, name, normalization_type, is_default_normalization_strategy=False):
    """Add a normalization strategy. Will be called with {pipeline_config, X, Y}

    Arguments:
        name {string} -- name of normalization strategy for definition in config
        normalization_strategy {function} -- callable with {pipeline_config, X}
        is_default_normalization_strategy {bool} -- should the given normalization_strategy be the
            default normalization_strategy if not specified in config
    """
    if (not issubclass(normalization_type, BaseEstimator)
            and not issubclass(normalization_type, TransformerMixin)):
        raise ValueError("normalization_type must be subclass of BaseEstimator")
    self.normalization_strategies[name] = normalization_type
Example #10
Source File: data_splitters.py From MAST-ML with MIT License | 5 votes |
def split(self, X, y, groups):
    n_groups = self.get_n_splits(groups=groups)
    # print('n_groups', n_groups)
    lpgo = ms.LeavePGroupsOut(n_groups=n_groups - 1)
    return lpgo.split(X, y, groups)

# class WithoutElement(BaseEstimator, TransformerMixin):
#     "Train the model without each element, then test on the rows with that element"
#     pass
Example #11
Source File: ABuML.py From abu with GNU General Public License v3.0 | 5 votes |
def fit_transform(self, **kwargs):
    """
    Decorated with @entry_wrapper(); the default arguments support both
    supervised and unsupervised learning. Internally checks
    isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform')
    to decide whether fit_transform is available.

    eg:
        input:  ttn_abu.x.shape
        output: (891, 14)

        input:  ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_PCA).shape
        output: (891, 4)

        input:  ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_KMEAN).shape
        output: (891, 2)

    :param kwargs: callers may pass x and y, which are read via
                       x = kwargs.pop('x', self.x)
                       y = kwargs.pop('y', self.y)
                   plus the fiter_type used by the decorator,
                   eg: ttn_abu.fit_transform(fiter_type=ml.EMLFitType.E_FIT_CLF)
    :return: the result matrix produced by fit_transform
    """
    fiter = self.get_fiter()

    if isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform'):
        x = kwargs.pop('x', self.x)
        y = kwargs.pop('y', self.y)
        if self.is_supervised_learning():
            trans = fiter.fit_transform(x, y)
        else:
            trans = fiter.fit_transform(x)
        return trans
    else:
        self.log_func('{} does not support fit_transform'.format(fiter))
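The capability check in this method is plain duck typing; a small stand-alone sketch of the same test:

from sklearn.base import TransformerMixin
from sklearn.decomposition import PCA
from sklearn.svm import SVC

for fiter in (PCA(n_components=2), SVC()):
    ok = isinstance(fiter, TransformerMixin) or hasattr(fiter, 'fit_transform')
    print(type(fiter).__name__, 'supports fit_transform:', ok)
# PCA supports fit_transform: True
# SVC supports fit_transform: False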
Example #12
Source File: base.py From smrt with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, X):
    """Inherited from the ``TransformerMixin``. Pass the ``X`` array through
    the inferential MLP layers.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        The array of samples that will be encoded into the new hidden layer space.
    """
    return self.encode(X)
Example #13
Source File: test_preprocessing.py From skl-groups with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_basic():
    bags = [np.random.normal(5, 3, size=(np.random.randint(10, 100), 20))
            for _ in xrange(50)]
    feats = Features(bags, stack=True)

    stder = BagStandardizer()
    stdized = stder.fit_transform(bags)
    stdized.make_stacked()

    assert np.allclose(np.mean(stdized.stacked_features), 0)
    assert np.allclose(np.std(stdized.stacked_features), 1)

    first_five = stder.transform(bags[:5])
    assert first_five == stdized[:5]

    minmaxer = BagMinMaxScaler([3, 7])
    minmaxed = minmaxer.fit_transform(feats)
    minmaxed.make_stacked()
    assert np.allclose(np.min(minmaxed.stacked_features, 0), 3)
    assert np.allclose(np.max(minmaxed.stacked_features, 0), 7)

    normer = BagNormalizer('l1')
    normed = normer.fit_transform(Features(bags))
    normed.make_stacked()
    assert np.allclose(np.sum(np.abs(normed.stacked_features), 1), 1)

    class GetMean(BaseEstimator, TransformerMixin):
        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.mean(axis=1)[None, :]

    m = BagPreprocesser(GetMean())
    assert_raises(ValueError, lambda: m.transform(bags))
Example #14
Source File: test_step.py From baikal with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_get_params_without_init(self, teardown):
    """Test edge case where the base class does not define an __init__ method.

    get_params should resolve to object.__init__ which results in an empty dict.
    """

    class TransformerWithoutInit(TransformerMixin, BaseEstimator):
        pass

    class TransformerWithoutInitStep(Step, TransformerWithoutInit):
        pass

    step = TransformerWithoutInitStep()
    assert step.get_params() == {}
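The edge case under test is general scikit-learn behaviour: BaseEstimator.get_params() introspects the __init__ signature, so a class that never defines __init__ reports no parameters. A minimal sketch:

from sklearn.base import BaseEstimator, TransformerMixin

class NoInit(TransformerMixin, BaseEstimator):
    pass

class WithInit(TransformerMixin, BaseEstimator):
    def __init__(self, alpha=1.0):
        self.alpha = alpha

print(NoInit().get_params())    # {}
print(WithInit().get_params())  # {'alpha': 1.0}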
Example #15
Source File: _test.py From ibex with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _generate_bases_test(est, pd_est):
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        except:
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.MetaEstimatorMixin,
                base.ClassifierMixin,
                base.RegressorMixin]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                mixin)

    return test
Example #16
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return NMF(n_components=self.width, random_state=71)
Example #17
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return TruncatedSVD(n_components=self.width, random_state=71)
Example #18
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return LatentDirichletAllocation(n_components=self.width,
                                     learning_method='online',
                                     random_state=71)
Example #19
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def transformer_factory(self) -> TransformerMixin:
    return LatentDirichletAllocation(n_components=self.width,
                                     learning_method='online',
                                     random_state=71)
Example #20
Source File: category_vector.py From talkingdata-adtracking-fraud-detection with MIT License | 5 votes |
def vectorizer_factory(self) -> TransformerMixin:
    raise NotImplementedError
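Examples #16 through #20 follow one pattern: a factory method annotated to return any TransformerMixin, so the surrounding class depends only on the mixin interface. A hypothetical, self-contained sketch of that pattern (the SVDVectorizer name is illustrative, not from the source project):

import numpy as np
from sklearn.base import TransformerMixin
from sklearn.decomposition import TruncatedSVD

class SVDVectorizer:
    """Upstream code calls the factory and relies only on fit_transform."""
    width = 5

    def transformer_factory(self) -> TransformerMixin:
        return TruncatedSVD(n_components=self.width, random_state=71)

    def fit_transform(self, X):
        return self.transformer_factory().fit_transform(X)

X = np.random.rand(20, 30)
print(SVDVectorizer().fit_transform(X).shape)  # (20, 5)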
Example #21
Source File: test_base.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_clone_pandas_dataframe():

    class DummyEstimator(BaseEstimator, TransformerMixin):
        """This is a dummy class for generating numerical features

        This feature extractor extracts numerical features from pandas data
        frame.

        Parameters
        ----------
        df: pandas data frame
            The pandas data frame parameter.

        Notes
        -----
        """
        def __init__(self, df=None, scalar_param=1):
            self.df = df
            self.scalar_param = scalar_param

        def fit(self, X, y=None):
            pass

        def transform(self, X):
            pass

    # build and clone estimator
    d = np.arange(10)
    df = MockDataFrame(d)
    e = DummyEstimator(df, scalar_param=1)
    cloned_e = clone(e)

    # the test
    assert (e.df == cloned_e.df).values.all()
    assert_equal(e.scalar_param, cloned_e.scalar_param)
Example #22
Source File: diff.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def __init__(
    self,
    base_estimator: BaseEstimator = KerasAutoEncoder(kind="feedforward_hourglass"),
    scaler: TransformerMixin = RobustScaler(),
    require_thresholds: bool = True,
    window=None,
):
    """
    Classifier which wraps a ``base_estimator`` and provides a diff error
    based approach to anomaly detection.

    It trains a ``scaler`` to the target **after** training, purely for
    error calculations. The underlying ``base_estimator`` is trained with
    the original, unscaled, ``y``.

    Parameters
    ----------
    base_estimator: sklearn.base.BaseEstimator
        The model to which normal ``.fit``, ``.predict`` methods will be used.
        Defaults to py:class:`gordo.machine.model.models.KerasAutoEncoder`
        with ``kind='feedforward_hourglass'``
    scaler: sklearn.base.TransformerMixin
        Defaults to ``sklearn.preprocessing.RobustScaler``.
        Used for transforming model output and the original ``y`` to calculate
        the difference/error in model output vs expected.
    require_thresholds: bool
        Requires calculating ``thresholds_`` via a call to
        :func:`~DiffBasedAnomalyDetector.cross_validate`. If this is set
        (default True), but :func:`~DiffBasedAnomalyDetector.cross_validate`
        was not called before calling
        :func:`~DiffBasedAnomalyDetector.anomaly`, an ``AttributeError``
        will be raised.
    window: int
        Window size for smoothed thresholds
    """
    self.base_estimator = base_estimator
    self.scaler = scaler
    self.require_thresholds = require_thresholds
    self.window = window
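The docstring above describes the idea compactly; here is a minimal numpy-only sketch of diff-based error scoring (not gordo's actual implementation): scale both the target and the model output with a scaler fitted on the target, then score each sample by the mean absolute difference.

import numpy as np
from sklearn.preprocessing import RobustScaler

y_true = np.random.normal(size=(100, 3))
y_pred = y_true + np.random.normal(scale=0.1, size=(100, 3))

scaler = RobustScaler().fit(y_true)  # fitted on the target, post-training
err = np.abs(scaler.transform(y_true) - scaler.transform(y_pred))
anomaly_score = err.mean(axis=1)     # one score per sample
print(anomaly_score[:5])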
Example #23
Source File: utils.py From gordo with GNU Affero General Public License v3.0 | 5 votes |
def metric_wrapper(metric, scaler: Optional[TransformerMixin] = None):
    """
    Ensures that a given metric works properly when the model itself returns
    a y which is shorter than the target y, and allows scaling the data
    before applying the metrics.

    Parameters
    ----------
    metric
        Metric which must accept y_true and y_pred of the same length
    scaler : Optional[TransformerMixin]
        Transformer which will be applied on y and y_pred before the metrics
        is calculated. Must have method `transform`, so for most scalers it
        must already be fitted on `y`.
    """

    @functools.wraps(metric)
    def _wrapper(y_true, y_pred, *args, **kwargs):
        if scaler:
            logger.debug(
                "Transformer provided to metrics wrapper, scaling y and y_pred before "
                "passing to metrics"
            )
            y_true = scaler.transform(y_true)
            y_pred = scaler.transform(y_pred)
        return metric(y_true[-len(y_pred):], y_pred, *args, **kwargs)

    return _wrapper
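A hedged usage sketch, assuming metric_wrapper above is in scope together with its functools and logger dependencies: the scaler must already be fitted, and the wrapped metric aligns a shorter y_pred against the tail of y_true.

import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

y_true = np.arange(20, dtype=float).reshape(-1, 2)
y_pred = y_true[2:] * 1.05           # model output is shorter than the target

scaler = MinMaxScaler().fit(y_true)  # fitted on y, as the docstring requires
wrapped = metric_wrapper(mean_squared_error, scaler=scaler)
print(wrapped(y_true, y_pred))       # compares only the trailing rows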
Example #24
Source File: investigate.py From sklearn-onnx with MIT License | 4 votes |
def enumerate_pipeline_models(pipe, coor=None, vs=None):
    """
    Enumerates all the models within a pipeline.
    """
    if coor is None:
        coor = (0,)
    yield coor, pipe, vs
    if hasattr(pipe, 'transformer_and_mapper_list') and len(
            pipe.transformer_and_mapper_list):
        # azureml DataTransformer
        raise NotImplementedError("Unable to handle this specific case.")
    elif hasattr(pipe, 'mapper') and pipe.mapper:
        # azureml DataTransformer
        for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)):
            yield couple
    elif hasattr(pipe, 'built_features'):
        # sklearn_pandas.dataframe_mapper.DataFrameMapper
        for i, (columns, transformers, _) in enumerate(pipe.built_features):
            if isinstance(columns, str):
                columns = (columns,)
            if transformers is None:
                yield (coor + (i,)), None, columns
            else:
                for couple in enumerate_pipeline_models(transformers,
                                                        coor + (i,),
                                                        columns):
                    yield couple
    elif isinstance(pipe, Pipeline):
        for i, (_, model) in enumerate(pipe.steps):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif ColumnTransformer is not None and isinstance(pipe, ColumnTransformer):
        for i, (_, fitted_transformer, column) in enumerate(pipe.transformers):
            for couple in enumerate_pipeline_models(
                    fitted_transformer, coor + (i,), column):
                yield couple
    elif isinstance(pipe, FeatureUnion):
        for i, (_, model) in enumerate(pipe.transformer_list):
            for couple in enumerate_pipeline_models(model, coor + (i,)):
                yield couple
    elif TransformedTargetRegressor is not None and isinstance(
            pipe, TransformedTargetRegressor):
        raise NotImplementedError(
            "Not yet implemented for TransformedTargetRegressor.")
    elif isinstance(pipe, (TransformerMixin, ClassifierMixin, RegressorMixin)):
        pass
    elif isinstance(pipe, BaseEstimator):
        pass
    else:
        raise TypeError(
            "Parameter pipe is not a scikit-learn object: {}\n{}".format(
                type(pipe), pipe))
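A usage sketch, assuming enumerate_pipeline_models above is in scope: walking a two-step pipeline yields a coordinate tuple alongside each nested model.

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
for coord, model, _ in enumerate_pipeline_models(pipe):
    print(coord, type(model).__name__)
# (0,) Pipeline
# (0, 0) StandardScaler
# (0, 1) LogisticRegression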
Example #25
Source File: test_algebra_onnx_operators.py From sklearn-onnx with MIT License | 4 votes |
def test_sub(self):

    class CustomOpTransformer(BaseEstimator, TransformerMixin):

        def __init__(self, op_version=None):
            self.op_version = op_version

        def fit(self, X, y=None):
            self.W = np.mean(X, axis=0)
            return self

        def transform(self, X):
            return X - self.W

    mat = np.array([[0., 1.], [1., 2.], [3., 4.]])
    tr = CustomOpTransformer(op_version=None)
    tr.fit(mat)
    z = tr.transform(mat)

    def conv(scope, operator, container):
        W = operator.raw_operator.W.astype(container.dtype)
        op = OnnxSub(
            operator.inputs[0], W, output_names=operator.outputs,
            op_version=TARGET_OPSET)
        op.add_to(scope, container)
        text = str(container)
        if 'name:"Su_Sub"' not in text:
            raise AssertionError(
                "Unnamed operator: '{}'".format(text))
        nin = list(op.enumerate_initial_types())
        nno = list(op.enumerate_nodes())
        nva = list(op.enumerate_variables())
        assert len(nin) == 1
        assert nin[0][0] == 'input'
        assert nin[0][1].shape == [None, 2]
        assert len(nno) == 1
        assert nno[0].output_names == ['variable']
        assert len(nva) == 1
        assert isinstance(nva[0], tuple)
        assert nva[0][1] == 0

    def shape(operator):
        N = operator.inputs[0].type.shape[0]
        W = operator.raw_operator.W
        operator.outputs[0].type.shape = [N, W.shape[0]]

    model_onnx = convert_sklearn(
        tr, 'a-sub', [('input', FloatTensorType([None, 2]))],
        custom_shape_calculators={CustomOpTransformer: shape},
        custom_conversion_functions={CustomOpTransformer: conv})

    sess = InferenceSession(model_onnx.SerializeToString())
    z2 = sess.run(None, {'input': mat.astype(np.float32)})[0]
    assert_almost_equal(z, z2)
Example #26
Source File: test_algebra_onnx_operators.py From sklearn-onnx with MIT License | 4 votes |
def test_sub_div(self):

    class CustomOpTransformer(BaseEstimator, TransformerMixin):

        def __init__(self):
            pass

        def fit(self, X, y=None):
            self.W = np.mean(X, axis=0)
            self.S = np.std(X, axis=0)
            return self

        def transform(self, X):
            return (X - self.W) / self.S

    mat = np.array([[0., 1.], [0., 1.], [2., 2.]])
    tr = CustomOpTransformer()
    tr.fit(mat)
    z = tr.transform(mat)

    def conv(scope, operator, container):
        W = operator.raw_operator.W.astype(np.float32)
        S = operator.raw_operator.S.astype(np.float32)
        X = operator.inputs[0]
        out = operator.outputs
        op = OnnxDiv(
            OnnxSub(X, W, op_version=container.target_opset), S,
            output_names=out, op_version=container.target_opset)
        op.add_to(scope, container)

    def shape(operator):
        N = operator.inputs[0].type.shape[0]
        W = operator.raw_operator.W
        operator.outputs[0].type.shape = [N, W.shape[0]]

    model_onnx = convert_sklearn(
        tr, 'a-sub-div', [('input', FloatTensorType([None, 2]))],
        custom_shape_calculators={CustomOpTransformer: shape},
        custom_conversion_functions={CustomOpTransformer: conv},
        target_opset=None)

    try:
        sess = InferenceSession(model_onnx.SerializeToString())
    except RuntimeError as e:
        raise AssertionError(
            "Cannot load model\n---\n{}\n---".format(model_onnx)) from e
    z2 = sess.run(None, {'input': mat.astype(np.float32)})[0]
    assert_almost_equal(z, z2)
Example #27
Source File: common_tabular_tests.py From interpret-community with MIT License | 4 votes |
def verify_explain_model_categorical(self, pass_categoricals=False):
    headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
               "num_doors", "body_style", "drive_wheels", "engine_location", "wheel_base",
               "length", "width", "height", "curb_weight", "engine_type", "num_cylinders",
               "engine_size", "fuel_system", "bore", "stroke", "compression_ratio",
               "horsepower", "peak_rpm", "city_mpg", "highway_mpg", "price"]
    df = retrieve_dataset('imports-85.csv', header=None, names=headers, na_values="?")
    df_y = df['price']
    df_X = df.drop(columns='price')
    df_train_X, df_test_X, df_train_y, df_test_y = train_test_split(df_X, df_y,
                                                                    test_size=0.2,
                                                                    random_state=7)
    # Encode strings to ordinal values
    categorical_col_names = list(df_train_X.select_dtypes(include='object').columns)
    categorical_col_indices = [df_train_X.columns.get_loc(col_name)
                               for col_name in categorical_col_names]
    kwargs = {'num_leaves': 31, 'num_trees': 100, 'objective': 'regression',
              'categorical_feature': categorical_col_indices}
    lgbm_regressor = LGBMRegressor(**kwargs)
    # Impute the x and y values
    imp_X = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    imp_y = SimpleImputer(missing_values=np.nan, strategy='mean')
    # reshape to 2D array since SimpleImputer can't work on 1D array
    df_train_y = df_train_y.values.reshape(df_train_y.shape[0], 1)
    imp_y.fit(df_train_y)
    imp_df_y = imp_y.transform(df_train_y)
    imp_X.fit(df_train_X)
    imp_train_X = pd.DataFrame(imp_X.transform(df_train_X))

    class CustomTextTransformer(BaseEstimator, TransformerMixin):
        def __init__(self):
            return

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            return X.astype('U')

    custom_text = CustomTextTransformer()
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
    ct1 = ColumnTransformer([('cu', custom_text, categorical_col_indices)],
                            remainder='passthrough')
    ct2 = ColumnTransformer([('ord', encoder, slice(0, len(categorical_col_indices)))],
                            remainder='passthrough')
    pipeline = Pipeline([('cu', ct1), ('ct', ct2), ('lgbm', lgbm_regressor)])
    pipeline.fit(imp_train_X, imp_df_y[:, 0])
    if pass_categoricals:
        explainer = self.create_explainer(pipeline, imp_train_X,
                                          categorical_features=categorical_col_indices)
    else:
        explainer = self.create_explainer(pipeline, imp_train_X)
    explanation = explainer.explain_global(imp_X.transform(df_test_X))
    verify_serialization(explanation, exist_ok=True)