Python sklearn.preprocessing.QuantileTransformer() Examples
The following are 15 code examples of sklearn.preprocessing.QuantileTransformer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
Example #1
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0 | 6 votes |
def fit(self, X, y=None):
    """Fit the parent transformer, then fit a quantile transformer on ``X``.

    The parent ``fit`` is invoked first (per the original documentation it
    computes the lower and upper quantile cutoffs and the columns to
    transform). ``X`` is then validated with ``check_array`` and used to fit
    a ``QuantileTransformer`` stored on ``self.quantile_transformer_``.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and
        two-dimensional.
    y : ignored
        Accepted but not used by this method.

    Returns
    -------
    self : QuantileExtremeValueTransformer
    """
    super().fit(X)
    validated = check_array(X)
    # Fixed seed keeps the fitted quantiles reproducible across runs.
    column_transformer = QuantileTransformer(copy=True, random_state=0)
    self.quantile_transformer_ = column_transformer
    column_transformer.fit(validated)
    return self
Example #2
Source File: processing.py From CAIL2019 with MIT License | 6 votes |
def do_feature_engineering(list_text):
    """Build a quantile-transformed feature frame from a list of texts.

    The texts are loaded into a single-column frame named ``"col2"``; two
    feature extractors are applied to it, their outputs are checked to share
    one index, concatenated column-wise, and every resulting column is
    refitted and transformed with a ``QuantileTransformer``.

    Parameters
    ----------
    list_text : data accepted by ``pd.DataFrame`` for a single column.

    Returns
    -------
    pandas.DataFrame
        The concatenated features after per-column quantile transformation.
    """
    frame = pd.DataFrame(list_text, columns=["col2"])
    feature_frames = [
        get_length_related_features_col2(frame),
        get_col2_re_features(frame),
    ]

    # Every feature frame must line up row-for-row with the first one.
    reference_index = feature_frames[0].index
    for other in feature_frames[1:]:
        pd.testing.assert_index_equal(reference_index, other.index)

    combined = pd.concat(feature_frames, axis=1)
    scaler = QuantileTransformer(random_state=2019)
    # The same transformer object is re-fitted independently on each column.
    for column in combined.columns:
        combined[column] = scaler.fit_transform(combined[[column]])
    return combined
Example #3
Source File: utils.py From scikit-downscale with Apache License 2.0 | 5 votes |
def fit(self, X):
    """Fit the quantile mapping model.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        Training data.

    Returns
    -------
    self
        The fitted ``QuantileTransformer`` is stored on ``self.x_cdf_fit_``.
    """
    samples = ensure_samples_features(X)

    # Work on a copy so the user-supplied keyword arguments stay untouched;
    # default to one quantile per sample when none was requested.
    transformer_options = self.qt_kwargs.copy()
    if "n_quantiles" not in transformer_options:
        transformer_options["n_quantiles"] = len(samples)

    # maybe detrend the input datasets
    if not self.detrend:
        cdf_input = samples
    else:
        detrender = LinearTrendTransformer(**self.lt_kwargs)
        cdf_input = detrender.fit_transform(samples)

    # calculate the cdfs for X
    # TODO: replace this transformer with something that uses robust
    # empirical cdf plotting positions
    cdf_model = QuantileTransformer(**transformer_options)
    self.x_cdf_fit_ = cdf_model.fit(cdf_input)
    return self
Example #4
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0 | 5 votes |
def _transform_function(self, x, idx=None):
    """Applies single column quantile transform from ``sklearn.preprocessing.QuantileTransformer``.

    Uses ``quantile_transformer_.quantiles_`` calculated during ``fit`` if given an index,
    otherwise the quantiles will be calculated from input ``x``.

    Parameters
    ----------
    x : column of values to transform.
    idx : int or None
        Index of the column in the fitted ``quantiles_`` array. ``None`` (the
        default) means "no fitted column": ``x`` is handed to
        ``quantile_transform_nonrandom`` instead.

    Returns
    -------
    The transformed column, as returned by the underlying transform call.
    """
    # Compare against None explicitly: column index 0 is falsy, and a plain
    # truthiness test would silently ignore the fitted quantiles for the
    # first column.
    if idx is not None:
        return self.quantile_transformer_._transform_col(  # pylint: disable=protected-access
            x, self.quantile_transformer_.quantiles_[:, idx], False
        )
    return quantile_transform_nonrandom(x)
Example #5
Source File: quantile_transformer.py From lale with Apache License 2.0 | 5 votes |
def __init__(
    self,
    n_quantiles=1000,
    output_distribution='uniform',
    ignore_implicit_zeros=False,
    subsample=100000,
    random_state=None,
    copy=True,
):
    """Record the hyperparameters and build the wrapped model from them.

    All arguments are stored verbatim in ``self._hyperparams`` and forwarded
    as keyword arguments to ``SKLModel``, whose instance is kept on
    ``self._wrapped_model``.
    """
    hyperparams = dict(
        n_quantiles=n_quantiles,
        output_distribution=output_distribution,
        ignore_implicit_zeros=ignore_implicit_zeros,
        subsample=subsample,
        random_state=random_state,
        copy=copy,
    )
    self._hyperparams = hyperparams
    self._wrapped_model = SKLModel(**hyperparams)
Example #6
Source File: QuantileTransformer.py From mltk-algo-contrib with Apache License 2.0 | 5 votes |
def __init__(self, options):
    """Configure the estimator from an ``options`` mapping.

    ``options`` is first passed to ``self.handle_options``. Its ``'params'``
    entry (an empty dict when absent) is converted with ``convert_params``,
    declaring ``copy`` as a boolean, ``n_quantiles`` as an integer and
    ``output_distribution`` as a string, and the result is forwarded to
    ``_QuantileTransformer``. ``self.columns`` starts out as ``None``.
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    converted = convert_params(
        raw_params,
        bools=['copy'],
        ints=['n_quantiles'],
        strs=['output_distribution'],
    )

    self.estimator = _QuantileTransformer(**converted)
    self.columns = None
Example #7
Source File: target_transform_inverse_example.py From hyperparameter_hunter with MIT License | 5 votes |
def quantile_transform(train_targets, non_train_targets):
    """Map target frames onto a normal distribution via quantile transform.

    The transformer (100 quantiles, normal output) is fitted on the training
    targets only and then applied to the non-training targets. Both frames
    are modified in place, using the training frame's columns as the
    assignment key for each.

    Returns
    -------
    tuple
        ``(train_targets, non_train_targets, transformer)``, with the fitted
        transformer included as the third element.
    """
    transformer = QuantileTransformer(n_quantiles=100, output_distribution="normal")
    target_columns = train_targets.columns

    fitted_train = transformer.fit_transform(train_targets.values)
    train_targets[target_columns] = fitted_train

    mapped_non_train = transformer.transform(non_train_targets.values)
    non_train_targets[target_columns] = mapped_non_train

    return train_targets, non_train_targets, transformer
Example #8
Source File: test_support.py From hyperparameter_hunter with MIT License | 5 votes |
def bad_quantile_transform(train_targets, non_train_targets):
    """Quantile-transform both target frames but return a bogus third element.

    Performs the same in-place transformation as a regular quantile target
    transform (fit on the training targets, apply to the others), yet the
    third item of the returned tuple is a plain string rather than the
    fitted transformer.

    Returns
    -------
    tuple
        ``(train_targets, non_train_targets, str)``.
    """
    wrong_inverter = "i am the wrong type for an inversion result"
    transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)

    train_values = transformer.fit_transform(train_targets.values)
    train_targets[train_targets.columns] = train_values

    other_values = transformer.transform(non_train_targets.values)
    non_train_targets[train_targets.columns] = other_values

    return train_targets, non_train_targets, wrong_inverter
Example #9
Source File: test_intra_cv_target_transform.py From hyperparameter_hunter with MIT License | 5 votes |
def my_quantile_transform(train_targets, non_train_targets):
    """Quantile-transform target frames to a uniform distribution in place.

    The transformer is fitted on the training targets and then applied to the
    non-training targets; the training frame's columns are used as the
    assignment key for both frames.

    Returns
    -------
    tuple
        ``(train_targets, non_train_targets)``. No transformer is returned.
    """
    uniform_mapper = QuantileTransformer(output_distribution="uniform")
    columns = train_targets.columns

    train_targets[columns] = uniform_mapper.fit_transform(train_targets.values)
    non_train_targets[columns] = uniform_mapper.transform(non_train_targets.values)

    return train_targets, non_train_targets
Example #10
Source File: test_both_stages_transform.py From hyperparameter_hunter with MIT License | 5 votes |
def my_quantile_transform(train_targets, non_train_targets):
    """Apply a uniform-output quantile transform to both target frames.

    Fitting uses only ``train_targets``; ``non_train_targets`` is transformed
    with the already-fitted transformer. Both frames are updated in place and
    returned as a pair.
    """
    transformer = QuantileTransformer(output_distribution="uniform")

    # Fit on the training targets, then overwrite them with the result.
    scaled_train = transformer.fit_transform(train_targets.values)
    train_targets[train_targets.columns] = scaled_train

    # Reuse the fitted quantiles for everything that is not training data.
    scaled_rest = transformer.transform(non_train_targets.values)
    non_train_targets[train_targets.columns] = scaled_rest

    result = (train_targets, non_train_targets)
    return result
Example #11
Source File: test_feature_optimization.py From hyperparameter_hunter with MIT License | 5 votes |
def quantile_transform(train_targets, non_train_targets):
    """Quantile-transform targets to a normal distribution and return the transformer.

    A ``QuantileTransformer`` with 100 quantiles and normal output is fitted
    on the training targets, then applied to the non-training targets. Both
    frames are overwritten in place under the training frame's columns.

    Returns
    -------
    tuple
        ``(train_targets, non_train_targets, transformer)``.
    """
    settings = {"output_distribution": "normal", "n_quantiles": 100}
    transformer = QuantileTransformer(**settings)

    train_result = transformer.fit_transform(train_targets.values)
    train_targets[train_targets.columns] = train_result

    non_train_result = transformer.transform(non_train_targets.values)
    non_train_targets[train_targets.columns] = non_train_result

    return (train_targets, non_train_targets, transformer)
Example #12
Source File: test_data.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_basic(self, output_distribution):
    """Check the ``dpp`` QuantileTransformer against the ``spp`` one.

    Both estimators are fitted on the same chunked uniform sample; their
    fitted state must agree within a loose tolerance, and once the quantiles
    are forced to be identical the transforms must match tightly and the
    inverse transform must round-trip the input.
    """
    random_state = da.random.RandomState(0)
    dask_qt = dpp.QuantileTransformer(output_distribution=output_distribution)
    sklearn_qt = spp.QuantileTransformer(output_distribution=output_distribution)

    X = random_state.uniform(size=(1000, 3), chunks=50)
    for estimator in (dask_qt, sklearn_qt):
        estimator.fit(X)
    assert_estimator_equal(dask_qt, sklearn_qt, atol=0.02)

    # set the quantiles, so that from here out, we're exact
    dask_qt.quantiles_ = sklearn_qt.quantiles_
    dask_transformed = dask_qt.transform(X)
    sklearn_transformed = sklearn_qt.transform(X)
    assert_eq_ar(dask_transformed, sklearn_transformed, atol=1e-7)

    round_tripped = dask_qt.inverse_transform(dask_qt.transform(X))
    assert_eq_ar(X, round_tripped)
Example #13
Source File: test_data.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_types(self, type_, kwargs):
    """Smoke-test fitting on a container built by ``type_(X, **kwargs)``.

    The ``spp`` transformer is fitted on the raw random array and the ``dpp``
    transformer on the wrapped data; the test passes if neither fit raises.
    """
    raw = np.random.uniform(size=(1000, 3))
    wrapped = type_(raw, **kwargs)

    spp.QuantileTransformer().fit(raw)
    dpp.QuantileTransformer().fit(wrapped)
Example #14
Source File: test_data.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_fit_transform_frame(self):
    """Compare ``fit_transform`` on a pandas frame and its ``dd`` counterpart.

    The ``spp`` transformer result on the pandas frame is the reference; the
    ``dpp`` transformer result on the partitioned frame must match it within
    ``1e-3`` relative and absolute tolerance.
    """
    frame = pd.DataFrame(np.random.randn(1000, 3))
    partitioned = dd.from_pandas(frame, 2)

    reference_qt = spp.QuantileTransformer()
    candidate_qt = dpp.QuantileTransformer()

    expected = reference_qt.fit_transform(frame)
    result = candidate_qt.fit_transform(partitioned)

    assert_eq_ar(result, expected, rtol=1e-3, atol=1e-3)
Example #15
Source File: lr_model.py From autogluon with Apache License 2.0 | 5 votes |
def preprocess_train(self, X, feature_types, vect_max_features):
    """Assemble and fit the feature-preprocessing union for training data.

    One sub-pipeline is added for each non-empty entry of ``feature_types``
    (``'language'``, ``'onehot'``, ``'continuous'``, ``'skewed'``). The
    sub-pipelines are combined in a ``FeatureUnion`` stored on
    ``self.pipeline``, which is then fitted on ``X``.

    Parameters
    ----------
    X : training data passed to ``FeatureUnion.fit``.
    feature_types : mapping from feature-kind name to a sized collection of
        column names.
    vect_max_features : forwarded to ``TfidfVectorizer`` as ``max_features``.

    Returns
    -------
    None
    """
    branches = []

    # Free-text columns: prepare, then TF-IDF vectorize.
    language_cols = feature_types['language']
    if len(language_cols) > 0:
        preparator = NlpDataPreprocessor(nlp_cols=language_cols)
        vectorizer = TfidfVectorizer(
            ngram_range=self.params['proc.ngram_range'],
            sublinear_tf=True,
            max_features=vect_max_features,
            tokenizer=self.tokenize,
        )
        text_steps = [("preparator", preparator), ("vectorizer", vectorizer)]
        branches.append(('vect', Pipeline(steps=text_steps)))

    # Categorical columns handled by the one-hot feature generator.
    onehot_cols = feature_types['onehot']
    if len(onehot_cols) > 0:
        onehot_steps = [('generator', OheFeaturesGenerator(cats_cols=onehot_cols))]
        branches.append(('cats', Pipeline(steps=onehot_steps)))

    # Continuous columns: impute, then standardize.
    continuous_cols = feature_types['continuous']
    if len(continuous_cols) > 0:
        continuous_steps = [
            ('generator', NumericDataPreprocessor(cont_cols=continuous_cols)),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('scaler', StandardScaler()),
        ]
        branches.append(('cont', Pipeline(steps=continuous_steps)))

    # Skewed columns: impute, then quantile-map to a normal distribution.
    skewed_cols = feature_types['skewed']
    if len(skewed_cols) > 0:
        skewed_steps = [
            ('generator', NumericDataPreprocessor(cont_cols=skewed_cols)),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('quantile', QuantileTransformer(output_distribution='normal')),  # Or output_distribution = 'uniform'
        ]
        branches.append(('skew', Pipeline(steps=skewed_steps)))

    self.pipeline = FeatureUnion(transformer_list=branches)
    self.pipeline.fit(X)