Python sklearn.preprocessing.Normalizer() Examples
The following are 30 code examples of sklearn.preprocessing.Normalizer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
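Before diving into the examples, it may help to see what Normalizer itself does: it rescales each sample (row) independently to unit norm, in contrast to scalers such as StandardScaler, which operate per feature (column). A minimal sketch of this behavior (the toy array below is purely illustrative, not drawn from any example on this page):

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[4.0, 3.0],
              [1.0, 1.0]])

# norm='l2' (the default) divides each row by its Euclidean length,
# so [4, 3] becomes [0.8, 0.6].
print(Normalizer(norm='l2').fit_transform(X))

# norm='l1' divides by the sum of absolute values; norm='max' divides by
# the largest absolute value in the row.
print(Normalizer(norm='l1').fit_transform(X))
print(Normalizer(norm='max').fit_transform(X))

Because Normalizer is stateless (fit() learns nothing from the data), it appears in many of the pipelines below purely as a per-row rescaling step.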
Example #1
Source File: train.py From skorch with BSD 3-Clause "New" or "Revised" License | 7 votes |
def get_model(with_pipeline=False):
    """Get a multi-layer perceptron model.

    Optionally, put it in a pipeline that scales the data.

    """
    model = NeuralNetClassifier(MLPClassifier)
    if with_pipeline:
        model = Pipeline([
            ('scale', FeatureUnion([
                ('minmax', MinMaxScaler()),
                ('normalize', Normalizer()),
            ])),
            ('select', SelectKBest(k=N_FEATURES)),  # keep input size constant
            ('net', model),
        ])
    return model
Example #2
Source File: bow.py From broca with MIT License | 6 votes |
def __init__(self, min_df=1, max_df=0.9, tokenizer=LemmaTokenizer, hash=False):
    """
    `min_df` is set to filter out extremely rare words,
    since we don't want those to dominate the distance metric.

    `max_df` is set to filter out extremely common words,
    since they don't convey much information.
    """
    # Wrap the specified tokenizer
    t = Tokenizer(tokenizer())

    if hash:
        vectr = HashingVectorizer(input='content', stop_words='english',
                                  lowercase=True, tokenizer=t)
    else:
        vectr = CountVectorizer(input='content', stop_words='english',
                                lowercase=True, tokenizer=t,
                                min_df=min_df, max_df=max_df)

    args = [
        ('vectorizer', vectr),
        ('tfidf', TfidfTransformer(norm=None, use_idf=True, smooth_idf=True)),
        ('normalizer', Normalizer(copy=False))
    ]

    self.pipeline = Pipeline(args)
    self.trained = False
Example #3
Source File: gp_repurposer.py From xfer with Apache License 2.0 | 6 votes |
def __init__(self, source_model: mx.mod.Module, feature_layer_names, context_function=mx.context.cpu,
             num_devices=1, max_function_evaluations=100, apply_l2_norm=False):
    # Call base class constructor with parameters required for meta-models
    super().__init__(source_model, feature_layer_names, context_function, num_devices)

    self.max_function_evaluations = max_function_evaluations
    self.apply_l2_norm = apply_l2_norm

    # Mean of features to use for normalization. Computed in training phase.
    # Used to normalize features in training and in prediction.
    self.feature_mean = None

    # Optimizer to use for training GP model
    self.optimizer = 'lbfgs'

    # Number of inducing points to use for sparse GP
    self.NUM_INDUCING_SPARSE_GP = 100

    # Normalizer to use when apply_l2_norm flag is set
    self.l2_normalizer = Normalizer(norm='l2')
Example #4
Source File: test_scikit.py From pliers with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_within_pipeline():
    pytest.importorskip('cv2')
    pytest.importorskip('sklearn')
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import Normalizer
    stim = join(get_test_data_path(), 'image', 'apple.jpg')
    graph = Graph([BrightnessExtractor(), SharpnessExtractor()])
    trans = PliersTransformer(graph)
    normalizer = Normalizer()
    pipeline = Pipeline([('pliers', trans), ('normalizer', normalizer)])
    res = pipeline.fit_transform(stim)
    assert res.shape == (1, 2)
    assert np.isclose(res[0][0], 0.66393, 1e-5)
    assert np.isclose(res[0][1], 0.74780, 1e-5)
    meta = trans.metadata_
    assert 'onset' in meta.columns
    assert meta['class'][0] == 'ImageStim'
Example #5
Source File: test_normalizer.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_random(self):
    # Generate some random data
    X = _np.random.random(size=(50, 3))

    for param in ("l1", "l2", "max"):
        cur_model = Normalizer(norm=param)

        output = cur_model.fit_transform(X)

        spec = converter.convert(cur_model, ["a", "b", "c"], "out")

        evaluate_transformer(
            spec,
            [dict(zip(["a", "b", "c"], row)) for row in X],
            [{"out": row} for row in output],
        )
Example #6
Source File: test_one_hot_encoder.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_boston_OHE_pipeline(self):
    data = load_boston()

    for categorical_features in [[3], [8], [3, 8], [8, 3]]:
        # Put it in a pipeline so that we can test whether the output dimension
        # handling is correct.
        model = Pipeline(
            [
                ("OHE", OneHotEncoder(categorical_features=categorical_features)),
                ("Normalizer", Normalizer()),
            ]
        )
        model.fit(data.data.copy(), data.target)

        # Convert the model
        spec = sklearn.convert(model, data.feature_names, "out").get_spec()

        input_data = [dict(zip(data.feature_names, row)) for row in data.data]
        output_data = [{"out": row} for row in model.transform(data.data.copy())]

        result = evaluate_transformer(spec, input_data, output_data)

        assert result["num_errors"] == 0
Example #7
Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_kneighbors_with_or_without_self_hit(LSH: callable, metric, n_jobs, verbose):
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric=metric, n_jobs=n_jobs, verbose=verbose)
    lsh.fit(X, y)
    neigh_dist, neigh_ind = lsh.kneighbors(return_distance=True)
    neigh_dist_self, neigh_ind_self = lsh.kneighbors(X, return_distance=True)

    ind_only = lsh.kneighbors(return_distance=False)
    ind_only_self = lsh.kneighbors(X, return_distance=False)

    assert_array_equal(neigh_ind, ind_only)
    assert_array_equal(neigh_ind_self, ind_only_self)

    assert (neigh_ind - neigh_ind_self).mean() <= .01, \
        'More than 1% of neighbors mismatch'
    assert ((neigh_dist - neigh_dist_self) < 0.0001).mean() <= 0.01, \
        'Not almost equal to 4 decimals in more than 1% of neighbor slots'
Example #8
Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_radius_neighbors_with_or_without_self_hit(LSH, metric, n_jobs, verbose):
    X, y = make_classification()
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric=metric, n_jobs=n_jobs, verbose=verbose)
    lsh.fit(X, y)
    radius = lsh.kneighbors(n_candidates=3)[0][:, 2].max()
    neigh_dist, neigh_ind = lsh.radius_neighbors(return_distance=True, radius=radius)
    neigh_dist_self, neigh_ind_self = lsh.radius_neighbors(X, return_distance=True, radius=radius)

    ind_only = lsh.radius_neighbors(return_distance=False, radius=radius)
    ind_only_self = lsh.radius_neighbors(X, return_distance=False, radius=radius)

    assert len(neigh_ind) == len(neigh_ind_self) == len(neigh_dist) == len(neigh_dist_self)

    for i in range(len(neigh_ind)):
        assert_array_equal(neigh_ind[i], ind_only[i])
        assert_array_equal(neigh_ind_self[i], ind_only_self[i])
        assert_array_equal(neigh_ind[i][:3], neigh_ind_self[i][1:4])
        assert_array_almost_equal(neigh_dist[i][:3], neigh_dist_self[i][1:4])
Example #9
Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_squared_euclidean_same_neighbors_as_euclidean(LSH):
    X, y = make_classification(random_state=234)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric='minkowski')
    lsh.fit(X, y)
    neigh_dist_eucl, neigh_ind_eucl = lsh.kneighbors()

    lsh_sq = LSH(metric='sqeuclidean')
    lsh_sq.fit(X, y)
    neigh_dist_sqeucl, neigh_ind_sqeucl = lsh_sq.kneighbors()

    assert_array_equal(neigh_ind_eucl, neigh_ind_sqeucl)
    assert_array_almost_equal(neigh_dist_eucl ** 2, neigh_dist_sqeucl)

    if LSH in LSH_WITH_RADIUS:
        radius = neigh_dist_eucl[:, 2].max()
        rad_dist_eucl, rad_ind_eucl = lsh.radius_neighbors(radius=radius)
        rad_dist_sqeucl, rad_ind_sqeucl = lsh_sq.radius_neighbors(radius=radius ** 2)
        for i in range(len(rad_ind_eucl)):
            assert_array_equal(rad_ind_eucl[i], rad_ind_sqeucl[i])
            assert_array_almost_equal(rad_dist_eucl[i] ** 2, rad_dist_sqeucl[i])
Example #10
Source File: models.py From ntua-slp-semeval2018 with MIT License | 6 votes |
def nbow_model(task, embeddings, word2idx):
    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0, class_weight='balanced')
    elif task == "reg":
        algo = SVR(kernel='linear', C=0.6)
    else:
        raise ValueError("invalid task!")

    embeddings_features = NBOWVectorizer(aggregation=["mean"],
                                         embeddings=embeddings,
                                         word2idx=word2idx,
                                         stopwords=False)

    model = Pipeline([
        ('embeddings-feats', embeddings_features),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo)
    ])

    return model
Example #11
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_make_column_transformer_kwargs():
    scaler = StandardScaler()
    norm = Normalizer()
    ct = make_column_transformer((scaler, 'first'), (norm, ['second']),
                                 n_jobs=3, remainder='drop',
                                 sparse_threshold=0.5)
    assert_equal(ct.transformers, make_column_transformer(
        (scaler, 'first'), (norm, ['second'])).transformers)
    assert_equal(ct.n_jobs, 3)
    assert_equal(ct.remainder, 'drop')
    assert_equal(ct.sparse_threshold, 0.5)
    # invalid keyword parameters should raise an error message
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_column_transformer, (scaler, 'first'), (norm, ['second']),
        transformer_weights={'pca': 10, 'Transf': 1}
    )
Example #12
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.preprocessing.Binarizer, pp.Binarizer)
    self.assertIs(df.preprocessing.FunctionTransformer, pp.FunctionTransformer)
    self.assertIs(df.preprocessing.Imputer, pp.Imputer)
    self.assertIs(df.preprocessing.KernelCenterer, pp.KernelCenterer)
    self.assertIs(df.preprocessing.LabelBinarizer, pp.LabelBinarizer)
    self.assertIs(df.preprocessing.LabelEncoder, pp.LabelEncoder)
    self.assertIs(df.preprocessing.MultiLabelBinarizer, pp.MultiLabelBinarizer)
    self.assertIs(df.preprocessing.MaxAbsScaler, pp.MaxAbsScaler)
    self.assertIs(df.preprocessing.MinMaxScaler, pp.MinMaxScaler)
    self.assertIs(df.preprocessing.Normalizer, pp.Normalizer)
    self.assertIs(df.preprocessing.OneHotEncoder, pp.OneHotEncoder)
    self.assertIs(df.preprocessing.PolynomialFeatures, pp.PolynomialFeatures)
    self.assertIs(df.preprocessing.RobustScaler, pp.RobustScaler)
    self.assertIs(df.preprocessing.StandardScaler, pp.StandardScaler)
Example #13
Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_falconn_parallel():
    X, y = make_classification(random_state=346)
    X = Normalizer().fit_transform(X)
    lsh = FalconnLSH(n_jobs=1)
    lsh.fit(X, y)
    neigh_dist, neigh_ind = lsh.kneighbors()

    lsh_parallel = FalconnLSH(n_jobs=4)
    lsh_parallel.fit(X, y)
    neigh_dist_parallel, neigh_ind_parallel = lsh_parallel.kneighbors()

    assert_array_equal(neigh_ind, neigh_ind_parallel)
    assert_array_almost_equal(neigh_dist, neigh_dist_parallel)
Example #14
Source File: test_lsh.py From scikit-hubness with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_warn_on_invalid_metric(LSH, metric):
    X, y = make_classification(random_state=24643)
    X = Normalizer().fit_transform(X)
    lsh = LSH(metric='euclidean')
    lsh.fit(X, y)
    neigh_dist, neigh_ind = lsh.kneighbors()

    lsh.metric = metric
    with pytest.warns(UserWarning):
        lsh.fit(X, y)
    neigh_dist_inv, neigh_ind_inv = lsh.kneighbors()

    assert_array_equal(neigh_ind, neigh_ind_inv)
    assert_array_almost_equal(neigh_dist, neigh_dist_inv)
Example #15
Source File: pipeline_builder.py From texta with GNU General Public License v3.0 | 5 votes |
def build(self, fields):
    """ Build model Pipeline and Grid Search params """
    params = {}

    # Field transform pipeline per field + params
    transformer_list = []
    for field in fields:
        pipe_key = 'pipe_{}'.format(field)
        steps = []
        steps.append(tuple(['selector', ItemSelector(key=field)]))
        steps.append(self.extractor_list[self.extractor_op].get_step())
        steps.append(self.reductor_list[self.reductor_op].get_step())
        steps.append(self.normalizer_list[self.normalizer_op].get_step())
        transformer_list.append(tuple([pipe_key, Pipeline(steps)]))

        # Nest params inside the union field - Extractor
        p_dict = self.extractor_list[self.extractor_op].get_param()
        for k in p_dict:
            new_k = '{}__{}__{}'.format('union', pipe_key, k)
            params[new_k] = p_dict[k]
        # Nest params inside the union field - Reductor
        p_dict = self.reductor_list[self.reductor_op].get_param()
        for k in p_dict:
            new_k = '{}__{}__{}'.format('union', pipe_key, k)
            params[new_k] = p_dict[k]
        # Nest params inside the union field - Normalizer
        p_dict = self.normalizer_list[self.normalizer_op].get_param()
        for k in p_dict:
            new_k = '{}__{}__{}'.format('union', pipe_key, k)
            params[new_k] = p_dict[k]

    # Classifier pipeline + params
    steps = []
    steps.append(tuple(['union', FeatureUnion(transformer_list=transformer_list)]))
    steps.append(self.classifier_list[self.classifier_op].get_step())
    pipe = Pipeline(steps)
    params.update(self.classifier_list[self.classifier_op].get_param())

    return pipe, params
Example #16
Source File: models.py From ntua-slp-semeval2018 with MIT License | 5 votes |
def bow_model(task, max_features=10000):
    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0, class_weight='balanced')
    elif task == "reg":
        algo = SVR(kernel='linear', C=0.6)
    else:
        raise ValueError("invalid task!")

    word_features = TfidfVectorizer(ngram_range=(1, 1),
                                    tokenizer=lambda x: x,
                                    analyzer='word',
                                    min_df=5,
                                    # max_df=0.9,
                                    lowercase=False,
                                    use_idf=True,
                                    smooth_idf=True,
                                    max_features=max_features,
                                    sublinear_tf=True)

    model = Pipeline([
        ('bow-feats', word_features),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo)
    ])

    return model
Example #17
Source File: sklearn.py From datastories-semeval2017-task4 with MIT License | 5 votes |
def nbow_model(task, embeddings, word2idx):
    if task == "clf":
        algo = LogisticRegression(C=0.6, random_state=0, class_weight='balanced')
    elif task == "reg":
        algo = SVR(kernel='linear', C=0.6)
    else:
        raise ValueError("invalid task!")

    embeddings_features = NBOWVectorizer(aggregation=["mean"],
                                         embeddings=embeddings,
                                         word2idx=word2idx,
                                         stopwords=False)

    preprocessor = TextPreProcessor(
        backoff=['url', 'email', 'percent', 'money', 'phone', 'user', 'time',
                 'url', 'date', 'number'],
        include_tags={"hashtag", "allcaps", "elongated", "repeated",
                      'emphasis', 'censored'},
        fix_html=True,
        segmenter="twitter",
        corrector="twitter",
        unpack_hashtags=True,
        unpack_contractions=True,
        spell_correct_elong=False,
        tokenizer=SocialTokenizer(lowercase=True).tokenize,
        dicts=[emoticons])

    model = Pipeline([
        ('preprocess', CustomPreProcessor(preprocessor, to_list=True)),
        ('embeddings-feats', embeddings_features),
        ('normalizer', Normalizer(norm='l2')),
        ('clf', algo)
    ])

    return model
Example #18
Source File: data_manipulator.py From LSTM_Anomaly_Detector with MIT License | 5 votes |
def normalize(mat):
    return Normalizer(norm='l2').fit_transform(mat)
Example #19
Source File: test_sklearn_normalizer_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_normalizer(self):
    model = Normalizer(norm="l2")
    model_onnx = convert_sklearn(
        model,
        "scikit-learn normalizer",
        [("input", Int64TensorType([None, 1]))],
    )
    self.assertTrue(model_onnx is not None)
    self.assertTrue(len(model_onnx.graph.node) == 1)
Example #20
Source File: test_sklearn_normalizer_converter.py From sklearn-onnx with MIT License | 5 votes |
def test_model_normalizer_float(self):
    model = Normalizer(norm="l2")
    model_onnx = convert_sklearn(
        model,
        "scikit-learn normalizer",
        [("input", FloatTensorType([None, 3]))],
    )
    self.assertTrue(model_onnx is not None)
    self.assertTrue(len(model_onnx.graph.node) == 1)
    dump_data_and_model(
        numpy.array([[1, 1, 3], [3, 1, 2]], dtype=numpy.float32),
        model,
        model_onnx,
        basename="SklearnNormalizerL2-SkipDim1",
    )
Example #21
Source File: bnn_classifier.py From xfer with Apache License 2.0 | 5 votes |
def __init__(self, model: gluon.nn.Sequential, var_posterior: VariationalPosterior,
             normalizer: Normalizer):
    self.model = model
    self.var_posterior = var_posterior
    self.normalizer = normalizer
Example #22
Source File: bnn_repurposer.py From xfer with Apache License 2.0 | 5 votes |
def __init__(self, source_model: mx.mod.Module, feature_layer_names, context_function=mx.cpu, num_devices=1,
             bnn_context_function=mx.cpu, sigma=100.0, num_layers=1, n_hidden=10, num_samples_mc=3,
             learning_rate=1e-3, batch_size=20, num_epochs=200, start_annealing=None, end_annealing=None,
             num_samples_mc_prediction=100, verbose=0):
    # Call base class constructor with parameters required for meta-models
    super().__init__(source_model, feature_layer_names, context_function, num_devices)

    # Initialize BNN specific parameters
    self.sigma = sigma
    self.num_layers = num_layers
    self.n_hidden = n_hidden
    self.num_samples_mc = num_samples_mc
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.num_epochs = num_epochs
    self.num_samples_mc_prediction = num_samples_mc_prediction
    self.verbose = verbose

    self.start_annealing = start_annealing
    self.end_annealing = end_annealing
    self.step_annealing_sample_weight = 1.0 / float(self.end_annealing - self.start_annealing)
    self.annealing_weight = 0.0

    # Initialize variables to track performance
    self.train_acc = []
    self.test_acc = []
    self.moving_loss_total = []
    self.current_loss_total = []
    self.average_loss = []
    self.anneal_weights = []

    # L2 normalization of the features
    self.normalizer = Normalizer(norm='l2')

    self.bnn_context_function = bnn_context_function
    self._context_bnn = self.bnn_context_function()

    # init parameters for constructing network to None. These will be set during repurposing
    self.dim_input = None
    self.num_classes = None
Example #23
Source File: sklearn_example.py From Hunch with Apache License 2.0 | 5 votes |
def train(self, training_data_X, training_data_Y):
    self.normalizer = Normalizer()
    self.svc = svm.SVC(gamma=0.001, C=100.)
    normalised_training_data_X = self.normalizer.fit_transform(training_data_X)
    self.svc.fit(normalised_training_data_X, training_data_Y)
Example #24
Source File: preprocessing.py From open-solution-toxic-comments with MIT License | 5 votes |
def __init__(self):
    self.normalizer = sk_prep.Normalizer()
Example #25
Source File: test_feature_optimization.py From hyperparameter_hunter with MIT License | 5 votes |
def normalize(train_inputs, non_train_inputs):
    normalizer = Normalizer()
    train_inputs[train_inputs.columns] = normalizer.fit_transform(train_inputs.values)
    non_train_inputs[train_inputs.columns] = normalizer.transform(non_train_inputs.values)
    return train_inputs, non_train_inputs
Example #26
Source File: topic.py From Python-DevOps with MIT License | 5 votes |
def train_lsa(corpus, n_topics, max_df=0.95, min_df=2, cleaning=clearstring, stop_words='english'):
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    tfidf_vectorizer = TfidfVectorizer(max_df=max_df, min_df=min_df, stop_words=stop_words)
    tfidf = tfidf_vectorizer.fit_transform(corpus)
    tfidf_features = tfidf_vectorizer.get_feature_names()
    tfidf = Normalizer().fit_transform(tfidf)
    lsa = TruncatedSVD(n_topics).fit(tfidf)
    return TOPIC(tfidf_features, lsa)
Example #27
Source File: misc.py From steppy-toolkit with MIT License | 5 votes |
def __init__(self):
    super().__init__()
    self.normalizer = Normalizer()
Example #28
Source File: main.py From AutoOut with MIT License | 5 votes |
def data_cleaning_formatting(X):
    # Basic cleaning
    X = X.fillna(0)
    X = X.fillna(method='ffill')  # likely intended as a forward fill; fillna('ffill') would insert the literal string

    # Encode data
    X = encode_data(X)
    X = Normalizer().fit_transform(X)
    return X
Example #29
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_column_transformer():
    scaler = StandardScaler()
    norm = Normalizer()
    ct = make_column_transformer((scaler, 'first'), (norm, ['second']))
    names, transformers, columns = zip(*ct.transformers)
    assert_equal(names, ("standardscaler", "normalizer"))
    assert_equal(transformers, (scaler, norm))
    assert_equal(columns, ('first', ['second']))

    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        ct1 = make_column_transformer(([0], norm))
    ct2 = make_column_transformer((norm, [0]))
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    assert_almost_equal(ct1.fit_transform(X_array),
                        ct2.fit_transform(X_array))

    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        make_column_transformer(('first', 'drop'))

    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        make_column_transformer(('passthrough', 'passthrough'),
                                ('first', 'drop'))
Example #30
Source File: test_column_transformer.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_make_column_transformer_pandas():
    pd = pytest.importorskip('pandas')
    X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
    X_df = pd.DataFrame(X_array, columns=['first', 'second'])
    norm = Normalizer()

    # XXX remove in v0.22
    with pytest.warns(DeprecationWarning,
                      match='`make_column_transformer` now expects'):
        ct1 = make_column_transformer((X_df.columns, norm))
    ct2 = make_column_transformer((norm, X_df.columns))
    assert_almost_equal(ct1.fit_transform(X_df),
                        ct2.fit_transform(X_df))