Python sklearn.preprocessing.LabelBinarizer() Examples
The following are 30 code examples of sklearn.preprocessing.LabelBinarizer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
Example #1
Source File: mmbot.py From MaliciousMacroBot with MIT License | 8 votes |
def mmb_evaluate_model(self):
    """
    Cross-validate the malicious / benign classifier on a training split.

    Reads the predictive feature columns and the ``label`` column from
    ``self.modeldata``, stores them on ``self.clf_X`` / ``self.clf_y``,
    holds out 20% of the rows (``random_state=0``) and runs 5-fold cross
    validation of a random forest on the remaining rows.

    :return: dict with keys ``accuracy``, ``f1``, ``precision`` and
        ``recall``, each holding the result of ``cross_val_score``.
    """
    feature_columns = self.features['predictive_features']
    self.clf_X = self.modeldata[feature_columns].values
    self.clf_y = np.array(self.modeldata['label'])

    # Only the training portion is used below; the held-out part is unused.
    X_train, _X_test, y_train, _y_test = train_test_split(
        self.clf_X, self.clf_y, test_size=0.2, random_state=0)

    # Keep the first column of the binarized labels as a flat vector.
    binarized = LabelBinarizer().fit_transform(y_train)
    y_train = np.array([row[0] for row in binarized])

    eval_cls = RandomForestClassifier(n_estimators=100, max_features=.2)
    eval_cls.fit(X_train, y_train)

    def _cross_validate(scoring):
        # Same estimator, data and fold count for every metric.
        return cross_val_score(eval_cls, X_train, y_train, cv=5,
                               scoring=scoring)

    recall = _cross_validate('recall')
    precision = _cross_validate('precision')
    accuracy = _cross_validate('accuracy')
    f1_score = _cross_validate('f1_macro')

    return {'accuracy': accuracy,
            'f1': f1_score,
            'precision': precision,
            'recall': recall}
Example #2
Source File: test_k_neighbors_classifier.py From coremltools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_conversion_with_sparse_y(self):
    """Converting a model fitted on sparse-binarized y must raise ValueError."""
    from sklearn.model_selection import train_test_split
    from sklearn import preprocessing

    X_train, _X_test, y_train, _y_test = train_test_split(
        self.iris_X, self.iris_y, test_size=0.2, train_size=0.8
    )

    # sparse_output=True makes the binarizer emit a sparse label matrix.
    binarizer = preprocessing.LabelBinarizer(sparse_output=True)
    sparse_y = binarizer.fit_transform(y_train)

    knn = KNeighborsClassifier(algorithm="brute")
    knn.fit(X_train, sparse_y)

    with self.assertRaises(ValueError):
        sklearn.convert(knn)
Example #3
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_LabelBinarizer(self):
    """Round-trip a ModelSeries through LabelBinarizer fit/transform/inverse."""
    values = np.array([1, 2, 3, 2])
    series = pdml.ModelSeries(values, index=['a', 'b', 'c', 'd'])
    one_hot = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0]])

    def check_encoded(frame):
        # The encoded result must be a ModelFrame holding the one-hot rows.
        self.assertIsInstance(frame, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(frame.values, one_hot)

    # Separate fit and transform calls.
    binarizer = series.pp.LabelBinarizer()
    series.fit(binarizer)
    encoded = series.transform(binarizer)
    check_encoded(encoded)
    tm.assert_index_equal(encoded.index, series.index)

    # Combined fit_transform with a fresh estimator.
    binarizer = series.pp.LabelBinarizer()
    encoded = series.fit_transform(binarizer)
    check_encoded(encoded)

    # Decoding must give back the original labels.
    decoded = encoded.inverse_transform(binarizer)
    self.assertIsInstance(decoded, pdml.ModelFrame)
    self.assert_numpy_array_almost_equal(decoded.values.flatten(), values)
    tm.assert_index_equal(encoded.index, series.index)
Example #4
Source File: test_sklearn_label_binariser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_label_binariser_binary_labels(self):
    """Convert a LabelBinarizer fitted on two distinct labels and dump it."""
    labels = np.array([1, 0, 0, 0, 1])
    binarizer = LabelBinarizer().fit(labels)

    input_spec = [("input", Int64TensorType([None]))]
    onnx_model = convert_sklearn(
        binarizer, "scikit-learn label binariser", input_spec)
    self.assertIsNotNone(onnx_model)

    # The allow_failure expression targets onnxruntime versions <= 0.2.1.
    dump_data_and_model(
        labels.astype(np.int64),
        binarizer,
        onnx_model,
        basename="SklearnLabelBinariserBinaryLabels",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #5
Source File: test_sklearn_label_binariser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_label_binariser_neg_label(self):
    """Convert a LabelBinarizer built with ``neg_label=-101`` and dump it."""
    labels = np.array([1, 2, 6, 4, 2])
    binarizer = LabelBinarizer(neg_label=-101).fit(labels)

    input_spec = [("input", Int64TensorType([None]))]
    onnx_model = convert_sklearn(
        binarizer, "scikit-learn label binariser", input_spec)
    self.assertIsNotNone(onnx_model)

    # The allow_failure expression targets onnxruntime versions <= 0.2.1.
    dump_data_and_model(
        labels.astype(np.int64),
        binarizer,
        onnx_model,
        basename="SklearnLabelBinariserNegLabel",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #6
Source File: test_sklearn_label_binariser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_label_binariser_neg_pos_label(self):
    """Convert a LabelBinarizer with ``neg_label=10, pos_label=20`` and dump it."""
    labels = np.array([1, 2, 6, 4, 2])
    binarizer = LabelBinarizer(neg_label=10, pos_label=20).fit(labels)

    input_spec = [("input", Int64TensorType([None]))]
    onnx_model = convert_sklearn(
        binarizer, "scikit-learn label binariser", input_spec)
    self.assertIsNotNone(onnx_model)

    # The allow_failure expression targets onnxruntime versions <= 0.2.1.
    dump_data_and_model(
        labels.astype(np.int64),
        binarizer,
        onnx_model,
        basename="SklearnLabelBinariserNegPosLabel",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #7
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_objectmapper(self):
    """Each ``df.preprocessing`` attribute must be the same object as on ``pp``."""
    df = pdml.ModelFrame([])

    # Checked in this order; the first mismatch fails the test.
    estimator_names = [
        'Binarizer',
        'FunctionTransformer',
        'Imputer',
        'KernelCenterer',
        'LabelBinarizer',
        'LabelEncoder',
        'MultiLabelBinarizer',
        'MaxAbsScaler',
        'MinMaxScaler',
        'Normalizer',
        'OneHotEncoder',
        'PolynomialFeatures',
        'RobustScaler',
        'StandardScaler',
    ]
    for name in estimator_names:
        self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
Example #8
Source File: lstm.py From mindmeld with Apache License 2.0 | 6 votes |
def setup_model(self, config):
    """Initialise encoders, the TensorFlow graph and embedding helpers.

    Args:
        config: object exposing ``params`` (forwarded to ``set_params``),
            ``example_type`` and ``features``.
    """
    self.set_params(**config.params)

    # Separate binarizers for the output labels and the gazetteer features.
    self.label_encoder = LabelBinarizer()
    self.gaz_encoder = LabelBinarizer()

    self.graph = tf.Graph()
    self.saver = None

    self.example_type = config.example_type
    self.features = config.features

    self.query_encoder = WordSequenceEmbedding(
        self.padding_length,
        self.token_embedding_dimension,
        self.token_pretrained_embedding_filepath,
    )

    # The character encoder is only created when char embeddings are enabled.
    if not self.use_char_embeddings:
        return

    self.char_encoder = CharacterSequenceEmbedding(
        self.padding_length,
        self.character_embedding_dimension,
        self.max_char_per_word,
    )
Example #9
Source File: lstm.py From mindmeld with Apache License 2.0 | 6 votes |
def _gaz_transform(self, list_of_tokens_to_transform):
    """Encode gazetteer labels, widening the two-class case to two columns.

    The fitted ``self.gaz_encoder`` emits a single column when it knows
    exactly two classes, but one column per class otherwise. To keep the
    layout uniform, the two-class output is expanded to ``[1 - x, x]`` so
    the classes are encoded as ``[1, 0]`` and ``[0, 1]``.

    Args:
        list_of_tokens_to_transform (list): A sequence of class labels

    Returns:
        (array): corrected encoding from the binarizer
    """
    encoded = self.gaz_encoder.transform(list_of_tokens_to_transform)

    # Anything other than exactly two classes is passed through untouched.
    if len(self.gaz_encoder.classes_) != 2:
        return encoded

    complement = 1 - encoded
    return np.hstack((complement, encoded))
Example #10
Source File: train_model.py From production-tools with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_mnist_data():
    """Load the digits data set via ``load_digits`` with binarized labels.

    Returns
    -------
    X : array-like, shape=[n_samples, n_features]
        The ``data`` attribute of the loaded data set.

    y : array-like
        The ``target`` attribute after ``LabelBinarizer().fit_transform``,
        i.e. the binarized labels rather than the raw label vector.
    """
    digits = load_digits()
    features = digits.data
    binarized_labels = LabelBinarizer().fit_transform(digits.target)
    return features, binarized_labels
Example #11
Source File: test_classify.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_sklearn_labelbin(self):
    """Overriding ``classes_`` on a fitted binarizer must survive ``transform``."""
    m_probs = np.array([1.0, .81, .85, .81, .85, .81])
    u_probs = np.array([1.0, .23, .50, .23, .30, 0.13])

    # Build the training comparison vectors.
    X_train, true_links = binary_vectors(
        1000, 500, m=m_probs, u=u_probs, random_state=535, return_links=True)

    first_column = X_train.iloc[:, 0]
    second_column = X_train.iloc[:, 1]

    binarizer = LabelBinarizer()
    binarizer.fit(first_column)
    # The first column is expected to hold a single distinct value.
    assert len(binarizer.classes_) == 1

    # Force both classes manually, then confirm transform keeps them.
    binarizer.classes_ = np.array([0, 1])
    assert len(binarizer.classes_) == 2

    binarizer.transform(second_column)
    assert len(binarizer.classes_) == 2
Example #12
Source File: stacking.py From stacked_generalization with Apache License 2.0 | 6 votes |
def _get_child_predict(self, clf, X, index=None):
    """Return the stage-0 output of ``clf`` for ``X`` as a 2-D feature block.

    Three strategies are tried in order:

    1. If ``self.stack_by_proba`` is set and ``clf`` has ``predict_proba``,
       use the class probabilities with the first column dropped. When
       ``self.save_stage0`` is set and ``index`` is given, the probabilities
       are obtained through ``util.saving_predict_proba`` instead.
    2. If ``clf`` has ``predict``, use the predictions: binarized for
       ``ClassifierMixin`` instances, otherwise reshaped to one column.
    3. Otherwise fall back to ``clf.fit_transform(X)``.

    :param clf: child estimator.
    :param X: samples to predict on.
    :param index: optional identifier forwarded to
        ``util.saving_predict_proba``.
    :return: array of child outputs.
    """
    if self.stack_by_proba and hasattr(clf, 'predict_proba'):
        if self.save_stage0 and index is not None:
            proba = util.saving_predict_proba(clf, X, index)
        else:
            proba = clf.predict_proba(X)
        # Skip the first probability column.
        return proba[:, 1:]

    if hasattr(clf, 'predict'):
        predict_result = clf.predict(X)
        if isinstance(clf, ClassifierMixin):
            # fit_transform fits on its own; the original's extra fit() call
            # on the same data beforehand was redundant.
            return LabelBinarizer().fit_transform(predict_result)
        return predict_result.reshape((predict_result.size, 1))

    return clf.fit_transform(X)
Example #13
Source File: test_sklearn_label_binariser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_label_binariser_pos_label(self):
    """Convert a LabelBinarizer built with ``pos_label=123`` and dump it."""
    labels = np.array([1, 2, 6, 4, 2])
    binarizer = LabelBinarizer(pos_label=123).fit(labels)

    input_spec = [("input", Int64TensorType([None]))]
    onnx_model = convert_sklearn(
        binarizer, "scikit-learn label binariser", input_spec)
    self.assertIsNotNone(onnx_model)

    # The allow_failure expression targets onnxruntime versions <= 0.2.1.
    dump_data_and_model(
        labels.astype(np.int64),
        binarizer,
        onnx_model,
        basename="SklearnLabelBinariserPosLabel",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #14
Source File: test_sklearn_label_binariser_converter.py From sklearn-onnx with MIT License | 6 votes |
def test_model_label_binariser_default(self):
    """Convert a default-configured LabelBinarizer and dump it."""
    labels = np.array([1, 2, 6, 4, 2])
    binarizer = LabelBinarizer().fit(labels)

    input_spec = [("input", Int64TensorType([None]))]
    onnx_model = convert_sklearn(
        binarizer, "scikit-learn label binariser", input_spec)
    self.assertIsNotNone(onnx_model)

    # The allow_failure expression targets onnxruntime versions <= 0.2.1.
    dump_data_and_model(
        labels.astype(np.int64),
        binarizer,
        onnx_model,
        basename="SklearnLabelBinariserDefault",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example #15
Source File: base.py From polylearn with BSD 2-Clause "Simplified" License | 6 votes |
def _check_X_y(self, X, y):
    """Validate ``X``/``y`` and encode a binary target as -1 / +1 doubles.

    Rejects targets with two or more columns and any target whose
    ``type_of_target`` is not ``'binary'``. The fitted binarizer is kept on
    ``self.label_binarizer_``.

    Returns
    -------
    (X, y) : the validated ``X`` and the raveled, double-typed encoded ``y``.

    Raises
    ------
    TypeError
        If the target is not binary.
    """
    # Explicit check so that the error message is helpful on older
    # scikit-learn releases (the original note reads "sklearn < 1.17";
    # NOTE(review): presumably 0.17 was meant).
    has_multiple_columns = (
        hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2
    )
    if has_multiple_columns or type_of_target(y) != 'binary':
        raise TypeError("Only binary targets supported. For training "
                        "multiclass or multilabel models, you may use the "
                        "OneVsRest or OneVsAll metaestimators in "
                        "scikit-learn.")

    X, validated_y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                               multi_output=False)

    self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
    encoded = self.label_binarizer_.fit_transform(validated_y)
    y = encoded.ravel().astype(np.double)
    return X, y
Example #16
Source File: SpectraLearnPredict.py From SpectralMachine with GNU General Public License v3.0 | 6 votes |
def formatClass(rootFile, Cl):
    """Binarize the class labels and save a histogram of their distribution.

    The histogram is written to ``rootFile + '_ClassDistrib.png'`` and is
    additionally shown on screen when ``tfDef.plotClassDistribTF`` is True.

    :param rootFile: path prefix used for the saved plot.
    :param Cl: class labels; each must be convertible with ``float``.
    :return: the labels after ``LabelBinarizer().fit_transform``.
    """
    import sklearn.preprocessing as pp

    print('==========================================================================\n')
    print(' Running basic TensorFlow. Creating class data in binary form...')
    Cl2 = pp.LabelBinarizer().fit_transform(Cl)

    import matplotlib.pyplot as plt

    # Numeric view of the labels, used for both the data and the bin edges.
    class_values = [float(x) for x in Cl]
    plt.hist(class_values, bins=np.unique(class_values), edgecolor="black")
    plt.xlabel('Class')
    plt.ylabel('Occurrances')
    plt.title('Class distibution')

    # Save plot
    plot_path = rootFile + '_ClassDistrib.png'
    plt.savefig(plot_path, dpi = 160, format = 'png')

    if tfDef.plotClassDistribTF == True:
        print(' Plotting Class distibution \n')
        plt.show()

    return Cl2

#********************************************************************************
Example #17
Source File: test_mlp_classifier.py From muffnn with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_cross_val_predict():
    """MLPClassifier must work inside cross_val_predict for multiclass data."""
    features, labels = load_iris(return_X_y=True)
    targets = LabelBinarizer().fit_transform(labels)
    scaled = StandardScaler().fit_transform(features)

    classifier = MLPClassifier(
        n_epochs=10,
        solver_kwargs={'learning_rate': 0.05},
        random_state=4567,
    )
    classifier = classifier.fit(scaled, targets)

    folds = KFold(n_splits=4, random_state=457, shuffle=True)
    out_of_sample = cross_val_predict(
        classifier, scaled, targets, cv=folds, method='predict_proba')

    # average=None yields one AUC per class; every one must clear the bar.
    per_class_auc = roc_auc_score(targets, out_of_sample, average=None)
    assert np.all(per_class_auc >= 0.96)
Example #18
Source File: crf_unit.py From medical-entity-recognition with Apache License 2.0 | 6 votes |
def bio_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIOSE-encoded tags.

    Computes token-level metrics for every tag found in ``y_true``. The
    ``'O'`` tag is included in the report: the original code computed a tag
    set without ``'O'`` but immediately overwrote it, so that dead
    assignment has been removed and this docstring describes what the
    function actually does.

    :param y_true: gold tags, passed directly to ``LabelBinarizer``
        (presumably a flat sequence of tag strings - verify against caller).
    :param y_pred: predicted tags, encoded with the binarizer fitted on
        ``y_true``.
    :return: the result of ``classification_report``.
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(y_true)
    y_pred_combined = lb.transform(y_pred)

    # Order tags by the part after the first '-' and then by the prefix,
    # e.g. "B-PER" is keyed as ["PER", "B"].
    tagset = sorted(set(lb.classes_), key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
Example #19
Source File: crf_sent_tagger.py From Jiayan with MIT License | 6 votes |
def eval(self, test_x, test_y, crf_model):
    """Tag ``test_x`` with a stored CRF model and print a per-tag report.

    :param test_x: iterable of feature sequences, one per sample.
    :param test_y: iterable of gold tag sequences aligned with ``test_x``.
    :param crf_model: model location handed to ``pycrfsuite.Tagger.open``.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)

    y_pred = [tagger.tag(feat_list) for feat_list in test_x]

    # Flatten the per-sample sequences into single token-level lists.
    gold_tags = list(chain.from_iterable(test_y))
    predicted_tags = list(chain.from_iterable(y_pred))

    lb = LabelBinarizer()
    y_true_all = lb.fit_transform(gold_tags)
    y_pred_all = lb.transform(predicted_tags)

    tagset = sorted(set(lb.classes_))
    class_indices = {}
    for idx, cls in enumerate(lb.classes_):
        class_indices[cls] = idx

    report = classification_report(
        y_true_all,
        y_pred_all,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
        digits=5
    )
    print(report)
Example #20
Source File: xgboost.py From sklearn2pmml with GNU Affero General Public License v3.0 | 6 votes |
def make_xgboost_dataframe_mapper(dtypes, missing_value_aware = True):
    """Construct a DataFrameMapper for feeding complex data into an XGBModel.

    Categorical columns get a sparse-output label binarizer; all other
    columns are mapped with no transformer.

    Parameters
    ----------

    dtypes: mapping of column to dtype, or iterable of tuples (column, dtype)
        Objects exposing ``items()`` are iterated through it; anything else
        is iterated directly and must yield ``(column, dtype)`` pairs.

    missing_value_aware: boolean
        If true, use missing value aware transformers.

    Returns
    -------
    DataFrameMapper

    """
    # The original only worked with objects that have ``items()`` although
    # the documentation promised an iterable of tuples; support both.
    column_dtypes = dtypes.items() if hasattr(dtypes, "items") else dtypes

    features = list()
    for column, dtype in column_dtypes:
        if _is_categorical(dtype):
            if missing_value_aware:
                binarizer = PMMLLabelBinarizer(sparse_output = True)
            else:
                binarizer = LabelBinarizer(sparse_output = True)
            features.append(([column], binarizer))
        else:
            features.append(([column], None))
    return DataFrameMapper(features)
Example #21
Source File: crf_pos_tagger.py From Jiayan with MIT License | 6 votes |
def eval(self, test_x, test_y, crf_model):
    """Tag ``test_x`` with a stored CRF model and print a per-tag report.

    :param test_x: iterable of feature sequences, one per sample.
    :param test_y: iterable of gold tag sequences aligned with ``test_x``.
    :param crf_model: model location handed to ``pycrfsuite.Tagger.open``.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)

    y_pred = [tagger.tag(feat_list) for feat_list in test_x]

    # Flatten the per-sample sequences into single token-level lists.
    gold_tags = list(chain.from_iterable(test_y))
    predicted_tags = list(chain.from_iterable(y_pred))

    lb = LabelBinarizer()
    y_true_all = lb.fit_transform(gold_tags)
    y_pred_all = lb.transform(predicted_tags)

    tagset = sorted(set(lb.classes_))
    class_indices = {}
    for idx, cls in enumerate(lb.classes_):
        class_indices[cls] = idx

    report = classification_report(
        y_true_all,
        y_pred_all,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
        digits=5
    )
    print(report)
Example #22
Source File: char2ir_gpu.py From plastering with MIT License | 6 votes |
def encode_labels(self, label_dict, srcids):
    """Binarize and zero-pad the label sequences of the given source ids.

    Fits ``self.le`` on the tag ``'O'``, the optional Brick ``B_``/``I_``
    tags (read from ``brick/tags.json`` when ``self.use_brick_flag`` is set)
    and every label found in ``label_dict`` for ``srcids``. Each label
    sequence is then encoded and padded with zero rows up to
    ``self.max_len``.

    :param label_dict: ``label_dict[srcid][metadata_type]`` is a sequence of
        labels.
    :param srcids: source ids to encode; their metadata types are taken
        from ``self.sentence_dict[srcid]``.
    :return: ``np.stack`` of one padded matrix per (srcid, metadata type).
    """
    flat_labels = ['O']
    if self.use_brick_flag:
        with open('brick/tags.json', 'r') as fp:
            brick_tags = json.load(fp)
        flat_labels += ['B_' + tag for tag in brick_tags] + \
                       ['I_' + tag for tag in brick_tags]
    flat_labels += reduce(adder, [reduce(adder, label_dict[srcid].values())
                                  for srcid in srcids])
    self.le = LabelBinarizer().fit(flat_labels)

    # Encoded width, measured once from a label that is always fitted. The
    # original read ``encoded.shape[1]`` from the previous iteration, which
    # raised UnboundLocalError when the first label list was empty.
    n_columns = self.le.transform(['O']).shape[1]

    stack = []
    for srcid in srcids:
        for metadata_type in self.sentence_dict[srcid].keys():
            labels = label_dict[srcid][metadata_type]
            if len(labels) == 0:
                encoded = np.zeros((self.max_len, n_columns))
            else:
                encoded = self.le.transform(labels)
                padding = np.zeros(
                    (self.max_len - encoded.shape[0], encoded.shape[1]))
                encoded = np.vstack([encoded, padding])
            stack.append(encoded)
    return np.stack(stack)
Example #23
Source File: one_against_rest.py From qiskit-aqua with Apache License 2.0 | 6 votes |
def train(self, x, y):
    """
    Fit one estimator per column of the binarized labels.

    The fitted binarizer is stored on ``self.label_binarizer_``, its classes
    on ``self.classes`` and the fitted estimators on ``self.estimators``.

    Args:
        x (numpy.ndarray): input points
        y (numpy.ndarray): input labels
    Raises:
        Exception: given all data points are assigned to the same class,
                    the prediction would be boring
    """
    self.label_binarizer_ = LabelBinarizer(neg_label=0)
    Y = self.label_binarizer_.fit_transform(y)
    self.classes = self.label_binarizer_.classes_

    self.estimators = []
    for raw_column in Y.T:
        column = np.ravel(raw_column)

        # A column with a single distinct value gives nothing to separate.
        distinct_values = np.unique(column)
        if len(distinct_values) == 1:
            raise Exception("given all data points are assigned to the same class, "
                            "the prediction would be boring.")

        estimator = self.estimator_cls(*self.params)
        estimator.fit(x, column)
        self.estimators.append(estimator)
Example #24
Source File: training.py From OpenCV-3-x-with-Python-By-Example with MIT License | 6 votes |
def __init__(self, feature_vector_size, label_words):
    """Create the ANN-MLP classifier and the label binarizer.

    :param feature_vector_size: number of centroids used to build the
        feature vectors; becomes the input layer size.
    :param label_words: labels of the models to recognize, e.g.
        ``['dress', 'footwear', 'backpack']``; its length becomes the
        output layer size.
    """
    self.ann = cv2.ml.ANN_MLP_create()

    # Number of centroids used to build the feature vectors
    input_size = feature_vector_size
    # Number of models to recognize
    output_size = len(label_words)

    # Heaton's rule of thumb: two thirds of the input layer plus the output
    # layer. Multiplying first keeps the result correct even under integer
    # division, where the original ``2 / 3`` evaluated to 0.
    hidden_size = int(input_size * 2 / 3) + output_size

    # int32 instead of the original uint8, which silently wrapped any layer
    # size above 255.
    nn_config = np.array([input_size, hidden_size, output_size],
                         dtype=np.int32)

    self.label_words = label_words
    self.ann.setLayerSizes(nn_config)

    # Symmetrical Sigmoid as activation function
    self.ann.setActivationFunction(cv2.ml.ANN_MLP_SIGMOID_SYM)

    # Map models as tuples of probabilities
    self.le = preprocessing.LabelBinarizer()
    self.le.fit(label_words)
Example #25
Source File: elm.py From SVM-CNN with Apache License 2.0 | 6 votes |
def __init__(self, n_hidden=20, alpha=0.5, rbf_width=1.0,
             activation_func='tanh', activation_args=None,
             user_components=None, regressor=None,
             binarizer=None, random_state=None):
    """Initialise the ELM classifier.

    All arguments except ``binarizer`` are forwarded unchanged to the parent
    constructor.

    :param binarizer: label binarizer stored on ``self.binarizer``. When
        omitted (``None``), a fresh
        ``LabelBinarizer(neg_label=-1, pos_label=1)`` is created for this
        instance. The original default was a single ``LabelBinarizer(-1, 1)``
        evaluated once at definition time and therefore shared by every
        classifier; it also passed the arguments positionally, which
        scikit-learn versions with keyword-only parameters reject.
    """
    super(ELMClassifier, self).__init__(n_hidden=n_hidden,
                                        alpha=alpha,
                                        random_state=random_state,
                                        activation_func=activation_func,
                                        activation_args=activation_args,
                                        user_components=user_components,
                                        rbf_width=rbf_width,
                                        regressor=regressor)

    self.classes_ = None
    if binarizer is None:
        binarizer = LabelBinarizer(neg_label=-1, pos_label=1)
    self.binarizer = binarizer
Example #26
Source File: naive_bayes.py From plume with MIT License | 6 votes |
def fit(self, X, y):
    """
    Estimate smoothed per-class feature log-probabilities from count data.

    Sets ``self.classes``, ``self.class_count``, ``self.feature_count`` and
    ``self.feature_log_prob``; smoothing uses ``self.alpha``.

    :param X: shape = [n_samples, n_features]
    :param y: shape = [n_samples]
    :return: self
    """
    labelbin = LabelBinarizer()
    Y = labelbin.fit_transform(y)
    self.classes = labelbin.classes_

    # LabelBinarizer emits a single column for one or two classes; expand it
    # so that there is one indicator column per class.
    if Y.shape[1] == 1:
        if len(self.classes) == 2:
            Y = np.concatenate((1 - Y, Y), axis=1)
        else:
            # Degenerate case: every sample belongs to the only class.
            Y = np.ones_like(Y)

    n_classes = Y.shape[1]
    n_features = X.shape[1]
    self.class_count = np.zeros(n_classes, dtype=np.float64)
    self.feature_count = np.zeros((n_classes, n_features), dtype=np.float64)

    # Per-class feature totals and per-class sample counts.
    self.feature_count += Y.T @ X
    self.class_count += Y.sum(axis=0)

    smoothed_fc = self.feature_count + self.alpha
    smoothed_cc = smoothed_fc.sum(axis=1)
    self.feature_log_prob = (np.log(smoothed_fc) -
                             np.log(smoothed_cc.reshape(-1, 1)))

    # The docstring always promised ``self``; the original returned None.
    return self
Example #27
Source File: transformers.py From scikit-optimize with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self):
    """Create the internal ``LabelBinarizer`` used for one-hot encoding."""
    self._lb = LabelBinarizer()
Example #28
Source File: calc_auc_roc.py From zincbase with MIT License | 5 votes |
def calc_auc_roc(truth, pred, average="macro"):
    """Compute ROC AUC after binarizing both label sequences.

    The binarizer is fitted on ``truth`` only and then applied to ``pred``.

    :param truth: ground-truth labels.
    :param pred: predicted labels.
    :param average: forwarded to ``roc_auc_score``.
    :return: the result of ``roc_auc_score``.
    """
    binarizer = LabelBinarizer()
    truth_encoded = binarizer.fit_transform(truth)
    pred_encoded = binarizer.transform(pred)
    return roc_auc_score(truth_encoded, pred_encoded, average=average)
Example #29
Source File: community_weighting.py From reveal-graph-embedding with Apache License 2.0 | 5 votes |
def chi2_contingency_matrix(X_train, y_train):
    """Return the per-class, per-feature chi-squared contribution matrix.

    The stored values of a copy of ``X_train`` are first replaced by ones,
    so only the presence of an entry matters. Observed class/feature counts
    are compared against the counts expected from the class and feature
    marginals: ``(observed - expected) ** 2 / expected``.

    :param X_train: feature matrix exposing ``copy()`` and an assignable
        ``data`` attribute (presumably a scipy sparse matrix - verify
        against caller).
    :param y_train: class labels.
    :return: array with one row per class and one column per feature.
    :raises ValueError: if any value of the checked matrix is negative.
    """
    presence = X_train.copy()
    presence.data = np.ones_like(presence.data)
    presence = check_array(presence, accept_sparse='csr')

    values = presence.data if issparse(presence) else presence
    if np.any(values < 0):
        raise ValueError("Input X must be non-negative.")

    indicator = LabelBinarizer().fit_transform(y_train)
    # A single-column result is widened to two complementary columns.
    if indicator.shape[1] == 1:
        indicator = np.append(1 - indicator, indicator, axis=1)

    observed = safe_sparse_dot(indicator.T, presence)  # n_classes * n_features

    feature_count = presence.sum(axis=0).reshape(1, -1)
    class_prob = indicator.mean(axis=0).reshape(1, -1)
    expected = np.dot(class_prob.T, feature_count)

    observed = np.asarray(observed, dtype=np.float64)

    # Reuse `observed` for the statistic. The arithmetic is kept in place so
    # the result keeps the type produced by np.asarray above, whatever the
    # type of `expected` is.
    contingency_matrix = observed
    contingency_matrix -= expected
    contingency_matrix **= 2

    # Avoid division by zero where nothing was expected.
    expected[expected == 0.0] = 1.0
    contingency_matrix /= expected

    return contingency_matrix
Example #30
Source File: loadData.py From birdsong-keras with GNU General Public License v3.0 | 5 votes |
def getOneHotOFGS(df):
    """Binarize the ``"OFGS"`` column of ``df``.

    :param df: object indexable by ``"OFGS"``.
    :return: tuple ``(fitted LabelBinarizer, encoded "OFGS" values)``.
    """
    from sklearn.preprocessing import LabelBinarizer

    ofgs = df["OFGS"]

    # create onehot encoding for classid
    binarizer = LabelBinarizer()
    binarizer.fit(ofgs)
    one_hot = binarizer.transform(ofgs)
    return (binarizer, one_hot)