Python sklearn.preprocessing.binarize() Examples
The following are 14 code examples of sklearn.preprocessing.binarize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.preprocessing, or try the search function.
Example #1
Source File: trace.py From neuroglia with BSD 3-Clause "New" or "Revised" License | 6 votes |
def transform(self, X):
    """Threshold every value of ``X`` to 0 or 1.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data to binarize, element by element.

    Returns
    -------
    The output of ``binarize`` called with ``self.threshold`` and
    ``self.copy``. When ``X`` exposes both ``index`` and ``columns``, the
    result is wrapped in a ``pd.DataFrame`` carrying those same labels.
    """
    # Inputs missing either attribute are returned as a plain array.
    try:
        row_labels, column_labels = X.index, X.columns
    except AttributeError:
        labelled = False
    else:
        labelled = True

    binarized = binarize(X, threshold=self.threshold, copy=self.copy)
    if not labelled:
        return binarized
    return pd.DataFrame(data=binarized, index=row_labels, columns=column_labels)
Example #2
Source File: multi_class_svm.py From JusticeAI with MIT License | 6 votes |
def predict(self, data):
    """
    Predict the outcomes for one facts vector, with a confidence per outcome.

    1) Predicts an outcome given facts
    2) Predicts probability that prediction is correct
        2.1) Range goes from [0-1] where x < 0.5 is False
        2.2) The model only returns the probability that a fact is 1
        2.3) therefore to predict that the probability that a fact is 0
             we do 1 - x when x < 0.5

    The model is loaded from "multi_class_svm_model.bin" on first use if
    ``self.model`` is still None.

    :param data: numpy([1, 0, 0, ...])
    :return: (predictions, probabilities) -- the output of
        ``self.model.predict`` and the first row of
        ``self.model.predict_proba``, where each probability is replaced
        by ``1 - p`` when the matching prediction is 0 and then formatted
        to two decimals
    """
    if self.model is None:
        self.model = Load.load_binary("multi_class_svm_model.bin")

    # Wrap the vector as a one-row batch; values above 0 become 1.
    data = binarize([data], threshold=0)
    probabilities = self.model.predict_proba(data)[0]
    # Run the classifier once and reuse the result both for the flip
    # below and for the return value.
    predictions = self.model.predict(data)

    for i, probability in enumerate(probabilities):
        if predictions[0][i] == 0:
            probability = 1 - probability
        # The formatted string is written back into ``probabilities``;
        # presumably a float array, so it is coerced to float -- confirm.
        probabilities[i] = format(probability, '.2f')

    return predictions, probabilities
Example #3
Source File: multi_class_svm.py From JusticeAI with MIT License | 6 votes |
def reshape_dataset(self):
    """
    Restructure the data to accommodate the sklearn library

    1) Build the x data as a 2D numpy array, one flattened and binarized
       facts vector per precedent:
           [
               [precedent #1 facts],
               [precedent #2 facts],
               ...
           ]
    2) Build the y data by classifying every precedent

    :return: x_total, y_total
    """
    # 1
    flattened = []
    for precedent in self.data_set:
        facts = precedent['facts_vector']
        flattened.append(np.reshape(facts, (len(facts),)))
    x_total = binarize(np.array(flattened), threshold=0)

    # 2
    y_total = np.array(
        [self.__classify_precedent(precedent) for precedent in self.data_set])

    return x_total, y_total
Example #4
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_preprocessing_assignment(self):
    """Assigning binarized columns back keeps frame type, values and columns."""
    sepal_length = 'sepal length (cm)'

    # Single column, assigned through the series-level accessor.
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    original_columns = df.data.columns

    df[sepal_length] = df[sepal_length].preprocessing.binarize(threshold=6)
    self.assertIsInstance(df, pdml.ModelFrame)

    first_feature = np.atleast_2d(iris.data[:, 0])
    binarized = pp.binarize(first_feature, threshold=6)
    # ``binarized`` is a single row here, hence the transpose.
    expected = np.hstack([binarized.T, iris.data[:, 1:]])
    self.assert_numpy_array_almost_equal(df.data.values, expected)
    tm.assert_index_equal(df.data.columns, original_columns)

    # recreate data, then assign two columns at once
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    target_columns = ['sepal length (cm)', 'sepal width (cm)']

    df[target_columns] = df[target_columns].preprocessing.binarize(threshold=6)
    self.assertIsInstance(df, pdml.ModelFrame)

    binarized = pp.binarize(iris.data[:, 0:2], threshold=6)
    expected = np.hstack([binarized, iris.data[:, 2:]])
    self.assert_numpy_array_almost_equal(df.data.values, expected)
    tm.assert_index_equal(df.data.columns, original_columns)
Example #5
Source File: nb_sklearn.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _fit_data(self, X):
    """Fit one ``LabelBinarizer`` per column and return the encoded data.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    X_transformed : array-like
        The result of ``self._transform_data(X)`` computed with the
        binarizers fitted by this call.
    """
    if self.binarize is not None:
        X = binarize(X, threshold=self.binarize)

    # Build a fresh list so that fitting again replaces the previous
    # binarizers instead of appending to them; leftovers from an earlier
    # fit would make the per-feature count no longer match the input.
    fitted = []
    for i in range(X.shape[1]):
        # initialise binarizer and save
        binarizer = LabelBinarizer()

        # NOTE(review): sklearn's LabelBinarizer.fit recomputes
        # ``classes_``, so this pre-seed looks like it has no effect, and
        # it is skipped for the default ``binarize=0.0`` -- confirm intent.
        if self.binarize:
            binarizer.classes_ = np.array([0, 1])

        # fit the data to the binarizer
        binarizer.fit(X[:, i])
        fitted.append(binarizer)
    self._binarizers = fitted

    # NOTE(review): X is already thresholded at this point; verify that
    # ``_transform_data`` thresholding it again is intended.
    return self._transform_data(X)
Example #6
Source File: nb_sklearn.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _transform_data(self, X):
    """Encode every column of ``X`` with its fitted binarizer.

    Raises ``NotFittedError`` when no binarizers are stored, and
    ``ValueError`` when the number of columns in ``X`` differs from the
    number of stored binarizers. The per-column encodings are joined
    along axis 1.
    """
    if self._binarizers == []:
        raise NotFittedError()

    if self.binarize is not None:
        X = binarize(X, threshold=self.binarize)

    n_fitted = len(self._binarizers)
    n_features = X.shape[1]
    if n_fitted != n_features:
        raise ValueError(
            "Expected input with %d features, got %d instead" %
            (n_fitted, n_features))

    encoded_columns = []
    for i in range(n_features):
        binarizer = self._binarizers[i]
        encoded = binarizer.transform(X[:, i])
        n_classes = binarizer.classes_.shape[0]

        # sklearn returns ndarray with shape (samples, 1) on binary input.
        # One or two classes: emit the complement column first.
        if n_classes in (1, 2):
            encoded_columns.append(1 - encoded)
        # Everything except the single-class case also keeps the
        # encoding itself.
        if n_classes != 1:
            encoded_columns.append(encoded)

    return np.concatenate(encoded_columns, axis=1)
Example #7
Source File: nb_sklearn.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
    """Store the constructor arguments and start with no binarizers.

    Each argument is saved unchanged under an attribute of the same
    name. ``class_prior`` is additionally stored as ``class_log_prior_``.
    """
    # Empty until binarizers are fitted.
    self._binarizers = []

    self.alpha = alpha
    self.binarize = binarize
    self.fit_prior = fit_prior
    self.class_prior = class_prior
    # Holds the raw ``class_prior`` value exactly as passed in.
    self.class_log_prior_ = class_prior
Example #8
Source File: nb_sklearn.py From recordlinkage with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, init='jaro', max_iter=100, binarize=binarize, atol=10e-5):
    """Store the constructor arguments and start with no binarizers.

    Each argument is saved unchanged under an attribute of the same name.
    """
    # NOTE(review): the default for ``binarize`` is whatever module-level
    # object is bound to the name ``binarize`` when this ``def`` runs --
    # presumably the sklearn function rather than a threshold. Confirm
    # whether ``None`` or a number was intended.
    # NOTE(review): ``10e-5`` equals 1e-4; confirm that is the tolerance
    # meant here.
    self._binarizers = []

    self.init = init
    self.max_iter = max_iter
    self.binarize = binarize
    self.atol = atol
Example #9
Source File: test_util.py From FeatureHub with MIT License | 5 votes |
def g(a):
    """Return ``f(a)`` after importing ``binarize`` inside the function."""
    # The imported name is not used here; presumably the function-local
    # import itself is what is being exercised -- confirm before removing.
    from sklearn.preprocessing import binarize

    outcome = f(a)
    return outcome
Example #10
Source File: test_util.py From FeatureHub with MIT License | 5 votes |
def test_run_isolated_from_function_from_source():
    """Functions built by ``get_function`` give the same result under ``run_isolated``."""
    f_source = b'def f(a):\n return a+1\n'
    g_source = b'def f(a):\n return a+1\n\ndef g(a):\n from sklearn.preprocessing import binarize\n return f(a)\n'

    f1 = featurehub.util.get_function(f_source)
    g1 = featurehub.util.get_function(g_source)

    # A direct call and an isolated call must agree for every input.
    for value in (1, 3, 7):
        for func in (f1, g1):
            assert func(value) == featurehub.util.run_isolated(func, value)
Example #11
Source File: test_util.py From FeatureHub with MIT License | 5 votes |
def test_run_isolated_from_function2_from_source():
    """Functions built by ``get_function2`` give the same result under ``run_isolated``."""
    f_source = b'def f(a):\n return a+1\n'
    g_source = b'def f(a):\n return a+1\n\ndef g(a):\n from sklearn.preprocessing import binarize\n return f(a)\n'

    f1 = featurehub.util.get_function2(f_source)
    g1 = featurehub.util.get_function2(g_source)

    # A direct call and an isolated call must agree for every input.
    for value in (1, 3, 7):
        for func in (f1, g1):
            assert func(value) == featurehub.util.run_isolated(func, value)


# ------------------------------------------------------------------------------
# Test compute_dataset_hash
Example #12
Source File: test_demo.py From FeatureHub with MIT License | 5 votes |
def hi_lo_age(dataset):
    """Binarize the users' ages against a fixed cutoff of 30.

    Reads ``dataset["users"]["age"]``, reshapes its values into a single
    column and returns the output of ``binarize`` for that column: 1 for
    ages strictly above the cutoff, 0 otherwise.
    """
    from sklearn.preprocessing import binarize

    cutoff = 30
    ages = dataset["users"]["age"].values.reshape(-1, 1)
    # ``threshold`` is keyword-only in current scikit-learn releases, so
    # passing it positionally raises a TypeError there.
    return binarize(ages, threshold=cutoff)
Example #13
Source File: test_preprocessing.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_binarize(self):
    """Check the ``preprocessing.binarize`` accessor against ``pp.binarize``."""
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    # Whole frame: default threshold first, then an explicit one.
    for options in ({}, {'threshold': 5}):
        result = df.preprocessing.binarize(**options)
        expected = pp.binarize(iris.data, **options)

        self.assertIsInstance(result, pdml.ModelFrame)
        self.assert_numpy_array_almost_equal(result.data.values, expected)
        tm.assert_index_equal(result.columns, df.data.columns)

    # Single column: the result must be a series that keeps its name.
    s = df['sepal length (cm)']
    self.assertIsInstance(s, pdml.ModelSeries)

    first_feature = iris.data[:, 0].reshape(-1, 1)
    for options in ({}, {'threshold': 6}):
        result = s.preprocessing.binarize(**options)
        expected = pp.binarize(first_feature, **options)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected.flatten())
        self.assertEqual(result.name, 'sepal length (cm)')
Example #14
Source File: cross_validation.py From Pyspatialml with GNU General Public License v3.0 | 4 votes |
def score(self, tpr_threshold=None, cutoff_threshold=None):
    """
    Calculates the scoring metrics using a cutoff threshold that attains
    a true positive rate that is equal or greater than the desired
    tpr_threshold

    The metrics are computed separately for every entry of
    ``self.test_idx_``. As a side effect the per-fold ROC data is stored
    in ``self.fpr_``, ``self.tpr_`` and ``self.roc_thresholds_``, and the
    cutoffs derived from ``tpr_threshold`` in ``self.thresholds_``.

    Args
    ----
    tpr_threshold : float
        Minimum true positive rate to achieve

    cutoff_threshold : float
        As an alternative to using a minimum true positive, a probability
        cutoff threshold can be specified to calculate the scoring. When
        given, it is used instead of ``tpr_threshold``.

    Returns
    -------
    OrderedDict
        Maps every name in ``self.scoring`` to the array of its per-fold
        scores.

    Raises
    ------
    ValueError
        If neither ``tpr_threshold`` nor ``cutoff_threshold`` is given.
    """
    if tpr_threshold is None and cutoff_threshold is None:
        raise ValueError('Either tpr_threshold or cutoff_threshold must be specified')

    scores = OrderedDict((name, []) for name in self.scoring)
    self.thresholds_ = []
    self.tpr_ = []
    self.fpr_ = []
    self.roc_thresholds_ = []

    for idx in self.test_idx_:
        # split fold
        y_true = self.y_true[idx]
        y_pred_ = self.y_pred_[idx, :]

        # get roc curve data
        fpr, tpr, thresholds = roc_curve(y_true, y_pred_[:, self.positive])
        self.fpr_.append(fpr)
        self.tpr_.append(tpr)
        self.roc_thresholds_.append(thresholds)

        # calculate cutoff that produces tpr >= threshold
        if cutoff_threshold is None:
            # First ROC point whose tpr reaches the requested minimum.
            first_hit = np.where(tpr >= tpr_threshold)[0].min()
            opt_threshold = thresholds[first_hit]
            self.thresholds_ = np.append(self.thresholds_, opt_threshold)
        else:
            opt_threshold = cutoff_threshold

        # calculate performance metrics
        # ``threshold`` is keyword-only in current scikit-learn releases.
        # NOTE(review): binarize maps values strictly above the threshold
        # to 1, while roc_curve counts scores >= threshold as positive;
        # a score equal to the cutoff is therefore labelled 0 -- confirm
        # this is acceptable.
        y_pred_opt = binarize(y_pred_, threshold=opt_threshold)
        y_pred_positive = y_pred_opt[:, self.positive]

        # calculate scores
        for name, score_func in self.scoring.items():
            scores[name] = np.append(
                scores[name], score_func(y_true, y_pred_positive))

    return scores