Python sklearn.utils.column_or_1d() Examples
The following are 30 code examples of sklearn.utils.column_or_1d().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
Example #1
Source File: test_utils.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_column_or_1d():
    """Exercise column_or_1d against a table of labelled target layouts.

    Targets labelled binary, multiclass or continuous must come back equal
    to ``np.ravel`` of the input; every other label must make
    column_or_1d raise ValueError.
    """
    # Labels for which column_or_1d is expected to succeed.
    flattenable = ("binary", "multiclass", "continuous")

    cases = [
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    ]

    for kind, target in cases:
        if kind not in flattenable:
            assert_raises(ValueError, column_or_1d, target)
            continue
        assert_array_equal(column_or_1d(target), np.ravel(target))
Example #2
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 6 votes |
def fit(self, X, y = None):
    """Record the distinct non-missing values of X and, optionally, statistics.

    X is validated with column_or_1d and, when ``self.dtype`` is set, cast
    to that dtype. Nothing else happens when ``self._empty_fit()`` is true.

    Sets ``data_`` when ``self.with_data`` is true, and ``counts_`` plus
    ``discr_stats_`` when ``self.with_statistics`` is true. ``y`` is unused.

    Returns
    -------
    self
    """
    X = column_or_1d(X, warn=True)
    if self._empty_fit():
        return self
    if self.dtype is not None:
        X = cast(X, self.dtype)

    missing = self._missing_value_mask(X)
    present_values, present_counts = numpy.unique(X[~missing], return_counts=True)

    if self.with_data:
        replacement = self.missing_value_replacement
        # The replacement only joins the value space when something was
        # actually missing in the fitted column.
        if (replacement is not None) and numpy.any(missing) > 0:
            self.data_ = numpy.unique(numpy.append(present_values, replacement))
        else:
            self.data_ = present_values

    if self.with_statistics:
        self.counts_ = _count(missing)
        self.discr_stats_ = (present_values, present_counts)

    return self
Example #3
Source File: calibration.py From carl with BSD 3-Clause "New" or "Revised" License | 6 votes |
def predict(self, T):
    """Calibrate data.

    Parameters
    ----------
    * `T` [array-like, shape=(n_samples,)]:
        Data to calibrate.

    Returns
    -------
    * `Tt` [array, shape=(n_samples,)]:
        Calibrated data.
    """
    # The calibrators are evaluated on a single-column 2-D view of the data.
    T = column_or_1d(T).reshape(-1, 1)

    # Evaluate each density once: the calibrator1 density is both the
    # numerator and one term of the denominator.
    num = self.calibrator1.pdf(T)
    den = self.calibrator0.pdf(T) + num

    p = num / den
    # Where the summed density is zero the ratio is undefined; use 0.5.
    p[den == 0] = 0.5

    return p
Example #4
Source File: calibration.py From carl with BSD 3-Clause "New" or "Revised" License | 6 votes |
def predict(self, T):
    """Calibrate data.

    Parameters
    ----------
    * `T` [array-like, shape=(n_samples,)]:
        Data to calibrate.

    Returns
    -------
    * `Tt` [array, shape=(n_samples,)]:
        Calibrated data.
    """
    # The calibrators are evaluated on a single-column 2-D view of the data.
    T = column_or_1d(T).reshape(-1, 1)

    # Evaluate each density once: the calibrator1 density is both the
    # numerator and one term of the denominator.
    num = self.calibrator1.pdf(T)
    den = self.calibrator0.pdf(T) + num

    p = num / den
    # Where the summed density is zero the ratio is undefined; use 0.5.
    p[den == 0] = 0.5

    return p
Example #5
Source File: label.py From sparkit-learn with Apache License 2.0 | 6 votes |
def fit(self, y):
    """Fit the label encoder on distributed target values.

    Parameters
    ----------
    y : ArrayRDD (n_samples,)
        Target values.

    Returns
    -------
    self : returns an instance of self.
    """

    def unique_in_block(block):
        # Distinct labels of one block, after the usual 1-D validation.
        block = column_or_1d(block, warn=True)
        _check_numpy_unicode_bug(block)
        return np.unique(block)

    def merge_unique(left, right):
        # Union of two label sets, deduplicated again.
        combined = np.concatenate((left, right))
        return np.unique(combined)

    per_block = y.map(unique_in_block)
    self.classes_ = per_block.reduce(merge_unique)
    return self
Example #6
Source File: labels.py From pumpp with ISC License | 6 votes |
def inverse_transform(self, y):
    """Map encoded labels back to the original classes.

    Parameters
    ----------
    y : numpy array of shape [n_samples]
        Encoded target values, used as indices into ``self.classes_``.

    Returns
    -------
    y : numpy array of shape [n_samples]

    Raises
    ------
    ValueError
        If ``y`` holds a value outside ``range(len(self.classes_))``.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)

    # An empty input maps to an empty output.
    if _num_samples(y) == 0:
        return np.array([])

    valid_codes = np.arange(len(self.classes_))
    unknown = np.setdiff1d(y, valid_codes)
    if len(unknown) > 0:
        raise ValueError(
            "y contains previously unseen labels: %s" % str(unknown))

    codes = np.asarray(y)
    return self.classes_[codes]
Example #7
Source File: labels.py From pumpp with ISC License | 6 votes |
def transform(self, y):
    """Encode labels against the fitted classes.

    Parameters
    ----------
    y : array-like of shape [n_samples]
        Target values.

    Returns
    -------
    y : array-like of shape [n_samples]
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)

    # An empty input maps to an empty output.
    if _num_samples(y) == 0:
        return np.array([])

    # _encode returns a pair; only the second element is returned here.
    _uniques, encoded = _encode(y, uniques=self.classes_, encode=True)
    return encoded
Example #8
Source File: patsy_adaptor.py From patsylearn with GNU General Public License v2.0 | 6 votes |
def fit(self, data, y=None):
    """Build design matrices from the formula and fit a clone of the estimator.

    Parameters
    ----------
    data : dict-like (pandas dataframe)
        Input data. Contains features and possible labels. Column names
        need to match variables in formula.
    y : ignored
        Not used by this method.

    Returns
    -------
    self
    """
    env = EvalEnvironment.capture(self.eval_env, reference=1)
    model_formula = _drop_intercept(self.formula, self.add_intercept)

    targets, features = dmatrices(
        model_formula, data, eval_env=env, NA_action=self.NA_action)

    # Keep the design metadata on the instance.
    self.design_y_ = targets.design_info
    self.design_X_ = features.design_info
    self.feature_names_ = features.design_info.column_names

    # Flatten the target so sklearn does not warn about a column vector.
    targets = column_or_1d(targets)

    # Fit a clone so the estimator passed in by the user is left untouched.
    fresh_estimator = clone(self.estimator)
    self.estimator_ = fresh_estimator.fit(features, targets)
    return self
Example #9
Source File: data.py From pyod with BSD 2-Clause "Simplified" License | 6 votes |
def evaluate_print(clf_name, y, y_pred):
    """Print ROC AUC and precision @ rank n for a detector's scores.

    Both metrics are rounded to four decimals before printing.

    Parameters
    ----------
    clf_name : str
        The name of the detector.

    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.
    """
    y = column_or_1d(y)
    y_pred = column_or_1d(y_pred)
    check_consistent_length(y, y_pred)

    roc = np.round(roc_auc_score(y, y_pred), decimals=4)
    prn = np.round(precision_n_scores(y, y_pred), decimals=4)

    report = '{clf_name} ROC:{roc}, precision @ rank n:{prn}'.format(
        clf_name=clf_name, roc=roc, prn=prn)
    print(report)
Example #10
Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_column_or_1d():
    """Exercise column_or_1d against a table of labelled target layouts.

    Targets labelled binary, multiclass or continuous must come back equal
    to ``np.ravel`` of the input; every other label must make
    column_or_1d raise ValueError.
    """
    # Labels for which column_or_1d is expected to succeed.
    accepted_kinds = ("binary", "multiclass", "continuous")

    table = [
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    ]

    for label, values in table:
        if label in accepted_kinds:
            assert_array_equal(column_or_1d(values), np.ravel(values))
        else:
            assert_raises(ValueError, column_or_1d, values)
Example #11
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, X, y = None):
    """Store the sorted distinct non-null values of X in ``classes_``.

    X is first validated with column_or_1d. ``y`` is unused.

    Returns
    -------
    self
    """
    X = column_or_1d(X, warn=True)
    # Nulls are excluded so they never become a class of their own.
    not_null = ~pandas.isnull(X)
    self.classes_ = numpy.unique(X[not_null])
    return self
Example #12
Source File: base.py From combo with BSD 2-Clause "Simplified" License | 5 votes |
def _set_weights(self, weights):
    """Internal function to set estimator weights.

    With ``weights=None`` every estimator gets weight 1. Otherwise the
    weights are stored as a single row and rescaled by
    ``n_weights / sum(weights)``.

    Parameters
    ----------
    weights : numpy array of shape (n_estimators,)
        Estimator weights. May be used after the alignment.

    Returns
    -------
    self
    """
    # NOTE(review): this block arrived collapsed onto one line; the nesting
    # below (everything after the reshape inside the ``else``) is a
    # reconstruction -- confirm against the original project source.
    if weights is None:
        self.weights = np.ones([1, self.n_base_estimators_])
    else:
        self.weights = column_or_1d(weights).reshape(1, len(weights))
        # NOTE(review): assert is stripped under ``python -O``; consider an
        # explicit exception if this is meant as input validation.
        assert (self.weights.shape[1] == self.n_base_estimators_)
        # adjust probability by a factor for integrity (added to 1)
        adjust_factor = self.weights.shape[1] / np.sum(weights)
        self.weights = self.weights * adjust_factor
        # NOTE(review): looks like a leftover debug print -- confirm whether
        # callers rely on this output.
        print(self.weights)
    return self
Example #13
Source File: utils.py From auto_ml with MIT License | 5 votes |
def transform(self, y):
    """Encode ``y``, registering unseen labels, and return the first code.

    Labels in ``y`` that are absent from ``self.classes_`` are appended to
    it. Only element 0 of the ``np.searchsorted`` result is returned.
    """
    y = column_or_1d(y, warn=True)
    incoming = np.unique(y)

    already_known = np.intersect1d(incoming, self.classes_)
    if len(already_known) < len(incoming):
        unseen = np.setdiff1d(incoming, self.classes_)
        # NOTE(review): appending may leave classes_ unsorted, while
        # np.searchsorted expects sorted input -- verify this is intended.
        self.classes_ = np.hstack((self.classes_, unseen))

    positions = np.searchsorted(self.classes_, y)
    return positions[0]
Example #14
Source File: scipy.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Validate X with column_or_1d and return ``self.bspline`` applied to it."""
    column = column_or_1d(X, warn=True)
    return self.bspline(column)
Example #15
Source File: scipy.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, X, y = None):
    """Validate X with column_or_1d; no state is learned.

    ``y`` is unused. Returns ``self``.
    """
    # Called only for its validation; the result is not needed.
    column_or_1d(X, warn=True)
    return self
Example #16
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, X, y = None):
    """Validate X with column_or_1d; no state is learned.

    ``y`` is unused. Returns ``self``.
    """
    # Called only for its validation; the result is not needed.
    column_or_1d(X, warn=True)
    return self
Example #17
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Bin the values of X with ``pandas.cut`` using the configured bins.

    A ``Categorical`` result is converted to a plain numpy array. The
    result is then passed through ``_col2d`` (presumably a reshape to a
    2-D column -- confirm against the helper).
    """
    X = column_or_1d(X, warn=True)
    cut_options = dict(
        bins=self.bins,
        right=self.right,
        labels=self.labels,
        include_lowest=self.include_lowest,
    )
    binned = pandas.cut(X, **cut_options)
    if isinstance(binned, Categorical):
        binned = numpy.asarray(binned)
    return _col2d(binned)
Example #18
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, X, y = None):
    """Validate X with column_or_1d; no state is learned.

    ``y`` is unused. Returns ``self``.
    """
    # Called only for its validation; the result is not needed.
    column_or_1d(X, warn=True)
    return self
Example #19
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Replace each value of X by its entry in ``self._transform_dict()``.

    Uses ``X.apply`` when X has that method, otherwise a plain Python loop.
    The result is passed through ``_col2d`` (presumably a reshape to a 2-D
    column -- confirm against the helper).
    """
    X = column_or_1d(X, warn=True)
    mapping = self._transform_dict()

    def lookup(key):
        return mapping[key]

    if not hasattr(X, "apply"):
        mapped = numpy.array([lookup(value) for value in X])
    else:
        mapped = X.apply(lookup)
    return _col2d(mapped)
Example #20
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """One-hot encode X against the fitted ``classes_``.

    Builds a ``(len(X), len(classes_))`` integer matrix with a 1 in the
    column of each row's class. Null values leave their row all zeros.

    Returns
    -------
    A CSR sparse matrix when ``self.sparse_output`` is true, otherwise a
    dense numpy array.

    Raises
    ------
    ValueError
        If a non-null value of X is not in ``classes_`` (from ``list.index``).
    """
    X = column_or_1d(X, warn = True)
    index = list(self.classes_)
    shape = (len(X), len(index))
    # Builtin ``int`` replaces the ``numpy.int`` alias, which was deprecated
    # in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    if self.sparse_output:
        # LIL format is used for the element-wise assignment below.
        Xt = lil_matrix(shape, dtype = int)
    else:
        Xt = numpy.zeros(shape, dtype = int)
    for i, v in enumerate(X):
        if not pandas.isnull(v):
            Xt[i, index.index(v)] = 1
    if self.sparse_output:
        Xt = Xt.tocsr()
    return Xt
Example #21
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Encode each value of X as its position in the fitted ``classes_``.

    Null values are encoded as ``self.missing_values``. The result is
    passed through ``_col2d`` (presumably a reshape to a 2-D column --
    confirm against the helper).

    Raises
    ------
    ValueError
        If a non-null value of X is not in ``classes_`` (from ``list.index``).
    """
    X = column_or_1d(X, warn=True)
    labels = list(self.classes_)

    codes = []
    for value in X:
        if pandas.isnull(value):
            codes.append(self.missing_values)
        else:
            codes.append(labels.index(value))

    return _col2d(numpy.array(codes))
Example #22
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Slice every value of X to ``[self.begin:self.end]``.

    The slicing function is applied through ``eval_rows``; the result is
    passed through ``_col2d`` (presumably a reshape to a 2-D column --
    confirm against the helper).
    """
    X = column_or_1d(X, warn=True)

    def take_slice(value):
        return value[self.begin:self.end]

    sliced = eval_rows(X, take_slice)
    return _col2d(sliced)
Example #23
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, X, y = None):
    """Validate X with column_or_1d; no state is learned.

    ``y`` is unused. Returns ``self``.
    """
    # Called only for its validation; the result is not needed.
    column_or_1d(X, warn=True)
    return self
Example #24
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Replace matches of ``self.pattern`` with ``self.replacement`` in X.

    The regex engine comes from ``_regex_engine``; the substitution is
    applied through ``eval_rows`` and the result passed through ``_col2d``
    (presumably a reshape to a 2-D column -- confirm against the helper).
    """
    X = column_or_1d(X, warn=True)
    # Build the engine once, outside the per-value function.
    engine = _regex_engine(self.pattern)

    def substitute(text):
        return engine.sub(self.replacement, text)

    replaced = eval_rows(X, substitute)
    return _col2d(replaced)
Example #25
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def fit(self, X, y = None):
    """Validate X with column_or_1d; no state is learned.

    ``y`` is unused. Returns ``self``.
    """
    # Called only for its validation; the result is not needed.
    column_or_1d(X, warn=True)
    return self
Example #26
Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0 | 5 votes |
def transform(self, X):
    """Flag, per value of X, whether ``self.pattern`` is found in it.

    The regex engine comes from ``_regex_engine``; the boolean test is
    applied through ``eval_rows`` and the result passed through ``_col2d``
    (presumably a reshape to a 2-D column -- confirm against the helper).
    """
    X = column_or_1d(X, warn=True)
    # Build the engine once, outside the per-value function.
    engine = _regex_engine(self.pattern)

    def has_match(text):
        return bool(engine.search(text))

    flags = eval_rows(X, has_match)
    return _col2d(flags)
Example #27
Source File: encode.py From skutil with BSD 3-Clause "New" or "Revised" License | 5 votes |
def transform(self, y):
    """Encode ``y`` using the already fitted classes.

    Values found in ``self.classes_`` are encoded as their
    ``np.searchsorted`` position; all other values get the sentinel
    returned by ``_get_unseen()``.

    Parameters
    ----------
    y : array_like, shape=(n_samples,)
        The array to encode

    Returns
    -------
    e : array_like, shape=(n_samples,)
        The encoded array

    Raises
    ------
    ValueError
        If ``y`` has at least as many distinct values as the sentinel.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)

    distinct = np.unique(y)
    _check_numpy_unicode_bug(distinct)

    # Guard against more factor levels than the sentinel allows.
    sentinel = _get_unseen()
    if len(distinct) >= sentinel:
        raise ValueError('Too many factor levels in feature. Max is %i' % sentinel)

    encoded = []
    for value in y:
        if value in self.classes_:
            encoded.append(np.searchsorted(self.classes_, value))
        else:
            encoded.append(sentinel)

    return np.array(encoded)
Example #28
Source File: utility.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier scores into binary labels (0 or 1).

    The threshold is the ``100 * (1 - outliers_fraction)`` percentile of
    the scores; scores strictly above it are labelled 1.

    Parameters
    ----------
    pred_scores : list or numpy array of shape (n_samples,)
        Raw outlier scores. Outliers are assumed have larger values.

    outliers_fraction : float in (0,1)
        Percentage of outliers.

    Returns
    -------
    outlier_labels : numpy array of shape (n_samples,)
        Integer array with 1 where the score exceeds the threshold and 0
        elsewhere.
    """
    # check input values
    pred_scores = column_or_1d(pred_scores)
    check_parameter(outliers_fraction, 0, 1)

    cutoff_rank = 100 * (1 - outliers_fraction)
    threshold = percentile(pred_scores, cutoff_rank)

    above_threshold = pred_scores > threshold
    return above_threshold.astype('int')
Example #29
Source File: utility.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def precision_n_scores(y, y_pred, n=None):
    """Compute precision @ rank n.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    n : int, optional (default=None)
        The number of outliers. if not defined, infer using ground truth.

    Returns
    -------
    precision_at_rank_n : float
        Precision at rank n score.
    """
    # Convert the raw decision scores into labels first.
    predicted_labels = get_label_n(y, y_pred, n)

    # Both arrays are normalised to 1-D before scoring.
    truth = column_or_1d(y)
    predicted_labels = column_or_1d(predicted_labels)

    return precision_score(truth, predicted_labels)
Example #30
Source File: utility.py From pyod with BSD 2-Clause "Simplified" License | 5 votes |
def invert_order(scores, method='multiplication'):
    """Invert the order of a list of values.

    The smallest value becomes the largest in the inverted list. This is
    useful while combining multiple detectors since their score order
    could be different.

    Parameters
    ----------
    scores : list, array or numpy array with shape (n_samples,)
        The list of values to be inverted

    method : str, optional (default='multiplication')
        Methods used for order inversion. Valid methods are:

        - 'multiplication': multiply by -1
        - 'subtraction': max(scores) - scores

    Returns
    -------
    inverted_scores : numpy array of shape (n_samples,)
        The inverted list

    Raises
    ------
    ValueError
        If ``method`` is not one of the valid methods.

    Examples
    --------
    >>> scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1]
    >>> invert_order(scores1)
    array([-0.1, -0.3, -0.5, -0.7, -0.2, -0.1])
    >>> invert_order(scores1, method='subtraction')
    array([0.6, 0.4, 0.2, 0. , 0.5, 0.6])
    """
    scores = column_or_1d(scores)

    if method == 'multiplication':
        return scores.ravel() * -1

    if method == 'subtraction':
        return (scores.max() - scores).ravel()

    # Previously an unknown method fell through and silently returned None.
    raise ValueError(
        "method must be 'multiplication' or 'subtraction', got %r" % (method,))