Python Examples of sklearn.utils.column_or_1d

Source File: test_utils.py From twitter-stock-recommendation with MIT License

6 votes

def test_column_or_1d():
    """Check column_or_1d against a table of labelled example targets.

    Targets tagged binary, multiclass or continuous must come back equal to
    their ravelled form; a target with any other tag must raise ValueError.
    """
    accepted_tags = ("binary", "multiclass", "continuous")
    cases = (
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    )

    for tag, target in cases:
        if tag not in accepted_tags:
            # Anything that is not a single column has to be rejected.
            assert_raises(ValueError, column_or_1d, target)
            continue
        assert_array_equal(column_or_1d(target), np.ravel(target))

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

6 votes

def fit(self, X, y = None):
		"""Collect the distinct values, and optionally statistics, of one column.

		X is flattened with column_or_1d and, when self.dtype is set, passed
		through cast(X, self.dtype). Depending on self.with_data and
		self.with_statistics the attributes data_, counts_ and discr_stats_
		are assigned. Returns self.
		"""
		X = column_or_1d(X, warn = True)
		if self._empty_fit():
			return self
		if self.dtype is not None:
			X = cast(X, self.dtype)
		missing = self._missing_value_mask(X)
		uniques, frequencies = numpy.unique(X[~missing], return_counts = True)
		if self.with_data:
			replacement = self.missing_value_replacement
			# The replacement only joins the stored values when something was actually missing
			add_replacement = (replacement is not None) and numpy.any(missing) > 0
			if add_replacement:
				self.data_ = numpy.unique(numpy.append(uniques, replacement))
			else:
				self.data_ = uniques
		if self.with_statistics:
			self.counts_ = _count(missing)
			self.discr_stats_ = (uniques, frequencies)
		return self

Source File: calibration.py From carl with BSD 3-Clause "New" or "Revised" License

6 votes

def predict(self, T):
        """Calibrate data.

        Computes ``p1(T) / (p0(T) + p1(T))`` from the densities returned by
        ``self.calibrator1.pdf`` and ``self.calibrator0.pdf``.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Data to calibrate.

        Returns
        -------
        * `Tt` [array, shape=(n_samples,)]:
            Calibrated data.
        """
        T = column_or_1d(T).reshape(-1, 1)

        # Evaluate each density exactly once: the numerator is reused in the
        # denominator instead of calling calibrator1.pdf a second time.
        num = self.calibrator1.pdf(T)
        den = self.calibrator0.pdf(T) + num

        p = num / den
        # Where the summed density is zero the ratio is undefined; use 0.5.
        p[den == 0] = 0.5

        return p

Source File: calibration.py From carl with BSD 3-Clause "New" or "Revised" License

6 votes

def predict(self, T):
        """Calibrate data.

        Computes ``p1(T) / (p0(T) + p1(T))`` from the densities returned by
        ``self.calibrator1.pdf`` and ``self.calibrator0.pdf``.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Data to calibrate.

        Returns
        -------
        * `Tt` [array, shape=(n_samples,)]:
            Calibrated data.
        """
        T = column_or_1d(T).reshape(-1, 1)

        # Evaluate each density exactly once: the numerator is reused in the
        # denominator instead of calling calibrator1.pdf a second time.
        num = self.calibrator1.pdf(T)
        den = self.calibrator0.pdf(T) + num

        p = num / den
        # Where the summed density is zero the ratio is undefined; use 0.5.
        p[den == 0] = 0.5

        return p

Source File: label.py From sparkit-learn with Apache License 2.0

6 votes

def fit(self, y):
        """Fit label encoder

        Parameters
        ----------
        y : ArrayRDD (n_samples,)
            Target values.

        Returns
        -------
        self : returns an instance of self.
        """

        def block_uniques(block):
            # Distinct labels of one block, after flattening and the unicode check.
            flat = column_or_1d(block, warn=True)
            _check_numpy_unicode_bug(flat)
            return np.unique(flat)

        def merge_uniques(left, right):
            # Union of two label arrays, de-duplicated again.
            combined = np.concatenate((left, right))
            return np.unique(combined)

        self.classes_ = y.map(block_uniques).reduce(merge_uniques)

        return self

Source File: labels.py From pumpp with ISC License

6 votes

def inverse_transform(self, y):
        """Map encoded labels back to the entries of ``classes_``.

        Parameters
        ----------
        y : numpy array of shape [n_samples]
            Target values.

        Returns
        -------
        y : numpy array of shape [n_samples]

        Raises
        ------
        ValueError
            If y holds a value outside ``range(len(self.classes_))``.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)

        # An empty input decodes to an empty array.
        if _num_samples(y) == 0:
            return np.array([])

        valid_codes = np.arange(len(self.classes_))
        unseen = np.setdiff1d(y, valid_codes)
        if len(unseen) > 0:
            message = "y contains previously unseen labels: %s" % str(unseen)
            raise ValueError(message)

        return self.classes_[np.asarray(y)]

Source File: labels.py From pumpp with ISC License

6 votes

def transform(self, y):
        """Encode labels against the fitted ``classes_``.

        Parameters
        ----------
        y : array-like of shape [n_samples]
            Target values.

        Returns
        -------
        y : array-like of shape [n_samples]
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)

        if _num_samples(y) == 0:
            # Nothing to encode: an empty input maps to an empty array.
            return np.array([])

        _uniques, encoded = _encode(y, uniques=self.classes_, encode=True)
        return encoded

Source File: patsy_adaptor.py From patsylearn with GNU General Public License v2.0

6 votes

def fit(self, data, y=None):
        """Build design matrices from the formula and fit a clone of the estimator.

        Parameters
        ----------
        data : dict-like (pandas dataframe)
            Input data. Contains features and possible labels.
            Column names need to match variables in formula.

        Returns
        -------
        self
        """
        eval_env = EvalEnvironment.capture(self.eval_env, reference=1)
        formula = _drop_intercept(self.formula, self.add_intercept)
        design_y, design_X = dmatrices(
            formula, data, eval_env=eval_env, NA_action=self.NA_action)

        x_info = design_X.design_info
        self.design_y_ = design_y.design_info
        self.design_X_ = x_info
        self.feature_names_ = x_info.column_names

        # Flatten the target to 1d so sklearn does not warn about its shape.
        target = column_or_1d(design_y)
        self.estimator_ = clone(self.estimator).fit(design_X, target)
        return self

Source File: data.py From pyod with BSD 2-Clause "Simplified" License

6 votes

def evaluate_print(clf_name, y, y_pred):
    """Print ROC AUC and precision @ rank n for a detector's scores.

    Both metrics are rounded to four decimals before printing.

    Parameters
    ----------
    clf_name : str
        The name of the detector.

    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    """

    y = column_or_1d(y)
    y_pred = column_or_1d(y_pred)
    check_consistent_length(y, y_pred)

    roc = np.round(roc_auc_score(y, y_pred), decimals=4)
    prn = np.round(precision_n_scores(y, y_pred), decimals=4)

    template = '{clf_name} ROC:{roc}, precision @ rank n:{prn}'
    print(template.format(clf_name=clf_name, roc=roc, prn=prn))

Source File: test_utils.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def test_column_or_1d():
    """Check column_or_1d against a table of labelled example targets.

    Targets tagged binary, multiclass or continuous must come back equal to
    their ravelled form; a target with any other tag must raise ValueError.
    """
    accepted_tags = ("binary", "multiclass", "continuous")
    cases = (
        ("binary", ["spam", "egg", "spam"]),
        ("binary", [0, 1, 0, 1]),
        ("continuous", np.arange(10) / 20.),
        ("multiclass", [1, 2, 3]),
        ("multiclass", [0, 1, 2, 2, 0]),
        ("multiclass", [[1], [2], [3]]),
        ("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
        ("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
        ("multiclass-multioutput", [[1, 2, 3]]),
        ("continuous-multioutput", np.arange(30).reshape((-1, 3))),
    )

    for tag, target in cases:
        if tag not in accepted_tags:
            # Anything that is not a single column has to be rejected.
            assert_raises(ValueError, column_or_1d, target)
            continue
        assert_array_equal(column_or_1d(target), np.ravel(target))

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def fit(self, X, y = None):
		"""Store the distinct non-null values of the column as classes_. Returns self."""
		X = column_or_1d(X, warn = True)
		is_missing = pandas.isnull(X)
		self.classes_ = numpy.unique(X[~is_missing])
		return self

Source File: base.py From combo with BSD 2-Clause "Simplified" License

5 votes

def _set_weights(self, weights):
        """Internal function to set estimator weights.

        With ``weights=None`` every estimator gets weight 1. Otherwise the
        given weights are stored as a (1, n_estimators) row and rescaled so
        that they sum to the number of estimators.

        Parameters
        ----------
        weights : numpy array of shape (n_estimators,)
            Estimator weights. May be used after the alignment.

        Returns
        -------
        self

        """

        if weights is None:
            self.weights = np.ones([1, self.n_base_estimators_])
        else:
            self.weights = column_or_1d(weights).reshape(1, len(weights))
            assert (self.weights.shape[1] == self.n_base_estimators_)

            # rescale so the weights sum to the number of estimators
            # (i.e. they average to 1)
            adjust_factor = self.weights.shape[1] / np.sum(weights)
            self.weights = self.weights * adjust_factor

        return self

Source File: utils.py From auto_ml with MIT License

5 votes

def transform(self, y):
        """Encode labels, extending ``classes_`` with labels not seen before.

        Unseen labels are appended to the end of ``self.classes_``, so codes
        already handed out for known labels stay stable. Only the code of the
        first element of ``y`` is returned.

        Parameters
        ----------
        y : array-like of shape [n_samples]
            Labels to encode.

        Returns
        -------
        The position in ``self.classes_`` of the first label in ``y``.
        """
        y = column_or_1d(y, warn=True)

        classes = np.unique(y)
        if len(np.intersect1d(classes, self.classes_)) < len(classes):
            diff = np.setdiff1d(classes, self.classes_)
            self.classes_ = np.hstack((self.classes_, diff))

        # Appending unseen labels can leave classes_ unsorted, and a plain
        # searchsorted is only valid on sorted data. Search through a stable
        # sort permutation and map the hits back to positions in classes_.
        order = np.argsort(self.classes_, kind='mergesort')
        positions = order[np.searchsorted(self.classes_, y, sorter=order)]
        return positions[0]

Source File: scipy.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Evaluate self.bspline on the flattened input column."""
		return self.bspline(column_or_1d(X, warn = True))

Source File: scipy.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def fit(self, X, y = None):
		"""Run X through column_or_1d as a shape check; nothing is stored. Returns self."""
		column_or_1d(X, warn = True)
		return self

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def fit(self, X, y = None):
		"""Run X through column_or_1d as a shape check; nothing is stored. Returns self."""
		column_or_1d(X, warn = True)
		return self

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Bin the flattened column with pandas.cut and return the result via _col2d."""
		column = column_or_1d(X, warn = True)
		binned = pandas.cut(column, bins = self.bins, right = self.right, labels = self.labels, include_lowest = self.include_lowest)
		# A Categorical result is converted to a plain array before reshaping
		result = numpy.asarray(binned) if isinstance(binned, Categorical) else binned
		return _col2d(result)

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def fit(self, X, y = None):
		"""Run X through column_or_1d as a shape check; nothing is stored. Returns self."""
		column_or_1d(X, warn = True)
		return self

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Replace each value of the column by its entry in self._transform_dict()."""
		column = column_or_1d(X, warn = True)
		mapping = self._transform_dict()
		def lookup(key):
			return mapping[key]
		if hasattr(column, "apply"):
			mapped = column.apply(lookup)
		else:
			mapped = numpy.array([lookup(item) for item in column])
		return _col2d(mapped)

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Encode the column as an indicator matrix over self.classes_.

		The result has one row per sample and one column per class; a null
		value leaves its row all zeros. When self.sparse_output is truthy
		the matrix is built as a lil_matrix and returned in CSR form,
		otherwise a dense integer array is returned.
		"""
		X = column_or_1d(X, warn = True)
		index = list(self.classes_)
		shape = (len(X), len(index))
		# numpy.int was only an alias of the builtin int and is gone from current NumPy; use int directly
		if self.sparse_output:
			Xt = lil_matrix(shape, dtype = int)
		else:
			Xt = numpy.zeros(shape, dtype = int)
		for i, v in enumerate(X):
			if not pandas.isnull(v):
				Xt[i, index.index(v)] = 1
		if self.sparse_output:
			Xt = Xt.tocsr()
		return Xt

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Encode each value as its position in classes_; nulls become self.missing_values."""
		column = column_or_1d(X, warn = True)
		known = list(self.classes_)
		codes = []
		for value in column:
			if pandas.isnull(value):
				codes.append(self.missing_values)
			else:
				codes.append(known.index(value))
		return _col2d(numpy.array(codes))

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Slice every value of the column with [self.begin:self.end] via eval_rows."""
		column = column_or_1d(X, warn = True)
		def substring(value):
			return value[self.begin:self.end]
		return _col2d(eval_rows(column, substring))

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def fit(self, X, y = None):
		"""Run X through column_or_1d as a shape check; nothing is stored. Returns self."""
		column_or_1d(X, warn = True)
		return self

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Apply engine.sub(self.replacement, value) to every value of the column."""
		column = column_or_1d(X, warn = True)
		engine = _regex_engine(self.pattern)
		def substitute(value):
			return engine.sub(self.replacement, value)
		return _col2d(eval_rows(column, substitute))

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def fit(self, X, y = None):
		"""Run X through column_or_1d as a shape check; nothing is stored. Returns self."""
		column_or_1d(X, warn = True)
		return self

Source File: __init__.py From sklearn2pmml with GNU Affero General Public License v3.0

5 votes

def transform(self, X):
		"""Flag, for every value of the column, whether engine.search finds self.pattern."""
		column = column_or_1d(X, warn = True)
		engine = _regex_engine(self.pattern)
		def matches(value):
			return bool(engine.search(value))
		return _col2d(eval_rows(column, matches))

Source File: encode.py From skutil with BSD 3-Clause "New" or "Revised" License

5 votes

def transform(self, y):
        """Encode y against the fitted classes, mapping unknown labels to a sentinel.

        Labels found in ``classes_`` are encoded with ``np.searchsorted``;
        any other label is encoded as the value returned by ``_get_unseen()``.

        Parameters
        ----------

        y : array_like, shape=(n_samples,)
            The array to encode

        Returns
        -------

        e : array_like, shape=(n_samples,)
            The encoded array

        Raises
        ------

        ValueError
            If y has at least as many distinct levels as the sentinel value.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)

        levels = np.unique(y)
        _check_numpy_unicode_bug(levels)

        # The sentinel doubles as the upper bound on the number of levels.
        unseen = _get_unseen()
        if len(levels) >= unseen:
            raise ValueError('Too many factor levels in feature. Max is %i' % unseen)

        def encode_one(label):
            if label in self.classes_:
                return np.searchsorted(self.classes_, label)
            return unseen

        return np.array([encode_one(label) for label in y])

Source File: utility.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier scores into binary labels (0 or 1).

    A score is labelled 1 when it is strictly greater than the
    ``100 * (1 - outliers_fraction)`` percentile of all scores.

    Parameters
    ----------
    pred_scores : list or numpy array of shape (n_samples,)
        Raw outlier scores. Outliers are assumed have larger values.

    outliers_fraction : float in (0,1)
        Percentage of outliers.

    Returns
    -------
    outlier_labels : numpy array of shape (n_samples,)
        Integer labels, 1 for observations above the threshold and 0
        for the rest.
    """
    # validate inputs first
    scores = column_or_1d(pred_scores)
    check_parameter(outliers_fraction, 0, 1)

    cutoff = percentile(scores, 100 * (1 - outliers_fraction))
    is_outlier = scores > cutoff
    return is_outlier.astype('int')

Source File: utility.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def precision_n_scores(y, y_pred, n=None):
    """Compute precision @ rank n for raw outlier scores.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.

    n : int, optional (default=None)
        The number of outliers. if not defined, infer using ground truth.

    Returns
    -------
    precision_at_rank_n : float
        Precision at rank n score.

    """

    # binarise the raw scores before scoring them
    binary_pred = get_label_n(y, y_pred, n)

    # both arrays are flattened to 1d before being handed to precision_score
    truth = column_or_1d(y)
    return precision_score(truth, column_or_1d(binary_pred))

Source File: utility.py From pyod with BSD 2-Clause "Simplified" License

5 votes

def invert_order(scores, method='multiplication'):
    """ Invert the order of a list of values. The smallest value becomes
    the largest in the inverted list. This is useful while combining
    multiple detectors since their score order could be different.

    Parameters
    ----------
    scores : list, array or numpy array with shape (n_samples,)
        The list of values to be inverted

    method : str, optional (default='multiplication')
        Methods used for order inversion. Valid methods are:

        - 'multiplication': multiply by -1
        - 'subtraction': max(scores) - scores

    Returns
    -------
    inverted_scores : numpy array of shape (n_samples,)
        The inverted list

    Raises
    ------
    ValueError
        If ``method`` is neither 'multiplication' nor 'subtraction'.

    Examples
    --------
    >>> scores1 = [0.1, 0.3, 0.5, 0.7, 0.2, 0.1]
    >>> invert_order(scores1)
    array([-0.1, -0.3, -0.5, -0.7, -0.2, -0.1])
    >>> invert_order(scores1, method='subtraction')
    array([0.6, 0.4, 0.2, 0. , 0.5, 0.6])
    """

    scores = column_or_1d(scores)

    if method == 'multiplication':
        return scores.ravel() * -1

    if method == 'subtraction':
        return (scores.max() - scores).ravel()

    # An unknown method used to fall through and return None silently.
    raise ValueError(
        "method must be 'multiplication' or 'subtraction', got %r" % (method,))

Python sklearn.utils.column_or_1d() Examples