Python sklearn.utils.check_X_y() Examples

The following are 30 code examples of sklearn.utils.check_X_y(), drawn from open-source projects. You can follow the link above each example to the original project and source file, or browse the other functions and classes available in the sklearn.utils module.
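Before diving into the examples, here is a minimal sketch of the core behavior on made-up toy data: check_X_y validates that X and y have consistent lengths, converts both to NumPy arrays, and raises an error on malformed input.

import numpy as np
from sklearn.utils import check_X_y

X = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]  # 3 samples, 2 features
y = [0, 1, 0]                             # 3 labels

# Both inputs come back as validated numpy arrays.
X_checked, y_checked = check_X_y(X, y)
print(X_checked.shape, y_checked.shape)   # (3, 2) (3,)

# Inconsistent lengths (or y=None) raise a ValueError.
try:
    check_X_y(X, [0, 1])
except ValueError as exc:
    print(exc)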
Example #1
Source File: classifier_comb.py    From combo with BSD 2-Clause "Simplified" License
def fit(self, X, y):
        """Fit classifier.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,)
            The ground truth of the input samples (labels).
        """

        # Validate inputs X and y
        X, y = check_X_y(X, y)
        X = check_array(X)
        self._set_n_classes(y)

        if self.pre_fitted:
            print("Training skipped")
            return self
        else:
            for clf in self.base_estimators:
                clf.fit(X, y)
                clf.fitted_ = True
            return self
Example #2
Source File: externals.py    From sports-betting with MIT License
def fit(self, X, y, sample_weight=None):
        """Fit a separate classifier for each output variable."""

        for _, clf in self.classifiers:
            if not hasattr(clf, 'fit'):
                raise ValueError('Every base classifier should implement a fit method.')

        X, y = check_X_y(X, y, multi_output=True, accept_sparse=True)

        if is_classifier(self):
            check_classification_targets(y)

        if y.ndim == 1:
            raise ValueError('Output y must have at least two dimensions for multi-output classification but has only one.')

        if sample_weight is not None and any(not has_fit_parameter(clf, 'sample_weight') for _, clf in self.classifiers):
            raise ValueError('At least one of the base classifiers does not support sample weights.')

        self.classifiers_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_estimator)(clf, X, y[:, i], sample_weight) 
                                                        for i, (_, clf) in zip(range(y.shape[1]), self.classifiers))
        
        return self 
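Example #2 depends on multi_output=True so that a 2-D y passes validation. A hedged sketch of that flag's effect, with illustrative data:

import numpy as np
from sklearn.utils import check_X_y

X = np.arange(8.0).reshape(4, 2)                 # 4 samples, 2 features
Y = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])   # 4 samples, 2 output columns

# With multi_output=True the 2-D target is accepted unchanged.
X_val, Y_val = check_X_y(X, Y, multi_output=True)
print(Y_val.shape)  # (4, 2)

# Without it, check_X_y insists on a 1-D y: an (n, 1) column vector is
# flattened with a DataConversionWarning, and anything wider raises a
# ValueError -- which is why the fit above validates with multi_output=True
# before indexing y[:, i].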
Example #3
Source File: dummy.py    From scikit-lego with MIT License
def fit(self, X: np.array, y: np.array) -> "RandomRegressor":
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples,) training data.
        :return: Returns an instance of self.
        """
        if self.strategy not in self.allowed_strategies:
            raise ValueError(
                f"strategy {self.strategy} is not in {self.allowed_strategies}"
            )
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        self.dim_ = X.shape[1]

        self.min_ = np.min(y)
        self.max_ = np.max(y)
        self.mu_ = np.mean(y)
        self.sigma_ = np.std(y)

        return self 
Example #4
Source File: linear_model.py    From scikit-lego with MIT License
def fit(self, X, y):
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if self.span is not None:
            if not 0 <= self.span <= 1:
                raise ValueError(f"Param `span` must be 0 <= span <= 1, got: {self.span}")
        if self.sigma < 0:
            raise ValueError(f"Param `sigma` must be >= 0, got: {self.sigma}")
        self.X_ = X
        self.y_ = y
        return self 
Example #5
Source File: top_terms.py    From xam with MIT License
def fit(self, X, y=None, **fit_params):

        # scikit-learn checks
        X, y = utils.check_X_y(X, y, accept_sparse='csr', order='C')

        n_terms = min(self.n_terms, X.shape[1])

        # Get a list of unique labels from y
        labels = np.unique(y)

        # Determine the n top terms per class
        self.top_terms_per_class_ = {
            c: set(np.argpartition(np.sum(X[y == c], axis=0), -n_terms)[-n_terms:])
            for c in labels
        }

        # Return the classifier
        return self 
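Example #5 passes accept_sparse='csr' so that sparse document-term matrices survive validation instead of being rejected. A hedged sketch with toy data:

import numpy as np
from scipy import sparse
from sklearn.utils import check_X_y

X = sparse.random(5, 10, density=0.3, format='csr', random_state=0)
y = np.array([0, 1, 0, 1, 1])

# accept_sparse='csr' lets the CSR matrix through unconverted;
# order='C' only affects dense arrays.
X_val, y_val = check_X_y(X, y, accept_sparse='csr', order='C')
print(type(X_val).__name__)  # csr_matrix

# With the default accept_sparse=False, sparse input raises a TypeError.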
Example #6
Source File: linear_model.py    From scikit-lego with MIT License
def fit(self, X, y):
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if self.effect not in self.allowed_effects:
            raise ValueError(f"effect {self.effect} must be in {self.allowed_effects}")

        def deadzone(errors):
            if self.effect == "linear":
                return np.where(errors > self.threshold, errors, np.zeros(errors.shape))
            if self.effect == "quadratic":
                return np.where(
                    errors > self.threshold, errors ** 2, np.zeros(errors.shape)
                )

        def training_loss(weights):
            diff = np.abs(np.dot(X, weights) - y)
            if self.relative:
                diff = diff / y
            return np.mean(deadzone(diff))

        n, k = X.shape

        # Build a function that returns gradients of training loss using autograd.
        training_gradient_fun = grad(training_loss)

        # Check the gradients numerically, just to be safe.
        weights = np.random.normal(0, 1, k)
        if self.check_grad:
            check_grads(training_loss, modes=["rev"])(weights)

        # Optimize weights using gradient descent.
        self.loss_log_ = np.zeros(self.n_iter)
        self.wts_log_ = np.zeros((self.n_iter, k))
        self.deriv_log_ = np.zeros((self.n_iter, k))
        for i in range(self.n_iter):
            weights -= training_gradient_fun(weights) * self.stepsize
            self.wts_log_[i, :] = weights.ravel()
            self.loss_log_[i] = training_loss(weights)
            self.deriv_log_[i, :] = training_gradient_fun(weights).ravel()
        self.coefs_ = weights
        return self 
Example #7
Source File: classifier_dcs.py    From combo with BSD 2-Clause "Simplified" License
def fit(self, X, y):
        """Fit classifier.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,)
            The ground truth of the input samples (labels).
        """

        # Validate inputs X and y
        X, y = check_X_y(X, y)
        X = check_array(X)
        check_classification_targets(y)
        self._classes = len(np.unique(y))
        n_samples = X.shape[0]

        # save the train ground truth for evaluation purpose
        self.y_train_ = y

        # build KDTree out of training subspace
        self.tree_ = KDTree(X)

        self.y_train_predicted_ = np.zeros(
            [n_samples, self.n_base_estimators_])

        # train all base classifiers on X, and get their local predicted scores
        # iterate over all base classifiers
        for i, clf in enumerate(self.base_estimators):
            clf.fit(X, y)
            self.y_train_predicted_[:, i] = clf.predict(X)
            clf.fitted_ = True

        self.fitted_ = True

        return self
Example #8
Source File: al_experiment.py    From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X, y, model=LogisticRegression(solver='liblinear'), performance_metric='accuracy_score',
                 stopping_criteria=None, stopping_value=None, batch_size=1, **kwargs):
        self.__custom_strategy_flag = False
        self._split = False
        self._metrics = False
        self._split_count = 0
        self._query_function_need_train_ind = False
        self._existed_query_strategy = False

        self._X, self._y = check_X_y(X, y, accept_sparse='csc', multi_output=True)
        self._model = model
        self._experiment_result = []
        # set split in the initial
        train_idx = kwargs.pop('train_idx', None)
        test_idx = kwargs.pop('test_idx', None)
        label_idx = kwargs.pop('label_idx', None)
        unlabel_idx = kwargs.pop('unlabel_idx', None)
        if train_idx is not None and test_idx is not None and label_idx is not None and unlabel_idx is not None:
            if not (len(train_idx) == len(test_idx) == len(label_idx) == len(unlabel_idx)):
                raise ValueError("train_idx, test_idx, label_idx, unlabel_idx "
                                 "should have the same split count (length)")
            self._split = True
            self._train_idx = train_idx
            self._test_idx = test_idx
            self._label_idx = label_idx
            self._unlabel_idx = unlabel_idx
            self._split_count = len(train_idx)

        self._stopping_criterion = StoppingCriteria(stopping_criteria, stopping_value)
        self._batch_size = batch_size 
Example #9
Source File: neighbors.py    From scikit-lego with MIT License
def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_samples, n_features)
        :param y: array-like, shape=(n_samples,)
        :return: Returns an instance of self
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

        self.classes_ = unique_labels(y)
        self.models_, self.priors_logp_ = {}, {}
        for target_label in self.classes_:
            x_subset = X[y == target_label]

            # Computing joint distribution
            self.models_[target_label] = KernelDensity(
                bandwidth=self.bandwidth,
                kernel=self.kernel,
                algorithm=self.algorithm,
                metric=self.metric,
                atol=self.atol,
                rtol=self.rtol,
                breadth_first=self.breath_first,
                leaf_size=self.leaf_size,
                metric_params=self.metric_params,
            ).fit(x_subset)

            # Computing target class prior
            self.priors_logp_[target_label] = np.log(len(x_subset) / len(X))

        return self 
Example #10
Source File: gmm_classifier.py    From scikit-lego with MIT License
def fit(self, X: np.array, y: np.array) -> "GMMClassifier":
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        for c in self.classes_:
            subset_x, subset_y = X[y == c], y[y == c]
            mixture = GaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weights_init=self.weights_init,
                means_init=self.means_init,
                precisions_init=self.precisions_init,
                random_state=self.random_state,
                warm_start=self.warm_start,
                verbose=self.verbose,
                verbose_interval=self.verbose_interval,
            )
            self.gmms_[c] = mixture.fit(subset_x, subset_y)
        return self 
Example #11
Source File: bayesian_gmm_classifier.py    From scikit-lego with MIT License
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        for c in self.classes_:
            subset_x, subset_y = X[y == c], y[y == c]
            mixture = BayesianGaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weight_concentration_prior_type=self.weight_concentration_prior_type,
                weight_concentration_prior=self.weight_concentration_prior,
                mean_precision_prior=self.mean_precision_prior,
                mean_prior=self.mean_prior,
                degrees_of_freedom_prior=self.degrees_of_freedom_prior,
                covariance_prior=self.covariance_prior,
                random_state=self.random_state,
                warm_start=self.warm_start,
                verbose=self.verbose,
                verbose_interval=self.verbose_interval,
            )
            self.gmms_[c] = mixture.fit(subset_x, subset_y)
        return self 
Example #12
Source File: boruta_py.py    From boruta_py with BSD 3-Clause "New" or "Revised" License
def _check_params(self, X, y):
        """
        Check hyperparameters as well as X and y before proceeding with fit.
        """
        # check that X and y have consistent lengths; X is a 2-D array, y a 1-D vector
        X, y = check_X_y(X, y)
        if self.perc <= 0 or self.perc > 100:
            raise ValueError('The percentile should be between 0 and 100.')

        if self.alpha <= 0 or self.alpha > 1:
            raise ValueError('Alpha should be between 0 and 1.') 
Example #13
Source File: linear_model.py    From scikit-lego with MIT License
def fit(self, X, y):
        if self.penalty not in ["l1", "none"]:
            raise ValueError(
                f"penalty should be either 'l1' or 'none', got {self.penalty}"
            )

        self.sensitive_col_idx_ = self.sensitive_cols
        if isinstance(X, pd.DataFrame):
            self.sensitive_col_idx_ = [
                i for i, name in enumerate(X.columns) if name in self.sensitive_cols
            ]
        X, y = check_X_y(X, y, accept_large_sparse=False)

        sensitive = X[:, self.sensitive_col_idx_]
        if not self.train_sensitive_cols:
            X = np.delete(X, self.sensitive_col_idx_, axis=1)
        X = self._add_intercept(X)

        column_or_1d(y)
        label_encoder = LabelEncoder().fit(y)
        y = label_encoder.transform(y)
        self.classes_ = label_encoder.classes_

        if len(self.classes_) > 2:
            raise ValueError(
                f"This solver needs samples of exactly 2 classes"
                f" in the data, but the data contains {len(self.classes_)}: {self.classes_}"
            )

        self._solve(sensitive, X, y)
        return self 
Example #14
Source File: randomadder.py    From scikit-lego with MIT License
def fit(self, X, y):
        super().fit(X, y)
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        self.dim_ = X.shape[1]

        return self 
Example #15
Source File: naive_bayes.py    From scikit-lego with MIT License
def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB":
        """
        Fit the model using X, y as training data.

        :param X: array-like, shape=(n_samples, n_columns) training data.
        :param y: array-like, shape=(n_samples, ) training data.
        :return: Returns an instance of self.
        """
        X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            X = np.expand_dims(X, 1)

        self.gmms_ = {}
        self.classes_ = unique_labels(y)
        self.num_fit_cols_ = X.shape[1]
        for c in self.classes_:
            subset_x, subset_y = X[y == c], y[y == c]
            self.gmms_[c] = [
                GaussianMixture(
                    n_components=self.n_components,
                    covariance_type=self.covariance_type,
                    tol=self.tol,
                    reg_covar=self.reg_covar,
                    max_iter=self.max_iter,
                    n_init=self.n_init,
                    init_params=self.init_params,
                    weights_init=self.weights_init,
                    means_init=self.means_init,
                    precisions_init=self.precisions_init,
                    random_state=self.random_state,
                    warm_start=self.warm_start,
                ).fit(subset_x[:, i].reshape(-1, 1), subset_y)
                for i in range(X.shape[1])
            ]
        return self 
Example #16
Source File: test_estimatortransformer.py    From scikit-lego with MIT License
def test_values_uniform(random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy="most_frequent")
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)

    assert transformed.shape == (y.shape[0], 1)
    assert np.all(transformed == clf.fit(X, y).predict(X)) 
Example #17
Source File: _ridge_0_21.py    From daal4py with Apache License 2.0
def fit(self, X, y, sample_weight=None):
    """Fit Ridge regression model

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=[np.float64, np.float32],
            multi_output=True, y_numeric=True)
    self.sample_weight_ = sample_weight
    self.fit_shape_good_for_daal_ = X.shape[0] >= X.shape[1]
    if (not self.solver == 'auto' or
            sp.issparse(X) or
            not self.fit_shape_good_for_daal_ or
            not (X.dtype == np.float64 or X.dtype == np.float32) or
            sample_weight is not None):
        if hasattr(self, 'daal_model_'):
            del self.daal_model_
        logging.info("sklearn.linear_model.Ridge.fit: " + method_uses_sklearn)
        return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
    else:
        logging.info("sklearn.linear_model.Ridge.fit: " + method_uses_daal)
        self.n_iter_ = None
        return _daal4py_fit(self, X, y) 
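Example #17 combines three more validation options: a list of acceptable sparse formats, a dtype preference list, and y_numeric=True for regression targets. A small hedged sketch of the dtype handling, with made-up values:

import numpy as np
from sklearn.utils import check_X_y

X = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.int64)
y = np.array([0.1, 0.2, 0.3], dtype=object)

# A dtype list means "accept any of these; otherwise convert to the first",
# so integer X is promoted to float64. y_numeric=True likewise converts an
# object-dtype y to float64, as expected for regression targets.
X_val, y_val = check_X_y(X, y, dtype=[np.float64, np.float32], y_numeric=True)
print(X_val.dtype, y_val.dtype)  # float64 float64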
Example #18
Source File: metrics.py    From AIF360 with Apache License 2.0
def consistency_score(X, y, n_neighbors=5):
    r"""Compute the consistency score.

    Individual fairness metric from [#zemel13]_ that measures how similar the
    labels are for similar instances.

    .. math::
        1 - \frac{1}{n\cdot\text{n_neighbors}}\sum_{i=1}^n |\hat{y}_i -
        \sum_{j\in\mathcal{N}_{\text{n_neighbors}}(x_i)} \hat{y}_j|

    Args:
        X (array-like): Sample features.
        y (array-like): Sample targets.
        n_neighbors (int, optional): Number of neighbors for the knn
            computation.

    References:
        .. [#zemel13] `R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork,
           "Learning Fair Representations," International Conference on Machine
           Learning, 2013. <http://proceedings.mlr.press/v28/zemel13.html>`_
    """
    # cast as ndarrays
    X, y = check_X_y(X, y)
    # learn a KNN on the features
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(X)
    indices = nbrs.kneighbors(X, return_distance=False)

    # compute consistency score
    return 1 - abs(y - y[indices].mean(axis=1)).mean()


# ================================ ALIASES ===================================== 
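A quick hedged usage sketch for consistency_score as defined above, on toy data chosen so that similar points share labels (values illustrative only):

import numpy as np

X = np.array([[0.0], [0.1], [5.0], [5.1]])
y = np.array([0, 0, 1, 1])

# Each point's 2 nearest neighbors (itself included) share its label,
# so the score should come out as 1.0.
print(consistency_score(X, y, n_neighbors=2))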
Example #19
Source File: utils.py    From mvlearn with Apache License 2.0
def check_Xs_y(Xs, y, multiview=False, enforce_views=None):
    r"""
    Checks Xs and y for consistent length. Xs is set to be of dimension 3.

    Parameters
    ----------
    Xs : nd-array, list
        Input data.

    y : nd-array, list
        Labels.

    multiview : boolean, (default=False)
        If True, throws error if just 1 data matrix given.

    enforce_views : int, (default=not checked)
        If provided, ensures this number of views in Xs. Otherwise not
        checked.

    Returns
    -------
    Xs_converted : object
        The converted and validated Xs (list of data arrays).

    y_converted : object
        The converted and validated y.
    """
    Xs_converted = check_Xs(Xs, multiview=multiview,
                            enforce_views=enforce_views)
    _, y_converted = check_X_y(Xs_converted[0], y, allow_nd=False)

    return Xs_converted, y_converted 
Example #20
Source File: knne.py    From DESlib with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            Data used to fit the model.

        y : array of shape (n_samples,)
            Class labels of each example in X.
        """
        X, y = check_X_y(X, y)

        self.knns_ = {}
        self.classes_indexes_ = {}
        self.fit_X_ = X
        self.fit_y_ = y
        self.classes_ = np.unique(y)
        self.n_classes_ = self.classes_.size

        # Checking inputs
        self._check_n_neighbors(self.n_neighbors)
        self._set_knn_type()

        tmp = self._handle_n_neighbors(self.n_neighbors)
        self._mdc, self._mod, self._neighbors_per_class = tmp
        for class_ in self.classes_:
            self.classes_indexes_[class_] = np.argwhere(
                np.array(y) == class_).ravel()
            y_c = y[self.classes_indexes_[class_]]
            X_c = X[self.classes_indexes_[class_], :]
            knn = self.knn_type_(n_neighbors=self._neighbors_per_class,
                                 **self.kwargs)
            self.knns_[class_] = knn.fit(X_c, y_c)

        return self 
Example #21
Source File: nonnegative.py    From civisml-extensions with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, sample_weight=None):
        """Fit non-negative linear model.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples, n_features]
            Training data
        y : numpy array of shape [n_samples,]
            Target values
        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample

        Returns
        -------
        self : returns an instance of self.

        """
        X, y = check_X_y(X, y, y_numeric=True, multi_output=False)

        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
            copy=self.copy_X, sample_weight=sample_weight)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        self.coef_, result = nnls(X, y.squeeze())

        if np.all(self.coef_ == 0):
            raise ConvergenceWarning("All coefficients estimated to be zero in"
                                     " the non-negative least squares fit.")

        self._set_intercept(X_offset, y_offset, X_scale)
        self.opt_result_ = OptimizeResult(success=True, status=0, x=self.coef_,
                                          fun=result)
        return self 
Example #22
Source File: stacking.py    From civisml-extensions with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, **fit_params):
        """Fit the model

        Fit the base estimators on CV folds, then use their prediction on the
        validation folds to train the meta-estimator. Then re-fit base
        estimators on full training set.

        Parameters
        ----------
        X : np.ndarray, list of numbers
            Training data.
        y : np.ndarray, list of numbers
            Training targets.
        **fit_params : dict of {string, object}
            Parameters passed to the ``fit`` method of each estimator, where
            each parameter name is prefixed such that parameter ``p`` for
            estimator ``s`` has key ``s__p``.

        Returns
        -------
        self : BaseStackedModel
            This estimator
        """
        self._validate_estimators()
        X, y = check_X_y(X, y, multi_output=True)

        # Fit base estimators on CV training folds, produce features for
        # meta-estimator from predictions on CV test folds.
        Xmeta, ymeta, meta_params = self._base_est_fit_predict(X, y,
                                                               **fit_params)
        # Fit meta-estimator on test fold predictions of base estimators.
        self.meta_estimator.fit(Xmeta, ymeta, **meta_params)
        # Now fit base estimators again, this time on full training set
        self._base_est_fit(X, y, **fit_params)

        return self

    # _replace_est copied nearly verbatim from sklearn.pipeline._BasePipeline
    # v0.18.1 "_replace_step" method. 
Example #23
Source File: randomized_lasso.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.

        sample_weight : array-like, shape (n_samples,) optional
            Array of weights that are assigned to individual samples.
            If not provided, then each sample is given unit weight.
        """
        if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0):
            raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness)

        X, y = check_X_y(X, y, accept_sparse='csr', dtype=[np.float64, np.float32],
                         order="C")

        n_features = X.shape[1]
        weakness = 1. - self.weakness
        random_state = check_random_state(self.random_state)

        weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,))
        X_rescaled = _rescale_data(X, weights)
        return super(RandomizedLogisticRegression, self).fit(X_rescaled, y, sample_weight) 
Example #24
Source File: mdlp.py    From xam with MIT License
def fit(self, X, y, **fit_params):
        """Determine which are the best cut points for each column in X based on y."""

        X, y = check_X_y(X, y, y_numeric=True)

        self.cut_points_ = [mdlp_cut(x, y, []) for x in X.T]
        return self 
Example #25
Source File: nb_svm.py    From xam with MIT License
def fit(self, X, y, sample_weight=None):

        X, y = utils.check_X_y(X, y, accept_sparse='csr', order='C')

        def pr(x, y_i, y):
            p = x[y == y_i].sum(0)
            return (p+1) / ((y==y_i).sum()+1)

        self.r_ = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))

        return super().fit(X.multiply(self.r_), y, sample_weight) 
Example #26
Source File: randomized_lasso.py    From stability-selection with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.
        """
        if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0):
            raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness)

        X, y = check_X_y(X, y, accept_sparse=True)

        n_features = X.shape[1]
        weakness = 1. - self.weakness
        random_state = check_random_state(self.random_state)

        weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,))

        # TODO: I am afraid this will do double normalization if set to true
        #X, y, _, _ = _preprocess_data(X, y, self.fit_intercept, normalize=self.normalize, copy=False,
        #             sample_weight=None, return_mean=False)

        # TODO: Check if this is a problem if it happens before standardization
        X_rescaled = _rescale_data(X, weights)
        return super(RandomizedLasso, self).fit(X_rescaled, y) 
Example #27
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_check_X_y_informative_error():
    X = np.ones((2, 2))
    y = None
    assert_raise_message(ValueError, "y cannot be None", check_X_y, X, y) 
Example #28
Source File: classifier_des.py    From combo with BSD 2-Clause "Simplified" License
def fit(self, X, y):
        """Fit classifier.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,)
            The ground truth of the input samples (labels).
        """

        # Validate inputs X and y
        X, y = check_X_y(X, y)
        X = check_array(X)
        check_classification_targets(y)
        self._classes = len(np.unique(y))
        n_samples = X.shape[0]

        # save the train ground truth for evaluation purpose
        self.y_train_ = y

        # build KDTree out of training subspace
        self.tree_ = KDTree(X)

        self.y_train_predicted_ = np.zeros(
            [n_samples, self.n_base_estimators_])

        # train all base classifiers on X, and get their local predicted scores
        # iterate over all base classifiers
        for i, clf in enumerate(self.base_estimators):
            clf.fit(X, y)
            self.y_train_predicted_[:, i] = clf.predict(X)
            clf.fitted_ = True

        self.fitted_ = True

        return self
Example #29
Source File: test_validation.py    From Mastering-Elasticsearch-7.0 with MIT License
def test_check_array_warn_on_dtype_deprecation():
    X = np.asarray([[0.0], [1.0]])
    Y = np.asarray([[2.0], [3.0]])
    with pytest.warns(DeprecationWarning,
                      match="'warn_on_dtype' is deprecated"):
        check_array(X, warn_on_dtype=True)
    with pytest.warns(DeprecationWarning,
                      match="'warn_on_dtype' is deprecated"):
        check_X_y(X, Y, warn_on_dtype=True) 
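Note that warn_on_dtype was deprecated in scikit-learn 0.21 and has since been removed, so this deprecation test only applies to the 0.21/0.22 series.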
Example #30
Source File: mifs.py    From mifs with BSD 3-Clause "New" or "Revised" License
def _check_params(self, X, y):
        # checking input data and scaling it if y is continuous
        X, y = check_X_y(X, y)

        if not self.categorical:
            ss = StandardScaler()
            X = ss.fit_transform(X)
            y = ss.fit_transform(y.reshape(-1, 1))

        # sanity checks
        methods = ['JMI', 'JMIM', 'MRMR']
        if self.method not in methods:
            raise ValueError('Please choose one of the following methods:\n' +
                             '\n'.join(methods))

        if not isinstance(self.k, int):
            raise ValueError("k must be an integer.")
        if self.k < 1:
            raise ValueError('k must be larger than 0.')
        if self.categorical and np.any(self.k > np.bincount(y)):
            raise ValueError('k must be smaller than your smallest class.')

        if not isinstance(self.categorical, bool):
            raise ValueError('Categorical must be Boolean.')
        if self.categorical and np.unique(y).shape[0] > 5:
            print('Are you sure y is categorical? It has more than 5 levels.')
        if not self.categorical and self._isinteger(y):
            print('Are you sure y is continuous? It seems to be discrete.')
        if self._isinteger(X):
            print('The values of X seem to be discrete. MI_FS will treat them '
                  'as continuous.')
        return X, y