Python sklearn.utils.check_X_y() Examples
The following are 30 code examples of sklearn.utils.check_X_y(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils, or try the search function.
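Before the examples, here is a minimal sketch of what check_X_y() does on its own (toy data chosen purely for illustration): it validates that X and y have consistent lengths, converts them to numeric arrays, and raises a ValueError on malformed input.

import numpy as np
from sklearn.utils import check_X_y

X = [[1, 2], [3, 4], [5, 6]]
y = [0, 1, 0]

# Returns validated numpy arrays with matching first dimensions.
X_checked, y_checked = check_X_y(X, y)
print(X_checked.shape, y_checked.shape)  # (3, 2) (3,)

# Inconsistent lengths raise a ValueError.
try:
    check_X_y(X, [0, 1])
except ValueError as exc:
    print(exc)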
Example #1
Source File: classifier_comb.py From combo with BSD 2-Clause "Simplified" License | 6 votes |
def fit(self, X, y): """Fit classifier. Parameters ---------- X : numpy array of shape (n_samples, n_features) The input samples. y : numpy array of shape (n_samples,), optional (default=None) The ground truth of the input samples (labels). """ # Validate inputs X and y X, y = check_X_y(X, y) X = check_array(X) self._set_n_classes(y) if self.pre_fitted: print("Training skipped") return else: for clf in self.base_estimators: clf.fit(X, y) clf.fitted_ = True return
Example #2
Source File: externals.py From sports-betting with MIT License | 6 votes |
def fit(self, X, y, sample_weight=None): """Fit a separate classifier for each output variable.""" for _, clf in self.classifiers: if not hasattr(clf, 'fit'): raise ValueError('Every base classifier should implement a fit method.') X, y = check_X_y(X, y, multi_output=True, accept_sparse=True) if is_classifier(self): check_classification_targets(y) if y.ndim == 1: raise ValueError('Output y must have at least two dimensions for multi-output classification but has only one.') if sample_weight is not None and any([not has_fit_parameter(clf, 'sample_weight') for _, clf in self.classifiers]): raise ValueError('One of base classifiers does not support sample weights.') self.classifiers_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_estimator)(clf, X, y[:, i], sample_weight) for i, (_, clf) in zip(range(y.shape[1]), self.classifiers)) return self
Example #3
Source File: dummy.py From scikit-lego with MIT License | 6 votes |
def fit(self, X: np.array, y: np.array) -> "RandomRegressor": """ Fit the model using X, y as training data. :param X: array-like, shape=(n_columns, n_samples,) training data. :param y: array-like, shape=(n_samples,) training data. :return: Returns an instance of self. """ if self.strategy not in self.allowed_strategies: raise ValueError( f"strategy {self.strategy} is not in {self.allowed_strategies}" ) X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES) self.dim_ = X.shape[1] self.min_ = np.min(y) self.max_ = np.max(y) self.mu_ = np.mean(y) self.sigma_ = np.std(y) return self
Example #4
Source File: linear_model.py From scikit-lego with MIT License | 6 votes |
def fit(self, X, y): """ Fit the model using X, y as training data. :param X: array-like, shape=(n_columns, n_samples, ) training data. :param y: array-like, shape=(n_samples, ) training data. :return: Returns an instance of self. """ X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES) if self.span is not None: if not 0 <= self.span <= 1: raise ValueError(f"Param `span` must be 0 <= span <= 1, got: {self.span}") if self.sigma < 0: raise ValueError(f"Param `sigma` must be >= 0, got: {self.sigma}") self.X_ = X self.y_ = y return self
Example #5
Source File: top_terms.py From xam with MIT License | 6 votes |
def fit(self, X, y=None, **fit_params):

    # scikit-learn checks
    X, y = utils.check_X_y(X, y, accept_sparse='csr', order='C')

    n_terms = min(self.n_terms, X.shape[1])

    # Get a list of unique labels from y
    labels = np.unique(y)

    # Determine the n top terms per class
    self.top_terms_per_class_ = {
        c: set(np.argpartition(np.sum(X[y == c], axis=0), -n_terms)[-n_terms:])
        for c in labels
    }

    # Return the classifier
    return self
Example #6
Source File: linear_model.py From scikit-lego with MIT License | 5 votes |
def fit(self, X, y):
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if self.effect not in self.allowed_effects:
        raise ValueError(f"effect {self.effect} must be in {self.allowed_effects}")

    def deadzone(errors):
        if self.effect == "linear":
            return np.where(errors > self.threshold, errors, np.zeros(errors.shape))
        if self.effect == "quadratic":
            return np.where(
                errors > self.threshold, errors ** 2, np.zeros(errors.shape)
            )

    def training_loss(weights):
        diff = np.abs(np.dot(X, weights) - y)
        if self.relative:
            diff = diff / y
        return np.mean(deadzone(diff))

    n, k = X.shape

    # Build a function that returns gradients of training loss using autograd.
    training_gradient_fun = grad(training_loss)

    # Check the gradients numerically, just to be safe.
    weights = np.random.normal(0, 1, k)
    if self.check_grad:
        check_grads(training_loss, modes=["rev"])(weights)

    # Optimize weights using gradient descent.
    self.loss_log_ = np.zeros(self.n_iter)
    self.wts_log_ = np.zeros((self.n_iter, k))
    self.deriv_log_ = np.zeros((self.n_iter, k))
    for i in range(self.n_iter):
        weights -= training_gradient_fun(weights) * self.stepsize
        self.wts_log_[i, :] = weights.ravel()
        self.loss_log_[i] = training_loss(weights)
        self.deriv_log_[i, :] = training_gradient_fun(weights).ravel()
    self.coefs_ = weights
    return self
Example #7
Source File: classifier_dcs.py From combo with BSD 2-Clause "Simplified" License | 5 votes |
def fit(self, X, y): """Fit classifier. Parameters ---------- X : numpy array of shape (n_samples, n_features) The input samples. y : numpy array of shape (n_samples,), optional (default=None) The ground truth of the input samples (labels). """ # Validate inputs X and y X, y = check_X_y(X, y) X = check_array(X) check_classification_targets(y) self._classes = len(np.unique(y)) n_samples = X.shape[0] # save the train ground truth for evaluation purpose self.y_train_ = y # build KDTree out of training subspace self.tree_ = KDTree(X) self.y_train_predicted_ = np.zeros( [n_samples, self.n_base_estimators_]) # train all base classifiers on X, and get their local predicted scores # iterate over all base classifiers for i, clf in enumerate(self.base_estimators): clf.fit(X, y) self.y_train_predicted_[:, i] = clf.predict(X) clf.fitted_ = True self.fitted_ = True return
Example #8
Source File: al_experiment.py From ALiPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, X, y, model=LogisticRegression(solver='liblinear'),
             performance_metric='accuracy_score',
             stopping_criteria=None, stopping_value=None,
             batch_size=1, **kwargs):
    self.__custom_strategy_flag = False
    self._split = False
    self._metrics = False
    self._split_count = 0
    self._query_function_need_train_ind = False
    self._existed_query_strategy = False

    self._X, self._y = check_X_y(X, y, accept_sparse='csc', multi_output=True)
    self._model = model
    self._experiment_result = []
    # set split in the initial
    train_idx = kwargs.pop('train_idx', None)
    test_idx = kwargs.pop('test_idx', None)
    label_idx = kwargs.pop('label_idx', None)
    unlabel_idx = kwargs.pop('unlabel_idx', None)
    if train_idx is not None and test_idx is not None and \
            label_idx is not None and unlabel_idx is not None:
        if not (len(train_idx) == len(test_idx) == len(label_idx) == len(unlabel_idx)):
            raise ValueError("train_idx, test_idx, label_idx, unlabel_idx "
                             "should have the same split count (length)")
        self._split = True
        self._train_idx = train_idx
        self._test_idx = test_idx
        self._label_idx = label_idx
        self._unlabel_idx = unlabel_idx
        self._split_count = len(train_idx)

    self._stopping_criterion = StoppingCriteria(stopping_criteria, stopping_value)
    self._batch_size = batch_size
Example #9
Source File: neighbors.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.ndarray, y: np.ndarray):
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_features, n_samples)
    :param y: array-like, shape=(n_samples)
    :return: Returns an instance of self
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)

    self.classes_ = unique_labels(y)
    self.models_, self.priors_logp_ = {}, {}
    for target_label in self.classes_:
        x_subset = X[y == target_label]

        # Computing joint distribution
        self.models_[target_label] = KernelDensity(
            bandwidth=self.bandwidth,
            kernel=self.kernel,
            algorithm=self.algorithm,
            metric=self.metric,
            atol=self.atol,
            rtol=self.rtol,
            breadth_first=self.breath_first,
            leaf_size=self.leaf_size,
            metric_params=self.metric_params,
        ).fit(x_subset)

        # Computing target class prior
        self.priors_logp_[target_label] = np.log(len(x_subset) / len(X))

    return self
Example #10
Source File: gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "GMMClassifier": """ Fit the model using X, y as training data. :param X: array-like, shape=(n_columns, n_samples, ) training data. :param y: array-like, shape=(n_samples, ) training data. :return: Returns an instance of self. """ X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES) if X.ndim == 1: X = np.expand_dims(X, 1) self.gmms_ = {} self.classes_ = unique_labels(y) for c in self.classes_: subset_x, subset_y = X[y == c], y[y == c] mixture = GaussianMixture( n_components=self.n_components, covariance_type=self.covariance_type, tol=self.tol, reg_covar=self.reg_covar, max_iter=self.max_iter, n_init=self.n_init, init_params=self.init_params, weights_init=self.weights_init, means_init=self.means_init, precisions_init=self.precisions_init, random_state=self.random_state, warm_start=self.warm_start, verbose=self.verbose, verbose_interval=self.verbose_interval, ) self.gmms_[c] = mixture.fit(subset_x, subset_y) return self
Example #11
Source File: bayesian_gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier": """ Fit the model using X, y as training data. :param X: array-like, shape=(n_columns, n_samples, ) training data. :param y: array-like, shape=(n_samples, ) training data. :return: Returns an instance of self. """ X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES) if X.ndim == 1: X = np.expand_dims(X, 1) self.gmms_ = {} self.classes_ = unique_labels(y) for c in self.classes_: subset_x, subset_y = X[y == c], y[y == c] mixture = BayesianGaussianMixture( n_components=self.n_components, covariance_type=self.covariance_type, tol=self.tol, reg_covar=self.reg_covar, max_iter=self.max_iter, n_init=self.n_init, init_params=self.init_params, weight_concentration_prior_type=self.weight_concentration_prior_type, weight_concentration_prior=self.weight_concentration_prior, mean_precision_prior=self.mean_precision_prior, mean_prior=self.mean_prior, degrees_of_freedom_prior=self.degrees_of_freedom_prior, covariance_prior=self.covariance_prior, random_state=self.random_state, warm_start=self.warm_start, verbose=self.verbose, verbose_interval=self.verbose_interval, ) self.gmms_[c] = mixture.fit(subset_x, subset_y) return self
Example #12
Source File: boruta_py.py From boruta_py with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_params(self, X, y): """ Check hyperparameters as well as X and y before proceeding with fit. """ # check X and y are consistent len, X is Array and y is column X, y = check_X_y(X, y) if self.perc <= 0 or self.perc > 100: raise ValueError('The percentile should be between 0 and 100.') if self.alpha <= 0 or self.alpha > 1: raise ValueError('Alpha should be between 0 and 1.')
Example #13
Source File: linear_model.py From scikit-lego with MIT License | 5 votes |
def fit(self, X, y):
    if self.penalty not in ["l1", "none"]:
        raise ValueError(
            f"penalty should be either 'l1' or 'none', got {self.penalty}"
        )

    self.sensitive_col_idx_ = self.sensitive_cols

    if isinstance(X, pd.DataFrame):
        self.sensitive_col_idx_ = [
            i for i, name in enumerate(X.columns) if name in self.sensitive_cols
        ]
    X, y = check_X_y(X, y, accept_large_sparse=False)

    sensitive = X[:, self.sensitive_col_idx_]
    if not self.train_sensitive_cols:
        X = np.delete(X, self.sensitive_col_idx_, axis=1)
    X = self._add_intercept(X)

    column_or_1d(y)
    label_encoder = LabelEncoder().fit(y)
    y = label_encoder.transform(y)
    self.classes_ = label_encoder.classes_

    if len(self.classes_) > 2:
        raise ValueError(
            f"This solver needs samples of exactly 2 classes"
            f" in the data, but the data contains {len(self.classes_)}: {self.classes_}"
        )

    self._solve(sensitive, X, y)
    return self
Example #14
Source File: randomadder.py From scikit-lego with MIT License | 5 votes |
def fit(self, X, y):
    super().fit(X, y)
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    self.dim_ = X.shape[1]
    return self
Example #15
Source File: naive_bayes.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB": """ Fit the model using X, y as training data. :param X: array-like, shape=(n_columns, n_samples, ) training data. :param y: array-like, shape=(n_samples, ) training data. :return: Returns an instance of self. """ X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES) if X.ndim == 1: X = np.expand_dims(X, 1) self.gmms_ = {} self.classes_ = unique_labels(y) self.num_fit_cols_ = X.shape[1] for c in self.classes_: subset_x, subset_y = X[y == c], y[y == c] self.gmms_[c] = [ GaussianMixture( n_components=self.n_components, covariance_type=self.covariance_type, tol=self.tol, reg_covar=self.reg_covar, max_iter=self.max_iter, n_init=self.n_init, init_params=self.init_params, weights_init=self.weights_init, means_init=self.means_init, precisions_init=self.precisions_init, random_state=self.random_state, warm_start=self.warm_start, ).fit(subset_x[:, i].reshape(-1, 1), subset_y) for i in range(X.shape[1]) ] return self
Example #16
Source File: test_estimatortransformer.py From scikit-lego with MIT License | 5 votes |
def test_values_uniform(random_xy_dataset_clf):
    X, y = random_xy_dataset_clf
    X, y = check_X_y(X, y)
    clf = DummyClassifier(strategy="most_frequent")
    transformer = EstimatorTransformer(clone(clf))
    transformed = transformer.fit(X, y).transform(X)

    assert transformed.shape == (y.shape[0], 1)
    assert np.all(transformed == clf.fit(X, y).predict(X))
Example #17
Source File: _ridge_0_21.py From daal4py with Apache License 2.0 | 5 votes |
def fit(self, X, y, sample_weight=None): """Fit Ridge regression model Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] Training data y : array-like, shape = [n_samples] or [n_samples, n_targets] Target values sample_weight : float or numpy array of shape [n_samples] Individual weights for each sample Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=[np.float64, np.float32], multi_output=True, y_numeric=True) self.sample_weight_ = sample_weight self.fit_shape_good_for_daal_ = True if X.shape[0] >= X.shape[1] else False if (not self.solver == 'auto' or sp.issparse(X) or not self.fit_shape_good_for_daal_ or not (X.dtype == np.float64 or X.dtype == np.float32) or sample_weight is not None): if hasattr(self, 'daal_model_'): del self.daal_model_ logging.info("sklearn.linear_model.Ridge.fit: " + method_uses_sklearn) return super(Ridge, self).fit(X, y, sample_weight=sample_weight) else: logging.info("sklearn.linear_model.Ridge.fit: " + method_uses_daal) self.n_iter_ = None return _daal4py_fit(self, X, y)
Example #18
Source File: metrics.py From AIF360 with Apache License 2.0 | 5 votes |
def consistency_score(X, y, n_neighbors=5):
    r"""Compute the consistency score.

    Individual fairness metric from [#zemel13]_ that measures how similar the
    labels are for similar instances.

    .. math::
        1 - \frac{1}{n\cdot\text{n_neighbors}}\sum_{i=1}^n |\hat{y}_i -
        \sum_{j\in\mathcal{N}_{\text{n_neighbors}}(x_i)} \hat{y}_j|

    Args:
        X (array-like): Sample features.
        y (array-like): Sample targets.
        n_neighbors (int, optional): Number of neighbors for the knn
            computation.

    References:
        .. [#zemel13] `R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork,
           "Learning Fair Representations," International Conference on Machine
           Learning, 2013. <http://proceedings.mlr.press/v28/zemel13.html>`_
    """
    # cast as ndarrays
    X, y = check_X_y(X, y)
    # learn a KNN on the features
    nbrs = NearestNeighbors(n_neighbors, algorithm='ball_tree').fit(X)
    indices = nbrs.kneighbors(X, return_distance=False)

    # compute consistency score
    return 1 - abs(y - y[indices].mean(axis=1)).mean()


# ================================ ALIASES =====================================
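As a quick illustration of the metric above, here is a minimal, hypothetical usage sketch (toy data invented for this example; it assumes the consistency_score function shown above is in scope): two well-separated clusters whose members share a label should score close to 1.

import numpy as np

# Toy data: two tight clusters, each with a single label.
X = np.array([[0.0, 1.0], [0.1, 0.9], [5.0, 5.0], [5.1, 4.9]])
y = np.array([1, 1, 0, 0])

# Each point's nearest neighbors carry its own label, so the score is 1.0.
print(consistency_score(X, y, n_neighbors=2))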
Example #19
Source File: utils.py From mvlearn with Apache License 2.0 | 5 votes |
def check_Xs_y(Xs, y, multiview=False, enforce_views=None):
    r"""
    Checks Xs and y for consistent length. Xs is set to be of dimension 3.

    Parameters
    ----------
    Xs : nd-array, list
        Input data.

    y : nd-array, list
        Labels.

    multiview : boolean, (default=False)
        If True, throws error if just 1 data matrix given.

    enforce_views : int, (default=not checked)
        If provided, ensures this number of views in Xs. Otherwise not
        checked.

    Returns
    -------
    Xs_converted : object
        The converted and validated Xs (list of data arrays).

    y_converted : object
        The converted and validated y.
    """
    Xs_converted = check_Xs(Xs, multiview=multiview,
                            enforce_views=enforce_views)
    _, y_converted = check_X_y(Xs_converted[0], y, allow_nd=False)
    return Xs_converted, y_converted
Example #20
Source File: knne.py From DESlib with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y): """Fit the model according to the given training data. Parameters ---------- X : array of shape (n_samples, n_features) Data used to fit the model. y : array of shape (n_samples) class labels of each example in X. """ X, y = check_X_y(X, y) self.knns_ = {} self.classes_indexes_ = {} self.fit_X_ = X self.fit_y_ = y self.classes_ = np.unique(y) self.n_classes_ = self.classes_.size # Checking inputs self._check_n_neighbors(self.n_neighbors) self._set_knn_type() tmp = self._handle_n_neighbors(self.n_neighbors) self._mdc, self._mod, self._neighbors_per_class = tmp for class_ in self.classes_: self.classes_indexes_[class_] = np.argwhere( np.array(y) == class_).ravel() y_c = y[self.classes_indexes_[class_]] X_c = X[self.classes_indexes_[class_], :] knn = self.knn_type_(n_neighbors=self._neighbors_per_class, **self.kwargs) self.knns_[class_] = knn.fit(X_c, y_c) return self
Example #21
Source File: nonnegative.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y, sample_weight=None): """Fit non-negative linear model. Parameters ---------- X : numpy array or sparse matrix of shape [n_samples, n_features] Training data y : numpy array of shape [n_samples,] Target values sample_weight : numpy array of shape [n_samples] Individual weights for each sample Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, y_numeric=True, multi_output=False) if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1: raise ValueError("Sample weights must be 1D array or scalar") X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X, sample_weight=sample_weight) if sample_weight is not None: # Sample weight can be implemented via a simple rescaling. X, y = _rescale_data(X, y, sample_weight) self.coef_, result = nnls(X, y.squeeze()) if np.all(self.coef_ == 0): raise ConvergenceWarning("All coefficients estimated to be zero in" " the non-negative least squares fit.") self._set_intercept(X_offset, y_offset, X_scale) self.opt_result_ = OptimizeResult(success=True, status=0, x=self.coef_, fun=result) return self
Example #22
Source File: stacking.py From civisml-extensions with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y, **fit_params): """Fit the model Fit the base estimators on CV folds, then use their prediction on the validation folds to train the meta-estimator. Then re-fit base estimators on full training set. Parameters ---------- X : np.ndarray, list of numbers Training data. y : np.ndarray, list of numbers Training targets. **fit_params : dict of {string, object} Parameters passed to the ``fit`` method of each estimator, where each parameter name is prefixed such that parameter ``p`` for estimator ``s`` has key ``s__p``. Returns ------- self : BaseStackedModel This estimator """ self._validate_estimators() X, y = check_X_y(X, y, multi_output=True) # Fit base estimators on CV training folds, produce features for # meta-estimator from predictions on CV test folds. Xmeta, ymeta, meta_params = self._base_est_fit_predict(X, y, **fit_params) # Fit meta-estimator on test fold predictions of base estimators. self.meta_estimator.fit(Xmeta, ymeta, **meta_params) # Now fit base estimators again, this time on full training set self._base_est_fit(X, y, **fit_params) return self # _replace_est copied nearly verbatim from sklearn.pipeline._BasePipeline # v0.18.1 "_replace_step" method.
Example #23
Source File: randomized_lasso.py From stability-selection with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y, sample_weight=None): """Fit the model according to the given training data. Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] The training input samples. y : array-like, shape = [n_samples] The target values. sample_weight : array-like, shape (n_samples,) optional Array of weights that are assigned to individual samples. If not provided, then each sample is given unit weight. """ if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0): raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness) X, y = check_X_y(X, y, accept_sparse='csr', dtype=[np.float64, np.float32], order="C") n_features = X.shape[1] weakness = 1. - self.weakness random_state = check_random_state(self.random_state) weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,)) X_rescaled = _rescale_data(X, weights) return super(RandomizedLogisticRegression, self).fit(X_rescaled, y, sample_weight)
Example #24
Source File: mdlp.py From xam with MIT License | 5 votes |
def fit(self, X, y, **fit_params): """Determine which are the best cut points for each column in X based on y.""" X, y = check_X_y(X, y, y_numeric=True) self.cut_points_ = [mdlp_cut(x, y, []) for x in X.T] return self
Example #25
Source File: nb_svm.py From xam with MIT License | 5 votes |
def fit(self, X, y, sample_weight=None):
    X, y = utils.check_X_y(X, y, accept_sparse='csr', order='C')

    def pr(x, y_i, y):
        p = x[y == y_i].sum(0)
        return (p + 1) / ((y == y_i).sum() + 1)

    self.r_ = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))

    return super().fit(X.multiply(self.r_), y, sample_weight)
Example #26
Source File: randomized_lasso.py From stability-selection with BSD 3-Clause "New" or "Revised" License | 5 votes |
def fit(self, X, y): """Fit the model according to the given training data. Parameters ---------- X : {array-like, sparse matrix}, shape = [n_samples, n_features] The training input samples. y : array-like, shape = [n_samples] The target values. """ if not isinstance(self.weakness, float) or not (0.0 < self.weakness <= 1.0): raise ValueError('weakness should be a float in (0, 1], got %s' % self.weakness) X, y = check_X_y(X, y, accept_sparse=True) n_features = X.shape[1] weakness = 1. - self.weakness random_state = check_random_state(self.random_state) weights = weakness * random_state.randint(0, 1 + 1, size=(n_features,)) # TODO: I am afraid this will do double normalization if set to true #X, y, _, _ = _preprocess_data(X, y, self.fit_intercept, normalize=self.normalize, copy=False, # sample_weight=None, return_mean=False) # TODO: Check if this is a problem if it happens before standardization X_rescaled = _rescale_data(X, weights) return super(RandomizedLasso, self).fit(X_rescaled, y)
Example #27
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_X_y_informative_error():
    X = np.ones((2, 2))
    y = None
    assert_raise_message(ValueError, "y cannot be None", check_X_y, X, y)
Example #28
Source File: classifier_des.py From combo with BSD 2-Clause "Simplified" License | 5 votes |
def fit(self, X, y): """Fit classifier. Parameters ---------- X : numpy array of shape (n_samples, n_features) The input samples. y : numpy array of shape (n_samples,), optional (default=None) The ground truth of the input samples (labels). """ # Validate inputs X and y X, y = check_X_y(X, y) X = check_array(X) check_classification_targets(y) self._classes = len(np.unique(y)) n_samples = X.shape[0] # save the train ground truth for evaluation purpose self.y_train_ = y # build KDTree out of training subspace self.tree_ = KDTree(X) self.y_train_predicted_ = np.zeros( [n_samples, self.n_base_estimators_]) # train all base classifiers on X, and get their local predicted scores # iterate over all base classifiers for i, clf in enumerate(self.base_estimators): clf.fit(X, y) self.y_train_predicted_[:, i] = clf.predict(X) clf.fitted_ = True self.fitted_ = True return
Example #29
Source File: test_validation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_check_array_warn_on_dtype_deprecation():
    X = np.asarray([[0.0], [1.0]])
    Y = np.asarray([[2.0], [3.0]])
    with pytest.warns(DeprecationWarning,
                      match="'warn_on_dtype' is deprecated"):
        check_array(X, warn_on_dtype=True)
    with pytest.warns(DeprecationWarning,
                      match="'warn_on_dtype' is deprecated"):
        check_X_y(X, Y, warn_on_dtype=True)
Example #30
Source File: mifs.py From mifs with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_params(self, X, y):
    # checking input data and scaling it if y is continuous
    X, y = check_X_y(X, y)

    if not self.categorical:
        ss = StandardScaler()
        X = ss.fit_transform(X)
        y = ss.fit_transform(y.reshape(-1, 1))

    # sanity checks
    methods = ['JMI', 'JMIM', 'MRMR']
    if self.method not in methods:
        raise ValueError('Please choose one of the following methods:\n' +
                         '\n'.join(methods))

    if not isinstance(self.k, int):
        raise ValueError("k must be an integer.")
    if self.k < 1:
        raise ValueError('k must be larger than 0.')
    if self.categorical and np.any(self.k > np.bincount(y)):
        raise ValueError('k must be smaller than your smallest class.')

    if not isinstance(self.categorical, bool):
        raise ValueError('Categorical must be Boolean.')
    if self.categorical and np.unique(y).shape[0] > 5:
        print('Are you sure y is categorical? It has more than 5 levels.')
    if not self.categorical and self._isinteger(y):
        print('Are you sure y is continuous? It seems to be discrete.')
    if self._isinteger(X):
        print('The values of X seem to be discrete. MI_FS will treat them'
              'as continuous.')
    return X, y