Python sklearn.utils.validation.check_array() Examples
The following are 30 code examples of sklearn.utils.validation.check_array().
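Before the project examples, a minimal sketch of the check_array() options that recur below (dtype, copy, force_all_finite, ensure_2d, accept_sparse) may help. The input arrays are made up purely for illustration, and the calls assume a scikit-learn version that still accepts the force_all_finite keyword (newer releases rename it to ensure_all_finite).

import numpy as np
from scipy import sparse
from sklearn.utils.validation import check_array

# A nested list is validated and converted to a 2-d float64 ndarray.
X = check_array([[1, 2], [3, 4]], dtype=np.float64)

# copy=True returns a fresh array so the caller's data is untouched;
# force_all_finite=False lets NaN/inf pass through (several imputers
# below depend on this).
X_nan = check_array(np.array([[1.0, np.nan]]), copy=True, force_all_finite=False)

# ensure_2d=False accepts 1-d input such as a target vector;
# dtype=None keeps the input dtype instead of coercing to numeric.
y = check_array([0, 1, 1, 0], ensure_2d=False, dtype=None)

# accept_sparse='csr' allows CSR matrices instead of raising a TypeError.
S = check_array(sparse.csr_matrix(np.eye(3)), accept_sparse='csr')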
Example #1
Source File: nfpc.py From fylearn with MIT License
def fit(self, X, y):
    X = check_array(X)

    self.classes_, y = np.unique(y, return_inverse=True)

    if "?" in tuple(self.classes_):
        raise ValueError("nan not supported for class values")

    # build membership functions for each feature for each class
    learned = [
        learn_class(X, y, y == idx, self.membership_factory, self.aggregation_factory)
        for idx, class_value in enumerate(self.classes_)
    ]

    logger.info("learned %s" % (str(learned),))

    self.protos_ = [x[0] for x in learned]
    self.aggregations_ = [x[1] for x in learned]

    self.selection_method_ = self.selection_factory(X, y)

    return self
Example #2
Source File: fpcga.py From fylearn with MIT License
def predict(self, X):
    """
    Predict outputs given examples.

    Parameters:
    -----------
    X : the examples to predict (array or matrix)

    Returns:
    --------
    y_pred : Predicted values for each row in matrix.
    """
    if self.protos_ is None:
        raise Exception("Prototypes not initialized. Perform a fit first.")

    X = check_array(X)

    # predict
    return _predict(self.protos_, self.aggregation, self.classes_, X)
Example #3
Source File: fpcga.py From fylearn with MIT License
def fit(self, X, y_orig):

    def as_factory(r):
        return r if isinstance(r, AggregationRuleFactory) else DummyAggregationRuleFactory(r)

    self.aggregation_rules__ = [as_factory(r) for r in self.aggregation_rules]

    X = check_array(X)

    self.classes_, _ = np.unique(y_orig, return_inverse=True)
    self.m = X.shape[1]

    if np.nan in self.classes_:
        raise Exception("nan not supported for class values")

    self.build_with_ga(X, y_orig)

    return self
Example #4
Source File: rafpc.py From fylearn with MIT License
def predict(self, X):
    """
    Predict outputs given examples.

    Parameters:
    -----------
    X : the examples to predict (array or matrix)

    Returns:
    --------
    y_pred : Predicted values for each row in matrix.
    """
    if self.protos_ is None:
        raise Exception("Prototypes not initialized. Perform a fit first.")

    X = check_array(X)

    # predict
    return _predict_multi(self.protos_, self.aggregation, self.classes_, X, self.n_features)
Example #5
Source File: garules.py From fylearn with MIT License
def fit(self, X, y):
    X = check_array(X)

    self.classes_, _ = np.unique(y, return_inverse=True)

    # construct distance measure
    self.distance_ = self.df(X)

    # build models
    models = np.zeros((len(self.classes_), X.shape[1]))
    for c_idx, c_value in enumerate(self.classes_):
        models[c_idx, :] = self.build_for_class(X[y == c_value])

    self.models_ = models

    return self
Example #6
Source File: predictive_imputer.py From predictive_imputer with MIT License
def transform(self, X):
    check_is_fitted(self, ['statistics_', 'estimators_', 'gamma_'])

    X = check_array(X, copy=True, dtype=np.float64, force_all_finite=False)
    if X.shape[1] != self.statistics_.shape[1]:
        raise ValueError("X has %d features per sample, expected %d"
                         % (X.shape[1], self.statistics_.shape[1]))

    X_nan = np.isnan(X)
    imputed = self.initial_imputer.transform(X)

    if len(self.estimators_) > 1:
        for i, estimator_ in enumerate(self.estimators_):
            X_s = np.delete(imputed, i, 1)
            y_nan = X_nan[:, i]

            X_unk = X_s[y_nan]
            if len(X_unk) > 0:
                X[y_nan, i] = estimator_.predict(X_unk)
    else:
        estimator_ = self.estimators_[0]
        X[X_nan] = estimator_.inverse_transform(estimator_.transform(imputed))[X_nan]

    return X
Example #7
Source File: event.py From brainiak with Apache License 2.0
def predict(self, X):
    """Applies learned event segmentation to new testing dataset

    Alternative function for segmenting a new dataset after using
    fit() to learn a sequence of events, to comply with the sklearn
    Classifier interface

    Parameters
    ----------
    X: timepoint by voxel ndarray
        fMRI data to segment based on previously-learned event patterns

    Returns
    -------
    Event label for each timepoint
    """
    check_is_fitted(self, ["event_pat_", "event_var_"])
    X = check_array(X)
    segments, test_ll = self.find_events(X)
    return np.argmax(segments, axis=1)
Example #8
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License
def decision_function(self, X):
    """Predict raw anomaly score of X using the fitted detector.

    The anomaly score of an input sample is computed based on different
    detector algorithms. For consistency, outliers are assigned with
    larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The training input samples. Sparse matrices are accepted only
        if they are supported by the base estimator.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_'])
    X = check_array(X)

    # Compute the Mahalanobis distance of the samples
    return self.detector_.mahalanobis(X)
Example #9
Source File: nfpc.py From fylearn with MIT License
def fit(self, X, y):
    X = check_array(X)

    self.classes_, y = np.unique(y, return_inverse=True)

    if "?" in tuple(self.classes_):
        raise ValueError("nan not supported for class values")

    # build membership functions for each feature for each class
    self.protos_ = [
        build_memberships(X, y == idx, self.membership_factory)
        for idx, class_value in enumerate(self.classes_)
    ]

    # build aggregation
    self.aggregation_ = self.aggregation_factory(self.protos_, X, y, self.classes_)

    return self
Example #10
Source File: arima.py From pmdarima with MIT License
def _seasonal_prediction_with_confidence(arima_res, start, end, exog, alpha, **kwargs):
    """Compute the prediction for a SARIMAX and get a conf interval

    Unfortunately, SARIMAX does not really provide a nice way to get the
    confidence intervals out of the box, so we have to perform the
    ``get_prediction`` code here and unpack the confidence intervals manually.

    Notes
    -----
    For internal use only.
    """
    results = arima_res.get_prediction(
        start=start, end=end, exog=exog, **kwargs)

    f = results.predicted_mean
    conf_int = results.conf_int(alpha=alpha)

    return check_endog(f, dtype=None, copy=False), \
        check_array(conf_int, copy=False, dtype=None)
Example #11
Source File: array.py From pmdarima with MIT License
def _diff_inv_matrix(x, lag, differences, xi):
    n, m = x.shape
    y = np.zeros((n + lag * differences, m), dtype=DTYPE)

    if m >= 1:  # todo: R checks this. do we need to?
        # R: if(missing(xi)) xi <- matrix(0.0, lag*differences, m)
        if xi is None:
            xi = np.zeros((lag * differences, m), dtype=DTYPE)
        else:
            xi = check_array(
                xi, dtype=DTYPE, copy=False, force_all_finite=False,
                ensure_2d=True)
            if xi.shape != (lag * differences, m):
                raise IndexError('"xi" does not have the right shape')

        # TODO: can we vectorize?
        for i in range(m):
            y[:, i] = _diff_inv_vector(x[:, i], lag, differences, xi[:, i])

    return y
Example #12
Source File: fpt.py From fylearn with MIT License
def fit(self, X, y):
    X = check_array(X)

    self.classes_, y = np.unique(y, return_inverse=True)

    if np.nan in self.classes_:
        raise Exception("nan not supported for class values")

    self.trees_ = {}

    # build membership functions
    P = []
    for feature_idx, feature in enumerate(X.T):
        P.extend(self.fuzzifier(feature_idx, feature))

    # build the pattern tree for each class
    for class_idx, class_value in enumerate(self.classes_):
        class_vector = np.zeros(len(y))
        class_vector[y == class_idx] = 1.0
        root = self.build_for_class(X, y, class_vector, list(P))
        self.trees_[class_idx] = root

    return self
Example #13
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0
def fit(self, X, y=None):
    """Compute the lower and upper quantile cutoffs, columns to transform, and nonnegative columns.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and two-dimensional.

    Returns
    -------
    self : LogExtremeValueTransformer
    """
    super().fit(X)
    X = check_array(X)
    self.nonnegative_cols_ = [j for j in range(self.n_input_features_) if np.all(X[:, j] >= 0)]
    return self
Example #14
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0
def fit(self, X, y=None):
    """Compute the lower and upper quantile cutoffs, columns to transform, and each column's quantiles.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and two-dimensional.

    Returns
    -------
    self : QuantileExtremeValueTransformer
    """
    super().fit(X)
    X = check_array(X)
    self.quantile_transformer_ = QuantileTransformer(random_state=0, copy=True)
    self.quantile_transformer_.fit(X)
    return self
Example #15
Source File: util.py From skutil with BSD 3-Clause "New" or "Revised" License
def from_array(X, column_names=None):
    """A simple wrapper for H2OFrame.from_python. This takes a
    numpy array (or 2d array) and returns an H2OFrame with all
    the default args.

    Parameters
    ----------
    X : ndarray
        The array to convert.

    column_names : list, tuple (default=None)
        the names to use for your columns

    Returns
    -------
    H2OFrame
    """
    X = check_array(X, force_all_finite=False)
    return from_pandas(pd.DataFrame.from_records(data=X, columns=column_names))
Example #16
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0
def fit(self, X, y=None):
    """Learn empirical variances from X.

    Parameters
    ----------
    X : array of shape [n_samples, n_features]
        Input samples from which to check uniqueness.

    Returns
    -------
    self
    """
    X = check_array(X, force_all_finite=False)
    _, self.n_input_features_ = X.shape

    all_nan_cols = np.all(np.isnan(X), axis=0)
    self.cols_to_transform_ = np.logical_or(
        np.array([np.unique(X[:, j]).size == 1 for j in range(self.n_input_features_)]),
        all_nan_cols
    )
    return self
Example #17
Source File: fpt.py From fylearn with MIT License
def predict(self, X):
    """Predict class for X.

    Parameters
    ----------
    X : Array-like of shape [n_samples, n_features]
        The input to classify.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted classes.
    """
    X = check_array(X)

    if self.trees_ is None:
        raise Exception("Pattern trees not initialized. Perform a fit first.")

    y_classes = np.zeros((X.shape[0], len(self.classes_)))
    for i, c in enumerate(self.classes_):
        y_classes[:, i] = self.trees_[i](X)

    # predict the maximum value
    return self.classes_.take(np.argmax(y_classes, -1))
Example #18
Source File: nb_sklearn.py From recordlinkage with BSD 3-Clause "New" or "Revised" License
def _joint_log_likelihood(self, X):
    """Calculate the posterior log probability of the samples X"""
    check_is_fitted(self, "classes_")

    X = check_array(X, accept_sparse='csr')
    X_bin = self._transform_data(X)

    n_classes, n_features = self.feature_log_prob_.shape
    n_samples, n_features_X = X_bin.shape

    if n_features_X != n_features:
        raise ValueError(
            "Expected input with %d features, got %d instead"
            % (n_features, n_features_X))

    # see chapter 4.1 of http://www.cs.columbia.edu/~mcollins/em.pdf
    # implementation as in Formula 4.
    jll = safe_sparse_dot(X_bin, self.feature_log_prob_.T)
    jll += self.class_log_prior_

    return jll
Example #19
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0
def _validate_input(self, X):
    if self._is_constant_multicolumn_imputation():
        if len(self.fill_values) != X.shape[1]:
            raise ValueError(
                "'fill_values' should have length equal to number of features in X {num_features}, "
                "got {fill_values_length}".format(
                    num_features=X.shape[1], fill_values_length=len(self.fill_values))
            )

    dtype = self.dtype or np.dtype("O")
    if hasattr(X, "dtype") and X.dtype is not None and hasattr(X.dtype, "kind") and X.dtype.kind == "c":
        raise ValueError("Complex data not supported\n{}\n".format(X))

    return check_array(X, dtype=dtype, copy=True, force_all_finite=False, ensure_2d=True)
Example #20
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0
def fit(self, X, y=None):
    """Compute the lower and upper quantile cutoffs and which columns to transform.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and two-dimensional.

    Returns
    -------
    self : BaseExtremeValueTransformer
    """
    if not 0 <= self.quantile <= 100:
        raise ValueError(
            "Parameter `quantile` {} is invalid. `quantile` must be an integer between 0 and 100".format(
                self.quantile
            )
        )

    X = check_array(X)
    _, self.n_input_features_ = X.shape

    self.quantiles_ = np.percentile(X, [100 - self.quantile, self.quantile], axis=0)

    nonstandard_threshold_stds = self.threshold_std * np.std(X, axis=0)
    col_means = np.mean(X, axis=0)
    threshold_upper_bound = nonstandard_threshold_stds + col_means
    threshold_lower_bound = -nonstandard_threshold_stds + col_means

    self.cols_to_transform_ = [
        j
        for j in range(self.n_input_features_)
        if self.quantiles_[0, j] < threshold_lower_bound[j] or self.quantiles_[1, j] > threshold_upper_bound[j]
    ]

    return self
Example #21
Source File: base.py From sagemaker-scikit-learn-extension with Apache License 2.0
def _validate_input(self, X):
    if hasattr(X, "dtype") and X.dtype is not None and hasattr(X.dtype, "kind") and X.dtype.kind == "c":
        raise ValueError("Complex data not supported\n{}\n".format(X))

    return check_array(X, dtype=np.dtype("O"), copy=True, force_all_finite=False, ensure_2d=True)
Example #22
Source File: date_time.py From sagemaker-scikit-learn-extension with Apache License 2.0
def transform(self, X, y=None):
    X = check_array(X, dtype=None, force_all_finite="allow-nan")
    check_is_fitted(self, "extract_")
    X = np.array(X)
    X = self._to_datetime_array(X)
    return self._convert(X, self.mode)
Example #23
Source File: test_estimator_checks.py From Mastering-Elasticsearch-7.0 with MIT License
def predict(self, X):
    X = check_array(X)
    self.key = 1000
    return np.ones(X.shape[0])
Example #24
Source File: stat_models.py From pyod with BSD 2-Clause "Simplified" License
def pearsonr_mat(mat, w=None):
    """Utility function to calculate pearson matrix (row-wise).

    Parameters
    ----------
    mat : numpy array of shape (n_samples, n_features)
        Input matrix.

    w : numpy array of shape (n_features,)
        Weights.

    Returns
    -------
    pear_mat : numpy array of shape (n_samples, n_samples)
        Row-wise pearson score matrix.
    """
    mat = check_array(mat)
    n_row = mat.shape[0]
    n_col = mat.shape[1]
    pear_mat = np.full([n_row, n_row], 1).astype(float)

    if w is not None:
        for cx in range(n_row):
            for cy in range(cx + 1, n_row):
                curr_pear = wpearsonr(mat[cx, :], mat[cy, :], w)
                pear_mat[cx, cy] = curr_pear
                pear_mat[cy, cx] = curr_pear
    else:
        # iterate over row pairs, since pear_mat is (n_row, n_row)
        for cx in range(n_row):
            for cy in range(cx + 1, n_row):
                curr_pear = pearsonr(mat[cx, :], mat[cy, :])[0]
                pear_mat[cx, cy] = curr_pear
                pear_mat[cy, cx] = curr_pear

    return pear_mat
Example #25
Source File: test_ranking.py From Mastering-Elasticsearch-7.0 with MIT License
def _my_lrap(y_true, y_score):
    """Simple implementation of label ranking average precision"""
    check_consistent_length(y_true, y_score)
    y_true = check_array(y_true)
    y_score = check_array(y_score)
    n_samples, n_labels = y_true.shape
    score = np.empty((n_samples, ))
    for i in range(n_samples):
        # The best rank corresponds to 1. Ranks higher than 1 are worse.
        # The best inverse ranking corresponds to n_labels.
        unique_rank, inv_rank = np.unique(y_score[i], return_inverse=True)
        n_ranks = unique_rank.size
        rank = n_ranks - inv_rank

        # Ranks need to be corrected to take ties into account,
        # e.g. rank 1 ex aequo means that both labels have rank 2.
        corr_rank = np.bincount(rank, minlength=n_ranks + 1).cumsum()
        rank = corr_rank[rank]

        relevant = y_true[i].nonzero()[0]
        if relevant.size == 0 or relevant.size == n_labels:
            score[i] = 1
            continue

        score[i] = 0.
        for label in relevant:
            # Count the number of relevant labels with a better
            # (smaller) rank.
            n_ranked_above = sum(rank[r] <= rank[label] for r in relevant)

            # Weight by the rank of the actual label
            score[i] += n_ranked_above / rank[label]

        score[i] /= relevant.size

    return score.mean()
Example #26
Source File: split.py From nyaggle with MIT License
def split(self, X, y, groups=None):
    """Generate indices to split data into training and test set."""
    y = check_array(y, ensure_2d=False, dtype=None)
    return super(StratifiedGroupKFold, self).split(X, y, groups)
Example #27
Source File: lof.py From pyod with BSD 2-Clause "Simplified" License
def fit(self, X, y=None):
    """Fit detector. y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    # validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)

    self.detector_ = LocalOutlierFactor(n_neighbors=self.n_neighbors,
                                        algorithm=self.algorithm,
                                        leaf_size=self.leaf_size,
                                        metric=self.metric,
                                        p=self.p,
                                        metric_params=self.metric_params,
                                        contamination=self.contamination,
                                        n_jobs=self.n_jobs)
    self.detector_.fit(X=X, y=y)

    # Invert decision_scores_: outliers come with higher outlier scores
    self.decision_scores_ = invert_order(
        self.detector_.negative_outlier_factor_)
    self._process_decision_scores()
    return self
Example #28
Source File: pca.py From pyod with BSD 2-Clause "Simplified" License
def decision_function(self, X):
    """Predict raw anomaly score of X using the fitted detector.

    The anomaly score of an input sample is computed based on different
    detector algorithms. For consistency, outliers are assigned with
    larger anomaly scores.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The training input samples. Sparse matrices are accepted only
        if they are supported by the base estimator.

    Returns
    -------
    anomaly_scores : numpy array of shape (n_samples,)
        The anomaly score of the input samples.
    """
    check_is_fitted(self, ['components_', 'w_components_'])

    X = check_array(X)
    if self.standardization:
        X = self.scaler_.transform(X)

    return np.sum(
        cdist(X, self.selected_components_) / self.selected_w_components_,
        axis=1).ravel()
Example #29
Source File: mcd.py From pyod with BSD 2-Clause "Simplified" License
def fit(self, X, y=None):
    """Fit detector. y is ignored in unsupervised methods.

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        The input samples.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.
    """
    # Validate inputs X and y (optional)
    X = check_array(X)
    self._set_n_classes(y)
    self.detector_ = MinCovDet(store_precision=self.store_precision,
                               assume_centered=self.assume_centered,
                               support_fraction=self.support_fraction,
                               random_state=self.random_state)
    self.detector_.fit(X=X, y=y)

    # Use the Mahalanobis distance as the outlier score
    self.decision_scores_ = self.detector_.dist_
    self._process_decision_scores()
    return self
Example #30
Source File: genetic.py From gplearn with BSD 3-Clause "New" or "Revised" License
def transform(self, X):
    """Transform X according to the fitted transformer.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Input vectors, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape = [n_samples, n_components]
        Transformed array.
    """
    if not hasattr(self, '_best_programs'):
        raise NotFittedError('SymbolicTransformer not fitted.')

    X = check_array(X)
    _, n_features = X.shape
    if self.n_features_ != n_features:
        raise ValueError('Number of features of the model must match the '
                         'input. Model n_features is %s and input '
                         'n_features is %s.' % (self.n_features_, n_features))

    X_new = np.array([gp.execute(X) for gp in self._best_programs]).T

    return X_new