Python sklearn.utils.validation.FLOAT_DTYPES Examples
The following are 29 code examples of sklearn.utils.validation.FLOAT_DTYPES (a module-level constant, not a callable). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.utils.validation, or try the search function.
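Before the examples, it may help to see what FLOAT_DTYPES actually is and how it interacts with check_array. A minimal sketch, assuming a standard scikit-learn install (the exact tuple contents could in principle vary by version):

import numpy as np
from sklearn.utils.validation import check_array, FLOAT_DTYPES

# FLOAT_DTYPES is a tuple of NumPy float types, not a function:
# (np.float64, np.float32, np.float16).
print(FLOAT_DTYPES)

# Passing dtype=FLOAT_DTYPES to check_array keeps float input in its
# existing float precision, while non-float input is converted to float64.
X32 = np.asarray([[1, 2], [3, 4]], dtype=np.float32)
print(check_array(X32, dtype=FLOAT_DTYPES).dtype)  # float32

X_int = np.asarray([[1, 2], [3, 4]])
print(check_array(X_int, dtype=FLOAT_DTYPES).dtype)  # float64

This is why nearly every example below passes dtype=FLOAT_DTYPES: it validates the input as floating point without needlessly copying data that is already float32.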
Example #1
Source File: naive_bayes.py From scikit-lego with MIT License | 6 votes |
def predict_proba(self, X: np.array):
    check_is_fitted(self, ["gmms_", "classes_", "num_fit_cols_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    if self.num_fit_cols_ != X.shape[1]:
        raise ValueError(
            f"number of columns {X.shape[1]} does not match fit size {self.num_fit_cols_}"
        )
    probs = np.zeros((X.shape[0], len(self.classes_)))
    for k, v in self.gmms_.items():
        class_idx = int(np.argwhere(self.classes_ == k))
        # Sum the per-feature log-likelihoods for class k (naive Bayes
        # independence assumption: one univariate mixture per feature).
        probs[:, class_idx] = np.array(
            [
                m.score_samples(np.expand_dims(X[:, idx], 1))
                for idx, m in enumerate(v)
            ]
        ).sum(axis=0)
    likelihood = np.exp(probs)
    return likelihood / likelihood.sum(axis=1).reshape(-1, 1)
Example #2
Source File: umap_reconstruction.py From scikit-lego with MIT License | 6 votes |
def fit(self, X, y=None):
    """
    Fit the model using X as training data.

    :param X: array-like, shape=(n_samples, n_features) training data.
    :param y: ignored but kept in for pipeline support
    :return: Returns an instance of self.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    if self.n_components < 2:
        raise ValueError("Number of components must be at least two.")
    if not self.threshold:
        raise ValueError("The `threshold` value cannot be `None`.")
    self.umap_ = umap.UMAP(
        n_components=self.n_components,
        n_neighbors=self.n_neighbors,
        min_dist=self.min_dist,
        metric=self.metric,
        random_state=self.random_state,
    )
    self.umap_.fit(X, y)
    self.offset_ = -self.threshold
    return self
Example #3
Source File: bayesian_gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def predict_proba(self, X):
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmms_", "classes_"])
    res = np.zeros((X.shape[0], self.classes_.shape[0]))
    for idx, c in enumerate(self.classes_):
        res[:, idx] = self.gmms_[c].score_samples(X)
    return np.exp(res) / np.exp(res).sum(axis=1)[:, np.newaxis]
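One caveat with the pattern above: calling np.exp directly on raw log-likelihoods can overflow or underflow for extreme scores. A hedged sketch of a numerically safer normalization using a log-sum-exp shift (an alternative, not scikit-lego's code):

import numpy as np

def normalize_log_scores(log_scores):
    # Subtract the row-wise max before exponentiating; the shift cancels
    # when normalizing, so the result is identical but overflow-safe.
    shifted = log_scores - log_scores.max(axis=1, keepdims=True)
    probs = np.exp(shifted)
    return probs / probs.sum(axis=1, keepdims=True)

print(normalize_log_scores(np.array([[1000.0, 1001.0]])))  # no overflow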
Example #4
Source File: equal_groups.py From Same-Size-K-Means with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_test_data(self, X):
    # Note: warn_on_dtype was deprecated in scikit-learn 0.21 and removed
    # in 0.23; drop the argument on newer versions.
    X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES,
                    warn_on_dtype=True)
    n_samples, n_features = X.shape
    expected_n_features = self.cluster_centers_.shape[1]
    if not n_features == expected_n_features:
        raise ValueError("Incorrect number of features. "
                         "Got %d features, expected %d" % (
                             n_features, expected_n_features))
    return X
Example #5
Source File: naive_bayes.py From scikit-lego with MIT License | 5 votes |
def predict(self, X):
    check_is_fitted(self, ["gmms_", "classes_", "num_fit_cols_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    return self.classes_[self.predict_proba(X).argmax(axis=1)]
Example #6
Source File: naive_bayes.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "BayesianGaussianMixtureNB":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_samples, n_features) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)
    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    self.num_fit_cols_ = X.shape[1]
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        # One univariate Bayesian mixture per feature, per class.
        self.gmms_[c] = [
            BayesianGaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weight_concentration_prior_type=self.weight_concentration_prior_type,
                weight_concentration_prior=self.weight_concentration_prior,
                mean_precision_prior=self.mean_precision_prior,
                mean_prior=self.mean_prior,
                degrees_of_freedom_prior=self.degrees_of_freedom_prior,
                covariance_prior=self.covariance_prior,
                random_state=self.random_state,
                warm_start=self.warm_start,
                verbose=self.verbose,
                verbose_interval=self.verbose_interval,
            ).fit(subset_x[:, i].reshape(-1, 1), subset_y)
            for i in range(X.shape[1])
        ]
    return self
Example #7
Source File: naive_bayes.py From scikit-lego with MIT License | 5 votes |
def predict(self, X):
    check_is_fitted(self, ["gmms_", "classes_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    return self.classes_[self.predict_proba(X).argmax(axis=1)]
Example #8
Source File: naive_bayes.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "GaussianMixtureNB":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_samples, n_features) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)
    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    self.num_fit_cols_ = X.shape[1]
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        # One univariate Gaussian mixture per feature, per class.
        self.gmms_[c] = [
            GaussianMixture(
                n_components=self.n_components,
                covariance_type=self.covariance_type,
                tol=self.tol,
                reg_covar=self.reg_covar,
                max_iter=self.max_iter,
                n_init=self.n_init,
                init_params=self.init_params,
                weights_init=self.weights_init,
                means_init=self.means_init,
                precisions_init=self.precisions_init,
                random_state=self.random_state,
                warm_start=self.warm_start,
            ).fit(subset_x[:, i].reshape(-1, 1), subset_y)
            for i in range(X.shape[1])
        ]
    return self
Example #9
Source File: randomadder.py From scikit-lego with MIT License | 5 votes |
def transform_train(self, X):
    rs = check_random_state(self.random_state)
    check_is_fitted(self, ["dim_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    return X + rs.normal(0, self.noise, size=X.shape)
Example #10
Source File: randomadder.py From scikit-lego with MIT License | 5 votes |
def fit(self, X, y):
    super().fit(X, y)
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    self.dim_ = X.shape[1]
    return self
Example #11
Source File: columncapper.py From scikit-lego with MIT License | 5 votes |
def transform(self, X):
    """
    Performs the capping on the column(s) of ``X``.

    :type X: pandas.DataFrame or numpy.ndarray
    :param X: The column(s) for which the capping limit(s) will be applied.
    :rtype: numpy.ndarray
    :returns: ``X`` values with capped limits.
    :raises: ``ValueError`` if the number of columns of ``X`` differs from the
        number of columns seen during fitting.
    """
    check_is_fitted(self, "quantiles_")
    X = check_array(
        X,
        copy=self.copy,
        force_all_finite=False,
        dtype=FLOAT_DTYPES,
        estimator=self,
    )
    if X.shape[1] != self.n_columns_:
        raise ValueError(
            "X must have the same number of columns in fit and transform"
        )
    if self.discard_infs:
        np.putmask(X, (X == np.inf) | (X == -np.inf), np.nan)

    # Actually capping
    X = np.minimum(X, self.quantiles_[1, :])
    X = np.maximum(X, self.quantiles_[0, :])
    return X
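The capping step itself is plain NumPy. A self-contained sketch of the same clamp-to-quantile idea on a toy array (the 5th/95th percentiles here are illustrative, not necessarily ColumnCapper's defaults):

import numpy as np

X = np.array([[-10.0, 0.5],
              [0.2, 99.0],
              [0.4, 0.6]])

# Per-column caps; here the 5th and 95th percentiles, for illustration.
lower = np.quantile(X, 0.05, axis=0)
upper = np.quantile(X, 0.95, axis=0)

# Same pattern as the transform above: element-wise minimum against the
# upper cap, then element-wise maximum against the lower cap.
X_capped = np.maximum(np.minimum(X, upper), lower)
print(X_capped)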
Example #12
Source File: umap_reconstruction.py From scikit-lego with MIT License | 5 votes |
def predict(self, X):
    """
    Predict if a point is an outlier.

    :param X: array-like, shape=(n_samples, n_features) the data to predict.
    :return: array, shape=(n_samples,) the predicted data. 1 for inliers, -1 for outliers.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["umap_", "offset_"])
    result = np.ones(X.shape[0])
    result[self.difference(X) > self.threshold] = -1
    return result.astype(int)  # np.int was removed in NumPy >= 1.24
Example #13
Source File: umap_reconstruction.py From scikit-lego with MIT License | 5 votes |
def transform(self, X):
    """
    Uses the underlying UMAP method to transform the data.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["umap_", "offset_"])
    return self.umap_.transform(X)
Example #14
Source File: pca_reconstruction.py From scikit-lego with MIT License | 5 votes |
def predict(self, X):
    """
    Predict if a point is an outlier.

    :param X: array-like, shape=(n_samples, n_features) the data to predict.
    :return: array, shape=(n_samples,) the predicted data. 1 for inliers, -1 for outliers.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["pca_", "offset_"])
    result = np.ones(X.shape[0])
    result[self.difference(X) > self.threshold] = -1
    return result.astype(int)  # np.int was removed in NumPy >= 1.24
Example #15
Source File: pca_reconstruction.py From scikit-lego with MIT License | 5 votes |
def transform(self, X):
    """
    Uses the underlying PCA method to transform the data.
    """
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["pca_", "offset_"])
    return self.pca_.transform(X)
Example #16
Source File: linear_regression.py From differential-privacy-library with MIT License | 5 votes |
def _preprocess_data(X, y, fit_intercept, epsilon=1.0, bounds_X=None, bounds_y=None,
                     copy=True, check_input=True, **unused_args):
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    X_scale = np.ones(X.shape[1], dtype=X.dtype)

    if fit_intercept:
        bounds_X = check_bounds(bounds_X, X.shape[1])
        bounds_y = check_bounds(bounds_y, y.shape[1] if y.ndim > 1 else 1)

        X = clip_to_bounds(X, bounds_X)
        y = clip_to_bounds(y, bounds_y)

        X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon,
                        accountant=BudgetAccountant())
        X -= X_offset
        y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon,
                        accountant=BudgetAccountant())
        y = y - y_offset
    else:
        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)

    return X, y, X_offset, y_offset, X_scale


# noinspection PyPep8Naming,PyAttributeOutsideInit
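The clip_to_bounds call exists so the differentially private mean has bounded sensitivity. A minimal standalone sketch of the same idea in plain NumPy (clip_columns is an illustrative helper, not diffprivlib's API):

import numpy as np

def clip_columns(X, lower, upper):
    # Clamp each feature into its stated bounds; with bounded data, the
    # sensitivity of a subsequent mean query is also bounded.
    return np.clip(X, lower, upper)

X = np.array([[0.0, 12.0],
              [5.0, -3.0]])
print(clip_columns(X, lower=np.array([0.0, 0.0]), upper=np.array([4.0, 10.0])))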
Example #17
Source File: bayesian_gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def predict(self, X):
    check_is_fitted(self, ["gmms_", "classes_"])
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    return self.classes_[self.predict_proba(X).argmax(axis=1)]
Example #18
Source File: bayesian_gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "BayesianGMMClassifier":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_samples, n_features) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)
    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        # One multivariate Bayesian mixture per class.
        mixture = BayesianGaussianMixture(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weight_concentration_prior_type=self.weight_concentration_prior_type,
            weight_concentration_prior=self.weight_concentration_prior,
            mean_precision_prior=self.mean_precision_prior,
            mean_prior=self.mean_prior,
            degrees_of_freedom_prior=self.degrees_of_freedom_prior,
            covariance_prior=self.covariance_prior,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        self.gmms_[c] = mixture.fit(subset_x, subset_y)
    return self
Example #19
Source File: bayesian_gmm_detector.py From scikit-lego with MIT License | 5 votes |
def score_samples(self, X):
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmm_", "likelihood_threshold_"])
    if len(X.shape) == 1:
        X = np.expand_dims(X, 1)
    return self.gmm_.score_samples(X) * -1
Example #20
Source File: gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def predict_proba(self, X):
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmms_", "classes_"])
    res = np.zeros((X.shape[0], self.classes_.shape[0]))
    for idx, c in enumerate(self.classes_):
        res[:, idx] = self.gmms_[c].score_samples(X)
    return np.exp(res) / np.exp(res).sum(axis=1)[:, np.newaxis]
Example #21
Source File: gmm_classifier.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.array, y: np.array) -> "GMMClassifier":
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_samples, n_features) training data.
    :param y: array-like, shape=(n_samples,) training data.
    :return: Returns an instance of self.
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    if X.ndim == 1:
        X = np.expand_dims(X, 1)
    self.gmms_ = {}
    self.classes_ = unique_labels(y)
    for c in self.classes_:
        subset_x, subset_y = X[y == c], y[y == c]
        # One multivariate Gaussian mixture per class.
        mixture = GaussianMixture(
            n_components=self.n_components,
            covariance_type=self.covariance_type,
            tol=self.tol,
            reg_covar=self.reg_covar,
            max_iter=self.max_iter,
            n_init=self.n_init,
            init_params=self.init_params,
            weights_init=self.weights_init,
            means_init=self.means_init,
            precisions_init=self.precisions_init,
            random_state=self.random_state,
            warm_start=self.warm_start,
            verbose=self.verbose,
            verbose_interval=self.verbose_interval,
        )
        self.gmms_[c] = mixture.fit(subset_x, subset_y)
    return self
Example #22
Source File: gmm_outlier_detector.py From scikit-lego with MIT License | 5 votes |
def score_samples(self, X):
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    check_is_fitted(self, ["gmm_", "likelihood_threshold_"])
    if len(X.shape) == 1:
        X = np.expand_dims(X, 1)
    return -self.gmm_.score_samples(X)
Example #23
Source File: neighbors.py From scikit-lego with MIT License | 5 votes |
def predict(self, X):
    """
    Predict class labels for samples in X.

    :param X: array_like, shape (n_samples, n_features)
    :return: array, shape (n_samples)
    """
    check_is_fitted(self)
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    return self.classes_[np.argmax(self.predict_proba(X), 1)]
Example #24
Source File: neighbors.py From scikit-lego with MIT License | 5 votes |
def predict_proba(self, X):
    """
    Probability estimates.

    The returned estimates for all classes are in the same order found in the
    `.classes_` attribute.

    :param X: array-like of shape (n_samples, n_features)
    :return: array-like of shape (n_samples, n_classes)
        Returns the probability of the sample for each class in the model,
        where classes are ordered as they are in self.classes_.
    """
    check_is_fitted(self)
    X = check_array(X, estimator=self, dtype=FLOAT_DTYPES)
    log_prior = np.array(
        [self.priors_logp_[target_label] for target_label in self.classes_]
    )
    log_likelihood = np.array(
        [
            self.models_[target_label].score_samples(X)
            for target_label in self.classes_
        ]
    ).T
    # Despite the name, this holds the exponentiated (unnormalized)
    # posterior: exp(log p(x|c) + log p(c)).
    log_likelihood_and_prior = np.exp(log_likelihood + log_prior)
    evidence = log_likelihood_and_prior.sum(axis=1, keepdims=True)
    posterior = log_likelihood_and_prior / evidence
    return posterior
Example #25
Source File: neighbors.py From scikit-lego with MIT License | 5 votes |
def fit(self, X: np.ndarray, y: np.ndarray):
    """
    Fit the model using X, y as training data.

    :param X: array-like, shape=(n_samples, n_features)
    :param y: array-like, shape=(n_samples,)
    :return: Returns an instance of self
    """
    X, y = check_X_y(X, y, estimator=self, dtype=FLOAT_DTYPES)
    self.classes_ = unique_labels(y)
    self.models_, self.priors_logp_ = {}, {}
    for target_label in self.classes_:
        x_subset = X[y == target_label]

        # Computing joint distribution
        self.models_[target_label] = KernelDensity(
            bandwidth=self.bandwidth,
            kernel=self.kernel,
            algorithm=self.algorithm,
            metric=self.metric,
            atol=self.atol,
            rtol=self.rtol,
            breadth_first=self.breath_first,
            leaf_size=self.leaf_size,
            metric_params=self.metric_params,
        ).fit(x_subset)

        # Computing target class prior
        self.priors_logp_[target_label] = np.log(len(x_subset) / len(X))
    return self
Example #26
Source File: data.py From sagemaker-scikit-learn-extension with Apache License 2.0 | 5 votes |
def fit(self, X, y=None):
    """Fit RobustStandardScaler to X.

    If the input is sparse, `fit` overrides `self.with_mean` to standardize
    without subtracting the mean (which would break sparsity). If the data
    is dense, the mean is adjusted for sparse features and the data is
    scaled with the mean.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data to standardize.

    Returns
    -------
    self : RobustStandardScaler
    """
    X = check_array(
        X, accept_sparse=("csr", "csc"), estimator=self, dtype=FLOAT_DTYPES,
        force_all_finite="allow-nan"
    )

    with_mean = True
    if issparse(X):
        with_mean = False

    self.scaler_ = StandardScaler(with_mean=with_mean, with_std=True, copy=self.copy)
    self.scaler_.fit(X)

    if self.scaler_.with_mean:
        # Zero out the learned mean for mostly-sparse columns (<= 30%
        # non-zero) so those features are scaled but not centered.
        nnz_mean_mask = np.where(np.count_nonzero(X, axis=0) / X.shape[0] > 0.3, 1, 0)
        self.scaler_.mean_ = self.scaler_.mean_ * nnz_mean_mask
    return self
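The with_mean override matters because centering a sparse matrix densifies it. A small sketch showing the same guard in isolation:

import numpy as np
from scipy.sparse import csr_matrix, issparse
from sklearn.preprocessing import StandardScaler

X = csr_matrix(np.eye(3))

# Subtracting a per-column mean would turn almost every stored zero into
# a non-zero entry, so sparse input is scaled without centering.
scaler = StandardScaler(with_mean=not issparse(X), with_std=True)
print(scaler.fit_transform(X).toarray())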
Example #27
Source File: iterative_imputer.py From ME-Net with MIT License | 5 votes |
def _validate_input(self, X):
    allowed_strategies = ["mean", "median", "most_frequent", "constant"]
    if self.strategy not in allowed_strategies:
        raise ValueError("Can only use these strategies: {0} "
                         " got strategy={1}".format(allowed_strategies,
                                                    self.strategy))

    if self.strategy in ("most_frequent", "constant"):
        dtype = None
    else:
        dtype = FLOAT_DTYPES

    if not is_scalar_nan(self.missing_values):
        force_all_finite = True
    else:
        force_all_finite = False  # "allow-nan"

    try:
        X = check_array(X, accept_sparse='csc', dtype=dtype,
                        force_all_finite=force_all_finite, copy=self.copy)
    except ValueError as ve:
        if "could not convert" in str(ve):
            raise ValueError("Cannot use {0} strategy with non-numeric "
                             "data. Received datatype :{1}."
                             "".format(self.strategy, X.dtype.kind))
        else:
            raise ve

    _check_inputs_dtype(X, self.missing_values)
    if X.dtype.kind not in ("i", "u", "f", "O"):
        raise ValueError("_SimpleImputer does not support data with dtype "
                         "{0}. Please provide either a numeric array (with"
                         " a floating point or integer dtype) or "
                         "categorical data represented either as an array "
                         "with integer dtype or an array of string values "
                         "with an object dtype.".format(X.dtype))

    return X
Example #28
Source File: text_transformers.py From cdQA with Apache License 2.0 | 5 votes |
def transform(self, X=None, copy=True, is_query=False):
    """
    Parameters
    ----------
    X : sparse matrix, [n_samples, n_features]
        document-term query matrix

    copy : boolean, optional (default=True)

    is_query : boolean (default=False)
        whether to transform a query or the documents database

    Returns
    -------
    vectors : sparse matrix, [n_samples, n_features]
    """
    if is_query:
        X = check_array(X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy)
        if not sp.issparse(X):
            X = sp.csr_matrix(X, dtype=np.float64)

        n_samples, n_features = X.shape

        expected_n_features = self._doc_matrix.shape[1]
        if n_features != expected_n_features:
            raise ValueError(
                "Input has n_features=%d while the model"
                " has been trained with n_features=%d"
                % (n_features, expected_n_features)
            )

        if self.use_idf:
            check_is_fitted(self, "_idf_diag", "idf vector is not fitted")
            X = sp.csr_matrix(X.toarray() * self._idf_diag.diagonal())

        return X
    else:
        return self._doc_matrix
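One observation on the idf step above: converting the query matrix to dense with X.toarray() just to scale columns is costly for large vocabularies. A hedged sketch of an equivalent sparse-friendly column scaling (an alternative, not cdQA's code):

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[1.0, 0.0], [0.0, 2.0]]))
idf = np.array([1.5, 0.5])

# Right-multiplying by a diagonal matrix scales each column in place,
# without ever densifying the document-term matrix.
X_scaled = X @ sp.diags(idf)
print(X_scaled.toarray())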
Example #29
Source File: iterative_imputer.py From ME-Net with MIT License | 4 votes |
def _initial_imputation(self, X):
    """Perform initial imputation for input X.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Input data, where "n_samples" is the number of samples and
        "n_features" is the number of features.

    Returns
    -------
    Xt : ndarray, shape (n_samples, n_features)
        Input data, where "n_samples" is the number of samples and
        "n_features" is the number of features.

    X_filled : ndarray, shape (n_samples, n_features)
        Input data with the most recent imputations.

    mask_missing_values : ndarray, shape (n_samples, n_features)
        Input data's missing indicator matrix, where "n_samples" is the
        number of samples and "n_features" is the number of features.
    """
    # TODO: change False to "allow-nan"
    if is_scalar_nan(self.missing_values):
        force_all_finite = False  # "allow-nan"
    else:
        force_all_finite = True

    X = check_array(X, dtype=FLOAT_DTYPES, order="F",
                    force_all_finite=force_all_finite)
    _check_inputs_dtype(X, self.missing_values)

    mask_missing_values = _get_mask(X, self.missing_values)
    if self.initial_imputer_ is None:
        self.initial_imputer_ = _SimpleImputer(
            missing_values=self.missing_values,
            strategy=self.initial_strategy)
        X_filled = self.initial_imputer_.fit_transform(X)
    else:
        X_filled = self.initial_imputer_.transform(X)

    valid_mask = np.flatnonzero(np.logical_not(
        np.isnan(self.initial_imputer_.statistics_)))
    Xt = X[:, valid_mask]
    mask_missing_values = mask_missing_values[:, valid_mask]

    return Xt, X_filled, mask_missing_values
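The force_all_finite toggle is what lets NaN placeholders survive validation so the imputer can fill them. A minimal sketch (note that recent scikit-learn releases rename this parameter, so treat the exact keyword as version-dependent):

import numpy as np
from sklearn.utils.validation import check_array, FLOAT_DTYPES

X = np.array([[1.0, np.nan],
              [3.0, 4.0]])

# With the default force_all_finite=True this call raises on the NaN;
# relaxing it lets missing-value markers through for later imputation.
X = check_array(X, dtype=FLOAT_DTYPES, force_all_finite="allow-nan")
print(np.isnan(X))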