Python sklearn.utils.validation.check_X_y() Examples
The following are 30 code examples of sklearn.utils.validation.check_X_y(), drawn from open-source projects. The originating project, source file, and license are listed above each example. You may also want to check out all available functions and classes of the module sklearn.utils.validation.
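Before the project examples, here is a minimal, self-contained sketch of the canonical pattern most of them follow: validate X and y at the top of fit(), then compute and store fitted attributes. This sketch is not from any of the projects below; the DemoEstimator name is hypothetical.

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_X_y

class DemoEstimator(BaseEstimator):
    """Hypothetical estimator showing the usual check_X_y pattern."""

    def fit(self, X, y):
        # check_X_y converts X and y to arrays, enforces 2-D X and
        # consistent sample counts, and rejects non-finite values.
        X, y = check_X_y(X, y)
        self.n_features_in_ = X.shape[1]
        self.coef_, *_ = np.linalg.lstsq(X, y, rcond=None)
        return self

est = DemoEstimator().fit(np.random.rand(10, 3), np.random.rand(10))
# DemoEstimator().fit(np.random.rand(10, 3), np.random.rand(5))
# would raise ValueError: inconsistent numbers of samples.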
Example #1
Source File: su_learning.py From SU_Classification with MIT License
def fit(self, x, y):
    check_classification_targets(y)
    x, y = check_X_y(x, y)
    x_s, x_u = x[y == +1, :], x[y == 0, :]
    n_s, n_u = len(x_s), len(x_u)
    p_p = self.prior
    p_n = 1 - self.prior
    p_s = p_p ** 2 + p_n ** 2

    k_s = self._basis(x_s)
    k_u = self._basis(x_u)
    d = k_u.shape[1]

    # Note that `2 *` is needed for `b`, although this coefficient does not
    # appear in the original paper at a glance. This is because `k_s.T.mean`
    # takes the mean over `2 * n_s` entries, whereas the division in the
    # original paper is by `n_s`.
    A = (p_p - p_n) / n_u * (k_u.T.dot(k_u) + 2 * self.lam * n_u * np.eye(d))
    b = 2 * p_s * k_s.T.mean(axis=1) - k_u.T.mean(axis=1)
    self.coef_ = np.linalg.solve(A, b)

    return self
Example #2
Source File: _template.py From project-template with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
    """A reference implementation of a fitting function.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The training input samples.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The target values (class labels in classification, real numbers in
        regression).

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y, accept_sparse=True)
    self.is_fitted_ = True
    # `fit` should always return `self`
    return self
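As a quick illustration of what accept_sparse=True buys you in the template above (this snippet is my own, not part of project-template): with the default accept_sparse=False, check_X_y rejects scipy sparse matrices with a TypeError.

import numpy as np
from scipy import sparse
from sklearn.utils.validation import check_X_y

X = sparse.csr_matrix(np.eye(4))
y = np.array([0, 1, 0, 1])

X_checked, y_checked = check_X_y(X, y, accept_sparse=True)  # sparse X passes through
try:
    check_X_y(X, y)  # default accept_sparse=False
except TypeError as exc:
    print(exc)  # sparse input rejected: dense data is required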
Example #3
Source File: model_v0.py From Quora with MIT License
def fit(self, X, y):
    # Check that X and y have correct shape
    # if isinstance(y, (pd.DataFrame, pd.Series)):
    #     y = y.values
    X, y = check_X_y(X, y, accept_sparse=True)

    def pr(X, y_i, y):
        p = X[y == y_i].sum(0)
        return (p + 1) / ((y == y_i).sum() + 1)

    self._r = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))
    X_nb = X.multiply(self._r)
    self._clf = LogisticRegression(
        C=self.C,
        dual=self.dual,
        n_jobs=self.n_jobs
    ).fit(X_nb, y)
    return self
Example #4
Source File: submission_v0.py From Quora with MIT License
def fit(self, X, y):
    # Check that X and y have correct shape
    y = y.values
    X, y = check_X_y(X, y, accept_sparse=True)

    def pr(X, y_i, y):
        p = X[y == y_i].sum(0)
        return (p + 1) / ((y == y_i).sum() + 1)

    self._r = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))
    X_nb = X.multiply(self._r)
    self._clf = LogisticRegression(
        C=self.C,
        dual=self.dual,
        n_jobs=self.n_jobs
    ).fit(X_nb, y)
    return self
Example #5
Source File: bayes.py From sparsereg with MIT License
def fit(self, x, y):
    x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)

    # boilerplate
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X
    )

    fh, vf, ve, sigma = jmap(
        y, x, self.ae0, self.be0, self.af0, self.bf0,
        max_iter=self.max_iter, tol=self.tol
    )

    self.X_offset_ = X_offset
    self.X_scale_ = X_scale
    self.sigma_ = sigma
    self.ve_ = ve
    self.vf_ = vf
    self.coef_ = fh
    self.alpha_ = 1.0 / np.mean(ve)
    self.lambda_ = 1.0 / np.mean(vf)
    self.std_intercept_, self.std_coef_ = scale_sigma(self, X_offset, X_scale)
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
Example #6
Source File: group_lasso.py From sparsereg with MIT License
def fit(self, x, y, sample_weight=None):
    x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)
    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x, y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=sample_weight,
    )

    if sample_weight is not None:
        x, y = _rescale_data(x, y, sample_weight)

    self.coef_ = sparse_group_lasso(
        x, y, self.alpha, self.rho, self.groups,
        max_iter=self.max_iter, rtol=self.tol
    )

    self._set_intercept(X_offset, y_offset, X_scale)
    return self
Example #7
Source File: _template.py From project-template with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
    """A reference implementation of a fitting function for a classifier.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The training input samples.
    y : array-like, shape (n_samples,)
        The target values. An array of int.

    Returns
    -------
    self : object
        Returns self.
    """
    # Check that X and y have correct shape
    X, y = check_X_y(X, y)
    # Store the classes seen during fit
    self.classes_ = unique_labels(y)

    self.X_ = X
    self.y_ = y
    # Return the classifier
    return self
Example #8
Source File: base.py From sparsereg with MIT License
def fit(self, x_, y, sample_weight=None):
    n_samples, n_features = x_.shape
    X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=False)

    x, y, X_offset, y_offset, X_scale = self._preprocess_data(
        x_, y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=None,
    )

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        x, y = _rescale_data(x, y, sample_weight)

    coefs, intercept = fit_with_noise(x, y, self.sigma, self.alpha, self.n)
    self.intercept_ = intercept
    self.coef_ = coefs
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
Example #9
Source File: nn.py From tpot with GNU Lesser General Public License v3.0
def validate_inputs(self, X, y):
    # Things we don't want to allow until we've tested them:
    # - Sparse inputs
    # - Multiclass outputs (e.g., more than 2 classes in `y`)
    # - Non-finite inputs
    # - Complex inputs

    X, y = check_X_y(X, y, accept_sparse=False, allow_nd=False)
    assert_all_finite(X, y)

    if type_of_target(y) != 'binary':
        raise ValueError("Non-binary targets not supported")

    if np.any(np.iscomplex(X)) or np.any(np.iscomplex(y)):
        raise ValueError("Complex data not supported")

    if np.issubdtype(X.dtype, np.object_) or np.issubdtype(y.dtype, np.object_):
        try:
            X = X.astype(float)
            y = y.astype(int)
        except (TypeError, ValueError):
            raise ValueError("argument must be a string.* number")

    return (X, y)
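Note that check_X_y itself already rejects non-finite values in X by default, which is part of what the validation above relies on. A small sketch of that behavior (my own, not from TPOT):

import numpy as np
from sklearn.utils.validation import check_X_y

X = np.array([[1.0, 2.0], [np.nan, 3.0]])
y = np.array([0, 1])
try:
    check_X_y(X, y)  # non-finite checking is on by default
except ValueError as exc:
    print(exc)  # NaN in X is rejected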
Example #10
Source File: test_estimator_checks.py From Mastering-Elasticsearch-7.0 with MIT License
def fit(self, X, y):
    X, y = check_X_y(X, y,
                     accept_sparse=("csr", "csc", "coo"),
                     accept_large_sparse=True,
                     multi_output=True,
                     y_numeric=True)
    if sp.issparse(X):
        if X.getformat() == "coo":
            if X.row.dtype == "int64" or X.col.dtype == "int64":
                raise ValueError(
                    "Estimator doesn't support 64-bit indices")
        elif X.getformat() in ["csc", "csr"]:
            if X.indices.dtype == "int64" or X.indptr.dtype == "int64":
                raise ValueError(
                    "Estimator doesn't support 64-bit indices")

    return self
Example #11
Source File: base.py From polylearn with BSD 2-Clause "Simplified" License
def _check_X_y(self, X, y):
    # helpful error message for sklearn < 1.17
    is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2

    if is_2d or type_of_target(y) != 'binary':
        raise TypeError("Only binary targets supported. For training "
                        "multiclass or multilabel models, you may use the "
                        "OneVsRest or OneVsAll metaestimators in "
                        "scikit-learn.")

    X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                     multi_output=False)

    self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
    y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double)
    return X, y
Example #12
Source File: oracle.py From DESlib with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Data used to fit the model.

    y : array of shape (n_samples)
        class labels of each example in X.

    Returns
    -------
    self : object
        Returns self.
    """
    X, y = check_X_y(X, y)
    super(Oracle, self).fit(X, y)
    return self
Example #13
Source File: pu_mr.py From pywsl with MIT License
def fit(self, x, y):
    check_classification_targets(y)
    # x, y = check_X_y(x, y, y_numeric=True)
    x, y = check_X_y(x, y)
    x_p, x_u = x[y == +1, :], x[y == 0, :]
    n_p, n_u = x_p.shape[0], x_u.shape[0]

    if self.basis == 'gauss':
        b = np.minimum(n_u, self.n_basis)
        center_index = np.random.permutation(n_u)[:b]
        self._x_c = x_u[center_index, :]
    elif self.basis == 'lm':
        b = x_p.shape[1] + 1
    else:
        # must reference `self.basis`; the bare name `basis` is undefined here
        raise ValueError('Invalid basis type: {}.'.format(self.basis))

    k_p, k_u = self._ker(x_p), self._ker(x_u)

    H = k_u.T.dot(k_u) / n_u
    h = 2 * self.prior * np.mean(k_p, axis=0) - np.mean(k_u, axis=0)
    R = self.lam * np.eye(b)

    self.coef_ = sp.linalg.solve(H + R, h)

    return self
Example #14
Source File: cart.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License
def __init__(self, X, y, criterion, min_samples_split, max_depth,
             n_val_sample, random_state):
    # make sure max_depth > 1
    if max_depth < 2:
        raise ValueError("max depth must be > 1")

    # check the input arrays, and if it's classification validate the
    # target values in y
    X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
    if is_classifier(self):
        check_classification_targets(y)

    # hyper parameters so we can later inspect attributes of the model
    self.min_samples_split = min_samples_split
    self.max_depth = max_depth
    self.n_val_sample = n_val_sample
    self.random_state = random_state

    # create the splitting class
    random_state = check_random_state(random_state)
    self.splitter = RandomSplitter(random_state, criterion, n_val_sample)

    # grow the tree depth first
    self.tree = self._find_next_split(X, y, 0)
Example #15
Source File: mlp.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License
def _init_weights_biases(X, y, hidden, random_state, last_dim=None):
    # make sure dims all match in X, y and that we have appropriate
    # classification targets
    X, y = check_X_y(X, y, copy=False)
    check_classification_targets(y)
    random_state = check_random_state(random_state)

    # initialize the weights and biases. For each layer, we create a new
    # matrix of dimensions [last_layer_col_dim, new_col_dim]. This ensures
    # we can compute matrix products across the layers and that the
    # dimensions all match up. The biases will each be a vector of ones
    # in this example, though in other networks they can be initialized
    # differently
    weights = []
    biases = []

    # if last_dim is undefined, use the column shape of the input data.
    # this argument is used to simplify the initialization of weights/
    # biases in the transfer learning class...
    if last_dim is None:
        last_dim = X.shape[1]

    for layer_size in hidden:
        # initialize to extremely small values
        w = random_state.rand(last_dim, layer_size) * 0.01
        b = np.ones(layer_size)
        last_dim = layer_size

        weights.append(w)
        biases.append(b)

    # we need to add one more layer (the output layer) that is the size of
    # the expected output probabilities. We'll apply the softmax function
    # to the output of this layer.
    n_outputs = np.unique(y).shape[0]
    weights.append(random_state.rand(last_dim, n_outputs))
    biases.append(np.ones(n_outputs))

    return X, y, weights, biases
Example #16
Source File: base.py From sparsereg with MIT License
def fit(self, x, y, **kwargs):
    # x, y = check_X_y(x, y, multi_output=False)
    super().fit(self._transform(x, y), y, **kwargs)
    self._arrange_coef()
    return self
Example #17
Source File: gpclass.py From auto-tikv with Apache License 2.0
def check_X_y(self, X, y):
    from sklearn.utils.validation import check_X_y
    if X.shape[0] > self.max_train_size_:
        raise Exception("X_train size cannot exceed {} ({})"
                        .format(self.max_train_size_, X.shape[0]))
    return check_X_y(X, y, multi_output=True,
                     allow_nd=True, y_numeric=True,
                     estimator="GPR")
Example #18
Source File: mcb.py From pyts with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
    """Compute the bin edges for each feature.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_timestamps)
        Data to transform.

    y : None or array-like, shape = (n_samples,)
        Class labels for each sample. Only used if ``strategy='entropy'``.
    """
    if self.strategy == 'entropy':
        if y is None:
            raise ValueError("y cannot be None if strategy='entropy'.")
        X, y = check_X_y(X, y, dtype='float64')
        check_classification_targets(y)
    else:
        X = check_array(X, dtype='float64')

    n_samples, n_timestamps = X.shape
    self._n_timestamps_fit = n_timestamps
    self._alphabet = self._check_params(n_samples)
    self._check_constant(X)
    self.bin_edges_ = self._compute_bins(
        X, y, n_timestamps, self.n_bins, self.strategy)
    return self
Example #19
Source File: base.py From polylearn with BSD 2-Clause "Simplified" License
def _check_X_y(self, X, y):
    X, y = check_X_y(X, y, accept_sparse='csc', multi_output=False,
                     dtype=np.double, y_numeric=True)
    y = y.astype(np.double).ravel()
    return X, y
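Compared with the classification variant in Example #11, this regression path relies on y_numeric=True, which casts object-dtype targets to float64 (and would raise if they cannot be interpreted as numbers). A small sketch of my own, not polylearn's:

import numpy as np
from sklearn.utils.validation import check_X_y

X = [[0.0], [1.0], [2.0]]
y = np.array([0, 1, 2], dtype=object)  # object-dtype targets, e.g. from a DataFrame

X_checked, y_checked = check_X_y(X, y, dtype=np.double, y_numeric=True)
print(X_checked.dtype, y_checked.dtype)  # float64 float64: y_numeric casts object y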
Example #20
Source File: GBTDAAL.py From daal4py with Apache License 2.0
def fit(self, X, y):
    # Check the algorithm parameters
    self._check_params()

    # Check that X and y have correct shape
    X, y = check_X_y(X, y, y_numeric=True, dtype=[np.single, np.double])

    # Convert to 2d array
    y_ = y.reshape((-1, 1))

    self.n_features_ = X.shape[1]

    # Get random seed
    rs_ = check_random_state(self.random_state)
    seed_ = rs_.randint(0, np.iinfo('i').max)

    # Define type of data
    fptype = getFPType(X)

    # Fit the model
    train_algo = d4p.gbt_regression_training(
        fptype=fptype,
        splitMethod=self.split_method,
        maxIterations=self.max_iterations,
        maxTreeDepth=self.max_tree_depth,
        shrinkage=self.shrinkage,
        minSplitLoss=self.min_split_loss,
        lambda_=self.reg_lambda,
        observationsPerTreeFraction=self.observations_per_tree_fraction,
        featuresPerNode=self.features_per_node,
        minObservationsInLeafNode=self.min_observations_in_leaf_node,
        memorySavingMode=self.memory_saving_mode,
        maxBins=self.max_bins,
        minBinSize=self.min_bin_size,
        engine=d4p.engines_mcg59(seed=seed_))

    train_result = train_algo.compute(X, y_)

    # Store the model
    self.daal_model_ = train_result.model

    # Return the classifier
    return self
Example #21
Source File: gpclass.py From auto-tikv with Apache License 2.0
def fit(self, X_train, y_train, ridge=1.0):
    self._reset()
    X_train, y_train = self.check_X_y(X_train, y_train)
    self.X_train = np.float32(X_train)
    self.y_train = np.float32(y_train)
    sample_size = self.X_train.shape[0]

    if np.isscalar(ridge):
        ridge = np.ones(sample_size) * ridge
    assert isinstance(ridge, np.ndarray)
    assert ridge.ndim == 1

    X_dists = np.zeros((sample_size, sample_size), dtype=np.float32)
    with tf.Session(graph=self.graph,
                    config=tf.ConfigProto(
                        intra_op_parallelism_threads=self.num_threads_)) as sess:
        dist_op = self.ops['dist_op']
        v1, v2 = self.vars['v1_h'], self.vars['v2_h']
        for i in range(sample_size):
            X_dists[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i],
                                                      v2: self.X_train})

        K_ridge_op = self.ops['K_ridge_op']
        X_dists_ph = self.vars['X_dists_h']
        ridge_ph = self.vars['ridge_h']
        self.K = sess.run(K_ridge_op, feed_dict={X_dists_ph: X_dists,
                                                 ridge_ph: ridge})

        K_ph = self.vars['K_h']
        K_inv_op = self.ops['K_inv_op']
        self.K_inv = sess.run(K_inv_op, feed_dict={K_ph: self.K})

        xy_op = self.ops['xy_op']
        K_inv_ph = self.vars['K_inv_h']
        yt_ph = self.vars['yt_h']
        self.xy_ = sess.run(xy_op, feed_dict={K_inv_ph: self.K_inv,
                                              yt_ph: self.y_train})
    return self
Example #22
Source File: simple_regression.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License
def __init__(self, X, y):
    # First check X, y and make sure they are of equal length, no NaNs
    # and that they are numeric
    X, y = check_X_y(X, y, y_numeric=True, accept_sparse=False)  # keep it simple

    # Next, we want to scale all of our features so X is centered.
    # We will do the same with our target variable, y
    X_means = np.average(X, axis=0)
    y_mean = y.mean(axis=0)

    # don't do in place, so we get a copy
    X = X - X_means
    y = y - y_mean

    # Let's compute the least squares on X wrt y.
    # Least squares solves the equation `a x = b` by computing a
    # vector `x` that minimizes the Euclidean 2-norm `|| b - a x ||^2`.
    theta, _, rank, singular_values = lstsq(X, y, rcond=None)

    # finally, we compute the intercept values as the mean of the target
    # variable MINUS the inner product of the X_means and the coefficients
    intercept = y_mean - np.dot(X_means, theta.T)

    # ... and set everything as an instance attribute
    self.theta = theta
    self.rank = rank
    self.singular_values = singular_values

    # we have to retain some of the statistics around the data too
    self.X_means = X_means
    self.y_mean = y_mean
    self.intercept = intercept
Example #23
Source File: knn.py From Hands-on-Supervised-Machine-Learning-with-Python with MIT License
def __init__(self, X, y, k=10):
    # check the input array
    X, y = check_X_y(X, y, accept_sparse=False, dtype=np.float32, copy=True)

    # make sure we're performing classification here
    check_classification_targets(y)

    # Save the K hyper-parameter so we can use it later
    self.k = k

    # kNN is a special case where we have to save the training data in
    # order to make predictions in the future
    self.X = X
    self.y = y
Example #24
Source File: dft.py From pyts with BSD 3-Clause "New" or "Revised" License
def fit(self, X, y=None):
    """Learn indices of the Fourier coefficients to keep.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_timestamps)
        Training vector.

    y : None or array-like, shape = (n_samples,) (default = None)
        Class labels for each data sample. Only used if ``anova=True``.

    Returns
    -------
    self : object
    """
    if self.anova:
        X, y = check_X_y(X, y, dtype='float64')
    else:
        X = check_array(X, dtype='float64')
    n_samples, n_timestamps = X.shape
    n_coefs = self._check_params(n_timestamps)

    if self.anova:
        ss = StandardScaler(self.norm_mean, self.norm_std)
        X = ss.fit_transform(X)
        X_fft = np.fft.rfft(X)
        X_fft = np.vstack([np.real(X_fft), np.imag(X_fft)])
        if n_timestamps % 2 == 0:
            X_fft = X_fft.reshape(n_samples, n_timestamps + 2, order='F')
            X_fft = np.c_[X_fft[:, 0], X_fft[:, 2:-1]]
        else:
            X_fft = X_fft.reshape(n_samples, n_timestamps + 1, order='F')
            X_fft = np.c_[X_fft[:, 0], X_fft[:, 2:]]
        if self.drop_sum:
            X_fft = X_fft[:, 1:]
        self.support_ = self._anova(X_fft, y, n_coefs, n_timestamps)
    else:
        self.support_ = np.arange(n_coefs)
    return self
Example #25
Source File: blender.py From Quora with MIT License
def fit(self, X, y):
    # # Check that X and y have correct shape
    # y = y.values
    X, y = check_X_y(X, y, accept_sparse=True)
    # fit models
    self._clfs = []
    for model in self.models:
        self._clfs.append(model.fit(X, y))
    return self
Example #26
Source File: base.py From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X=None, y=None, **kwargs):
    if X is not None and y is not None:
        if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
            # will not use additional memory
            check_X_y(X, y, accept_sparse='csc', multi_output=True)
            self.X = X
            self.y = y
        else:
            self.X, self.y = check_X_y(X, y, accept_sparse='csc',
                                       multi_output=True)
    else:
        self.X = X
        self.y = y
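ALiPy passes multi_output=True so that 2-D (multi-label) targets validate cleanly; with the default multi_output=False, check_X_y insists on a 1-D y. A short sketch of the difference, my own rather than ALiPy's:

import numpy as np
from sklearn.utils.validation import check_X_y

X = np.arange(12).reshape(4, 3)
Y = np.array([[0, 1], [1, 0], [0, 1], [1, 1]])  # one row of labels per sample

check_X_y(X, Y, accept_sparse='csc', multi_output=True)  # accepted
try:
    check_X_y(X, Y)  # default multi_output=False
except ValueError as exc:
    print(exc)  # 2-D y rejected: a 1-D target is expected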
Example #27
Source File: base.py From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X=None, y=None, **kwargs):
    if X is not None and y is not None:
        self._check_multi_label(y)
        if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
            # will not use additional memory
            check_X_y(X, y, accept_sparse='csc', multi_output=True)
            self.X = X
            self.y = y
        else:
            self.X, self.y = check_X_y(X, y, accept_sparse='csc',
                                       multi_output=True)
    else:
        self.X = X
        self.y = y
Example #28
Source File: interface.py From ALiPy with BSD 3-Clause "New" or "Revised" License
def __init__(self, X=None, y=None, **kwargs):
    if X is not None and y is not None:
        if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
            # will not use additional memory
            check_X_y(X, y, accept_sparse='csc', multi_output=True)
            self.X = X
            self.y = y
        else:
            self.X, self.y = check_X_y(X, y, accept_sparse='csc',
                                       multi_output=True)
    else:
        self.X = X
        self.y = y
Example #29
Source File: test_detector_lscp.py From combo with BSD 2-Clause "Simplified" License
def setUp(self):
    # Define data file and read X and y
    # Generate some data if the source data is missing
    this_directory = path.abspath(path.dirname(__file__))
    mat_file = 'cardio.mat'
    try:
        mat = loadmat(path.join(*[this_directory, 'data', mat_file]))
    except TypeError:
        print('{data_file} does not exist. Use generated data'.format(
            data_file=mat_file))
        X, y = generate_data(train_only=True)  # load data
    except IOError:
        print('{data_file} does not exist. Use generated data'.format(
            data_file=mat_file))
        X, y = generate_data(train_only=True)  # load data
    else:
        X = mat['X']
        y = mat['y'].ravel()
        X, y = check_X_y(X, y)

    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(X, y, test_size=0.4, random_state=42)

    detectors = [LOF(), LOF()]
    self.clf = LSCP(base_estimators=detectors)
    self.clf.fit(self.X_train)

    self.roc_floor = 0.6
Example #30
Source File: su_learning.py From SU_Classification with MIT License
def fit(self, x, y):
    from cvxopt import matrix, solvers
    solvers.options['show_progress'] = False

    check_classification_targets(y)
    x, y = check_X_y(x, y)
    x_s, x_u = x[y == +1, :], x[y == 0, :]
    n_s, n_u = len(x_s), len(x_u)
    p_p = self.prior
    p_n = 1 - self.prior
    p_s = p_p ** 2 + p_n ** 2

    k_s = self._basis(x_s)
    k_u = self._basis(x_u)
    d = k_u.shape[1]

    P = np.zeros((d + 2 * n_u, d + 2 * n_u))
    P[:d, :d] = self.lam * np.eye(d)
    q = np.vstack((
        -p_s / (n_s * (p_p - p_n)) * k_s.T.dot(np.ones((n_s, 1))),
        -p_n / (n_u * (p_p - p_n)) * np.ones((n_u, 1)),
        -p_p / (n_u * (p_p - p_n)) * np.ones((n_u, 1))
    ))
    G = np.vstack((
        np.hstack((np.zeros((n_u, d)), -np.eye(n_u), np.zeros((n_u, n_u)))),
        np.hstack((0.5 * k_u, -np.eye(n_u), np.zeros((n_u, n_u)))),
        np.hstack((k_u, -np.eye(n_u), np.zeros((n_u, n_u)))),
        np.hstack((np.zeros((n_u, d)), np.zeros((n_u, n_u)), -np.eye(n_u))),
        np.hstack((-0.5 * k_u, np.zeros((n_u, n_u)), -np.eye(n_u))),
        np.hstack((-k_u, np.zeros((n_u, n_u)), -np.eye(n_u)))
    ))
    h = np.vstack((
        np.zeros((n_u, 1)),
        -0.5 * np.ones((n_u, 1)),
        np.zeros((n_u, 1)),
        np.zeros((n_u, 1)),
        -0.5 * np.ones((n_u, 1)),
        np.zeros((n_u, 1))
    ))
    sol = solvers.qp(matrix(P), matrix(q), matrix(G), matrix(h))
    self.coef_ = np.array(sol['x'])[:d]