Python sklearn.utils.validation.check_X_y() Examples

Below are 30 code examples that use sklearn.utils.validation.check_X_y(). You can vote up the examples you like or vote down the ones you don't, and reach the original project or source file through the links above each example. You may also want to browse all available functions and classes of the module sklearn.utils.validation, or try the search function.
Example #1
Source File:    From SU_Classification with MIT License 6 votes vote down vote up
def fit(self, x, y):
        """Fit the coefficients by solving a regularised linear system.

        Rows of ``x`` labelled ``+1`` form ``x_s`` and rows labelled ``0``
        form ``x_u`` (presumably "similar" and "unlabeled" samples -- confirm
        against the project documentation).

        Parameters
        ----------
        x : array-like, shape (n_samples, n_features)
            Input samples, validated with ``check_X_y``.
        y : array-like, shape (n_samples,)
            Labels; only rows labelled ``+1`` or ``0`` are used.

        Returns
        -------
        self : object
            The fitted estimator; the solution is stored in ``coef_``.
        """
        x, y = check_X_y(x, y)
        x_s, x_u = x[y == +1, :], x[y == 0, :]
        n_u = len(x_u)

        p_p = self.prior
        p_n = 1 - self.prior
        p_s = p_p ** 2 + p_n ** 2
        k_s = self._basis(x_s)
        k_u = self._basis(x_u)
        d = k_u.shape[1]

        # Note that `2 *` is needed for `b` while this coefficient does not
        # seem to appear in the original paper at a glance.
        # This is because `k_s.T.mean` takes the mean over `2 * n_s` entries,
        # while the division is taken with `n_s` in the original paper.
        # NOTE(review): the Gram term `k_u.T.dot(k_u)` was absent from the
        # listing and has been restored -- confirm against upstream.
        A = (p_p - p_n) / n_u * (k_u.T.dot(k_u) + 2 * self.lam * n_u * np.eye(d))
        b = 2 * p_s * k_s.T.mean(axis=1) - k_u.T.mean(axis=1)
        self.coef_ = np.linalg.solve(A, b)

        return self
Example #2
Source File:    From project-template with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, X, y):
        """A reference implementation of a fitting function.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels in classification, real numbers in
            regression).

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y, accept_sparse=True)
        # Only a marker is recorded; nothing is learned from the data here.
        self.is_fitted_ = True
        # `fit` should always return `self`
        return self
Example #3
Source File:    From Quora with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """Fit a logistic regression on features scaled by a per-column log ratio.

        For each column the add-one smoothed sum over rows of class ``1`` is
        divided by the same quantity for class ``0``; the log of that ratio is
        stored (as a CSR matrix) in ``self._r`` and multiplied into ``X``
        before the classifier is fitted.

        Parameters
        ----------
        X : array-like or sparse matrix
            Training samples (sparse input is accepted by the validation).
        y : array-like
            Targets; the ratio is computed from the rows labelled ``1`` and ``0``.

        Returns
        -------
        self : object
            The fitted estimator; the classifier is stored in ``self._clf``.
        """
        X, y = check_X_y(X, y, accept_sparse=True)

        def smoothed_share(label):
            # Column sums over the rows of `label`, with add-one smoothing on
            # both the sums and the row count.
            rows = y == label
            return (X[rows].sum(0) + 1) / (rows.sum() + 1)

        log_ratio = np.log(smoothed_share(1) / smoothed_share(0))
        self._r = sparse.csr_matrix(log_ratio)
        scaled = X.multiply(self._r)
        classifier = LogisticRegression()
        self._clf = classifier.fit(scaled, y)
        return self
Example #4
Source File:    From Quora with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """Fit a logistic regression on features scaled by a per-column log ratio.

        Parameters
        ----------
        X : array-like or sparse matrix
            Training samples (sparse input is accepted by the validation).
        y : array-like or pandas object
            Targets. Objects exposing ``.values`` are unwrapped first; plain
            arrays and lists are passed through unchanged.

        Returns
        -------
        self : object
            The fitted estimator; the ratio is stored in ``self._r`` and the
            classifier in ``self._clf``.
        """
        # Unwrap pandas containers without failing on plain arrays or lists,
        # which have no `.values` attribute.
        y = getattr(y, "values", y)
        # Check that X and y have correct shape
        X, y = check_X_y(X, y, accept_sparse=True)

        def pr(X, y_i, y):
            # Add-one smoothed column sums over the rows of class `y_i`.
            p = X[y == y_i].sum(0)
            return (p+1) / ((y == y_i).sum()+1)

        self._r = sparse.csr_matrix(np.log(pr(X, 1, y) / pr(X, 0, y)))
        X_nb = X.multiply(self._r)
        self._clf = LogisticRegression(
        ).fit(X_nb, y)
        return self
Example #5
Source File:    From sparsereg with MIT License 6 votes vote down vote up
def fit(self, x, y):
        """Fit the model with ``jmap`` and store the estimated quantities.

        Parameters
        ----------
        x : array-like, shape (n_samples, n_features)
            Training data; no sparse format is accepted by the validation.
        y : array-like, shape (n_samples,)
            Numeric, single-output targets.

        Returns
        -------
        self : object
            The fitted estimator. ``coef_``, ``sigma_``, ``ve_``, ``vf_``,
            ``alpha_``, ``lambda_``, ``std_intercept_`` and ``std_coef_`` are
            set, along with the preprocessing offsets and scale.
        """
        x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)  # boilerplate

        x, y, X_offset, y_offset, X_scale = self._preprocess_data(
            x, y, fit_intercept=self.fit_intercept, normalize=self.normalize, copy=self.copy_X
        )

        fh, vf, ve, sigma = jmap(
            y, x, self.ae0, self.be0, self.af0, self.bf0, max_iter=self.max_iter, tol=self.tol
        )
        self.X_offset_ = X_offset
        self.X_scale_ = X_scale

        self.sigma_ = sigma
        self.ve_ = ve
        self.vf_ = vf
        self.coef_ = fh
        # alpha_ and lambda_ are the reciprocals of the mean of `ve` and `vf`.
        self.alpha_ = 1.0 / np.mean(ve)
        self.lambda_ = 1.0 / np.mean(vf)
        self.std_intercept_, self.std_coef_ = scale_sigma(self, X_offset, X_scale)
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
Example #6
Source File:    From sparsereg with MIT License 6 votes vote down vote up
def fit(self, x, y, sample_weight=None):
        """Fit the coefficients with ``sparse_group_lasso``.

        Parameters
        ----------
        x : array-like, shape (n_samples, n_features)
            Training data; no sparse format is accepted by the validation.
        y : array-like, shape (n_samples,)
            Numeric, single-output targets.
        sample_weight : array-like or None, default=None
            When given, ``x`` and ``y`` are rescaled with ``_rescale_data``
            before solving.

        Returns
        -------
        self : object
            The fitted estimator; the solution is stored in ``coef_``.
        """
        x, y = check_X_y(x, y, accept_sparse=[], y_numeric=True, multi_output=False)

        # NOTE(review): the argument list of this call was missing from the
        # listing. It is reconstructed from the usual scikit-learn
        # linear-model pattern -- confirm against upstream.
        x, y, X_offset, y_offset, X_scale = self._preprocess_data(
            x,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight,
        )

        if sample_weight is not None:
            x, y = _rescale_data(x, y, sample_weight)

        self.coef_ = sparse_group_lasso(
            x, y, self.alpha, self.rho, self.groups, max_iter=self.max_iter, rtol=self.tol
        )

        self._set_intercept(X_offset, y_offset, X_scale)
        return self
Example #7
Source File:    From project-template with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, X, y):
        """A reference implementation of a fitting function for a classifier.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values. An array of int.

        Returns
        -------
        self : object
            Returns self.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)

        # Keep the validated training data on the instance.
        self.X_ = X
        self.y_ = y
        # Return the classifier
        return self
Example #8
Source File:    From sparsereg with MIT License 6 votes vote down vote up
def fit(self, x_, y, sample_weight=None):
        """Fit coefficients and intercept with ``fit_with_noise``.

        Parameters
        ----------
        x_ : array-like, shape (n_samples, n_features)
            Training data; no sparse format is accepted by the validation.
        y : array-like, shape (n_samples,)
            Numeric, single-output targets.
        sample_weight : array-like or None, default=None
            When given, the data are rescaled with ``_rescale_data``.

        Returns
        -------
        self : object
            The fitted estimator; ``coef_`` and ``intercept_`` are set.
        """
        X, y = check_X_y(x_, y, accept_sparse=[], y_numeric=True, multi_output=False)

        # NOTE(review): the argument list of this call was missing from the
        # listing. It is reconstructed from the usual scikit-learn
        # linear-model pattern, using the validated `X` -- confirm upstream.
        x, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight,
        )

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            x, y = _rescale_data(x, y, sample_weight)

        coefs, intercept = fit_with_noise(x, y, self.sigma, self.alpha, self.n)
        self.intercept_ = intercept
        self.coef_ = coefs
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
Example #9
Source File:    From tpot with GNU Lesser General Public License v3.0 6 votes vote down vote up
def validate_inputs(self, X, y):
        """Validate ``X`` and ``y`` and return them as checked arrays.

        Parameters
        ----------
        X : array-like
            Input samples; sparse and n-dimensional inputs are rejected.
        y : array-like
            Targets; must be of type ``'binary'``.

        Returns
        -------
        (X, y) : tuple
            The validated arrays. Object-dtype inputs are converted to
            ``float`` (``X``) and ``int`` (``y``).

        Raises
        ------
        ValueError
            If the targets are not binary, if either array holds complex
            values, or if object-dtype data cannot be converted.
        """
        # Things we don't want to allow until we've tested them:
        # - Sparse inputs
        # - Multiclass outputs (e.g., more than 2 classes in `y`)
        # - Non-finite inputs
        # - Complex inputs

        X, y = check_X_y(X, y, accept_sparse=False, allow_nd=False)

        # `assert_all_finite` checks a single array; its second parameter is
        # `allow_nan`, so each array needs its own call.
        assert_all_finite(X)
        assert_all_finite(y)

        if type_of_target(y) != 'binary':
            raise ValueError("Non-binary targets not supported")

        if np.any(np.iscomplex(X)) or np.any(np.iscomplex(y)):
            raise ValueError("Complex data not supported")
        if np.issubdtype(X.dtype, np.object_) or np.issubdtype(y.dtype, np.object_):
            try:
                X = X.astype(float)
                y = y.astype(int)
            except (TypeError, ValueError) as err:
                raise ValueError("argument must be a string.* number") from err

        return (X, y)
Example #10
Source File:    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def fit(self, X, y):
        """Validate the input and reject sparse matrices with 64-bit indices.

        Parameters
        ----------
        X : array-like or sparse matrix (CSR, CSC or COO)
            Input samples.
        y : array-like
            Targets.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``X`` is sparse and any of its index arrays has dtype ``int64``.
        """
        # NOTE(review): the keyword arguments after `accept_sparse` were
        # missing from the listing and are reconstructed -- confirm upstream.
        X, y = check_X_y(X, y,
                         accept_sparse=("csr", "csc", "coo"),
                         accept_large_sparse=True,
                         multi_output=True,
                         y_numeric=True)
        if sp.issparse(X):
            if X.getformat() == "coo":
                # COO stores its indices in `row` / `col`.
                if X.row.dtype == "int64" or X.col.dtype == "int64":
                    raise ValueError(
                        "Estimator doesn't support 64-bit indices")
            elif X.getformat() in ["csc", "csr"]:
                # CSC / CSR store their indices in `indices` / `indptr`.
                if X.indices.dtype == "int64" or X.indptr.dtype == "int64":
                    raise ValueError(
                        "Estimator doesn't support 64-bit indices")

        return self
Example #11
Source File:    From polylearn with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _check_X_y(self, X, y):
        """Validate the input and encode binary targets as ``-1`` / ``+1``.

        Parameters
        ----------
        X : array-like or sparse matrix
            Input samples; validated as ``np.double``, CSC sparse accepted.
        y : array-like
            Targets; must be of type ``'binary'`` and not 2-d with two or
            more columns.

        Returns
        -------
        X : validated input samples.
        y : 1-d ``np.double`` array produced by the fitted
            ``self.label_binarizer_`` (``neg_label=-1``, ``pos_label=1``).

        Raises
        ------
        TypeError
            If the targets are not binary.
        """

        # helpful error message for sklearn < 1.17
        is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2

        if is_2d or type_of_target(y) != 'binary':
            raise TypeError("Only binary targets supported. For training "
                            "multiclass or multilabel models, you may use the "
                            "OneVsRest or OneVsAll metaestimators in "
                            "scikit-learn.")

        # NOTE(review): the tail of this call was missing from the listing;
        # `multi_output=False` is reconstructed -- confirm upstream.
        X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
                         multi_output=False)

        self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
        y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double)
        return X, y
Example #12
Source File:    From DESlib with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fit(self, X, y):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            Data used to fit the model.

        y : array of shape (n_samples)
            class labels of each example in X.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y)
        # Delegate the actual fitting to the parent class.
        super(Oracle, self).fit(X, y)
        return self
Example #13
Source File:    From pywsl with MIT License 6 votes vote down vote up
def fit(self, x, y):
        """Fit the coefficients by solving a regularised linear system.

        Rows of ``x`` labelled ``+1`` form ``x_p`` and rows labelled ``0``
        form ``x_u`` (presumably positive and unlabeled samples -- confirm
        against the project documentation).

        Parameters
        ----------
        x : array-like, shape (n_samples, n_features)
            Input samples, validated with ``check_X_y``.
        y : array-like, shape (n_samples,)
            Labels; only rows labelled ``+1`` or ``0`` are used.

        Returns
        -------
        self : object
            The fitted estimator; the solution is stored in ``coef_``.

        Raises
        ------
        ValueError
            If ``self.basis`` is neither ``'gauss'`` nor ``'lm'``.
        """
        x, y = check_X_y(x, y)
        x_p, x_u = x[y == +1, :], x[y == 0, :]
        n_p, n_u = x_p.shape[0], x_u.shape[0]

        if self.basis == 'gauss':
            # Use at most `n_basis` randomly chosen rows of `x_u` as centres.
            b = np.minimum(n_u, self.n_basis)
            center_index = np.random.permutation(n_u)[:b]
            self._x_c = x_u[center_index, :]
        elif self.basis == 'lm':
            # One column per input feature plus one extra.
            b = x_p.shape[1] + 1
        else:
            # The message previously formatted an undefined name `basis`.
            raise ValueError('Invalid basis type: {}.'.format(self.basis))

        k_p, k_u = self._ker(x_p), self._ker(x_u)

        # NOTE(review): the right-hand side of `H` was missing from the
        # listing; the averaged Gram matrix of `k_u` is restored here --
        # confirm against upstream.
        H = k_u.T.dot(k_u)/n_u
        h = 2*self.prior*np.mean(k_p, axis=0) - np.mean(k_u, axis=0)
        R = self.lam*np.eye(b)
        self.coef_ = sp.linalg.solve(H + R, h)

        return self
Example #14
Source File:    From Hands-on-Supervised-Machine-Learning-with-Python with MIT License 6 votes vote down vote up
def __init__(self, X, y, criterion, min_samples_split, max_depth,
                 n_val_sample, random_state):
        """Validate the data, store the hyper-parameters and grow the tree.

        Parameters
        ----------
        X : array-like
            Training samples; sparse input is rejected and a copy is made.
        y : array-like
            Targets; checked as classification targets when the estimator is
            a classifier.
        criterion : object
            Passed to ``RandomSplitter``.
        min_samples_split : int
            Stored on the instance.
        max_depth : int
            Must be at least 2.
        n_val_sample : int
            Passed to ``RandomSplitter`` and stored on the instance.
        random_state : int, RandomState or None
            Seed source, resolved with ``check_random_state``.

        Raises
        ------
        ValueError
            If ``max_depth`` is smaller than 2.
        """
        # make sure max_depth > 1
        if max_depth < 2:
            raise ValueError("max depth must be > 1")

        # check the input arrays, and if it's classification validate the
        # target values in y
        X, y = check_X_y(X, y, accept_sparse=False, dtype=None, copy=True)
        if is_classifier(self):
            # The body of this branch was missing from the listing; the
            # comment above describes this check. Imported locally so the
            # block does not rely on a module-level import.
            from sklearn.utils.multiclass import check_classification_targets
            check_classification_targets(y)

        # hyper parameters so we can later inspect attributes of the model
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_val_sample = n_val_sample
        self.random_state = random_state

        # create the splitting class
        random_state = check_random_state(random_state)
        self.splitter = RandomSplitter(random_state, criterion, n_val_sample)

        # grow the tree depth first
        self.tree = self._find_next_split(X, y, 0)
Example #15
Source File:    From Hands-on-Supervised-Machine-Learning-with-Python with MIT License 5 votes vote down vote up
def _init_weights_biases(X, y, hidden, random_state, last_dim=None):
        """Validate the data and create one weight matrix and bias per layer.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Class labels; the number of unique values sets the output size.
        hidden : iterable of int
            Size of each hidden layer, in order.
        random_state : int, RandomState or None
            Seed source, resolved with ``check_random_state``.
        last_dim : int or None, default=None
            Input dimension of the first layer; defaults to ``X.shape[1]``.

        Returns
        -------
        X, y : validated arrays.
        weights : list of arrays, one per hidden layer plus the output layer.
        biases : list of arrays of ones, aligned with ``weights``.
        """
        # make sure dims all match in X, y and that we have appropriate
        # classification targets
        X, y = check_X_y(X, y, copy=False)
        # NOTE(review): the target check promised by the comment above was
        # missing from the listing; restored with a local import.
        from sklearn.utils.multiclass import check_classification_targets
        check_classification_targets(y)

        random_state = check_random_state(random_state)

        # initialize the weights and biases. For each layer, we create a new
        # matrix of dimensions [last_layer_col_dim, new_col_dim]. This ensures
        # we can compute matrix products across the layers and that the
        # dimensions all match up. The biases will each be a vector of ones
        # in this example, though in other networks that can be initialized
        # differently
        weights = []
        biases = []

        # if last dim is undefined, use the column shape of the input data.
        # this argument is used to simplify the initialization of weights/
        # biases in the transfer learning class...
        if last_dim is None:
            last_dim = X.shape[1]

        for layer_size in hidden:
            # initialize to extremely small values
            w = random_state.rand(last_dim, layer_size) * 0.01
            b = np.ones(layer_size)
            last_dim = layer_size

            # Without these appends the hidden layers were silently dropped.
            weights.append(w)
            biases.append(b)

        # we need to add one more layer (the output layer) that is the size of
        # the expected output probabilities. We'll apply the softmax function
        # to the output of this layer.
        n_outputs = np.unique(y).shape[0]
        weights.append(random_state.rand(last_dim, n_outputs))
        # Keep `biases` aligned with `weights`: one bias vector per layer.
        biases.append(np.ones(n_outputs))

        return X, y, weights, biases
Example #16
Source File:    From sparsereg with MIT License 5 votes vote down vote up
def fit(self, x, y, **kwargs):
        """Fit the parent estimator on the output of ``self._transform``.

        Parameters
        ----------
        x : array-like
            Input samples; passed through ``self._transform(x, y)`` first.
        y : array-like
            Targets, forwarded unchanged.
        **kwargs
            Forwarded to the parent class' ``fit``.

        Returns
        -------
        self : object
        """
        transformed = self._transform(x, y)
        super().fit(transformed, y, **kwargs)
        return self
Example #17
Source File:    From auto-tikv with Apache License 2.0 5 votes vote down vote up
def check_X_y(self, X, y):
        """Enforce the training-size limit, then validate ``X`` and ``y``.

        Parameters
        ----------
        X : array-like with a ``shape`` attribute
            Training samples; ``X.shape[0]`` must not exceed
            ``self.max_train_size_``.
        y : array-like
            Numeric targets; multi-output is allowed.

        Returns
        -------
        (X, y) : the arrays returned by scikit-learn's ``check_X_y``.

        Raises
        ------
        Exception
            If ``X`` has more rows than ``self.max_train_size_``.
        """
        # Local import: the method itself shadows the scikit-learn name.
        from sklearn.utils.validation import check_X_y

        if X.shape[0] > self.max_train_size_:
            raise Exception("X_train size cannot exceed {} ({})"
                            .format(self.max_train_size_, X.shape[0]))
        # NOTE(review): the last argument line was missing from the listing;
        # `estimator="GPR"` (used only in error messages) is reconstructed --
        # confirm against upstream.
        return check_X_y(X, y, multi_output=True,
                         allow_nd=True, y_numeric=True,
                         estimator="GPR")
Example #18
Source File:    From pyts with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fit(self, X, y=None):
        """Compute the bin edges for each feature.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Data to transform.

        y : None or array-like, shape = (n_samples,)
            Class labels for each sample. Only used if ``strategy='entropy'``.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``strategy='entropy'`` and ``y`` is None.
        """
        if self.strategy == 'entropy':
            if y is None:
                raise ValueError("y cannot be None if strategy='entropy'.")
            X, y = check_X_y(X, y, dtype='float64')
        else:
            # Labels are not needed by the other strategies.
            X = check_array(X, dtype='float64')
        n_samples, n_timestamps = X.shape
        self._n_timestamps_fit = n_timestamps
        self._alphabet = self._check_params(n_samples)
        self.bin_edges_ = self._compute_bins(
            X, y, n_timestamps, self.n_bins, self.strategy)
        return self
Example #19
Source File:    From polylearn with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _check_X_y(self, X, y):
        """Validate the input and return the targets as a flat double array.

        Parameters
        ----------
        X : array-like or sparse matrix
            Input samples; validated as ``np.double``, CSC sparse accepted.
        y : array-like
            Numeric, single-output targets.

        Returns
        -------
        X : validated input samples.
        y : 1-d ``np.double`` array.
        """
        checked_X, checked_y = check_X_y(
            X, y, accept_sparse='csc', multi_output=False,
            dtype=np.double, y_numeric=True)
        flat_y = checked_y.astype(np.double).ravel()
        return checked_X, flat_y
Example #20
Source File:    From daal4py with Apache License 2.0 5 votes vote down vote up
def fit(self, X, y):
        """Train a daal4py gradient-boosted-trees regression model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples, validated as single or double precision.
        y : array-like, shape (n_samples,)
            Numeric targets; reshaped to a column before training.

        Returns
        -------
        self : object
            The fitted estimator; the trained model is stored in
            ``daal_model_`` and the feature count in ``n_features_``.
        """
        # Check the algorithm parameters
        # NOTE(review): the statement this comment introduced is missing from
        # the listing and cannot be recovered from what is visible.

        # Check that X and y have correct shape
        X, y = check_X_y(X, y, y_numeric=True, dtype=[np.single, np.double])

        # Convert to 2d array
        y_ = y.reshape((-1, 1))

        self.n_features_ = X.shape[1]

        # Get random seed
        rs_ = check_random_state(self.random_state)
        seed_ = rs_.randint(0, np.iinfo('i').max)

        # Define type of data
        fptype = getFPType(X)

        # Fit the model
        # NOTE(review): every argument after `fptype` was missing from the
        # listing. Only the engine is reconstructed (so `seed_` is used); the
        # remaining hyper-parameter arguments must be restored from upstream.
        train_algo = d4p.gbt_regression_training(fptype=fptype,
                                                 engine=d4p.engines_mcg59(seed=seed_))
        train_result = train_algo.compute(X, y_)

        # Store the model
        self.daal_model_ = train_result.model

        # Return the classifier
        return self
Example #21
Source File:    From auto-tikv with Apache License 2.0 5 votes vote down vote up
def fit(self, X_train, y_train, ridge=1.0):
        """Evaluate the graph ops that build the fitted quantities.

        Parameters
        ----------
        X_train : array-like
            Training samples; stored as ``float32`` in ``self.X_train``.
        y_train : array-like
            Training targets; stored as ``float32`` in ``self.y_train``.
        ridge : scalar or 1-d numpy array, default=1.0
            A scalar is expanded to one value per training sample.

        Returns
        -------
        self : object
            The fitted estimator; ``K``, ``K_inv`` and ``xy_`` are set.
        """
        X_train, y_train = self.check_X_y(X_train, y_train)
        self.X_train = np.float32(X_train)
        self.y_train = np.float32(y_train)
        sample_size = self.X_train.shape[0]

        if np.isscalar(ridge):
            ridge = np.ones(sample_size) * ridge
        assert isinstance(ridge, np.ndarray)
        assert ridge.ndim == 1

        X_dists = np.zeros((sample_size, sample_size), dtype=np.float32)
        # NOTE(review): the `config=tf.ConfigProto(` wrapper and the four
        # `sess.run(<op>, ...)` prefixes were missing from the listing. Each
        # op fetched below was otherwise unused -- confirm against upstream.
        with tf.Session(graph=self.graph,
                        config=tf.ConfigProto(
                            intra_op_parallelism_threads=self.num_threads_)) as sess:
            dist_op = self.ops['dist_op']
            v1, v2 = self.vars['v1_h'], self.vars['v2_h']
            # One row of `X_dists` per training sample, fed against the
            # whole training set.
            for i in range(sample_size):
                X_dists[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i], v2: self.X_train})

            K_ridge_op = self.ops['K_ridge_op']
            X_dists_ph = self.vars['X_dists_h']
            ridge_ph = self.vars['ridge_h']

            self.K = sess.run(K_ridge_op, feed_dict={X_dists_ph: X_dists, ridge_ph: ridge})

            K_ph = self.vars['K_h']

            K_inv_op = self.ops['K_inv_op']
            self.K_inv = sess.run(K_inv_op, feed_dict={K_ph: self.K})

            xy_op = self.ops['xy_op']
            K_inv_ph = self.vars['K_inv_h']
            yt_ph = self.vars['yt_h']
            self.xy_ = sess.run(xy_op, feed_dict={K_inv_ph: self.K_inv,
                                                  yt_ph: self.y_train})
        return self
Example #22
Source File:    From Hands-on-Supervised-Machine-Learning-with-Python with MIT License 5 votes vote down vote up
def __init__(self, X, y):
        """Fit a least-squares linear model on centred data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples; sparse input is rejected.
        y : array-like, shape (n_samples,)
            Numeric targets.

        Attributes set: ``theta``, ``rank``, ``singular_values``,
        ``X_means``, ``y_mean`` and ``intercept``.
        """
        # First check X, y and make sure they are of equal length, no NaNs
        # and that they are numeric
        X, y = check_X_y(X, y, y_numeric=True,
                         accept_sparse=False)  # keep it simple

        # Next, we want to scale all of our features so X is centered
        # We will do the same with our target variable, y
        X_means = np.average(X, axis=0)
        y_mean = y.mean(axis=0)

        # don't do in place, so we get a copy
        X = X - X_means
        y = y - y_mean

        # Let's compute the least squares on X wrt y
        # Least squares solves the equation `a x = b` by computing a
        # vector `x` that minimizes the Euclidean 2-norm `|| b - a x ||^2`.
        theta, _, rank, singular_values = lstsq(X, y, rcond=None)

        # finally, we compute the intercept values as the mean of the target
        # variable MINUS the inner product of the X_means and the coefficients
        intercept = y_mean - np.dot(X_means, theta.T)

        # ... and set everything as an instance attribute
        self.theta = theta
        self.rank = rank
        self.singular_values = singular_values

        # we have to retain some of the statistics around the data too
        self.X_means = X_means
        self.y_mean = y_mean
        self.intercept = intercept
Example #23
Source File:    From Hands-on-Supervised-Machine-Learning-with-Python with MIT License 5 votes vote down vote up
def __init__(self, X, y, k=10):
        """Validate and store the training data and the ``k`` hyper-parameter.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples; validated as ``float32``, sparse rejected.
        y : array-like, shape (n_samples,)
            Class labels.
        k : int, default=10
            Stored as ``self.k``.
        """
        # check the input array
        # NOTE(review): the tail of this call was missing from the listing;
        # `copy=True` is reconstructed -- confirm against upstream.
        X, y = check_X_y(X, y, accept_sparse=False, dtype=np.float32,
                         copy=True)

        # make sure we're performing classification here
        # (the check itself was missing from the listing; imported locally so
        # the block does not rely on a module-level import)
        from sklearn.utils.multiclass import check_classification_targets
        check_classification_targets(y)

        # Save the K hyper-parameter so we can use it later
        self.k = k

        # kNN is a special case where we have to save the training data in
        # order to make predictions in the future
        self.X = X
        self.y = y
Example #24
Source File:    From pyts with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def fit(self, X, y=None):
        """Learn indices of the Fourier coefficients to keep.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_timestamps)
            Training vector.

        y : None or array-like, shape = (n_samples,) (default = None)
            Class labels for each data sample. Only used if ``anova=True``.

        Returns
        -------
        self : object

        """
        if self.anova:
            X, y = check_X_y(X, y, dtype='float64')
        else:
            X = check_array(X, dtype='float64')

        n_samples, n_timestamps = X.shape
        n_coefs = self._check_params(n_timestamps)
        if self.anova:
            ss = StandardScaler(self.norm_mean, self.norm_std)
            X = ss.fit_transform(X)
            X_fft = np.fft.rfft(X)
            # Stack real parts on top of imaginary parts, then interleave
            # them column-wise with the Fortran-order reshape below.
            X_fft = np.vstack([np.real(X_fft), np.imag(X_fft)])
            if n_timestamps % 2 == 0:
                # Even length: rfft yields n/2 + 1 coefficients, i.e. n + 2
                # real/imaginary values; columns 1 and -1 are dropped.
                X_fft = X_fft.reshape(n_samples, n_timestamps + 2, order='F')
                X_fft = np.c_[X_fft[:, 0], X_fft[:, 2:-1]]
            else:
                # Odd length: (n + 1)/2 coefficients, i.e. n + 1 values;
                # only column 1 is dropped.
                X_fft = X_fft.reshape(n_samples, n_timestamps + 1, order='F')
                X_fft = np.c_[X_fft[:, 0], X_fft[:, 2:]]
            if self.drop_sum:
                X_fft = X_fft[:, 1:]
            self.support_ = self._anova(X_fft, y, n_coefs, n_timestamps)
        else:
            # Without ANOVA, keep the first `n_coefs` coefficients.
            self.support_ = np.arange(n_coefs)
        return self
Example #25
Source File:    From Quora with MIT License 5 votes vote down vote up
def fit(self, X, y):
        """Fit one estimator per entry of ``self.models``.

        Parameters
        ----------
        X : array-like or sparse matrix
            Training samples (sparse input is accepted by the validation).
        y : array-like
            Targets.

        Returns
        -------
        self : object
            The fitted estimator; the fitted models are stored, in order, in
            ``self._clfs``.
        """
        # Imported locally so the block does not rely on a module-level import.
        from sklearn.base import clone

        # Check that X and y have correct shape
        X, y = check_X_y(X, y, accept_sparse=True)
        # fit models
        # NOTE(review): the expression inside `append(...)` was missing from
        # the listing. Fitting a clone leaves the objects in `self.models`
        # untouched; whether upstream cloned is not visible -- confirm.
        self._clfs = []
        for model in self.models:
            self._clfs.append(clone(model).fit(X, y))
        return self
Example #26
Source File:    From ALiPy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, X=None, y=None, **kwargs):
        """Store the feature matrix and labels, validating them when both are given.

        Parameters
        ----------
        X : array-like, sparse matrix or None, default=None
            Feature matrix.
        y : array-like or None, default=None
            Labels; multi-output is allowed.
        **kwargs
            Accepted but not used by this constructor.
        """
        if X is not None and y is not None:
            if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
                # will not use additional memory
                # (validate only, then keep references to the caller's arrays)
                check_X_y(X, y, accept_sparse='csc', multi_output=True)
                self.X = X
                self.y = y
            else:
                # Other containers are stored in their validated form.
                self.X, self.y = check_X_y(X, y, accept_sparse='csc', multi_output=True)
        else:
            # Nothing to validate; keep whatever was passed (possibly None)
            # so both attributes always exist.
            self.X = X
            self.y = y
Example #27
Source File:    From ALiPy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, X=None, y=None, **kwargs):
        """Store the feature matrix and labels, validating them when both are given.

        Parameters
        ----------
        X : array-like, sparse matrix or None, default=None
            Feature matrix.
        y : array-like or None, default=None
            Labels; multi-output is allowed.
        **kwargs
            Accepted but not used by this constructor.
        """
        if X is not None and y is not None:
            if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
                # will not use additional memory
                # (validate only, then keep references to the caller's arrays)
                check_X_y(X, y, accept_sparse='csc', multi_output=True)
                self.X = X
                self.y = y
            else:
                # Other containers are stored in their validated form.
                self.X, self.y = check_X_y(X, y, accept_sparse='csc', multi_output=True)
        else:
            # Nothing to validate; keep whatever was passed (possibly None)
            # so both attributes always exist.
            self.X = X
            self.y = y
Example #28
Source File:    From ALiPy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, X=None, y=None, **kwargs):
        """Store the feature matrix and labels, validating them when both are given.

        Parameters
        ----------
        X : array-like, sparse matrix or None, default=None
            Feature matrix.
        y : array-like or None, default=None
            Labels; multi-output is allowed.
        **kwargs
            Accepted but not used by this constructor.
        """
        if X is not None and y is not None:
            if isinstance(X, np.ndarray) and isinstance(y, np.ndarray):
                # will not use additional memory
                # (validate only, then keep references to the caller's arrays)
                check_X_y(X, y, accept_sparse='csc', multi_output=True)
                self.X = X
                self.y = y
            else:
                # Other containers are stored in their validated form.
                self.X, self.y = check_X_y(X, y, accept_sparse='csc', multi_output=True)
        else:
            # Nothing to validate; keep whatever was passed (possibly None)
            # so both attributes always exist.
            self.X = X
            self.y = y
Example #29
Source File:    From combo with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def setUp(self):
        """Load ``cardio.mat`` (or generate data) and build the test fixtures.

        Sets ``X_train``, ``X_test``, ``y_train``, ``y_test``, ``clf`` and
        ``roc_floor`` on the test case.
        """
        # Define data file and read X and y
        # Generate some data if the source data is missing
        this_directory = path.abspath(path.dirname(__file__))
        mat_file = 'cardio.mat'
        try:
            mat = loadmat(path.join(this_directory, 'data', mat_file))
        except (TypeError, IOError):
            # Both failure modes are handled identically: fall back to
            # generated data.
            print('{data_file} does not exist. Use generated data'.format(
                data_file=mat_file))
            X, y = generate_data(train_only=True)  # load data
        else:
            X = mat['X']
            y = mat['y'].ravel()
            X, y = check_X_y(X, y)

        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(X, y, test_size=0.4, random_state=42)

        detectors = [LOF(), LOF()]

        self.clf = LSCP(base_estimators=detectors)
        self.roc_floor = 0.6
Example #30
Source File:    From SU_Classification with MIT License 5 votes vote down vote up
def fit(self, x, y):
        from cvxopt import matrix, solvers
        solvers.options['show_progress'] = False

        x, y = check_X_y(x, y)
        x_s, x_u = x[y == +1, :], x[y == 0, :]
        n_s, n_u = len(x_s), len(x_u)

        p_p = self.prior
        p_n = 1 - self.prior
        p_s = p_p ** 2 + p_n ** 2
        k_s = self._basis(x_s)
        k_u = self._basis(x_u)
        d = k_u.shape[1]

        P = np.zeros((d + 2 * n_u, d + 2 * n_u))
        P[:d, :d] = self.lam * np.eye(d)
        q = np.vstack((
            -p_s / (n_s * (p_p - p_n)) *, 1))),
            -p_n / (n_u * (p_p - p_n)) * np.ones((n_u, 1)),
            -p_p / (n_u * (p_p - p_n)) * np.ones((n_u, 1))
        G = np.vstack((
            np.hstack((np.zeros((n_u, d)), -np.eye(n_u), np.zeros((n_u, n_u)))),
            np.hstack((0.5 * k_u, -np.eye(n_u), np.zeros((n_u, n_u)))),
            np.hstack((k_u, -np.eye(n_u), np.zeros((n_u, n_u)))),
            np.hstack((np.zeros((n_u, d)), np.zeros((n_u, n_u)), -np.eye(n_u))),
            np.hstack((-0.5 * k_u, np.zeros((n_u, n_u)), -np.eye(n_u))),
            np.hstack((-k_u, np.zeros((n_u, n_u)), -np.eye(n_u)))
        h = np.vstack((
            np.zeros((n_u, 1)),
            -0.5 * np.ones((n_u, 1)),
            np.zeros((n_u, 1)),
            np.zeros((n_u, 1)),
            -0.5 * np.ones((n_u, 1)),
            np.zeros((n_u, 1))
        sol = solvers.qp(matrix(P), matrix(q), matrix(G), matrix(h))
        self.coef_ = np.array(sol['x'])[:d]