Python Examples of scipy.sparse.hstack

Source File: pipeline.py From Mastering-Elasticsearch-7.0 with MIT License

6 votes

def transform(self, X):
        """Apply every transformer to X and stack the outputs column-wise.

        Parameters
        ----------
        X : iterable or array-like, depending on transformers
            Input data, handed unchanged to each transformer.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            Horizontal concatenation of the transformer outputs. The result
            is a CSR matrix as soon as one output is sparse and a dense
            array otherwise. When ``self._iter()`` yields nothing, an array
            with ``X.shape[0]`` rows and zero columns is returned.
        """
        runner = Parallel(n_jobs=self.n_jobs)
        outputs = runner(
            delayed(_transform_one)(transformer, X, None, weight)
            for _, transformer, weight in self._iter())
        if not outputs:
            # All transformers are None: nothing to stack.
            return np.zeros((X.shape[0], 0))
        if any(sparse.issparse(block) for block in outputs):
            return sparse.hstack(outputs).tocsr()
        return np.hstack(outputs)

Source File: wordbatch_model.py From mercari-price-suggestion with MIT License

6 votes

def predict(self, df):
        """Vectorize ``df`` with the fitted transformers and run the model.

        Every feature group is transformed separately, the groups are
        stacked horizontally into one CSR matrix and the matrix is passed
        to ``self.model.predict``.

        Side effect: a ``cat_brand`` column (``category_name`` and
        ``brand_name`` joined by a single space) is written into ``df``.

        Returns whatever ``self.model.predict`` returns.
        """
        # Description features restricted to the selected columns.
        desc = self.wb_desc.transform(df["item_description"])[:, self.desc_indices]

        # The two name vectorizers are re-weighted (x2 and x0.5).
        name = 2 * self.cv_name.transform(df["name"])
        name2 = 0.5 * self.cv_name2.transform(df["name"])

        cat0 = self.cv_cat0.transform(df['subcat_0'])
        cat1 = self.cv_cat1.transform(df['subcat_1'])
        cat2 = self.cv_cat2.transform(df['subcat_2'])
        brand = self.cv_brand.transform(df['brand_name'])

        # Condition id and shipping flag are folded into one string token.
        condition_key = (df['item_condition_id'] + 10 * df["shipping"]).apply(str)
        condition = self.cv_condition.transform(condition_key)

        df["cat_brand"] = [
            category + " " + brand_name
            for category, brand_name in zip(df["category_name"], df["brand_name"])
        ]
        cat_brand = self.cv_cat_brand.transform(df["cat_brand"])
        desc3 = self.desc3.transform(df["item_description"])

        # The column order must match the order used when the model was fit.
        blocks = (condition,
                  desc, brand,
                  cat0, cat1, cat2,
                  name, name2,
                  cat_brand, desc3)
        features = hstack(blocks).tocsr()

        return self.model.predict(features)

Source File: categorical.py From Kaggler with MIT License

6 votes

def transform(self, X):
        """Encode categorical columns into sparse matrix with one-hot-encoding.

        Every column is encoded with ``self._transform_col``; columns for
        which it returns ``None`` contribute no features and are skipped.

        Args:
            X (pandas.DataFrame): categorical columns to encode

        Returns:
            (scipy.sparse matrix): sparse matrix encoding categorical
                                   variables into dummy variables. With
                                   several encoded columns this is the COO
                                   result of ``sparse.hstack``; with a single
                                   encoded column that column's matrix is
                                   returned unchanged; with none, an empty
                                   ``(n_rows, 0)`` COO matrix is returned.
        """
        # Collect the encoded columns first and stack once. The previous
        # version only initialised the accumulator when ``i == 0``, so it
        # failed with UnboundLocalError whenever the first column produced
        # no features or X had no columns at all.
        encoded = []
        for i, col in enumerate(X.columns):
            X_col = self._transform_col(X[col], i)
            if X_col is not None:
                encoded.append(X_col)

            logger.debug('{} --> {} features'.format(
                col, self.label_encoder.label_maxes[i])
            )

        if not encoded:
            return sparse.coo_matrix((X.shape[0], 0))
        if len(encoded) == 1:
            return encoded[0]
        return sparse.hstack(encoded)

Source File: designmatrix.py From lightkurve with MIT License

6 votes

def append_constant(self, prior_mu=0, prior_sigma=np.inf, inplace=False):
        """Append a column of ones to the sparse design matrix.

        Parameters
        ----------
        prior_mu : value appended to ``prior_mu`` for the new column.
        prior_sigma : value appended to ``prior_sigma`` for the new column.
        inplace : bool
            If True this object is modified and returned; otherwise the
            change is applied to ``self.copy()`` and the copy is returned.

        Returns
        -------
        `.SparseDesignMatrix`
            Design matrix whose ``_X`` (LIL format) has one extra trailing
            column of ones.
        """
        # NOTE(review): the previous docstring said the new column is named
        # "offset", but no column name is recorded here -- confirm elsewhere.
        target = self if inplace else self.copy()
        ones_column = lil_matrix(np.ones(target.shape[0])).T
        target._X = hstack([target.X, ones_column], format='lil')
        target.prior_mu = np.append(target.prior_mu, prior_mu)
        target.prior_sigma = np.append(target.prior_sigma, prior_sigma)
        return target

Source File: designmatrix.py From lightkurve with MIT License

6 votes

def __init__(self, matrices):
        """Build a collection whose stacked matrix ``X`` is sparse (CSR).

        If any member's ``X`` is not sparse, a `LightkurveWarning` is issued
        and every `DesignMatrix` instance is replaced by
        ``m.copy().to_sparse()``; other members are kept as they are.
        """
        sparse_flags = [issparse(m.X) for m in matrices]
        if all(sparse_flags):
            self.matrices = matrices
        else:
            # This collection is designed for sparse matrices, so warn when
            # a dense DesignMatrix is passed and convert it.
            warnings.warn(('Not all matrices are `SparseDesignMatrix` objects. '
                           'Dense matrices will be converted to sparse matrices.'),
                          LightkurveWarning)
            self.matrices = [
                m.copy().to_sparse() if isinstance(m, DesignMatrix) else m
                for m in matrices
            ]
        self.X = hstack([m.X for m in self.matrices], format='csr')
        self._child_class = SparseDesignMatrix
        self.validate()

Source File: core.py From neuropythy with GNU Affero General Public License v3.0

6 votes

def to_curve_spline(obj):
    '''
    to_curve_spline(obj) yields obj if obj is a curve spline and otherwise attempts to coerce obj
      into a curve spline, raising an error if it cannot.

    obj may be a coordinate matrix, a curve spline, a sequence of matrices and/or curve splines,
    or a 2-tuple (coordinates, options) whose options dict is passed on to curve_spline(). When
    options are missing they are derived from the splines found in the input:
      * 'weights' is the concatenation of the spline weights, only if every element is a spline
        and every spline has weights;
      * 'order' is the minimum order over the splines;
      * 'smoothing' is the common smoothing value if all splines agree, otherwise None.
    Each coordinate block is oriented so that its first dimension is 2 before the blocks are
    stacked horizontally.
    '''
    if is_curve_spline(obj):
        return obj
    if is_tuple(obj) and len(obj) == 2:
        (crds, opts) = obj
    else:
        (crds, opts) = (obj, {})
    # A lone matrix or spline is treated as a one-element sequence.
    if pimms.is_matrix(crds) or is_curve_spline(crds):
        crds = [crds]
    spls = [c for c in crds if is_curve_spline(c)]
    opts = dict(opts)  # copy so the caller's options are never mutated
    if 'weights' not in opts and len(spls) == len(crds):
        if all(c.weights is not None for c in crds):
            opts['weights'] = np.concatenate([c.weights for c in crds])
    if 'order' not in opts and len(spls) > 0:
        opts['order'] = np.min([c.order for c in spls])
    if 'smoothing' not in opts and len(spls) > 0:
        sm = set([c.smoothing for c in spls])
        opts['smoothing'] = list(sm)[0] if len(sm) == 1 else None
    # Test each element (x), not the enclosing list: the list itself is never
    # a curve spline, so spline elements were previously never unwrapped to
    # their coordinate matrix.
    crds = [x.crds if is_curve_spline(x) else np.asarray(x) for x in crds]
    crds = [x if x.shape[0] == 2 else x.T for x in crds]
    crds = np.hstack(crds)
    return curve_spline(crds, **opts)

Source File: designmatrix.py From lightkurve with MIT License

6 votes

def plot(self, ax=None, **kwargs):
        """Visualize the design matrix values as an image.

        The matrices of the collection are stacked horizontally and wrapped
        in a temporary `SparseDesignMatrix`, whose ``plot`` method does the
        drawing.

        Parameters
        ----------
        ax : `~matplotlib.axes.Axes`
            A matplotlib axes object to plot into. If no axes is provided,
            a new one will be created.
        **kwargs : dict
            Extra parameters forwarded to the temporary matrix's ``plot``
            call (documented upstream as reaching `.plot_image`).

        Returns
        -------
        `~matplotlib.axes.Axes`
            The matplotlib axes object.
        """
        temp_dm = SparseDesignMatrix(hstack([d.X for d in self]))
        # Forward ``ax``: it was previously accepted but silently dropped, so
        # a caller-supplied axes was never drawn into.
        ax = temp_dm.plot(ax=ax, **kwargs)
        ax.set_title("Design Matrix Collection")
        return ax

Source File: designmatrix.py From lightkurve with MIT License

6 votes

def __init__(self, matrices):
        """Build a collection whose stacked matrix ``X`` is a dense array.

        If any member's ``X`` is sparse, a `LightkurveWarning` is issued and
        every `SparseDesignMatrix` instance is replaced by
        ``m.copy().to_dense()``; other members are kept as they are.
        """
        sparse_flags = [issparse(m.X) for m in matrices]
        if not any(sparse_flags):
            self.matrices = matrices
        else:
            # This collection is designed for dense matrices, so warn when a
            # SparseDesignMatrix is passed and convert it.
            warnings.warn(('Some matrices are `SparseDesignMatrix` objects. '
                           'Sparse matrices will be converted to dense matrices.'),
                          LightkurveWarning)
            self.matrices = [
                m.copy().to_dense() if isinstance(m, SparseDesignMatrix) else m
                for m in matrices
            ]
        self.X = np.hstack(tuple(m.X for m in self.matrices))
        self._child_class = DesignMatrix
        self.validate()

Source File: featurizer.py From snips-nlu with Apache License 2.0

6 votes

def fit_transform(self, dataset, utterances, classes, none_class):
        """Fit the vectorizers on the utterances and return their features.

        The dataset is validated and its language stored on ``self``. The
        TF-IDF features are always computed; when
        ``self.config.added_cooccurrence_feature_ratio`` is truthy the
        cooccurrence vectorizer is fitted as well and its features are
        stacked to the right of the TF-IDF ones.

        Raises ``_EmptyDatasetUtterancesError`` when no utterance yields any
        token.
        """
        import scipy.sparse as sp

        dataset = validate_and_format_dataset(dataset)
        self.language = dataset[LANGUAGE]

        texts = (get_text_from_chunks(u[DATA]) for u in utterances)
        has_tokens = any(tokenize_light(text, self.language) for text in texts)
        if not has_tokens:
            raise _EmptyDatasetUtterancesError(
                "Tokenized utterances are empty")

        features = self._fit_transform_tfidf_vectorizer(
            utterances, classes, dataset)
        if not self.config.added_cooccurrence_feature_ratio:
            return features

        self._fit_cooccurrence_vectorizer(
            utterances, classes, none_class, dataset)
        cooccurrence = self.cooccurrence_vectorizer.transform(utterances)
        return sp.hstack((features, cooccurrence))

Source File: features_generation_tools.py From corpus-to-graph-ml with MIT License

6 votes

def get_compound_features(train_data, test_data, feature_gen_methods):
    """Run every feature generator and stack their outputs horizontally.

    Each callable in ``feature_gen_methods`` is called as
    ``method(train_data, test_data)`` and must return a
    ``(train_features, test_features)`` pair. The train blocks and the test
    blocks are stacked column-wise in the order of the methods.

    Returns ``(train_features, test_features)``. With a single method its
    outputs are returned unchanged, without going through ``hstack``.

    Raises ``IndexError`` when ``feature_gen_methods`` is empty.
    """
    train_blocks = []
    test_blocks = []
    for method in feature_gen_methods:
        train_part, test_part = method(train_data, test_data)
        train_blocks.append(train_part)
        test_blocks.append(test_part)

    train_features = train_blocks[0]
    test_features = test_blocks[0]

    # Stack all blocks in one call. This replaces an ``xrange`` loop (a
    # NameError on Python 3) that re-copied the growing matrix on every
    # pairwise hstack.
    if len(train_blocks) > 1:
        train_features = hstack(train_blocks)
        test_features = hstack(test_blocks)

    return train_features, test_features

Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License

6 votes

def generate_train_batch(self):
        """Sample a training batch and build its sparse feature matrices.

        Returns a dict with two entries: ``'pos_feats'`` (user one-hot rows
        stacked with the feature rows of the positive items) and
        ``'neg_feats'`` (the same user rows stacked with the feature rows of
        the negative items).
        """
        users, pos_items, neg_items = self._generate_train_cf_batch()

        user_rows = self.user_one_hot[users]
        pos_rows = self.kg_feat_mat[pos_items]
        neg_rows = self.kg_feat_mat[neg_items]

        # Horizontally stack the user block with each item block.
        return {
            'pos_feats': sp.hstack([user_rows, pos_rows]),
            'neg_feats': sp.hstack([user_rows, neg_rows]),
        }

Source File: feature_expansion.py From KDDCup2019_admin with MIT License

6 votes

def cat_onehot_encoder_m(df,y,col,selection=True):
    """Multi-hot encode ``df`` and optionally score the encoded features.

    A sparse-output ``MultiLabelBinarizer`` is fitted on ``df.values`` and
    the transformed result is converted to a float CSR matrix. When
    ``selection`` is exactly ``True`` the features are passed with ``y`` to
    ``train_lightgbm_for_feature_selection`` and the AUC is printed.

    Returns ``(features, binarizer, models, auc_score)``; ``models`` and
    ``auc_score`` are ``None`` when no selection is run.
    """
    from scipy.sparse import csr_matrix

    mlbs = MultiLabelBinarizer(sparse_output=True).fit(df.values)
    new_feature = csr_matrix(mlbs.transform(df.values), dtype=float)

    models, auc_score = None, None
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(new_feature, y)
        print(col, "auc", auc_score)

    return new_feature, mlbs, models, auc_score

Source File: feature_for_test.py From KDDCup2019_admin with MIT License

6 votes

def multi_features_for_test(df,columns,mlbs,models):
    """Encode the requested columns and stack the results into one matrix.

    Only columns that have an entry in ``mlbs`` are encoded, each through
    ``multi_feature_for_one_col``. The encoded blocks are stacked
    horizontally, in the order of ``columns``, as a float sparse matrix.

    ``models`` is accepted but not used: ``None`` is passed as the model of
    every column.
    """
    encoded = {}
    for col in columns:
        if col not in mlbs:
            continue
        encoded[col] = multi_feature_for_one_col(df[col], mlbs[col], None, col)

    from scipy.sparse import hstack
    ordered_blocks = [encoded[col] for col in columns if col in encoded]
    return hstack(ordered_blocks, dtype=float)

Source File: pandas_feature_union.py From pandas-feature-union with MIT License

6 votes

def fit_transform(self, X, y=None, **fit_params):
        """Fit every transformer on X, transform X and merge the outputs.

        The fitted transformers replace the current ones through
        ``self._update_transformer_list``. Outputs are stacked into a CSR
        matrix when at least one of them is sparse; otherwise they are
        merged with ``self.merge_dataframes_by_column``. When
        ``self._iter()`` yields nothing, an array with ``X.shape[0]`` rows
        and zero columns is returned.
        """
        self._validate_transformers()
        runner = Parallel(n_jobs=self.n_jobs)
        fitted = runner(
            delayed(_fit_transform_one)(
                transformer=trans,
                X=X,
                y=y,
                weight=weight,
                **fit_params)
            for _, trans, weight in self._iter())

        if not fitted:
            # All transformers are None
            return np.zeros((X.shape[0], 0))

        outputs, transformers = zip(*fitted)
        self._update_transformer_list(transformers)
        if any(sparse.issparse(block) for block in outputs):
            return sparse.hstack(outputs).tocsr()
        return self.merge_dataframes_by_column(outputs)

Source File: pandas_feature_union.py From pandas-feature-union with MIT License

6 votes

def transform(self, X):
        """Transform X with every transformer and merge the outputs.

        Outputs are stacked into a CSR matrix when at least one of them is
        sparse; otherwise they are merged with
        ``self.merge_dataframes_by_column``. When ``self._iter()`` yields
        nothing, an array with ``X.shape[0]`` rows and zero columns is
        returned.
        """
        runner = Parallel(n_jobs=self.n_jobs)
        outputs = runner(
            delayed(_transform_one)(
                transformer=trans,
                X=X,
                y=None,
                weight=weight)
            for _, trans, weight in self._iter())
        if not outputs:
            # All transformers are None
            return np.zeros((X.shape[0], 0))
        if any(sparse.issparse(block) for block in outputs):
            return sparse.hstack(outputs).tocsr()
        return self.merge_dataframes_by_column(outputs)

Source File: backend.py From mlens with MIT License

6 votes

def _propagate_features(self, task):
        """Propagate features from input array to output array."""
        p_out, p_in = self.job.predict_out, self.job.predict_in

        # Check for loss of obs between layers (i.e. with blendindex)
        n_in, n_out = p_in.shape[0], p_out.shape[0]
        r = int(n_in - n_out)

        if not issparse(p_in):
            # Simple item setting
            p_out[:, :task.n_feature_prop] = p_in[r:, task.propagate_features]
        else:
            # Need to populate propagated features using scipy sparse hstack
            self.job.predict_out = hstack(
                [p_in[r:, task.propagate_features],
                 p_out[:, task.n_feature_prop:]]
            ).tolil()

Source File: operator_utils.py From grove with Apache License 2.0

6 votes

def __init__(self, labels_ops):
        """
        Encapsulates a set of linearly independent operators.

        :param (list|tuple) labels_ops: Sequence of tuples (label, operator) where label is a string
            and operator a qutip.Qobj operator representation.
        """
        self.ops_by_label = OrderedDict(labels_ops)
        self.labels = list(self.ops_by_label)
        self.ops = list(self.ops_by_label.values())
        self.dim = len(self.ops)

        # Basis change matrix from a representation in the operator basis to
        # the original basis: one vectorized operator per column. CSR is
        # enforced to get efficient matrix vector products.
        vectorized_ops = [qt.operator_to_vector(op).data for op in self.ops]
        self.basis_transform = sphstack(vectorized_ops).tocsr()

        # Cached properties, filled in elsewhere.
        self._metric = None
        self._is_orthonormal = None
        self._all_hermitian = None

Source File: xc_metrics.py From pyxclib with MIT License

6 votes

def _setup_metric(X, true_labels, inv_psp=None, k=5):
    """Prepare the inputs shared by the top-k metrics.

    Computes the top-k indices of ``X`` and, when ``inv_psp`` is given, the
    top-k indices of the ground truth re-weighted by ``inv_psp``. The ground
    truth is then padded with one extra all-zero int32 column (and
    ``inv_psp`` with one extra zero).

    Returns ``(indices, true_labels, ps_indices, inv_psp)``; ``ps_indices``
    and ``inv_psp`` stay ``None`` when no ``inv_psp`` is supplied.
    """
    assert compatible_shapes(X, true_labels), \
        "ground truth and prediction matrices must have same shape."
    num_instances, num_labels = true_labels.shape
    indices = _get_topk(X, num_labels, k)

    ps_indices = None
    if inv_psp is not None:
        weights = sp.spdiags(inv_psp, diags=0,
                             m=num_labels, n=num_labels)
        ps_indices = _get_topk(true_labels.dot(weights), num_labels, k)
        inv_psp = np.hstack([inv_psp, np.zeros((1))])

    padding = sp.lil_matrix((num_instances, 1), dtype=np.int32)
    true_labels = sp.hstack([true_labels, padding]).tocsr()
    return indices, true_labels, ps_indices, inv_psp

Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License

6 votes

def generate_test_feed_dict(self, model, user_batch, item_batch, drop_flag=True):
        """Build the feed dict scoring every user of the batch on every item.

        Each user is repeated once per item and the item list is repeated
        once per user, so the rows cover all (user, item) pairs. The user
        one-hot rows and the item feature rows are stacked horizontally and
        decomposed with ``self._extract_sp_info``.

        ``drop_flag`` is accepted but not used here; message dropout is fed
        as 0 for every layer.
        """
        user_list = np.repeat(user_batch, len(item_batch)).tolist()
        item_list = list(item_batch) * len(user_batch)

        user_rows = self.user_one_hot[user_list]
        item_rows = self.kg_feat_mat[item_list]

        # Horizontally stack the sparse matrices into one feature matrix.
        stacked = sp.hstack([user_rows, item_rows])
        indices, values, shape = self._extract_sp_info(stacked)

        # NOTE(review): eval() runs on the ``args.layer_size`` string; this is
        # only safe while that value comes from a trusted source.
        n_layers = len(eval(self.args.layer_size))

        return {
            model.pos_indices: indices,
            model.pos_values: values,
            model.pos_shape: shape,

            model.mess_dropout: [0.] * n_layers,
        }

Source File: feature_union.py From Wordbatch with GNU General Public License v2.0

6 votes

def transform(self, X):
		"""Transform X separately by each transformer and collect the results.

		Parameters
		----------
		X : iterable or array-like, depending on transformers
			Input data to be transformed. A transformer exposing
			``col_pick`` receives ``X[transformer['col_pick']]`` instead
			of the whole input.

		Returns
		-------
		X_t : array-like, sparse matrix or list
			When ``self.concatenate`` is set, the hstack of the results
			(CSR if any result is sparse, dense otherwise); otherwise the
			list of results as produced. When there are no results, an
			array with ``X.shape[0]`` rows and zero columns.
		"""
		# NOTE(review): ``col_pick`` is detected with hasattr() but read by
		# subscription -- confirm the transformers support both.
		jobs = []
		for _, transformer, _ in self._iter():
			data = X[transformer['col_pick']] if hasattr(transformer, 'col_pick') else X
			jobs.append([data, transformer])
		outputs = Apply(transform_one, self.batcher).transform(jobs)
		if not outputs:
			# All transformers are None
			return np.zeros((X.shape[0], 0))
		if not self.concatenate:
			return outputs
		if any(sparse.issparse(block) for block in outputs):
			return sparse.hstack(outputs).tocsr()
		return np.hstack(outputs)

Source File: designmatrix.py From lightkurve with MIT License

5 votes

def values(self):
        """Horizontally stacked ``values`` of every matrix in the collection."""
        blocks = tuple(matrix.values for matrix in self.matrices)
        return np.hstack(blocks)

Source File: kernel_approximation.py From Mastering-Elasticsearch-7.0 with MIT License

5 votes

def _transform_sparse(self, X):
        indices = X.indices.copy()
        indptr = X.indptr.copy()

        data_step = np.sqrt(X.data * self.sample_interval_)
        X_step = sp.csr_matrix((data_step, indices, indptr),
                               shape=X.shape, dtype=X.dtype, copy=False)
        X_new = [X_step]

        log_step_nz = self.sample_interval_ * np.log(X.data)
        step_nz = 2 * X.data * self.sample_interval_

        for j in range(1, self.sample_steps):
            factor_nz = np.sqrt(step_nz /
                                np.cosh(np.pi * j * self.sample_interval_))

            data_step = factor_nz * np.cos(j * log_step_nz)
            X_step = sp.csr_matrix((data_step, indices, indptr),
                                   shape=X.shape, dtype=X.dtype, copy=False)
            X_new.append(X_step)

            data_step = factor_nz * np.sin(j * log_step_nz)
            X_step = sp.csr_matrix((data_step, indices, indptr),
                                   shape=X.shape, dtype=X.dtype, copy=False)
            X_new.append(X_step)

        return sp.hstack(X_new)

Source File: featurizer.py From snips-nlu with Apache License 2.0

5 votes

def transform(self, utterances):
        """Return the TF-IDF features of the utterances.

        When a cooccurrence vectorizer is set (truthy), its features are
        stacked to the right of the TF-IDF ones.
        """
        import scipy.sparse as sp

        tfidf = self.tfidf_vectorizer.transform(utterances)
        if not self.cooccurrence_vectorizer:
            return tfidf
        cooccurrence = self.cooccurrence_vectorizer.transform(utterances)
        return sp.hstack((tfidf, cooccurrence))

Source File: matrix.py From ektelo with Apache License 2.0

5 votes

def dense_matrix(self):
        """Horizontally stack the dense form of every sub-matrix."""
        dense_blocks = []
        for sub_matrix in self.matrices:
            dense_blocks.append(sub_matrix.dense_matrix())
        return np.hstack(dense_blocks)

Source File: longitudinal_features_product.py From tick with BSD 3-Clause "New" or "Revised" License

5 votes

def _dense_finite_product(self, feat_mat):
        """Performs feature product on a numpy.ndarray containing
        finite exposures."""
        feat = [feat_mat]
        feat.extend([(feat_mat[:, i] * feat_mat[:, j]).reshape((-1, 1))
                     for i, j in self._mapper.values()])
        return np.hstack(feat)

Source File: longitudinal_features_product.py From tick with BSD 3-Clause "New" or "Revised" License

5 votes

def _sparse_finite_product(self, feat_mat):
        """Performs feature product on a scipy.sparse.csr_matrix containing
        finite exposures."""
        feat = [feat_mat.tocsc()]
        feat.extend([(feat_mat[:, i].multiply(feat_mat[:, j]))
                     for i, j in self.mapper.values()])
        return sps.hstack(feat).tocsr()

Source File: operator_utils.py From grove with Apache License 2.0

5 votes

def to_realimag(z):
    """
    Convert a complex hermitian matrix to a real valued doubled up representation, i.e., for
    ``Z = Z_r + 1j * Z_i`` return ``R(Z)``::

        R(Z) = [ Z_r   Z_i]
               [-Z_i   Z_r]

    The lower-left block is built as the transpose of ``Z_i``, which equals ``-Z_i`` because the
    imaginary part of a hermitian matrix is antisymmetric.

    The map is an isomorphism, products are preserved::

        R(X)*R(Y) = [ (X_r*Y_r-X_i*Y_i)    (X_r*Y_i + X_i*Y_r)]
                    [-(X_r*Y_i + X_i*Y_r)  (X_r*Y_r-X_i*Y_i)  ]

                  = R(X*Y).

    In particular, ``Z`` is complex positive (semi-)definite iff ``R(Z)`` is real positive
    (semi-)definite.

    :param (qutip.Qobj|scipy.sparse.base.spmatrix) z:  The operator representation matrix.
    :returns: R(Z) the doubled up representation.
    :rtype: scipy.sparse.csr_matrix
    :raises ValueError: if ``z`` is not hermitian.
    """
    if isinstance(z, qt.Qobj):
        z = z.data
    if not is_hermitian(z):  # pragma no coverage
        raise ValueError("Need a hermitian matrix z")
    upper_row = sphstack([z.real, z.imag])
    lower_row = sphstack([z.imag.T, z.real])
    return spvstack([upper_row, lower_row]).tocsr().real

Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License

5 votes

def _extract_sp_info(self, sp_feats):
        sp_indices = np.hstack((sp_feats.nonzero()[0][:, None],
                                sp_feats.nonzero()[1][:, None]))
        sp_values = sp_feats.data
        sp_shape = sp_feats.shape
        return sp_indices, sp_values, sp_shape

Source File: test_basic.py From attention-lvcsr with MIT License

5 votes

def test_hstack_vstack():
    """
    Tests sparse.hstack and sparse.vstack (as opposed to the HStack and VStack
    classes that they wrap).

    Both functions are checked on blocks of mixed dtypes: without a target
    dtype the result must have the upcast of the block dtypes, otherwise the
    requested dtype.
    """

    def make_block(dtype):
        return theano.sparse.csr_matrix(name="%s block" % dtype,
                                        dtype=dtype)

    def get_expected_dtype(blocks, to_dtype):
        if to_dtype is not None:
            return to_dtype
        return theano.scalar.upcast(*tuple(b.dtype for b in blocks))

    # a deliberately weird mix of dtypes to stack
    dtypes = ('complex128', theano.config.floatX)
    blocks = [make_block(dtype) for dtype in dtypes]

    for stack_function in (theano.sparse.vstack, theano.sparse.hstack):
        for to_dtype in (None, ) + dtypes:
            stacked_blocks = stack_function(blocks, dtype=to_dtype)
            expected_dtype = get_expected_dtype(blocks, to_dtype)
            assert stacked_blocks.dtype == expected_dtype

Source File: feature.py From text-classifier with Apache License 2.0

5 votes

def _add_feature(self, X, feature_to_add):
        """
        Returns sparse feature matrix with added feature.
        feature_to_add can also be a list of features.
        """
        from scipy.sparse import csr_matrix, hstack
        return hstack([X, csr_matrix(feature_to_add)], 'csr')

Python scipy.sparse.hstack() Examples