Python scipy.sparse.hstack() Examples
The following are 30 code examples of scipy.sparse.hstack().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
Example #1
Source File: pipeline.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def transform(self, X):
    """Apply every transformer to X and join the outputs column-wise.

    Parameters
    ----------
    X : iterable or array-like, depending on transformers
        Input data to be transformed.

    Returns
    -------
    X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the
        sum of n_components (output dimension) over transformers.
        When ``self._iter()`` yields nothing, an empty dense array of shape
        ``(X.shape[0], 0)`` is returned instead.
    """
    runner = Parallel(n_jobs=self.n_jobs)
    pieces = runner(
        delayed(_transform_one)(trans, X, None, weight)
        for name, trans, weight in self._iter()
    )

    if not pieces:
        # All transformers are None
        return np.zeros((X.shape[0], 0))

    # A single sparse output forces the whole result to be sparse (CSR).
    if any(sparse.issparse(piece) for piece in pieces):
        return sparse.hstack(pieces).tocsr()
    return np.hstack(pieces)
Example #2
Source File: wordbatch_model.py From mercari-price-suggestion with MIT License | 6 votes |
def predict(self, df):
    """Vectorise ``df`` with the fitted transformers and predict on the result.

    Each text/categorical column is passed through its transformer, the
    resulting blocks are stacked horizontally into one CSR matrix and that
    matrix is handed to ``self.model.predict``.

    Side effect: a ``"cat_brand"`` column (category name and brand name
    joined by a space) is written into ``df``.
    """
    description = self.wb_desc.transform(df["item_description"])
    description = description[:, self.desc_indices]
    # The two name vectorisers are re-weighted (x2 and x0.5) before stacking.
    name = 2 * self.cv_name.transform(df["name"])
    name2 = 0.5 * self.cv_name2.transform(df["name"])
    category0 = self.cv_cat0.transform(df['subcat_0'])
    category1 = self.cv_cat1.transform(df['subcat_1'])
    category2 = self.cv_cat2.transform(df['subcat_2'])
    brand = self.cv_brand.transform(df['brand_name'])

    # Condition and shipping are fused into a single string-valued key.
    condition_key = (df['item_condition_id'] + 10 * df["shipping"]).apply(str)
    condition = self.cv_condition.transform(condition_key)

    df["cat_brand"] = [
        category + " " + brand_name
        for category, brand_name in zip(df["category_name"], df["brand_name"])
    ]
    cat_brand = self.cv_cat_brand.transform(df["cat_brand"])
    description3 = self.desc3.transform(df["item_description"])

    blocks = (condition, description, brand, category0, category1,
              category2, name, name2, cat_brand, description3)
    features = hstack(blocks).tocsr()
    return self.model.predict(features)
Example #3
Source File: categorical.py From Kaggler with MIT License | 6 votes |
def transform(self, X):
    """Encode categorical columns into sparse matrix with one-hot-encoding.

    Every column is encoded with ``self._transform_col``; columns for which
    that helper returns ``None`` are skipped. The remaining blocks are
    stacked horizontally in a single ``hstack`` call, so a skipped *first*
    column no longer leaves the result uninitialised.

    Args:
        X (pandas.DataFrame): categorical columns to encode

    Returns:
        (scipy.sparse.coo_matrix): sparse matrix encoding categorical
            variables into dummy variables. When exactly one column
            produces output, that block is returned as-is; when none does,
            an empty ``(X.shape[0], 0)`` matrix is returned.
    """
    encoded = []
    for i, col in enumerate(X.columns):
        X_col = self._transform_col(X[col], i)
        if X_col is not None:
            encoded.append(X_col)

        logger.debug('{} --> {} features'.format(
            col, self.label_encoder.label_maxes[i])
        )

    if not encoded:
        # Nothing to encode: return a well-formed empty feature matrix
        # instead of failing on an unbound local.
        return sparse.coo_matrix((X.shape[0], 0))
    if len(encoded) == 1:
        return encoded[0]
    return sparse.hstack(encoded)
Example #4
Source File: designmatrix.py From lightkurve with MIT License | 6 votes |
def append_constant(self, prior_mu=0, prior_sigma=np.inf, inplace=False):
    """Append a column of ones to the sparse design matrix.

    Parameters
    ----------
    prior_mu : scalar
        Value appended to ``prior_mu`` for the new column.
    prior_sigma : scalar
        Value appended to ``prior_sigma`` for the new column.
    inplace : bool
        If true, ``self`` is modified and returned; otherwise the change is
        applied to ``self.copy()``.

    Returns
    -------
    `.SparseDesignMatrix`
        Design matrix with a column of ones appended.
        NOTE(review): earlier documentation said the column is named
        "offset"; nothing in this method assigns a name -- confirm.
    """
    dm = self if inplace else self.copy()
    # np.ones(n) becomes a 1 x n sparse row; transpose it into an n x 1 column.
    ones_column = lil_matrix(np.ones(dm.shape[0])).T
    dm._X = hstack([dm.X, ones_column], format='lil')
    dm.prior_mu = np.append(dm.prior_mu, prior_mu)
    dm.prior_sigma = np.append(dm.prior_sigma, prior_sigma)
    return dm
Example #5
Source File: designmatrix.py From lightkurve with MIT License | 6 votes |
def __init__(self, matrices):
    """Build the collection and stack the members' ``X`` into one CSR matrix.

    If every member's ``X`` is sparse, ``matrices`` is stored unchanged.
    Otherwise a `LightkurveWarning` is issued and every `DesignMatrix`
    instance is replaced by ``m.copy().to_sparse()``; other members are
    kept as passed. ``self.validate()`` is called last.
    """
    all_sparse = np.all([issparse(m.X) for m in matrices])
    if all_sparse:
        self.matrices = matrices
    else:
        # This collection is designed for sparse matrices, so we raise a
        # warning if a dense DesignMatrix is passed
        warnings.warn(('Not all matrices are `SparseDesignMatrix` objects. '
                       'Dense matrices will be converted to sparse matrices.'),
                      LightkurveWarning)
        self.matrices = [
            m.copy().to_sparse() if isinstance(m, DesignMatrix) else m
            for m in matrices
        ]

    member_blocks = [m.X for m in self.matrices]
    self.X = hstack(member_blocks, format='csr')
    self._child_class = SparseDesignMatrix
    self.validate()
Example #6
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def to_curve_spline(obj):
    '''
    to_curve_spline(obj) yields obj if obj is a curve spline and otherwise attempts to coerce obj
      into a curve spline, raising an error if it cannot.

    obj may be a curve spline (returned unchanged), a 2-tuple (coordinates, options), or the
    coordinates alone. The coordinates may be a single matrix / curve spline or a sequence of
    them; the pieces are oriented so that their first dimension is 2 and then concatenated with
    np.hstack. When not given explicitly in the options:
      * 'weights' is the concatenation of the pieces' weights, if every piece is a spline with
        weights;
      * 'order' is the minimum order over the spline pieces;
      * 'smoothing' is the shared smoothing of the spline pieces, or None if they disagree.
    '''
    if is_curve_spline(obj):
        return obj
    elif is_tuple(obj) and len(obj) == 2:
        (crds, opts) = obj
    else:
        (crds, opts) = (obj, {})
    # Normalise a single matrix / spline into a one-element list.
    if pimms.is_matrix(crds) or is_curve_spline(crds):
        crds = [crds]
    spls = [c for c in crds if is_curve_spline(c)]
    opts = dict(opts)  # copy so the caller's options are never mutated
    if 'weights' not in opts and len(spls) == len(crds):
        if all(c.weights is not None for c in crds):
            opts['weights'] = np.concatenate([c.weights for c in crds])
    if 'order' not in opts and len(spls) > 0:
        opts['order'] = np.min([c.order for c in spls])
    if 'smoothing' not in opts and len(spls) > 0:
        sm = set([c.smoothing for c in spls])
        opts['smoothing'] = list(sm)[0] if len(sm) == 1 else None
    # Test each element, not the enclosing list: the previous check on `crds`
    # was never true here, so spline elements were passed to np.asarray.
    # NOTE(review): assumes curve splines expose their coordinates as `.crds`,
    # as the original expression did -- confirm against the spline class.
    crds = [x.crds if is_curve_spline(x) else np.asarray(x) for x in crds]
    crds = [x if x.shape[0] == 2 else x.T for x in crds]
    crds = np.hstack(crds)
    return curve_spline(crds, **opts)
Example #7
Source File: designmatrix.py From lightkurve with MIT License | 6 votes |
def plot(self, ax=None, **kwargs):
    """Visualize the design matrix values as an image.

    The ``X`` matrices of all members are stacked horizontally into a
    temporary `SparseDesignMatrix`, whose ``plot`` method does the drawing.

    Parameters
    ----------
    ax : `~matplotlib.axes.Axes`
        A matplotlib axes object to plot into. If no axes is provided,
        a new one will be created.
    **kwargs : dict
        Extra parameters to be passed to `.plot_image`.

    Returns
    -------
    `~matplotlib.axes.Axes`
        The matplotlib axes object.
    """
    temp_dm = SparseDesignMatrix(hstack([d.X for d in self]))
    # Forward the caller's axes; previously ``ax`` was accepted but ignored,
    # so a new axes was always created.
    ax = temp_dm.plot(ax=ax, **kwargs)
    ax.set_title("Design Matrix Collection")
    return ax
Example #8
Source File: designmatrix.py From lightkurve with MIT License | 6 votes |
def __init__(self, matrices):
    """Build the collection and stack the members' ``X`` into one dense array.

    If no member's ``X`` is sparse, ``matrices`` is stored unchanged.
    Otherwise a `LightkurveWarning` is issued and every
    `SparseDesignMatrix` instance is replaced by ``m.copy().to_dense()``;
    other members are kept as passed. ``self.validate()`` is called last.
    """
    has_sparse = np.any([issparse(m.X) for m in matrices])
    if not has_sparse:
        self.matrices = matrices
    else:
        # This collection is designed for dense matrices, so we warn if a
        # SparseDesignMatrix is passed
        warnings.warn(('Some matrices are `SparseDesignMatrix` objects. '
                       'Sparse matrices will be converted to dense matrices.'),
                      LightkurveWarning)
        self.matrices = [
            m.copy().to_dense() if isinstance(m, SparseDesignMatrix) else m
            for m in matrices
        ]

    member_blocks = [m.X for m in self.matrices]
    self.X = np.hstack(member_blocks)
    self._child_class = DesignMatrix
    self.validate()
Example #9
Source File: featurizer.py From snips-nlu with Apache License 2.0 | 6 votes |
def fit_transform(self, dataset, utterances, classes, none_class):
    """Fit the vectorizers on ``utterances`` and return their features.

    The dataset is validated and its language stored on ``self``. If no
    utterance yields any token, `_EmptyDatasetUtterancesError` is raised.
    The TF-IDF features are always computed; when
    ``self.config.added_cooccurrence_feature_ratio`` is truthy the
    cooccurrence vectorizer is fitted too and its features are stacked to
    the right of the TF-IDF ones.
    """
    import scipy.sparse as sp

    dataset = validate_and_format_dataset(dataset)
    self.language = dataset[LANGUAGE]

    # Stop at the first utterance that yields at least one token.
    has_tokens = False
    for utterance in utterances:
        text = get_text_from_chunks(utterance[DATA])
        if tokenize_light(text, self.language):
            has_tokens = True
            break
    if not has_tokens:
        raise _EmptyDatasetUtterancesError(
            "Tokenized utterances are empty")

    x_tfidf = self._fit_transform_tfidf_vectorizer(
        utterances, classes, dataset)
    if not self.config.added_cooccurrence_feature_ratio:
        return x_tfidf

    self._fit_cooccurrence_vectorizer(
        utterances, classes, none_class, dataset)
    x_cooccurrence = self.cooccurrence_vectorizer.transform(utterances)
    return sp.hstack((x_tfidf, x_cooccurrence))
Example #10
Source File: features_generation_tools.py From corpus-to-graph-ml with MIT License | 6 votes |
def get_compound_features(train_data, test_data, feature_gen_methods):
    """Run several feature generators and stack their outputs column-wise.

    Each callable in ``feature_gen_methods`` is invoked as
    ``method(train_data, test_data)`` and must return a
    ``(train_features, test_features)`` pair. The train blocks are stacked
    horizontally in call order, and likewise the test blocks.

    Returns:
        tuple: ``(train_features, test_features)``. With a single generator
        its outputs are returned unchanged; with several, each element is
        the result of one ``hstack`` over all blocks.

    Raises:
        ValueError: if ``feature_gen_methods`` is empty.
    """
    if not feature_gen_methods:
        raise ValueError("feature_gen_methods must contain at least one method")

    train_blocks = []
    test_blocks = []
    for method in feature_gen_methods:
        train_features, test_features = method(train_data, test_data)
        train_blocks.append(train_features)
        test_blocks.append(test_features)

    if len(train_blocks) == 1:
        return train_blocks[0], test_blocks[0]

    # One hstack over all blocks replaces the Python-2-only ``xrange`` loop
    # that re-copied the growing matrix on every iteration.
    return hstack(train_blocks), hstack(test_blocks)
Example #11
Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License | 6 votes |
def generate_train_batch(self):
    """Sample a training batch and return its sparse feature matrices.

    Returns a dict with two entries, ``'pos_feats'`` and ``'neg_feats'``:
    the one-hot rows of the sampled users stacked to the left of the
    ``kg_feat_mat`` rows of the positive / negative items respectively.
    """
    users, pos_items, neg_items = self._generate_train_cf_batch()

    user_rows = self.user_one_hot[users]
    pos_item_rows = self.kg_feat_mat[pos_items]
    neg_item_rows = self.kg_feat_mat[neg_items]

    # Horizontally stack sparse matrices to get single positive & negative
    # feature matrices
    return {
        'pos_feats': sp.hstack([user_rows, pos_item_rows]),
        'neg_feats': sp.hstack([user_rows, neg_item_rows]),
    }
Example #12
Source File: feature_expansion.py From KDDCup2019_admin with MIT License | 6 votes |
def cat_onehot_encoder_m(df, y, col, selection=True):
    """Multi-label binarize ``df.values`` into a float CSR matrix.

    A `MultiLabelBinarizer` with sparse output is fitted on ``df.values``
    and used to transform the same values. When ``selection`` is exactly
    ``True`` the encoded matrix is additionally passed, together with
    ``y``, to ``train_lightgbm_for_feature_selection`` and the resulting
    AUC is printed, labelled with ``col``.

    Returns:
        tuple: ``(new_feature, mlbs, models, auc_score)`` -- the encoded
        CSR matrix, the fitted binarizer, and the models / AUC score from
        the selection step (both ``None`` when it is skipped).
    """
    from scipy.sparse import csr_matrix

    mlbs = MultiLabelBinarizer(sparse_output=True).fit(df.values)
    # csr_matrix(...) already yields CSR, so no further .tocsr() is needed.
    new_feature = csr_matrix(mlbs.transform(df.values), dtype=float)

    models = None
    auc_score = None
    if selection is True:
        auc_score, models = train_lightgbm_for_feature_selection(new_feature, y)
        print(col, "auc", auc_score)

    return new_feature, mlbs, models, auc_score
Example #13
Source File: feature_for_test.py From KDDCup2019_admin with MIT License | 6 votes |
def multi_features_for_test(df, columns, mlbs, models):
    """Encode the multi-valued columns of ``df`` and stack them as floats.

    For every name in ``columns`` that has an entry in ``mlbs``,
    ``multi_feature_for_one_col`` is called with that column, its binarizer,
    ``None`` as the model, and the column name. The results are stacked
    horizontally in ``columns`` order.

    ``models`` is accepted but not used: the per-column model is always
    passed as ``None``.
    """
    from scipy.sparse import hstack

    per_column = {
        col: multi_feature_for_one_col(df[col], mlbs[col], None, col)
        for col in columns
        if col in mlbs
    }
    ordered_blocks = [per_column[col] for col in columns if col in per_column]
    return hstack(ordered_blocks, dtype=float)
Example #14
Source File: pandas_feature_union.py From pandas-feature-union with MIT License | 6 votes |
def fit_transform(self, X, y=None, **fit_params):
    """Fit all transformers, transform ``X`` with each and merge the outputs.

    The fitted transformers replace the current ones via
    ``self._update_transformer_list``. If any output is sparse the outputs
    are stacked into one CSR matrix; otherwise they are merged with
    ``self.merge_dataframes_by_column``. When ``self._iter()`` yields
    nothing, an empty array of shape ``(X.shape[0], 0)`` is returned.
    """
    self._validate_transformers()
    runner = Parallel(n_jobs=self.n_jobs)
    fitted = runner(
        delayed(_fit_transform_one)(
            transformer=trans, X=X, y=y, weight=weight, **fit_params)
        for name, trans, weight in self._iter()
    )

    if not fitted:
        # All transformers are None
        return np.zeros((X.shape[0], 0))

    # Each job returns an (output, fitted_transformer) pair; unzip them.
    outputs, transformers = zip(*fitted)
    self._update_transformer_list(transformers)

    if any(sparse.issparse(out) for out in outputs):
        return sparse.hstack(outputs).tocsr()
    return self.merge_dataframes_by_column(outputs)
Example #15
Source File: pandas_feature_union.py From pandas-feature-union with MIT License | 6 votes |
def transform(self, X):
    """Transform ``X`` with every transformer and merge the outputs.

    If any output is sparse the outputs are stacked into one CSR matrix;
    otherwise they are merged with ``self.merge_dataframes_by_column``.
    When ``self._iter()`` yields nothing, an empty array of shape
    ``(X.shape[0], 0)`` is returned.
    """
    runner = Parallel(n_jobs=self.n_jobs)
    outputs = runner(
        delayed(_transform_one)(
            transformer=trans, X=X, y=None, weight=weight)
        for name, trans, weight in self._iter()
    )

    if not outputs:
        # All transformers are None
        return np.zeros((X.shape[0], 0))

    if any(sparse.issparse(out) for out in outputs):
        return sparse.hstack(outputs).tocsr()
    return self.merge_dataframes_by_column(outputs)
Example #16
Source File: backend.py From mlens with MIT License | 6 votes |
def _propagate_features(self, task):
    """Propagate features from input array to output array.

    The columns ``task.propagate_features`` of the job's input are copied
    into the first ``task.n_feature_prop`` columns of the job's output.
    A dense input is written into the existing output array in place; a
    sparse input causes ``self.job.predict_out`` to be replaced by a new
    LIL matrix.
    """
    p_out, p_in = self.job.predict_out, self.job.predict_in

    # Check for loss of obs between layers (i.e. with blendindex): skip the
    # leading input rows that have no counterpart in the output.
    row_offset = int(p_in.shape[0] - p_out.shape[0])

    if issparse(p_in):
        # Need to populate propagated features using scipy sparse hstack
        propagated = p_in[row_offset:, task.propagate_features]
        remainder = p_out[:, task.n_feature_prop:]
        self.job.predict_out = hstack([propagated, remainder]).tolil()
        return

    # Simple item setting
    p_out[:, :task.n_feature_prop] = p_in[row_offset:, task.propagate_features]
Example #17
Source File: operator_utils.py From grove with Apache License 2.0 | 6 votes |
def __init__(self, labels_ops):
    """
    Encapsulates a set of linearly independent operators.

    :param (list|tuple) labels_ops: Sequence of tuples (label, operator) where label is a string
        and operator a qutip.Qobj operator representation.
    """
    self.ops_by_label = OrderedDict(labels_ops)
    self.labels = list(self.ops_by_label)
    self.ops = list(self.ops_by_label.values())
    self.dim = len(self.ops)

    # the basis change transformation matrix from a representation in the operator basis
    # to the original basis. We enforce CSR sparse matrix representation to have efficient
    # matrix vector products.
    vectorized_ops = [qt.operator_to_vector(opj).data for opj in self.ops]
    self.basis_transform = sphstack(vectorized_ops).tocsr()

    # Cached properties, initialised empty here.
    self._metric = None
    self._is_orthonormal = None
    self._all_hermitian = None
Example #18
Source File: xc_metrics.py From pyxclib with MIT License | 6 votes |
def _setup_metric(X, true_labels, inv_psp=None, k=5):
    """Prepare top-k indices and padded ground truth for metric computation.

    Returns ``(indices, true_labels, ps_indices, inv_psp)`` where
    ``indices`` is ``_get_topk(X, num_labels, k)`` and ``true_labels`` has
    one extra, empty int32 column appended (CSR). When ``inv_psp`` is
    given, ``ps_indices`` is the top-k of the ground truth scaled
    column-wise by ``inv_psp`` and ``inv_psp`` gets a trailing zero
    appended; otherwise both are ``None``.
    """
    assert compatible_shapes(X, true_labels), \
        "ground truth and prediction matrices must have same shape."
    num_instances, num_labels = true_labels.shape
    indices = _get_topk(X, num_labels, k)

    if inv_psp is None:
        ps_indices = None
    else:
        # Diagonal matrix so the dot product scales each label column.
        label_weights = sp.spdiags(
            inv_psp, diags=0, m=num_labels, n=num_labels)
        ps_indices = _get_topk(true_labels.dot(label_weights), num_labels, k)
        inv_psp = np.hstack([inv_psp, np.zeros((1))])

    padding_column = sp.lil_matrix((num_instances, 1), dtype=np.int32)
    true_labels = sp.hstack([true_labels, padding_column]).tocsr()
    return indices, true_labels, ps_indices, inv_psp
Example #19
Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License | 6 votes |
def generate_test_feed_dict(self, model, user_batch, item_batch, drop_flag=True):
    """Build the feed dict scoring every user in the batch against every item.

    Users are repeated and items tiled so that each (user, item) pair gets
    one row; each row is the user's one-hot vector stacked to the left of
    the item's ``kg_feat_mat`` row. Message dropout is set to ``0.`` for
    every entry of ``self.args.layer_size``. ``drop_flag`` is not used.
    """
    n_users = len(user_batch)
    n_items = len(item_batch)
    user_list = np.repeat(user_batch, n_items).tolist()
    item_list = list(item_batch) * n_users

    # Horizontally stack sparse matrices to get a single feature matrix
    pos_feats = sp.hstack(
        [self.user_one_hot[user_list], self.kg_feat_mat[item_list]])
    pos_indices, pos_values, pos_shape = self._extract_sp_info(pos_feats)

    feed_dict = {}
    feed_dict[model.pos_indices] = pos_indices
    feed_dict[model.pos_values] = pos_values
    feed_dict[model.pos_shape] = pos_shape
    # NOTE(review): eval() runs arbitrary code from the layer_size string;
    # if it is only ever a literal list, ast.literal_eval would be safer.
    feed_dict[model.mess_dropout] = [0.] * len(eval(self.args.layer_size))
    return feed_dict
Example #20
Source File: feature_union.py From Wordbatch with GNU General Public License v2.0 | 6 votes |
def transform(self, X):
    """Transform X separately by each transformer, concatenate results.

    A transformer that has a ``col_pick`` attribute is given only
    ``X[col_pick]``; all others receive ``X`` unchanged.

    Parameters
    ----------
    X : iterable or array-like, depending on transformers
        Input data to be transformed.

    Returns
    -------
    X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the
        sum of n_components (output dimension) over transformers.
        If ``self.concatenate`` is false, the per-transformer results are
        returned without stacking.
    """
    # Read col_pick as an attribute, matching the hasattr() guard; the
    # previous ``t['col_pick']`` subscripted the transformer object itself.
    paral_params = [
        [X[t.col_pick] if hasattr(t, 'col_pick') else X, t]
        for _, t, _ in self._iter()
    ]
    Xs = Apply(transform_one, self.batcher).transform(paral_params)
    if not Xs:
        # All transformers are None
        return np.zeros((X.shape[0], 0))
    if self.concatenate:
        if any(sparse.issparse(f) for f in Xs):
            Xs = sparse.hstack(Xs).tocsr()
        else:
            Xs = np.hstack(Xs)
    return Xs
Example #21
Source File: designmatrix.py From lightkurve with MIT License | 5 votes |
def values(self):
    """Horizontally stacked ``values`` of every matrix in ``self.matrices``."""
    member_values = [m.values for m in self.matrices]
    return np.hstack(member_values)
Example #22
Source File: kernel_approximation.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def _transform_sparse(self, X):
    """Expand sparse ``X`` into ``2 * sample_steps - 1`` stacked blocks.

    Every block reuses a copy of the index arrays of ``X``; only the stored
    values differ. The first block holds ``sqrt(data * sample_interval_)``;
    each further step ``j`` adds a cosine and a sine block. The blocks are
    returned stacked horizontally.
    """
    indices = X.indices.copy()
    indptr = X.indptr.copy()
    interval = self.sample_interval_

    def block_with(values):
        # CSR matrix sharing the copied index arrays, with new stored values.
        return sp.csr_matrix((values, indices, indptr),
                             shape=X.shape, dtype=X.dtype, copy=False)

    blocks = [block_with(np.sqrt(X.data * interval))]

    log_step_nz = interval * np.log(X.data)
    step_nz = 2 * X.data * interval

    for j in range(1, self.sample_steps):
        factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * interval))
        blocks.append(block_with(factor_nz * np.cos(j * log_step_nz)))
        blocks.append(block_with(factor_nz * np.sin(j * log_step_nz)))

    return sp.hstack(blocks)
Example #23
Source File: featurizer.py From snips-nlu with Apache License 2.0 | 5 votes |
def transform(self, utterances):
    """Return the TF-IDF features of ``utterances``.

    If ``self.cooccurrence_vectorizer`` is set, its features are stacked to
    the right of the TF-IDF ones.
    """
    import scipy.sparse as sp

    x_tfidf = self.tfidf_vectorizer.transform(utterances)
    if not self.cooccurrence_vectorizer:
        return x_tfidf

    x_cooccurrence = self.cooccurrence_vectorizer.transform(utterances)
    return sp.hstack((x_tfidf, x_cooccurrence))
Example #24
Source File: matrix.py From ektelo with Apache License 2.0 | 5 votes |
def dense_matrix(self):
    """Stack the ``dense_matrix()`` of every member matrix horizontally."""
    dense_blocks = []
    for member in self.matrices:
        dense_blocks.append(member.dense_matrix())
    return np.hstack(dense_blocks)
Example #25
Source File: longitudinal_features_product.py From tick with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _dense_finite_product(self, feat_mat):
    """Performs feature product on a numpy.ndarray containing
    finite exposures.

    The result is ``feat_mat`` followed by one extra column per ``(i, j)``
    pair in ``self._mapper.values()``, holding the elementwise product of
    columns ``i`` and ``j``.
    """
    product_columns = []
    for i, j in self._mapper.values():
        product = feat_mat[:, i] * feat_mat[:, j]
        # Reshape the 1-D product into a column so it can be hstacked.
        product_columns.append(product.reshape((-1, 1)))
    return np.hstack([feat_mat] + product_columns)
Example #26
Source File: longitudinal_features_product.py From tick with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _sparse_finite_product(self, feat_mat):
    """Performs feature product on a scipy.sparse.csr_matrix containing
    finite exposures.

    The result is ``feat_mat`` followed by one extra column per ``(i, j)``
    pair in ``self.mapper.values()``, holding the elementwise product of
    columns ``i`` and ``j``. It is returned in CSR format.
    """
    # Slice columns from the CSC copy: it was already being built, but the
    # products used to be taken from the CSR input, where column slicing
    # is the slow direction.
    # NOTE(review): the dense sibling reads ``self._mapper`` while this
    # method reads ``self.mapper`` -- confirm both names exist on the class.
    columns = feat_mat.tocsc()
    feat = [columns]
    feat.extend(columns[:, i].multiply(columns[:, j])
                for i, j in self.mapper.values())
    return sps.hstack(feat).tocsr()
Example #27
Source File: operator_utils.py From grove with Apache License 2.0 | 5 votes |
def to_realimag(z):
    """
    Convert a complex hermitian matrix to a real valued doubled up representation, i.e., for
    ``Z = Z_r + 1j * Z_i`` return ``R(Z)``::

        R(Z) = [ Z_r   Z_i]
               [-Z_i   Z_r]

    This representation is compatible with matrix products::

        R(X)*R(Y) = [ (X_r*Y_r-X_i*Y_i)   (X_r*Y_i + X_i*Y_r)]
                    [-(X_r*Y_i + X_i*Y_r) (X_r*Y_r-X_i*Y_i)  ]
                  = R(X*Y).

    In particular, ``Z`` is complex positive (semi-)definite iff ``R(Z)`` is real positive
    (semi-)definite.

    :param (qutip.Qobj|scipy.sparse.base.spmatrix) z: The operator representation matrix.
    :returns: R(Z) the doubled up representation.
    :rtype: scipy.sparse.csr_matrix
    :raises ValueError: if ``z`` is not hermitian.
    """
    if isinstance(z, qt.Qobj):
        z = z.data
    if not is_hermitian(z):  # pragma no coverage
        raise ValueError("Need a hermitian matrix z")

    real_part = z.real
    imag_part = z.imag
    upper_row = sphstack([real_part, imag_part])
    # For hermitian z the imaginary part is antisymmetric, so its transpose
    # supplies the -Z_i block.
    lower_row = sphstack([imag_part.T, real_part])
    return spvstack([upper_row, lower_row]).tocsr().real
Example #28
Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License | 5 votes |
def _extract_sp_info(self, sp_feats):
    """Split a sparse matrix into (indices, values, shape).

    Returns:
        tuple: ``(sp_indices, sp_values, sp_shape)`` where ``sp_indices``
        is an ``(nnz, 2)`` array of ``[row, col]`` pairs, ``sp_values`` the
        stored values in the same order, and ``sp_shape`` the matrix shape.
    """
    # Convert to COO once and read rows, columns and data from the same
    # object. The earlier nonzero()-based version converted twice and
    # skipped explicitly stored zeros that ``.data`` still contained, so the
    # indices and values could end up with different lengths.
    coo = sp_feats.tocoo()
    sp_indices = np.hstack((coo.row[:, None], coo.col[:, None]))
    sp_values = coo.data
    sp_shape = sp_feats.shape
    return sp_indices, sp_values, sp_shape
Example #29
Source File: test_basic.py From attention-lvcsr with MIT License | 5 votes |
def test_hstack_vstack():
    """
    Tests sparse.hstack and sparse.vstack (as opposed to the HStack and VStack
    classes that they wrap).

    For every stacking function and every requested dtype (including
    ``None``), the dtype of the stacked result must equal the requested
    dtype, or the upcast of the block dtypes when none is requested.
    """

    def make_block(dtype):
        return theano.sparse.csr_matrix(name="%s block" % dtype,
                                        dtype=dtype)

    def get_expected_dtype(blocks, to_dtype):
        if to_dtype is not None:
            return to_dtype
        return theano.scalar.upcast(*(b.dtype for b in blocks))

    # a deliberately weird mix of dtypes to stack
    dtypes = ('complex128', theano.config.floatX)
    blocks = [make_block(dtype) for dtype in dtypes]

    stack_functions = (theano.sparse.vstack, theano.sparse.hstack)
    requested_dtypes = (None, ) + dtypes

    for stack_function in stack_functions:
        for to_dtype in requested_dtypes:
            stacked_blocks = stack_function(blocks, dtype=to_dtype)
            expected_dtype = get_expected_dtype(blocks, to_dtype)
            assert stacked_blocks.dtype == expected_dtype
Example #30
Source File: feature.py From text-classifier with Apache License 2.0 | 5 votes |
def _add_feature(self, X, feature_to_add):
    """
    Returns sparse feature matrix with added feature.
    feature_to_add can also be a list of features.

    ``feature_to_add`` is converted to a CSR matrix and stacked to the right
    of ``X``. If its row count does not match ``X`` but its column count
    does (e.g. a flat list with one value per sample, or a list of such
    lists), it is transposed first so that each feature becomes a column.
    The result is in CSR format.
    """
    from scipy.sparse import csr_matrix, hstack

    extra = csr_matrix(feature_to_add)
    n_samples = X.shape[0]
    # csr_matrix() turns a 1-D sequence into a single row; orient it along
    # the sample axis instead of failing on the row-count mismatch.
    if extra.shape[0] != n_samples and extra.shape[1] == n_samples:
        extra = extra.T
    return hstack([X, extra], 'csr')