Python scipy.sparse.vstack() Examples
The following are 30 code examples of scipy.sparse.vstack().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
Example #1
Source File: selection.py From ektelo with Apache License 2.0 | 6 votes |
def expand_offsets(cur_rect_l, cur_rect_u, offsets):
    '''
    Build the lower and upper corners of every candidate range.

    The per-level offsets along each dimension are combined by summing each
    tuple in the cross product of that dimension's offset arrays, e.g. for
    one dimension the two-level offsets [[0, 1, 0], [2, 4, 2]] expand to
    [2 4 2 3 5 3 2 4 2].

    cur_rect_l and cur_rect_u: coordinates of the lower and upper corner of
        the range.
    offsets: nested array of range offsets, indexed by dimension and then by
        level of the hierarchy.

    Returns a (lower, upper) pair; both are empty lists when no level
    carries a query.
    '''
    # Levels without any query contribute nothing; discard them up front.
    offsets = [[level for level in dim if len(level) > 0] for dim in offsets]
    n_levels = len(offsets[0])
    assert all([len(dim) == n_levels for dim in offsets]),\
        "Shape of offsets along each dimension should match."
    if n_levels < 1:
        return [], []

    # One flat offset vector per dimension, covering all level combinations.
    expanded_offsets = [HierarchicalRanges.quick_product(*dim).sum(axis=0)
                        for dim in offsets]

    def _shift(corner):
        # Add each dimension's offsets to that dimension's corner coordinate.
        rows = [base + delta for base, delta in zip(corner, expanded_offsets)]
        return np.vstack(rows).T

    lower = _shift(cur_rect_l)
    upper = _shift(cur_rect_u)
    return lower, upper
Example #2
Source File: core_tests.py From modAL with MIT License | 6 votes |
def test_data_vstack(self):
    """data_vstack must match numpy/scipy stacking and reject unsupported input."""
    for n_samples, n_features in product(range(1, 10), range(1, 10)):
        # dense input: result must equal plain concatenation
        first = np.random.rand(n_samples, n_features)
        second = np.random.rand(n_samples, n_features)
        np.testing.assert_almost_equal(
            modAL.utils.data.data_vstack((first, second)),
            np.concatenate((first, second))
        )

        # sparse input: no element may differ from scipy's own vstack
        for fmt in ['lil', 'csc', 'csr']:
            first = sp.random(n_samples, n_features, format=fmt)
            second = sp.random(n_samples, n_features, format=fmt)
            stacked = modAL.utils.data.data_vstack((first, second))
            n_mismatches = (stacked != sp.vstack((first, second))).sum()
            self.assertEqual(n_mismatches, 0)

    # not supported formats
    self.assertRaises(TypeError, modAL.utils.data.data_vstack, (1, 1))


# functions from modAL.utils.selection
Example #3
Source File: data.py From modAL with MIT License | 6 votes |
def data_vstack(blocks: Container) -> modALinput:
    """
    Stack vertically both sparse and dense arrays.

    The type of the first block selects the stacking strategy: numpy arrays
    are concatenated, lists are chained into one flat list of rows, sparse
    matrices go through ``sp.vstack``. Anything else is attempted with
    ``np.concatenate`` as a last resort.

    Args:
        blocks: Sequence of modALinput objects.

    Returns:
        New sequence of vertically stacked elements.

    Raises:
        TypeError: If the blocks cannot be stacked by any strategy.
    """
    first = blocks[0]
    if isinstance(first, np.ndarray):
        return np.concatenate(blocks)
    if isinstance(first, list):
        # chain(blocks) would merely iterate the outer sequence and hand the
        # blocks back unchanged; from_iterable concatenates their rows.
        return list(chain.from_iterable(blocks))
    if sp.issparse(first):
        return sp.vstack(blocks)
    try:
        return np.concatenate(blocks)
    except Exception as err:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into a TypeError.
        raise TypeError('%s datatype is not supported' % type(first)) from err
Example #4
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def curve_length(self, start=None, end=None, precision=0.01):
    '''
    Calculates the length of the curve between the parameter values start and
    end (defaulting to the first and last entries of self.t).

    For order-1 curves the length is the exact sum of the straight segments
    between the two end points and every knot strictly between them.
    Otherwise the spline derivative is sampled in pieces of
    parameterized-length <precision> and the speed is summed numerically.
    '''
    if start is None:
        start = self.t[0]
    if end is None:
        end = self.t[-1]
    if self.order == 1:
        # we just want to add up along the steps...
        inner = [k for (k, t) in enumerate(self.t) if start < t and t < end]
        xy = np.vstack([[self(start)], self.coordinates[:, inner].T, [self(end)]])
        return np.sum(np.sqrt(np.sum((xy[1:] - xy[:-1])**2, axis=1)))
    # scipy is only needed for the spline case, so import it here
    from scipy import interpolate
    t = np.linspace(start, end, int(np.ceil((end - start) / precision)))
    dt = t[1] - t[0]
    dx = interpolate.splev(t, self.splrep[0], der=1)
    dy = interpolate.splev(t, self.splrep[1], der=1)
    return np.sum(np.sqrt(dx**2 + dy**2)) * dt
Example #5
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def subcurve(self, t0, t1):
    '''
    curve.subcurve(t0, t1) yields a curve-spline object that is equivalent to
    the given curve but that extends from curve(t0) to curve(t1) only.

    Raises ValueError when t0 and t1 are equal. When t1 is less than t0 the
    request is delegated to the reversed curve.
    '''
    if t1 == t0:
        raise ValueError('Cannot take subcurve of a point')
    if t1 < t0:
        # run the request on the reversed curve, mirroring both parameters
        total = self.curve_length()
        return self.reverse().subcurve(total - t0, total - t1)
    # knots strictly between the two end parameters
    inner = [k for (k, t) in enumerate(self.t) if t0 < t < t1]
    first_pt = self(t0)
    last_pt = self(t1)
    points = np.vstack([[first_pt], self.coordinates.T[inner], [last_pt]])
    knots = np.concatenate([[t0], self.t[inner], [t1]])
    if self.distances is None:
        dists = None
    else:
        dists = np.diff(knots)
    return CurveSpline(
        points.T,
        order=self.order,
        smoothing=self.smoothing,
        periodic=False,
        distances=dists,
        meta_data=self.meta_data)
Example #6
Source File: core.py From neuropythy with GNU Affero General Public License v3.0 | 6 votes |
def apply_affine(aff, coords):
    '''
    apply_affine(affine, coords) yields the result of applying the given
    affine transformation to the given coordinate or coordinates.

    This function expects coords to be a (dims X n) matrix but if the first
    dimension is neither 2 nor 3, coords.T is used; i.e.:
      apply_affine(affine3x3, coords2xN) ==> newcoords2xN
      apply_affine(affine4x4, coords3xN) ==> newcoords3xN
      apply_affine(affine3x3, coordsNx2) ==> newcoordsNx2 (for N != 2)
      apply_affine(affine4x4, coordsNx3) ==> newcoordsNx3 (for N != 3)

    A 1-D coords vector is treated as a single point and a 1-D result is
    returned. If aff is None, coords is returned untouched. A ValueError is
    raised for coords with more than two dimensions.
    '''
    if aff is None:
        return coords
    coords = np.asanyarray(coords)
    n_axes = len(coords.shape)
    if n_axes == 1:
        # Single point: promote to a column and recurse. The affine must stay
        # the first argument; the original call passed (coords, aff) swapped.
        return np.squeeze(apply_affine(aff, np.reshape(coords, (-1, 1))))
    if n_axes > 2:
        raise ValueError('cannot apply affine to ND-array for N > 2')
    transposed = False
    if len(coords) == 2:
        aff = to_affine(aff, 2)
    elif len(coords) == 3:
        aff = to_affine(aff, 3)
    else:
        # points are stored as rows: the column count gives the dimension
        aff = to_affine(aff, coords.shape[1])
        coords = coords.T
        transposed = True
    # append a row of ones, transform, then drop that row again
    augmented = np.vstack([coords, np.ones([1, coords.shape[1]])])
    result = np.dot(aff, augmented)[:-1]
    return result.T if transposed else result
Example #7
Source File: utils.py From contextualbandits with BSD 2-Clause "Simplified" License | 6 votes |
def get_batch(self, X, y):
    """
    Return the stored observations combined with the new batch (X, y).

    While nothing is stored yet (self.curr == 0) the batch is recorded and
    handed back as-is. Otherwise the stored data is materialised, the new
    batch is recorded through add_obs (unless X has no rows) and the
    concatenation of old and new data is returned.
    """
    if self.curr == 0:
        self.add_obs(X, y)
        return X, y

    reserve_is_list = (self.curr < self.n) and isinstance(self.X_reserve, list)
    if reserve_is_list:
        # reserve still holds the individual batches: join them
        if self.has_sparse:
            old_X = sp_vstack(self.X_reserve)
        else:
            old_X = np.concatenate(self.X_reserve, axis=0)
        old_y = np.concatenate(self.y_reserve, axis=0)
    else:
        # reserve is sliceable: copy the filled part
        old_X = self.X_reserve[:self.curr].copy()
        old_y = self.y_reserve[:self.curr].copy()

    if X.shape[0] == 0:
        return old_X, old_y

    self.add_obs(X, y)
    if issparse(old_X) or issparse(X):
        return sp_vstack([old_X, X]), np.r_[old_y, y]
    return np.r_[old_X, X], np.r_[old_y, y]
Example #8
Source File: matrix_utils.py From nonnegfac-python with BSD 3-Clause "New" or "Revised" License | 6 votes |
def sparse_remove_row(X, to_remove):
    """
    Delete rows from a sparse matrix

    Parameters
    ----------
    X : scipy.sparse matrix
    to_remove : a list of row indices to be removed.

    Returns
    -------
    Y : scipy.sparse matrix
        The remaining rows of X in their original order. When every row is
        removed, an empty matrix with zero rows and X's column count is
        returned.
    """
    if not sps.isspmatrix_lil(X):
        X = X.tolil()
    # Build the lookup once: membership tests against a list are linear each.
    removed = set(to_remove)
    to_keep = [i for i in range(X.shape[0]) if i not in removed]
    if not to_keep:
        # vstack rejects an empty block list, so build the empty result here
        return sps.lil_matrix((0, X.shape[1]), dtype=X.dtype)
    Y = sps.vstack([X.getrowview(i) for i in to_keep])
    return Y
Example #9
Source File: graph.py From EDeN with MIT License | 6 votes |
def auto_label(graphs, n_clusters=16, **opts):
    """Label nodes with cluster id.

    The per-graph outputs of Vectorizer.vertex_transform are stacked, fed to
    MiniBatchKMeans, and the predictions and transformed vectors are cut back
    into one chunk per graph.

    Returns the pair (label_list, vecs_list).
    """
    data_list = Vectorizer(**opts).vertex_transform(graphs)
    data_matrix = vstack(data_list)

    clusterer = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    clusterer.fit(data_matrix)
    preds = clusterer.predict(data_matrix)
    vecs = clusterer.transform(data_matrix)

    # Walk the stacked rows, slicing out as many rows as each block provided.
    label_list, vecs_list = [], []
    start = 0
    for block in data_list:
        stop = start + block.shape[0]
        label_list.append(preds[start:stop])
        vecs_list.append(vecs[start:stop])
        start = stop
    return label_list, vecs_list
Example #10
Source File: data.py From TextCategorization with MIT License | 6 votes |
def _load(self):
    """Load the pickled documents, labels and class names.

    Files are read from ``data/RCV1/pickles/RCV1-v2_Sparse`` under the
    current directory. Every file whose name starts with ``documents`` or
    ``labels`` is loaded in sorted name order and its contents appended; the
    label items are then stacked vertically into one sparse matrix.

    Returns:
        (documents, labels, class_names)
    """
    data_dir = os.path.abspath(os.path.join(os.path.curdir, "data", "RCV1", "pickles", "RCV1-v2_Sparse"))

    def _unpickle(file_name):
        # NOTE(review): pickle can execute arbitrary code on load; these
        # files must come from a trusted source.
        # The context manager closes the handle; the original left every
        # opened file unclosed.
        with open(os.path.join(data_dir, file_name), "rb") as handle:
            return pickle.load(handle)

    class_names = _unpickle("class_names.pkl")

    pkl_files = os.listdir(data_dir)
    docs_pkls = sorted(name for name in pkl_files if name.startswith("documents"))
    labels_pkls = sorted(name for name in pkl_files if name.startswith("labels"))

    documents = []
    for docs_pkl in docs_pkls:
        documents += _unpickle(docs_pkl)

    _labels = []
    for labels_pkl in labels_pkls:
        _labels += _unpickle(labels_pkl)
    labels = vstack(_labels)

    return documents, labels, class_names
Example #11
Source File: utils.py From OpenNE with MIT License | 6 votes |
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation.

    A single matrix becomes ``(coords, values, shape)``. A list of matrices
    is converted element by element in place and the same list is returned.
    """
    def _as_triplet(matrix):
        # COO exposes the row/col/data arrays directly
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        coords = np.vstack((coo.row, coo.col)).transpose()
        return coords, coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx
Example #12
Source File: DeepNeuralNetTrain.py From DeepNeuralNet-QSAR with GNU General Public License v3.0 | 6 votes |
def allMB_multi(casesPerTask, datasets, mbNumber):
    """
    Assemble minibatch number mbNumber across all tasks.

    When mbNumber is 0 every dataset is permuted first (start of a new
    epoch). For task i, casesPerTask[i] consecutive rows are taken from
    datasets[i], wrapping around with a modulo so a dataset can be reused
    several times within one epoch.

    Returns (inps, targs, targsMask): the stacked inputs, the targets, and a
    mask holding 1 in column i for the rows that came from task i.
    """
    if mbNumber == 0:
        # new epoch: reshuffle each dataset, then read it sequentially
        for dataset in datasets:
            dataset.perm()

    n_tasks = len(datasets)
    total_cases = sum(casesPerTask)
    targs = num.zeros((total_cases, n_tasks), dtype=num.float32)
    targsMask = num.zeros((total_cases, n_tasks), dtype=num.float32)

    inpsList = []
    row_start = 0
    for task, dataset in enumerate(datasets):
        n_cases = casesPerTask[task]
        n_rows = dataset.inps.shape[0]
        # wrap around so a dataset may be used repeatedly in one epoch
        idx = [pos % n_rows
               for pos in range(n_cases * (mbNumber - 1), n_cases * mbNumber)]
        row_stop = row_start + n_cases
        inpsList.append(dataset.inps[idx])
        targs[row_start:row_stop] = dataset.targsFull[idx]
        targsMask[row_start:row_stop, task] = 1
        row_start = row_stop

    if isinstance(inpsList[0], num.ndarray):
        inps = num.vstack(inpsList)
    else:
        inps = sp.vstack(inpsList)
    return inps, targs, targsMask
Example #13
Source File: process.py From DGI with MIT License | 6 votes |
def sparse_to_tuple(sparse_mx, insert_batch=False):
    """Convert sparse matrix to tuple representation.

    Set insert_batch=True if you want to insert a batch dimension: the
    coordinates then gain a leading all-zero column and the shape a leading 1.

    A list argument is converted element by element in place and returned.
    """
    def _convert(matrix):
        if not sp.isspmatrix_coo(matrix):
            matrix = matrix.tocoo()
        index_rows = (matrix.row, matrix.col)
        shape = matrix.shape
        if insert_batch:
            # every entry belongs to batch element 0
            index_rows = (np.zeros(matrix.row.shape[0]),) + index_rows
            shape = (1,) + shape
        coords = np.vstack(index_rows).transpose()
        return coords, matrix.data, shape

    if isinstance(sparse_mx, list):
        for slot in range(len(sparse_mx)):
            sparse_mx[slot] = _convert(sparse_mx[slot])
        return sparse_mx
    return _convert(sparse_mx)
Example #14
Source File: citation_network_utils.py From tf-gnn-samples with MIT License | 6 votes |
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation.

    Each matrix becomes ``(coords, values, shape)`` with the entries ordered
    by row index and then by column index. A list argument is converted in
    place and returned.
    """
    def _sorted_triplet(matrix):
        coo = matrix.tocoo() if not sp.isspmatrix_coo(matrix) else matrix
        coords = np.vstack((coo.row, coo.col)).transpose()
        # lexsort uses its last key as the primary one; after rot90 that key
        # is the row index and the column index breaks ties.
        order = np.lexsort(np.rot90(coords))
        return coords[order], coo.data[order], coo.shape

    if isinstance(sparse_mx, list):
        sparse_mx[:] = [_sorted_triplet(matrix) for matrix in sparse_mx]
        return sparse_mx
    return _sorted_triplet(sparse_mx)
Example #15
Source File: utils.py From dgi with MIT License | 6 votes |
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation.

    Returns ``(coords, values, shape)`` for one matrix. When given a list,
    each element is replaced by its tuple and the list itself is returned.
    """
    def _to_coo_parts(matrix):
        if sp.isspmatrix_coo(matrix):
            coo = matrix
        else:
            coo = matrix.tocoo()
        # one (row, col) pair per stored entry
        pairs = np.vstack((coo.row, coo.col)).transpose()
        return pairs, coo.data, coo.shape

    if isinstance(sparse_mx, list):
        index = 0
        while index < len(sparse_mx):
            sparse_mx[index] = _to_coo_parts(sparse_mx[index])
            index += 1
    else:
        sparse_mx = _to_coo_parts(sparse_mx)
    return sparse_mx
Example #16
Source File: ml.py From EDeN with MIT License | 6 votes |
def make_data_matrix(positive_data_matrix=None, negative_data_matrix=None, target=None):
    """Build a data matrix and its target vector.

    If ``target`` is given, ``positive_data_matrix`` is returned unchanged
    together with ``target``. Otherwise the positive and negative matrices
    are stacked into one CSR matrix, labelled +1 and -1 respectively; a
    missing negative matrix is replaced by the negated positive one.

    Returns:
        (data_matrix, y)
    """
    # The original message was split with a backslash inside the literal,
    # which embedded the continuation line's indentation in the text.
    assert positive_data_matrix is not None, \
        'ERROR: expecting non null positive_data_matrix'
    if target is not None:
        # Nothing to stack. Returning here also avoids building a negated
        # copy that would be thrown away.
        return positive_data_matrix, target
    if negative_data_matrix is None:
        negative_data_matrix = positive_data_matrix.multiply(-1)
    yp = [1] * positive_data_matrix.shape[0]
    yn = [-1] * negative_data_matrix.shape[0]
    y = np.array(yp + yn)
    data_matrix = vstack(
        [positive_data_matrix, negative_data_matrix], format="csr")
    return data_matrix, y
Example #17
Source File: selection.py From ektelo with Apache License 2.0 | 6 votes |
def select(self):
    """Run _GreedyHierByLv and wrap the weighted range queries as an EkteloMatrix.

    Each query with a positive weight becomes one row holding that weight on
    the inclusive index range q[0]..q[1] and zeros elsewhere.
    """
    QtQ = self.W.gram().dense_matrix()
    n = self.domain_shape[0]
    err, inv, weights, queries = self._GreedyHierByLv(
        QtQ, n, 0, withRoot=False)

    # form matrix from queries and weights
    row_list = []
    for query, weight in zip(queries, weights):
        if not weight > 0:
            continue
        row = np.zeros(self.domain_shape[0])
        row[query[0]:query[1] + 1] = weight
        row_list.append(row)

    mat = np.vstack(row_list)
    if not sparse.issparse(mat):
        mat = sparse.csr_matrix(mat)
    return matrix.EkteloMatrix(mat)
Example #18
Source File: batcher.py From Wordbatch with GNU General Public License v2.0 | 6 votes |
def merge_batches(self, data):
    """
    Merge a list of data minibatches into one single instance representing the data

    Parameters
    ----------
    data: list
        List of minibatches to merge

    Returns
    -------
    (anonymous): sparse matrix | pd.DataFrame | list
        Single complete list-like data merged from given batches
    """
    # The first batch decides how all batches are joined.
    first = data[0]
    if isinstance(first, ssp.csr_matrix):
        return ssp.vstack(data)
    if isinstance(first, (pd.DataFrame, pd.Series)):
        return pd.concat(data)
    merged = []
    for batch in data:
        merged.extend(batch)
    return merged
Example #19
Source File: data.py From recoder with MIT License | 6 votes |
def _extract(self, sparse_matrix, index):
    """Return sparse_matrix[index], chunking the lookup for long index sequences."""
    needs_chunking = (sputils.issequence(index)
                      and len(index) > CSR_MATRIX_INDEX_SIZE_LIMIT)
    if not needs_chunking:
        return sparse_matrix[index]

    # It happens that scipy implements the indexing of a csr_matrix with a
    # list using matrix multiplication, which gets to be an issue if the
    # index list is large and leads to memory issues.
    # Reference: https://stackoverflow.com/questions/46034212/sparse-matrix-slicing-memory-error/46040827#46040827
    # To work around it, index with chunks of at most
    # CSR_MATRIX_INDEX_SIZE_LIMIT entries and stack the extracted pieces.
    step = CSR_MATRIX_INDEX_SIZE_LIMIT
    pieces = [sparse_matrix[index[offset: offset + step]]
              for offset in range(0, len(index), step)]
    return sparse.vstack(pieces)
Example #20
Source File: data_statistics.py From pyxclib with MIT License | 6 votes |
def compute(self, train_features, train_labels, test_features=None, test_labels=None):
    """Compute features for given data. Test data is optional.

    The sample, feature and label counts are stored on the instance. When
    test data is supplied it is stacked under the train data before the
    averages are computed.

    Args:
        train_features: csr_matrix: train features
        train_labels: csr_matrix: train labels
        test_features: csr_matrix: test features
        test_labels: csr_matrix: test labels
    """
    self.n_train_samples, self.n_features = train_features.shape
    self.n_labels = train_labels.shape[1]

    features, labels = train_features, train_labels
    if test_features is not None:
        self.n_test_samples = test_features.shape[0]
        features = vstack([train_features, test_features]).tocsr()
        labels = vstack([train_labels, test_labels]).tocsr()

    self.n_avg_samples_per_label = self.compute_avg_samples_per_label(labels)
    self.n_avg_labels_per_sample = self.compute_avg_labels_per_sample(labels)
    self.avg_doc_length = self.compute_avg_doc_length(features)
Example #21
Source File: _canonical_constraint.py From ip-nonlinear-solver with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _concatenate_dense_jac(jac_list):
    """Stack a list of (J_ineq, J_eq) Jacobian pairs into two dense arrays.

    Sparse entries are converted with ``toarray``; everything else is made
    at least two-dimensional before stacking.
    """
    def _dense(jac):
        # Convert one Jacobian to a 2-D numpy array.
        if spc.issparse(jac):
            return jac.toarray()
        return np.atleast_2d(jac)

    # Read sequentially all jacobians, keeping the two kinds apart.
    ineq_parts = []
    eq_parts = []
    for ineq_jac, eq_jac in jac_list:
        ineq_parts.append(_dense(ineq_jac))
        eq_parts.append(_dense(eq_jac))

    # Concatenate all
    return np.vstack(ineq_parts), np.vstack(eq_parts)
Example #22
Source File: DeepNeuralNetTrain.py From DeepNeuralNet-QSAR with GNU General Public License v3.0 | 5 votes |
def sampleMBFromAll(casesPerTask, datasets):
    """
    Draw a random minibatch from every task.

    For task i, casesPerTask[i] row indices are sampled with
    num.random.randint from datasets[i].

    Returns (inps, targs, targsMask): the stacked inputs, the targets, and a
    mask holding 1 in column i for the rows that came from task i.
    """
    n_tasks = len(datasets)
    total_cases = sum(casesPerTask)
    targs = num.zeros((total_cases, n_tasks), dtype=num.float32)
    targsMask = num.zeros((total_cases, n_tasks), dtype=num.float32)

    inpsList = []
    row_start = 0
    for task, dataset in enumerate(datasets):
        n_cases = casesPerTask[task]
        idx = num.random.randint(dataset.inps.shape[0], size=(n_cases,))
        row_stop = row_start + n_cases
        inpsList.append(dataset.inps[idx])
        targs[row_start:row_stop] = dataset.targsFull[idx]
        targsMask[row_start:row_stop, task] = 1
        row_start = row_stop

    if isinstance(inpsList[0], num.ndarray):
        inps = num.vstack(inpsList)
    else:
        inps = sp.vstack(inpsList)
    return inps, targs, targsMask
Example #23
Source File: tr_interior_point.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def _assemble_sparse_jacobian(self, J_eq, J_ineq, s):
    """Assemble sparse jacobian given its components.

    Given ``J_eq``, ``J_ineq`` and ``s`` returns:

        jacobian = [ J_eq,     0     ]
                   [ J_ineq, diag(s) ]

    It is equivalent to:

        sps.bmat([[ J_eq,   None    ],
                  [ J_ineq, diag(s) ]], "csr")

    but significantly more efficient for this given structure.

    The CSR arrays of the stacked ``[J_eq; J_ineq]`` are widened directly:
    every inequality row gets one extra stored entry at the end of the row,
    placed in column ``n_vars + k`` with value ``s[k]``.
    """
    n_vars, n_ineq, n_eq = self.n_vars, self.n_ineq, self.n_eq
    J_aux = sps.vstack([J_eq, J_ineq], "csr")
    indptr, indices, data = J_aux.indptr, J_aux.indices, J_aux.data
    # Row pointers: the first n_eq entries are unchanged; the remaining
    # n_ineq + 1 entries are shifted by 0, 1, ..., n_ineq to make room for one
    # new entry per inequality row.
    new_indptr = indptr + np.hstack((np.zeros(n_eq, dtype=int), np.arange(n_ineq+1, dtype=int)))
    size = indices.size+n_ineq
    # np.empty defaults to a float dtype; the index array is handed to
    # csr_matrix as-is below.
    new_indices = np.empty(size)
    new_data = np.empty(size)
    # mask marks the last slot of every inequality row, i.e. where the
    # diag(s) entries are written.
    mask = np.full(size, False, bool)
    # NOTE(review): with n_ineq == 0 the slice [-0:] selects the whole array;
    # presumably callers guarantee n_ineq > 0 - confirm.
    mask[new_indptr[-n_ineq:]-1] = True
    new_indices[mask] = n_vars+np.arange(n_ineq)
    new_indices[~mask] = indices
    new_data[mask] = s
    new_data[~mask] = data
    J = sps.csr_matrix((new_data, new_indices, new_indptr), (n_eq + n_ineq, n_vars + n_ineq))
    return J
Example #24
Source File: ova.py From pyxclib with MIT License | 5 votes |
def _merge_weights(self, weights, biases):
    """Stack per-label weights and biases into self.weight and self.bias.

    With feature_type 'sparse' the weight is a float32 CSR matrix;
    otherwise it is a squeezed dense float32 array. Bias is always a dense
    array.
    """
    if self.feature_type != 'sparse':
        dense_weight = np.vstack(weights).astype(np.float32)
        self.weight = dense_weight.squeeze()
        self.bias = np.vstack(biases).astype(np.float32)
        return

    self.weight = sp.vstack(weights, format='csr', dtype=np.float32)
    sparse_bias = sp.vstack(biases, format='csr', dtype=np.float32)
    self.bias = sparse_bias.toarray()
Example #25
Source File: ova.py From pyxclib with MIT License | 5 votes |
def convert_to_sparse(weight, bias):
    """Stack weights and biases and return them as sparse matrices.

    Both inputs are stacked vertically and squeezed; the weight is returned
    as a CSR matrix and the bias as the transpose of its CSR matrix.
    """
    dense_weight = np.squeeze(np.vstack(weight))
    dense_bias = np.squeeze(np.vstack(bias))
    sparse_weight = sp.csr_matrix(dense_weight)
    sparse_bias = sp.csr_matrix(dense_bias).transpose()
    return sparse_weight, sparse_bias
Example #26
Source File: vis_topic.py From corex_topic with Apache License 2.0 | 5 votes |
def all_bbow(docs, n=100):
    """Split each document into a subdocuments of size n, and return as binary BOW

    Returns a tuple ``(mat, proc, ids)``: the CSR matrix of binary
    bag-of-words rows (one per subdocument), the fitted CountVectorizer, and
    for every row the index of the document it came from.
    """
    proc = skt.CountVectorizer(token_pattern=pattern)
    proc.fit(docs)
    ids = []
    submats = []
    for l, doc in enumerate(docs):
        subdocs = chunks(doc, n=n)
        # "> 0" turns counts into presence/absence indicators
        submat = (proc.transform(subdocs) > 0)
        submats.append(submat)
        ids.extend([l] * submat.shape[0])
    # Stack once at the end: re-stacking the growing matrix on every
    # iteration copies all earlier rows again (quadratic in total rows).
    mat = ss.vstack(submats)
    return mat.asformat('csr'), proc, ids
Example #27
Source File: selection.py From ektelo with Apache License 2.0 | 5 votes |
def select(self):
    """Build the stacked one-way marginal queries over self.domain_shape.

    For each dimension, an identity on that dimension is combined via
    Kronecker products with all-ones vectors on every other dimension; the
    resulting blocks are stacked vertically.
    """
    dims = self.domain_shape
    blocks = []
    for axis, size in enumerate(dims):
        factors = [np.ones(n) for n in dims[:axis]]
        factors.append(sparse.identity(size))
        factors.extend(np.ones(n) for n in dims[axis + 1:])
        blocks.append(reduce(sparse.kron, factors))
    return sparse.vstack(blocks)
Example #28
Source File: ml.py From EDeN with MIT License | 5 votes |
def multiprocess_vectorize(iterators, vectorizer=None, pre_processor=None, pre_processor_args=None, fit_flag=False, n_blocks=5, block_size=None, n_jobs=8):
    """multiprocess_vectorize.

    Vectorize ``iterators`` in parallel: the items are cut into intervals by
    compute_intervals, each interval is passed to serial_vectorize in a
    worker process, and the partial results are stacked into one CSR matrix.

    When ``fit_flag`` is true the vectorizer is fitted first, serially, on
    the (optionally pre-processed) items. ``n_jobs=-1`` creates the pool
    with its default number of workers.

    Returns:
        The stacked data matrix in CSR format.
    """
    iterators = list(iterators)
    # fitting happens in a serial fashion
    if fit_flag:
        if pre_processor is None:
            graphs = iterators
        elif pre_processor_args is not None:
            graphs = pre_processor(iterators, **pre_processor_args)
        else:
            graphs = pre_processor(iterators)
        vectorizer.fit(graphs)

    intervals = compute_intervals(
        size=len(iterators), n_blocks=n_blocks, block_size=block_size)
    pool = mp.Pool() if n_jobs == -1 else mp.Pool(n_jobs)
    try:
        results = [apply_async(pool, serial_vectorize,
                               args=(iterators[start:end],
                                     vectorizer,
                                     pre_processor,
                                     pre_processor_args,
                                     False))
                   for start, end in intervals]
        output = [p.get() for p in results]
    except BaseException:
        # A failed task previously left the pool open and its workers alive;
        # stop them before propagating the error.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    data_matrix = vstack(output, format="csr")
    return data_matrix
Example #29
Source File: ir2tagsets_seq.py From plastering with MIT License | 5 votes |
def _make_doc_vectorizer(self, doc):
    """Encode every sentence of doc with a LabelBinarizer, padded to self.max_len rows.

    Sentences are split on whitespace, the binarizer is fitted on the tokens
    combined with reduce(adder, ...), and each encoded sentence is padded
    with zero rows before all of them are stacked into one array.
    """
    tokenized = [sentence.split() for sentence in doc]
    binarizer = LabelBinarizer().fit(reduce(adder, tokenized))

    def _encode(tokens):
        # Encode one sentence and pad it with zero rows up to max_len.
        encoded = binarizer.transform(tokens)
        n_rows, n_cols = encoded.shape[0], encoded.shape[1]
        padding = np.zeros((self.max_len - n_rows, n_cols))
        return np.vstack([encoded, padding])

    encoded_labels = np.stack([_encode(tokens) for tokens in tokenized])
    return encoded_labels
Example #30
Source File: selection.py From ektelo with Apache License 2.0 | 5 votes |
def select(self):
    """Stack self.M_hat on top of its complement and wrap it as an EkteloMatrix."""
    measured = self.M_hat
    remainder = support.complement(measured, self.grid_size)
    mat = sparse.vstack((measured, remainder))
    return matrix.EkteloMatrix(mat)