Python scipy.sparse.vstack() Examples

The following are 30 code examples of scipy.sparse.vstack(), drawn from open-source projects. The source file, project, and license are noted above each example.
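Before the project examples, a minimal sketch of scipy.sparse.vstack() itself may help; the matrices here are illustrative, not taken from any project below:

import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[1, 0], [0, 2]]))
B = sp.csr_matrix(np.array([[3, 4]]))
C = sp.vstack([A, B], format='csr')  # stacks rows; column counts must match
print(C.toarray())  # [[1 0], [0 2], [3 4]]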
Example #1
Source File: selection.py    From ektelo with Apache License 2.0
def expand_offsets(cur_rect_l, cur_rect_u, offsets):
        '''
        Expand the per-level offsets along each dimension into the final
        offsets for all candidates by computing the sum of each tuple in the
        cross product of the offset arrays.
        e.g. for one dimension, the two-level offsets [[0, 1, 0], [2, 4, 2]]
        will be expanded to [2 4 2 3 5 3 2 4 2].
        cur_rect_l and cur_rect_u: coordinates of the lower and upper corners of the range.
        offsets: nested array of range offsets, indexed by dimension and level of hierarchy.
        '''
        # remove empty lists (no query at this level)
        offsets = [list(filter(lambda x: len(x) > 0, d)) for d in offsets]
        assert all([len(d) == len(offsets[0]) for d in offsets]),\
               "Shape of offsets along each dimension should match."    
        if len(offsets[0]) < 1:
            return [], []   
        # expand offsets across different levels.
        expanded_offsets = [HierarchicalRanges.quick_product(*d).sum(axis=0) for d in offsets] 
        lower = np.vstack([ l + offset for l, offset in zip(cur_rect_l, expanded_offsets)]).T
        upper = np.vstack([ u + offset for u, offset in zip(cur_rect_u, expanded_offsets)]).T
        return lower, upper 
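A standalone sketch of the cross-product expansion described in the docstring above, using itertools.product in place of the project's HierarchicalRanges.quick_product (illustration only, not the ektelo implementation):

import numpy as np
from itertools import product

levels = [[0, 1, 0], [2, 4, 2]]  # two levels of offsets for one dimension
expanded = np.array([sum(t) for t in product(*levels)])
print(expanded)  # [2 4 2 3 5 3 2 4 2]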
Example #2
Source File: core_tests.py    From modAL with MIT License
def test_data_vstack(self):
        for n_samples, n_features in product(range(1, 10), range(1, 10)):
            # numpy arrays
            a, b = np.random.rand(n_samples, n_features), np.random.rand(n_samples, n_features)
            np.testing.assert_almost_equal(
                modAL.utils.data.data_vstack((a, b)),
                np.concatenate((a, b))
            )

            # sparse matrices
            for format in ['lil', 'csc', 'csr']:
                a, b = sp.random(n_samples, n_features, format=format), sp.random(n_samples, n_features, format=format)
                self.assertEqual((modAL.utils.data.data_vstack((a, b)) != sp.vstack((a, b))).sum(), 0)

        # not supported formats
        self.assertRaises(TypeError, modAL.utils.data.data_vstack, (1, 1))

    # functions from modAL.utils.selection 
Example #3
Source File: data.py    From modAL with MIT License
def data_vstack(blocks: Container) -> modALinput:
    """
    Stack vertically both sparse and dense arrays.

    Args:
        blocks: Sequence of modALinput objects.

    Returns:
        New sequence of vertically stacked elements.
    """
    if isinstance(blocks[0], np.ndarray):
        return np.concatenate(blocks)
    elif isinstance(blocks[0], list):
        return list(chain(*blocks))  # chain(*blocks) flattens the lists; chain(blocks) would not
    elif sp.issparse(blocks[0]):
        return sp.vstack(blocks)
    else:
        try:
            return np.concatenate(blocks)
        except Exception:
            raise TypeError('%s datatype is not supported' % type(blocks[0])) 
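A minimal usage sketch of the dispatcher above, assuming data_vstack is in scope; the shapes are arbitrary:

import numpy as np
import scipy.sparse as sp

dense = data_vstack((np.ones((2, 3)), np.zeros((1, 3))))   # (3, 3) ndarray
stacked = data_vstack((sp.eye(2, format='csr'),
                       sp.eye(2, format='csr')))           # (4, 2) sparse matrix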
Example #4
Source File: core.py    From neuropythy with GNU Affero General Public License v3.0
def curve_length(self, start=None, end=None, precision=0.01):
        '''
        Calculates the length of the curve by dividing the curve up
        into pieces of parameterized-length <precision>.
        '''
        if start is None: start = self.t[0]
        if end is None: end = self.t[-1]
        from scipy import interpolate
        if self.order == 1:
            # we just want to add up along the steps...
            ii = [ii for (ii,t) in enumerate(self.t) if start < t and t < end]
            ts = np.concatenate([[start], self.t[ii], [end]])
            xy = np.vstack([[self(start)], self.coordinates[:,ii].T, [self(end)]])
            return np.sum(np.sqrt(np.sum((xy[1:] - xy[:-1])**2, axis=1)))
        else:
            t = np.linspace(start, end, int(np.ceil((end-start)/precision)))
            dt = t[1] - t[0]
            dx = interpolate.splev(t, self.splrep[0], der=1)
            dy = interpolate.splev(t, self.splrep[1], der=1)
            return np.sum(np.sqrt(dx**2 + dy**2)) * dt 
Example #5
Source File: core.py    From neuropythy with GNU Affero General Public License v3.0
def subcurve(self, t0, t1):
        '''
        curve.subcurve(t0, t1) yields a curve-spline object that is equivalent to the given
          curve but that extends from curve(t0) to curve(t1) only.
        '''
        # if t1 is less than t0, then we want to actually do this in reverse...
        if t1 == t0: raise ValueError('Cannot take subcurve of a point')
        if t1 < t0:
            tt = self.curve_length()
            return self.reverse().subcurve(tt - t0, tt - t1)
        idx = [ii for (ii,t) in enumerate(self.t) if t0 < t and t < t1]
        pt0 = self(t0)
        pt1 = self(t1)
        coords = np.vstack([[pt0], self.coordinates.T[idx], [pt1]])
        ts = np.concatenate([[t0], self.t[idx], [t1]])
        dists  = None if self.distances is None else np.diff(ts)
        return CurveSpline(
            coords.T,
            order=self.order,
            smoothing=self.smoothing,
            periodic=False,
            distances=dists,
            meta_data=self.meta_data) 
Example #6
Source File: core.py    From neuropythy with GNU Affero General Public License v3.0
def apply_affine(aff, coords):
    '''
    apply_affine(affine, coords) yields the result of applying the given affine transformation to
      the given coordinate or coordinates.

    This function expects coords to be a (dims X n) matrix but if the first dimension is neither 2
    nor 3, coords.T is used; i.e.:
      apply_affine(affine3x3, coords2xN) ==> newcoords2xN
      apply_affine(affine4x4, coords3xN) ==> newcoords3xN
      apply_affine(affine3x3, coordsNx2) ==> newcoordsNx2 (for N != 2)
      apply_affine(affine4x4, coordsNx3) ==> newcoordsNx3 (for N != 3)
    '''
    if aff is None: return coords
    (coords,tr) = (np.asanyarray(coords), False)
    if len(coords.shape) == 1: return np.squeeze(apply_affine(aff, np.reshape(coords, (-1,1))))
    elif len(coords.shape) > 2: raise ValueError('cannot apply affine to ND-array for N > 2')
    if   len(coords) == 2: aff = to_affine(aff, 2)
    elif len(coords) == 3: aff = to_affine(aff, 3)
    else: (coords,aff,tr) = (coords.T, to_affine(aff, coords.shape[1]), True)
    r = np.dot(aff, np.vstack([coords, np.ones([1,coords.shape[1]])]))[:-1]
    return r.T if tr else r 
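The np.vstack call above appends a row of ones so the affine can be applied in homogeneous coordinates; here is a standalone sketch of that trick with a hand-built 2D affine (a 90-degree rotation plus translation, chosen only for illustration):

import numpy as np

aff = np.array([[0., -1., 5.],
                [1.,  0., 0.],
                [0.,  0., 1.]])
coords = np.array([[1., 2.],    # x coordinates of two points
                   [0., 0.]])   # y coordinates
r = np.dot(aff, np.vstack([coords, np.ones((1, coords.shape[1]))]))[:-1]
print(r)  # [[5. 5.], [1. 2.]]: the points (1,0) and (2,0) map to (5,1) and (5,2)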
Example #7
Source File: utils.py    From contextualbandits with BSD 2-Clause "Simplified" License
def get_batch(self, X, y):
        if self.curr == 0:
            self.add_obs(X, y)
            return X, y

        if (self.curr < self.n) and (isinstance(self.X_reserve, list)):
            if not self.has_sparse:
                old_X = np.concatenate(self.X_reserve, axis=0)
            else:
                old_X = sp_vstack(self.X_reserve)
            old_y = np.concatenate(self.y_reserve, axis=0)
        else:
            old_X = self.X_reserve[:self.curr].copy()
            old_y = self.y_reserve[:self.curr].copy()

        if X.shape[0] == 0:
            return old_X, old_y
        else:
            self.add_obs(X, y)

        if not issparse(old_X) and not issparse(X):
            return np.r_[old_X, X], np.r_[old_y, y]
        else:
            return sp_vstack([old_X, X]), np.r_[old_y, y] 
Example #8
Source File: matrix_utils.py    From nonnegfac-python with BSD 3-Clause "New" or "Revised" License
def sparse_remove_row(X, to_remove):
    """ Delete rows from a sparse matrix

    Parameters
    ----------
    X : scipy.sparse matrix
    to_remove : a list of row indices to be removed.

    Returns
    -------
    Y : scipy.sparse matrix
    """
    if not sps.isspmatrix_lil(X):
        X = X.tolil()

    to_keep = [i for i in range(X.shape[0]) if i not in to_remove]
    Y = sps.vstack([X.getrowview(i) for i in to_keep])
    return Y 
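A minimal usage sketch, assuming sparse_remove_row is in scope alongside these imports:

import numpy as np
import scipy.sparse as sps

X = sps.lil_matrix(np.arange(12).reshape(4, 3))
Y = sparse_remove_row(X, [1, 3])  # drop rows 1 and 3
print(Y.toarray())                # [[0 1 2], [6 7 8]]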
Example #9
Source File: graph.py    From EDeN with MIT License
def auto_label(graphs, n_clusters=16, **opts):
    """Label nodes with cluster id.

    Cluster nodes using as features the output of vertex_vectorize.
    """
    data_list = Vectorizer(**opts).vertex_transform(graphs)
    data_matrix = vstack(data_list)
    clu = MiniBatchKMeans(n_clusters=n_clusters, n_init=10)
    clu.fit(data_matrix)
    preds = clu.predict(data_matrix)
    vecs = clu.transform(data_matrix)
    sizes = [m.shape[0] for m in data_list]
    label_list = []
    vecs_list = []
    pointer = 0
    for size in sizes:
        label_list.append(preds[pointer: pointer + size])
        vecs_list.append(vecs[pointer: pointer + size])
        pointer += size
    return label_list, vecs_list 
Example #10
Source File: data.py    From TextCategorization with MIT License
def _load(self):
        data_dir = os.path.abspath(os.path.join(os.path.curdir, "data", "RCV1", "pickles", "RCV1-v2_Sparse"))

        class_names = pickle.load(open(data_dir + "/class_names.pkl", "rb"))

        pkl_files = os.listdir(data_dir)
        docs_pkls = list(filter(lambda x: x.startswith("documents"), pkl_files))
        labels_pkls = list(filter(lambda x: x.startswith("labels"), pkl_files))
        docs_pkls.sort()
        labels_pkls.sort()

        documents = []
        for docs_pkl in docs_pkls:
            documents += pickle.load(open(data_dir + "/" + docs_pkl, "rb"))

        _labels = []
        for labels_pkl in labels_pkls:
            _labels += pickle.load(open(data_dir + "/" + labels_pkl, "rb"))
        labels = vstack(_labels)

        return documents, labels, class_names 
Example #11
Source File: utils.py    From OpenNE with MIT License
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
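A quick usage sketch of sparse_to_tuple as defined above (sp is scipy.sparse):

import numpy as np
import scipy.sparse as sp

mx = sp.csr_matrix(np.array([[0, 2], [3, 0]]))
coords, values, shape = sparse_to_tuple(mx)
print(coords)  # [[0 1], [1 0]]
print(values)  # [2 3]
print(shape)   # (2, 2)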
Example #12
Source File: DeepNeuralNetTrain.py    From DeepNeuralNet-QSAR with GNU General Public License v3.0
def allMB_multi(casesPerTask,datasets,mbNumber):
    if mbNumber == 0:
        # to begin a new epoch, permute each dataset first, then sequentially use the training data in the new order
        for i in range(len(datasets)):
            datasets[i].perm()

    inpsList = []
    targs = num.zeros((sum(casesPerTask), len(datasets)), dtype=num.float32)
    targsMask = num.zeros((sum(casesPerTask), len(datasets)), dtype=num.float32)
    for i in range(len(datasets)):
        # in case we need to use certain datasets multiple times in one epoch
        idx = [ xx % (datasets[i].inps.shape[0]) for xx in range(casesPerTask[i]*(mbNumber-1), casesPerTask[i]*mbNumber)]
        inpsList.append(datasets[i].inps[idx])
        targs[sum(casesPerTask[:i]):sum(casesPerTask[:(i+1)])] = datasets[i].targsFull[idx]
        targsMask[sum(casesPerTask[:i]):sum(casesPerTask[:(i+1)]), i] = 1
    if isinstance(inpsList[0], num.ndarray):
        inps = num.vstack(inpsList)
    else:
        inps = sp.vstack(inpsList)
    return inps, targs, targsMask 
Example #13
Source File: process.py    From DGI with MIT License
def sparse_to_tuple(sparse_mx, insert_batch=False):
    """Convert sparse matrix to tuple representation."""
    """Set insert_batch=True if you want to insert a batch dimension."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        if insert_batch:
            coords = np.vstack((np.zeros(mx.row.shape[0]), mx.row, mx.col)).transpose()
            values = mx.data
            shape = (1,) + mx.shape
        else:
            coords = np.vstack((mx.row, mx.col)).transpose()
            values = mx.data
            shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
Example #14
Source File: citation_network_utils.py    From tf-gnn-samples with MIT License
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        # All of these will need to be sorted:
        sort_indices = np.lexsort(np.rot90(coords))
        return coords[sort_indices], values[sort_indices], shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
Example #15
Source File: utils.py    From dgi with MIT License
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx 
Example #16
Source File: ml.py    From EDeN with MIT License
def make_data_matrix(positive_data_matrix=None,
                     negative_data_matrix=None,
                     target=None):
    """make_data_matrix."""
    assert positive_data_matrix is not None, \
        'ERROR: expecting non-null positive_data_matrix'
    if negative_data_matrix is None:
        negative_data_matrix = positive_data_matrix.multiply(-1)
    if target is None and negative_data_matrix is not None:
        yp = [1] * positive_data_matrix.shape[0]
        yn = [-1] * negative_data_matrix.shape[0]
        y = np.array(yp + yn)
        data_matrix = vstack(
            [positive_data_matrix, negative_data_matrix], format="csr")
    if target is not None:
        data_matrix = positive_data_matrix
        y = target
    return data_matrix, y 
Example #17
Source File: selection.py    From ektelo with Apache License 2.0
def select(self):
        QtQ = self.W.gram().dense_matrix()
        n = self.domain_shape[0]
        err, inv, weights, queries = self._GreedyHierByLv(
            QtQ, n, 0, withRoot=False)

        # form matrix from queries and weights
        row_list = []
        for q, w in zip(queries, weights):
            if w > 0:
                row = np.zeros(self.domain_shape[0])
                row[q[0]:q[1] + 1] = w
                row_list.append(row)
        mat = np.vstack(row_list)
        mat = mat if sparse.issparse(mat) else sparse.csr_matrix(mat)

        return matrix.EkteloMatrix(mat) 
Example #18
Source File: batcher.py    From Wordbatch with GNU General Public License v2.0
def merge_batches(self, data):
		"""Merge a list of data minibatches into one single instance representing the data

		Parameters
		----------
		data: list
			List of minibatches to merge

		Returns
		-------
		(anonymous): sparse matrix | pd.DataFrame | list
			Single complete list-like data merged from given batches
		"""
		if isinstance(data[0], ssp.csr_matrix):  return ssp.vstack(data)
		if isinstance(data[0], pd.DataFrame) or isinstance(data[0], pd.Series):  return pd.concat(data)
		return [item for sublist in data for item in sublist] 
Example #19
Source File: data.py    From recoder with MIT License
def _extract(self, sparse_matrix, index):

    if sputils.issequence(index) and len(index) > CSR_MATRIX_INDEX_SIZE_LIMIT:
      # It happens that scipy implements the indexing of a csr_matrix with a list using
      # matrix multiplication, which becomes an issue if the index list is
      # large and leads to memory issues
      # Reference: https://stackoverflow.com/questions/46034212/sparse-matrix-slicing-memory-error/46040827#46040827

      # In order to solve this issue, simply chunk the index into smaller indices of
      # size CSR_MATRIX_INDEX_SIZE_LIMIT and then stack the extracted chunks

      sparse_matrix_slices = []
      for offset in range(0, len(index), CSR_MATRIX_INDEX_SIZE_LIMIT):
        sparse_matrix_slices.append(sparse_matrix[index[offset: offset + CSR_MATRIX_INDEX_SIZE_LIMIT]])

      extracted_sparse_matrix = sparse.vstack(sparse_matrix_slices)
    else:
      extracted_sparse_matrix = sparse_matrix[index]

    return extracted_sparse_matrix 
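A standalone sketch of the same chunking idea; CSR_MATRIX_INDEX_SIZE_LIMIT is a project constant, and the value 2 below is only for illustration:

import scipy.sparse as sparse

CSR_MATRIX_INDEX_SIZE_LIMIT = 2
m = sparse.random(6, 4, density=0.5, format='csr')
index = [5, 0, 3, 2, 1]
chunks = [m[index[offset: offset + CSR_MATRIX_INDEX_SIZE_LIMIT]]
          for offset in range(0, len(index), CSR_MATRIX_INDEX_SIZE_LIMIT)]
# the chunked extraction matches direct fancy indexing
assert (sparse.vstack(chunks) != m[index]).nnz == 0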
Example #20
Source File: data_statistics.py    From pyxclib with MIT License
def compute(self, train_features, train_labels, 
                test_features=None, test_labels=None):
        """Compute features for given data. Test data is optional.
        Args:
            train_features: csr_matrix: train features
            train_labels: csr_matrix: train labels
            test_features: csr_matrix: test features
            test_labels: csr_matrix: test labels
        """
        self.n_train_samples, self.n_features = train_features.shape
        self.n_labels = train_labels.shape[1] 
        if test_features is not None:
            self.n_test_samples = test_features.shape[0]
            features = vstack([train_features, test_features]).tocsr()
            labels = vstack([train_labels, test_labels]).tocsr()
        else:
            features = train_features
            labels = train_labels
        self.n_avg_samples_per_label = self.compute_avg_samples_per_label(labels)
        self.n_avg_labels_per_sample = self.compute_avg_labels_per_sample(labels)
        self.avg_doc_length = self.compute_avg_doc_length(features) 
Example #21
Source File: _canonical_constraint.py    From ip-nonlinear-solver with BSD 3-Clause "New" or "Revised" License
def _concatenate_dense_jac(jac_list):
    # Read sequentially all jacobians.
    # Convert all values to numpy arrays.
    jac_ineq_list = []
    jac_eq_list = []
    for jac_tuple in jac_list:
        J_ineq, J_eq = jac_tuple
        if spc.issparse(J_ineq):
            jac_ineq_list += [J_ineq.toarray()]
        else:
            jac_ineq_list += [np.atleast_2d(J_ineq)]
        if spc.issparse(J_eq):
            jac_eq_list += [J_eq.toarray()]
        else:
            jac_eq_list += [np.atleast_2d(J_eq)]
    # Concatenate all
    J_ineq = np.vstack(jac_ineq_list)
    J_eq = np.vstack(jac_eq_list)
    # Return
    return J_ineq, J_eq 
Example #22
Source File: DeepNeuralNetTrain.py    From DeepNeuralNet-QSAR with GNU General Public License v3.0
def sampleMBFromAll(casesPerTask, datasets):
    inpsList = []
    targs = num.zeros((sum(casesPerTask), len(datasets)), dtype=num.float32)
    targsMask = num.zeros((sum(casesPerTask), len(datasets)), dtype=num.float32)
    for i in range(len(datasets)):
        idx = num.random.randint(datasets[i].inps.shape[0], size=(casesPerTask[i],))
        inpsList.append(datasets[i].inps[idx])
        targs[sum(casesPerTask[:i]):sum(casesPerTask[:(i+1)])] = datasets[i].targsFull[idx]
        targsMask[sum(casesPerTask[:i]):sum(casesPerTask[:(i+1)]), i] = 1
    if isinstance(inpsList[0], num.ndarray):
        inps = num.vstack(inpsList)
    else:
        inps = sp.vstack(inpsList)
    return inps, targs, targsMask 
Example #23
Source File: tr_interior_point.py    From GraphicDesignPatternByPython with MIT License
def _assemble_sparse_jacobian(self, J_eq, J_ineq, s):
        """Assemble sparse jacobian given its components.

        Given ``J_eq``, ``J_ineq`` and ``s`` returns:
            jacobian = [ J_eq,     0     ]
                       [ J_ineq, diag(s) ]

        It is equivalent to:
            sps.bmat([[ J_eq,   None    ],
                      [ J_ineq, diag(s) ]], "csr")
        but significantly more efficient for this
        given structure.
        """
        n_vars, n_ineq, n_eq = self.n_vars, self.n_ineq, self.n_eq
        J_aux = sps.vstack([J_eq, J_ineq], "csr")
        indptr, indices, data = J_aux.indptr, J_aux.indices, J_aux.data
        new_indptr = indptr + np.hstack((np.zeros(n_eq, dtype=int),
                                         np.arange(n_ineq+1, dtype=int)))
        size = indices.size+n_ineq
        new_indices = np.empty(size)
        new_data = np.empty(size)
        mask = np.full(size, False, bool)
        mask[new_indptr[-n_ineq:]-1] = True
        new_indices[mask] = n_vars+np.arange(n_ineq)
        new_indices[~mask] = indices
        new_data[mask] = s
        new_data[~mask] = data
        J = sps.csr_matrix((new_data, new_indices, new_indptr),
                           (n_eq + n_ineq, n_vars + n_ineq))
        return J 
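A sketch of the reference construction named in the docstring, useful for checking the optimized assembly against sps.bmat; the shapes here are hypothetical:

import numpy as np
import scipy.sparse as sps

n_vars, n_eq, n_ineq = 4, 2, 3
J_eq = sps.random(n_eq, n_vars, density=0.5, format='csr')
J_ineq = sps.random(n_ineq, n_vars, density=0.5, format='csr')
s = np.arange(1.0, n_ineq + 1)
ref = sps.bmat([[J_eq, None], [J_ineq, sps.diags(s)]], format='csr')
print(ref.shape)  # (5, 7) == (n_eq + n_ineq, n_vars + n_ineq)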
Example #24
Source File: ova.py    From pyxclib with MIT License
def _merge_weights(self, weights, biases):
        # Bias is always a dense array
        if self.feature_type == 'sparse':
            self.weight = sp.vstack(
                weights, format='csr', dtype=np.float32)
            self.bias = sp.vstack(
                biases, format='csr', dtype=np.float32).toarray()
        else:
            self.weight = np.vstack(weights).astype(np.float32).squeeze()
            self.bias = np.vstack(biases).astype(np.float32) 
Example #25
Source File: ova.py    From pyxclib with MIT License
def convert_to_sparse(weight, bias):
    weight = np.vstack(weight).squeeze()
    bias = np.vstack(bias).squeeze()
    return sp.csr_matrix(weight), sp.csr_matrix(bias).transpose() 
Example #26
Source File: vis_topic.py    From corex_topic with Apache License 2.0
def all_bbow(docs, n=100):
    """Split each document into a subdocuments of size n, and return as binary BOW"""
    proc = skt.CountVectorizer(token_pattern=pattern)
    proc.fit(docs)
    ids = []
    for l, doc in enumerate(docs):
        subdocs = chunks(doc, n=n)
        submat = (proc.transform(subdocs) > 0)
        if l == 0:
            mat = submat
        else:
            mat = ss.vstack([mat, submat])
        ids += [l]*submat.shape[0]
    return mat.asformat('csr'), proc, ids 
Example #27
Source File: selection.py    From ektelo with Apache License 2.0
def select(self):
        domain_shape = self.domain_shape
        marginals = []
        for ind,shape in enumerate(domain_shape):
            queries = [np.ones(n) for n in domain_shape[:ind]] + [sparse.identity(shape)] + [np.ones(n) for n in domain_shape[ind+1:]]
            queries = reduce(sparse.kron, queries)
            marginals.append(queries)
        strategy = sparse.vstack(marginals)

        return strategy 
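The same construction sketched standalone for a concrete 2x3 domain, to make the resulting shapes visible (an illustration, not ektelo's API):

import numpy as np
from functools import reduce
from scipy import sparse

domain_shape = (2, 3)
marginals = []
for ind, shape in enumerate(domain_shape):
    queries = ([np.ones(n) for n in domain_shape[:ind]]
               + [sparse.identity(shape)]
               + [np.ones(n) for n in domain_shape[ind + 1:]])
    marginals.append(reduce(sparse.kron, queries))
strategy = sparse.vstack(marginals)
print(strategy.shape)  # (5, 6): one row per marginal cell of the 2x3 domain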
Example #28
Source File: ml.py    From EDeN with MIT License
def multiprocess_vectorize(iterators,
                           vectorizer=None,
                           pre_processor=None,
                           pre_processor_args=None,
                           fit_flag=False,
                           n_blocks=5,
                           block_size=None,
                           n_jobs=8):
    """multiprocess_vectorize."""
    iterators = list(iterators)
    # fitting happens in a serial fashion
    if fit_flag:
        if pre_processor is not None:
            if pre_processor_args is not None:
                graphs = pre_processor(iterators, **pre_processor_args)
            else:
                graphs = pre_processor(iterators)
        else:
            graphs = iterators
        vectorizer.fit(graphs)
    size = len(iterators)
    intervals = compute_intervals(size=size,
                                  n_blocks=n_blocks,
                                  block_size=block_size)
    if n_jobs == -1:
        pool = mp.Pool()
    else:
        pool = mp.Pool(n_jobs)
    results = [apply_async(pool, serial_vectorize,
                           args=(iterators[start:end],
                                 vectorizer,
                                 pre_processor,
                                 pre_processor_args,
                                 False))
               for start, end in intervals]
    output = [p.get() for p in results]
    pool.close()
    pool.join()
    data_matrix = vstack(output, format="csr")
    return data_matrix 
Example #29
Source File: ir2tagsets_seq.py    From plastering with MIT License
def _make_doc_vectorizer(self, doc):
        doc = [sentence.split() for sentence in doc]
        le = LabelBinarizer().fit(reduce(adder, doc))
        stack = []
        for sentence in doc:
            encoded = le.transform(sentence)
            padder = np.zeros((self.max_len - encoded.shape[0],
                               encoded.shape[1]))
            encoded = np.vstack([encoded, padder])
            stack.append(encoded)
        encoded_labels = np.stack(stack)
        return encoded_labels 
Example #30
Source File: selection.py    From ektelo with Apache License 2.0
def select(self):
        mat = sparse.vstack((self.M_hat, support.complement(self.M_hat, self.grid_size)))
        return matrix.EkteloMatrix(mat)