Python scipy.sparse.save_npz() Examples
The following are 18 code examples of scipy.sparse.save_npz(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module scipy.sparse, or try the search function.
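Before the project examples, here is a minimal sketch of the round trip that save_npz() and its counterpart load_npz() perform; the matrix contents and the /tmp file name below are illustrative only.

import numpy as np
from scipy import sparse

# Build a small CSR matrix and write it to disk in .npz format.
mat = sparse.csr_matrix(np.eye(3))
sparse.save_npz('/tmp/example_matrix.npz', mat)

# Read it back; load_npz returns a sparse matrix in the saved format.
loaded = sparse.load_npz('/tmp/example_matrix.npz')
print((mat != loaded).nnz == 0)  # True: the round trip preserves the data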
Example #1
Source File: base_attack.py From DeepRobust with MIT License | 6 votes |
def save_adj(self, root=r'/tmp/', name='mod_adj'):
    """Save attacked adjacency matrix.

    Parameters
    ----------
    root :
        root directory where the variable should be saved
    name : str
        saved file name

    Returns
    -------
    None.
    """
    assert self.modified_adj is not None, \
            'modified_adj is None! Please perturb the graph first.'
    name = name + '.npz'
    modified_adj = self.modified_adj

    if type(modified_adj) is torch.Tensor:
        sparse_adj = utils.to_scipy(modified_adj)
        sp.save_npz(osp.join(root, name), sparse_adj)
    else:
        sp.save_npz(osp.join(root, name), modified_adj)
Example #2
Source File: base_attack.py From DeepRobust with MIT License | 6 votes |
def save_adj(self, root=r'/tmp/', name='mod_adj'):
    """Save attacked adjacency matrix.

    Parameters
    ----------
    root :
        root directory where the variable should be saved
    name : str
        saved file name

    Returns
    -------
    None.
    """
    assert self.modified_adj is not None, \
            'modified_adj is None! Please perturb the graph first.'
    name = name + '.npz'
    modified_adj = self.modified_adj

    if type(modified_adj) is torch.Tensor:
        sparse_adj = utils.to_scipy(modified_adj)
        sp.save_npz(osp.join(root, name), sparse_adj)
    else:
        sp.save_npz(osp.join(root, name), modified_adj)
Example #3
Source File: reddit_fit_topics.py From causal-text-embeddings with MIT License | 5 votes |
def load_term_counts(reddit, path='../dat/reddit/', force_redo=False):
    count_filename = path + 'term_counts'
    vocab_filename = path + 'vocab'

    if os.path.exists(count_filename + '.npz') and not force_redo:
        return sparse.load_npz(count_filename + '.npz'), np.load(vocab_filename + '.npy')

    post_docs = reddit['post_text'].values
    counts, vocab, _ = tokenize_documents(post_docs)
    sparse.save_npz(count_filename, counts)
    np.save(vocab_filename, vocab)
    return counts, np.array(vocab)
Example #4
Source File: dump_tfidf.py From denspi with Apache License 2.0 | 5 votes |
def main():
    args = get_args()
    if args.nfs:
        from nsml import NSML_NFS_OUTPUT
        args.dump_dir = os.path.join(NSML_NFS_OUTPUT, args.dump_dir)
        args.out_dir = os.path.join(NSML_NFS_OUTPUT, args.out_dir)
        args.ranker_path = os.path.join(NSML_NFS_OUTPUT, args.ranker_path)
    args.ranker_path = os.path.join(
        args.ranker_path, 'docs-tfidf-ngram=2-hash=16777216-tokenizer=simple.npz')
    os.makedirs(args.out_dir)

    assert os.path.isdir(args.dump_dir)
    dump_paths = sorted([os.path.join(args.dump_dir, name)
                         for name in os.listdir(args.dump_dir) if 'hdf5' in name])[args.start:args.end]
    print(dump_paths)
    dump_names = [os.path.splitext(os.path.basename(path))[0] for path in dump_paths]
    dump_ranges = [list(map(int, name.split('-'))) for name in dump_names]
    phrase_dumps = [h5py.File(path, 'r') for path in dump_paths]

    ranker = None
    ranker = MyTfidfDocRanker(
        tfidf_path=args.ranker_path,
        strict=False
    )
    print('Ranker shape {} from {}'.format(ranker.doc_mat.shape, args.ranker_path))
    # new_mat = ranker.doc_mat.T.tocsr()
    # sp.save_npz('doc_tfidf.npz', new_mat)

    dump_tfidf(ranker, phrase_dumps, dump_names, args)
Example #5
Source File: wikidatagraph.py From opentapioca with Apache License 2.0 | 5 votes |
def save_matrix(self, fname):
    sparse.save_npz(fname, self.mat)
Example #6
Source File: load_data.py From neural_graph_collaborative_filtering with MIT License | 5 votes |
def get_adj_mat(self):
    try:
        t1 = time()
        adj_mat = sp.load_npz(self.path + '/s_adj_mat.npz')
        norm_adj_mat = sp.load_npz(self.path + '/s_norm_adj_mat.npz')
        mean_adj_mat = sp.load_npz(self.path + '/s_mean_adj_mat.npz')
        print('already load adj matrix', adj_mat.shape, time() - t1)
    except Exception:
        adj_mat, norm_adj_mat, mean_adj_mat = self.create_adj_mat()
        sp.save_npz(self.path + '/s_adj_mat.npz', adj_mat)
        sp.save_npz(self.path + '/s_norm_adj_mat.npz', norm_adj_mat)
        sp.save_npz(self.path + '/s_mean_adj_mat.npz', mean_adj_mat)
    return adj_mat, norm_adj_mat, mean_adj_mat
Example #7
Source File: loader_nfm.py From knowledge_graph_attention_network with MIT License | 5 votes |
def get_kg_feature(self, kg_feat_file):
    try:
        kg_feat_mat = sp.load_npz(kg_feat_file)
        print('already load item kg feature mat', kg_feat_mat.shape)
    except Exception:
        kg_feat_mat = self._create_kg_feat_mat()
        sp.save_npz(kg_feat_file, kg_feat_mat)
        print('already save item kg feature mat:', kg_feat_file)
    return kg_feat_mat
Example #8
Source File: make-trie.py From isdi with MIT License | 5 votes |
def join_mats(fnames, s, e):
    ofname = "mat_{}_{}".format(s, e)
    print(ofname, fnames)
    M = [sps.load_npz(f) for f in fnames]
    print("Done reading..")
    sps.save_npz(
        ofname,
        sps.vstack(M)
    )
Example #9
Source File: make-trie.py From isdi with MIT License | 5 votes |
def join_smart_mat(fnames):
    """Join arrays in Mlist inplace"""
    # M.indptr M.indices
    indptr = np.zeros(num_devices+1, dtype=np.int32)
    indices = np.zeros(Msize, dtype=np.int32)
    i_indptr, i_indices = 0, 0
    ofname = 'joined_mat.npz'
    M = [None for _ in fnames]
    for i, mf in enumerate(fnames):
        M[i] = sps.load_npz(mf)
        print("Loaded matrix={}. shape={}. nnz={}".format(mf, M[i].shape, M[i].nnz))
        # Mindptr = M.indptr
        # Mindices = M.indices
        # indptr[i_indptr+1:i_indptr+len(Mindptr)] = Mindptr[1:] + indptr[i_indptr]
        # i_indptr += len(Mindptr)-1
        # indices[i_indices:i_indices+len(Mindices)] = Mindices
        # i_indices += i_indices
        # del M
    print("Saving the file...")
    M = sps.csr_matrix(
        (np.ones(len(indices)), indices, indptr),
        shape=(len(indptr)-1, num_apps), dtype=bool
    )
    print(M.nnz)
    sps.save_npz(ofname, M)
Example #10
Source File: make-trie.py From isdi with MIT License | 5 votes |
def create_matrix(mf, mfname, ofname_cnt):
    indptr = np.zeros(LIM+1, dtype=np.int32)
    indices = array.array('I')
    ofname = mfname.rsplit('.', 2)[0] + '.csr_matrix'.format(ofname_cnt)
    j = 0
    for j, d in enumerate(mf):
        if j > LIM:
            break
        terms = d.decode('utf-8').strip().split(',')
        if len(terms) < 1:
            continue
        i, terms = int(terms[0]), terms[1:]
        indices.extend([_get(t) for t in terms])
        indptr[j % LIM + 1] = len(indices)
        if j % 10000 == 0:
            print("Done {}".format(j))
            # print("Saving: j={} start: {} stop: {}".format(j, start, stop))
    if j > 0:
        print("Saving... {}".format(ofname))
        if len(indptr) > j:
            indptr = indptr[:j+2]
        print(len(indices), indptr)
        M = sps.csr_matrix(
            (np.ones(len(indices)), indices, indptr),
            shape=(len(indptr)-1, num_apps), dtype=bool
        )
        print(M.nnz)
        sps.save_npz(ofname, M)
        create_matrix(mf, mfname, ofname_cnt+1)
Example #11
Source File: peerread_fit_topics.py From causal-text-embeddings with MIT License | 5 votes |
def load_term_counts(df, path='../dat/PeerRead/', force_redo=False, text_col='abstract_text'):
    count_filename = path + 'term_counts'
    vocab_filename = path + 'vocab'

    if os.path.exists(count_filename + '.npz') and not force_redo:
        return sparse.load_npz(count_filename + '.npz'), np.load(vocab_filename + '.npy')

    post_docs = df[text_col].values
    counts, vocab, _ = tokenize_documents(post_docs)
    sparse.save_npz(count_filename, counts)
    np.save(vocab_filename, vocab)
    return counts, np.array(vocab)
Example #12
Source File: hashing.py From deep_architect with MIT License | 5 votes |
def save_state(self, folderpath):
    state = {
        'num_evals': len(self.vecs_lst),
        'vals_lst': self.vals_lst,
    }
    ut.write_jsonfile(state,
                      ut.join_paths([folderpath, 'hash_model_state.json']))
    for i, vecs in enumerate(self.vecs_lst):
        sp.save_npz(ut.join_paths([folderpath, str(i) + '.npz']), vecs)

# TODO: improve
Example #13
Source File: reddit_posts.py From causal-text-embeddings with MIT License | 5 votes |
def load_term_counts(path='../dat/', force_redo=False):
    count_filename = path + 'reddit_term_counts'
    vocab_filename = path + 'vocab'

    if os.path.exists(count_filename + '.npz') and not force_redo:
        return sparse.load_npz(count_filename + '.npz'), np.load(vocab_filename + '.npy')

    reddit = load_reddit()
    post_docs = reddit['post_text'].values
    counts, vocab = tokenize_documents(post_docs)
    sparse.save_npz(path + 'reddit_term_counts', counts)
    np.save(path + 'vocab', vocab)
    return counts, vocab
Example #14
Source File: peerread_output_att.py From causal-text-embeddings with MIT License | 5 votes |
def load_term_counts(df, path='../dat/PeerRead/', force_redo=False, text_col='abstract_text'):
    count_filename = path + 'term_counts'
    vocab_filename = path + 'vocab'

    if os.path.exists(count_filename + '.npz') and not force_redo:
        return sparse.load_npz(count_filename + '.npz').toarray(), np.load(vocab_filename + '.npy')

    post_docs = df[text_col].values
    counts, vocab, _ = tokenize_documents(post_docs)
    sparse.save_npz(count_filename, counts)
    np.save(vocab_filename, vocab)
    return counts.toarray(), np.array(vocab)
Example #15
Source File: reddit_output_att.py From causal-text-embeddings with MIT License | 5 votes |
def load_term_counts(reddit, path='../dat/reddit/', force_redo=False):
    count_filename = path + 'term_counts'
    vocab_filename = path + 'vocab'

    if os.path.exists(count_filename + '.npz') and not force_redo:
        return sparse.load_npz(count_filename + '.npz').toarray(), np.load(vocab_filename + '.npy')

    post_docs = reddit['post_text'].values
    counts, vocab, _ = tokenize_documents(post_docs)
    sparse.save_npz(count_filename, counts)
    np.save(vocab_filename, vocab)
    return counts.toarray(), np.array(vocab)
Example #16
Source File: base_attack.py From DeepRobust with MIT License | 5 votes |
def save_features(self, root=r'/tmp/', name='mod_features'):
    """Save attacked node feature matrix.

    Parameters
    ----------
    root :
        root directory where the variable should be saved
    name : str
        saved file name

    Returns
    -------
    None.
    """
    assert self.modified_features is not None, \
            'modified_features is None! Please perturb the graph first.'
    name = name + '.npz'
    modified_features = self.modified_features

    if type(modified_features) is torch.Tensor:
        sparse_features = utils.to_scipy(modified_features)
        sp.save_npz(osp.join(root, name), sparse_features)
    else:
        sp.save_npz(osp.join(root, name), modified_features)
Example #17
Source File: base_attack.py From DeepRobust with MIT License | 5 votes |
def save_features(self, root=r'/tmp/', name='mod_features'):
    """Save attacked node feature matrix.

    Parameters
    ----------
    root :
        root directory where the variable should be saved
    name : str
        saved file name

    Returns
    -------
    None.
    """
    assert self.modified_features is not None, \
            'modified_features is None! Please perturb the graph first.'
    name = name + '.npz'
    modified_features = self.modified_features

    if type(modified_features) is torch.Tensor:
        sparse_features = utils.to_scipy(modified_features)
        sp.save_npz(osp.join(root, name), sparse_features)
    else:
        sp.save_npz(osp.join(root, name), modified_features)
Example #18
Source File: libsvm.py From celer with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_X_y(dataset, compressed_path, multilabel, replace=False):
    """Load a LIBSVM dataset as sparse X and observation y/Y.

    If X and y already exist as npz and npy, they are not
    redownloaded unless replace=True."""
    ext = '.npz' if multilabel else '.npy'
    y_path = pjoin(CELER_PATH, "%s_target%s" % (NAMES[dataset], ext))
    X_path = pjoin(CELER_PATH, "%s_data.npz" % NAMES[dataset])
    if replace or not os.path.isfile(y_path) or not os.path.isfile(X_path):
        tmp_path = pjoin(CELER_PATH, "%s" % NAMES[dataset])

        decompressor = BZ2Decompressor()
        print("Decompressing...")
        with open(tmp_path, "wb") as f, open(compressed_path, "rb") as g:
            for data in iter(lambda: g.read(100 * 1024), b''):
                f.write(decompressor.decompress(data))

        n_features_total = N_FEATURES[dataset]
        print("Loading svmlight file...")
        with open(tmp_path, 'rb') as f:
            X, y = load_svmlight_file(
                f, n_features_total, multilabel=multilabel)

        os.remove(tmp_path)
        X = sparse.csc_matrix(X)
        X.sort_indices()
        sparse.save_npz(X_path, X)

        if multilabel:
            indices = np.array([lab for labels in y for lab in labels])
            indptr = np.cumsum([0] + [len(labels) for labels in y])
            data = np.ones_like(indices)
            Y = sparse.csr_matrix((data, indices, indptr))
            sparse.save_npz(y_path, Y)
            return X, Y
        else:
            np.save(y_path, y)
    else:
        X = sparse.load_npz(X_path)
        y = np.load(y_path)

    return X, y