Python scipy.sparse.dok_matrix() Examples
The following are code examples of scipy.sparse.dok_matrix().
Example #1
Source File: From pennylane with Apache License 2.0 | 6 votes |
def _get_alpha_z(omega, n, k):
    r"""Build the sparse vector of Z-rotation angles :math:`\alpha^z_k`.

    Args:
        omega: sparse column vector of phases. It is traversed with
            ``omega.items()``, so it must yield ``((row, col), value)``
            pairs (as a ``dok_matrix`` does).
        n (int): total number of qubits
        k (int): index of current qubit

    Returns:
        scipy.sparse.dok_matrix[np.float64]: a sparse vector of shape
        ``(2 ** (n - k), 1)`` representing :math:`\alpha^z_k`
    """
    angles = sparse.dok_matrix((2 ** (n - k), 1), dtype=np.float64)

    for (row, _), phase in omega.items():
        position = row + 1  # the formula below uses 1-based positions
        block = int(np.ceil(position * 2 ** (-k)))
        threshold = 2 ** (k - 1) * (2 * block - 1)
        # Entries above the threshold add their phase, the others subtract it.
        sign = 1.0 if position > threshold else -1.0
        angles[block - 1, 0] = angles[block - 1, 0] + sign * phase / 2 ** (k - 1)

    return angles
Example #2
Source File: From PyShortTextCategorization with MIT License | 6 votes |
def shorttext_to_vec(self, shorttext):
    """
    Turn a short text into a sparse indicator vector over the dictionary.

    The text is preprocessed and tokenized; every token that is present in
    ``self.dictionary.token2id`` sets the matching column to 1.0. Tokens that
    are not in the dictionary are ignored.

    The original author marked this as deprecated because it is too slow to
    run in a loop, while noting it is still used for prediction.

    :param shorttext: short text to be converted.
    :return: sparse vector of the vector representation
    :type shorttext: str
    :rtype: scipy.sparse.dok_matrix
    """
    words = tokenize(self.preprocessor(shorttext))
    row = dok_matrix((1, len(self.dictionary)))
    for word in words:
        if word not in self.dictionary.token2id:
            continue
        row[0, self.dictionary.token2id[word]] = 1.0
    return row[0, :]
Example #3
Source File: From sparse-evolutionary-artificial-neural-networks with MIT License | 6 votes |
def getCoreInputConnections(self):
    """Count, per row, the first-layer weights that are far enough from zero.

    The stored values of ``self.w[1]`` are sorted and two cut-offs are taken
    around the zero region (controlled by ``self.zeta``). Only weights
    strictly below the negative cut-off or strictly above the positive
    cut-off are kept.

    :return: array with the number of kept weights in each row
    """
    values = np.sort(self.w[1].data)
    firstZeroPos = find_first_pos(values, 0)
    lastZeroPos = find_last_pos(values, 0)

    largestNegative = values[int((1 - self.zeta) * firstZeroPos)]
    smallestPositive = values[
        int(min(values.shape[0] - 1,
                lastZeroPos + self.zeta * (values.shape[0] - lastZeroPos)))]

    wlil = self.w[1].tolil()
    wdok = dok_matrix((self.dimensions[0], self.dimensions[1]), dtype="float64")

    # remove the weights closest to zero
    # NOTE(review): the second zip argument was missing in the source text;
    # restored as wlil.data (the values matching wlil.rows) - confirm.
    for ik, (row, data) in enumerate(zip(wlil.rows, wlil.data)):
        for jk, val in zip(row, data):
            if val < largestNegative or val > smallestPositive:
                wdok[ik, jk] = val

    return wdok.tocsr().getnnz(axis=1)
Example #4
Source File: From PyShortTextCategorization with MIT License | 6 votes |
def generate_dtm(self, corpus, tfidf=False):
    """
    Generate the inside document-term matrix and other peripherical
    information objects. This is run when the class is instantiated.

    Sets ``self.dictionary`` and ``self.dtm``. Rows are filled for every id
    in ``self.docids``, using ``self.docid_dict`` to map a document id to its
    row; both attributes must already be set before this is called.

    :param corpus: corpus.
    :param tfidf: whether to weigh using tf-idf. (Default: False)
    :return: None
    :type corpus: list
    :type tfidf: bool
    """
    self.dictionary = Dictionary(corpus)
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 is the same dtype.
    self.dtm = dok_matrix((len(corpus), len(self.dictionary)), dtype=np.float64)
    bow_corpus = [self.dictionary.doc2bow(doctokens) for doctokens in corpus]
    if tfidf:
        weighted_model = TfidfModel(bow_corpus)
        bow_corpus = weighted_model[bow_corpus]
    for docid in self.docids:
        row = self.docid_dict[docid]
        for tokenid, count in bow_corpus[row]:
            self.dtm[row, tokenid] = count
Example #5
Source File: From PyShortTextCategorization with MIT License | 6 votes |
def convert_classdict_to_XY(self, classdict):
    """
    Build sparse training matrices from labelled short texts.

    ``X`` holds one row per short text with token counts; ``y`` holds a 1 in
    the column of the text's label. Both matrices get one row for every text
    in ``classdict``, but only texts whose label is in ``self.labels2idx``
    are written, so rows for other labels stay empty at the end.

    :param classdict: training data
    :return: a tuple, consisting of sparse matrices for X (training data)
             and y (the labels of the training data)
    :type classdict: dict
    :rtype: tuple
    """
    nb_data = sum(len(texts) for texts in classdict.values())
    X = dok_matrix((nb_data, len(self.dictionary)))
    y = dok_matrix((nb_data, len(self.labels2idx)))

    rowid = 0
    for label, texts in classdict.items():
        if label not in self.labels2idx:
            continue
        for shorttext in texts:
            for token in tokenize(self.preprocessor(shorttext)):
                X[rowid, self.dictionary.token2id[token]] += 1.0
            y[rowid, self.labels2idx[label]] = 1.
            rowid += 1

    return X, y
Example #6
Source File: From Recommender-Systems-Samples with MIT License | 6 votes |
def load_rating_file_as_matrix(self, filename):
    """Read a tab-separated rating file into a binary interaction matrix.

    Each line is ``user<TAB>item<TAB>rating...``. The file is read twice:
    first to find the largest user and item ids, then to mark every pair
    whose rating is greater than zero with 1.0.

    :param filename: path of the rating file
    :return: ``dok_matrix`` of shape ``(max_user + 1, max_item + 1)``
    """
    # first pass: get numbers of users and items
    num_users, num_items = 0, 0
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split('\t')
            num_users = max(num_users, int(fields[0]))
            num_items = max(num_items, int(fields[1]))

    # second pass: construct matrix
    mat = sp.dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split('\t')
            user, item, rating = int(fields[0]), int(fields[1]), float(fields[2])
            if rating > 0:
                mat[user, item] = 1.0
    return mat
Example #7
Source File: From sparse-evolutionary-artificial-neural-networks with MIT License | 6 votes |
Example #8
Source File: From qiskit-aqua with Apache License 2.0 | 6 votes |
def __init__(
        self,
        quadratic_program: Any,
        coefficients: Union[ndarray, spmatrix, List[List[float]],
                            Dict[Tuple[Union[int, str], Union[int, str]], float]]) -> None:
    """Creates a new quadratic expression.

    The quadratic expression can be defined via an array, a list, a sparse
    matrix, or a dictionary that uses variable names or indices as keys, and
    stores the values internally as a dok_matrix. Values are stored in a
    compressed way, i.e., values at symmetric positions are summed up in the
    upper triangle. For example, {(0, 1): 1, (1, 0): 2} -> {(0, 1): 3}.

    Args:
        quadratic_program: The parent QuadraticProgram.
        coefficients: The (sparse) representation of the coefficients.
    """
    super().__init__(quadratic_program)
    # NOTE(review): the conversion described above is not visible in this
    # method; presumably `coefficients` is a property with a setter - confirm.
    self.coefficients = coefficients
Example #9
Source File: From CITE-seq-Count with MIT License | 6 votes |
def generate_sparse_matrices(final_results, ordered_tags_map, top_cells):
    """
    Create two sparse matrices with umi and read counts.

    Rows are tags (indexed through ``ordered_tags_map``) and columns are
    cells, in the order of ``top_cells``. Tags with an empty counter for a
    cell are skipped.

    Args:
        final_results (dict): Results in a dict of dicts of Counters.
        ordered_tags_map (dict): Tags in order with indexes as values.
        top_cells: Ordered cell barcodes; each one becomes a column.

    Returns:
        umi_results_matrix (scipy.sparse.dok_matrix): UMI counts
        read_results_matrix (scipy.sparse.dok_matrix): Read counts
    """
    shape = (len(ordered_tags_map), len(top_cells))
    umi_results_matrix = sparse.dok_matrix(shape, dtype=int32)
    read_results_matrix = sparse.dok_matrix(shape, dtype=int32)

    for column, cell_barcode in enumerate(top_cells):
        cell_tags = final_results[cell_barcode]
        for tag in cell_tags:
            umis = cell_tags[tag]
            if not umis:
                continue
            row = ordered_tags_map[tag]
            umi_results_matrix[row, column] = len(umis)
            read_results_matrix[row, column] = sum(umis.values())

    return (umi_results_matrix, read_results_matrix)
Example #10
Source File: From markov_clustering with MIT License | 6 votes |
def prune(matrix, threshold):
    """
    Remove very small edges from the matrix.

    Entries below ``threshold`` are dropped, except that the maximum value of
    every column is always written back. The input matrix is not modified.

    :param matrix: The matrix to be pruned
    :param threshold: The value below which edges will be removed
    :returns: The pruned matrix (CSC for sparse input, otherwise a copy of
              the input)
    """
    if not isspmatrix(matrix):
        result = matrix.copy()
        result[result < threshold] = 0
    else:
        keep = matrix >= threshold
        result = dok_matrix(matrix.shape)
        result[keep] = matrix[keep]
        result = result.tocsc()

    # keep max value in each column. same behaviour for dense/sparse
    n_cols = matrix.shape[1]
    top_rows = matrix.argmax(axis=0).reshape((n_cols,))
    columns = np.arange(n_cols)
    result[top_rows, columns] = matrix[top_rows, columns]

    return result
Example #11
Source File: From markov_clustering with MIT License | 6 votes |
def delta_matrix(matrix, clusters):
    """
    Compute delta matrix where delta[i,j]=1 if i and j belong to same
    cluster and i!=j

    Only the shape and sparseness of ``matrix`` are used: a sparse input
    gives a ``dok_matrix``, anything else a dense array of zeros.

    :param matrix: The adjacency matrix
    :param clusters: The clusters returned by get_clusters
    :returns: delta matrix
    """
    delta = dok_matrix(matrix.shape) if isspmatrix(matrix) else np.zeros(matrix.shape)

    for members in clusters:
        # every ordered pair of distinct members of the cluster
        for pair in permutations(members, 2):
            delta[pair] = 1

    return delta
Example #12
Source File: From moviegeek with MIT License | 6 votes |
def load_data():
    """Load all ratings into a sparse user-by-movie matrix.

    Users are ordered by their number of rated movies, descending; each user
    becomes one row. Columns follow the order of the distinct movie ids.

    :return: tuple ``(user_ids, user_ratings)`` where ``user_ids`` is the
             ordered list of user records and ``user_ratings`` the
             ``dok_matrix`` of ratings
    """
    print('loading data')

    users_by_activity = (Rating.objects.values('user_id')
                         .annotate(movie_count=Count('movie_id'))
                         .order_by('-movie_count'))
    user_ids = list(users_by_activity)
    content_ids = list(Rating.objects.values('movie_id').distinct())
    content_map = {entry['movie_id']: column
                   for column, entry in enumerate(content_ids)}

    num_users = len(user_ids)
    user_ratings = dok_matrix((num_users, len(content_ids)), dtype=np.float32)

    # each user corresponds to a row, in the order of user_ids
    for row, user in enumerate(user_ids):
        for user_rating in Rating.objects.filter(user_id=user['user_id']):
            user_ratings[row, content_map[user_rating.movie_id]] = user_rating.rating

    print('data loaded')
    return user_ids, user_ratings
Example #13
Source File: From qiskit-aqua with Apache License 2.0 | 6 votes |
def test_init(self):
    """ test init. """
    quadratic_program = QuadraticProgram()
    for _ in range(5):
        quadratic_program.continuous_var()

    # The same coefficients 0..4 expressed in every accepted input form.
    as_list = list(range(5))
    as_array = np.array(as_list)
    as_dok = dok_matrix([as_list])
    by_index = {i: i for i in range(1, 5)}
    by_name = {'x{}'.format(i): i for i in range(1, 5)}

    for coeffs in (as_list, as_array, as_dok, by_index, by_name):
        linear = LinearExpression(quadratic_program, coeffs)
        self.assertEqual((linear.coefficients != as_dok).nnz, 0)
        self.assertTrue((linear.to_array() == as_list).all())
        self.assertDictEqual(linear.to_dict(use_name=False), by_index)
        self.assertDictEqual(linear.to_dict(use_name=True), by_name)
Example #14
Source File: From qiskit-aqua with Apache License 2.0 | 6 votes |
def __init__(
        self,
        quadratic_program: Any,
        coefficients: Union[ndarray, spmatrix, List[float],
                            Dict[Union[int, str], float]]) -> None:
    """Creates a new linear expression.

    The linear expression can be defined via an array, a list, a sparse
    matrix, or a dictionary that uses variable names or indices as keys, and
    stores the values internally as a dok_matrix.

    Args:
        quadratic_program: The parent QuadraticProgram.
        coefficients: The (sparse) representation of the coefficients.
    """
    super().__init__(quadratic_program)
    # NOTE(review): the conversion described above is not visible in this
    # method; presumably `coefficients` is a property with a setter - confirm.
    self.coefficients = coefficients
Example #15
Source File: From pennylane with Apache License 2.0 | 6 votes |
def _compute_theta(alpha):
    """Calculates the rotation angles from the alpha vector.

    Each output row is the sum of ``_matrix_M_entry(col, row) * alpha[col]``
    over the stored entries of ``alpha``, scaled by ``2 ** -log2(len(alpha))``.
    Results with magnitude at most 1e-6 are left out of the sparse output.

    Args:
        alpha: sparse column vector of alpha parameters (traversed with
            ``alpha.items()``)

    Returns:
        scipy.sparse.dok_matrix: rotation angles theta, same shape as alpha
    """
    n_rows = alpha.shape[0]
    scale = 2 ** (-np.log2(n_rows))

    theta = sparse.dok_matrix(alpha.shape, dtype=np.float64)  # type: sparse.dok_matrix

    for row in range(n_rows):
        # Use transpose of M:
        weighted = sum(_matrix_M_entry(col, row) * value
                       for (col, _), value in alpha.items())
        weighted *= scale
        if abs(weighted) > 1e-6:
            theta[row, 0] = weighted

    return theta
Example #16
Source File: From pymdptoolbox with BSD 3-Clause "New" or "Revised" License | 6 votes |
def getTransitionAndRewardArrays():
    """Build the transition matrices and the reward array.

    Returns a tuple ``(P, R)``: ``P`` is a list with one CSR matrix of shape
    ``(STATES, STATES)`` per action, ``R`` is a dense array of shape
    ``(STATES, ACTIONS)``.
    """
    P = [dok_matrix((STATES, STATES)) for _ in range(ACTIONS)]
    R = np.zeros((STATES, ACTIONS))

    # Naive approach, iterate through all possible combinations
    for a in range(ACTIONS):
        transitions = P[a]
        for s in range(STATES):
            state = convertIndexToTuple(s)
            if isValid(state):
                s1, p, r = getTransitionProbabilities(state, a)
                transitions[s, s1] = p
                R[s, a] = r
            else:
                # There are no defined moves from an invalid state, so
                # transition probabilities cannot be calculated. However,
                # P must be a square stochastic matrix, so assign a
                # probability of one to the invalid state transitioning
                # back to itself. The reward stays 0.
                transitions[s, s] = 1
        P[a] = transitions.tocsr()

    return (P, R)
Example #17
Source File: From MoguTDA with MIT License | 6 votes |
def boundary_operator(self, i):
    """Build the sparse boundary matrix from i-faces to (i-1)-faces.

    Columns are the faces returned by ``self.n_faces(i)`` and rows those
    returned by ``self.n_faces(i - 1)``. Dropping the vertex at position
    ``a`` of a source face gives a target face; that entry is +1 for even
    ``a`` and -1 for odd ``a``. When there are no (i-1)-faces, a single row
    of ones is returned.

    :param i: dimension of the source faces
    :return: ``dok_matrix`` of dtype float64
    """
    sources = self.n_faces(i)
    targets = self.n_faces(i - 1)

    if len(targets) == 0:
        S = dok_matrix((1, len(sources)), dtype=np.float64)
        S[0, 0:len(sources)] = 1
        return S

    column_of = {face: idx for idx, face in enumerate(sources)}
    row_of = {face: idx for idx, face in enumerate(targets)}

    S = dok_matrix((len(targets), len(sources)), dtype=np.float64)
    for face in sources:
        col = column_of[face]
        for a in range(len(face)):
            reduced = face[:a] + face[(a + 1):]
            # same as (-1) ** a
            S[row_of[reduced], col] = -1 if a % 2 == 1 else 1
    return S
Example #18
Source File: From Quadflor with BSD 3-Clause "New" or "Revised" License | 6 votes |
def predict(self, X):
    """Predict a binary label matrix for the samples in ``X``.

    The base classifier scores every label; ``self._get_top_labels`` selects
    candidate labels per sample. A candidate label is predicted only when a
    meta classifier exists for it and that classifier returns 1.

    :param X: feature matrix with one row per sample
    :return: ``csr_matrix`` of shape ``(X.shape[0], self.y.shape[1])``
    """
    # NOTE(review): this call was truncated in the source text (unclosed
    # parenthesis). dtype=int is the presumed original argument - confirm.
    predictions = dok_matrix((X.shape[0], self.y.shape[1]), dtype=int)

    distances = self.base_classifier.predict_proba(X)
    topNIndices, topNDistances = self._get_top_labels(distances)

    for entry, (label_list, dist_list) in enumerate(zip(topNIndices, topNDistances)):
        for rank, label in enumerate(label_list):
            if label not in self.meta_classifiers:
                continue
            if not self.dependencies:
                # only the rank and score of this label are used
                training_sample = [[rank, dist_list[rank]]]
            else:
                # the scores of all labels for this sample are used
                training_sample = [distances[entry, :]]
            prediction = self.meta_classifiers[label].predict(training_sample)[0]
            if prediction == 1:
                predictions[entry, label] = 1

    return csr_matrix(predictions)
Example #19
Source File: From Quadflor with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _a(self, neighbor_ids):
    """Predict labels from the share of neighbours that carry each label.

    For every group of neighbour ids, a label is predicted when
    ``floor(share + (1 - self.threshold))`` is non-zero, where ``share`` is
    the fraction of neighbours with that label. If no label passes, the
    single most frequent label is predicted instead.

    :param neighbor_ids: iterable of neighbour-index collections, one per
                         sample
    :return: sparse matrix with one row of predicted labels per sample
    """
    n_labels = self.y.shape[1]
    stacked = sp.csr_matrix((0, n_labels))

    for ns in neighbor_ids:
        # By squeezing we support matrix output from scipy.sparse.sum and
        # 1D array from np.sum
        label_counts = np.squeeze(np.array(self.y[ns].sum(0)))
        shares = np.divide(label_counts, len(ns))
        row = sp.csr_matrix([np.floor(shares + (1 - self.threshold))])

        # If there are no labels, we take the most frequent label.
        if row.sum() == 0:
            fallback = sp.dok_matrix((1, row.shape[1]))
            fallback[0, shares.argmax()] = 1
            row = sp.csr_matrix(fallback)

        stacked = sp.vstack((stacked, row))

    return stacked
Example #20
Source File: From Quadflor with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _b(self, neighbor_ids):
    """Predict, per sample, the k most frequent labels among its neighbours.

    ``k`` is the floor of the mean number of labels per neighbour. When the
    neighbours carry less than one label on average, ``k`` is 0 and no label
    is predicted for that sample.

    :param neighbor_ids: iterable of neighbour-index collections, one per
                         sample
    :return: sparse matrix with one row of predicted labels per sample
    """
    result = sp.csr_matrix((0, self.y.shape[1]))

    for ns in neighbor_ids:
        average_label_nums = int(np.floor(np.mean([self.y[n].sum() for n in ns])))
        neighbor_labels = self.y[ns]
        labels_sum = np.array(neighbor_labels.sum(0))
        # Flattening supports matrix output from scipy.sparse.sum and 1D
        # array from np.sum, and keeps a 1-D array for a single label column.
        divide = np.divide(labels_sum, len(ns)).ravel()

        # A plain [-k:] slice would select every label when k == 0,
        # so that case is handled explicitly.
        if average_label_nums > 0:
            predicted_indices = np.argsort(divide)[-average_label_nums:]
        else:
            predicted_indices = []

        predicted_labels = sp.dok_matrix((1, len(divide)))
        # noinspection PyTypeChecker
        for index in predicted_indices:
            predicted_labels[0, index] = 1

        result = sp.vstack((result, sp.csr_matrix(predicted_labels)))

    return result
Example #21
Source File: From deep_architect_legacy with MIT License | 6 votes |
def _compute_features(self, model):
    """Compute a sparse feature row for ``model``.

    Column 0 holds the number of modules returned by ``model.repr_model()``.
    The remaining columns count occurrences of the module n-grams listed in
    ``self.module_ngram_to_id``.

    :param model: object whose ``repr_model()`` yields sequences; the first
                  element of each is used as the module name
    :return: ``csr_matrix`` of shape ``(1, 1 + len(self.module_ngram_to_id))``
             and dtype float32
    """
    bls = [b[0] for b in tuple(model.repr_model())]

    nfeats_other = 1
    nfeats_ngrams = len(self.module_ngram_to_id)
    nfeats = nfeats_other + nfeats_ngrams
    feats = sp.dok_matrix((1, nfeats), dtype=np.float32)

    # other features
    feats[0, 0] = len(bls)

    # ngrams features
    # `range` replaces the Python 2-only `xrange`; the iteration is the same.
    # NOTE(review): the bounds skip n-grams of length `ngram_maxlen` and the
    # last window of each length - confirm this is intended.
    for k in range(1, self.ngram_maxlen):
        for i in range(len(bls) - k):
            ngram = tuple(bls[i:i + k])
            if ngram in self.module_ngram_to_id:
                ngram_i = self.module_ngram_to_id[ngram]
                feats[0, nfeats_other + ngram_i] += 1.0

    return sp.csr_matrix(feats)
Example #22
Source File: From nasbot with MIT License | 6 votes |
def test_kahn_topoligical_sort(self):
    """ Tests topological sorting. """
    # NOTE(review): the call prefix of this statement was missing in the
    # source text; `self.report(` is the presumed original - confirm.
    self.report('Testing topological sort.')

    # Acyclic graph: A[i, j] = 1 sets an edge between nodes i and j.
    A = dok_matrix((6, 6))
    A[0, 2] = 1
    A[1, 2] = 1
    A[1, 3] = 1
    A[2, 3] = 1
    A[2, 4] = 1
    A[4, 3] = 1
    A[5, 0] = 1
    A[5, 1] = 1
    sorted_order, has_cycles = graph_utils.kahn_topological_sort(A, 5)
    assert has_cycles == False
    # Two orderings are accepted, differing only in the order of nodes 0 and 1.
    assert sorted_order == [5, 0, 1, 2, 4, 3] or sorted_order == [5, 1, 0, 2, 4, 3]

    # Now create a graph with cycles
    A[3, 0] = 1
    sorted_order, has_cycles = graph_utils.kahn_topological_sort(A, 5)
    assert has_cycles == True
Example #23
Source File: From qiskit-aqua with Apache License 2.0 | 6 votes |
def evaluate(self, x: Union[ndarray, List, Dict[Union[int, str], float]]) -> float:
    """Evaluate the linear expression for given variables.

    Args:
        x: The values of the variables to be evaluated.

    Returns:
        The value of the linear expression given the variable values.
    """
    # Bring the input into the same sparse row form as the coefficients.
    values = self._coeffs_to_dok_matrix(x)

    # The dot product of the two rows is a 1x1 matrix; return its entry.
    product = values @ self.coefficients.transpose()
    return product[0, 0]
Example #24
Source File: From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0 | 5 votes |
def load_rating_file_as_matrix(self, filename):
    '''
    Read .rating file and Return dok matrix.

    Every line is parsed as ``user\\titem\\trating...``; no header line is
    skipped. Pairs with a rating greater than zero are stored as 1.0.
    '''
    # Get number of users and items
    num_users, num_items = 0, 0
    with open(filename, "r") as f:
        # readline returns "" only at end of file
        for line in iter(f.readline, ""):
            arr = line.split("\t")
            u, i = int(arr[0]), int(arr[1])
            if u > num_users:
                num_users = u
            if i > num_items:
                num_items = i

    # Construct matrix
    mat = sp.dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    with open(filename, "r") as f:
        for line in iter(f.readline, ""):
            arr = line.split("\t")
            user, item, rating = int(arr[0]), int(arr[1]), float(arr[2])
            if rating > 0:
                mat[user, item] = 1.0
    return mat
Example #25
Source File: From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0 | 5 votes |
def load_rating_file_as_matrix(self, filename):
    '''
    Read .rating file and Return dok matrix.

    Every line is parsed as ``user\\titem\\trating...``; no header line is
    skipped. Pairs with a rating greater than zero are stored as 1.0.
    '''
    # Get number of users and items
    max_user, max_item = 0, 0
    with open(filename, "r") as f:
        for record in f:
            fields = record.split("\t")
            max_user = max(max_user, int(fields[0]))
            max_item = max(max_item, int(fields[1]))

    # Construct matrix
    mat = sps.dok_matrix((max_user + 1, max_item + 1), dtype=np.float32)
    with open(filename, "r") as f:
        for record in f:
            fields = record.split("\t")
            user = int(fields[0])
            item = int(fields[1])
            rating = float(fields[2])
            if rating > 0:
                mat[user, item] = 1.0
    return mat
Example #26
Source File: From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _make_sparse(self, scores):
    """Stack per-document score mappings into one sparse matrix.

    :param scores: iterable of mappings from vocabulary term to score; every
                   term must be a key of ``self.vocabulary``
    :return: sparse matrix with one row per mapping and one column per
             vocabulary entry
    """
    width = len(self.vocabulary)
    stacked = sp.csr_matrix((0, width))

    for score in scores:
        row = sp.dok_matrix((1, width))
        for term, value in score.items():
            row[0, self.vocabulary[term]] = value
        stacked = sp.vstack((stacked, sp.csr_matrix(row)))

    return stacked
Example #27
Source File: From Quadflor with BSD 3-Clause "New" or "Revised" License | 5 votes |
def imitate_tr(self, graph, root):
    """Return a stand-in object exposing ``nx_graph`` and ``nx_root``.

    A do-nothing function is used as the attribute holder.

    :param graph: value stored as ``nx_graph``
    :param root: value stored as ``nx_root``
    :return: the function object carrying both attributes
    """
    def tr():
        pass

    tr.__dict__.update(nx_graph=graph, nx_root=root)
    return tr

#
# def test_speed(self):
#     _, _, tr = load_dataset('econ62k')
#     graph = tr.nx_graph
#
#     def random_labels():
#         def set_random_ones(n_nodes):
#             ids = np.random.choice(n_nodes, 5)
#             zeros = sp.dok_matrix((1, n_nodes), dtype=np.bool_)
#             for index in ids:
#                 zeros[0, index] = True
#             return zeros
#
#         number_of_nodes = graph.number_of_nodes()
#         matrix = set_random_ones(number_of_nodes)
#         for i in range(0, 62000):
#             zeros = set_random_ones(number_of_nodes)
#             matrix = sp.vstack((matrix, zeros))
#         return sp.csr_matrix(matrix)
#
#     y_true = random_labels()
#     y_pred = random_labels()
#     print('random constructed')
#
#     start = default_timer()
#     hierarchical_f_measure(graph, y_true, y_pred)
#     print(default_timer() - start)
Example #28
Source File: From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0 | 5 votes |
def load_itemGenres_as_matrix(path='ml-1m/'):
    """Load the item-by-genre indicator matrix from MovieLens-style files.

    Three files under ``path`` are read: an info file with a line
    ``<count> items``, ``u.genre`` with ``name|...`` lines (genres are
    numbered in file order), and ``movies.dat`` with ``id::title::g1|g2``
    lines. Reading of each file stops at the first empty line.

    :param path: directory prefix, including the trailing separator
    :return: tuple ``(num_items, itemGenres_mat)`` where the matrix is a
             dense float32 array of shape ``(num_items + 1, num_genres)``
    """
    num_items, num_type = 0, 0
    genre_index = {}  # genre name -> column; avoids shadowing builtin `dict`

    # NOTE(review): the file name was missing in the source text (path + "").
    # "u.info" matches the "<count> items" format parsed here - confirm.
    with open(path + "u.info", encoding="utf-8") as f:
        for raw in f:
            line = raw.strip('\n')
            if line == "":
                break
            arr = line.split(" ")
            if arr[1] == 'items':
                num_items = int(arr[0])

    with open(path + "u.genre", "r", encoding="utf-8") as f:
        for raw in f:
            line = raw.strip('\n')
            if line == "":
                break
            arr = line.split("|")
            genre_index[arr[0]] = num_type
            num_type = num_type + 1

    # Construct matrix
    mat = sp.dok_matrix((num_items + 1, num_type), dtype=np.float32)
    with open(path + "movies.dat", encoding="utf-8") as f:
        for raw in f:
            line = raw.strip('\r\n')
            if line == "":
                break
            arr = line.split("::")
            for ts in arr[2].split("|"):
                # genres missing from u.genre are ignored
                if ts in genre_index:
                    mat[int(arr[0]), genre_index[ts]] = 1

    itemGenres_mat = mat.toarray()
    return num_items, itemGenres_mat
Example #29
Source File: From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0 | 5 votes |
def load_rating_train_as_matrix(path='tafeng/'):
    """Load ``train.rating`` under ``path`` as a binary interaction matrix.

    Each line starts with ``user<TAB>item``; the matching cell is set to 1.0.
    The matrix size is fixed by the hard-coded user and item counts below.

    :param path: directory prefix, including the trailing separator
    :return: ``dok_matrix`` of dtype float32 and shape (32267, 23813)
    """
    # Get number of users and items
    num_users = 32266
    num_items = 23812

    # Construct matrix
    mat = sp.dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    with open(path + "train.rating", "r", encoding="utf-8") as f:
        for record in f:
            fields = record.split("\t")
            mat[int(fields[0]), int(fields[1])] = 1.0
    return mat
Example #30
Source File: From pennylane with Apache License 2.0 | 5 votes |
def _get_alpha_y(a, n, k):
    r"""Build the sparse vector of Y-rotation angles :math:`\alpha^y_k`.

    For every output row the squared entries of ``a`` are accumulated into a
    numerator (entries in part of the row's block) and a denominator (all
    entries of the block). The angle is ``2 * arcsin(sqrt(num) / sqrt(den))``.

    Args:
        a: sparse column vector, traversed with ``a.items()``
        n (int): total number of qubits
        k (int): index of current qubit

    Returns:
        scipy.sparse.dok_matrix[np.float64]: a sparse vector of shape
        ``(2 ** (n - k), 1)`` representing :math:`\alpha^y_k`
    """
    shape = (2 ** (n - k), 1)
    alpha = sparse.dok_matrix(shape, dtype=np.float64)
    numerator = sparse.dok_matrix(shape, dtype=np.float64)
    denominator = sparse.dok_matrix(shape, dtype=np.float64)

    for (i, _), e in a.items():
        j = int(math.ceil((i + 1) / 2 ** k))
        l = (i + 1) - (2 * j - 1) * 2 ** (k - 1)
        squared = e * e
        if 1 <= l <= 2 ** (k - 1):
            numerator[j - 1, 0] += squared
        denominator[j - 1, 0] += squared

    # Overwrite the stored sums in place with their (inverse) square roots.
    for (j, _), e in numerator.items():
        numerator[j, 0] = math.sqrt(e)
    for (j, _), e in denominator.items():
        denominator[j, 0] = 1 / math.sqrt(e)

    ratio = numerator.multiply(denominator)  # type: sparse.csr_matrix
    for (j, _), e in ratio.todok().items():
        alpha[j, 0] = 2 * np.arcsin(e)

    return alpha