Python Examples of scipy.sparse.dok

Source File: mottonen.py From pennylane with Apache License 2.0

6 votes

def _get_alpha_z(omega, n, k):
    r"""Build the sparse vector of Z-rotation angles :math:`\alpha^z_k`.

    Args:
        omega (scipy.sparse.dok_matrix): sparse container of input phases; its
            stored entries are visited through ``items()`` and only the row
            index of each entry is used
        n (int): total number of qubits
        k (int): index of current qubit

    Returns:
        scipy.sparse.dok_matrix[np.float64]: a sparse vector of shape
        ``(2 ** (n - k), 1)`` representing :math:`\alpha^z_k`
    """
    half_block = 2 ** (k - 1)
    angles = sparse.dok_matrix((2 ** (n - k), 1), dtype=np.float64)

    for (row, _), phase in omega.items():
        position = row + 1  # the formula below works with 1-based positions
        block = int(np.ceil(position * 2 ** (-k)))
        # Positions past the midpoint of their block of 2**k entries are added,
        # the remaining ones are subtracted.
        sign = 1.0 if position > half_block * (2 * block - 1) else -1.0
        angles[block - 1, 0] = angles[block - 1, 0] + sign * phase / half_block

    return angles

Source File: MaxEntClassification.py From PyShortTextCategorization with MIT License

6 votes

def shorttext_to_vec(self, shorttext):
        """ Convert a short text into a sparse token-presence vector.

        The text is passed through ``self.preprocessor`` and ``tokenize``. Every
        resulting token found in ``self.dictionary.token2id`` sets its column
        to 1.0 (presence, not a count); unknown tokens are ignored.

        The original author marked this routine as too slow to run in a loop,
        while noting that it is still used when doing prediction.

        :param shorttext: short text to be converted.
        :return: sparse vector of the vector representation
        :type shorttext: str
        :rtype: scipy.sparse.dok_matrix
        """
        tokens = tokenize(self.preprocessor(shorttext))
        indicator = dok_matrix((1, len(self.dictionary)))

        for token in tokens:
            if token not in self.dictionary.token2id:
                continue
            indicator[0, self.dictionary.token2id[token]] = 1.0

        return indicator[0, :]

Source File: set_mlp.py From sparse-evolutionary-artificial-neural-networks with MIT License

6 votes

def getCoreInputConnections(self):
        """Count, per row, the weights of ``self.w[1]`` that lie outside the pruning band.

        Two cut-off values are read from the sorted stored weights, using
        ``self.zeta`` and the positions returned by ``find_first_pos`` /
        ``find_last_pos`` (presumably the first and last position of 0 in the
        sorted values -- confirm against those helpers). A weight is kept only
        if it is strictly below the negative cut-off or strictly above the
        positive one.

        :return: array with the number of kept weights in each row
        """
        sorted_weights = np.sort(self.w[1].data)
        first_zero = find_first_pos(sorted_weights, 0)
        last_zero = find_last_pos(sorted_weights, 0)

        neg_cutoff = sorted_weights[int((1 - self.zeta) * first_zero)]
        pos_cutoff = sorted_weights[
            int(min(sorted_weights.shape[0] - 1,
                    last_zero + self.zeta * (sorted_weights.shape[0] - last_zero)))]

        lil = self.w[1].tolil()
        kept = dok_matrix((self.dimensions[0], self.dimensions[1]), dtype="float64")

        # copy only the weights that are far enough from zero
        for row_idx, (cols, vals) in enumerate(zip(lil.rows, lil.data)):
            for col_idx, weight in zip(cols, vals):
                if weight < neg_cutoff or weight > pos_cutoff:
                    kept[row_idx, col_idx] = weight

        return kept.tocsr().getnnz(axis=1)

Source File: dtm.py From PyShortTextCategorization with MIT License

6 votes

def generate_dtm(self, corpus, tfidf=False):
        """ Generate the inside document-term matrix and other peripherical information
        objects. According to the original documentation this is run when the class
        is instantiated.

        Sets ``self.dictionary`` (built from ``corpus``) and ``self.dtm``, a
        ``dok_matrix`` with one row per document and one column per dictionary
        entry. Rows are addressed through ``self.docids`` / ``self.docid_dict``,
        which must already be set on the instance.

        :param corpus: corpus.
        :param tfidf: whether to weigh using tf-idf. (Default: False)
        :return: None
        :type corpus: list
        :type tfidf: bool
        """
        self.dictionary = Dictionary(corpus)
        # ``np.float`` was only an alias of the builtin ``float`` and was removed in
        # NumPy 1.24; ``np.float64`` is the same dtype and exists on every release.
        self.dtm = dok_matrix((len(corpus), len(self.dictionary)), dtype=np.float64)
        bow_corpus = [self.dictionary.doc2bow(doctokens) for doctokens in corpus]
        if tfidf:
            weighted_model = TfidfModel(bow_corpus)
            bow_corpus = weighted_model[bow_corpus]
        for docid in self.docids:
            row = self.docid_dict[docid]
            for tokenid, count in bow_corpus[row]:
                self.dtm[row, tokenid] = count

Source File: MaxEntClassification.py From PyShortTextCategorization with MIT License

6 votes

def convert_classdict_to_XY(self, classdict):
        """ Turn labelled short texts into sparse training matrices.

        Each text of a label present in ``self.labels2idx`` fills one row: X holds
        token counts (columns from ``self.dictionary.token2id``) and y holds a 1 in
        the column of the label. Rows are allocated for every text in ``classdict``,
        so texts of labels missing from ``self.labels2idx`` leave trailing empty rows.

        :param classdict: training data
        :return: a tuple, consisting of sparse matrices for X (training data) and y (the labels of the training data)
        :type classdict: dict
        :rtype: tuple
        """
        n_rows = sum(len(classdict[label]) for label in classdict)
        features = dok_matrix((n_rows, len(self.dictionary)))
        targets = dok_matrix((n_rows, len(self.labels2idx)))

        next_row = 0
        for label in classdict:
            if label not in self.labels2idx:
                continue
            for shorttext in classdict[label]:
                for token in tokenize(self.preprocessor(shorttext)):
                    features[next_row, self.dictionary.token2id[token]] += 1.0
                targets[next_row, self.labels2idx[label]] = 1.
                next_row += 1

        return features, targets

Source File: Dataset.py From Recommender-Systems-Samples with MIT License

6 votes

def load_rating_file_as_matrix(self, filename):
        """Read a tab-separated rating file into a binary interaction matrix.

        Each line is expected to hold ``user<TAB>item<TAB>rating`` (further
        columns are ignored). The matrix is sized ``(max_user + 1, max_item + 1)``
        from the largest ids seen, and a cell is set to 1.0 when the rating on
        its line is strictly positive.

        The file is read once: ids and positive pairs are collected in a single
        pass instead of scanning the file a second time.

        :param filename: path of the rating file
        :return: ``scipy.sparse.dok_matrix`` of dtype float32
        :raises ValueError: if an id or rating cannot be parsed
        :raises IndexError: if a line has fewer than three columns
        """
        max_user, max_item = 0, 0
        positives = []
        with open(filename, 'r') as f:
            for line in f:
                fields = line.split('\t')
                user, item = int(fields[0]), int(fields[1])
                max_user = max(max_user, user)
                max_item = max(max_item, item)
                if float(fields[2]) > 0:
                    positives.append((user, item))

        # The shape is only known after the whole file has been seen.
        mat = sp.dok_matrix((max_user + 1, max_item + 1), dtype=np.float32)
        for user, item in positives:
            mat[user, item] = 1.0
        return mat

Source File: set_mlp.py From sparse-evolutionary-artificial-neural-networks with MIT License

6 votes

def getCoreInputConnections(self):
        """Return, for each row of ``self.w[1]``, how many weights survive pruning.

        The stored weights are sorted and two thresholds are picked from them
        with ``self.zeta`` and the positions reported by ``find_first_pos`` and
        ``find_last_pos`` (presumably the boundaries of the run of zeros in the
        sorted values -- confirm against those helpers). Only weights strictly
        outside the ``[negative threshold, positive threshold]`` band are kept.

        :return: array with the per-row count of kept weights
        """
        ordered = np.sort(self.w[1].data)
        total = ordered.shape[0]
        zeros_start = find_first_pos(ordered, 0)
        zeros_end = find_last_pos(ordered, 0)

        lower = ordered[int((1 - self.zeta) * zeros_start)]
        upper_index = min(total - 1, zeros_end + self.zeta * (total - zeros_end))
        upper = ordered[int(upper_index)]

        source = self.w[1].tolil()
        survivors = dok_matrix((self.dimensions[0], self.dimensions[1]), dtype="float64")

        # keep only the weights that are not close to zero
        for r, (col_ids, col_vals) in enumerate(zip(source.rows, source.data)):
            for c, w in zip(col_ids, col_vals):
                if (w < lower) or (w > upper):
                    survivors[r, c] = w

        return survivors.tocsr().getnnz(axis=1)

Source File: quadratic_expression.py From qiskit-aqua with Apache License 2.0

6 votes

def __init__(self, quadratic_program: Any,
                 coefficients: Union[ndarray, spmatrix, List[List[float]],
                                     Dict[Tuple[Union[int, str], Union[int, str]], float]]) -> None:
        """Creates a new quadratic expression.

        The quadratic expression can be defined via an array, a list, a sparse matrix, or a
        dictionary that uses variable names or indices as keys. According to the original
        documentation, the values are stored internally as a dok_matrix in a compressed way,
        i.e., values at symmetric positions are summed up in the upper triangle. For example,
        {(0, 1): 1, (1, 0): 2} -> {(0, 1): 3}.

        Args:
            quadratic_program: The parent QuadraticProgram.
            coefficients: The (sparse) representation of the coefficients.

        """
        # Hand the parent program to the base-class initializer.
        super().__init__(quadratic_program)
        # NOTE(review): the conversion described in the docstring is not visible here;
        # presumably `coefficients` is a property whose setter performs it -- confirm.
        self.coefficients = coefficients

Source File: processing.py From CITE-seq-Count with MIT License

6 votes

def generate_sparse_matrices(final_results, ordered_tags_map, top_cells):
    """
    Build the UMI-count and read-count matrices (tags x cells).

    For every cell barcode in ``top_cells`` and every tag with a non-empty
    counter, the UMI matrix receives the number of distinct keys of the
    counter and the read matrix receives the sum of its values.

    Args:
        final_results (dict): Results in a dict of dicts of Counters.
        ordered_tags_map (dict): Tags in order with indexes as values.
        top_cells (iterable): Cell barcodes; their enumeration order defines
            the matrix columns.

    Returns:
        umi_results_matrix (scipy.sparse.dok_matrix): UMI counts
        read_results_matrix (scipy.sparse.dok_matrix): Read counts

    """
    shape = (len(ordered_tags_map), len(top_cells))
    umi_counts = sparse.dok_matrix(shape, dtype=int32)
    read_counts = sparse.dok_matrix(shape, dtype=int32)

    for column, cell_barcode in enumerate(top_cells):
        per_tag = final_results[cell_barcode]
        for tag in per_tag:
            umis = per_tag[tag]
            if not umis:
                continue
            row = ordered_tags_map[tag]
            umi_counts[row, column] = len(umis)
            read_counts[row, column] = sum(umis.values())

    return (umi_counts, read_counts)

Source File: mcl.py From markov_clustering with MIT License

6 votes

def prune(matrix, threshold):
    """
    Drop entries below ``threshold`` while always keeping each column's maximum.

    Sparse input is rebuilt from the entries that are ``>= threshold`` and
    converted to CSC; dense input is copied and its entries ``< threshold``
    are zeroed. Afterwards the entry at each column's ``argmax`` row is copied
    back from the original matrix, so the largest value of a column survives
    even when it is below the threshold.

    :param matrix: The matrix to be pruned
    :param threshold: The value below which edges will be removed
    :returns: The pruned matrix
    """
    if not isspmatrix(matrix):
        result = matrix.copy()
        result[result < threshold] = 0
    else:
        keep_mask = matrix >= threshold
        result = dok_matrix(matrix.shape)
        result[keep_mask] = matrix[keep_mask]
        result = result.tocsc()

    # restore the per-column maximum; identical handling for dense and sparse
    n_columns = matrix.shape[1]
    max_rows = matrix.argmax(axis=0).reshape((n_columns,))
    columns = np.arange(n_columns)
    result[max_rows, columns] = matrix[max_rows, columns]

    return result

Source File: modularity.py From markov_clustering with MIT License

6 votes

def delta_matrix(matrix, clusters):
    """
    Build the co-membership matrix: delta[i,j]=1 when i and j are distinct
    members of the same cluster, 0 otherwise.

    The result has the shape of ``matrix`` and is a ``dok_matrix`` when
    ``matrix`` is sparse, a dense ``numpy`` array otherwise.

    :param matrix: The adjacency matrix
    :param clusters: The clusters returned by get_clusters
    :returns: delta matrix
    """
    shape = matrix.shape
    delta = dok_matrix(shape) if isspmatrix(matrix) else np.zeros(shape)

    for members in clusters:
        # every ordered pair of distinct members marks one cell
        for pair in permutations(members, 2):
            delta[pair] = 1

    return delta

Source File: user_cluster_calculator.py From moviegeek with MIT License

6 votes

def load_data():
        """Load all ratings into a sparse user x movie matrix.

        Users are ordered by descending number of rated movies; row ``i`` of the
        matrix belongs to ``user_ids[i]``. Columns follow the order of the
        distinct movie ids returned by the query.

        :return: tuple ``(user_ids, user_ratings)`` where ``user_ids`` is the list
            of query rows (dicts with ``user_id`` and ``movie_count``) and
            ``user_ratings`` is a float32 ``dok_matrix``
        """
        print('loading data')
        per_user_counts = Rating.objects.values('user_id').annotate(movie_count=Count('movie_id'))
        user_ids = list(per_user_counts.order_by('-movie_count'))
        content_ids = list(Rating.objects.values('movie_id').distinct())
        column_of_movie = {entry['movie_id']: position
                           for position, entry in enumerate(content_ids)}

        user_ratings = dok_matrix((len(user_ids), len(content_ids)), dtype=np.float32)
        for row, user in enumerate(user_ids):
            # one row per user, in the order of user_ids
            for user_rating in Rating.objects.filter(user_id=user['user_id']):
                user_ratings[row, column_of_movie[user_rating.movie_id]] = user_rating.rating
        print('data loaded')

        return user_ids, user_ratings

Source File: test_linear_expression.py From qiskit-aqua with Apache License 2.0

6 votes

def test_init(self):
        """ Check that every supported coefficient format yields the same LinearExpression. """
        num_vars = 5
        quadratic_program = QuadraticProgram()
        for _ in range(num_vars):
            quadratic_program.continuous_var()

        coefficients_list = list(range(num_vars))
        coefficients_dok = dok_matrix([coefficients_list])
        # coefficient 0 is omitted from the dictionaries, so keys start at 1
        coefficients_dict_int = {i: i for i in range(1, num_vars)}
        coefficients_dict_str = {'x{}'.format(i): i for i in range(1, num_vars)}

        all_formats = [
            coefficients_list,
            np.array(coefficients_list),
            coefficients_dok,
            coefficients_dict_int,
            coefficients_dict_str,
        ]
        for coeffs in all_formats:
            linear = LinearExpression(quadratic_program, coeffs)
            self.assertEqual((linear.coefficients != coefficients_dok).nnz, 0)
            self.assertTrue((linear.to_array() == coefficients_list).all())
            self.assertDictEqual(linear.to_dict(use_name=False), coefficients_dict_int)
            self.assertDictEqual(linear.to_dict(use_name=True), coefficients_dict_str)

Source File: linear_expression.py From qiskit-aqua with Apache License 2.0

6 votes

def __init__(self, quadratic_program: Any,
                 coefficients: Union[ndarray, spmatrix, List[float],
                                     Dict[Union[int, str], float]]) -> None:
        """Creates a new linear expression.

        The linear expression can be defined via an array, a list, a sparse matrix, or a dictionary
        that uses variable names or indices as keys. According to the original documentation,
        the values are stored internally as a dok_matrix.

        Args:
            quadratic_program: The parent QuadraticProgram.
            coefficients: The (sparse) representation of the coefficients.

        """
        # Hand the parent program to the base-class initializer.
        super().__init__(quadratic_program)
        # NOTE(review): the conversion to dok_matrix is not visible here; presumably
        # `coefficients` is a property whose setter performs it -- confirm.
        self.coefficients = coefficients

Source File: mottonen.py From pennylane with Apache License 2.0

6 votes

def _compute_theta(alpha):
    """Calculates the rotation angles from the alpha vector.

    Each output row is the sum of ``_matrix_M_entry(col, row) * value`` over
    the stored entries of ``alpha``, scaled by ``1 / alpha.shape[0]``. Results
    whose magnitude does not exceed 1e-6 are left out of the sparse output.

    Args:
        alpha (scipy.sparse.dok_matrix): alpha parameters; iterated through
            ``items()``, only the row index of each entry is used

    Returns:
        scipy.sparse.dok_matrix[np.float64]: rotation angles theta, same shape
        as ``alpha``
    """
    n_rows = alpha.shape[0]
    scale = 2 ** (-np.log2(n_rows))

    theta = sparse.dok_matrix(alpha.shape, dtype=np.float64)  # type: sparse.dok_matrix

    for row in range(n_rows):
        # Use transpose of M:
        total = sum(_matrix_M_entry(col, row) * value for (col, _), value in alpha.items())
        total *= scale
        if abs(total) > 1e-6:
            theta[row, 0] = total

    return theta

Source File: tictactoe.py From pymdptoolbox with BSD 3-Clause "New" or "Revised" License

6 votes

def getTransitionAndRewardArrays():
    """Build the per-action transition matrices and the reward array.

    Iterates over every (action, state) combination. Returns a tuple
    ``(P, R)``: ``P`` is a list with one ``STATES x STATES`` CSR matrix per
    action and ``R`` is a dense ``STATES x ACTIONS`` array.
    """
    R = np.zeros((STATES, ACTIONS))
    P = []
    # Naive approach, iterate through all possible combinations
    for a in range(ACTIONS):
        transitions = dok_matrix((STATES, STATES))
        for s in range(STATES):
            state = convertIndexToTuple(s)
            if isValid(state):
                s1, p, r = getTransitionProbabilities(state, a)
                transitions[s, s1] = p
                R[s, a] = r
            else:
                # There are no defined moves from an invalid state, so
                # transition probabilities cannot be calculated. However,
                # P must be a square stochastic matrix, so an invalid state
                # transitions back to itself with probability one.
                # Its reward stays 0.
                transitions[s, s] = 1
        P.append(transitions.tocsr())
    return(P, R)

Source File: abssimcomplex.py From MoguTDA with MIT License

6 votes

def boundary_operator(self, i):
        """Build the sparse boundary matrix from i-faces to (i-1)-faces.

        Rows are indexed by ``self.n_faces(i - 1)`` and columns by
        ``self.n_faces(i)``. Removing the vertex at position ``a`` of a source
        simplex contributes ``(-1) ** a`` at the cell of the resulting face.
        When there are no (i-1)-faces, a single row of ones is returned.

        :param i: dimension of the source simplices
        :return: ``scipy.sparse.dok_matrix`` of dtype float64
        """
        sources = self.n_faces(i)
        targets = self.n_faces(i-1)

        if len(targets) == 0:
            matrix = dok_matrix((1, len(sources)), dtype=np.float64)
            matrix[0, 0:len(sources)] = 1
            return matrix

        column_of = {simplex: position for position, simplex in enumerate(sources)}
        row_of = {simplex: position for position, simplex in enumerate(targets)}

        matrix = dok_matrix((len(targets), len(sources)), dtype=np.float64)
        for simplex in sources:
            column = column_of[simplex]
            for a in range(len(simplex)):
                face = simplex[:a]+simplex[(a+1):]
                # alternating sign: +1 for even positions, -1 for odd ones
                matrix[row_of[face], column] = 1 if a % 2 == 0 else -1
        return matrix

Source File: stacked_classifier.py From Quadflor with BSD 3-Clause "New" or "Revised" License

6 votes

def predict(self, X):
        """Predict a binary label matrix with the stacked meta classifiers.

        The base classifier scores ``X``; ``self._get_top_labels`` returns, per
        sample, the candidate labels and their scores. Each candidate label that
        has a meta classifier is accepted when that classifier predicts 1. The
        meta input is ``[rank, score]`` of the candidate, or the sample's full
        score row when ``self.dependencies`` is truthy.

        :param X: samples to classify; only ``X.shape[0]`` is used directly here
        :return: ``csr_matrix`` of shape ``(X.shape[0], self.y.shape[1])`` with a
            1 for every accepted label
        """
        # ``np.int`` was only an alias of the builtin ``int`` and was removed in
        # NumPy 1.24; the builtin selects exactly the same dtype.
        predictions = dok_matrix((X.shape[0], self.y.shape[1]), dtype=int)

        distances = self.base_classifier.predict_proba(X)
        topNIndices, topNDistances = self._get_top_labels(distances)

        for entry, (label_list, dist_list) in enumerate(zip(topNIndices, topNDistances)):
            for rank, label in enumerate(label_list):
                # Labels without a meta classifier can never be accepted.
                if label not in self.meta_classifiers:
                    continue
                if self.dependencies:
                    training_sample = [distances[entry, :]]
                else:
                    training_sample = [[rank, dist_list[rank]]]
                if self.meta_classifiers[label].predict(training_sample)[0] == 1:
                    predictions[entry, label] = 1

        return csr_matrix(predictions)

Source File: br_kneighbor_classifier.py From Quadflor with BSD 3-Clause "New" or "Revised" License

6 votes

def _a(self, neighbor_ids):
        """Predict one label row per sample by thresholding neighbor label frequencies.

        For each group of neighbor indices, the rows of ``self.y`` are summed
        and divided by the group size. A label is predicted when
        ``floor(frequency + (1 - self.threshold))`` is non-zero. If that leaves
        the row empty, the single most frequent label is predicted instead.

        :param neighbor_ids: iterable of index collections into ``self.y``
        :return: sparse matrix with one row per entry of ``neighbor_ids``
        """
        stacked = sp.csr_matrix((0, self.y.shape[1]))
        for ns in neighbor_ids:
            # By squeezing we support matrix output from scipy.sparse.sum and 1D array from np.sum
            label_counts = np.squeeze(np.array(self.y[ns].sum(0)))
            frequencies = np.divide(label_counts, len(ns))
            row = sp.csr_matrix([np.floor(frequencies + (1 - self.threshold))])
            # If there are no labels, we take the most frequent label.
            if row.sum() == 0:
                fallback = sp.dok_matrix((1, row.shape[1]))
                fallback[0, frequencies.argmax()] = 1
                row = sp.csr_matrix(fallback)

            stacked = sp.vstack((stacked, row))
        return stacked

Source File: br_kneighbor_classifier.py From Quadflor with BSD 3-Clause "New" or "Revised" License

6 votes

def _b(self, neighbor_ids):
        """Predict, per sample, as many labels as its neighbors carry on average.

        For each group of neighbor indices the average number of labels per
        neighbor is floored to ``m``; the ``m`` labels with the highest
        frequency among the neighbors are predicted.

        When ``m`` is 0 no label is predicted. The previous slice
        ``argsort(...)[-m:]`` turned into ``[-0:]`` in that case and selected
        every label.

        :param neighbor_ids: iterable of index collections into ``self.y``
        :return: sparse matrix with one row per entry of ``neighbor_ids``
        """
        result = sp.csr_matrix((0, self.y.shape[1]))
        for ns in neighbor_ids:
            average_label_nums = int(np.floor(np.mean([self.y[n].sum() for n in ns])))
            labels_sum = np.array(self.y[ns].sum(0))
            # By squeezing we support matrix output from scipy.sparse.sum and 1D array from np.sum
            divide = np.squeeze(np.divide(labels_sum, len(ns)))
            ranked = np.argsort(divide)
            # Guard the zero case explicitly: ``ranked[-0:]`` is the whole array.
            if average_label_nums:
                predicted_indices = ranked[-average_label_nums:]
            else:
                predicted_indices = ranked[:0]
            predicted_labels = sp.dok_matrix((1, len(divide)))
            # noinspection PyTypeChecker
            for index in predicted_indices:
                predicted_labels[0, index] = 1
            result = sp.vstack((result, sp.csr_matrix(predicted_labels)))
        return result

Source File: searchers.py From deep_architect_legacy with MIT License

6 votes

def _compute_features(self, model):
        """Encode a model as a sparse 1 x n feature row.

        Column 0 holds the number of modules in the model; the remaining
        columns count occurrences of the module n-grams registered in
        ``self.module_ngram_to_id``.

        ``range`` replaces the Python-2-only ``xrange`` so the method also runs
        on Python 3; the iteration bounds are unchanged.

        :param model: object whose ``repr_model()`` yields sequences; the first
            element of each is taken as the module name
        :return: ``csr_matrix`` of dtype float32 with shape
            ``(1, 1 + len(self.module_ngram_to_id))``
        """
        bls = [b[0] for b in tuple(model.repr_model())]

        nfeats_other = 1
        nfeats = nfeats_other + len(self.module_ngram_to_id)
        feats = sp.dok_matrix((1, nfeats), dtype=np.float32)

        # other features
        feats[0, 0] = len(bls)

        # ngrams features
        # NOTE(review): n-gram lengths stop at ngram_maxlen - 1 and the last
        # window of each length is skipped; kept as is because existing feature
        # vectors presumably depend on it -- confirm before changing.
        for k in range(1, self.ngram_maxlen):
            for i in range(len(bls) - k):
                ngram = tuple(bls[i:i + k])
                if ngram in self.module_ngram_to_id:
                    feats[0, nfeats_other + self.module_ngram_to_id[ngram]] += 1.0

        return sp.csr_matrix(feats)

Source File: unittest_graph_utils.py From nasbot with MIT License

6 votes

def test_kahn_topoligical_sort(self):
    """ Check topological sorting on a small acyclic graph and on a cyclic one. """
    self.report('Testing topological sort.')
    acyclic_edges = [(0, 2), (1, 2), (1, 3), (2, 3), (2, 4), (4, 3), (5, 0), (5, 1)]
    A = dok_matrix((6, 6))
    for src, dst in acyclic_edges:
        A[src, dst] = 1
    sorted_order, has_cycles = graph_utils.kahn_topological_sort(A, 5)
    assert has_cycles == False
    # nodes 0 and 1 are interchangeable, so two orders are acceptable
    assert sorted_order == [5, 0, 1, 2, 4, 3] or sorted_order == [5, 1, 0, 2, 4, 3]
    # Now create a graph with cycles
    A[3, 0] = 1
    sorted_order, has_cycles = graph_utils.kahn_topological_sort(A, 5)
    assert has_cycles == True

Source File: linear_expression.py From qiskit-aqua with Apache License 2.0

6 votes

def evaluate(self, x: Union[ndarray, List, Dict[Union[int, str], float]]) -> float:
        """Evaluate the linear expression for given variables.

        The input is first converted with ``self._coeffs_to_dok_matrix``; the
        result is the single entry of its product with the transposed
        coefficient matrix.

        Args:
            x: The values of the variables to be evaluated.

        Returns:
            The value of the linear expression given the variable values.
        """
        values_row = self._coeffs_to_dok_matrix(x)
        coefficients_column = self.coefficients.transpose()

        # (1 x n) @ (n x 1) leaves a single cell holding the dot product
        return (values_row @ coefficients_column)[0, 0]

Source File: Dataset.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0

5 votes

def load_rating_file_as_matrix(self, filename):
        '''
        Read a tab-separated rating file and return a binary dok matrix.

        Every line holds "user, item, rating" separated by tabs. The matrix is
        sized from the largest user and item ids found in the file (plus one);
        a cell is set to 1.0 when the rating on its line is strictly positive.
        '''
        # First pass: find the largest user and item ids
        max_user, max_item = 0, 0
        with open(filename, "r") as f:
            for line in f:
                fields = line.split("\t")
                max_user = max(max_user, int(fields[0]))
                max_item = max(max_item, int(fields[1]))

        # Second pass: mark the positively rated pairs
        mat = sp.dok_matrix((max_user+1, max_item+1), dtype=np.float32)
        with open(filename, "r") as f:
            for line in f:
                fields = line.split("\t")
                user, item, rating = int(fields[0]), int(fields[1]), float(fields[2])
                if rating > 0:
                    mat[user, item] = 1.0
        return mat

Source File: PinterestICCVReader.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0

5 votes

def load_rating_file_as_matrix(self, filename):
        '''
        Read a tab-separated rating file and return a binary dok matrix.

        Each line carries a user id, an item id and a rating. The matrix shape
        is derived from the largest ids seen in the file (plus one); only pairs
        whose rating is strictly positive are stored, with value 1.0.
        '''
        # Scan once to learn the matrix dimensions
        highest_user, highest_item = 0, 0
        with open(filename, "r") as f:
            for row in f:
                columns = row.split("\t")
                u, i = int(columns[0]), int(columns[1])
                highest_user = max(highest_user, u)
                highest_item = max(highest_item, i)

        # Scan again to fill the matrix
        mat = sps.dok_matrix((highest_user+1, highest_item+1), dtype=np.float32)
        with open(filename, "r") as f:
            for row in f:
                columns = row.split("\t")
                user, item, rating = int(columns[0]), int(columns[1]), float(columns[2])
                if rating > 0:
                    mat[user, item] = 1.0
        return mat

Source File: graph_score_vectorizer.py From Quadflor with BSD 3-Clause "New" or "Revised" License

5 votes

def _make_sparse(self, scores):
        """Stack per-document score mappings into one sparse matrix.

        Each element of ``scores`` is a mapping from term to score; the column
        of a term is looked up in ``self.vocabulary``.

        :param scores: iterable of mappings ``term -> score``
        :return: sparse matrix with one row per mapping and
            ``len(self.vocabulary)`` columns
        """
        width = len(self.vocabulary)
        stacked = sp.csr_matrix((0, width))
        for score in scores:
            row = sp.dok_matrix((1, width))
            for term, value in score.items():
                row[0, self.vocabulary[term]] = value
            stacked = sp.vstack((stacked, sp.csr_matrix(row)))
        return stacked

Source File: test_hierarchical_f_measure.py From Quadflor with BSD 3-Clause "New" or "Revised" License

5 votes

def imitate_tr(self, graph, root):
        """Return a stand-in object exposing ``nx_graph`` and ``nx_root``.

        A do-nothing function is used as the carrier because arbitrary
        attributes can be attached to function objects.

        :param graph: value stored as ``nx_graph``
        :param root: value stored as ``nx_root``
        :return: the function object carrying both attributes
        """
        def tr():
            return None

        tr.__dict__.update(nx_graph=graph, nx_root=root)
        return tr
        #
        # def test_speed(self):
        #     _, _, tr = load_dataset('econ62k')
        #     graph = tr.nx_graph
        #
        #     def random_labels():
        #         def set_random_ones(n_nodes):
        #             ids = np.random.choice(n_nodes, 5)
        #             zeros = sp.dok_matrix((1, n_nodes), dtype=np.bool_)
        #             for index in ids:
        #                 zeros[0, index] = True
        #             return zeros
        #
        #         number_of_nodes = graph.number_of_nodes()
        #         matrix = set_random_ones(number_of_nodes)
        #         for i in range(0, 62000):
        #             zeros = set_random_ones(number_of_nodes)
        #             matrix = sp.vstack((matrix, zeros))
        #         return sp.csr_matrix(matrix)
        #
        #     y_true = random_labels()
        #     y_pred = random_labels()
        #     print('random constructed')
        #
        #     start = default_timer()
        #     hierarchical_f_measure(graph, y_true, y_pred)
        #     print(default_timer() - start)

Source File: LoadMovieDataCnn.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0

5 votes

def load_itemGenres_as_matrix(path='ml-1m/'):
    """Load the item x genre indicator matrix from three files under ``path``.

    * ``u.info``: space-separated lines; the line whose second field is
      ``items`` provides the number of items.
    * ``u.genre``: ``|``-separated lines; the first field is a genre name and
      genres are numbered in file order.
    * ``movies.dat``: ``::``-separated lines; field 0 is the item id and
      field 2 a ``|``-separated genre list.

    Reading of each file stops at the first empty line or at end of file.
    ``path`` is concatenated with the file names as is, so it must end with a
    path separator.

    :return: tuple ``(num_items, matrix)`` where ``matrix`` is a dense float32
        array of shape ``(num_items + 1, number of genres)``
    """
    num_items = 0
    with open(path+"u.info", encoding="utf-8") as f:
        for raw in f:
            line = raw.strip('\n')
            if line == "":
                break
            fields = line.split(" ")
            if fields[1] == 'items':
                num_items = int(fields[0])

    num_type = 0
    genre_column = {}
    with open(path+"u.genre", "r", encoding="utf-8") as f:
        for raw in f:
            line = raw.strip('\n')
            if line == "":
                break
            genre_column[line.split("|")[0]] = num_type
            num_type += 1

    # Construct matrix
    mat = sp.dok_matrix((num_items + 1, num_type), dtype=np.float32)
    with open(path+"movies.dat", encoding="utf-8") as f:
        for raw in f:
            line = raw.strip('\r\n')
            if line == "":
                break
            fields = line.split("::")
            item_id = int(fields[0])
            for genre in fields[2].split("|"):
                # genres missing from u.genre are ignored
                if genre in genre_column:
                    mat[item_id, genre_column[genre]] = 1

    return num_items, mat.toarray()

Source File: LoadTafengDataCnn.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0

5 votes

def load_rating_train_as_matrix(path='tafeng/'):
    """Load ``train.rating`` under ``path`` into a binary interaction matrix.

    Every line is tab-separated with the user id in column 0 and the item id
    in column 1; the matching cell is set to 1.0. The matrix size is fixed by
    the hard-coded user and item counts below. ``path`` is concatenated with
    the file name as is, so it must end with a path separator.

    :return: ``scipy.sparse.dok_matrix`` of dtype float32
    """
    # Fixed dataset dimensions
    num_users = 32266
    num_items = 23812

    interactions = sp.dok_matrix((num_users + 1, num_items + 1), dtype=np.float32)
    with open(path+"train.rating", "r", encoding="utf-8") as f:
        for line in f:
            fields = line.split("\t")
            user, item = int(fields[0]), int(fields[1])
            interactions[user, item] = 1.0
    return interactions

Source File: mottonen.py From pennylane with Apache License 2.0

5 votes

def _get_alpha_y(a, n, k):
    r"""Build the sparse vector of Y-rotation angles :math:`\alpha^y_k`.

    The stored entries of ``a`` are grouped into blocks of ``2 ** k``
    consecutive positions. For each block the angle is
    ``2 * arcsin(sqrt(S_upper) / sqrt(S_all))``, where ``S_all`` is the sum of
    squared entries of the block and ``S_upper`` the sum over its second half.

    Args:
        a (scipy.sparse.dok_matrix): sparse container of input values; iterated
            through ``items()``, only the row index of each entry is used
        n (int): total number of qubits
        k (int): index of current qubit

    Returns:
        scipy.sparse.dok_matrix[np.float64]: a sparse vector representing :math:`\alpha^y_k`
    """
    shape = (2 ** (n - k), 1)
    alpha = sparse.dok_matrix(shape, dtype=np.float64)
    numerator = sparse.dok_matrix(shape, dtype=np.float64)
    denominator = sparse.dok_matrix(shape, dtype=np.float64)

    half_block = 2 ** (k - 1)
    for (i, _), value in a.items():
        position = i + 1  # 1-based position of the entry
        j = int(math.ceil(position / 2 ** k))
        offset = position - (2 * j - 1) * half_block
        squared = value * value

        # only the second half of each block feeds the numerator
        if 1 <= offset <= half_block:
            numerator[j - 1, 0] += squared
        denominator[j - 1, 0] += squared

    for (j, _), total in list(numerator.items()):
        numerator[j, 0] = math.sqrt(total)
    for (j, _), total in list(denominator.items()):
        denominator[j, 0] = 1 / math.sqrt(total)

    ratio = numerator.multiply(denominator)
    for (j, _), value in ratio.todok().items():
        alpha[j, 0] = 2 * np.arcsin(value)

    return alpha

Python scipy.sparse.dok_matrix() Examples