Python networkx.pagerank() Examples

The following are 30 code examples of networkx.pagerank(), drawn from open-source projects. The source file, project, and license for each example are noted above it.
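As a quick orientation before the examples, here is a minimal sketch of the call itself (the graph and damping value are arbitrary choices for illustration):

import networkx as nx

# Rank the nodes of a small directed graph.
G = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a"), ("a", "c")])
ranks = nx.pagerank(G, alpha=0.85)  # alpha is the damping factor
print(ranks)  # {node: score, ...}; the scores sum to 1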
Example #1
Source File: test_pagerank.py    From qgisSpaceSyntaxToolkit with GNU General Public License v3.0
def setUp(self):
        G = networkx.DiGraph()
        edges = [(1, 2), (1, 3),
                 # 2 is a dangling node
                 (3, 1), (3, 2), (3, 5),
                 (4, 5), (4, 6),
                 (5, 4), (5, 6),
                 (6, 4)]
        G.add_edges_from(edges)
        self.G = G
        self.G.pagerank = dict(zip(G,
                                   [0.03721197, 0.05395735, 0.04150565,
                                    0.37508082, 0.20599833, 0.28624589]))
        self.dangling_node_index = 1
        self.dangling_edges = {1: 2, 2: 3,
                               3: 0, 4: 0, 5: 0, 6: 0}
        self.G.dangling_pagerank = dict(zip(G,
                                            [0.10844518, 0.18618601, 0.0710892,
                                             0.2683668, 0.15919783, 0.20671497])) 
Example #2
Source File: summarizer.py    From wanish with MIT License
def textrank(text, hdr):
    # identify the most likely language of the text
    lang_code = lang_identifier.classify(' '.join([hdr, text]))[0]

    # split the text into sentences
    sentences = [sentence for sentence in split_multi(text)]

    stemmer = snowballstemmer.stemmer(LANG_CODES.get(lang_code, 'english'))

    words = [set(stemmer.stemWord(word) for word in word_tokenizer(sentence.lower()) if word.isalpha())
             for sentence in sentences]

    pairs = combinations(range(len(sentences)), 2)
    scores = [(i, j, similarity(words[i], words[j])) for i, j in pairs]
    scores = filter(lambda x: x[2], scores)

    g = nx.Graph()
    g.add_weighted_edges_from(scores)
    pr = nx.pagerank(g)

    return sorted(((i, pr[i], s) for i, s in enumerate(sentences) if i in pr),
                  key=lambda x: pr[x[0]], reverse=True), lang_code 
Example #3
Source File: feedback_graph.py    From acl2017-interactive_summarizer with Apache License 2.0
def add_sentences(self, sentences):
        """
        @type sentences: list[Sentence]
        :param sentences:
        :return:
        """
        counter = self.counter
        G = self.G
        for sentence in sentences:
            G.add_nodes_from(sentence.concepts)
            counter.update(ngrams(sentence.concepts, self.N))

        for (keys, value) in counter.items():
            for i in range(0, len(keys) - 1):
                for j in range(1, len(keys)):
                    G.add_edge(keys[i], keys[j], weight=value)
                    # counter.update((keys[i], keys[j]))

        # for (key, value) in counter.items():
        #     G.add_edge(key[0], key[1], attr={"weight": value})

        print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))

        self.pr = nx.pagerank(G) 
Example #4
Source File: test_pagerank.py    From aws-kube-codesuite with Apache License 2.0
def setUp(self):
        G = networkx.DiGraph()
        edges = [(1, 2), (1, 3),
                 # 2 is a dangling node
                 (3, 1), (3, 2), (3, 5),
                 (4, 5), (4, 6),
                 (5, 4), (5, 6),
                 (6, 4)]
        G.add_edges_from(edges)
        self.G = G
        self.G.pagerank = dict(zip(sorted(G),
                                   [0.03721197, 0.05395735, 0.04150565,
                                    0.37508082, 0.20599833, 0.28624589]))
        self.dangling_node_index = 1
        self.dangling_edges = {1: 2, 2: 3,
                               3: 0, 4: 0, 5: 0, 6: 0}
        self.G.dangling_pagerank = dict(zip(sorted(G),
                                            [0.10844518, 0.18618601, 0.0710892,
                                             0.2683668, 0.15919783, 0.20671497])) 
Example #5
Source File: feedback_graph.py    From acl2017-interactive_summarizer with Apache License 2.0
def incorporate_feedback(self, flightrecorder):
        """

        :param flightrecorder:
        :return:
         @type flightrecorder: FlightRecorder
        """
        G = self.G
        print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))

        # use the pagerank personalization feature to incorporate flightrecorder feedback

        union = flightrecorder.union()

        for rejected in union.reject:
            if(G.has_node(rejected)):
                G.remove_node(rejected)

        print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))

        self.pr = nx.pagerank(G) 
Example #6
Source File: unsupervised.py    From indosum with Apache License 2.0
def summarize(self, doc: Document, size: int = 3) -> List[int]:
        """Summarize a given document using TextRank algorithm.

        Args:
            doc (Document): The document to summarize.
            size (int): Maximum number of sentences that the summary should have.

        Returns:
            list: The indices of the extracted sentences that form the summary, sorted
                ascending.
        """
        size = min(size, len(doc.sentences))
        G = self._build_graph(doc.sentences)
        ranks = nx.pagerank(G, alpha=self.damping_factor, tol=self.tol, max_iter=self.max_iter)

        summary = sorted(ranks.keys(), key=lambda k: ranks[k], reverse=True)[:size]
        summary.sort()
        return summary 
Example #7
Source File: unsupervised.py    From indosum with Apache License 2.0
def summarize(self, doc: Document, size: int = 3) -> List[int]:
        """Summarize a given document using LexRank algorithm.

        Args:
            doc (Document): The document to summarize.
            size (int): Maximum number of sentences that the summary should have.

        Returns:
            list: The indices of the extracted sentences that form the summary, sorted
                ascending.
        """
        size = min(size, len(doc.sentences))
        positions = [self._get_position(k, len(doc.sentences))
                     for k in range(len(doc.sentences))]
        G = self._build_graph(doc.sentences)
        ranks = nx.pagerank(G, alpha=self.damping_factor, tol=self.tol, max_iter=self.max_iter)
        candidates = sorted(
            ranks.keys(), key=lambda k: self._combine_features(positions[k], ranks[k]),
            reverse=True)
        return self._csis(doc.sentences, candidates, size) 
Example #8
Source File: LOCABAL.py    From RecQ with GNU General Public License v3.0
def initModel(self):
        super(LOCABAL, self).initModel()
        self.H = np.random.rand(self.embed_size,self.embed_size)
        G = nx.DiGraph()
        for re in self.social.relation:
            G.add_edge(re[0], re[1])
        pr = nx.pagerank(G, alpha=0.85)
        pr = sorted(pr.items(), key=lambda d: d[1], reverse=True)
        pr = [(u[0],ind+1) for ind,u in enumerate(pr)]
        self.W = {}
        for user in pr:
            self.W[user[0]] = 1/(1+math.log(user[1]))
        self.S = {}
        for line in self.social.relation:
            u1,u2,weight = line
            if self.data.containsUser(u1) and self.data.containsUser(u2):
                uvec1=self.data.trainSet_u[u1]
                uvec2=self.data.trainSet_u[u2]
                # add relations to the dict
                if u1 not in self.S:
                    self.S[u1] = {}
                self.S[u1][u2] = qmath.cosine_sp(uvec1, uvec2) 
Example #9
Source File: document_summarization.py    From text-analytics-with-python with Apache License 2.0
def textrank_text_summarizer(documents, num_sentences=2,
                             feature_type='frequency'):
    
    vec, dt_matrix = build_feature_matrix(documents, 
                                          feature_type=feature_type)
    similarity_matrix = (dt_matrix * dt_matrix.T)
        
    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    scores = networkx.pagerank(similarity_graph)   
    
    ranked_sentences = sorted(((score, index) 
                                for index, score 
                                in scores.items()), 
                              reverse=True)

    top_sentence_indices = [ranked_sentences[index][1] 
                            for index in range(num_sentences)]
    top_sentence_indices.sort()
    
    for index in top_sentence_indices:
        print(documents[index]) 
Example #10
Source File: textrank.py    From nlg-yongzhuo with MIT License
def textrank_tfidf(sentences, topk=6):
    """
        使用tf-idf作为相似度, networkx.pagerank获取中心句子作为摘要
    :param sentences: str, docs of text
    :param topk:int
    :return:list
    """
    # 切句子
    sentences = list(cut_sentence(sentences))
    # tf-idf相似度
    matrix_norm = tdidf_sim(sentences)
    # 构建相似度矩阵
    tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
    # nx.pagerank
    sens_scores = nx.pagerank(tfidf_sim)
    # 得分排序
    sen_rank = sorted(sens_scores.items(), key=lambda x: x[1], reverse=True)
    # 保留topk个, 防止越界
    topk = min(len(sentences), topk)
    # 返回原句子和得分
    return [(sr[1], sentences[sr[0]]) for sr in sen_rank][0:topk] 
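A portability note for this example (and the identical one that follows): nx.from_scipy_sparse_matrix was removed in NetworkX 3.0. A minimal sketch of the equivalent call on current versions, with a made-up sparse similarity matrix standing in for matrix_norm * matrix_norm.T:

import networkx as nx
from scipy.sparse import csr_matrix

# Stand-in for the tf-idf similarity matrix computed in the example above.
sim = csr_matrix([[0.0, 0.5, 0.2],
                  [0.5, 0.0, 0.4],
                  [0.2, 0.4, 0.0]])

g = nx.from_scipy_sparse_array(sim)  # NetworkX >= 3.0 replacement
print(nx.pagerank(g))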
Example #11
Source File: textrank_sklearn.py    From nlg-yongzhuo with MIT License
def textrank_tfidf(sentences, topk=6):
    """
        使用tf-idf作为相似度, networkx.pagerank获取中心句子作为摘要
    :param sentences: str, docs of text
    :param topk:int
    :return:list
    """
    # 切句子
    sentences = list(cut_sentence(sentences))
    # tf-idf相似度
    matrix_norm = tdidf_sim(sentences)
    # 构建相似度矩阵
    tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
    # nx.pagerank
    sens_scores = nx.pagerank(tfidf_sim)
    # 得分排序
    sen_rank = sorted(sens_scores.items(), key=lambda x: x[1], reverse=True)
    # 保留topk个, 防止越界
    topk = min(len(sentences), topk)
    # 返回原句子和得分
    return [(sr[1], sentences[sr[0]]) for sr in sen_rank][0:topk] 
Example #12
Source File: page_rank.py    From Verum with Apache License 2.0
def score(self, sg, *args, **xargs):  # get_pagerank_probability
        """

        :param sg: egocentric subgraph around topic in networkx format
        :param distance_degradation: A factor for degrading as distance from the topic increases
        :return: Dictionary of probabilities keyed by node
        """
        # convert to digraph if needed
        if sg.is_multigraph():
            sg = self.multigraph_to_digraph(sg)

        personalized = {}
        for node in sg.nodes():
    #        personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation)
            # INSERT WEIGHTING FUNCTION BELOW
            personalized[node] = self.exponential_weight(sg.node[node]['topic_distance'])

        # return the pagerank scores
        return nx.pagerank(sg, personalization=personalized, weight='confidence') 
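Example #12 builds a personalization dict, which biases PageRank's teleport step toward selected nodes (note also that recent NetworkX versions spell the per-node attribute accessor sg.nodes[node] rather than sg.node[node]). A minimal self-contained sketch of personalized PageRank on a made-up graph; as test_incomplete_personalization below shows, nodes missing from the dict are treated as weight 0:

import networkx as nx

G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 4), (4, 1)])

# Teleport only to node 1; the other nodes get teleport weight 0.
pr = nx.pagerank(G, alpha=0.85, personalization={1: 1.0})
print(pr)  # node 1 and its downstream neighbors receive more mass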
Example #13
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def setUp(self):
        G = networkx.DiGraph()
        edges = [(1, 2), (1, 3),
                 # 2 is a dangling node
                 (3, 1), (3, 2), (3, 5),
                 (4, 5), (4, 6),
                 (5, 4), (5, 6),
                 (6, 4)]
        G.add_edges_from(edges)
        self.G = G
        self.G.pagerank = dict(zip(sorted(G),
                                   [0.03721197, 0.05395735, 0.04150565,
                                    0.37508082, 0.20599833, 0.28624589]))
        self.dangling_node_index = 1
        self.dangling_edges = {1: 2, 2: 3,
                               3: 0, 4: 0, 5: 0, 6: 0}
        self.G.dangling_pagerank = dict(zip(sorted(G),
                                            [0.10844518, 0.18618601, 0.0710892,
                                             0.2683668, 0.15919783, 0.20671497])) 
Example #14
Source File: lexrankr.py    From lexrankr with MIT License
def summarize(self, text):
        self.sentences = self.factory.text2sentences(text)
        self.num_sentences = len(self.sentences)
        self.corpus = SentenceCorpus(self.sentences, self.no_below_word_count, self.no_above_word_portion, self.max_dictionary_size)
        self.model = TfidfModel(self.corpus.bows, id2word=self.corpus.dictionary, normalize=True)
        self.tfidfs = self.model[self.corpus.bows]
        self._inject_tfidfs()
        self._build_matrix()
        self._clustering()
        if self.compactify:
            self._compactify()
        self.graphs = []
        for i in range(self.num_clusters):
            graph = self.sentences2graph(self.clusters[i])
            pagerank = networkx.pagerank(graph, weight='weight')
            self.clusters[i] = sorted(pagerank, key=pagerank.get, reverse=True)
            self.graphs.append(graph) 
Example #15
Source File: graph_generation.py    From queueing-tool with MIT License
def generate_pagerank_graph(num_vertices=250, **kwargs):
    """Creates a random graph where the vertex types are
    selected using their pagerank.

    Calls :func:`.minimal_random_graph` and then
    :func:`.set_types_rank` where the ``rank`` keyword argument
    is given by :func:`networkx.pagerank`.

    Parameters
    ----------
    num_vertices : int (optional, the default is 250)
        The number of vertices in the graph.
    **kwargs :
        Any parameters to send to :func:`.minimal_random_graph` or
        :func:`.set_types_rank`.

    Returns
    -------
    :class:`.QueueNetworkDiGraph`
        A graph with a ``pos`` vertex property and the ``edge_type``
        edge property.

    Notes
    -----
    This function sets the edge types of a graph to be either 1, 2, or
    3. It sets the vertices to type 2 by selecting the top
    ``pType2 * g.number_of_nodes()`` vertices given by the
    :func:`~networkx.pagerank` of the graph. A loop is added
    to all vertices identified this way (if one does not exist
    already). It then randomly sets vertices close to the type 2
    vertices as type 3, and adds loops to these vertices as well. These
    loops then have edge types that correspond to the vertices type.
    The rest of the edges are set to type 1.
    """
    g = minimal_random_graph(num_vertices, **kwargs)
    r = np.zeros(num_vertices)
    for k, pr in nx.pagerank(g).items():
        r[k] = pr
    g = set_types_rank(g, rank=r, **kwargs)
    return g 
Example #16
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_google_matrix(self):
        G = self.G
        M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
        e, ev = numpy.linalg.eig(M.T)
        p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
        for (a, b) in zip(p, self.G.pagerank.values()):
            assert_almost_equal(a, b) 
Example #17
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_numpy_pagerank(self):
        G = self.G
        p = networkx.pagerank_numpy(G, alpha=0.9)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)
        personalize = dict((n, random.random()) for n in G)
        p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize) 
Example #18
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_pagerank_max_iter(self):
        networkx.pagerank(self.G, max_iter=0) 
Example #19
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_pagerank(self):
        G = self.G
        p = networkx.pagerank(G, alpha=0.9, tol=1.e-08)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)

        nstart = dict((n, random.random()) for n in G)
        p = networkx.pagerank(G, alpha=0.9, tol=1.e-08, nstart=nstart)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4) 
Example #20
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_zero_personalization_vector(self):
        G = networkx.complete_graph(4)
        personalize = {0: 0, 1: 0, 2: 0, 3: 0}
        assert_raises(ZeroDivisionError, networkx.pagerank, G,
                      personalization=personalize) 
Example #21
Source File: core.py    From samacharbot2 with GNU General Public License v3.0
def _textrank(matrix):
    '''returns the PageRank vector (principal eigenvector of the
       Google matrix built from the adjacency matrix)'''

    graph = nx.from_numpy_matrix(matrix)
    return nx.pagerank(graph) 
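nx.from_numpy_matrix was likewise removed in NetworkX 3.0; on current versions the same adjacency-matrix construction is spelled from_numpy_array. A minimal sketch with a made-up matrix:

import networkx as nx
import numpy as np

# Small symmetric adjacency matrix standing in for the one passed above.
matrix = np.array([[0, 1, 1],
                   [1, 0, 0],
                   [1, 0, 0]])

graph = nx.from_numpy_array(matrix)  # NetworkX >= 3.0 equivalent
print(nx.pagerank(graph))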
Example #22
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_one_nonzero_personalization_value(self):
        G = networkx.complete_graph(4)
        personalize = {0: 0, 1: 0, 2: 0, 3: 1}
        answer = {0: 0.22077931820379187, 1: 0.22077931820379187, 2: 0.22077931820379187, 3: 0.3376620453886241}
        p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
        for n in G:
            assert_almost_equal(p[n], answer[n], places=4) 
Example #23
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_incomplete_personalization(self):
        G = networkx.complete_graph(4)
        personalize = {3: 1}
        answer = {0: 0.22077931820379187, 1: 0.22077931820379187, 2: 0.22077931820379187, 3: 0.3376620453886241}
        p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
        for n in G:
            assert_almost_equal(p[n], answer[n], places=4) 
Example #24
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_dangling_pagerank(self):
        pr = networkx.pagerank(self.G, dangling=self.dangling_edges)
        for n in self.G:
            assert_almost_equal(pr[n], self.G.dangling_pagerank[n], places=4) 
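The dangling argument exercised here (and prepared in the setUp examples above) controls how the rank mass of dangling nodes, i.e. nodes with no out-edges, is redistributed: the dict maps nodes to out-edge weights that are normalized into a distribution, and when it is omitted the personalization vector is used instead. A minimal sketch on a made-up graph:

import networkx as nx

G = nx.DiGraph([(1, 2), (1, 3)])  # nodes 2 and 3 have no out-edges

# Route all dangling mass back to node 1 instead of spreading it uniformly.
pr = nx.pagerank(G, dangling={1: 1.0})
print(pr)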
Example #25
Source File: test_pagerank.py    From Carnets with BSD 3-Clause "New" or "Revised" License
def test_empty(self):
        G = networkx.Graph()
        assert_equal(networkx.pagerank(G), {})
        assert_equal(networkx.pagerank_numpy(G), {})
        assert_equal(networkx.google_matrix(G).shape, (0, 0)) 
Example #26
Source File: test_pagerank.py    From aws-kube-codesuite with Apache License 2.0
def test_pagerank(self):
        G = self.G
        p = networkx.pagerank(G, alpha=0.9, tol=1.e-08)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)

        nstart = dict((n, random.random()) for n in G)
        p = networkx.pagerank(G, alpha=0.9, tol=1.e-08, nstart=nstart)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4) 
Example #27
Source File: test_pagerank.py    From aws-kube-codesuite with Apache License 2.0
def test_pagerank_max_iter(self):
        networkx.pagerank(self.G, max_iter=0) 
Example #28
Source File: test_pagerank.py    From aws-kube-codesuite with Apache License 2.0
def test_numpy_pagerank(self):
        G = self.G
        p = networkx.pagerank_numpy(G, alpha=0.9)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)
        personalize = dict((n, random.random()) for n in G)
        p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize) 
Example #29
Source File: test_pagerank.py    From aws-kube-codesuite with Apache License 2.0
def test_google_matrix(self):
        G = self.G
        M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
        e, ev = numpy.linalg.eig(M.T)
        p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
        for (a, b) in zip(p, self.G.pagerank.values()):
            assert_almost_equal(a, b) 
Example #30
Source File: test_pagerank.py    From aws-kube-codesuite with Apache License 2.0
def test_zero_personalization_vector(self):
        G = networkx.complete_graph(4)
        personalize = {0: 0, 1: 0, 2: 0, 3: 0}
        assert_raises(ZeroDivisionError, networkx.pagerank, G,
                      personalization=personalize)