Python networkx.pagerank() Examples
The following are 30 code examples of networkx.pagerank(), collected from open-source projects. Each example's header names the project, source file, and license it was taken from. You may also want to check out the other available functions and classes of the networkx module.
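Before the project examples, here is a minimal self-contained sketch of the basic call (the graph below is illustrative, not from any project on this page):

    import networkx as nx

    # Build a small directed graph and rank its nodes.
    G = nx.DiGraph()
    G.add_edges_from([('a', 'b'), ('b', 'c'), ('c', 'a'), ('a', 'c')])
    scores = nx.pagerank(G, alpha=0.85)  # alpha is the damping factor (default 0.85)
    print(scores)  # dict mapping each node to a score; the scores sum to 1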
Example #1
Source File: test_pagerank.py From qgisSpaceSyntaxToolkit with GNU General Public License v3.0

def setUp(self):
    G = networkx.DiGraph()
    edges = [(1, 2), (1, 3),
             # 2 is a dangling node
             (3, 1), (3, 2), (3, 5),
             (4, 5), (4, 6),
             (5, 4), (5, 6),
             (6, 4)]
    G.add_edges_from(edges)
    self.G = G
    self.G.pagerank = dict(zip(G,
                               [0.03721197, 0.05395735, 0.04150565,
                                0.37508082, 0.20599833, 0.28624589]))
    self.dangling_node_index = 1
    self.dangling_edges = {1: 2, 2: 3,
                           3: 0, 4: 0, 5: 0, 6: 0}
    self.G.dangling_pagerank = dict(zip(G,
                                        [0.10844518, 0.18618601, 0.0710892,
                                         0.2683668, 0.15919783, 0.20671497]))
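The fixture pairs expected scores with a dangling-edges dictionary. A hedged sketch of how such a fixture is exercised (this mirrors Example #24 below, which passes self.dangling_edges through the dangling keyword):

    # Redistribute the rank mass of dangling nodes (here node 2)
    # according to the given out-edge weights.
    pr = networkx.pagerank(G, dangling={1: 2, 2: 3, 3: 0, 4: 0, 5: 0, 6: 0})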
Example #2
Source File: summarizer.py From wanish with MIT License

def textrank(text, hdr):
    # finding out the most probable language of the text
    lang_code = lang_identifier.classify(' '.join([hdr, text]))[0]

    # tokenizing for words
    sentences = [sentence for sentence in split_multi(text)]

    stemmer = snowballstemmer.stemmer(LANG_CODES.get(lang_code, 'english'))

    words = [set(stemmer.stemWord(word)
                 for word in word_tokenizer(sentence.lower())
                 if word.isalpha())
             for sentence in sentences]

    pairs = combinations(range(len(sentences)), 2)
    scores = [(i, j, similarity(words[i], words[j])) for i, j in pairs]
    scores = filter(lambda x: x[2], scores)

    g = nx.Graph()
    g.add_weighted_edges_from(scores)
    pr = nx.pagerank(g)

    return sorted(((i, pr[i], s) for i, s in enumerate(sentences) if i in pr),
                  key=lambda x: pr[x[0]], reverse=True), lang_code
Example #3
Source File: feedback_graph.py From acl2017-interactive_summarizer with Apache License 2.0

def add_sentences(self, sentences):
    """
    @type sentences: list[Sentence]
    :param sentences:
    :return:
    """
    counter = self.counter
    G = self.G
    for sentence in sentences:
        G.add_nodes_from(sentence.concepts)
        counter.update(ngrams(sentence.concepts, self.N))

    for (keys, value) in counter.items():
        for i in range(0, len(keys) - 1):
            for j in range(1, len(keys)):
                G.add_edge(keys[i], keys[j], weight=value)
                # counter.update((keys[i], keys[j]))

    # for (key, value) in counter.items():
    #     G.add_edge(key[0], key[1], attr={"weight": value})

    print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))

    self.pr = nx.pagerank(G)
Example #4
Source File: test_pagerank.py From aws-kube-codesuite with Apache License 2.0

def setUp(self):
    G = networkx.DiGraph()
    edges = [(1, 2), (1, 3),
             # 2 is a dangling node
             (3, 1), (3, 2), (3, 5),
             (4, 5), (4, 6),
             (5, 4), (5, 6),
             (6, 4)]
    G.add_edges_from(edges)
    self.G = G
    self.G.pagerank = dict(zip(sorted(G),
                               [0.03721197, 0.05395735, 0.04150565,
                                0.37508082, 0.20599833, 0.28624589]))
    self.dangling_node_index = 1
    self.dangling_edges = {1: 2, 2: 3,
                           3: 0, 4: 0, 5: 0, 6: 0}
    self.G.dangling_pagerank = dict(zip(sorted(G),
                                        [0.10844518, 0.18618601, 0.0710892,
                                         0.2683668, 0.15919783, 0.20671497]))
Example #5
Source File: feedback_graph.py From acl2017-interactive_summarizer with Apache License 2.0

def incorporate_feedback(self, flightrecorder):
    """
    :param flightrecorder:
    :return:
    @type flightrecorder: FlightRecorder
    """
    G = self.G
    print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))

    # use the pagerank personalization feature to incorporate flightrecorder feedback
    union = flightrecorder.union()

    for rejected in union.reject:
        if G.has_node(rejected):
            G.remove_node(rejected)
    print("V := (N,E), |N| = %s, |E| = %s" % (len(G.nodes()), len(G.edges())))

    self.pr = nx.pagerank(G)
Example #6
Source File: unsupervised.py From indosum with Apache License 2.0

def summarize(self, doc: Document, size: int = 3) -> List[int]:
    """Summarize a given document using the TextRank algorithm.

    Args:
        doc (Document): The document to summarize.
        size (int): Maximum number of sentences that the summary should have.

    Returns:
        list: The indices of the extracted sentences that form the summary,
            sorted ascending.
    """
    size = min(size, len(doc.sentences))
    G = self._build_graph(doc.sentences)
    ranks = nx.pagerank(G, alpha=self.damping_factor, tol=self.tol,
                        max_iter=self.max_iter)
    summary = sorted(ranks.keys(), key=lambda k: ranks[k], reverse=True)[:size]
    summary.sort()
    return summary
Example #7
Source File: unsupervised.py From indosum with Apache License 2.0

def summarize(self, doc: Document, size: int = 3) -> List[int]:
    """Summarize a given document using the LexRank algorithm.

    Args:
        doc (Document): The document to summarize.
        size (int): Maximum number of sentences that the summary should have.

    Returns:
        list: The indices of the extracted sentences that form the summary,
            sorted ascending.
    """
    size = min(size, len(doc.sentences))
    positions = [self._get_position(k, len(doc.sentences))
                 for k in range(len(doc.sentences))]
    G = self._build_graph(doc.sentences)
    ranks = nx.pagerank(G, alpha=self.damping_factor, tol=self.tol,
                        max_iter=self.max_iter)
    candidates = sorted(
        ranks.keys(),
        key=lambda k: self._combine_features(positions[k], ranks[k]),
        reverse=True)
    return self._csis(doc.sentences, candidates, size)
Example #8
Source File: LOCABAL.py From RecQ with GNU General Public License v3.0

def initModel(self):
    super(LOCABAL, self).initModel()
    self.H = np.random.rand(self.embed_size, self.embed_size)
    G = nx.DiGraph()
    for re in self.social.relation:
        G.add_edge(re[0], re[1])
    pr = nx.pagerank(G, alpha=0.85)
    pr = sorted(pr.items(), key=lambda d: d[1], reverse=True)
    pr = [(u[0], ind + 1) for ind, u in enumerate(pr)]
    self.W = {}
    for user in pr:
        self.W[user[0]] = 1 / (1 + math.log(user[1]))
    self.S = {}
    for line in self.social.relation:
        u1, u2, weight = line
        if self.data.containsUser(u1) and self.data.containsUser(u2):
            uvec1 = self.data.trainSet_u[u1]
            uvec2 = self.data.trainSet_u[u2]
            # add relations to dict
            if u1 not in self.S:
                self.S[u1] = {}
            self.S[u1][u2] = qmath.cosine_sp(uvec1, uvec2)
Example #9
Source File: document_summarization.py From text-analytics-with-python with Apache License 2.0

def textrank_text_summarizer(documents, num_sentences=2,
                             feature_type='frequency'):
    vec, dt_matrix = build_feature_matrix(documents,
                                          feature_type=feature_type)
    similarity_matrix = (dt_matrix * dt_matrix.T)
    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    scores = networkx.pagerank(similarity_graph)

    ranked_sentences = sorted(((score, index)
                               for index, score in scores.items()),
                              reverse=True)
    top_sentence_indices = [ranked_sentences[index][1]
                            for index in range(num_sentences)]
    top_sentence_indices.sort()

    for index in top_sentence_indices:
        print(documents[index])
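This example and the two that follow share one recipe: build a TF-IDF sentence matrix, turn its Gram matrix into a similarity graph, and run pagerank over it. A minimal self-contained sketch of that recipe, assuming scikit-learn is available (note that networkx.from_scipy_sparse_matrix was removed in NetworkX 3.0 in favor of from_scipy_sparse_array):

    import networkx as nx
    from sklearn.feature_extraction.text import TfidfVectorizer

    sentences = ["Graphs model pairwise relations between objects.",
                 "PageRank scores the nodes of a graph.",
                 "TextRank applies PageRank to a sentence-similarity graph."]
    tfidf = TfidfVectorizer().fit_transform(sentences)  # sentence x term matrix
    sim = tfidf @ tfidf.T                               # pairwise similarities
    graph = nx.from_scipy_sparse_array(sim)             # from_scipy_sparse_matrix on NetworkX < 3.0
    scores = nx.pagerank(graph)
    top = sorted(scores, key=scores.get, reverse=True)[:2]
    print([sentences[i] for i in sorted(top)])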
Example #10
Source File: textrank.py From nlg-yongzhuo with MIT License

def textrank_tfidf(sentences, topk=6):
    """
    Use tf-idf as the similarity measure and networkx.pagerank
    to pick the central sentences as the summary.
    :param sentences: str, docs of text
    :param topk: int
    :return: list
    """
    # split into sentences
    sentences = list(cut_sentence(sentences))
    # tf-idf similarity
    matrix_norm = tdidf_sim(sentences)
    # build the similarity matrix
    tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
    # nx.pagerank
    sens_scores = nx.pagerank(tfidf_sim)
    # sort by score
    sen_rank = sorted(sens_scores.items(), key=lambda x: x[1], reverse=True)
    # keep at most topk to avoid going out of range
    topk = min(len(sentences), topk)
    # return the original sentences with their scores
    return [(sr[1], sentences[sr[0]]) for sr in sen_rank][0:topk]
Example #11
Source File: textrank_sklearn.py From nlg-yongzhuo with MIT License

def textrank_tfidf(sentences, topk=6):
    """
    Use tf-idf as the similarity measure and networkx.pagerank
    to pick the central sentences as the summary.
    :param sentences: str, docs of text
    :param topk: int
    :return: list
    """
    # split into sentences
    sentences = list(cut_sentence(sentences))
    # tf-idf similarity
    matrix_norm = tdidf_sim(sentences)
    # build the similarity matrix
    tfidf_sim = nx.from_scipy_sparse_matrix(matrix_norm * matrix_norm.T)
    # nx.pagerank
    sens_scores = nx.pagerank(tfidf_sim)
    # sort by score
    sen_rank = sorted(sens_scores.items(), key=lambda x: x[1], reverse=True)
    # keep at most topk to avoid going out of range
    topk = min(len(sentences), topk)
    # return the original sentences with their scores
    return [(sr[1], sentences[sr[0]]) for sr in sen_rank][0:topk]
Example #12
Source File: page_rank.py From Verum with Apache License 2.0

def score(self, sg, *args, **xargs):  # get_pagerank_probability
    """
    :param sg: egocentric subgraph around topic in networkx format
    :param distance_degradation: A factor for degrading as distance from the topic increases
    :return: Dictionary of probabilities keyed by node
    """
    # convert to digraph if needed
    if sg.is_multigraph():
        sg = self.multigraph_to_digraph(sg)

    personalized = {}
    for node in sg.nodes():
        # personalized[node] = linear_weight(sg.node[node]['topic_distance'], distance_degradation)
        # INSERT WEIGHTING FUNCTION BELOW
        personalized[node] = self.exponential_weight(sg.node[node]['topic_distance'])

    # return the pagerank scores
    return nx.pagerank(sg, personalization=personalized, weight='confidence')
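A stripped-down sketch of the personalization pattern used above; the weight dictionary here is hypothetical, and larger weights bias the random walk toward those nodes:

    import networkx as nx

    G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 2)])
    personalized = {1: 0.8, 2: 0.1, 3: 0.1}  # stand-in for the topic-distance weights
    pr = nx.pagerank(G, personalization=personalized)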
Example #13
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def setUp(self):
    G = networkx.DiGraph()
    edges = [(1, 2), (1, 3),
             # 2 is a dangling node
             (3, 1), (3, 2), (3, 5),
             (4, 5), (4, 6),
             (5, 4), (5, 6),
             (6, 4)]
    G.add_edges_from(edges)
    self.G = G
    self.G.pagerank = dict(zip(sorted(G),
                               [0.03721197, 0.05395735, 0.04150565,
                                0.37508082, 0.20599833, 0.28624589]))
    self.dangling_node_index = 1
    self.dangling_edges = {1: 2, 2: 3,
                           3: 0, 4: 0, 5: 0, 6: 0}
    self.G.dangling_pagerank = dict(zip(sorted(G),
                                        [0.10844518, 0.18618601, 0.0710892,
                                         0.2683668, 0.15919783, 0.20671497]))
Example #14
Source File: lexrankr.py From lexrankr with MIT License

def summarize(self, text):
    self.sentences = self.factory.text2sentences(text)
    self.num_sentences = len(self.sentences)
    self.corpus = SentenceCorpus(self.sentences, self.no_below_word_count,
                                 self.no_above_word_portion,
                                 self.max_dictionary_size)
    self.model = TfidfModel(self.corpus.bows, id2word=self.corpus.dictionary,
                            normalize=True)
    self.tfidfs = self.model[self.corpus.bows]
    self._inject_tfidfs()
    self._build_matrix()
    self._clustering()
    if self.compactify:
        self._compactify()
    self.graphs = []
    for i in range(self.num_clusters):
        graph = self.sentences2graph(self.clusters[i])
        pagerank = networkx.pagerank(graph, weight='weight')
        self.clusters[i] = sorted(pagerank, key=pagerank.get, reverse=True)
        self.graphs.append(graph)
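Note the design choice here: pagerank is run per cluster rather than once over all sentences, so each cluster's sentences are ranked only against their own neighbors and a summary can draw the top-ranked sentence from every cluster.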
Example #15
Source File: graph_generation.py From queueing-tool with MIT License

def generate_pagerank_graph(num_vertices=250, **kwargs):
    """Creates a random graph where the vertex types are selected using
    their pagerank.

    Calls :func:`.minimal_random_graph` and then
    :func:`.set_types_rank` where the ``rank`` keyword argument
    is given by :func:`networkx.pagerank`.

    Parameters
    ----------
    num_vertices : int (optional, the default is 250)
        The number of vertices in the graph.
    **kwargs :
        Any parameters to send to :func:`.minimal_random_graph` or
        :func:`.set_types_rank`.

    Returns
    -------
    :class:`.QueueNetworkDiGraph`
        A graph with a ``pos`` vertex property and the ``edge_type``
        edge property.

    Notes
    -----
    This function sets the edge types of a graph to be either 1, 2, or 3.
    It sets the vertices to type 2 by selecting the top
    ``pType2 * g.number_of_nodes()`` vertices given by the
    :func:`~networkx.pagerank` of the graph. A loop is added to all
    vertices identified this way (if one does not exist already). It
    then randomly sets vertices close to the type 2 vertices as type 3,
    and adds loops to these vertices as well. These loops then have
    edge types that correspond to the vertex's type. The rest of the
    edges are set to type 1.
    """
    g = minimal_random_graph(num_vertices, **kwargs)
    r = np.zeros(num_vertices)
    for k, pr in nx.pagerank(g).items():
        r[k] = pr
    g = set_types_rank(g, rank=r, **kwargs)
    return g
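A hedged usage sketch, assuming queueing-tool is installed and exposes the function at the package level:

    import queueing_tool as qt

    # vertex types 1-3 are assigned from the graph's pagerank, as described above
    g = qt.generate_pagerank_graph(num_vertices=100, seed=13)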
Example #16
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_google_matrix(self):
    G = self.G
    M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
    e, ev = numpy.linalg.eig(M.T)
    p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
    for (a, b) in zip(p, self.G.pagerank.values()):
        assert_almost_equal(a, b)
Example #17
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_numpy_pagerank(self):
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    personalize = dict((n, random.random()) for n in G)
    p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize)
Example #18
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_pagerank_max_iter(self):
    networkx.pagerank(self.G, max_iter=0)
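With max_iter=0 the power iteration cannot converge, so the call above is expected to raise. A sketch of handling that failure in NetworkX 2.x and later (this try/except is not part of the original test file):

    import networkx as nx

    G = nx.complete_graph(4)
    try:
        nx.pagerank(G, max_iter=0)
    except nx.PowerIterationFailedConvergence:
        print("power iteration did not converge within max_iter iterations")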
Example #19
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_pagerank(self):
    G = self.G
    p = networkx.pagerank(G, alpha=0.9, tol=1.e-08)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    nstart = dict((n, random.random()) for n in G)
    p = networkx.pagerank(G, alpha=0.9, tol=1.e-08, nstart=nstart)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
Example #20
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_zero_personalization_vector(self):
    G = networkx.complete_graph(4)
    personalize = {0: 0, 1: 0, 2: 0, 3: 0}
    assert_raises(ZeroDivisionError, networkx.pagerank, G,
                  personalization=personalize)
Example #21
Source File: core.py From samacharbot2 with GNU General Public License v3.0

def _textrank(matrix):
    '''returns principal eigenvector of the adjacency matrix'''
    graph = nx.from_numpy_matrix(matrix)
    return nx.pagerank(graph)
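The docstring is slightly loose: PageRank is the principal eigenvector of the Google matrix built from the adjacency matrix, which is exactly what test_google_matrix (Example #16 above) verifies. A small sketch of that equivalence:

    import networkx as nx
    import numpy as np

    G = nx.complete_graph(4)
    M = np.asarray(nx.google_matrix(G, alpha=0.85))
    vals, vecs = np.linalg.eig(M.T)
    v = np.real(vecs[:, np.argmax(np.real(vals))])  # eigenvector for eigenvalue 1
    v /= v.sum()                                    # normalize to a probability vector
    # v now matches nx.pagerank(G, alpha=0.85) up to numerical tolerance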
Example #22
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_one_nonzero_personalization_value(self):
    G = networkx.complete_graph(4)
    personalize = {0: 0, 1: 0, 2: 0, 3: 1}
    answer = {0: 0.22077931820379187,
              1: 0.22077931820379187,
              2: 0.22077931820379187,
              3: 0.3376620453886241}
    p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
    for n in G:
        assert_almost_equal(p[n], answer[n], places=4)
Example #23
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_incomplete_personalization(self):
    G = networkx.complete_graph(4)
    personalize = {3: 1}
    answer = {0: 0.22077931820379187,
              1: 0.22077931820379187,
              2: 0.22077931820379187,
              3: 0.3376620453886241}
    p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
    for n in G:
        assert_almost_equal(p[n], answer[n], places=4)
Example #24
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_dangling_pagerank(self):
    pr = networkx.pagerank(self.G, dangling=self.dangling_edges)
    for n in self.G:
        assert_almost_equal(pr[n], self.G.dangling_pagerank[n], places=4)
Example #25
Source File: test_pagerank.py From Carnets with BSD 3-Clause "New" or "Revised" License

def test_empty(self):
    G = networkx.Graph()
    assert_equal(networkx.pagerank(G), {})
    assert_equal(networkx.pagerank_numpy(G), {})
    assert_equal(networkx.google_matrix(G).shape, (0, 0))
Example #26
Source File: test_pagerank.py From aws-kube-codesuite with Apache License 2.0

def test_pagerank(self):
    G = self.G
    p = networkx.pagerank(G, alpha=0.9, tol=1.e-08)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    nstart = dict((n, random.random()) for n in G)
    p = networkx.pagerank(G, alpha=0.9, tol=1.e-08, nstart=nstart)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
Example #27
Source File: test_pagerank.py From aws-kube-codesuite with Apache License 2.0

def test_pagerank_max_iter(self):
    networkx.pagerank(self.G, max_iter=0)
Example #28
Source File: test_pagerank.py From aws-kube-codesuite with Apache License 2.0

def test_numpy_pagerank(self):
    G = self.G
    p = networkx.pagerank_numpy(G, alpha=0.9)
    for n in G:
        assert_almost_equal(p[n], G.pagerank[n], places=4)
    personalize = dict((n, random.random()) for n in G)
    p = networkx.pagerank_numpy(G, alpha=0.9, personalization=personalize)
Example #29
Source File: test_pagerank.py From aws-kube-codesuite with Apache License 2.0

def test_google_matrix(self):
    G = self.G
    M = networkx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
    e, ev = numpy.linalg.eig(M.T)
    p = numpy.array(ev[:, 0] / ev[:, 0].sum())[:, 0]
    for (a, b) in zip(p, self.G.pagerank.values()):
        assert_almost_equal(a, b)
Example #30
Source File: test_pagerank.py From aws-kube-codesuite with Apache License 2.0

def test_zero_personalization_vector(self):
    G = networkx.complete_graph(4)
    personalize = {0: 0, 1: 0, 2: 0, 3: 0}
    assert_raises(ZeroDivisionError, networkx.pagerank, G,
                  personalization=personalize)