Python heapq.nlargest() Examples
The following are 28 code examples of heapq.nlargest(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the heapq module.
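Before the project examples, here is a minimal, self-contained sketch of what heapq.nlargest(n, iterable, key=None) does; the dictionary and names below are illustrative only and not taken from any of the projects that follow:

import heapq

scores = {'alice': 3, 'bob': 10, 'carol': 7, 'dave': 1}

# Top-2 values from any iterable, returned in descending order.
print(heapq.nlargest(2, scores.values()))         # [10, 7]

# Top-2 keys ranked by their mapped value -- the same pattern several
# of the examples below use via key=some_dict.get.
print(heapq.nlargest(2, scores, key=scores.get))  # ['bob', 'carol']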
Example #1
Source File: summarize.py From Django-Bookworm with MIT License | 7 votes |
def get_summary(self, number_of_sentences=5):
    '''
    generates summary based on weighted word frequencies
    :param number_of_sentences: total number of sentences to return in summary
    :return: string of summary
    '''
    sentence_value = {}
    for sentence in self.__sentence:
        for word in self.__word_freq.keys():
            if word in word_tokenize(sentence.lower()):
                if sentence in sentence_value:
                    sentence_value[sentence] += self.__word_freq.get(word)
                else:
                    sentence_value[sentence] = self.__word_freq.get(word, 0)

    summary_sentences = heapq.nlargest(number_of_sentences,
                                       sentence_value,
                                       key=sentence_value.get)
    summary = ' '.join(summary_sentences)
    return summary
Example #2
Source File: data_helpers.py From Multi-Label-Text-Classification with Apache License 2.0 | 6 votes |
def get_onehot_label_topk(scores, top_num=1):
    """
    Get the predicted onehot labels based on the topK number.

    Args:
        scores: The all classes predicted scores provided by network
        top_num: The max topK number (default: 1)
    Returns:
        predicted_onehot_labels: The predicted labels (onehot)
    """
    predicted_onehot_labels = []
    scores = np.ndarray.tolist(scores)
    for score in scores:
        onehot_labels_list = [0] * len(score)
        max_num_index_list = list(map(score.index, heapq.nlargest(top_num, score)))
        for i in max_num_index_list:
            onehot_labels_list[i] = 1
        predicted_onehot_labels.append(onehot_labels_list)
    return predicted_onehot_labels
Example #3
Source File: knns.py From Surprise with BSD 3-Clause "New" or "Revised" License | 6 votes |
def estimate(self, u, i):
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unknown.')

    x, y = self.switch(u, i)

    neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
    k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])

    # compute weighted average
    sum_sim = sum_ratings = actual_k = 0
    for (sim, r) in k_neighbors:
        if sim > 0:
            sum_sim += sim
            sum_ratings += sim * r
            actual_k += 1

    if actual_k < self.min_k:
        raise PredictionImpossible('Not enough neighbors.')

    est = sum_ratings / sum_sim

    details = {'actual_k': actual_k}

    return est, details
Example #4
Source File: tnt.py From weibo-analysis-system with MIT License | 6 votes |
def tag(self, data):
    # Beam search over tag sequences: `now` keeps the N best partial
    # paths as ((prev_tag, tag), log_prob, path) triples.
    now = [(('BOS', 'BOS'), 0.0, [])]
    for w in data:
        stage = {}
        samples = self.status
        if w in self.word:
            samples = self.word[w]
        for s in samples:
            wd = log(self.wd.get((s, w))[1]) - log(self.uni.get(s)[1])
            for pre in now:
                p = pre[1] + wd + self.trans[(pre[0][0], pre[0][1], s)]
                if (pre[0][1], s) not in stage or p > stage[(pre[0][1], s)][0]:
                    stage[(pre[0][1], s)] = (p, pre[2] + [s])
        stage = list(map(lambda x: (x[0], x[1][0], x[1][1]), stage.items()))
        now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
    # Pick the single best path once the end-of-sentence score is added.
    now = heapq.nlargest(1, stage, key=lambda x: x[1] + self.geteos(x[0][1]))
    return zip(data, now[0][2])
Example #5
Source File: gc.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
Example #6
Source File: evaluate.py From Recommender-Systems-Samples with MIT License | 6 votes |
def eval_one_rating(idx):
    # obtain test items and users
    rating = _testRatings[idx]
    items = _testNegatives[idx]
    u = rating[0]
    gtItem = rating[1]
    items.append(gtItem)
    users = np.full(len(items), u, dtype='int32')

    # obtain prediction scores
    map_item_score = {}
    predictions = _model.predict([users, np.array(items)],
                                 batch_size=100, verbose=0)
    for i in range(len(items)):
        item = items[i]
        map_item_score[item] = predictions[i]
    items.pop()

    # evaluate topk list
    ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get)
    hr = getHitRatio(ranklist, gtItem)
    ndcg = getNDCG(ranklist, gtItem)
    return (hr, ndcg)
Example #7
Source File: preprocessing.py From dl4nlp with MIT License | 6 votes |
def build_dictionary(sentences, size):
    """
    Create dictionary containing most frequent words in the sentences
    :param sentences: sequence of sentence that contains words
                      Caution: the sequence might be exhausted after calling this function!
    :param size: size of dictionary you want
    :return: dictionary that maps word to index (starting from 1)
    """
    dictionary = defaultdict(int)
    for sentence in sentences:
        for token in sentence:
            dictionary[token] += 1

    frequent_pairs = nlargest(size, dictionary.items(), itemgetter(1))
    words, frequencies = zip(*frequent_pairs)
    result = {word: index + 1 for index, word in enumerate(words)}
    return result
Example #8
Source File: gc.py From lambda-packs with MIT License | 6 votes |
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
Example #9
Source File: tnt.py From sentiment-analysis-webapp with MIT License | 6 votes |
def tag(self, data):
    now = [(('BOS', 'BOS'), 0.0, [])]
    for w in data:
        stage = {}
        samples = self.status
        if w in self.word:
            samples = self.word[w]
        for s in samples:
            wd = log(self.wd.get((s, w))[1]) - log(self.uni.get(s)[1])
            for pre in now:
                p = pre[1] + wd + self.trans[(pre[0][0], pre[0][1], s)]
                if (pre[0][1], s) not in stage or p > stage[(pre[0][1], s)][0]:
                    stage[(pre[0][1], s)] = (p, pre[2] + [s])
        stage = list(map(lambda x: (x[0], x[1][0], x[1][1]), stage.items()))
        now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
    now = heapq.nlargest(1, stage, key=lambda x: x[1] + self.geteos(x[0][1]))
    return zip(data, now[0][2])
Example #10
Source File: fpt.py From fylearn with MIT License | 6 votes |
def select_slaves(self, candidates, P_U_S, class_vector, X):
    R = []
    for candidate in candidates:
        aggregates = []
        for other in P_U_S:
            if not _tree_contains(candidate[1], other):
                aggregates.extend([Inner(a, [candidate[1], other]) for a in OPERATORS])
        R.extend(_select_candidates(aggregates, self.num_slaves, class_vector,
                                    self.similarity_measure, X))
    R = sorted(R, key=lambda x: x[0])
    RR = []
    used_nodes = set()
    for candidate in R:
        inner_node = candidate[1]
        found = False
        for tree in inner_node.branches_:
            if tree in used_nodes:
                found = True
        if not found:
            used_nodes.update(inner_node.branches_)
            RR.append(candidate)
    return heapq.nlargest(self.num_slaves, RR, key=lambda x: x[0])
Example #11
Source File: fpt.py From fylearn with MIT License | 6 votes |
def select_slaves(self, C, P, class_vector, num_candidates, X):
    R = []
    for candidate in C:
        c = candidate[1]
        modified = []
        candidate_leaves = _tree_leaves(c)
        for c_leaf in candidate_leaves:
            for p_leaf in [p for p in P if p not in candidate_leaves]:
                for aggr in OPERATORS:
                    modified.append(_tree_clone_replace_leaf(
                        c, c_leaf, Inner(aggr, [c_leaf, p_leaf])))
        R.extend(_select_candidates(modified, self.num_candidates, class_vector,
                                    self.similarity_measure, X))
    R = list(heapq.nlargest(self.num_candidates, R, key=lambda x: x[0]))
    return list(reversed(sorted(R, key=lambda x: x[0])))
Example #12
Source File: gc.py From deep_image_model with Apache License 2.0 | 6 votes |
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
Example #13
Source File: pca.py From imylu with Apache License 2.0 | 6 votes |
def _get_top_eigen_vectors(data: ndarray, n_components: int) -> ndarray:
    """The eigen vectors according to top n_components large eigen values.

    Arguments:
        data {ndarray} -- Training data.
        n_components {int} -- Number of components to keep.

    Returns:
        ndarray -- eigen vectors with shape(n_cols, n_components).
    """
    # Calculate eigen values and eigen vectors of covariance matrix.
    eigen_values, eigen_vectors = eig(data)
    # The indexes of top n_components large eigen values.
    _indexes = heapq.nlargest(n_components, enumerate(eigen_values),
                              key=lambda x: x[1])
    indexes = [x[0] for x in _indexes]
    return eigen_vectors[:, indexes]
Example #14
Source File: rdd.py From LearningApacheSpark with MIT License | 6 votes |
def top(self, num, key=None):
    """
    Get the top N elements from an RDD.

    .. note:: This method should only be used if the resulting array
        is expected to be small, as all the data is loaded into the
        driver's memory.

    .. note:: It returns the list sorted in descending order.

    >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
    [12]
    >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2)
    [6, 5]
    >>> sc.parallelize([10, 4, 2, 12, 3]).top(3, key=str)
    [4, 3, 2]
    """
    def topIterator(iterator):
        yield heapq.nlargest(num, iterator, key=key)

    def merge(a, b):
        return heapq.nlargest(num, a + b, key=key)

    return self.mapPartitions(topIterator).reduce(merge)
Example #15
Source File: char_rnn_model.py From AI_Poet_Totoro with MIT License | 5 votes |
def select_by_given(self, given, probs, max_prob=False):
    if given:
        seq_probs = zip(probs, range(0, self.vocab_size))
        topn = heapq.nlargest(100, seq_probs, key=lambda sp: sp[0])
        for _, seq in topn:
            if self.w2v_model.vocab[seq] in given:
                given = given.replace(self.w2v_model.vocab[seq], '')
                return seq, given
    if max_prob:
        return np.argmax(probs), given
    return np.random.choice(self.vocab_size, 1, p=probs)[0], given
Example #16
Source File: python2x.py From attention-lvcsr with MIT License | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts.

    The list goes from the most common to the least.
    If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
    return nlargest(n, self.iteritems(), key=itemgetter(1))
Example #17
Source File: utils.py From Gun-Detector with Apache License 2.0 | 5 votes |
def iter_in_order(self):
  """Iterate over items in the queue from largest score to smallest.

  Yields:
    item: Hashable item.
    extra_data: Extra data stored with the item.
  """
  for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap):
    yield item, extra_data
Example #18
Source File: knns.py From Surprise with BSD 3-Clause "New" or "Revised" License | 5 votes |
def estimate(self, u, i):
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unknown.')

    x, y = self.switch(u, i)

    neighbors = [(x2, self.sim[x, x2], r) for (x2, r) in self.yr[y]]
    k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[1])

    est = self.means[x]

    # compute weighted average
    sum_sim = sum_ratings = actual_k = 0
    for (nb, sim, r) in k_neighbors:
        if sim > 0:
            sum_sim += sim
            sum_ratings += sim * (r - self.means[nb])
            actual_k += 1

    if actual_k < self.min_k:
        sum_ratings = 0

    try:
        est += sum_ratings / sum_sim
    except ZeroDivisionError:
        pass  # return mean

    details = {'actual_k': actual_k}

    return est, details
Example #19
Source File: learning.py From aima with MIT License | 5 votes |
def top(self, n):
    "Return (count, obs) tuples for the n most frequent observations."
    return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])
Example #20
Source File: utils.py From Gun-Detector with Apache License 2.0 | 5 votes |
def get_max(self):
  """Peek at the item with the highest score.

  Returns:
    Same as `pop`.
  """
  if not self.heap:
    return ()
  score, item, extra_data = heapq.nlargest(1, self.heap)[0]
  return score, item, extra_data
Example #21
Source File: misc.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(iteritems(self), key=itemgetter(1), reverse=True)
    return nlargest(n, iteritems(self), key=itemgetter(1))
Example #22
Source File: pandas_py3k.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(iteritems(self), key=itemgetter(1), reverse=True)
    return nlargest(n, iteritems(self), key=itemgetter(1))
Example #23
Source File: newCollections.py From scoop with GNU Lesser General Public License v3.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
    return nlargest(n, self.iteritems(), key=itemgetter(1))
Example #24
Source File: utils.py From yolo_v2 with Apache License 2.0 | 5 votes |
def iter_in_order(self):
  """Iterate over items in the queue from largest score to smallest.

  Yields:
    item: Hashable item.
    extra_data: Extra data stored with the item.
  """
  for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap):
    yield item, extra_data
Example #25
Source File: utils.py From yolo_v2 with Apache License 2.0 | 5 votes |
def get_max(self):
  """Peek at the item with the highest score.

  Returns:
    Same as `pop`.
  """
  if not self.heap:
    return ()
  score, item, extra_data = heapq.nlargest(1, self.heap)[0]
  return score, item, extra_data
Example #26
Source File: collections.py From pmatic with GNU General Public License v2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]

    '''
    # Emulate Bag.sortedByCount from Smalltalk
    if n is None:
        return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
    return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
Example #27
Source File: oom_batch_sampler.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __iter__(self):
    batches = list(iter(self.batch_sampler))
    largest_batches = heapq.nlargest(
        self.num_batches, range(len(batches)),
        key=lambda i: sum([self.get_item_size(j) for j in batches[i]]))
    move_to_front = [batches[i] for i in largest_batches]
    [batches.pop(i) for i in sorted(largest_batches, reverse=True)]
    batches[0:0] = move_to_front
    return iter(batches)
Example #28
Source File: misc.py From deepWordBug with Apache License 2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]

    '''
    # Emulate Bag.sortedByCount from Smalltalk
    if n is None:
        return sorted(self.items(), key=_itemgetter(1), reverse=True)
    return _heapq.nlargest(n, self.items(), key=_itemgetter(1))