Python heapq.nlargest() Examples

The following are 30 code examples of heapq.nlargest(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module heapq, or try the search function.
Example #1
Source File: summarize.py    From Django-Bookworm with MIT License 7 votes vote down vote up
def get_summary(self, number_of_sentences=5):
        '''
            Generate an extractive summary based on weighted word frequencies.

            Each sentence is scored by summing the weights of the words from
            the word-frequency table that occur among its lower-cased tokens.
            Sentences that contain none of those words are never scored and
            therefore cannot be selected.

            :param number_of_sentences: maximum number of sentences to return in summary
            :return: the highest-scoring sentences, best first, joined by single spaces
        '''
        sentence_value = {}
        for sentence in self.__sentence:
            # Tokenize once per sentence instead of once per vocabulary word,
            # and use a set so each membership test is a hash lookup.
            tokens = set(word_tokenize(sentence.lower()))
            for word, weight in self.__word_freq.items():
                if word in tokens:
                    sentence_value[sentence] = sentence_value.get(sentence, 0) + weight

        # Iterating the dict yields sentences; key= ranks them by their score.
        summary_sentences = heapq.nlargest(number_of_sentences, sentence_value, key=sentence_value.get)
        return ' '.join(summary_sentences)
Example #2
Source File: data_helpers.py    From Multi-Label-Text-Classification with Apache License 2.0 6 votes vote down vote up
def get_onehot_label_topk(scores, top_num=1):
    """
    Get the predicted onehot labels based on the topK number.

    Args:
        scores: 2-D array-like of predicted class scores, one row per sample
        top_num: The number of highest-scoring classes to mark per row (default: 1)
    Returns:
        predicted_onehot_labels: The predicted labels (onehot), a list with one
            0/1 list per row of `scores`
    """
    predicted_onehot_labels = []
    for score in np.asarray(scores).tolist():
        onehot_labels_list = [0] * len(score)
        # Rank positions rather than values: looking the top values up again
        # with list.index() maps tied scores to the same first position and
        # would mark fewer than top_num labels.
        for i in heapq.nlargest(top_num, range(len(score)), key=score.__getitem__):
            onehot_labels_list[i] = 1
        predicted_onehot_labels.append(onehot_labels_list)
    return predicted_onehot_labels
Example #3
Source File: knns.py    From Surprise with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def estimate(self, u, i):
        """Estimate a rating as the similarity-weighted mean of neighbor ratings.

        The k most similar neighbors (by ``self.sim``) among the entries of
        ``self.yr[y]`` are selected; only those with a strictly positive
        similarity contribute to the weighted average.

        Returns:
            A tuple ``(est, details)`` where ``details`` holds ``actual_k``,
            the number of neighbors that contributed.

        Raises:
            PredictionImpossible: if the user or item is unknown to the
                trainset, or fewer than ``self.min_k`` neighbors contributed.
        """
        trainset = self.trainset
        if not trainset.knows_user(u) or not trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unknown.')

        x, y = self.switch(u, i)

        # (similarity, rating) pairs, ranked by similarity only.
        candidates = ((self.sim[x, x2], r) for (x2, r) in self.yr[y])
        k_neighbors = heapq.nlargest(self.k, candidates, key=lambda t: t[0])

        # accumulate the weighted average over positively-similar neighbors
        sum_sim = sum_ratings = actual_k = 0
        for sim, r in k_neighbors:
            if not sim > 0:
                continue
            actual_k += 1
            sum_sim += sim
            sum_ratings += sim * r

        if actual_k < self.min_k:
            raise PredictionImpossible('Not enough neighbors.')

        return sum_ratings / sum_sim, {'actual_k': actual_k}
Example #4
Source File: tnt.py    From weibo-analysis-system with MIT License 6 votes vote down vote up
def tag(self, data):
        """Tag a sequence of words using a beam search over tag pairs.

        Search states are keyed by (previous tag, current tag). For every
        word, each candidate tag is scored from ``self.wd`` / ``self.uni``
        plus a transition score from ``self.trans`` looked up by the last two
        tags and the candidate. Only the ``self.N`` best states survive each
        step. The final path is chosen after adding ``self.geteos`` of the
        last tag to each state's score.

        :param data: sequence of words to tag
        :return: the result of ``zip(data, tags)``; empty when ``data`` is empty
        """
        now = [(('BOS', 'BOS'), 0.0, [])]
        stage = None
        for w in data:
            stage = {}
            # Words seen in self.word restrict the candidate tags; unseen
            # words fall back to every tag in self.status.
            samples = self.word[w] if w in self.word else self.status
            for s in samples:
                wd = log(self.wd.get((s, w))[1]) - log(self.uni.get(s)[1])
                for pre in now:
                    p = pre[1] + wd + self.trans[(pre[0][0], pre[0][1], s)]
                    state = (pre[0][1], s)
                    # keep only the best-scoring path into each state
                    if state not in stage or p > stage[state][0]:
                        stage[state] = (p, pre[2] + [s])
            stage = [(state, p, path) for state, (p, path) in stage.items()]
            now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
        if stage is None:
            # Empty input: the loop never ran, so there is no final stage to
            # rank. Return an empty pairing instead of failing on an unbound name.
            return zip(data, [])
        now = heapq.nlargest(1, stage, key=lambda x: x[1] + self.geteos(x[0][1]))
        return zip(data, now[0][2])
Example #5
Source File: gc.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def largest_export_versions(n):
  """Builds a filter that retains the paths with the n highest export versions.

  Args:
    n: how many versions the returned filter keeps.

  Returns:
    A function taking an indexable sequence of paths and returning the sorted
    list of the n paths with the largest export versions. Paths whose
    `export_version` is None are ignored.
  """
  def keep(paths):
    # Pair each version with its position: ties break by position and the
    # position recovers the original path afterwards.
    versioned = [(candidate.export_version, position)
                 for position, candidate in enumerate(paths)
                 if candidate.export_version is not None]
    newest = heapq.nlargest(n, versioned)
    return sorted(paths[position] for _, position in newest)

  return keep
Example #6
Source File: gc.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def largest_export_versions(n):
  """Builds a filter that retains the paths with the n highest export versions.

  Args:
    n: how many versions the returned filter keeps.

  Returns:
    A function taking an indexable sequence of paths and returning the sorted
    list of the n paths with the largest export versions. Paths whose
    `export_version` is None are ignored.
  """
  def keep(paths):
    # Pair each version with its position: ties break by position and the
    # position recovers the original path afterwards.
    versioned = [(candidate.export_version, position)
                 for position, candidate in enumerate(paths)
                 if candidate.export_version is not None]
    newest = heapq.nlargest(n, versioned)
    return sorted(paths[position] for _, position in newest)

  return keep
Example #7
Source File: evaluate.py    From Recommender-Systems-Samples with MIT License 6 votes vote down vote up
def eval_one_rating(idx):
    """Evaluate one test rating against its negative samples.

    The ground-truth item is temporarily appended to the negative item list,
    every item is scored by the model for the rating's user, and the top
    `_K` items by score form the ranked list that is evaluated.

    Returns:
        A tuple (hr, ndcg) as computed by getHitRatio and getNDCG.
    """
    # obtain test items and users
    rating = _testRatings[idx]
    items = _testNegatives[idx]
    u, gtItem = rating[0], rating[1]
    items.append(gtItem)
    users = np.full(len(items), u, dtype='int32')

    # obtain prediction scores, keyed by item
    predictions = _model.predict([users, np.array(items)], batch_size=100, verbose=0)
    map_item_score = {item: predictions[pos] for pos, item in enumerate(items)}
    # restore the shared negatives list to its original contents
    items.pop()

    # evaluate topk list
    ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get)
    return (getHitRatio(ranklist, gtItem), getNDCG(ranklist, gtItem))
Example #8
Source File: preprocessing.py    From dl4nlp with MIT License 6 votes vote down vote up
def build_dictionary(sentences, size):
    """
    Create dictionary containing most frequent words in the sentences
    :param sentences: sequence of sentence that contains words
        Caution: the sequence might be exhausted after calling this function!
    :param size: size of dictionary you want
    :return: dictionary that maps word to index (starting from 1, most
        frequent word first); empty when there are no words or size is 0
    """
    counts = defaultdict(int)
    for sentence in sentences:
        for token in sentence:
            counts[token] += 1
    frequent_pairs = nlargest(size, counts.items(), key=itemgetter(1))
    # Enumerate the (word, count) pairs directly rather than unzipping them:
    # unpacking zip(*pairs) fails when there are no pairs at all.
    return {word: index for index, (word, _) in enumerate(frequent_pairs, 1)}
Example #9
Source File: gc.py    From lambda-packs with MIT License 6 votes vote down vote up
def largest_export_versions(n):
  """Builds a filter that retains the paths with the n highest export versions.

  Args:
    n: how many versions the returned filter keeps.

  Returns:
    A function taking an indexable sequence of paths and returning the sorted
    list of the n paths with the largest export versions. Paths whose
    `export_version` is None are ignored.
  """
  def keep(paths):
    # Pair each version with its position: ties break by position and the
    # position recovers the original path afterwards.
    versioned = [(candidate.export_version, position)
                 for position, candidate in enumerate(paths)
                 if candidate.export_version is not None]
    newest = heapq.nlargest(n, versioned)
    return sorted(paths[position] for _, position in newest)

  return keep
Example #10
Source File: gc.py    From lambda-packs with MIT License 6 votes vote down vote up
def largest_export_versions(n):
  """Builds a filter that retains the paths with the n highest export versions.

  Args:
    n: how many versions the returned filter keeps.

  Returns:
    A function taking an indexable sequence of paths and returning the sorted
    list of the n paths with the largest export versions. Paths whose
    `export_version` is None are ignored.
  """
  def keep(paths):
    # Pair each version with its position: ties break by position and the
    # position recovers the original path afterwards.
    versioned = [(candidate.export_version, position)
                 for position, candidate in enumerate(paths)
                 if candidate.export_version is not None]
    newest = heapq.nlargest(n, versioned)
    return sorted(paths[position] for _, position in newest)

  return keep
Example #11
Source File: tnt.py    From sentiment-analysis-webapp with MIT License 6 votes vote down vote up
def tag(self, data):
        """Tag a sequence of words using a beam search over tag pairs.

        Search states are keyed by (previous tag, current tag). For every
        word, each candidate tag is scored from ``self.wd`` / ``self.uni``
        plus a transition score from ``self.trans`` looked up by the last two
        tags and the candidate. Only the ``self.N`` best states survive each
        step. The final path is chosen after adding ``self.geteos`` of the
        last tag to each state's score.

        :param data: sequence of words to tag
        :return: the result of ``zip(data, tags)``; empty when ``data`` is empty
        """
        now = [(('BOS', 'BOS'), 0.0, [])]
        stage = None
        for w in data:
            stage = {}
            # Words seen in self.word restrict the candidate tags; unseen
            # words fall back to every tag in self.status.
            samples = self.word[w] if w in self.word else self.status
            for s in samples:
                wd = log(self.wd.get((s, w))[1]) - log(self.uni.get(s)[1])
                for pre in now:
                    p = pre[1] + wd + self.trans[(pre[0][0], pre[0][1], s)]
                    state = (pre[0][1], s)
                    # keep only the best-scoring path into each state
                    if state not in stage or p > stage[state][0]:
                        stage[state] = (p, pre[2] + [s])
            stage = [(state, p, path) for state, (p, path) in stage.items()]
            now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
        if stage is None:
            # Empty input: the loop never ran, so there is no final stage to
            # rank. Return an empty pairing instead of failing on an unbound name.
            return zip(data, [])
        now = heapq.nlargest(1, stage, key=lambda x: x[1] + self.geteos(x[0][1]))
        return zip(data, now[0][2])
Example #12
Source File: fpt.py    From fylearn with MIT License 6 votes vote down vote up
def select_slaves(self, candidates, P_U_S, class_vector, X):
        """Select up to ``self.num_slaves`` combined trees with disjoint branches.

        Every candidate tree (``candidate[1]``) is combined, through each
        operator in OPERATORS, with each element of ``P_U_S`` that
        ``_tree_contains`` reports as not already part of it. The combinations
        are filtered by ``_select_candidates``. The pooled results are then
        walked in ascending order of their first field, keeping a result only
        if none of its branches was used by an earlier kept result.

        :return: the ``self.num_slaves`` kept results with the largest first field
        """
        pool = []
        for candidate in candidates:
            tree = candidate[1]
            aggregates = [Inner(operator, [tree, other])
                          for other in P_U_S
                          if not _tree_contains(tree, other)
                          for operator in OPERATORS]
            pool.extend(_select_candidates(aggregates, self.num_slaves, class_vector, self.similarity_measure, X))

        chosen = []
        used_nodes = set()
        for scored in sorted(pool, key=lambda x: x[0]):
            branches = scored[1].branches_
            # skip anything sharing a branch with an already chosen result
            if any(branch in used_nodes for branch in branches):
                continue
            used_nodes.update(branches)
            chosen.append(scored)

        return heapq.nlargest(self.num_slaves, chosen, key=lambda x: x[0])
Example #13
Source File: fpt.py    From fylearn with MIT License 6 votes vote down vote up
def select_slaves(self, C, P, class_vector, num_candidates, X):
        """Grow each candidate tree by one aggregation and keep the best results.

        For every candidate tree (``candidate[1]``), each of its leaves is
        replaced in a clone by an Inner node that combines that leaf with an
        element of ``P`` not already among the candidate's leaves, once per
        operator in OPERATORS. The variants are filtered by
        ``_select_candidates`` and the running result list is trimmed to the
        ``self.num_candidates`` entries with the largest first field.

        NOTE(review): the ``num_candidates`` argument is not read; the limit
        comes from ``self.num_candidates`` -- confirm this is intended.

        :return: the retained results ordered by descending first field
        """
        best = []
        for candidate in C:
            tree = candidate[1]
            candidate_leaves = _tree_leaves(tree)

            variants = []
            for c_leaf in candidate_leaves:
                partners = [p for p in P if p not in candidate_leaves]
                variants.extend(
                    _tree_clone_replace_leaf(tree, c_leaf, Inner(aggr, [c_leaf, p_leaf]))
                    for p_leaf in partners
                    for aggr in OPERATORS)

            best.extend(_select_candidates(variants, self.num_candidates, class_vector, self.similarity_measure, X))

            # trim after every candidate so the list never grows unbounded
            best = heapq.nlargest(self.num_candidates, best, key=lambda x: x[0])

        return sorted(best, key=lambda x: x[0])[::-1]
Example #14
Source File: gc.py    From deep_image_model with Apache License 2.0 6 votes vote down vote up
def largest_export_versions(n):
  """Builds a filter that retains the paths with the n highest export versions.

  Args:
    n: how many versions the returned filter keeps.

  Returns:
    A function taking an indexable sequence of paths and returning the sorted
    list of the n paths with the largest export versions. Paths whose
    `export_version` is None are ignored.
  """
  def keep(paths):
    # Pair each version with its position: ties break by position and the
    # position recovers the original path afterwards.
    versioned = [(candidate.export_version, position)
                 for position, candidate in enumerate(paths)
                 if candidate.export_version is not None]
    newest = heapq.nlargest(n, versioned)
    return sorted(paths[position] for _, position in newest)

  return keep
Example #15
Source File: pca.py    From imylu with Apache License 2.0 6 votes vote down vote up
def _get_top_eigen_vectors(data: ndarray, n_components: int) -> ndarray:
        """Select the eigen vectors belonging to the largest eigen values.

        Arguments:
            data {ndarray} -- Square matrix passed directly to ``eig``
                (presumably a covariance matrix -- verify against the caller).
            n_components {int} -- Number of components to keep.

        Returns:
            ndarray -- the columns of the eigen vector matrix for the
            n_components largest eigen values, largest first.
        """

        eigen_values, eigen_vectors = eig(data)
        # Rank column positions by their eigen value, largest first.
        indexes = heapq.nlargest(n_components, range(len(eigen_values)),
                                 key=eigen_values.__getitem__)
        return eigen_vectors[:, indexes]
Example #16
Source File: rdd.py    From LearningApacheSpark with MIT License 6 votes vote down vote up
def top(self, num, key=None):
        """
        Return the `num` largest elements of this RDD.

        Each partition is first reduced to its own `num` largest elements;
        the per-partition lists are then merged pairwise, keeping the `num`
        largest at every merge. `key`, when given, is the ranking function.

        .. note:: This method should only be used if the resulting array is expected
            to be small, as all the data is loaded into the driver's memory.

        .. note:: It returns the list sorted in descending order.

        >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
        [12]
        >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2)
        [6, 5]
        >>> sc.parallelize([10, 4, 2, 12, 3]).top(3, key=str)
        [4, 3, 2]
        """
        def largest_of(values):
            return heapq.nlargest(num, values, key=key)

        def per_partition(iterator):
            # one list per partition, so reduce() merges lists rather than elements
            yield largest_of(iterator)

        return self.mapPartitions(per_partition).reduce(
            lambda left, right: largest_of(left + right))
Example #17
Source File: char_rnn_model.py    From AI_Poet_Totoro with MIT License 5 votes vote down vote up
def select_by_given(self,given,probs,max_prob = False):
        """Pick the next vocabulary index, preferring entries found in `given`.

        When `given` is non-empty, the 100 most probable indices are scanned
        in descending probability; the first one whose vocabulary entry occurs
        in `given` is returned, together with `given` with that entry removed.
        Otherwise the most probable index is returned if `max_prob` is set,
        else an index sampled according to `probs`.

        :return: tuple (index, remaining given)
        """
        if given:
            ranked = heapq.nlargest(100, zip(probs, range(0, self.vocab_size)),
                                    key=lambda sp: sp[0])
            for _, seq in ranked:
                token = self.w2v_model.vocab[seq]
                if token in given:
                    return seq, given.replace(token, '')

        if max_prob:
            return np.argmax(probs), given

        sampled = np.random.choice(self.vocab_size, 1, p=probs)
        return sampled[0], given
Example #18
Source File: python2x.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def most_common(self, n=None):
            '''Return (element, count) pairs ordered from most to least common.

            With n given, only the n pairs with the highest counts are
            returned; with n left as None, every pair is returned.

            >>> Counter('abracadabra').most_common(3)
            [('a', 5), ('r', 2), ('b', 2)]
            '''
            by_count = itemgetter(1)
            if n is not None:
                return nlargest(n, self.iteritems(), key=by_count)
            return sorted(self.iteritems(), key=by_count, reverse=True)
Example #19
Source File: utils.py    From Gun-Detector with Apache License 2.0 5 votes vote down vote up
def iter_in_order(self):
    """Yield the queued entries in descending order of their heap tuples.

    Each entry of `self.heap` is a 3-tuple whose first field (the score) is
    dropped from the output.

    Yields:
      item: second field of the heap entry.
      extra_data: third field of the heap entry.
    """
    # Requesting as many "largest" entries as the heap holds is a full
    # descending sort.
    for entry in sorted(self.heap, reverse=True):
      _, item, extra_data = entry
      yield item, extra_data
Example #20
Source File: knns.py    From Surprise with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def estimate(self, u, i):
        """Estimate a rating as a mean plus a similarity-weighted deviation.

        Starting from ``self.means[x]``, the k most similar neighbors (by
        ``self.sim``) among the entries of ``self.yr[y]`` are selected. Those
        with a strictly positive similarity contribute their rating's
        deviation from their own mean. When fewer than ``self.min_k``
        neighbors contribute, or none at all, the plain mean is returned.

        Returns:
            A tuple ``(est, details)`` where ``details`` holds ``actual_k``,
            the number of neighbors that contributed.

        Raises:
            PredictionImpossible: if the user or item is unknown to the trainset.
        """
        trainset = self.trainset
        if not trainset.knows_user(u) or not trainset.knows_item(i):
            raise PredictionImpossible('User and/or item is unknown.')

        x, y = self.switch(u, i)

        # (neighbor, similarity, rating) triples, ranked by similarity only.
        candidates = ((x2, self.sim[x, x2], r) for (x2, r) in self.yr[y])
        k_neighbors = heapq.nlargest(self.k, candidates, key=lambda t: t[1])

        # accumulate mean-centered ratings over positively-similar neighbors
        sum_sim = sum_ratings = actual_k = 0
        for nb, sim, r in k_neighbors:
            if not sim > 0:
                continue
            actual_k += 1
            sum_sim += sim
            sum_ratings += sim * (r - self.means[nb])

        if actual_k < self.min_k:
            sum_ratings = 0

        est = self.means[x]
        try:
            est += sum_ratings / sum_sim
        except ZeroDivisionError:
            pass  # no contributing neighbor: fall back to the mean

        return est, {'actual_k': actual_k}
Example #21
Source File: learning.py    From aima with MIT License 5 votes vote down vote up
def top(self, n):
        "Return the n largest (count, obs) tuples built from self.dictionary."
        # Putting the count first makes plain tuple comparison rank by count.
        pairs = ((count, obs) for obs, count in self.dictionary.items())
        return heapq.nlargest(n, pairs)
Example #22
Source File: utils.py    From Gun-Detector with Apache License 2.0 5 votes vote down vote up
def get_max(self):
    """Return the largest heap entry without removing it.

    Returns:
      A (score, item, extra_data) tuple for the largest entry of `self.heap`,
      or an empty tuple when the heap is empty. (Described upstream as the
      same shape as `pop`.)
    """
    if self.heap:
      score, item, extra_data = max(self.heap)
      return score, item, extra_data
    return ()
Example #23
Source File: misc.py    From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 5 votes vote down vote up
def most_common(self, n=None):
        '''Return (element, count) pairs ordered from most to least common.

        With n given, only the n pairs with the highest counts are returned;
        with n left as None, every pair is returned.

        >>> Counter('abracadabra').most_common(3)
        [('a', 5), ('r', 2), ('b', 2)]

        '''
        by_count = itemgetter(1)
        if n is not None:
            return nlargest(n, iteritems(self), key=by_count)
        return sorted(iteritems(self), key=by_count, reverse=True)
Example #24
Source File: pandas_py3k.py    From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 5 votes vote down vote up
def most_common(self, n=None):
        '''Return (element, count) pairs ordered from most to least common.

        With n given, only the n pairs with the highest counts are returned;
        with n left as None, every pair is returned.

        >>> Counter('abracadabra').most_common(3)
        [('a', 5), ('r', 2), ('b', 2)]

        '''
        by_count = itemgetter(1)
        if n is not None:
            return nlargest(n, iteritems(self), key=by_count)
        return sorted(iteritems(self), key=by_count, reverse=True)
Example #25
Source File: newCollections.py    From scoop with GNU Lesser General Public License v3.0 5 votes vote down vote up
def most_common(self, n=None):
        '''Return (element, count) pairs ordered from most to least common.

        With n given, only the n pairs with the highest counts are returned;
        with n left as None, every pair is returned.

        >>> Counter('abracadabra').most_common(3)
        [('a', 5), ('r', 2), ('b', 2)]

        '''
        by_count = itemgetter(1)
        if n is not None:
            return nlargest(n, self.iteritems(), key=by_count)
        return sorted(self.iteritems(), key=by_count, reverse=True)
Example #26
Source File: utils.py    From yolo_v2 with Apache License 2.0 5 votes vote down vote up
def iter_in_order(self):
    """Yield the queued entries in descending order of their heap tuples.

    Each entry of `self.heap` is a 3-tuple whose first field (the score) is
    dropped from the output.

    Yields:
      item: second field of the heap entry.
      extra_data: third field of the heap entry.
    """
    # Requesting as many "largest" entries as the heap holds is a full
    # descending sort.
    for entry in sorted(self.heap, reverse=True):
      _, item, extra_data = entry
      yield item, extra_data
Example #27
Source File: utils.py    From yolo_v2 with Apache License 2.0 5 votes vote down vote up
def get_max(self):
    """Return the largest heap entry without removing it.

    Returns:
      A (score, item, extra_data) tuple for the largest entry of `self.heap`,
      or an empty tuple when the heap is empty. (Described upstream as the
      same shape as `pop`.)
    """
    if self.heap:
      score, item, extra_data = max(self.heap)
      return score, item, extra_data
    return ()
Example #28
Source File: collections.py    From pmatic with GNU General Public License v2.0 5 votes vote down vote up
def most_common(self, n=None):
        '''Return (element, count) pairs ordered from most to least common.

        With n given, only the n pairs with the highest counts are returned;
        with n left as None, every pair is returned.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        by_count = _itemgetter(1)
        if n is not None:
            return _heapq.nlargest(n, self.iteritems(), key=by_count)
        return sorted(self.iteritems(), key=by_count, reverse=True)
Example #29
Source File: oom_batch_sampler.py    From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __iter__(self):
        """Iterate over the sampler's batches with the largest ones moved first.

        All batches of ``self.batch_sampler`` are materialized; the
        ``self.num_batches`` batches with the greatest total item size (sum of
        ``self.get_item_size`` over their items) are moved to the front,
        largest first, and the remaining batches keep their original order.
        """
        batches = list(iter(self.batch_sampler))

        def total_size(index):
            return sum(self.get_item_size(item) for item in batches[index])

        largest_batches = heapq.nlargest(self.num_batches, range(len(batches)), key=total_size)
        move_to_front = [batches[index] for index in largest_batches]
        # Delete from the highest index down so earlier deletions do not
        # shift the positions still to be removed.
        for index in sorted(largest_batches, reverse=True):
            del batches[index]
        batches[0:0] = move_to_front
        return iter(batches)
Example #30
Source File: misc.py    From deepWordBug with Apache License 2.0 5 votes vote down vote up
def most_common(self, n=None):
        '''Return (element, count) pairs ordered from most to least common.

        With n given, only the n pairs with the highest counts are returned;
        with n left as None, every pair is returned.

        >>> Counter('abcdeabcdabcaba').most_common(3)
        [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        by_count = _itemgetter(1)
        if n is not None:
            return _heapq.nlargest(n, self.items(), key=by_count)
        return sorted(self.items(), key=by_count, reverse=True)