Python heapq.nlargest() Examples
The following are 28 code examples of heapq.nlargest(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the heapq module.
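Before the project examples, here is a minimal, self-contained sketch of what heapq.nlargest(n, iterable, key=None) does; the dictionary and names below are illustrative only and not taken from any of the projects that follow:

import heapq

scores = {'alice': 3, 'bob': 10, 'carol': 7, 'dave': 1}

# Top-2 values from any iterable, returned in descending order.
print(heapq.nlargest(2, scores.values()))         # [10, 7]

# Top-2 keys ranked by their mapped value -- the same pattern several
# of the examples below use via key=some_dict.get.
print(heapq.nlargest(2, scores, key=scores.get))  # ['bob', 'carol']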
Example #1
Source File: summarize.py From Django-Bookworm with MIT License | 7 votes |
def get_summary(self, number_of_sentences=5):
    '''
    generates summary based on weighted word frequencies
    :param number_of_sentences: total number of sentences to return in summary
    :return: string of summary
    '''
    sentence_value = {}
    for sentence in self.__sentence:
        for word in self.__word_freq.keys():
            if word in word_tokenize(sentence.lower()):
                if sentence in sentence_value:
                    sentence_value[sentence] += self.__word_freq.get(word)
                else:
                    sentence_value[sentence] = self.__word_freq.get(word, 0)

    summary_sentences = heapq.nlargest(number_of_sentences,
                                       sentence_value,
                                       key=sentence_value.get)
    summary = ' '.join(summary_sentences)
    return summary
Example #2
Source File: data_helpers.py From Multi-Label-Text-Classification with Apache License 2.0 | 6 votes |
def get_onehot_label_topk(scores, top_num=1):
    """
    Get the predicted onehot labels based on the topK number.

    Args:
        scores: The all classes predicted scores provided by network
        top_num: The max topK number (default: 1)
    Returns:
        predicted_onehot_labels: The predicted labels (onehot)
    """
    predicted_onehot_labels = []
    scores = np.ndarray.tolist(scores)
    for score in scores:
        onehot_labels_list = [0] * len(score)
        max_num_index_list = list(map(score.index, heapq.nlargest(top_num, score)))
        for i in max_num_index_list:
            onehot_labels_list[i] = 1
        predicted_onehot_labels.append(onehot_labels_list)
    return predicted_onehot_labels
Example #3
Source File: knns.py From Surprise with BSD 3-Clause "New" or "Revised" License | 6 votes |
def estimate(self, u, i):
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unknown.')

    x, y = self.switch(u, i)

    neighbors = [(self.sim[x, x2], r) for (x2, r) in self.yr[y]]
    k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[0])

    # compute weighted average
    sum_sim = sum_ratings = actual_k = 0
    for (sim, r) in k_neighbors:
        if sim > 0:
            sum_sim += sim
            sum_ratings += sim * r
            actual_k += 1

    if actual_k < self.min_k:
        raise PredictionImpossible('Not enough neighbors.')

    est = sum_ratings / sum_sim

    details = {'actual_k': actual_k}

    return est, details
Example #4
Source File: tnt.py From weibo-analysis-system with MIT License | 6 votes |
def tag(self, data):
    # Beam search over tag sequences: `now` keeps the N best partial
    # paths as ((prev_tag, tag), log_prob, path) triples.
    now = [(('BOS', 'BOS'), 0.0, [])]
    for w in data:
        stage = {}
        samples = self.status
        if w in self.word:
            samples = self.word[w]
        for s in samples:
            wd = log(self.wd.get((s, w))[1]) - log(self.uni.get(s)[1])
            for pre in now:
                p = pre[1] + wd + self.trans[(pre[0][0], pre[0][1], s)]
                if (pre[0][1], s) not in stage or p > stage[(pre[0][1], s)][0]:
                    stage[(pre[0][1], s)] = (p, pre[2] + [s])
        stage = list(map(lambda x: (x[0], x[1][0], x[1][1]), stage.items()))
        now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
    # Pick the single best path once the end-of-sentence score is added.
    now = heapq.nlargest(1, stage, key=lambda x: x[1] + self.geteos(x[0][1]))
    return zip(data, now[0][2])
Example #5
Source File: gc.py From auto-alt-text-lambda-api with MIT License | 6 votes |
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
Example #6
Source File: evaluate.py From Recommender-Systems-Samples with MIT License | 6 votes |
def eval_one_rating(idx):
    # obtain test items and users
    rating = _testRatings[idx]
    items = _testNegatives[idx]
    u = rating[0]
    gtItem = rating[1]
    items.append(gtItem)
    users = np.full(len(items), u, dtype='int32')

    # obtain prediction scores
    map_item_score = {}
    predictions = _model.predict([users, np.array(items)],
                                 batch_size=100, verbose=0)
    for i in range(len(items)):
        item = items[i]
        map_item_score[item] = predictions[i]
    items.pop()

    # evaluate topk list
    ranklist = heapq.nlargest(_K, map_item_score, key=map_item_score.get)
    hr = getHitRatio(ranklist, gtItem)
    ndcg = getNDCG(ranklist, gtItem)
    return (hr, ndcg)
Example #7
Source File: preprocessing.py From dl4nlp with MIT License | 6 votes |
def build_dictionary(sentences, size):
    """
    Create dictionary containing most frequent words in the sentences
    :param sentences: sequence of sentence that contains words
                      Caution: the sequence might be exhausted after calling this function!
    :param size: size of dictionary you want
    :return: dictionary that maps word to index (starting from 1)
    """
    dictionary = defaultdict(int)
    for sentence in sentences:
        for token in sentence:
            dictionary[token] += 1

    frequent_pairs = nlargest(size, dictionary.items(), itemgetter(1))
    words, frequencies = zip(*frequent_pairs)
    result = {word: index + 1 for index, word in enumerate(words)}
    return result
Example #8
Source File: gc.py From lambda-packs with MIT License | 6 votes |
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
Example #9
Source File: tnt.py From sentiment-analysis-webapp with MIT License | 6 votes |
def tag(self, data):
    now = [(('BOS', 'BOS'), 0.0, [])]
    for w in data:
        stage = {}
        samples = self.status
        if w in self.word:
            samples = self.word[w]
        for s in samples:
            wd = log(self.wd.get((s, w))[1]) - log(self.uni.get(s)[1])
            for pre in now:
                p = pre[1] + wd + self.trans[(pre[0][0], pre[0][1], s)]
                if (pre[0][1], s) not in stage or p > stage[(pre[0][1], s)][0]:
                    stage[(pre[0][1], s)] = (p, pre[2] + [s])
        stage = list(map(lambda x: (x[0], x[1][0], x[1][1]), stage.items()))
        now = heapq.nlargest(self.N, stage, key=lambda x: x[1])
    now = heapq.nlargest(1, stage, key=lambda x: x[1] + self.geteos(x[0][1]))
    return zip(data, now[0][2])
Example #10
Source File: fpt.py From fylearn with MIT License | 6 votes |
def select_slaves(self, candidates, P_U_S, class_vector, X):
    R = []
    for candidate in candidates:
        aggregates = []
        for other in P_U_S:
            if not _tree_contains(candidate[1], other):
                aggregates.extend([Inner(a, [candidate[1], other]) for a in OPERATORS])
        R.extend(_select_candidates(aggregates, self.num_slaves, class_vector,
                                    self.similarity_measure, X))
    R = sorted(R, key=lambda x: x[0])
    RR = []
    used_nodes = set()
    for candidate in R:
        inner_node = candidate[1]
        found = False
        for tree in inner_node.branches_:
            if tree in used_nodes:
                found = True
        if not found:
            used_nodes.update(inner_node.branches_)
            RR.append(candidate)
    return heapq.nlargest(self.num_slaves, RR, key=lambda x: x[0])
Example #11
Source File: fpt.py From fylearn with MIT License | 6 votes |
def select_slaves(self, C, P, class_vector, num_candidates, X):
    R = []
    for candidate in C:
        c = candidate[1]
        modified = []
        candidate_leaves = _tree_leaves(c)
        for c_leaf in candidate_leaves:
            for p_leaf in [p for p in P if p not in candidate_leaves]:
                for aggr in OPERATORS:
                    modified.append(_tree_clone_replace_leaf(
                        c, c_leaf, Inner(aggr, [c_leaf, p_leaf])))
        R.extend(_select_candidates(modified, self.num_candidates, class_vector,
                                    self.similarity_measure, X))
    R = list(heapq.nlargest(self.num_candidates, R, key=lambda x: x[0]))
    return list(reversed(sorted(R, key=lambda x: x[0])))
Example #12
Source File: gc.py From deep_image_model with Apache License 2.0 | 6 votes |
def largest_export_versions(n):
  """Creates a filter that keeps the largest n export versions.

  Args:
    n: number of versions to keep.

  Returns:
    A filter function that keeps the n largest paths.
  """
  def keep(paths):
    heap = []
    for idx, path in enumerate(paths):
      if path.export_version is not None:
        heapq.heappush(heap, (path.export_version, idx))
    keepers = [paths[i] for _, i in heapq.nlargest(n, heap)]
    return sorted(keepers)

  return keep
Example #13
Source File: pca.py From imylu with Apache License 2.0 | 6 votes |
def _get_top_eigen_vectors(data: ndarray, n_components: int) -> ndarray:
    """The eigen vectors according to top n_components large eigen values.

    Arguments:
        data {ndarray} -- Training data.
        n_components {int} -- Number of components to keep.

    Returns:
        ndarray -- eigen vectors with shape(n_cols, n_components).
    """
    # Calculate eigen values and eigen vectors of covariance matrix.
    eigen_values, eigen_vectors = eig(data)
    # The indexes of top n_components large eigen values.
    _indexes = heapq.nlargest(n_components, enumerate(eigen_values),
                              key=lambda x: x[1])
    indexes = [x[0] for x in _indexes]
    return eigen_vectors[:, indexes]
Example #14
Source File: rdd.py From LearningApacheSpark with MIT License | 6 votes |
def top(self, num, key=None):
    """
    Get the top N elements from an RDD.

    .. note:: This method should only be used if the resulting array
        is expected to be small, as all the data is loaded into the
        driver's memory.

    .. note:: It returns the list sorted in descending order.

    >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
    [12]
    >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2)
    [6, 5]
    >>> sc.parallelize([10, 4, 2, 12, 3]).top(3, key=str)
    [4, 3, 2]
    """
    def topIterator(iterator):
        yield heapq.nlargest(num, iterator, key=key)

    def merge(a, b):
        return heapq.nlargest(num, a + b, key=key)

    return self.mapPartitions(topIterator).reduce(merge)
Example #15
Source File: char_rnn_model.py From AI_Poet_Totoro with MIT License | 5 votes |
def select_by_given(self, given, probs, max_prob=False):
    if given:
        seq_probs = zip(probs, range(0, self.vocab_size))
        topn = heapq.nlargest(100, seq_probs, key=lambda sp: sp[0])
        for _, seq in topn:
            if self.w2v_model.vocab[seq] in given:
                given = given.replace(self.w2v_model.vocab[seq], '')
                return seq, given
    if max_prob:
        return np.argmax(probs), given
    return np.random.choice(self.vocab_size, 1, p=probs)[0], given
Example #16
Source File: python2x.py From attention-lvcsr with MIT License | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts.

    The list goes from the most common to the least.
    If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
    return nlargest(n, self.iteritems(), key=itemgetter(1))
Example #17
Source File: utils.py From Gun-Detector with Apache License 2.0 | 5 votes |
def iter_in_order(self):
  """Iterate over items in the queue from largest score to smallest.

  Yields:
    item: Hashable item.
    extra_data: Extra data stored with the item.
  """
  for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap):
    yield item, extra_data
Example #18
Source File: knns.py From Surprise with BSD 3-Clause "New" or "Revised" License | 5 votes |
def estimate(self, u, i):
    if not (self.trainset.knows_user(u) and self.trainset.knows_item(i)):
        raise PredictionImpossible('User and/or item is unknown.')

    x, y = self.switch(u, i)

    neighbors = [(x2, self.sim[x, x2], r) for (x2, r) in self.yr[y]]
    k_neighbors = heapq.nlargest(self.k, neighbors, key=lambda t: t[1])

    est = self.means[x]

    # compute weighted average
    sum_sim = sum_ratings = actual_k = 0
    for (nb, sim, r) in k_neighbors:
        if sim > 0:
            sum_sim += sim
            sum_ratings += sim * (r - self.means[nb])
            actual_k += 1

    if actual_k < self.min_k:
        sum_ratings = 0

    try:
        est += sum_ratings / sum_sim
    except ZeroDivisionError:
        pass  # return mean

    details = {'actual_k': actual_k}

    return est, details
Example #19
Source File: learning.py From aima with MIT License | 5 votes |
def top(self, n):
    "Return (count, obs) tuples for the n most frequent observations."
    return heapq.nlargest(n, [(v, k) for (k, v) in self.dictionary.items()])
Example #20
Source File: utils.py From Gun-Detector with Apache License 2.0 | 5 votes |
def get_max(self):
  """Peek at the item with the highest score.

  Returns:
    Same as `pop`.
  """
  if not self.heap:
    return ()
  score, item, extra_data = heapq.nlargest(1, self.heap)[0]
  return score, item, extra_data
Example #21
Source File: misc.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(iteritems(self), key=itemgetter(1), reverse=True)
    return nlargest(n, iteritems(self), key=itemgetter(1))
Example #22
Source File: pandas_py3k.py From kgsgo-dataset-preprocessor with Mozilla Public License 2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(iteritems(self), key=itemgetter(1), reverse=True)
    return nlargest(n, iteritems(self), key=itemgetter(1))
Example #23
Source File: newCollections.py From scoop with GNU Lesser General Public License v3.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abracadabra').most_common(3)
    [('a', 5), ('r', 2), ('b', 2)]

    '''
    if n is None:
        return sorted(self.iteritems(), key=itemgetter(1), reverse=True)
    return nlargest(n, self.iteritems(), key=itemgetter(1))
Example #24
Source File: utils.py From yolo_v2 with Apache License 2.0 | 5 votes |
def iter_in_order(self):
  """Iterate over items in the queue from largest score to smallest.

  Yields:
    item: Hashable item.
    extra_data: Extra data stored with the item.
  """
  for _, item, extra_data in heapq.nlargest(len(self.heap), self.heap):
    yield item, extra_data
Example #25
Source File: utils.py From yolo_v2 with Apache License 2.0 | 5 votes |
def get_max(self):
  """Peek at the item with the highest score.

  Returns:
    Same as `pop`.
  """
  if not self.heap:
    return ()
  score, item, extra_data = heapq.nlargest(1, self.heap)[0]
  return score, item, extra_data
Example #26
Source File: collections.py From pmatic with GNU General Public License v2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]

    '''
    # Emulate Bag.sortedByCount from Smalltalk
    if n is None:
        return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
    return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
Example #27
Source File: oom_batch_sampler.py From PyTorch-NLP with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __iter__(self):
    batches = list(iter(self.batch_sampler))
    largest_batches = heapq.nlargest(
        self.num_batches, range(len(batches)),
        key=lambda i: sum([self.get_item_size(j) for j in batches[i]]))
    move_to_front = [batches[i] for i in largest_batches]
    [batches.pop(i) for i in sorted(largest_batches, reverse=True)]
    batches[0:0] = move_to_front
    return iter(batches)
Example #28
Source File: misc.py From deepWordBug with Apache License 2.0 | 5 votes |
def most_common(self, n=None):
    '''List the n most common elements and their counts from the most
    common to the least.  If n is None, then list all element counts.

    >>> Counter('abcdeabcdabcaba').most_common(3)
    [('a', 5), ('b', 4), ('c', 3)]

    '''
    # Emulate Bag.sortedByCount from Smalltalk
    if n is None:
        return sorted(self.items(), key=_itemgetter(1), reverse=True)
    return _heapq.nlargest(n, self.items(), key=_itemgetter(1))