Python Examples of heapq.heappushpop

Source File: KNN.py From statistical_learning with Apache License 2.0

6 votes

def nearest(self, x, near_k=1, p=2):
    """Return the ``near_k`` points stored in the k-d tree closest to ``x``.

    The search keeps a bounded heap of *negated* distances, so the heap
    root always holds the farthest of the current candidates. A subtree on
    the far side of a splitting hyperplane is only explored when the
    hyperplane is closer than that farthest candidate.

    Args:
        x: Query point. Must support ``x[node.sp]`` and ``x - node.data``
            (e.g. a numpy array).
        near_k: Number of neighbours to return.
        p: Norm order forwarded to ``np.linalg.norm``.

    Returns:
        A numpy array with one row per neighbour found, nearest first. It
        holds fewer than ``near_k`` rows when the tree contains fewer
        points. The same array is stored on ``self.knn``.
    """
    from itertools import count

    # Monotonic tie-breaker: without it, two points at the same distance
    # would make the tuple comparison fall through to the node objects,
    # which are not orderable.
    visit_order = count()
    # Placeholders at -inf are the first entries evicted from the heap.
    self.knn = [(-np.inf, -1, None)] * near_k

    def visit(node):
        if node is None:
            return
        # Signed distance from the query to this node's splitting hyperplane.
        dis = x[node.sp] - node.data[node.sp]
        # Descend first into the side of the split that contains the query.
        visit(node.left if dis < 0 else node.right)
        # Offer this node as a candidate (negated so the farthest is on top).
        curr_dis = np.linalg.norm(x - node.data, p)
        heapq.heappushpop(self.knn, (-curr_dis, next(visit_order), node))
        # The other side can only matter if the hyperplane is closer than
        # the farthest candidate currently kept.
        if -(self.knn[0][0]) > abs(dis):
            visit(node.right if dis < 0 else node.left)

    visit(self.root)
    # Drop placeholders that were never replaced (tree smaller than near_k).
    self.knn = np.array(
        [entry[2].data for entry in heapq.nlargest(near_k, self.knn)
         if entry[2] is not None])
    return self.knn

Source File: generate.py From justcopy-backend with MIT License

6 votes

def search_docs(inputs, max_ex=5, opts=None):
    """Collect up to ``max_ex`` best-scoring answer-bearing paragraphs.

    ``inputs`` is a ``(doc_ids, q_tokens, answer)`` triple. Every document
    is fetched, split into paragraphs on blank lines, and each paragraph is
    passed to ``find_answer``. Hits are kept in a bounded min-heap so only
    the highest-scoring ``max_ex`` survive.

    Raises:
        RuntimeError: if ``opts`` is missing or empty.

    Returns:
        The payloads (``found[1]``) of the retained hits, in heap order.
    """
    if not opts:
        raise RuntimeError('Options dict must be supplied.')

    doc_ids, q_tokens, answer = inputs
    best = []
    for doc_rank, doc_id in enumerate(doc_ids):
        paragraphs = re.split(r'\n+', fetch_text(doc_id))
        for par_rank, paragraph in enumerate(paragraphs):
            hit = find_answer(paragraph, q_tokens, answer, opts)
            if not hit:
                continue
            # Negated ranks favour earlier documents and paragraphs; the
            # random component breaks any remaining ties.
            priority = (hit[0], -doc_rank, -par_rank, random.random())
            insert = heapq.heappush if len(best) < max_ex else heapq.heappushpop
            insert(best, (priority, hit[1]))
    return [payload for _, payload in best]

Source File: utils.py From multilabel-image-classification-tensorflow with MIT License

6 votes

def push(self, score, item, extra_data=None):
    """Add ``item`` to the bounded queue unless it is already present.

    When the queue is full, the new entry is pushed and the smallest entry
    (as ordered by ``MPQItemContainer``) is evicted in the same step; the
    evicted entry may be the new one itself.

    Args:
      score: Number used to prioritize items; larger scores are kept.
      item: A hashable item to store. Duplicates are ignored, even when
          offered with a different score.
      extra_data: Extra (possibly unhashable) data stored with the item.
    """
    if item in self.unique_items:
        return

    entry = MPQItemContainer(score, item, extra_data)
    if len(self.heap) < self.capacity:
        heapq.heappush(self.heap, entry)
        self.unique_items.add(item)
        return

    _, evicted, _ = heapq.heappushpop(self.heap, entry)
    # Add before remove: the evicted item can be the one just pushed.
    self.unique_items.add(item)
    self.unique_items.remove(evicted)

Source File: colorizer.py From ec2-api with Apache License 2.0

6 votes

def _addResult(self, test, *args):
    """Record the outcome and elapsed time of a finished test.

    The test id is split into class and method name (falling back to
    ``'Unknown.unknown'`` when the test has no ``id``), the timing is
    offered to the bounded ``slow_tests`` heap, the result is appended to
    ``self.results`` and the report is rewritten via ``writeTests``.
    """
    try:
        full_id = test.id()
    except AttributeError:
        full_id = 'Unknown.unknown'
    cls_name, method_name = full_id.rsplit('.', 1)

    duration = (self._now() - self.start_time).total_seconds()
    timing = (duration, cls_name, method_name)
    # Once the heap is full, pushing also evicts the fastest entry.
    heap_is_full = len(self.slow_tests) >= self.num_slow_tests
    record = heapq.heappushpop if heap_is_full else heapq.heappush
    record(self.slow_tests, timing)

    self.results.setdefault(cls_name, []).append(
        (method_name, duration) + args)
    self.last_time[cls_name] = self._now()
    self.writeTests()

Source File: utils.py From yolo_v2 with Apache License 2.0

6 votes

def push(self, score, item, extra_data=None):
    """Insert ``item`` into the capacity-limited priority queue.

    Items already tracked in ``self.unique_items`` are ignored regardless
    of score. At capacity, the entry that compares smallest is dropped to
    make room (possibly the entry being pushed).

    Args:
      score: Number used to prioritize items; the largest scores are kept.
      item: A hashable item to be stored; duplicates are not added.
      extra_data: Extra (possibly unhashable) data to store with the item.
    """
    if item in self.unique_items:
        return

    at_capacity = len(self.heap) >= self.capacity
    container = MPQItemContainer(score, item, extra_data)
    if at_capacity:
        _, dropped_item, _ = heapq.heappushpop(self.heap, container)
    else:
        heapq.heappush(self.heap, container)

    self.unique_items.add(item)
    if at_capacity:
        # Must follow the add: the dropped item may equal ``item``.
        self.unique_items.remove(dropped_item)

Source File: colorizer.py From glance_store with Apache License 2.0

6 votes

def _addResult(self, test, *args):
    """Store a test result together with how long the test took.

    Updates three pieces of state: the bounded ``slow_tests`` heap, the
    per-class ``results`` lists and the per-class ``last_time`` stamp.
    Finally calls ``writeTests`` to refresh the output.
    """
    try:
        identifier = test.id()
    except AttributeError:
        # Objects without an id are grouped under a placeholder class.
        identifier = 'Unknown.unknown'
    klass, case = identifier.rsplit('.', 1)

    seconds = (self._now() - self.start_time).total_seconds()
    slow_entry = (seconds, klass, case)
    if len(self.slow_tests) < self.num_slow_tests:
        heapq.heappush(self.slow_tests, slow_entry)
    else:
        # Heap is full: push the new timing and drop the smallest one.
        heapq.heappushpop(self.slow_tests, slow_entry)

    class_results = self.results.setdefault(klass, [])
    class_results.append((case, seconds) + args)
    self.last_time[klass] = self._now()
    self.writeTests()

Source File: time.py From Stone-Soup with MIT License

6 votes

def data_gen(self):
    """Yield items from ``self.reader`` in ascending order via a bounded buffer.

    Up to ``self.buffer_size`` items are held in a min-heap. Once the
    buffer is full, each new item is pushed and the smallest buffered item
    is yielded. An item smaller than everything in a full buffer arrived
    too late to be emitted in order, so it is dropped with a warning.
    When the reader is exhausted the remaining buffer is drained in order.

    A ``buffer_size`` of 0 passes items through unchanged instead of
    failing on an empty buffer.

    Yields:
        Items read from ``self.reader``.
    """
    time_data_buffer = []

    for time_data in self.reader:
        buffer_full = len(time_data_buffer) >= self.buffer_size

        # Drop "old" detections. The emptiness check matters when
        # buffer_size is 0: the buffer counts as full but has no root.
        if buffer_full and time_data_buffer and \
                time_data < time_data_buffer[0]:
            warn('"Old" detection dropped')
            continue

        if buffer_full:
            # Yield oldest when buffer full
            yield heapq.heappushpop(time_data_buffer, time_data)
        else:
            # Else just insert
            heapq.heappush(time_data_buffer, time_data)

    # No more new data: yield remaining buffer
    while time_data_buffer:
        yield heapq.heappop(time_data_buffer)

Source File: utils.py From models with Apache License 2.0

6 votes

def push(self, score, item, extra_data=None):
    """Offer a scored item to the queue.

    Below capacity the item is simply added. At capacity it is pushed and
    the smallest entry is popped in one operation, so the queue never
    grows past ``self.capacity``. Items already in ``self.unique_items``
    are skipped, even with a different score.

    Args:
      score: Number used to prioritize items; largest scores are kept.
      item: A hashable item to be stored.
      extra_data: Extra (possibly unhashable) data kept with the item.
    """
    if item in self.unique_items:
        return

    if len(self.heap) >= self.capacity:
        _, evicted, _ = heapq.heappushpop(
            self.heap, MPQItemContainer(score, item, extra_data))
        evicted_something = True
    else:
        heapq.heappush(self.heap, MPQItemContainer(score, item, extra_data))
        evicted_something = False

    self.unique_items.add(item)
    if evicted_something:
        # Done after the add because ``evicted`` may be ``item`` itself.
        self.unique_items.remove(evicted)

Source File: utils.py From Gun-Detector with Apache License 2.0

6 votes

def push(self, score, item, extra_data=None):
    """Push ``item`` with ``score`` onto the bounded max-priority queue.

    Each item is assumed to carry exactly one score: an item that is
    already queued is ignored. When the queue is full the lowest-ranked
    entry is discarded after the push.

    Args:
      score: Number used to prioritize items; largest scores are kept.
      item: A hashable item to be stored; duplicates are not re-added.
      extra_data: Extra (possibly unhashable) data stored with the item.
    """
    if item not in self.unique_items:
        wrapped = MPQItemContainer(score, item, extra_data)
        if len(self.heap) < self.capacity:
            heapq.heappush(self.heap, wrapped)
            self.unique_items.add(item)
        else:
            popped = heapq.heappushpop(self.heap, wrapped)
            _, popped_item, _ = popped
            # Keep the add/remove order: the popped entry may be ``item``.
            self.unique_items.add(item)
            self.unique_items.remove(popped_item)

Source File: utils.py From g-tensorflow-models with Apache License 2.0

6 votes

def push(self, score, item, extra_data=None):
    """Queue ``item`` under ``score`` while respecting the capacity limit.

    Already-queued items are ignored, so a repeated item with a new score
    does not replace the old entry. At capacity the entry that compares
    smallest is removed after the new one is pushed.

    Args:
      score: Number used to prioritize items; largest scores are kept.
      item: A hashable item to be stored; duplicates are not added.
      extra_data: Extra (possibly unhashable) data to store with the item.
    """
    known_items = self.unique_items
    if item in known_items:
        return

    new_entry = MPQItemContainer(score, item, extra_data)
    has_room = len(self.heap) < self.capacity
    if has_room:
        heapq.heappush(self.heap, new_entry)
        known_items.add(item)
        return

    # Full: push and pop in one step; the middle field is the stored item.
    _, removed_item, _ = heapq.heappushpop(self.heap, new_entry)
    known_items.add(item)
    known_items.remove(removed_item)

Source File: colorizer.py From searchlight with Apache License 2.0

6 votes

def _addResult(self, test, *args):
    """Register a completed test: timing, grouped result and report refresh.

    ``args`` are appended verbatim to the stored ``(test_name, elapsed)``
    result tuple.
    """
    fallback_name = 'Unknown.unknown'
    try:
        dotted_name = test.id()
    except AttributeError:
        dotted_name = fallback_name
    # Split on the last dot only: everything before it is the class path.
    group, test_name = dotted_name.rsplit('.', 1)

    took = (self._now() - self.start_time).total_seconds()
    slowest = self.slow_tests
    if len(slowest) >= self.num_slow_tests:
        # Bounded heap: the smallest timing falls out when a new one arrives.
        heapq.heappushpop(slowest, (took, group, test_name))
    else:
        heapq.heappush(slowest, (took, group, test_name))

    self.results.setdefault(group, [])
    self.results[group].append((test_name, took) + args)
    self.last_time[group] = self._now()
    self.writeTests()

Source File: 414 Third Maximum Number.py From LeetCode with MIT License

6 votes

def thirdMax(self, nums):
    """
    Return the third largest distinct number, or the largest one when
    fewer than three distinct numbers exist. Returns None for empty input.

    A min-heap capped at three elements holds the three largest distinct
    values seen so far, so its root is the third maximum.
    :type nums: List[int]
    :rtype: int
    """
    if not nums:
        return None

    largest_three = []
    for value in set(nums):  # the set removes duplicates up front
        if len(largest_three) < 3:
            heapq.heappush(largest_three, value)
            continue
        if value > largest_three[0]:
            heapq.heappushpop(largest_three, value)

    if len(largest_three) == 3:
        return min(largest_three)
    return max(largest_three)

Source File: 347. Top K Frequent Elements.py From LeetCode with MIT License

6 votes

def topKFrequent(self, nums, K):
    """
    Return the K most frequent elements of ``nums`` in O(n lg K).

    Occurrences are counted first, then a min-heap of at most K entries is
    maintained so the least frequent candidate is always the one popped.
    heapq has no ``cmp``/``key`` hook, so each (value, count) pair is
    wrapped in the file's ``Counter`` struct.
    NOTE(review): ``Counter`` here is the project-defined wrapper, not
    ``collections.Counter``; it is assumed to order by count and expose the
    element as ``.val`` -- confirm against its definition.

    The result is a real list (in heap order), matching the documented
    return type; ``map`` would hand back a one-shot iterator on Python 3.
    :type nums: List[int]
    :type K: int
    :rtype: List[int]
    """
    cnt = defaultdict(int)
    for e in nums:
        cnt[e] += 1

    heap = []
    for value, freq in cnt.items():
        entry = Counter(value, freq)
        if len(heap) < K:
            heapq.heappush(heap, entry)
        else:
            # Full heap: push the new entry and evict the smallest one.
            heapq.heappushpop(heap, entry)

    return [entry.val for entry in heap]

Source File: redis_index.py From semanticRetrievalMRS with MIT License

6 votes

def scored_dict_ranking(candidate_doc_list, scored_dict, top_k):
    """Score candidate documents and keep the best ``top_k`` in a heap.

    ``scored_dict`` maps each term to a ``{doc: score}`` mapping. A
    document's score is the sum of its per-term scores; terms that do not
    mention the document contribute nothing.

    Returns:
        A heap-ordered list of ``(score, doc)`` tuples. With ``top_k`` set
        to None every candidate is kept.
    """
    ranked = []
    terms = scored_dict.keys()

    for doc in candidate_doc_list:
        total = sum(scored_dict[term][doc]
                    for term in terms if doc in scored_dict[term])
        entry = (total, doc)

        # Once the heap holds exactly top_k entries, pushing also pops the
        # lowest-scored entry.
        heap_is_full = top_k is not None and 0 <= top_k == len(ranked)
        if heap_is_full:
            heapq.heappushpop(ranked, entry)
        else:
            heapq.heappush(ranked, entry)

    return ranked

Source File: rdd.py From spark-cluster-deployment with Apache License 2.0

6 votes

def top(self, num):
    """
    Get the top N elements from a RDD.

    Each partition is reduced to its ``num`` largest elements with a
    bounded min-heap; the per-partition heaps are then merged pairwise
    with the same procedure.

    Note: It returns the list sorted in descending order.
    >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
    [12]
    >>> sc.parallelize([2, 3, 4, 5, 6], 2).cache().top(2)
    [6, 5]
    """
    def keep_largest(elements):
        # Yields a single list: the (at most) ``num`` largest elements.
        heap = []
        for element in elements:
            store = heapq.heappush if len(heap) < num else heapq.heappushpop
            store(heap, element)
        yield heap

    def combine(left, right):
        return next(keep_largest(left + right))

    partials = self.mapPartitions(keep_largest)
    return sorted(partials.reduce(combine), reverse=True)

Source File: heap_examples.py From Algorithm_Templates with MIT License

5 votes

def kClosest(points: 'List[List[int]]', K: int) -> 'List[List[int]]':
    """Return the K points closest to the origin, in heap order.

    Squared distances are negated so that heapq's min-heap root is the
    farthest of the K points kept so far; each later point is pushed and
    the farthest is popped.
    """
    def keyed(point):
        # (negated squared distance to the origin, point)
        return (-point[0] * point[0] - point[1] * point[1], point)

    closest = [keyed(point) for point in points[:K]]
    heapq.heapify(closest)

    for point in points[K:]:
        heapq.heappushpop(closest, keyed(point))
    return [point for _, point in closest]


# [313] https://leetcode.com/problems/super-ugly-number/
# Write a program to find the nth super ugly number.
# Super ugly numbers are positive numbers all of whose prime factors are in the given list `primes` of size k.
#
# generators on a heap, use merge

Source File: trie_tree_examples.py From Algorithm_Templates with MIT License

5 votes

def add_word(self):
    """Insert ``self.cur_word`` into the trie and refresh the prefix heaps.

    Trie nodes are dicts keyed by character. Two reserved keys are used:
    ``'#'`` holds the item (word plus count) for a word ending at that
    node, and ``'*'`` holds a heap of at most ``self.size`` items for
    words passing through that node.
    """
    word = self.cur_word

    # First pass: walk/create the path and bump the word's own counter.
    node = self.trie
    for ch in word:
        if ch not in node:
            node[ch] = {'*': []}
        node = node[ch]
    if '#' in node:
        node['#'].count += 1
    else:
        node['#'] = HeapItem(1, word)
    word_item = node['#']

    # Second pass: update the heap stored at every prefix node.
    node = self.trie
    for ch in word:
        node = node[ch]
        heap = node['*']
        position = next(
            (idx for idx, entry in enumerate(heap)
             if entry.word == word_item.word),
            None)
        if position is not None:
            # Already tracked here: raise its count and restore heap order
            # from that position.
            heap[position].count += 1
            heapq._siftup(heap, position)
        elif len(heap) < self.size:
            heapq.heappush(heap, copy(word_item))
        else:
            heapq.heappushpop(heap, copy(word_item))

Source File: caption_generator.py From g-tensorflow-models with Apache License 2.0

5 votes

def push(self, x):
    """Add ``x``, keeping at most ``self._n`` elements.

    When the container is full, ``x`` is pushed and the smallest element
    is discarded in the same operation.
    """
    assert self._data is not None
    insert = heapq.heappush if len(self._data) < self._n else heapq.heappushpop
    insert(self._data, x)

Source File: caption_generator.py From models with Apache License 2.0

5 votes

def push(self, x):
    """Insert ``x`` into the bounded heap held in ``self._data``."""
    assert self._data is not None
    if len(self._data) >= self._n:
        # Full: push and pop together so the size stays at ``self._n``.
        heapq.heappushpop(self._data, x)
    else:
        heapq.heappush(self._data, x)

Source File: Simple.py From Simple with GNU Affero General Public License v3.0

5 votes

def __getNextSimplex(self):
    """Pop the next simplex from ``self.queue``, refreshing stale entries.

    A simplex whose ``difference`` is below the current
    ``maxValue - minValue`` span is updated with that span. If its
    acquisition value then exceeds that of the queue head, it is pushed
    back and the head is taken instead; the check repeats for the new
    candidate.
    """
    candidate = heappop(self.queue)
    span = self.maxValue - self.minValue
    while True:
        if not span > candidate.difference:
            return candidate
        candidate.update(span)
        # "Greater than" because heapq keeps the smallest element first.
        if candidate.acquisitionValue > self.queue[0].acquisitionValue:
            candidate = heappushpop(self.queue, candidate)

Source File: misc.py From image_captioning with MIT License

5 votes

def push(self, x):
    """Push ``x`` onto the heap, capping its size at ``self._n``."""
    assert self._data is not None
    if len(self._data) < self._n:
        heapq.heappush(self._data, x)
        return
    # At capacity: the smallest element (possibly ``x``) is dropped.
    heapq.heappushpop(self._data, x)

Source File: _kg_connection.py From ASER with MIT License

5 votes

def get_partial_match_events(self, event, bys=['skeleton_words', 'skeleton_words_clean', 'verbs'],
                             top_n=None, threshold=0.1, sort=True):
    """
    Find stored events that partially match ``event``.

    The fields in ``bys`` are tried in order; the first field with at
    least one exact key match decides the result, and later fields are
    not consulted.

    With ``sort`` false, the matched events are returned as-is (a random
    sample of ``top_n`` of them when there are more than ``top_n``).
    Otherwise each match is scored with ``compute_overlap`` on
    ``self.type``, matches scoring below ``threshold`` are discarded, and
    ``(similarity, event)`` pairs are returned best-first, limited to
    ``top_n`` when given. Returns an empty list when no field matches.
    """
    for field in bys:
        candidates = self.get_events_by_keys([field], [event[field]])
        if len(candidates) == 0:
            continue

        if not sort:
            if top_n and len(candidates) > top_n:
                return random.sample(candidates, top_n)
            return candidates

        best = []
        for position, candidate in enumerate(candidates):
            overlap = compute_overlap(
                event[self.type], candidate[self.type])
            if not overlap >= threshold:
                continue
            # ``position`` is unique, so the event dict itself is never
            # reached when tuples are compared.
            entry = (overlap, candidate['frequency'], position, candidate)
            if not top_n or len(best) < top_n:
                heapq.heappush(best, entry)
            else:
                heapq.heappushpop(best, entry)

        # Drain in ascending order, then flip to get best-first.
        ascending = [heapq.heappop(best) for _ in range(len(best))]
        return [(entry[0], entry[-1]) for entry in reversed(ascending)]
    return []

Source File: beam_search.py From seq2seq.pytorch with MIT License

5 votes

def push(self, x):
    """Insert ``x``; once ``self._n`` elements are held, evict the smallest."""
    assert self._data is not None
    heap = self._data
    has_room = len(heap) < self._n
    if has_room:
        heapq.heappush(heap, x)
    else:
        heapq.heappushpop(heap, x)

Source File: caption_generator.py From object_detection_kitti with Apache License 2.0

5 votes

def push(self, x):
    """Record ``x`` in the size-limited heap ``self._data``.

    The heap never grows beyond ``self._n`` entries; beyond that point a
    push is paired with popping the current minimum.
    """
    assert self._data is not None
    is_full = len(self._data) >= self._n
    heap_op = heapq.heappushpop if is_full else heapq.heappush
    heap_op(self._data, x)

Source File: caption_generator.py From hands-detection with MIT License

5 votes

def push(self, x):
    """Push a new element, retaining only the ``self._n`` largest."""
    assert self._data is not None
    if not len(self._data) < self._n:
        # No room left: push ``x`` and drop whichever element is smallest.
        heapq.heappushpop(self._data, x)
        return
    heapq.heappush(self._data, x)

Source File: caption_generator.py From ECO-pytorch with BSD 2-Clause "Simplified" License

5 votes

def push(self, x):
    """Store ``x`` in the bounded min-heap backing this container."""
    assert self._data is not None
    elements = self._data
    if len(elements) < self._n:
        # Still filling up: a plain push is enough.
        heapq.heappush(elements, x)
        return
    heapq.heappushpop(elements, x)

Source File: __init__.py From samplernn-pytorch with MIT License

5 votes

def call_plugins(self, queue_name, time, *args):
    """Run every plugin in the named queue that is due at ``time``.

    Queue entries are ``(due_time, index, plugin)`` tuples kept as a heap.
    Each due plugin's ``queue_name`` method is called with ``time``
    followed by ``args``, and the plugin is then rescheduled
    ``interval`` units after ``time``.
    """
    call_args = (time,) + args
    pending = self.plugin_queues[queue_name]
    if len(pending) == 0:
        return
    while pending[0][0] <= time:
        plugin = pending[0][2]
        handler = getattr(plugin, queue_name)
        handler(*call_args)
        # The last trigger registered for this queue supplies the interval.
        # NOTE(review): if no trigger matches, ``interval`` is unbound or
        # left over from the previous plugin -- confirm this cannot happen.
        for trigger in plugin.trigger_interval:
            if trigger[1] != queue_name:
                continue
            interval = trigger[0]
        rescheduled = (time + interval, pending[0][1], plugin)
        heapq.heappushpop(pending, rescheduled)

Source File: utils.py From owasp-pysec with Apache License 2.0

5 votes

def top_n(values, first_n=10):
    """Return the ``first_n`` greatest objects in ``values``.

    The first ``first_n`` items seed a min-heap; every later item is
    pushed while the smallest is popped. The result is in heap order, or
    in input order when ``values`` has fewer than ``first_n`` items.
    """
    stream = iter(values)
    kept = list(islice(stream, first_n))
    if len(kept) >= first_n:
        heapq.heapify(kept)
        for candidate in stream:
            heapq.heappushpop(kept, candidate)
    return kept

Source File: caption_generator.py From multilabel-image-classification-tensorflow with MIT License

5 votes

def push(self, x):
    """Offer ``x`` to the heap; only the ``self._n`` largest are kept."""
    assert self._data is not None
    below_limit = len(self._data) < self._n
    if below_limit:
        heapq.heappush(self._data, x)
    else:
        # At the limit, pushing also removes the current minimum.
        heapq.heappushpop(self._data, x)

Source File: caption_generator.py From ImageCaptioningAttack with Apache License 2.0

5 votes

def push(self, x):
    """Add element ``x`` without letting the heap exceed ``self._n`` items."""
    assert self._data is not None
    store = heapq.heappushpop
    if len(self._data) < self._n:
        # Room available, so nothing needs to be evicted.
        store = heapq.heappush
    store(self._data, x)

Python heapq.heappushpop() Examples