Python heapq.nsmallest() Examples

The following are 30 code examples of heapq.nsmallest(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module heapq, or try the search function.
Example #1
Source File: prunner.py    From pytorch-ssd with MIT License 6 votes vote down vote up
def prune_linear_layers(self, num=1):
        """Prune the ``num`` lowest-ranked linear features and report the metric change.

        Each feature of each hooked output is scored by the absolute value of
        ``grad * output`` summed over axis 0, divided by the L2 norm of that
        output's score vector.  The ``num`` smallest scores across all outputs
        are removed through ``self.remove_linear_feature``.

        Returns:
            ``(loss_delta, accuracy_delta)``: the result of
            ``self.train_fun(self.model)`` after pruning minus the result
            before pruning.
        """
        self.register_linear_hooks()
        loss_before, accuracy_before = self.train_fun(self.model)

        candidates = []
        for path, output in self.outputs.items():
            activation = output.data
            gradient = self.grads[path].data
            saliency = torch.abs((gradient * activation).sum(0))
            saliency = saliency / torch.sqrt(torch.sum(saliency * saliency))
            candidates.extend(
                (path, index, score) for index, score in enumerate(saliency))

        weakest = nsmallest(num, candidates, key=lambda entry: entry[2])
        # Within one path, remove the higher indices first.
        weakest.sort(key=lambda entry: (entry[0], -entry[1]))
        for path, feature_index, _ in weakest:
            self.remove_linear_feature(path, feature_index)

        self.deregister_hooks()
        loss_after, accuracy_after = self.train_fun(self.model)
        return loss_after - loss_before, accuracy_after - accuracy_before
Example #2
Source File: expertrec.py    From prediction_api with MIT License 6 votes vote down vote up
def search(self, text, num=20):
        '''
        Recommend experts whose field is closest to the given text.
        :param text: The query text.
        :param num: The maximum number of experts to return.
        :return: A list of dictionaries, smallest distance first:
                {
                    'id': The expert's ID in AMiner(http://www.aminer.cn/),
                    'url': The expert's AMiner homepage.
                    'L2 distance': Norm of the difference between the expert's index row and the text vector. The smaller it is, the closer the expert is to the text.
                }
        '''
        query = self.doc2vec(text)
        distances = np.linalg.norm(self._index_mat - query.T, axis=1)

        candidates = []
        for row, distance in enumerate(distances):
            person = self._id2person[row]
            candidates.append({
                'id': person,
                'url': self.base_url.format(person),
                'L2 distance': distance,
            })
        return heapq.nsmallest(num, candidates, key=lambda entry: entry['L2 distance'])
Example #3
Source File: typecheck.py    From linter-pylama with MIT License 6 votes vote down vote up
def _similar_names(owner, attrname, distance_threshold, max_choices):
    """Suggest names on *owner* that are close to *attrname*.

    Every name from ``_node_names(owner)`` other than *attrname* itself is
    scored with ``_string_distance``; names scoring above
    *distance_threshold* are dropped.  At most *max_choices* of the
    lowest-scoring names are returned, in sorted order.
    """
    scored = [
        (name, _string_distance(attrname, name))
        for name in _node_names(owner)
        if name != attrname
    ]
    within_threshold = [pair for pair in scored if pair[1] <= distance_threshold]

    # Keep only the closest matches, up to the requested limit.
    closest = heapq.nsmallest(max_choices, within_threshold,
                              key=operator.itemgetter(1))
    return sorted(name for name, _ in closest)
Example #4
Source File: cache.py    From ReynirPackage with GNU General Public License v3.0 6 votes vote down vote up
def lookup(self, key: Any, func: Callable[[Any], Any]) -> Any:
        """ Look up a key in the cache, calling func(key)
            to obtain the data if it is not already there.

            Every call increments the use count of the key. After a miss
            that leaves the cache with more than self.maxsize entries,
            the least frequently used entries are purged: 10% of maxsize,
            but always at least one so that small caches stay bounded. """
        with self.lock:
            self.use_count[key] += 1
            # Get cache entry or compute if not found
            try:
                result = self.cache[key]
            except KeyError:
                result = func(key)
                self.cache[key] = result
                self.misses += 1

                if len(self.cache) > self.maxsize:
                    # maxsize // 10 is 0 for maxsize < 10, which would
                    # never purge anything; evict at least one entry.
                    purge_count = max(1, self.maxsize // 10)
                    least_used = nsmallest(purge_count,
                        self.use_count.items(), key = itemgetter(1))
                    for stale_key, _ in least_used:
                        # A use count can exist without a cache entry if
                        # func() raised earlier, so tolerate a missing key.
                        self.cache.pop(stale_key, None)
                        del self.use_count[stale_key]
            else:
                self.hits += 1

            return result
Example #5
Source File: main.py    From imgscaper with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def pickBestPatch(self, ty, tx, coords):
        """Choose a source patch for the target position (ty, tx).

        Up to PATCH_COUNT coordinates are sampled at random from ``coords`` and
        scored with ``self.D``.  One of the BEST_COUNT lowest-scoring candidates
        is then picked at random and its ``(sy, sx)`` returned.  Returns
        ``(-1, -1)`` when there are no candidates.
        """
        ranked = []
        for sy, sx in random.sample(list(coords), min(len(coords), PATCH_COUNT)):
            # The running length is a unique second field, so entries with equal
            # distance are ordered by insertion instead of by coordinates.
            entry = (self.D(sy, sx, ty, tx), len(ranked), (sy, sx))
            heapq.heappush(ranked, entry)

        # Certain images yield no candidate patches at all; bail out then.
        if len(ranked) == 0:
            return -1, -1

        shortlist = heapq.nsmallest(BEST_COUNT, ranked)
        _, _, position = random.choice(shortlist)
        return position
Example #6
Source File: prunner.py    From pytorch-ssd with MIT License 6 votes vote down vote up
def prune_conv_layers(self, num=1):
        """Prune the ``num`` lowest-ranked conv filters and report the metric change.

        Each filter of each hooked output is scored by the absolute value of
        ``grad * output`` summed over axis 0 and then twice over axis 1,
        divided by the L2 norm of that output's score vector.  The ``num``
        smallest scores across all outputs are removed through
        ``self.remove_conv_filter``.

        Returns:
            ``(loss_delta, accuracy_delta)``: the result of
            ``self.train_fun(self.model)`` after pruning minus the result
            before pruning.
        """
        self.register_conv_hooks()
        loss_before, accuracy_before = self.train_fun(self.model)

        candidates = []
        for path, output in self.outputs.items():
            activation = output.data
            gradient = self.grads[path].data
            saliency = torch.abs((gradient * activation).sum(0).sum(1).sum(1))
            saliency = saliency / torch.sqrt(torch.sum(saliency * saliency))
            candidates.extend(
                (path, index, score) for index, score in enumerate(saliency))

        weakest = nsmallest(num, candidates, key=lambda entry: entry[2])
        # Remove the filters with bigger indexes first to avoid rearrangement.
        weakest.sort(key=lambda entry: (entry[0], -entry[1]))
        for path, filter_index, _ in weakest:
            self.remove_conv_filter(path, filter_index)

        self.deregister_hooks()
        loss_after, accuracy_after = self.train_fun(self.model)
        return loss_after - loss_before, accuracy_after - accuracy_before
Example #7
Source File: rdd.py    From LearningApacheSpark with MIT License 6 votes vote down vote up
def takeOrdered(self, num, key=None):
        """
        Return the first N elements of this RDD in ascending order, or in the
        order given by the optional key function.

        .. note:: only use this method when the resulting array is expected
            to be small, because all the data is loaded into the driver's memory.

        >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7]).takeOrdered(6)
        [1, 2, 3, 4, 5, 6]
        >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7], 2).takeOrdered(6, key=lambda x: -x)
        [10, 9, 7, 6, 5, 4]
        """

        def smallest_in_partition(iterator):
            # One single-element list per partition: that partition's N smallest.
            return [heapq.nsmallest(num, iterator, key)]

        def combine(left, right):
            return heapq.nsmallest(num, left + right, key)

        per_partition = self.mapPartitions(smallest_in_partition)
        return per_partition.reduce(combine)
Example #8
Source File: _utils.py    From ida-minsc with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def new_wrapper(cls, func, cache):
        '''Create a new wrapper that will determine the correct function to call.

        The inner function `F` orders the entries of `cache`, hands the
        ordered candidates together with the call's arguments to `cls.match`,
        and calls whatever function `cls.match` returns. A new function object
        is then built from `F`'s code, given `func`'s name, has `cache`
        attached under the attribute named by `cls.cache_name`, and is returned
        with an empty docstring.

        NOTE(review): `func_code`, `func_name`, `func_globals`, `func_defaults`
        and `func_closure` look like Python 2 function attributes -- confirm
        the target interpreter before reusing this.
        '''
        # define the wrapper...
        def F(*arguments, **keywords):
            # nsmallest(len(cache), cache) returns every cache entry in ascending
            # order; only the second element of each pair is kept.
            heap = [res for _, res in heapq.nsmallest(len(cache), cache)]
            # Only the returned function `f` is used; `a`, `w` and `k` are ignored.
            f, (a, w, k) = cls.match((arguments[:], keywords), heap)
            return f(*arguments, **keywords)
            #return f(*(arguments + tuple(w)), **keywords)

        # swap out the original code object with our wrapper's
        f, c = F, F.func_code
        # Same fields as F's code object, except that the entry following
        # co_filename is replaced by "<func.__module__>.<func.func_name>"
        # (presumably the code object's name slot -- confirm against the
        # types.CodeType signature of the target Python version).
        cargs = c.co_argcount, c.co_nlocals, c.co_stacksize, c.co_flags, \
                c.co_code, c.co_consts, c.co_names, c.co_varnames, \
                c.co_filename, '.'.join((func.__module__, func.func_name)), \
                c.co_firstlineno, c.co_lnotab, c.co_freevars, c.co_cellvars
        newcode = types.CodeType(*cargs)
        res = types.FunctionType(newcode, f.func_globals, f.func_name, f.func_defaults, f.func_closure)
        res.func_name, res.func_doc = func.func_name, func.func_doc

        # assign the specified cache to it
        setattr(res, cls.cache_name, cache)
        # ...and finally add a default docstring
        setattr(res, '__doc__', '')
        return res 
Example #9
Source File: decode_ngram.py    From neural_ime with MIT License 6 votes vote down vote up
def search(lattice, ngrams, queues, beam_size, viterbi_size):
    """Extend the hypothesis queues across the lattice, pruning as it goes.

    For each edge ``(target, source)`` in ``lattice[i][j]``, every hypothesis
    ``(cost, history)`` in ``queues[j]`` is extended by that edge, and the
    ``get_ngram_cost`` of the last three history items is added to its cost.
    The extensions of one edge are cut to the ``viterbi_size`` cheapest (when
    positive) and added to ``queues[i]``.  After all edges for ``i`` are
    handled, ``queues[i]`` is cut to the ``beam_size`` cheapest (when positive).

    ``queues`` is modified in place and also returned.
    """
    by_cost = operator.itemgetter(0)
    for i, column in enumerate(lattice):
        for j, edges in enumerate(column):
            for target, source in edges:
                extended = []
                for previous_cost, previous_history in queues[j]:
                    history = previous_history + [(target, source)]
                    cost = previous_cost + get_ngram_cost(ngrams, tuple(history[-3:]))
                    extended.append((cost, history))

                # keep only the cheapest extensions of this edge
                if viterbi_size > 0:
                    extended = heapq.nsmallest(viterbi_size, extended, key=by_cost)

                queues[i] += extended

        # keep only the cheapest hypotheses for this position
        if beam_size > 0:
            queues[i] = heapq.nsmallest(beam_size, queues[i], key=by_cost)
    return queues
Example #10
Source File: queue.py    From petridishnn with MIT License 6 votes vote down vote up
def update(self, key=None, l_priority=None, full_sort=False, keep_top_k=None):
        """
        Recompute the priority value of every entry, then reorder the queue.

        Args
            key : a function applied to each entry's IDX_PQE field; its result
            becomes that entry's IDX_PV field.
            l_priority : a sequence of new IDX_PV values, indexed like the entries.
            Exactly one of key / l_priority has to be given.
            full_sort : if true, sort self.entries in place.
            keep_top_k : if not None, keep only the keep_top_k smallest entries.

        Nothing happens when there are no entries. When neither full_sort nor
        keep_top_k is given, self._update() is called.
        """
        if not len(self.entries):
            return
        assert bool(key) != bool(l_priority), "only one option should be used for updating priority"

        if key:
            new_priority = lambda idx: key(self.entries[idx][IDX_PQE])
        else:
            new_priority = lambda idx: l_priority[idx]
        for i in range(self.size()):
            self.entries[i][IDX_PV] = new_priority(i)

        if full_sort:
            self.entries.sort()
            if keep_top_k is not None:
                del self.entries[keep_top_k:]
            return
        if keep_top_k is None:
            self._update()
        else:
            self.entries = heapq.nsmallest(keep_top_k, self.entries)
Example #11
Source File: TransE.py    From MTransE with Apache License 2.0 6 votes vote down vote up
def kNN_entity(self, vec, topk=10, method=0, self_vec_id=None):
        """Return up to ``topk`` entities whose vectors are closest to ``vec``.

        Args:
            vec: the query vector.
            topk: maximum number of neighbours; values below 1 give ``[]``.
            method: 1 uses ``SP.distance.cosine``; any other value uses
                ``LA.norm`` of the difference.
            self_vec_id: optional index into ``self.vec_e`` to leave out.

        Returns:
            A list of ``(self.vocab_e[self.vec2e[index]], distance)`` pairs in
            the reverse of heap pop order. NOTE(review): that is nearest
            first provided ``self.index_dist`` orders larger distances as
            "smaller", which the replacement test below relies on -- confirm.
        """
        if topk < 1:
            return []
        q = []
        for i, candidate in enumerate(self.vec_e):
            #skip self
            if self_vec_id is not None and i == self_vec_id:
                continue
            if method == 1:
                dist = SP.distance.cosine(vec, candidate)
            else:
                dist = LA.norm(vec - candidate)
            if len(q) < topk:
                HP.heappush(q, self.index_dist(i, dist))
            elif q[0].dist > dist:
                # q[0] is the heap root, i.e. what nsmallest(1, q)[0] returns,
                # but read in O(1) instead of scanning the whole heap.
                HP.heapreplace(q, self.index_dist(i, dist))
        rst = []
        while q:
            item = HP.heappop(q)
            rst.append((self.vocab_e[self.vec2e[item.index]], item.dist))
        # Appending then reversing matches repeated insert(0, ...) without
        # the quadratic shifting.
        rst.reverse()
        return rst

    #given entity name, find kNN 
Example #12
Source File: TransE.py    From MTransE with Apache License 2.0 6 votes vote down vote up
def kNN_relation(self, vec, topk=10, method=0, self_vec_id=None):
        """Return up to ``topk`` relations whose vectors are closest to ``vec``.

        Args:
            vec: the query vector.
            topk: maximum number of neighbours; values below 1 give ``[]``.
            method: 1 uses ``SP.distance.cosine``; any other value uses
                ``LA.norm`` of the difference.
            self_vec_id: optional index into ``self.vec_r`` to leave out.

        Returns:
            A list of ``(self.vocab_r[self.vec2r[index]], distance)`` pairs in
            the reverse of heap pop order. NOTE(review): that is nearest
            first provided ``self.index_dist`` orders larger distances as
            "smaller", which the replacement test below relies on -- confirm.
        """
        if topk < 1:
            return []
        q = []
        for i, candidate in enumerate(self.vec_r):
            #skip self
            if self_vec_id is not None and i == self_vec_id:
                continue
            if method == 1:
                dist = SP.distance.cosine(vec, candidate)
            else:
                dist = LA.norm(vec - candidate)
            if len(q) < topk:
                HP.heappush(q, self.index_dist(i, dist))
            elif q[0].dist > dist:
                # q[0] is the heap root, i.e. what nsmallest(1, q)[0] returns,
                # but read in O(1) instead of scanning the whole heap.
                HP.heapreplace(q, self.index_dist(i, dist))
        rst = []
        while q:
            item = HP.heappop(q)
            rst.append((self.vocab_r[self.vec2r[item.index]], item.dist))
        # Appending then reversing matches repeated insert(0, ...) without
        # the quadratic shifting.
        rst.reverse()
        return rst

    #given relation name, find kNN 
Example #13
Source File: nce_neighbors.py    From castor with Apache License 2.0 5 votes vote down vote up
def get_nearest_neg_id(pos_feature, neg_dict, distance="cosine", k=1, weight=False):
    """Return the ids of the ``k`` negatives closest to ``pos_feature``.

    Args:
        pos_feature: object whose ``.data.cpu().numpy()`` gives the positive
            feature vector (presumably a torch tensor -- confirm with callers).
        neg_dict: mapping from negative id to a record with a ``"feature"`` entry.
        distance: ``"cosine"`` (one minus the dot product of the normalized
            vectors) or ``"l2"`` (Euclidean distance).
        k: number of ids wanted; capped at ``len(neg_dict)``.
        weight: if true, also return ``1 - distance`` for each selected id.

    Returns:
        The list of selected ids, closest first, or ``(ids, scores)`` when
        ``weight`` is true.

    Raises:
        ValueError: if ``distance`` is not recognised and ``neg_dict`` is
            non-empty.
    """
    pos_feature = pos_feature.data.cpu().numpy()
    pos_feature_norm = pos_feature / np.sqrt(sum(pos_feature ** 2))
    dis_list = []
    neg_list = []
    for key, record in neg_dict.items():
        if distance == "l2":
            dis = np.sqrt(np.sum((np.array(pos_feature) - record["feature"]) ** 2))
        elif distance == "cosine":
            neg_feature = np.array(record["feature"])
            feat_norm = neg_feature / np.sqrt(sum(neg_feature ** 2))
            dis = 1 - feat_norm.dot(pos_feature_norm)
        else:
            # Previously this fell through and failed on the unbound `dis`.
            raise ValueError("unknown distance: {!r}".format(distance))
        dis_list.append(dis)
        neg_list.append(key)

    k = min(k, len(neg_dict))
    min_list = heapq.nsmallest(k, enumerate(dis_list), key=operator.itemgetter(1))
    # find the corresponding neg id
    min_id_list = [neg_list[index] for index, _ in min_list]
    if weight:
        min_id_score = [1 - dis for _, dis in min_list]
        return min_id_list, min_id_score
    return min_id_list

# get the negative samples randomly 
Example #14
Source File: decoder.py    From neural-dep-srl with Apache License 2.0 5 votes vote down vote up
def constrained_decoder(voc, predictions, beam, constraints):
    """Beam-search a label sequence that satisfies every constraint.

    The search starts from a single state labelled ``'O'`` with score 0.  For
    each prediction, the ``beam`` lowest-scoring states are expanded with every
    label ``voc[j]``, adding ``-log2(prob)`` to the score.  Labels whose own
    cost exceeds ``_PRUNING_THRESHOLD`` are skipped once the step already has
    a candidate, and states rejected by any callable in ``constraints`` are
    dropped.

    Returns:
        The labels along the lowest-scoring final state's ``prev`` chain, in
        forward order and without the initial ``'O'`` state.
    """
    frontier = [State(score=0, label='O', prev=None, roles=set())]
    for prediction in predictions:
        expanded = []
        for prev in heapq.nsmallest(beam, frontier, key=_get_score):
            for j, prob in enumerate(prediction):
                label = voc[j]
                # float_info.min keeps log2 defined when prob is 0.
                score = -math.log2(prob + sys.float_info.min)
                if expanded and score > _PRUNING_THRESHOLD:
                    continue

                candidate = State(score=score + prev.score,
                                  label=label, prev=prev,
                                  roles=prev.roles)

                satisfied = [check(candidate) for check in constraints]
                if not all(satisfied):
                    continue

                roles = candidate.roles | {candidate.label[2:]}
                expanded.append(
                    State(candidate.score, candidate.label, candidate.prev, roles))

        frontier = expanded

    node = heapq.nsmallest(1, frontier, key=_get_score)[0]

    labels = []
    while node:
        labels.append(node.label)
        node = node.prev

    # The last collected label belongs to the initial state; drop it.
    return list(reversed(labels[:-1]))
Example #15
Source File: utils.py    From models with Apache License 2.0 5 votes vote down vote up
def get_min(self):
    """Return the lowest-scoring entry without removing it.

    Returns:
      A `(score, item, extra_data)` tuple, or `()` when the heap is empty.
    """
    if self.heap:
      lowest = heapq.nsmallest(1, self.heap)[0]
      score, item, extra_data = lowest
      return score, item, extra_data
    return ()
Example #16
Source File: 628 Maximum Product of Three Numbers.py    From LeetCode with MIT License 5 votes vote down vote up
def maximumProduct(self, nums: List[int]) -> int:
        """
        The answer is the larger of two products: the three largest values,
        or the two smallest values times the largest one.
        """
        top = heapq.nlargest(3, nums)
        bottom = heapq.nsmallest(3, nums)
        three_largest = top[0] * top[1] * top[2]
        two_smallest_and_largest = bottom[0] * bottom[1] * top[0]
        if three_largest >= two_smallest_and_largest:
            return three_largest
        return two_smallest_and_largest
Example #17
Source File: utils.py    From g-tensorflow-models with Apache License 2.0 5 votes vote down vote up
def get_min(self):
    """Look at the entry with the lowest score, leaving the heap untouched.

    Returns:
      `(score, item, extra_data)` for that entry, or `()` if there is none.
    """
    if len(self.heap) == 0:
      return ()
    (entry,) = heapq.nsmallest(1, self.heap)
    score, item, extra_data = entry
    return score, item, extra_data
Example #18
Source File: 973 K Closest Points to Origin.py    From LeetCode with MIT License 5 votes vote down vote up
def kClosest(self, points: List[List[int]], K: int) -> List[List[int]]:
        """Return the K points with the smallest squared distance to the origin, closest first."""
        def squared_distance(point):
            return point[0]**2 + point[1]**2

        return heapq.nsmallest(K, points, key=squared_distance)
Example #19
Source File: test_heapq.py    From medicare-demo with Apache License 2.0 5 votes vote down vote up
def test_get_only(self):
        """Every heapq function must raise TypeError when handed a GetOnly instance."""
        cases = (
            ((heapify, heappop), lambda: (GetOnly(),)),
            ((heappush, heapreplace), lambda: (GetOnly(), 10)),
            ((nlargest, nsmallest), lambda: (2, GetOnly())),
        )
        for functions, make_args in cases:
            for func in functions:
                # A fresh GetOnly is built for every call.
                self.assertRaises(TypeError, func, *make_args())
Example #20
Source File: filter_important_factors.py    From TOBIAS with MIT License 5 votes vote down vote up
def get_important(args):
    """Keep the rows of a bindetect results table that have extreme change values.

    Reads the tab-separated file ``args.file_in``, whose first line is the
    header.  For every column whose header contains ``'_change'``, the
    ``args.filter`` smallest and ``args.filter`` largest values give a lower
    and an upper cutoff.  A row is kept when, in any of those columns, its
    value is at or beyond one of the cutoffs.  The first field of each kept
    row is printed, and the kept rows are written with the header to
    ``args.file_out`` as tab-separated text.
    """
    file_in = args.file_in      # input file bindetect_result.txt
    top_n = args.filter         # how many binding factors per condition are selected
    file_out = args.file_out    # name of the output file

    with open(file_in) as handle:
        rows = [line.strip().split('\t') for line in handle]

    header, records = rows[0], rows[1:]
    change_columns = [header.index(name) for name in header if '_change' in name]

    # Per change column: (lower cutoff, upper cutoff).
    cutoffs = []
    for column in change_columns:
        values = [float(record[column]) for record in records]
        bounds = (max(heapq.nsmallest(top_n, values)),
                  min(heapq.nlargest(top_n, values)))
        cutoffs.append((min(bounds), max(bounds)))

    kept = []
    for record in records:
        for column, (lower, upper) in zip(change_columns, cutoffs):
            value = float(record[column])
            if value >= upper or value <= lower:
                kept.append(record)
                print(record[0])    # stdout is consumed by the nextflow pipeline
                break               # one matching column is enough

    # Build the column-oriented table for the output writer.
    table = {name: [] for name in header}
    for record in kept:
        for name, value in zip(header, record):
            table[name].append(value)
    # `sep` is passed by keyword: positional use is deprecated in pandas 2.x.
    pd.DataFrame(table).to_csv(file_out, sep='\t', index=False)
#--------------------------------------------------------------------------------------------------------# 
Example #21
Source File: test_heapq.py    From medicare-demo with Apache License 2.0 5 votes vote down vote up
def test_len_only(self):
        """Every heapq function must raise TypeError when handed a LenOnly instance."""
        cases = (
            ((heapify, heappop), lambda: (LenOnly(),)),
            ((heappush, heapreplace), lambda: (LenOnly(), 10)),
            ((nlargest, nsmallest), lambda: (2, LenOnly())),
        )
        for functions, make_args in cases:
            for func in functions:
                # A fresh LenOnly is built for every call.
                self.assertRaises(TypeError, func, *make_args())
Example #22
Source File: test_heapq.py    From medicare-demo with Apache License 2.0 5 votes vote down vote up
def test_non_sequence(self):
        """Every heapq function must raise TypeError when given the integer 10 instead of a sequence."""
        one_argument = (heapify, heappop)
        two_arguments = (heappush, heapreplace, nlargest, nsmallest)
        calls = [(func, (10,)) for func in one_argument]
        calls += [(func, (10, 10)) for func in two_arguments]
        for func, call_args in calls:
            self.assertRaises(TypeError, func, *call_args)
Example #23
Source File: test_heapq.py    From medicare-demo with Apache License 2.0 5 votes vote down vote up
def test_nsmallest(self):
        """nsmallest must match sorting and slicing, both with and without a key."""
        data = [(random.randrange(2000), i) for i in range(1000)]
        key_functions = (None, lambda x:  x[0] * 547 % 2000)
        sizes = (0, 1, 2, 10, 100, 400, 999, 1000, 1100)
        for f in key_functions:
            for n in sizes:
                expected_plain = sorted(data)[:n]
                self.assertEqual(nsmallest(n, data), expected_plain)
                expected_keyed = sorted(data, key=f)[:n]
                self.assertEqual(nsmallest(n, data, key=f), expected_keyed)
Example #24
Source File: heap_examples.py    From Algorithm_Templates with MIT License 5 votes vote down vote up
def kClosest1(points: 'List[List[int]]', K: int) -> 'List[List[int]]':
    """Return the K points with the smallest squared distance to the origin, closest first."""
    def squared_norm(point):
        return point[0] * point[0] + point[1] * point[1]

    return heapq.nsmallest(K, points, key=squared_norm)


# [973] https://leetcode.com/problems/k-closest-points-to-origin/
# We have a list of points on the plane.  Find the K closest points to the origin (0, 0).
#
# use heappushpop 
Example #25
Source File: download_vott_json.py    From active-learning-detect with MIT License 5 votes vote down vote up
def select_rows(arr_image_data, num_rows, is_largest):
    """Pick rows by the confidence found at ``row[0][CONFIDENCE_LOCATION]``.

    Returns at most ``num_rows`` rows (capped at the number available): the
    highest-confidence ones when ``is_largest`` is true, otherwise the
    lowest-confidence ones.  Confidence values are compared as floats.
    """
    def confidence(row):
        return float(row[0][CONFIDENCE_LOCATION])

    available = len(arr_image_data)
    if num_rows > available:
        num_rows = available

    pick = nlargest if is_largest else nsmallest
    return pick(num_rows, arr_image_data, key=confidence)
Example #26
Source File: happenings_tags.py    From django-happenings with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def upcoming_events(now=None, finish=90, num=5):
    """Collect the ``num`` earliest upcoming occurrences across all live events.

    ``now`` defaults to ``get_now()``.  The search window ends ``finish`` days
    after ``now``, at 23:59:59 with microsecond 999.  Returns
    ``{'upcoming_events': [...]}``, the items ordered by their first element.
    """
    if now is None:
        now = get_now()
    window_end = (now + timezone.timedelta(days=finish)).replace(
        hour=23, minute=59, second=59, microsecond=999)

    def occurrences():
        # Flatten the per-event results into one lazy stream.
        for event in Event.objects.live(now):
            for item in UpcomingEvents(event, now, window_end, num).get_upcoming_events():
                yield item

    soonest = heapq.nsmallest(num, occurrences(), key=lambda item: item[0])
    return {'upcoming_events': soonest}
Example #27
Source File: routing.py    From kademlia with MIT License 5 votes vote down vote up
def find_neighbors(self, node, k=None, exclude=None):
        """Return up to ``k`` neighbors of ``node``, ordered by distance to it.

        ``k`` falls back to ``self.ksize`` when it is falsy.  Neighbors that
        share ``node``'s id are skipped, as are those for which
        ``same_home_as(exclude)`` is true when ``exclude`` is given.
        Traversal stops as soon as ``k`` candidates have been collected.
        """
        k = k or self.ksize
        candidates = []
        for neighbor in TableTraverser(self, node):
            excluded = exclude is not None and neighbor.same_home_as(exclude)
            if neighbor.id != node.id and not excluded:
                entry = (node.distance_to(neighbor), neighbor)
                heapq.heappush(candidates, entry)
            if len(candidates) == k:
                break

        return [neighbor for _, neighbor in heapq.nsmallest(k, candidates)]
Example #28
Source File: node.py    From kademlia with MIT License 5 votes vote down vote up
def __iter__(self):
        """Iterate over the second element of the ``self.maxsize`` smallest heap entries, in ascending order."""
        closest = heapq.nsmallest(self.maxsize, self.heap)
        return map(itemgetter(1), closest)
Example #29
Source File: finetune.py    From transferlearning with MIT License 5 votes vote down vote up
def lowest_ranking_filters(self, num):
        """Return the ``num`` lowest-ranked filters across both rank tables.

        Candidates are gathered from ``self.filter_ranks_1`` and
        ``self.filter_ranks_2``, each mapped to its layer through the matching
        ``activation_to_layer_*`` table.  The ``2 * num`` lowest-ranked
        candidates are taken, entries sharing the same ``(layer, filter)``
        are merged keeping the smallest rank, and the ``num`` lowest of the
        merged entries are returned.

        Returns:
            A list of ``(layer, filter_index, rank)`` tuples in ascending rank
            order, where ``rank`` is a tensor created with ``torch.tensor``.
        """
        sources = (
            (self.filter_ranks_1, self.activation_to_layer_1),
            (self.filter_ranks_2, self.activation_to_layer_2),
        )
        candidates = []
        for ranks, to_layer in sources:
            for activation in sorted(ranks.keys()):
                layer = to_layer[activation]
                for filter_index in range(ranks[activation].size(0)):
                    candidates.append(
                        (layer, filter_index, ranks[activation][filter_index]))

        # Twice as many as requested are taken, presumably so that enough
        # entries remain after duplicates are merged.
        lowest = nsmallest(num * 2, candidates, itemgetter(2))

        # Merge duplicates; the dict keeps first-seen order.
        best = {}
        for layer, filter_index, rank in lowest:
            slot = (int(layer), filter_index)
            value = rank.item()
            if slot not in best or value < best[slot]:
                best[slot] = value

        merged = [(layer, filter_index, torch.tensor(value))
                  for (layer, filter_index), value in best.items()]
        return nsmallest(num, merged, itemgetter(2))
Example #30
Source File: test_heapq.py    From medicare-demo with Apache License 2.0 5 votes vote down vote up
def test_get_only(self):
        """Every heapq function must raise ZeroDivisionError when given a list of CmpErr instances."""
        seq = [CmpErr() for _ in range(3)]
        # The same list is reused for every call, in this order.
        cases = (
            ((heapify, heappop), (seq,)),
            ((heappush, heapreplace), (seq, 10)),
            ((nlargest, nsmallest), (2, seq)),
        )
        for functions, call_args in cases:
            for func in functions:
                self.assertRaises(ZeroDivisionError, func, *call_args)