Python heapq.nsmallest() Examples
The following are 30 code examples of heapq.nsmallest(), collected from open-source projects. The originating project, source file, and license are noted above each example. You may also want to check out all available functions and classes of the heapq module.
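Before the examples, a quick refresher on the call itself: heapq.nsmallest(n, iterable, key=None) returns a list of the n smallest elements in ascending order. It behaves like sorted(iterable, key=key)[:n] but avoids a full sort when n is much smaller than the input:

import heapq

data = [(5, 'e'), (1, 'a'), (4, 'd'), (2, 'b'), (3, 'c')]

# Equivalent to sorted(data)[:2], but cheaper when n is small
# relative to the length of the input.
print(heapq.nsmallest(2, data))                       # [(1, 'a'), (2, 'b')]

# The optional key works like sorted()'s key argument.
print(heapq.nsmallest(2, data, key=lambda t: -t[0]))  # [(5, 'e'), (4, 'd')]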
Example #1
Source File: prunner.py From pytorch-ssd with MIT License

def prune_linear_layers(self, num=1):
    self.register_linear_hooks()
    before_loss, before_accuracy = self.train_fun(self.model)
    ranks = []
    for path, output in self.outputs.items():
        output = output.data
        grad = self.grads[path].data
        v = grad * output
        v = v.sum(0)  # sum to the channel axis.
        v = torch.abs(v)
        v = v / torch.sqrt(torch.sum(v * v))  # normalize
        for i, e in enumerate(v):
            ranks.append((path, i, e))
    to_prune = nsmallest(num, ranks, key=lambda t: t[2])
    to_prune = sorted(to_prune, key=lambda t: (t[0], -t[1]))
    for path, feature_index, value in to_prune:
        self.remove_linear_feature(path, feature_index)
    self.deregister_hooks()
    after_loss, after_accuracy = self.train_fun(self.model)
    return after_loss - before_loss, after_accuracy - before_accuracy
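Stripped of the PyTorch specifics, the selection pattern above is: collect (layer, index, score) tuples, take the k lowest-scoring ones with nsmallest, then sort by layer and descending index so earlier removals never shift the indexes of features still waiting to be removed. A minimal sketch with hypothetical layer names and scores:

import heapq

# hypothetical (layer, feature_index, score) tuples standing in for the
# gradient-times-activation ranks computed above
ranks = [('fc1', 0, 0.42), ('fc1', 1, 0.03), ('fc2', 0, 0.17), ('fc2', 1, 0.08)]

to_prune = heapq.nsmallest(2, ranks, key=lambda t: t[2])
# sort by layer, then by descending index, so removing a feature never
# shifts the index of one that is removed later
to_prune = sorted(to_prune, key=lambda t: (t[0], -t[1]))
print(to_prune)  # [('fc1', 1, 0.03), ('fc2', 1, 0.08)]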
Example #2
Source File: expertrec.py From prediction_api with MIT License

def search(self, text, num=20):
    '''
    Recommend some experts in the given text's field.
    :param text: The text.
    :param num: The number of the recommended experts.
    :return: A list of dictionaries:
        {
            'id': The expert's ID in AMiner(http://www.aminer.cn/),
            'url': The expert's AMiner homepage.
            'L2 distance': Similarity. The smaller the L2 distance is,
                the more likely the expert is interested in the given text's field.
        }
    '''
    vec = self.doc2vec(text)
    dist_mat = self._index_mat - vec.T
    dist = np.linalg.norm(dist_mat, axis=1)
    ret = [{
        'id': self._id2person[i],
        'url': self.base_url.format(self._id2person[i]),
        'L2 distance': d
    } for i, d in enumerate(dist)]
    return heapq.nsmallest(num, ret, lambda x: x['L2 distance'])
Example #3
Source File: typecheck.py From linter-pylama with MIT License

def _similar_names(owner, attrname, distance_threshold, max_choices):
    """Given an owner and a name, try to find similar names

    The similar names are searched given a distance metric and only
    a given number of choices will be returned.
    """
    possible_names = []
    names = _node_names(owner)

    for name in names:
        if name == attrname:
            continue

        distance = _string_distance(attrname, name)
        if distance <= distance_threshold:
            possible_names.append((name, distance))

    # Now get back the values with a minimum, up to the given
    # limit of choices.
    picked = [name for (name, _) in
              heapq.nsmallest(max_choices, possible_names,
                              key=operator.itemgetter(1))]
    return sorted(picked)
Example #4
Source File: cache.py From ReynirPackage with GNU General Public License v3.0

def lookup(self, key: Any, func: Callable[[Any], Any]) -> Any:
    """ Lookup a key in the cache, calling func(key)
        to obtain the data if not already there """
    with self.lock:
        self.use_count[key] += 1
        # Get cache entry or compute if not found
        try:
            result = self.cache[key]
            self.hits += 1
        except KeyError:
            result = func(key)
            self.cache[key] = result
            self.misses += 1
            # Purge the 10% least frequently used cache entries
            if len(self.cache) > self.maxsize:
                for key, _ in nsmallest(self.maxsize // 10,
                                        self.use_count.items(),
                                        key=itemgetter(1)):
                    del self.cache[key], self.use_count[key]
        return result
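The purge step works for any dict-backed cache: nsmallest over use_count.items(), keyed on the count, yields the least frequently used keys. A standalone sketch (the names here are illustrative, not the ReynirPackage API); note that nsmallest returns a fully built list, so deleting entries inside the loop is safe:

import heapq
from collections import Counter
from operator import itemgetter

cache = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
use_count = Counter({'a': 9, 'b': 1, 'c': 5, 'd': 2})

# evict the two least frequently used entries ('b' and 'd')
for key, _ in heapq.nsmallest(2, use_count.items(), key=itemgetter(1)):
    del cache[key], use_count[key]

print(sorted(cache))  # ['a', 'c']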
Example #5
Source File: main.py From imgscaper with BSD 3-Clause "New" or "Revised" License

def pickBestPatch(self, ty, tx, coords):
    """Iterate over a random selection of patches (e.g. 100) and pick
    a random sample of the best (e.g. top 5).  Distance metric is used
    to rank the patches.
    """
    results = []
    for sy, sx in random.sample(list(coords), min(len(coords), PATCH_COUNT)):
        d = self.D(sy, sx, ty, tx)
        heapq.heappush(results, (d, len(results), (sy, sx)))

    # Some unlucky cases with special images cause no patches to be found
    # at all, in this case we just bail out.
    if not results:
        return -1, -1

    choices = heapq.nsmallest(BEST_COUNT, results)
    return random.choice(choices)[2]
Example #6
Source File: prunner.py From pytorch-ssd with MIT License

def prune_conv_layers(self, num=1):
    """Prune one conv2d filter.
    """
    self.register_conv_hooks()
    before_loss, before_accuracy = self.train_fun(self.model)
    ranks = []
    for path, output in self.outputs.items():
        output = output.data
        grad = self.grads[path].data
        v = grad * output
        v = v.sum(0).sum(1).sum(1)  # sum to the channel axis.
        v = torch.abs(v)
        v = v / torch.sqrt(torch.sum(v * v))  # normalize
        for i, e in enumerate(v):
            ranks.append((path, i, e))
    to_prune = nsmallest(num, ranks, key=lambda t: t[2])
    # prune the filters with bigger indexes first to avoid rearrangement.
    to_prune = sorted(to_prune, key=lambda t: (t[0], -t[1]))
    for path, filter_index, value in to_prune:
        self.remove_conv_filter(path, filter_index)
    self.deregister_hooks()
    after_loss, after_accuracy = self.train_fun(self.model)
    return after_loss - before_loss, after_accuracy - before_accuracy
Example #7
Source File: rdd.py From LearningApacheSpark with MIT License

def takeOrdered(self, num, key=None):
    """
    Get the N elements from an RDD ordered in ascending order or as
    specified by the optional key function.

    .. note:: this method should only be used if the resulting array is
        expected to be small, as all the data is loaded into the driver's memory.

    >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7]).takeOrdered(6)
    [1, 2, 3, 4, 5, 6]
    >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7], 2).takeOrdered(6, key=lambda x: -x)
    [10, 9, 7, 6, 5, 4]
    """

    def merge(a, b):
        return heapq.nsmallest(num, a + b, key)

    return self.mapPartitions(lambda it: [heapq.nsmallest(num, it, key)]).reduce(merge)
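The same two-phase idea works outside Spark: keep the num smallest per partition, then merge the partial results with another nsmallest. A pure-Python sketch of the pattern:

import heapq
from functools import reduce

partitions = [[10, 1, 2], [9, 3, 4], [5, 6, 7]]
num = 3

# phase 1: each "partition" keeps only its own num smallest elements
local = [heapq.nsmallest(num, part) for part in partitions]
# phase 2: merge pairwise, again keeping only num elements
print(reduce(lambda a, b: heapq.nsmallest(num, a + b), local))  # [1, 2, 3]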
Example #8
Source File: _utils.py From ida-minsc with BSD 3-Clause "New" or "Revised" License

def new_wrapper(cls, func, cache):
    '''Create a new wrapper that will determine the correct function to call.'''

    # define the wrapper...
    def F(*arguments, **keywords):
        heap = [res for _, res in heapq.nsmallest(len(cache), cache)]
        f, (a, w, k) = cls.match((arguments[:], keywords), heap)
        return f(*arguments, **keywords)
        #return f(*(arguments + tuple(w)), **keywords)

    # swap out the original code object with our wrapper's
    f, c = F, F.func_code
    cargs = c.co_argcount, c.co_nlocals, c.co_stacksize, c.co_flags, \
            c.co_code, c.co_consts, c.co_names, c.co_varnames, \
            c.co_filename, '.'.join((func.__module__, func.func_name)), \
            c.co_firstlineno, c.co_lnotab, c.co_freevars, c.co_cellvars
    newcode = types.CodeType(*cargs)
    res = types.FunctionType(newcode, f.func_globals, f.func_name,
                             f.func_defaults, f.func_closure)
    res.func_name, res.func_doc = func.func_name, func.func_doc

    # assign the specified cache to it
    setattr(res, cls.cache_name, cache)
    # ...and finally add a default docstring
    setattr(res, '__doc__', '')
    return res
Example #9
Source File: decode_ngram.py From neural_ime with MIT License

def search(lattice, ngrams, queues, beam_size, viterbi_size):
    for i in range(len(lattice)):
        for j in range(len(lattice[i])):
            for target, source in lattice[i][j]:
                word_queue = []
                for previous_cost, previous_history in queues[j]:
                    history = previous_history + [(target, source)]
                    cost = previous_cost + get_ngram_cost(ngrams, tuple(history[-3:]))
                    hypothesis = (cost, history)
                    word_queue.append(hypothesis)

                # prune word_queue to viterbi size
                if viterbi_size > 0:
                    word_queue = heapq.nsmallest(viterbi_size, word_queue,
                                                 key=operator.itemgetter(0))
                queues[i] += word_queue

        # prune queues[i] to beam size
        if beam_size > 0:
            queues[i] = heapq.nsmallest(beam_size, queues[i],
                                        key=operator.itemgetter(0))
    return queues
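Both pruning steps are the same idiom: keep only the k lowest-cost hypotheses and drop the rest. A toy version with made-up (cost, history) tuples:

import heapq
from operator import itemgetter

# toy (cost, history) hypotheses; lower cost is better
word_queue = [(2.5, ['a']), (0.7, ['b']), (1.9, ['c']), (3.1, ['d'])]
beam_size = 2

print(heapq.nsmallest(beam_size, word_queue, key=itemgetter(0)))
# [(0.7, ['b']), (1.9, ['c'])]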
Example #10
Source File: queue.py From petridishnn with MIT License

def update(self, key=None, l_priority=None, full_sort=False, keep_top_k=None):
    """ Sort the petridish queue using func

    Args:
        func : a function that maps from (idx, petridish_queue_entry) to a float;
            the default is lambda i, _ : i, which means we follow the FIFO order.
    """
    if len(self.entries) == 0:
        return
    assert bool(key) != bool(l_priority), \
        "only one option should be used for updating priority"
    if key:
        for i in range(self.size()):
            self.entries[i][IDX_PV] = key(self.entries[i][IDX_PQE])
    else:
        for i in range(self.size()):
            self.entries[i][IDX_PV] = l_priority[i]
    if full_sort:
        self.entries.sort()
        if keep_top_k is not None:
            self.entries[keep_top_k:] = []
    elif keep_top_k is not None:
        self.entries = heapq.nsmallest(keep_top_k, self.entries)
    else:
        self._update()
Example #11
Source File: TransE.py From MTransE with Apache License 2.0

def kNN_entity(self, vec, topk=10, method=0, self_vec_id=None):
    q = []
    for i in range(len(self.vec_e)):
        # skip self
        if self_vec_id != None and i == self_vec_id:
            continue
        if method == 1:
            dist = SP.distance.cosine(vec, self.vec_e[i])
        else:
            dist = LA.norm(vec - self.vec_e[i])
        if len(q) < topk:
            HP.heappush(q, self.index_dist(i, dist))
        else:
            # indeed it fetches the biggest
            tmp = HP.nsmallest(1, q)[0]
            if tmp.dist > dist:
                HP.heapreplace(q, self.index_dist(i, dist))
    rst = []
    while len(q) > 0:
        item = HP.heappop(q)
        rst.insert(0, (self.vocab_e[self.vec2e[item.index]], item.dist))
    return rst

# given entity name, find kNN
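The loop above maintains a fixed-size heap of the current best k, peeking at the worst kept element and replacing it whenever a closer neighbor appears (the source comment suggests index_dist inverts the usual comparison, so the min-heap behaves as a max-heap). A generic bounded-kNN sketch that gets the same effect by negating distances instead, so the heap root is always the current worst:

import heapq

def knn(query, points, topk=3):
    """Toy bounded kNN: keep a max-heap of size topk by negating distances."""
    heap = []
    for i, p in enumerate(points):
        dist = abs(p - query)  # stand-in for a real distance metric
        if len(heap) < topk:
            heapq.heappush(heap, (-dist, i))
        elif -heap[0][0] > dist:  # current worst kept distance is larger
            heapq.heapreplace(heap, (-dist, i))
    return sorted((-d, i) for d, i in heap)

print(knn(10, [1, 9, 14, 3, 11, 30]))  # [(1, 1), (1, 4), (4, 2)]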
Example #12
Source File: TransE.py From MTransE with Apache License 2.0

def kNN_relation(self, vec, topk=10, method=0, self_vec_id=None):
    q = []
    for i in range(len(self.vec_r)):
        # skip self
        if self_vec_id != None and i == self_vec_id:
            continue
        if method == 1:
            dist = SP.distance.cosine(vec, self.vec_r[i])
        else:
            dist = LA.norm(vec - self.vec_r[i])
        if len(q) < topk:
            HP.heappush(q, self.index_dist(i, dist))
        else:
            # indeed it fetches the biggest
            tmp = HP.nsmallest(1, q)[0]
            if tmp.dist > dist:
                HP.heapreplace(q, self.index_dist(i, dist))
    rst = []
    while len(q) > 0:
        item = HP.heappop(q)
        rst.insert(0, (self.vocab_r[self.vec2r[item.index]], item.dist))
    return rst

# given relation name, find kNN
Example #13
Source File: nce_neighbors.py From castor with Apache License 2.0

def get_nearest_neg_id(pos_feature, neg_dict, distance="cosine", k=1, weight=False):
    dis_list = []
    pos_feature = pos_feature.data.cpu().numpy()
    pos_feature_norm = pos_feature / np.sqrt(sum(pos_feature ** 2))
    neg_list = []
    for key in neg_dict:
        if distance == "l2":
            dis = np.sqrt(np.sum((np.array(pos_feature) - neg_dict[key]["feature"]) ** 2))
        elif distance == "cosine":
            neg_feature = np.array(neg_dict[key]["feature"])
            feat_norm = neg_feature / np.sqrt(sum(neg_feature ** 2))
            dis = 1 - feat_norm.dot(pos_feature_norm)
        dis_list.append(dis)
        neg_list.append(key)
    k = min(k, len(neg_dict))
    min_list = heapq.nsmallest(k, enumerate(dis_list), key=operator.itemgetter(1))
    # find the corresponding neg id
    min_id_list = [neg_list[x[0]] for x in min_list]
    if weight:
        min_id_score = [1 - x[1] for x in min_list]
        return min_id_list, min_id_score
    else:
        return min_id_list

# get the negative samples randomly
Example #14
Source File: decoder.py From neural-dep-srl with Apache License 2.0

def constrained_decoder(voc, predictions, beam, constraints):
    heap = [State(score=0, label='O', prev=None, roles=set())]
    for i, prediction in enumerate(predictions):
        next_generation = list()
        for prev in heapq.nsmallest(beam, heap, key=_get_score):
            for j, prob in enumerate(prediction):
                label = voc[j]
                score = -math.log2(prob + sys.float_info.min)
                if score > _PRUNING_THRESHOLD and next_generation:
                    continue
                next_state = State(score=score + prev.score, label=label,
                                   prev=prev, roles=prev.roles)
                constraints_violated = [not check(next_state) for check in constraints]
                if any(constraints_violated):
                    continue
                next_generation.append(
                    State(next_state.score, next_state.label, next_state.prev,
                          next_state.roles | {next_state.label[2:]}))
        heap = next_generation
    head = heapq.nsmallest(1, heap, key=_get_score)[0]
    backtrack = list()
    while head:
        backtrack.append(head.label)
        head = head.prev
    return list(reversed(backtrack[:-1]))
Example #15
Source File: utils.py From models with Apache License 2.0

def get_min(self):
    """Peek at the item with the lowest score.

    Returns:
        Same as `pop`.
    """
    if not self.heap:
        return ()
    score, item, extra_data = heapq.nsmallest(1, self.heap)[0]
    return score, item, extra_data
Example #16
Source File: 628 Maximum Product of Three Numbers.py From LeetCode with MIT License

def maximumProduct(self, nums: List[int]) -> int:
    """
    heapq nlargest nsmallest
    """
    mxes = heapq.nlargest(3, nums)
    mns = heapq.nsmallest(3, nums)
    return max(
        mxes[0] * mxes[1] * mxes[2],
        mns[0] * mns[1] * mxes[0],
    )
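Why these two candidates suffice: when negatives are present, the product of the two most-negative values is positive, so pairing them with the single largest value can beat the three largest positives. For instance:

import heapq

nums = [-10, -9, 1, 2, 3]
mxes = heapq.nlargest(3, nums)   # [3, 2, 1]
mns = heapq.nsmallest(3, nums)   # [-10, -9, 1]
print(max(mxes[0] * mxes[1] * mxes[2],   # 3 * 2 * 1 = 6
          mns[0] * mns[1] * mxes[0]))    # (-10) * (-9) * 3 = 270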
Example #17
Source File: utils.py From g-tensorflow-models with Apache License 2.0

def get_min(self):
    """Peek at the item with the lowest score.

    Returns:
        Same as `pop`.
    """
    if not self.heap:
        return ()
    score, item, extra_data = heapq.nsmallest(1, self.heap)[0]
    return score, item, extra_data
Example #18
Source File: 973 K Closest Points to Origin.py From LeetCode with MIT License

def kClosest(self, points: List[List[int]], K: int) -> List[List[int]]:
    return heapq.nsmallest(K, points, key=lambda x: x[0]**2 + x[1]**2)
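A note on the key: it uses the squared distance because sqrt is monotone, so omitting it cannot change the ordering and saves a call per point. Usage, with the points from LeetCode 973's second example:

import heapq

points = [[3, 3], [5, -1], [-2, 4]]
print(heapq.nsmallest(2, points, key=lambda x: x[0]**2 + x[1]**2))
# [[3, 3], [-2, 4]]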
Example #19
Source File: test_heapq.py From medicare-demo with Apache License 2.0

def test_get_only(self):
    for f in (heapify, heappop):
        self.assertRaises(TypeError, f, GetOnly())
    for f in (heappush, heapreplace):
        self.assertRaises(TypeError, f, GetOnly(), 10)
    for f in (nlargest, nsmallest):
        self.assertRaises(TypeError, f, 2, GetOnly())
Example #20
Source File: filter_important_factors.py From TOBIAS with MIT License

def get_important(args):
    file = args.file_in       # input file bindetect_result.txt
    filter = args.filter      # how many binding factors per condition will be selected
    file_out = args.file_out  # name of the output file
    list_file = []            # contains all lines of the file
    new_file = []             # list for the filtered file
    with open(file) as f:     # open bindetect results
        for i in f:
            i = i.strip()
            i = i.split('\t')  # read tab-separated file
            list_file.append(i)
    # get the indexes of the '_change' columns
    index_list = [list_file[0].index(i) for i in list_file[0] if '_change' in i]
    # important_values holds the maximum and minimum value per binding factor column
    important_values = [[max(heapq.nsmallest(filter, [float(a[i]) for a in list_file[1:]])),
                         min(heapq.nlargest(filter, [float(a[i]) for a in list_file[1:]]))]
                        for i in index_list]
    for i in list_file[1:]:
        for a, b in zip(index_list, important_values):
            if float(i[a]) >= float(max(b)) or float(i[a]) <= float(min(b)):  # filter if the binding value is important
                new_file.append(i)  # important lines get appended to the new list
                print(i[0])  # print to stdout for the nextflow pipeline
                break  # if the line is added, the loop jumps to the next line
    # build a new tab-separated text file
    book = {i: [] for i in list_file[0]}
    # dict for the writer: key is the header field, value the whole column
    [[book[key].append(value) for key, value in zip(list_file[0], i)] for i in new_file]
    df = pd.DataFrame(book)
    df.to_csv(file_out, '\t', index=False)

#--------------------------------------------------------------------------------------------------------#
Example #21
Source File: test_heapq.py From medicare-demo with Apache License 2.0

def test_len_only(self):
    for f in (heapify, heappop):
        self.assertRaises(TypeError, f, LenOnly())
    for f in (heappush, heapreplace):
        self.assertRaises(TypeError, f, LenOnly(), 10)
    for f in (nlargest, nsmallest):
        self.assertRaises(TypeError, f, 2, LenOnly())
Example #22
Source File: test_heapq.py From medicare-demo with Apache License 2.0

def test_non_sequence(self):
    for f in (heapify, heappop):
        self.assertRaises(TypeError, f, 10)
    for f in (heappush, heapreplace, nlargest, nsmallest):
        self.assertRaises(TypeError, f, 10, 10)
Example #23
Source File: test_heapq.py From medicare-demo with Apache License 2.0

def test_nsmallest(self):
    data = [(random.randrange(2000), i) for i in range(1000)]
    for f in (None, lambda x: x[0] * 547 % 2000):
        for n in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
            self.assertEqual(nsmallest(n, data), sorted(data)[:n])
            self.assertEqual(nsmallest(n, data, key=f),
                             sorted(data, key=f)[:n])
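The invariant exercised by this test is worth stating on its own: for any n, including n larger than the input, nsmallest agrees with the corresponding sorted prefix. A minimal standalone check:

import heapq
import random

data = [random.randrange(100) for _ in range(20)]
for n in (0, 5, 20, 25):  # n may exceed len(data)
    assert heapq.nsmallest(n, data) == sorted(data)[:n]
print("ok")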
Example #24
Source File: heap_examples.py From Algorithm_Templates with MIT License

def kClosest1(points: 'List[List[int]]', K: int) -> 'List[List[int]]':
    return heapq.nsmallest(K, points, key=lambda x: x[0] * x[0] + x[1] * x[1])

# [973] https://leetcode.com/problems/k-closest-points-to-origin/
# We have a list of points on the plane. Find the K closest points to the origin (0, 0).
#
# use heappushpop
Example #25
Source File: download_vott_json.py From active-learning-detect with MIT License

def select_rows(arr_image_data, num_rows, is_largest):
    total_rows = len(arr_image_data)
    if num_rows > total_rows:
        num_rows = total_rows
    if is_largest:
        top = nlargest(num_rows, arr_image_data,
                       key=lambda x: float(x[0][CONFIDENCE_LOCATION]))
    else:
        top = nsmallest(num_rows, arr_image_data,
                        key=lambda x: float(x[0][CONFIDENCE_LOCATION]))
    return top
Example #26
Source File: happenings_tags.py From django-happenings with BSD 2-Clause "Simplified" License

def upcoming_events(now=None, finish=90, num=5):
    if now is None:
        now = get_now()
    finish = now + timezone.timedelta(days=finish)
    finish = finish.replace(hour=23, minute=59, second=59, microsecond=999)

    all_upcoming = (UpcomingEvents(x, now, finish, num).get_upcoming_events()
                    for x in Event.objects.live(now))
    upcoming = heapq.nsmallest(
        num,
        (item for sublist in all_upcoming for item in sublist),
        key=lambda x: x[0]
    )
    return {'upcoming_events': upcoming}
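Note that nsmallest consumes generators, so the flattened per-event stream never has to be materialized as a list. A toy version of the merge, with integers standing in for the datetimes the real code compares:

import heapq

all_upcoming = ([(3, 'a'), (9, 'b')], [(1, 'c')], [(7, 'd')])
upcoming = heapq.nsmallest(
    2,
    (item for sublist in all_upcoming for item in sublist),
    key=lambda x: x[0],
)
print(upcoming)  # [(1, 'c'), (3, 'a')]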
Example #27
Source File: routing.py From kademlia with MIT License

def find_neighbors(self, node, k=None, exclude=None):
    k = k or self.ksize
    nodes = []
    for neighbor in TableTraverser(self, node):
        notexcluded = exclude is None or not neighbor.same_home_as(exclude)
        if neighbor.id != node.id and notexcluded:
            heapq.heappush(nodes, (node.distance_to(neighbor), neighbor))
        if len(nodes) == k:
            break
    return list(map(operator.itemgetter(1), heapq.nsmallest(k, nodes)))
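The (distance, neighbor) tuples order the heap by distance, and nsmallest(k, nodes) then returns the k nearest in ascending order. A toy version, with integers standing in for node objects and abs() standing in for kademlia's XOR distance metric:

import heapq
import operator

target = 42
peers = [7, 50, 40, 13, 99]

nodes = []
for peer in peers:
    # abs() stands in for kademlia's XOR distance metric
    heapq.heappush(nodes, (abs(target - peer), peer))

k = 3
print(list(map(operator.itemgetter(1), heapq.nsmallest(k, nodes))))
# [40, 50, 13]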
Example #28
Source File: node.py From kademlia with MIT License

def __iter__(self):
    nodes = heapq.nsmallest(self.maxsize, self.heap)
    return iter(map(itemgetter(1), nodes))
Example #29
Source File: finetune.py From transferlearning with MIT License

def lowest_ranking_filters(self, num):
    data_1 = []
    for i in sorted(self.filter_ranks_1.keys()):
        for j in range(self.filter_ranks_1[i].size(0)):
            data_1.append((self.activation_to_layer_1[i], j, self.filter_ranks_1[i][j]))
    data_2 = []
    for i in sorted(self.filter_ranks_2.keys()):
        for j in range(self.filter_ranks_2[i].size(0)):
            data_2.append((self.activation_to_layer_2[i], j, self.filter_ranks_2[i][j]))
    data_3 = []
    data_3.extend(data_1)
    data_3.extend(data_2)

    dic = {}
    c = nsmallest(num * 2, data_3, itemgetter(2))
    for i in range(len(c)):
        nm = str(c[i][0]) + '_' + str(c[i][1])
        if dic.get(nm) != None:
            dic[nm] = min(dic[nm], c[i][2].item())
        else:
            dic[nm] = c[i][2].item()

    newc = []
    for i in range(len(list(dic.items()))):
        lyer = int(list(dic.items())[i][0].split('_')[0])
        filt = int(list(dic.items())[i][0].split('_')[1])
        val = torch.tensor(list(dic.items())[i][1])
        newc.append((lyer, filt, val))
    return nsmallest(num, newc, itemgetter(2))
Example #30
Source File: test_heapq.py From medicare-demo with Apache License 2.0

def test_get_only(self):
    seq = [CmpErr(), CmpErr(), CmpErr()]
    for f in (heapify, heappop):
        self.assertRaises(ZeroDivisionError, f, seq)
    for f in (heappush, heapreplace):
        self.assertRaises(ZeroDivisionError, f, seq, 10)
    for f in (nlargest, nsmallest):
        self.assertRaises(ZeroDivisionError, f, 2, seq)