Python bottleneck.argpartition() Examples

The following are 15 code examples of bottleneck.argpartition(), collected from open-source projects. Each example lists its source file, originating project, and license. You may also want to check out the other available functions and classes of the bottleneck module.
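For orientation, here is a minimal, self-contained sketch of the top-k idiom that most of the examples below rely on. The array and k are made up for illustration; bottleneck.argpartition() mirrors numpy.argpartition(), so the indices of the k smallest values land, unordered, in the first k positions of the result.

import numpy as np
import bottleneck as bn

scores = np.array([0.1, 0.9, 0.4, 0.7, 0.2, 0.8])
k = 3

# Indices of the k smallest values (order within the first k slots is arbitrary).
smallest_k = bn.argpartition(scores, k)[:k]

# Indices of the k largest values: negate the input (or partition at size - k and take the tail).
largest_k = bn.argpartition(-scores, k)[:k]

print(sorted(scores[smallest_k].tolist()))   # [0.1, 0.2, 0.4]
print(sorted(scores[largest_k].tolist()))    # [0.7, 0.8, 0.9]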
Example #1
Source File: train.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()

    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs 
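In the excerpt above, argpartition is presumably bottleneck.argpartition imported at module level in the original train.py. The neighbour lookup inside the loop can be illustrated in isolation; the distances below are made up, and the final sort is only for a stable printout, since argpartition() does not order the first k indices.

import numpy as np
from bottleneck import argpartition

d = np.array([0.0, 3.2, 0.5, 2.1, 0.9])   # hypothetical distances from sample 0
d[0] = 1e10                                # mask the self-distance, as in the loop above
nns = argpartition(d, 2)[:2]               # the two nearest neighbours, in no particular order
print(sorted(nns.tolist()))                # [2, 4]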
Example #2
Source File: train.py    From training_results_v0.6 with Apache License 2.0
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()

    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs 
Example #3
Source File: utils.py    From RecVAE with Apache License 2.0
def ndcg(X_pred, heldout_batch, k=100):
    '''
    normalized discounted cumulative gain@k for binary relevance
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
    '''
    batch_users = X_pred.shape[0]
    idx_topk_part = bn.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[np.arange(batch_users)[:, np.newaxis],
                       idx_topk_part[:, :k]]
    idx_part = np.argsort(-topk_part, axis=1)
    # X_pred[np.arange(batch_users)[:, np.newaxis], idx_topk] is the sorted
    # topk predicted score
    idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]
    # build the discount template
    tp = 1. / np.log2(np.arange(2, k + 2))

    DCG = (heldout_batch[np.arange(batch_users)[:, np.newaxis],
                         idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in heldout_batch.getnnz(axis=1)])
    return DCG / IDCG 
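The two-stage top-k above (a cheap argpartition over all items, then an argsort over only k of them) can be seen on a toy matrix; the scores and k below are made up for illustration.

import numpy as np
import bottleneck as bn

X = np.array([[0.1, 0.9, 0.3, 0.7],
              [0.6, 0.2, 0.8, 0.4]])
k = 2
part = bn.argpartition(-X, k, axis=1)[:, :k]   # unordered top-k columns per row
rows = np.arange(X.shape[0])[:, np.newaxis]
order = np.argsort(-X[rows, part], axis=1)     # sort just those k scores
topk = part[rows, order]                       # top-k columns, highest score first
print(topk)                                    # [[1 3]
                                               #  [2 0]]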
Example #4
Source File: metrics_ranking.py    From NNCF with MIT License
def eval_apk(true_scores, pred_scores, topk):
    if not isinstance(pred_scores, np.ndarray):
        pred_scores = np.array(pred_scores)
    if not isinstance(true_scores, np.ndarray):
        true_scores = np.array(true_scores)
    idx = bottleneck.argpartition(-pred_scores, topk)[:topk]  # indices of the top-k largest predicted scores (unordered)
    noise = np.random.random(topk)  # random tie-breaker for equal scores
    rec = sorted(zip(pred_scores[idx], noise, true_scores[idx]), reverse=True)
    nhits = 0.
    k = topk if topk >= 0 else len(rec)
    sumap = 0.0
    for i in range(len(rec)):
        if (rec[i][-1] != 0.):
            nhits += 1.0
            if i < k:
                sumap += nhits / (i+1.0)
            else:
                break
    nhits = np.sum(true_scores)
    if nhits != 0:
        sumap /= min(nhits, k)
        return sumap
    else:
        return 0. 
Example #5
Source File: split_train_validation_test_VAE_CF.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    normalized discounted cumulative gain@k for binary relevance
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
    '''
    batch_users = X_pred.shape[0]
    idx_topk_part = bn.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[np.arange(batch_users)[:, np.newaxis],
                       idx_topk_part[:, :k]]
    idx_part = np.argsort(-topk_part, axis=1)
    # X_pred[np.arange(batch_users)[:, np.newaxis], idx_topk] is the sorted
    # topk predicted score
    idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]
    # build the discount template
    tp = 1. / np.log2(np.arange(2, k + 2))

    DCG = (heldout_batch[np.arange(batch_users)[:, np.newaxis],
                         idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in heldout_batch.getnnz(axis=1)])
    return DCG / IDCG 
Example #6
Source File: las.py    From biclustlib with GNU General Public License v3.0
def _find_constrained_bicluster(self, data):
    """Find a k x l bicluster."""
    num_rows, num_cols = data.shape

    k = random.randint(1, math.ceil(num_rows / 2))
    l = random.randint(1, math.ceil(num_cols / 2))

    cols = np.random.choice(num_cols, size=l, replace=False)

    old_avg, avg = float('-inf'), 0.0

    while abs(avg - old_avg) > self.tol:
        old_avg = avg

        row_sums = np.sum(data[:, cols], axis=1)
        rows = bn.argpartition(row_sums, num_rows - k)[-k:]  # this is usually faster than rows = np.argsort(row_sums)[-k:]

        col_sums = np.sum(data[rows, :], axis=0)
        cols = bn.argpartition(col_sums, num_cols - l)[-l:]  # this is usually faster than cols = np.argsort(col_sums)[-l:]

        avg = np.mean(data[np.ix_(rows, cols)])

    return Bicluster(rows, cols)
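The partition-at-(n - k) idiom used twice above selects the k largest entries without negating the array: everything from position n - k onward is at least as large as the rest. A toy illustration (values made up):

import numpy as np
import bottleneck as bn

sums = np.array([4.0, 9.0, 1.0, 7.0, 3.0])
k = 2
top = bn.argpartition(sums, sums.size - k)[-k:]   # indices of the 2 largest values
print(sorted(sums[top].tolist()))                 # [7.0, 9.0]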
Example #7
Source File: train.py    From SNIPER-mxnet with Apache License 2.0
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()

    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs 
Example #8
Source File: seq2seq_atten.py    From video_captioning_rl with MIT License
def top_n_indexes(arr, n):
    idx = bn.argpartition(arr, arr.size - n, axis=None)[-n:]
    width = arr.shape[1]
    return [divmod(i, width) for i in idx]
Example #9
Source File: Sets2Sets.py    From Sets2Sets with Apache License 2.0
def top_n_indexes(arr, n):
    idx = bn.argpartition(arr, arr.size - n, axis=None)[-n:]
    width = arr.shape[1]
    return [divmod(i, width) for i in idx] 
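A hypothetical call to the top_n_indexes() above, on a made-up 2 x 3 array, assuming bottleneck is imported as bn in the original module as the code implies: axis=None partitions the flattened array, and divmod converts each flat index back to a (row, col) pair. The pairs come back in no guaranteed order.

import numpy as np
import bottleneck as bn

arr = np.array([[1.0, 8.0, 3.0],
                [9.0, 2.0, 7.0]])
print(top_n_indexes(arr, 3))   # the positions of 9.0, 8.0 and 7.0, e.g. [(1, 2), (0, 1), (1, 0)]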
Example #10
Source File: train.py    From DistanceWeightedSampling with MIT License
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()

    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs
Example #11
Source File: utils.py    From RecVAE with Apache License 2.0
def recall(X_pred, heldout_batch, k=100):
    batch_users = X_pred.shape[0]

    idx = bn.argpartition(-X_pred, k, axis=1)
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True

    X_true_binary = (heldout_batch > 0).toarray()
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    recall = tmp / np.minimum(k, X_true_binary.sum(axis=1))
    return recall 
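A hypothetical call to the recall() above on toy data; the dense predictions and the sparse held-out matrix are made up, and heldout_batch is assumed to be a scipy.sparse matrix, as the .toarray() call implies (numpy and bottleneck are assumed to be imported as np and bn in the original utils.py).

import numpy as np
from scipy import sparse

X_pred = np.array([[0.1, 0.9, 0.3, 0.7],
                   [0.6, 0.2, 0.8, 0.4]])
heldout = sparse.csr_matrix(np.array([[0, 1, 0, 0],
                                      [1, 0, 0, 1]]))
print(recall(X_pred, heldout, k=2))   # [1.  0.5]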
Example #12
Source File: metrics_ranking.py    From NNCF with MIT License
def eval_multiple(true_scores, pred_scores, topk):
    if not isinstance(pred_scores, np.ndarray):
        pred_scores = np.array(pred_scores)
    if not isinstance(true_scores, np.ndarray):
        true_scores = np.array(true_scores)
    idx = bottleneck.argpartition(-pred_scores, topk)[:topk]  # indices of the top-k largest predicted scores (unordered)
    noise = np.random.random(topk)  # random tie-breaker for equal scores
    rec = sorted(zip(pred_scores[idx], noise, true_scores[idx]), reverse=True)
    nhits = 0.
    nhits_topk = 0.
    k = topk if topk >= 0 else len(rec)
    sumap = 0.0
    for i in range(len(rec)):
        if rec[i][-1] != 0.:
            nhits += 1.0
            if i < k:
                nhits_topk += 1
                sumap += nhits / (i+1.0)
    nhits = np.sum(true_scores)
    if nhits != 0:
        sumap /= min(nhits, k)
        map_at_k = sumap
        recall_at_k = nhits_topk / nhits
        precision_at_k = nhits_topk / k
    else:
        map_at_k = 0.
        recall_at_k = 0.
        precision_at_k = 0.

    return map_at_k, recall_at_k, precision_at_k 
Example #13
Source File: split_train_validation_test_VAE_CF.py    From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    batch_users = X_pred.shape[0]

    idx = bn.argpartition(-X_pred, k, axis=1)
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True

    X_true_binary = (heldout_batch > 0).toarray()
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    recall = tmp / np.minimum(k, X_true_binary.sum(axis=1))
    return recall
Example #14
Source File: utils.py    From mmvt with GNU General Public License v3.0
def top_n_indexes(arr, n):
    # https://gist.github.com/tomerfiliba/3698403
    try:
        import bottleneck
        idx = bottleneck.argpartition(arr, arr.size - n, axis=None)[-n:]
    except ImportError:
        # fall back to numpy if bottleneck is unavailable
        idx = np.argpartition(arr, arr.size - n, axis=None)[-n:]
    width = arr.shape[1]
    return [divmod(i, width) for i in idx] 
Example #15
Source File: cca.py    From biclustlib with GNU General Public License v3.0
def __random_deletion(self, data, bool_array, msr_array, choice):
    indices = np.where(bool_array)[0]
    n = int(math.ceil(len(msr_array) * self.alpha))
    max_msr_indices = bn.argpartition(msr_array, len(msr_array) - n)[-n:]
    i = indices[np.random.choice(max_msr_indices)]
    bool_array[i] = False