Python bottleneck.argpartition() Examples
The following are 15 code examples of bottleneck.argpartition(). You can go to the original project or source file by following the link above each example, or check out the other available functions and classes of the bottleneck module.
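Before the examples, a quick reminder of what the function does: bottleneck.argpartition(a, kth, axis=-1) is a faster drop-in for numpy.argpartition. It returns indices that place the kth-smallest value in its sorted position, with all smaller values (in arbitrary order) in front of it, which is why the examples below use idioms like argpartition(-a, k)[:k] or argpartition(a, a.size - k)[-k:] to grab the top-k entries. A minimal sketch with made-up numbers, not taken from any project below:

import numpy as np
import bottleneck as bn

scores = np.array([0.1, 0.9, 0.4, 0.8, 0.3])
k = 2

# indices of the k largest scores, in no particular order
top_k = bn.argpartition(-scores, k)[:k]
# equivalent idiom: bn.argpartition(scores, scores.size - k)[-k:]
print(np.sort(scores[top_k])[::-1])   # [0.9 0.8]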
Example #1
Source File: train.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()
    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs
Example #2
Source File: train.py From training_results_v0.6 with Apache License 2.0
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()
    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs
Example #3
Source File: utils.py From RecVAE with Apache License 2.0
def ndcg(X_pred, heldout_batch, k=100):
    '''
    normalized discounted cumulative gain@k for binary relevance
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
    '''
    batch_users = X_pred.shape[0]
    idx_topk_part = bn.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[np.arange(batch_users)[:, np.newaxis],
                       idx_topk_part[:, :k]]
    idx_part = np.argsort(-topk_part, axis=1)
    # X_pred[np.arange(batch_users)[:, np.newaxis], idx_topk] is the sorted
    # topk predicted score
    idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]
    # build the discount template
    tp = 1. / np.log2(np.arange(2, k + 2))
    DCG = (heldout_batch[np.arange(batch_users)[:, np.newaxis],
                         idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in heldout_batch.getnnz(axis=1)])
    return DCG / IDCG
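A note on the pattern above (also used in Example #5): bn.argpartition only guarantees that the k best columns land in the first k positions, not that they are sorted, so the code follows it with np.argsort restricted to those k columns to get a true ranking. A minimal sketch of that two-step top-k sort, with made-up scores and illustrative variable names:

import numpy as np
import bottleneck as bn

X_pred = np.array([[0.2, 0.9, 0.1, 0.7],
                   [0.5, 0.3, 0.8, 0.4]])
k = 2
rows = np.arange(X_pred.shape[0])[:, np.newaxis]

idx_part = bn.argpartition(-X_pred, k, axis=1)[:, :k]   # top-k columns, unordered
order = np.argsort(-X_pred[rows, idx_part], axis=1)     # sort only those k scores
idx_topk = idx_part[rows, order]
print(idx_topk)   # row 0 -> [1 3], row 1 -> [2 0] (highest score first)

Partitioning first and then sorting only k columns per row is much cheaper than a full argsort when the number of items per row is large.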
Example #4
Source File: metrics_ranking.py From NNCF with MIT License
def eval_apk(true_scores, pred_scores, topk):
    # indices of the top-k largest pred_scores (top-k smallest of -pred_scores)
    idx = bottleneck.argpartition(-pred_scores, topk)[:topk]
    noise = np.random.random(topk)
    if not isinstance(pred_scores, np.ndarray):
        pred_scores = np.array(pred_scores)
    if not isinstance(true_scores, np.ndarray):
        true_scores = np.array(true_scores)
    rec = sorted(zip(pred_scores[idx], noise, true_scores[idx]), reverse=True)
    nhits = 0.
    k = topk if topk >= 0 else len(rec)
    sumap = 0.0
    for i in range(len(rec)):
        if (rec[i][-1] != 0.):
            nhits += 1.0
            if i < k:
                sumap += nhits / (i+1.0)
            else:
                break
    nhits = np.sum(true_scores)
    if nhits != 0:
        sumap /= min(nhits, k)
        return sumap
    else:
        return 0.
Example #5
Source File: split_train_validation_test_VAE_CF.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    normalized discounted cumulative gain@k for binary relevance
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
    '''
    batch_users = X_pred.shape[0]
    idx_topk_part = bn.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[np.arange(batch_users)[:, np.newaxis],
                       idx_topk_part[:, :k]]
    idx_part = np.argsort(-topk_part, axis=1)
    # X_pred[np.arange(batch_users)[:, np.newaxis], idx_topk] is the sorted
    # topk predicted score
    idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]
    # build the discount template
    tp = 1. / np.log2(np.arange(2, k + 2))
    DCG = (heldout_batch[np.arange(batch_users)[:, np.newaxis],
                         idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in heldout_batch.getnnz(axis=1)])
    return DCG / IDCG
Example #6
Source File: las.py From biclustlib with GNU General Public License v3.0
def _find_constrained_bicluster(self, data):
    """Find a k x l bicluster."""
    num_rows, num_cols = data.shape
    k = random.randint(1, math.ceil(num_rows / 2))
    l = random.randint(1, math.ceil(num_cols / 2))
    cols = np.random.choice(num_cols, size=l, replace=False)
    old_avg, avg = float('-inf'), 0.0
    while abs(avg - old_avg) > self.tol:
        old_avg = avg
        row_sums = np.sum(data[:, cols], axis=1)
        # this is usually faster than rows = np.argsort(row_sums)[-k:]
        rows = bn.argpartition(row_sums, num_rows - k)[-k:]
        col_sums = np.sum(data[rows, :], axis=0)
        # this is usually faster than cols = np.argsort(col_sums)[-l:]
        cols = bn.argpartition(col_sums, num_cols - l)[-l:]
        avg = np.mean(data[np.ix_(rows, cols)])
    return Bicluster(rows, cols)
Example #7
Source File: train.py From SNIPER-mxnet with Apache License 2.0
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()
    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs
Example #8
Source File: seq2seq_atten.py From video_captioning_rl with MIT License
def top_n_indexes(arr, n):
    idx = bn.argpartition(arr, arr.size-n, axis=None)[-n:]
    width = arr.shape[1]
    return [divmod(i, width) for i in idx]
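For context on the helper above (the same idiom appears in Examples #9 and #14): axis=None makes bottleneck partition the flattened array, and divmod(i, width) maps each flat index back to a (row, column) pair. A small illustrative call with made-up values:

import numpy as np
import bottleneck as bn

arr = np.array([[1.0, 5.0, 3.0],
                [4.0, 2.0, 6.0]])
n = 2
# flat indices of the n largest values, then back to (row, col)
flat_idx = bn.argpartition(arr, arr.size - n, axis=None)[-n:]
print([divmod(int(i), arr.shape[1]) for i in flat_idx])   # [(0, 1), (1, 2)] in some order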
Example #9
Source File: Sets2Sets.py From Sets2Sets with Apache License 2.0
def top_n_indexes(arr, n):
    idx = bn.argpartition(arr, arr.size - n, axis=None)[-n:]
    width = arr.shape[1]
    return [divmod(i, width) for i in idx]
Example #10
Source File: train.py From DistanceWeightedSampling with MIT License
def evaluate_emb(emb, labels):
    """Evaluate embeddings based on Recall@k."""
    d_mat = get_distance_matrix(emb)
    d_mat = d_mat.asnumpy()
    labels = labels.asnumpy()
    names = []
    accs = []
    for k in [1, 2, 4, 8, 16]:
        names.append('Recall@%d' % k)
        correct, cnt = 0.0, 0.0
        for i in range(emb.shape[0]):
            d_mat[i, i] = 1e10
            nns = argpartition(d_mat[i], k)[:k]
            if any(labels[i] == labels[nn] for nn in nns):
                correct += 1
            cnt += 1
        accs.append(correct/cnt)
    return names, accs

#def validate(val_loader, model, criterion, args):
#    outputs = []
#    labels = []
#
#    model.eval()
#
#    with torch.no_grad():
#        end = time.time()
#        for i, (input, target) in enumerate(val_loader):
#            outputs += model(input)[-1].cpu().tolist()
#            labels += target.cpu().tolist()
#
Example #11
Source File: utils.py From RecVAE with Apache License 2.0
def recall(X_pred, heldout_batch, k=100):
    batch_users = X_pred.shape[0]
    idx = bn.argpartition(-X_pred, k, axis=1)
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True
    X_true_binary = (heldout_batch > 0).toarray()
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    recall = tmp / np.minimum(k, X_true_binary.sum(axis=1))
    return recall
Example #12
Source File: metrics_ranking.py From NNCF with MIT License
def eval_multiple(true_scores, pred_scores, topk):
    idx = bottleneck.argpartition(-pred_scores, topk)[:topk]
    noise = np.random.random(topk)
    if not isinstance(pred_scores, np.ndarray):
        pred_scores = np.array(pred_scores)
    if not isinstance(true_scores, np.ndarray):
        true_scores = np.array(true_scores)
    rec = sorted(zip(pred_scores[idx], noise, true_scores[idx]), reverse=True)
    nhits = 0.
    nhits_topk = 0.
    k = topk if topk >= 0 else len(rec)
    sumap = 0.0
    for i in range(len(rec)):
        if rec[i][-1] != 0.:
            nhits += 1.0
            if i < k:
                nhits_topk += 1
                sumap += nhits / (i+1.0)
    nhits = np.sum(true_scores)
    if nhits != 0:
        sumap /= min(nhits, k)
        map_at_k = sumap
        recall_at_k = nhits_topk / nhits
        precision_at_k = nhits_topk / k
    else:
        map_at_k = 0.
        recall_at_k = 0.
        precision_at_k = 0.
    return map_at_k, recall_at_k, precision_at_k
Example #13
Source File: split_train_validation_test_VAE_CF.py From RecSys2019_DeepLearning_Evaluation with GNU Affero General Public License v3.0
def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    batch_users = X_pred.shape[0]
    idx = bn.argpartition(-X_pred, k, axis=1)
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True
    X_true_binary = (heldout_batch > 0).toarray()
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    recall = tmp / np.minimum(k, X_true_binary.sum(axis=1))
    return recall

##############################################################################
############################ Train a Multi-VAE^{PR}
Example #14
Source File: utils.py From mmvt with GNU General Public License v3.0
def top_n_indexes(arr, n):
    # https://gist.github.com/tomerfiliba/3698403
    try:
        import bottleneck
        idx = bottleneck.argpartition(arr, arr.size-n, axis=None)[-n:]
    except:
        idx = np.argpartition(arr, arr.size - n, axis=None)[-n:]
    width = arr.shape[1]
    return [divmod(i, width) for i in idx]
Example #15
Source File: cca.py From biclustlib with GNU General Public License v3.0
def __random_deletion(self, data, bool_array, msr_array, choice):
    indices = np.where(bool_array)[0]
    n = int(math.ceil(len(msr_array) * self.alpha))
    max_msr_indices = bn.argpartition(msr_array, len(msr_array) - n)[-n:]
    i = indices[np.random.choice(max_msr_indices)]
    bool_array[i] = False