Python sklearn.metrics.pairwise.manhattan_distances() Examples

Example #1
Source File:    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_tsne_with_different_distance_metrics():
    """Make sure that TSNE works for different distance metrics.

    For every named metric, embedding the raw data with ``metric=<name>``
    is compared against embedding the matching pairwise distance matrix
    with ``metric='precomputed'``; the two embeddings must be equal.
    """
    random_state = check_random_state(0)
    n_components_original = 3
    n_components_embedding = 2
    X = random_state.randn(50, n_components_original).astype(np.float32)
    metrics = ['manhattan', 'cosine']
    dist_funcs = [manhattan_distances, cosine_distances]
    for metric, dist_func in zip(metrics, dist_funcs):
        # NOTE(review): the tails of both TSNE(...) calls were missing, which
        # left the parentheses unclosed. They are restored here with a shared
        # seed so the metric handling is the only difference between the two
        # runs -- confirm against the upstream test.
        X_transformed_tsne = TSNE(
            metric=metric, n_components=n_components_embedding,
            random_state=0).fit_transform(X)
        X_transformed_tsne_precomputed = TSNE(
            metric='precomputed', n_components=n_components_embedding,
            random_state=0).fit_transform(dist_func(X))
        assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
Example #2
Source File:    From CIKM-AnalytiCup-2018 with Apache License 2.0 6 votes vote down vote up
def _get_similarity_values(self, q1_csc, q2_csc):
        """Compute per-pair similarity/distance features for two aligned inputs.

        ``q1_csc`` and ``q2_csc`` are iterated in lockstep; for every pair the
        cosine, manhattan, euclidean, jaccard and minkowski measures are
        computed and collected.

        :param q1_csc: iterable of items accepted by ``cs``/``md``/``ed`` that
                       also expose ``toarray()`` (presumably sparse rows --
                       TODO confirm)
        :param q2_csc: iterable aligned with ``q1_csc``
        :return: tuple of five lists ``(cosine_sim, manhattan_dis,
                 eucledian_dis, jaccard_dis, minkowsk_dis)``
        """
        cosine_sim = []
        manhattan_dis = []
        eucledian_dis = []
        jaccard_dis = []
        minkowsk_dis = []
        for i, j in zip(q1_csc, q2_csc):
            # NOTE(review): the original computed each value and then dropped
            # it; the appends are restored. Indexing with [0][0] assumes the
            # pairwise helpers return a 1x1 2-D result for a single pair of
            # rows -- TODO confirm.
            cosine_sim.append(cs(i, j)[0][0])
            manhattan_dis.append(md(i, j)[0][0])
            eucledian_dis.append(ed(i, j)[0][0])

            i_ = i.toarray()
            j_ = j.toarray()
            # NOTE(review): the stray indentation around the jaccard call
            # suggests a lost try/except; a ValueError fallback of 0 is
            # assumed here -- confirm the exception type and fallback value.
            try:
                jaccard_dis.append(jsc(i_, j_))
            except ValueError:
                jaccard_dis.append(0)

            minkowsk_dis.append(minkowski_dis.pairwise(i_, j_)[0][0])
        return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
Example #3
Source File:    From region with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_init():
    """Check that Spanning_Forest stores its constructor arguments.

    Covers the defaults, explicit overrides, and the affinity path where the
    metric becomes a lambda computing ``-log(affinity(x))``.
    """
    # Defaults.
    default = Spanning_Forest()
    assert default.metric == skm.manhattan_distances
    assert default.center == np.mean
    assert default.reduction == np.sum

    # Explicit overrides are stored unchanged.
    change = Spanning_Forest(dissimilarity=skm.euclidean_distances,
                             center=np.median, reduction=np.max)
    assert change.metric == skm.euclidean_distances
    assert change.center == np.median
    assert change.reduction == np.max

    # An affinity is wrapped in a lambda that inverts it into a dissimilarity.
    sym = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(sym.metric, types.LambdaType)
    test_distance = -np.log(skm.cosine_similarity(data[:2,]))
    comparator = sym.metric(data[:2,])
    np.testing.assert_allclose(test_distance, comparator)
Example #4
Source File:    From adapted_deep_embeddings with MIT License 6 votes vote down vote up
def recall_at_kappa_leave_one_out(test_emb, test_id, kappa, dist):
    """Compute leave-one-out recall@k for k = 1..kappa.

    Only samples whose id occurs at least twice in ``test_id`` are used as
    queries. Each query is ranked against all of ``test_emb``; the closest
    entry is discarded and the rank of the first remaining entry with the
    same id decides from which k onwards the query counts as a hit.

    :param test_emb: embeddings, indexable by a boolean mask over samples
    :param test_id: 1-D array of ids aligned with ``test_emb``
    :param kappa: number of recall levels to report
    :param dist: one of ``'cos'``, ``'l2'``, ``'l1'``, ``'max_l1'``, ``'max_l2'``
    :return: array of length ``kappa`` with recall at each level
    :raises ValueError: if ``dist`` is not a supported name
    """
    unique_ids, unique_counts = np.unique(test_id, return_counts=True)
    unique_ids = unique_ids[unique_counts >= 2]
    # np.isin replaces the deprecated np.in1d (same result for 1-D input).
    good_test_indices = np.isin(test_id, unique_ids)
    valid_test_embs = test_emb[good_test_indices]
    valid_test_ids = test_id[good_test_indices]
    n_correct_at_k = np.zeros(kappa)
    if dist == 'cos':
        distances = find_cos_distances(valid_test_embs, test_emb)
    elif dist == 'l2':
        distances = find_l2_distances(valid_test_embs, test_emb)
    elif dist == 'l1':
        distances = manhattan_distances(valid_test_embs, test_emb)
    elif dist == 'max_l1' or dist == 'max_l2':
        distances = max_distances(valid_test_embs, test_emb, dist)
    else:
        # Previously an unknown name left `distances` unbound and surfaced
        # later as an UnboundLocalError inside the loop.
        raise ValueError('unsupported dist: %r' % (dist,))
    for idx, valid_test_id in enumerate(valid_test_ids):
        # Drop the closest entry -- presumably the query itself at distance 0.
        k_sorted_indices = np.argsort(distances[idx])[1:]
        first_correct_position = np.where(test_id[k_sorted_indices] == valid_test_id)[0][0]
        if first_correct_position < kappa:
            # A hit at rank r is also a hit for every larger k.
            n_correct_at_k[first_correct_position:] += 1
    return 1. * n_correct_at_k / len(valid_test_ids)
Example #5
Source File:    From adapted_deep_embeddings with MIT License 6 votes vote down vote up
def recall_at_kappa_support_query(x_support, y_support, x_query, y_query, kappa, dist):
    """Compute recall@k (k = 1..kappa) of query samples against a support set.

    Each query is ranked against every support sample; the rank of the first
    support sample with the same label decides from which k onwards the query
    counts as a hit.

    :param x_support: support embeddings
    :param y_support: array of support labels aligned with ``x_support``
    :param x_query: query embeddings
    :param y_query: query labels aligned with ``x_query``
    :param kappa: number of recall levels to report
    :param dist: one of ``'cos'``, ``'l2'``, ``'l1'``, ``'max_l1'``, ``'max_l2'``
    :return: array of length ``kappa`` with recall at each level
    :raises ValueError: if ``dist`` is not a supported name
    """
    n_correct_at_k = np.zeros(kappa)
    if dist == 'cos':
        distances = find_cos_distances(x_query, x_support)
    elif dist == 'l2':
        distances = find_l2_distances(x_query, x_support)
    elif dist == 'l1':
        distances = manhattan_distances(x_query, x_support)
    elif dist == 'max_l1' or dist == 'max_l2':
        distances = max_distances(x_query, x_support, dist)
    else:
        # Previously an unknown name left `distances` unbound and surfaced
        # later as an UnboundLocalError inside the loop.
        raise ValueError('unsupported dist: %r' % (dist,))
    for idx, valid_test_id in enumerate(y_query):
        k_sorted_indices = np.argsort(distances[idx])
        # Assumes every query label occurs in y_support; otherwise the
        # [0][0] lookup raises IndexError.
        first_correct_position = np.where(y_support[k_sorted_indices] == valid_test_id)[0][0]
        if first_correct_position < kappa:
            # A hit at rank r is also a hit for every larger k.
            n_correct_at_k[first_correct_position:] += 1
    return 1. * n_correct_at_k / len(y_query)
Example #6
Source File:    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_tsne_with_different_distance_metrics():
    """Make sure that TSNE works for different distance metrics.

    For every named metric, embedding the raw data with ``metric=<name>``
    is compared against embedding the matching pairwise distance matrix
    with ``metric='precomputed'``; the two embeddings must be equal.
    """
    random_state = check_random_state(0)
    n_components_original = 3
    n_components_embedding = 2
    X = random_state.randn(50, n_components_original).astype(np.float32)
    metrics = ['manhattan', 'cosine']
    dist_funcs = [manhattan_distances, cosine_distances]
    for metric, dist_func in zip(metrics, dist_funcs):
        # NOTE(review): the tails of both TSNE(...) calls were missing, which
        # left the parentheses unclosed. They are restored here with a shared
        # seed so the metric handling is the only difference between the two
        # runs -- confirm against the upstream test.
        X_transformed_tsne = TSNE(
            metric=metric, n_components=n_components_embedding,
            random_state=0).fit_transform(X)
        X_transformed_tsne_precomputed = TSNE(
            metric='precomputed', n_components=n_components_embedding,
            random_state=0).fit_transform(dist_func(X))
        assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
Example #7
Source File:    From mars with Apache License 2.0 5 votes vote down vote up
def execute(cls, ctx, op):
        """Execute the manhattan-distances operand.

        Looks up the operand's input values in ``ctx``, passes them through
        ``as_same_device`` and stores the distances computed by
        ``sklearn_manhattan_distances`` in ``ctx`` under the key of the first
        output.

        :param ctx: mapping from chunk key to value
        :param op: operand providing ``inputs``, ``outputs``, ``device`` and
                   ``sum_over_features``
        :raises NotImplementedError: if ``sklearn_manhattan_distances`` is None
        """
        input_values = [ctx[inp.key] for inp in op.inputs]
        (x, y), device_id, xp = as_same_device(
            input_values, device=op.device, ret_extra=True)
        out = op.outputs[0]

        with device(device_id):
            if sklearn_manhattan_distances is None:  # pragma: no cover
                # we cannot support sparse
                raise NotImplementedError('cannot support calculate manhattan '
                                          'distances on GPU')
            ctx[out.key] = sklearn_manhattan_distances(
                x, y, sum_over_features=op.sum_over_features)
Example #8
Source File:    From tokenquery with GNU General Public License v3.0 5 votes vote down vote up
def vec_man_dist(token_input, operation_input):
    """Compare the manhattan distance between two vectors with a threshold.

    ``operation_input`` has the form ``<ref vector><operator><value>``. The
    distance between the token vector and the reference vector is compared
    with ``<value>`` using ``<operator>``.

    :param token_input: string form of the token vector
    :param operation_input: string holding reference vector, operator, value
    :return: the comparison result (whatever ``manhattan_distances(...) <op>
             value`` yields); ``False`` on a length mismatch or a
             ``ValueError``; ``None`` when ``operation_input`` cannot be parsed
    """
    operation_string = None
    ref_vector_string = None
    cond_value_string = None
    # Two-character operators come first so that e.g. '>=' is not read as '>'.
    for opr_sign in ['==', '>=', '<=', '!=', '<>', '<', '>', '=']:
        if opr_sign in operation_input:
            parts = operation_input.split(opr_sign)
            ref_vector_string = parts[0]
            operation_string = opr_sign
            cond_value_string = parts[1]
            # Stop at the first match; without this a later, shorter operator
            # ('>' or '=' inside '>=') would overwrite the parsed pieces.
            break

    if ref_vector_string and cond_value_string and operation_string:
        try:
            cond_value = float(cond_value_string)
            ref_vector = change_string_to_vector(ref_vector_string)
            token_vector = change_string_to_vector(token_input)
            # Computed once and reused by every comparison below.
            distance = manhattan_distances(token_vector, ref_vector)
            print(distance)
            if len(ref_vector) != len(token_vector):
                print ('len of vectors does not match')
                return False
            if operation_string == "=" or operation_string == "==":
                return distance == cond_value
            elif operation_string == "<":
                return distance < cond_value
            elif operation_string == ">":
                return distance > cond_value
            elif operation_string == ">=":
                return distance >= cond_value
            elif operation_string == "<=":
                return distance <= cond_value
            elif operation_string == "!=" or operation_string == "<>":
                return distance != cond_value
            else:
                return False
        except ValueError:
            # TODO raise tokenregex error
            return False
    else:
        # TODO raise tokenregex error
        print ('Problem with the operation input')
Example #9
Source File:    From nlp_research with MIT License 5 votes vote down vote up
def similarity(self, query, type):
        """Score ``query`` against every item of the corpus.

        :param query: raw query; vectorised with ``self.get_vector`` for the
                      vector measures, whitespace-split for ``'bm25'``
        :param type: ``'cosine'``, ``'manhattan'``, ``'euclidean'`` or ``'bm25'``
        :return: for the vector measures, a list with one score per entry of
                 ``self.corpus_vec``; for ``'bm25'``, whatever
                 ``self.bm25_model.get_scores`` returns
        :raises ValueError: if ``type`` is not one of the supported names
        """
        assert self.corpus is not None, "self.corpus can't be None"
        if type == 'cosine':
            query = self.get_vector(query)
            ret = [cosine_similarity(item, query) for item in self.corpus_vec]
        elif type == 'manhattan':
            query = self.get_vector(query)
            ret = [manhattan_distances(item, query) for item in self.corpus_vec]
        elif type == 'euclidean':
            query = self.get_vector(query)
            ret = [euclidean_distances(item, query) for item in self.corpus_vec]
        elif type == 'bm25':
            ret = self.bm25_model.get_scores(query.split())
        else:
            # Previously this raise sat inside the bm25 branch, so bm25 always
            # failed and unknown types silently returned an empty list.
            raise ValueError('similarity type error:%s'%type)
        return ret
Example #10
Source File:    From abu with GNU General Public License v3.0 5 votes vote down vote up
def manhattan_distances_xy(x, y, to_similar=False):
    """
    Manhattan distance between two sequences, computed by passing
    ``manhattan_distances`` to ``_distance_xy``.

    :param x: iterable sequence
    :param y: iterable sequence
    :param to_similar: whether to convert the output into a similarity value afterwards
    :return: float value
    """
    distance = _distance_xy(manhattan_distances, x, y)
    if to_similar:
        # The similarity derived from l1/l2 distances is not intuitive on its
        # own; it is only meaningful when compared with other values.
        distance = 1.0 / (1.0 + distance)
    return distance
Example #11
Source File:    From region with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, dissimilarity=skm.manhattan_distances, affinity=None,
             reduction=np.sum, center=np.mean):
        """
        Initialize the SKATER algorithm.

        dissimilarity : a callable distance metric
        affinity : a callable affinity metric between 0,1.
                   Will be inverted to provide a
                   dissimilarity metric.
        reduction: the reduction applied over all clusters
                   to provide the map score.
        center:    way to compute the center of each region in attribute space

        NOTE: Optimization occurs with respect to a *dissimilarity* metric, so the reduction should
              yield some kind of score where larger values are *less desirable* than smaller values.
              Typically, this means we use addition.

        NOTE(review): the parameter list was truncated in this copy. The names
        and defaults above are taken from how the constructor is called and
        asserted in the accompanying test; the parameter order is assumed --
        confirm against the upstream source.
        """
        if affinity is not None:
            # invert the 0,1 affinity to
            # an unbounded positive dissimilarity
            metric = lambda x: -np.log(affinity(x))
        else:
            metric = dissimilarity
        self.metric = metric
        self.reduction = reduction
        self.center = center
Example #12
Source File:    From mars with Apache License 2.0 4 votes vote down vote up
def testManhattanDistancesExecution(self):
        """Compare tensor manhattan_distances with the scikit-learn result.

        Dense and sparse inputs are each run with two chunk sizes (30 and
        11/12), both as ``(x, y)`` and as ``x`` alone.
        """
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x1 = mt.tensor(raw_x, chunk_size=30)
        y1 = mt.tensor(raw_y, chunk_size=30)

        x2 = mt.tensor(raw_x, chunk_size=11)
        y2 = mt.tensor(raw_y, chunk_size=12)

        raw_sparse_x = sps.random(20, 5, density=0.4, format='csr', random_state=0)
        raw_sparse_y = sps.random(21, 5, density=0.3, format='csr', random_state=0)

        x3 = mt.tensor(raw_sparse_x, chunk_size=30)
        y3 = mt.tensor(raw_sparse_y, chunk_size=30)

        x4 = mt.tensor(raw_sparse_x, chunk_size=11)
        y4 = mt.tensor(raw_sparse_y, chunk_size=12)

        for x, y, is_sparse in [(x1, y1, False),
                                (x2, y2, False),
                                (x3, y3, True),
                                (x4, y4, True)]:
            # The reference must be computed from the same raw data the tensor
            # was built from; without the `else` the sparse cases were checked
            # against the dense arrays.
            if is_sparse:
                rx, ry = raw_sparse_x, raw_sparse_y
            else:
                rx, ry = raw_x, raw_y

            # Sparse inputs are only exercised with sum_over_features=True.
            sv = [True, False] if not is_sparse else [True]

            for sum_over_features in sv:
                # Two-argument form.
                d = manhattan_distances(x, y, sum_over_features)

                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(rx, ry, sum_over_features)

                np.testing.assert_almost_equal(result, expected)

                # Single-argument form (x only).
                d = manhattan_distances(x, sum_over_features=sum_over_features)

                result = self.executor.execute_tensor(d, concat=True)[0]
                expected = sk_manhattan_distances(rx, sum_over_features=sum_over_features)

                np.testing.assert_almost_equal(result, expected)
Example #13
Source File:    From abu with GNU General Public License v3.0 4 votes vote down vote up
def manhattan_distance_matrix(df, scale_end=True, to_similar=False):
    """
    Manhattan distance (L1 norm): the main difference from manhattan_distances_xy is that
    this is not a pairwise x/y distance computation; there is a single matrix input, and the
    input must be a pd.DataFrame, np.array or nested iterable [[],[]]. Analyse the data with
    the purpose of the distance measurement in mind.

    eg:
        input:

                        tsla	bidu	noah	sfun	goog	vips	aapl
            2014-07-25	223.57	226.50	15.32	12.110	589.02	21.349	97.67
            2014-07-28	224.82	225.80	16.13	12.450	590.60	21.548	99.02
            2014-07-29	225.01	220.00	16.75	12.220	585.61	21.190	98.38
            ...	...	...	...	...	...	...	...
            2016-07-22	222.27	160.88	25.50	4.850	742.74	13.510	98.66
            2016-07-25	230.01	160.25	25.57	4.790	739.77	13.390	97.34
            2016-07-26	225.93	163.09	24.75	4.945	740.92	13.655	97.76

            ABuStatsUtil.manhattan_distance_matrix(cc, scale_start=True)

        output:

                    tsla	bidu	noah	sfun	goog	vips	aapl
            tsla	0.0000	0.3698	0.6452	0.7917	0.4670	0.7426	0.3198
            bidu	0.3698	0.0000	0.5969	0.7056	0.6495	0.5822	0.4000
            noah	0.6452	0.5969	0.0000	0.7422	0.7441	0.6913	0.6896
            sfun	0.7917	0.7056	0.7422	0.0000	0.9236	0.4489	1.0000
            goog	0.4670	0.6495	0.7441	0.9236	0.0000	0.8925	0.5134
            vips	0.7426	0.5822	0.6913	0.4489	0.8925	0.0000	0.7038
            aapl	0.3198	0.4000	0.6896	1.0000	0.5134	0.7038	0.0000

            ABuStatsUtil.manhattan_distance_matrix(cc, scale_start=False)

        output:

                    tsla	bidu	noah	sfun	goog	vips	aapl
            tsla	0.0000	0.0640	0.3318	0.3585	0.6415	0.3395	0.1906
            bidu	0.0640	0.0000	0.2750	0.3018	0.6982	0.2827	0.1338
            noah	0.3318	0.2750	0.0000	0.0267	0.9733	0.0124	0.1412
            sfun	0.3585	0.3018	0.0267	0.0000	1.0000	0.0191	0.1680
            goog	0.6415	0.6982	0.9733	1.0000	0.0000	0.9809	0.8320
            vips	0.3395	0.2827	0.0124	0.0191	0.9809	0.0000	0.1489
            aapl	0.1906	0.1338	0.1412	0.1680	0.8320	0.1489	0.000

    NOTE(review): the example calls above pass ``scale_start``, which is not a parameter of
    this function (it accepts ``scale_end``) -- confirm which setting produced each table.

    :param df: pd.DataFrame, np.array or nested iterable [[],[]]; it is called df because it
               is converted to a pd.DataFrame internally
    :param scale_end: whether to standardize the result matrix
    :param to_similar: whether to convert the output into similarity values afterwards
    :return: distance_df, a pd.DataFrame object
    """
    return _distance_matrix(manhattan_distances, df, scale_end, to_similar)