Python sklearn.metrics.pairwise.manhattan_distances() Examples
The following are 13 code examples of sklearn.metrics.pairwise.manhattan_distances().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module sklearn.metrics.pairwise, or try the search function.
Example #1
Source File: test_t_sne.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_tsne_with_different_distance_metrics():
    """Check that TSNE works for different distance metrics.

    For each metric the embedding obtained by passing the metric name must
    equal the embedding obtained from the matching precomputed distance
    matrix (same ``random_state`` in both runs).
    """
    rng = check_random_state(0)
    n_features, n_embedded = 3, 2
    X = rng.randn(50, n_features).astype(np.float32)

    # Pair each metric name with the function that builds its distance matrix.
    metric_pairs = (
        ('manhattan', manhattan_distances),
        ('cosine', cosine_distances),
    )
    for metric, dist_func in metric_pairs:
        by_name = TSNE(metric=metric,
                       n_components=n_embedded,
                       random_state=0)
        embedded_by_name = by_name.fit_transform(X)

        by_matrix = TSNE(metric='precomputed',
                         n_components=n_embedded,
                         random_state=0)
        embedded_by_matrix = by_matrix.fit_transform(dist_func(X))

        assert_array_equal(embedded_by_name, embedded_by_matrix)
Example #2
Source File: feature_engineering.py From CIKM-AnalytiCup-2018 with Apache License 2.0 | 6 votes |
def _get_similarity_values(self, q1_csc, q2_csc):
    """Compute five pairwise measures for each aligned pair of rows.

    ``q1_csc`` and ``q2_csc`` are iterated in lockstep; every element must
    support ``.toarray()`` (presumably rows of a scipy sparse matrix --
    confirm against the caller).

    The helpers ``cs``, ``md``, ``ed``, ``jsc`` and ``minkowski_dis`` are
    module-level names not visible from this function; judging by the result
    names they are a cosine similarity, Manhattan distance, Euclidean
    distance, Jaccard score and a Minkowski distance object -- verify against
    the file's imports.

    :param q1_csc: iterable of row vectors for the first question
    :param q2_csc: iterable of row vectors for the second question
    :return: tuple of five lists
        ``(cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis)``
        with one entry per processed pair
    """
    cosine_sim = []
    manhattan_dis = []
    eucledian_dis = []
    jaccard_dis = []
    minkowsk_dis = []

    for i, j in zip(q1_csc, q2_csc):
        # The pairwise helpers return a matrix; [0][0] extracts the single
        # value for this pair.
        cosine_sim.append(cs(i, j)[0][0])
        manhattan_dis.append(md(i, j)[0][0])
        eucledian_dis.append(ed(i, j)[0][0])

        i_ = i.toarray()
        j_ = j.toarray()

        try:
            jaccard = jsc(i_, j_)
        except Exception:
            # Best effort: a pair the Jaccard helper cannot score counts as 0.
            # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
            # and SystemExit still propagate.
            jaccard = 0
        jaccard_dis.append(jaccard)

        minkowsk_dis.append(minkowski_dis.pairwise(i_, j_)[0][0])

    return cosine_sim, manhattan_dis, eucledian_dis, jaccard_dis, minkowsk_dis
Example #3
Source File: test_skater.py From region with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_init():
    """Check how Spanning_Forest stores its constructor arguments.

    Covers the defaults, explicit overrides, and the case where an affinity
    is supplied and must be turned into a ``-log(affinity)`` dissimilarity.
    """
    # Defaults.
    forest = Spanning_Forest()
    assert forest.metric == skm.manhattan_distances
    assert forest.center == np.mean
    assert forest.reduction == np.sum

    # Explicit overrides are stored unchanged.
    custom = Spanning_Forest(
        dissimilarity=skm.euclidean_distances,
        center=np.median,
        reduction=np.max,
    )
    assert custom.metric == skm.euclidean_distances
    assert custom.center == np.median
    assert custom.reduction == np.max

    # An affinity is wrapped in a lambda computing -log(affinity(x)).
    from_affinity = Spanning_Forest(affinity=skm.cosine_similarity)
    assert isinstance(from_affinity.metric, types.LambdaType)
    expected = -np.log(skm.cosine_similarity(data[:2, ]))
    actual = from_affinity.metric(data[:2, ])
    np.testing.assert_allclose(expected, actual)
Example #4
Source File: recall_at_kappa.py From adapted_deep_embeddings with MIT License | 6 votes |
def recall_at_kappa_leave_one_out(test_emb, test_id, kappa, dist):
    """Leave-one-out recall@k for k = 1..kappa over a single embedding set.

    Only samples whose id occurs at least twice in ``test_id`` are used as
    queries. For each query the distances to every row of ``test_emb`` are
    sorted and the closest entry is dropped (presumably the query itself at
    distance zero -- this relies on the query sorting first; confirm for
    metrics where ties are possible). The rank of the first remaining
    neighbour with the same id decides from which k onwards the query counts
    as a hit.

    :param test_emb: array of embeddings, one row per sample
    :param test_id: 1-D array of ids aligned with ``test_emb``
    :param kappa: largest k to evaluate
    :param dist: one of ``'cos'``, ``'l2'``, ``'l1'``, ``'max_l1'``, ``'max_l2'``
    :return: array of length ``kappa``; entry ``k`` is the fraction of queries
        whose first same-id neighbour has rank <= ``k`` (0-based)
    :raises ValueError: if ``dist`` is not one of the supported names
    """
    unique_ids, unique_counts = np.unique(test_id, return_counts=True)
    unique_ids = unique_ids[unique_counts >= 2]
    # np.isin replaces the deprecated np.in1d (identical for 1-D input).
    good_test_indices = np.isin(test_id, unique_ids)
    valid_test_embs = test_emb[good_test_indices]
    valid_test_ids = test_id[good_test_indices]
    n_correct_at_k = np.zeros(kappa)

    if dist == 'cos':
        distances = find_cos_distances(valid_test_embs, test_emb)
    elif dist == 'l2':
        distances = find_l2_distances(valid_test_embs, test_emb)
    elif dist == 'l1':
        distances = manhattan_distances(valid_test_embs, test_emb)
    elif dist == 'max_l1' or dist == 'max_l2':
        distances = max_distances(valid_test_embs, test_emb, dist)
    else:
        # Fail early with a clear message instead of an UnboundLocalError on
        # ``distances`` further down.
        raise ValueError(
            "unsupported dist %r; expected one of "
            "'cos', 'l2', 'l1', 'max_l1', 'max_l2'" % (dist,))

    for idx, valid_test_id in enumerate(valid_test_ids):
        # Skip the closest entry, then find the first neighbour sharing the id.
        k_sorted_indices = np.argsort(distances[idx])[1:]
        first_correct_position = np.where(
            test_id[k_sorted_indices] == valid_test_id)[0][0]
        if first_correct_position < kappa:
            # A hit at rank r is also a hit for every larger k.
            n_correct_at_k[first_correct_position:] += 1

    return 1. * n_correct_at_k / len(valid_test_ids)
Example #5
Source File: recall_at_kappa.py From adapted_deep_embeddings with MIT License | 6 votes |
def recall_at_kappa_support_query(x_support, y_support, x_query, y_query, kappa, dist):
    """Recall@k for k = 1..kappa of query samples against a support set.

    For each query the support samples are sorted by distance and the rank of
    the first support sample carrying the query's label decides from which k
    onwards the query counts as a hit.

    :param x_support: support embeddings, one row per sample
    :param y_support: array of labels aligned with ``x_support``
    :param x_query: query embeddings, one row per sample
    :param y_query: labels aligned with ``x_query``
    :param kappa: largest k to evaluate
    :param dist: one of ``'cos'``, ``'l2'``, ``'l1'``, ``'max_l1'``, ``'max_l2'``
    :return: array of length ``kappa``; entry ``k`` is the fraction of queries
        whose first correctly-labelled support neighbour has rank <= ``k``
        (0-based)
    :raises ValueError: if ``dist`` is not one of the supported names
    :raises IndexError: if a query label does not occur in ``y_support``
    """
    n_correct_at_k = np.zeros(kappa)

    if dist == 'cos':
        distances = find_cos_distances(x_query, x_support)
    elif dist == 'l2':
        distances = find_l2_distances(x_query, x_support)
    elif dist == 'l1':
        distances = manhattan_distances(x_query, x_support)
    elif dist == 'max_l1' or dist == 'max_l2':
        distances = max_distances(x_query, x_support, dist)
    else:
        # Fail early with a clear message instead of an UnboundLocalError on
        # ``distances`` inside the loop.
        raise ValueError(
            "unsupported dist %r; expected one of "
            "'cos', 'l2', 'l1', 'max_l1', 'max_l2'" % (dist,))

    for idx, valid_test_id in enumerate(y_query):
        k_sorted_indices = np.argsort(distances[idx])
        first_correct_position = np.where(
            y_support[k_sorted_indices] == valid_test_id)[0][0]
        if first_correct_position < kappa:
            # A hit at rank r is also a hit for every larger k.
            n_correct_at_k[first_correct_position:] += 1

    return 1. * n_correct_at_k / len(y_query)
Example #6
Source File: test_t_sne.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_tsne_with_different_distance_metrics():
    """Make sure that TSNE works for different distance metrics.

    The embedding computed from a metric name is compared with the embedding
    computed from the corresponding precomputed distance matrix; both runs
    use ``random_state=0`` and must match exactly.
    """
    n_components_original = 3
    n_components_embedding = 2
    X = check_random_state(0).randn(50, n_components_original)
    X = X.astype(np.float32)

    dist_func_by_metric = [
        ('manhattan', manhattan_distances),
        ('cosine', cosine_distances),
    ]
    for metric, dist_func in dist_func_by_metric:
        tsne_kwargs = dict(n_components=n_components_embedding, random_state=0)

        from_metric = TSNE(metric=metric, **tsne_kwargs).fit_transform(X)
        from_precomputed = TSNE(metric='precomputed', **tsne_kwargs).fit_transform(
            dist_func(X))

        assert_array_equal(from_metric, from_precomputed)
Example #7
Source File: manhattan.py From mars with Apache License 2.0 | 5 votes |
def execute(cls, ctx, op):
    """Run the Manhattan-distance operand and store its result in ``ctx``.

    The two inputs are fetched from ``ctx`` by key and moved to a common
    device via ``as_same_device``. The computation is delegated to
    ``sklearn_manhattan_distances``; when that name is ``None`` a
    ``NotImplementedError`` is raised.

    NOTE(review): takes ``cls`` as first argument, so this is presumably a
    ``@classmethod`` whose decorator is outside this view.

    :param ctx: mapping from chunk key to concrete data; the output is written
        under ``op.outputs[0].key``
    :param op: operand providing ``inputs``, ``outputs``, ``device`` and
        ``sum_over_features``
    :raises NotImplementedError: if ``sklearn_manhattan_distances`` is ``None``
    """
    input_data = [ctx[inp.key] for inp in op.inputs]
    (x, y), device_id, _xp = as_same_device(
        input_data, device=op.device, ret_extra=True)
    out = op.outputs[0]

    with device(device_id):
        if sklearn_manhattan_distances is None:  # pragma: no cover
            # we cannot support sparse
            raise NotImplementedError('cannot support calculate manhattan '
                                      'distances on GPU')
        ctx[out.key] = sklearn_manhattan_distances(
            x, y, sum_over_features=op.sum_over_features)
Example #8
Source File: vector_opr.py From tokenquery with GNU General Public License v3.0 | 5 votes |
def vec_man_dist(token_input, operation_input):
    """Compare the Manhattan distance between two vectors with a threshold.

    ``operation_input`` has the form ``<ref_vector><op><value>`` where
    ``<op>`` is one of ``==``, ``>=``, ``<=``, ``!=``, ``<>``, ``<``, ``>``,
    ``=``. The distance between the vector parsed from ``token_input`` and the
    one parsed from ``<ref_vector>`` is compared against ``float(<value>)``.

    :param token_input: string form of the token's vector
    :param operation_input: string holding reference vector, operator and
        threshold
    :return: the result of comparing ``manhattan_distances(...)`` with the
        threshold (whatever that comparison yields -- presumably a 1x1
        boolean array; confirm against ``change_string_to_vector``), or
        ``False`` when the input is malformed, the threshold or vectors
        cannot be parsed, or the vector lengths differ
    """
    operation_string = None
    ref_vector_string = None
    cond_value_string = None

    # Two-character operators come first so that e.g. '>=' is not detected
    # as a plain '>'.
    for opr_sign in ['==', '>=', '<=', '!=', '<>', '<', '>', '=']:
        if opr_sign in operation_input:
            parts = operation_input.split(opr_sign)
            ref_vector_string = parts[0]
            operation_string = opr_sign
            cond_value_string = parts[1]
            break

    if not (ref_vector_string and cond_value_string and operation_string):
        # TODO raise tokenregex error
        print('Problem with the operation input')
        # Explicit False keeps the return value consistent with the other
        # failure paths (previously an implicit None).
        return False

    try:
        cond_value = float(cond_value_string)
        ref_vector = change_string_to_vector(ref_vector_string)
        token_vector = change_string_to_vector(token_input)

        # Validate before computing: the distance is only meaningful for
        # vectors of equal length.
        if len(ref_vector) != len(token_vector):
            print('len of vectors does not match')
            return False

        # Computed once and reused by whichever comparison applies.
        distance = manhattan_distances(token_vector, ref_vector)
    except ValueError:
        # TODO raise tokenregex error
        return False

    if operation_string == "=" or operation_string == "==":
        return distance == cond_value
    elif operation_string == "<":
        return distance < cond_value
    elif operation_string == ">":
        return distance > cond_value
    elif operation_string == ">=":
        return distance >= cond_value
    elif operation_string == "<=":
        return distance <= cond_value
    elif operation_string == "!=" or operation_string == "<>":
        return distance != cond_value
    return False
Example #9
Source File: similarity.py From nlp_research with MIT License | 5 votes |
def similarity(self, query, type):
    """Score ``query`` against every item of the corpus.

    For ``'cosine'``, ``'manhattan'`` and ``'euclidean'`` the query is
    vectorised with ``self.get_vector`` and compared with each entry of
    ``self.corpus_vec``; the ``[0][0]`` element of each pairwise result is
    collected. For ``'bm25'`` the query is whitespace-tokenised and passed to
    ``self.bm25_model.get_scores``.

    Note that ``'cosine'`` yields similarities while ``'manhattan'`` and
    ``'euclidean'`` yield distances, so larger is not uniformly "closer".

    :param query: query string
    :param type: one of ``'cosine'``, ``'manhattan'``, ``'euclidean'``,
        ``'bm25'`` (the name shadows the builtin but is part of the public
        signature)
    :return: list of scores for the vector-based types; for ``'bm25'``
        whatever ``get_scores`` returns
    :raises ValueError: if ``type`` is not a supported name
    """
    # Identity check: ``!= None`` is evaluated element-wise by array-like
    # corpora and can make the assert itself raise.
    assert self.corpus is not None, "self.corpus can't be None"

    # Names are looked up only in the branch that needs them, as before.
    if type == 'cosine':
        pairwise_fn = cosine_similarity
    elif type == 'manhattan':
        pairwise_fn = manhattan_distances
    elif type == 'euclidean':
        pairwise_fn = euclidean_distances
    elif type == 'bm25':
        return self.bm25_model.get_scores(query.split())
    else:
        raise ValueError('similarity type error:%s' % type)

    # NOTE: a 'jaccard' mode was sketched here previously but never enabled.
    query_vec = self.get_vector(query)
    return [pairwise_fn(item, query_vec)[0][0] for item in self.corpus_vec]
Example #10
Source File: ABuStatsUtil.py From abu with GNU General Public License v3.0 | 5 votes |
def manhattan_distances_xy(x, y, to_similar=False):
    """
    Manhattan distance (L1 norm) between two sequences.

    Delegates to ``_distance_xy`` with ``manhattan_distances`` as the metric.
    Whether the data should be scaled beforehand (``scale_start``) depends on
    what the distance is meant to measure: results with and without scaling
    differ completely, so decide based on the requirements and on an
    understanding of the data.

    :param x: iterable sequence
    :param y: iterable sequence
    :param to_similar: if true, convert the distance to a similarity value
        via ``1 / (1 + distance)`` before returning
    :return: the distance (or similarity); a float according to the original
        documentation
    """
    distance = _distance_xy(manhattan_distances, x, y)
    if not to_similar:
        return distance
    # Similarity values derived from L1/L2 distances are not intuitive on
    # their own; they are only useful for comparison with each other.
    return 1.0 / (1.0 + distance)
Example #11
Source File: skater.py From region with BSD 3-Clause "New" or "Revised" License | 5 votes |
def __init__(self, dissimilarity=skm.manhattan_distances,
             affinity=None,
             reduction=np.sum,
             center=np.mean):
    """
    Initialize the SKATER algorithm.

    dissimilarity : a callable distance metric
    affinity : a callable affinity metric between 0,1.
               Will be inverted to provide a
               dissimilarity metric.
    reduction: the reduction applied over all clusters
               to provide the map score.
    center:    way to compute the center of each region in attribute space

    NOTE: Optimization occurs with respect to a *dissimilarity* metric, so the reduction should
          yield some kind of score where larger values are *less desirable* than smaller values.
          Typically, this means we use addition.
    """
    if affinity is None:
        metric = dissimilarity
    else:
        # Invert the 0,1 affinity into an unbounded positive dissimilarity;
        # when given, the affinity takes precedence over ``dissimilarity``.
        metric = lambda x: -np.log(affinity(x))

    self.center = center
    self.reduction = reduction
    self.metric = metric
Example #12
Source File: test_manhattan_distances.py From mars with Apache License 2.0 | 4 votes |
def testManhattanDistancesExecution(self):
    """Compare tensor ``manhattan_distances`` with ``sk_manhattan_distances``.

    Dense and sparse inputs are each tried with one chunk (chunk size 30) and
    with several chunks (chunk sizes 11 / 12). Both the two-argument call and
    the single-argument call are checked. Sparse inputs are only tested with
    ``sum_over_features=True``.
    """
    raw_x = np.random.rand(20, 5)
    raw_y = np.random.rand(21, 5)
    raw_sparse_x = sps.random(20, 5, density=0.4, format='csr', random_state=0)
    raw_sparse_y = sps.random(21, 5, density=0.3, format='csr', random_state=0)

    # (reference x, reference y, sum_over_features values to exercise)
    raw_inputs = (
        (raw_x, raw_y, [True, False]),
        (raw_sparse_x, raw_sparse_y, [True]),
    )
    chunkings = ((30, 30), (11, 12))

    cases = []
    for rx, ry, flags in raw_inputs:
        for x_chunk, y_chunk in chunkings:
            cases.append((mt.tensor(rx, chunk_size=x_chunk),
                          mt.tensor(ry, chunk_size=y_chunk),
                          rx, ry, flags))

    for x, y, rx, ry, flags in cases:
        for sum_over_features in flags:
            # Distances between x and y.
            d = manhattan_distances(x, y, sum_over_features)
            result = self.executor.execute_tensor(d, concat=True)[0]
            expected = sk_manhattan_distances(rx, ry, sum_over_features)
            np.testing.assert_almost_equal(result, expected)

            # Distances with only x supplied.
            d = manhattan_distances(x, sum_over_features=sum_over_features)
            result = self.executor.execute_tensor(d, concat=True)[0]
            expected = sk_manhattan_distances(rx, sum_over_features=sum_over_features)
            np.testing.assert_almost_equal(result, expected)
Example #13
Source File: ABuStatsUtil.py From abu with GNU General Public License v3.0 | 4 votes |
def manhattan_distance_matrix(df, scale_end=True, to_similar=False):
    """
    Manhattan distance (L1 norm) matrix.

    The main difference from manhattan_distances_xy is that this is not a
    distance between two given sequences: there is a single matrix input,
    which must be a pd.DataFrame, an np.array or a nested iterable [[],[]].
    Whether the data should be scaled beforehand (scale_start) depends on
    what the distance is meant to measure: results with and without scaling
    differ completely, so decide based on the requirements and on an
    understanding of the data.

    NOTE(review): the examples below pass ``scale_start``, but this function's
    parameter is named ``scale_end`` -- confirm which is intended.

    eg:
        input:

                        tsla    bidu   noah    sfun    goog    vips   aapl
            2014-07-25  223.57  226.50  15.32  12.110  589.02  21.349  97.67
            2014-07-28  224.82  225.80  16.13  12.450  590.60  21.548  99.02
            2014-07-29  225.01  220.00  16.75  12.220  585.61  21.190  98.38
            ...            ...     ...    ...     ...     ...     ...    ...
            2016-07-22  222.27  160.88  25.50   4.850  742.74  13.510  98.66
            2016-07-25  230.01  160.25  25.57   4.790  739.77  13.390  97.34
            2016-07-26  225.93  163.09  24.75   4.945  740.92  13.655  97.76

        ABuStatsUtil.manhattan_distance_matrix(cc, scale_start=True)

        output:

                    tsla    bidu    noah    sfun    goog    vips    aapl
            tsla  0.0000  0.3698  0.6452  0.7917  0.4670  0.7426  0.3198
            bidu  0.3698  0.0000  0.5969  0.7056  0.6495  0.5822  0.4000
            noah  0.6452  0.5969  0.0000  0.7422  0.7441  0.6913  0.6896
            sfun  0.7917  0.7056  0.7422  0.0000  0.9236  0.4489  1.0000
            goog  0.4670  0.6495  0.7441  0.9236  0.0000  0.8925  0.5134
            vips  0.7426  0.5822  0.6913  0.4489  0.8925  0.0000  0.7038
            aapl  0.3198  0.4000  0.6896  1.0000  0.5134  0.7038  0.0000

        ABuStatsUtil.manhattan_distance_matrix(cc, scale_start=False)

        output:

                    tsla    bidu    noah    sfun    goog    vips    aapl
            tsla  0.0000  0.0640  0.3318  0.3585  0.6415  0.3395  0.1906
            bidu  0.0640  0.0000  0.2750  0.3018  0.6982  0.2827  0.1338
            noah  0.3318  0.2750  0.0000  0.0267  0.9733  0.0124  0.1412
            sfun  0.3585  0.3018  0.0267  0.0000  1.0000  0.0191  0.1680
            goog  0.6415  0.6982  0.9733  1.0000  0.0000  0.9809  0.8320
            vips  0.3395  0.2827  0.0124  0.0191  0.9809  0.0000  0.1489
            aapl  0.1906  0.1338  0.1412  0.1680  0.8320  0.1489  0.000

    :param df: pd.DataFrame, np.array or nested iterable [[],[]]; named df
               because, per the original documentation, it is converted to a
               pd.DataFrame internally
    :param scale_end: normalise the resulting matrix
    :param to_similar: convert the output to similarity values afterwards
    :return: distance_df, a pd.DataFrame object
    """
    distance_df = _distance_matrix(manhattan_distances, df, scale_end, to_similar)
    return distance_df