Python scipy.spatial.distance.canberra() Examples
The following are 8
code examples of scipy.spatial.distance.canberra().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
scipy.spatial.distance
, or try the search function
.
Example #1
Source File: make_handcrafted_33_features.py From wsdm19cup with MIT License | 5 votes |
def __calc_distances__(self, v1s, v2s, is_sparse=True): if is_sparse: dcosine = np.array([cosine(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x.toarray(), y.toarray(), 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x.toarray().ravel()) for x in v1s] dskew_q2 = [skew(x.toarray().ravel()) for x in v2s] dkur_q1 = [kurtosis(x.toarray().ravel()) for x in v1s] dkur_q2 = [kurtosis(x.toarray().ravel()) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) else: dcosine = np.array([cosine(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x, y, 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x) for x in v1s] dskew_q2 = [skew(x) for x in v2s] dkur_q1 = [kurtosis(x) for x in v1s] dkur_q2 = [kurtosis(x) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) return np.hstack((dcosine,dcityblock,dcanberra,deuclidean,dminkowski,dbraycurtis,dskew_diff,dkur_diff))
Example #2
Source File: train_predict_trees_batch2.py From wsdm19cup with MIT License | 5 votes |
def __calc_distances__(self, v1s, v2s, is_sparse=True): if is_sparse: dcosine = np.array([cosine(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x.toarray(), y.toarray(), 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x.toarray().ravel()) for x in v1s] dskew_q2 = [skew(x.toarray().ravel()) for x in v2s] dkur_q1 = [kurtosis(x.toarray().ravel()) for x in v1s] dkur_q2 = [kurtosis(x.toarray().ravel()) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) else: dcosine = np.array([cosine(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x, y, 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x) for x in v1s] dskew_q2 = [skew(x) for x in v2s] dkur_q1 = [kurtosis(x) for x in v1s] dkur_q2 = [kurtosis(x) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) return np.hstack((dcosine,dcityblock,dcanberra,deuclidean,dminkowski,dbraycurtis,dskew_diff,dkur_diff))
Example #3
Source File: train_predict_trees_batch1.py From wsdm19cup with MIT License | 5 votes |
def __calc_distances__(self, v1s, v2s, is_sparse=True): if is_sparse: dcosine = np.array([cosine(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x.toarray(), y.toarray(), 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x.toarray().ravel()) for x in v1s] dskew_q2 = [skew(x.toarray().ravel()) for x in v2s] dkur_q1 = [kurtosis(x.toarray().ravel()) for x in v1s] dkur_q2 = [kurtosis(x.toarray().ravel()) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) else: dcosine = np.array([cosine(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x, y, 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x) for x in v1s] dskew_q2 = [skew(x) for x in v2s] dkur_q1 = [kurtosis(x) for x in v1s] dkur_q2 = [kurtosis(x) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) return np.hstack((dcosine,dcityblock,dcanberra,deuclidean,dminkowski,dbraycurtis,dskew_diff,dkur_diff))
Example #4
Source File: train_predict_trees_batch3.py From wsdm19cup with MIT License | 5 votes |
def __calc_distances__(self, v1s, v2s, is_sparse=True): if is_sparse: dcosine = np.array([cosine(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x.toarray(), y.toarray(), 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x.toarray(), y.toarray()) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x.toarray().ravel()) for x in v1s] dskew_q2 = [skew(x.toarray().ravel()) for x in v2s] dkur_q1 = [kurtosis(x.toarray().ravel()) for x in v1s] dkur_q2 = [kurtosis(x.toarray().ravel()) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) else: dcosine = np.array([cosine(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcityblock = np.array([cityblock(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dcanberra = np.array([canberra(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) deuclidean = np.array([euclidean(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dminkowski = np.array([minkowski(x, y, 3) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dbraycurtis = np.array([braycurtis(x, y) for (x, y) in zip(v1s, v2s)]).reshape((-1,1)) dskew_q1 = [skew(x) for x in v1s] dskew_q2 = [skew(x) for x in v2s] dkur_q1 = [kurtosis(x) for x in v1s] dkur_q2 = [kurtosis(x) for x in v2s] dskew_diff = np.abs(np.array(dskew_q1) - np.array(dskew_q2)).reshape((-1,1)) dkur_diff = np.abs(np.array(dkur_q1) - np.array(dkur_q2)).reshape((-1,1)) return np.hstack((dcosine,dcityblock,dcanberra,deuclidean,dminkowski,dbraycurtis,dskew_diff,dkur_diff))
Example #5
Source File: utils.py From teneto with GNU General Public License v3.0 | 5 votes |
def get_distance_function(requested_metric): """ This function returns a specified distance function. Paramters --------- requested_metric: str Distance function. Can be any function in: https://docs.scipy.org/doc/scipy/reference/spatial.distance.html. Returns ------- requested_metric : distance function """ distance_options = { 'braycurtis': distance.braycurtis, 'canberra': distance.canberra, 'chebyshev': distance.chebyshev, 'cityblock': distance.cityblock, 'correlation': distance.correlation, 'cosine': distance.cosine, 'euclidean': distance.euclidean, 'sqeuclidean': distance.sqeuclidean, 'dice': distance.dice, 'hamming': distance.hamming, 'jaccard': distance.jaccard, 'kulsinski': distance.kulsinski, 'matching': distance.matching, 'rogerstanimoto': distance.rogerstanimoto, 'russellrao': distance.russellrao, 'sokalmichener': distance.sokalmichener, 'sokalsneath': distance.sokalsneath, 'yule': distance.yule, } if requested_metric in distance_options: return distance_options[requested_metric] else: raise ValueError('Distance function cannot be found.')
Example #6
Source File: taar_similarity.py From telemetry-airflow with Mozilla Public License 2.0 | 5 votes |
def similarity_function(x, y): """ Similarity function for comparing user features. This actually really should be implemented in taar.similarity_recommender and then imported here for consistency. """ def safe_get(field, row, default_value): # Safely get a value from the Row. If the value is None, get the # default value. return row[field] if row[field] is not None else default_value # Extract the values for the categorical and continuous features for both # the x and y samples. Use an empty string as the default value for missing # categorical fields and 0 for the continuous ones. x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES] y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES] x_continuous_features = [ float(safe_get(k, x, 0)) for k in CONTINUOUS_FEATURES ] y_continuous_features = [ float(safe_get(k, y, 0)) for k in CONTINUOUS_FEATURES ] # Here a larger distance indicates a poorer match between categorical variables. j_d = distance.hamming(x_categorical_features, y_categorical_features) j_c = distance.canberra(x_continuous_features, y_continuous_features) # Take the product of similarities to attain a univariate similarity score. # Add a minimal constant to prevent zero values from categorical features. # Note: since both the distance function return a Numpy type, we need to # call the |item| function to get the underlying Python type. If we don't # do that this job will fail when performing KDE due to SPARK-20803 on # Spark 2.2.0. return abs((j_c + 0.001) * j_d).item()
Example #7
Source File: taar_similarity.py From python_mozetl with MIT License | 5 votes |
def similarity_function(x, y): """ Similarity function for comparing user features. This actually really should be implemented in taar.similarity_recommender and then imported here for consistency. """ def safe_get(field, row, default_value): # Safely get a value from the Row. If the value is None, get the # default value. return row[field] if row[field] is not None else default_value # Extract the values for the categorical and continuous features for both # the x and y samples. Use an empty string as the default value for missing # categorical fields and 0 for the continuous ones. x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES] x_continuous_features = [safe_get(k, x, 0) for k in CONTINUOUS_FEATURES] y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES] y_continuous_features = [safe_get(k, y, 0) for k in CONTINUOUS_FEATURES] # Here a larger distance indicates a poorer match between categorical variables. j_d = distance.hamming(x_categorical_features, y_categorical_features) j_c = distance.canberra(x_continuous_features, y_continuous_features) # Take the product of similarities to attain a univariate similarity score. # Add a minimal constant to prevent zero values from categorical features. # Note: since both the distance function return a Numpy type, we need to # call the |item| function to get the underlying Python type. If we don't # do that this job will fail when performing KDE due to SPARK-20803 on # Spark 2.2.0. return abs((j_c + 0.001) * j_d).item()
Example #8
Source File: netsimile.py From netrd with MIT License | 4 votes |
def dist(self, G1, G2): """A scalable approach to network similarity. A network similarity measure based on node signature distributions. The results dictionary includes the underlying feature matrices in `'feature_matrices'` and the underlying signature vectors in `'signature_vectors'`. Parameters ---------- G1, G2 (nx.Graph) two undirected networkx graphs to be compared. Returns ------- dist (float) the distance between `G1` and `G2`. References ---------- .. [1] Michele Berlingerio, Danai Koutra, Tina Eliassi-Rad, Christos Faloutsos: NetSimile: A Scalable Approach to Size-Independent Network Similarity. CoRR abs/1209.2684 (2012) """ # find the graph node feature matrices G1_node_features = feature_extraction(G1) G2_node_features = feature_extraction(G2) # get the graph signature vectors G1_signature = graph_signature(G1_node_features) G2_signature = graph_signature(G2_node_features) # the final distance is the absolute canberra distance dist = abs(canberra(G1_signature, G2_signature)) self.results['feature_matrices'] = G1_node_features, G2_node_features self.results['signature_vectors'] = G1_signature, G2_signature self.results['dist'] = dist return dist