Python scipy.spatial.distance.hamming() Examples
The following are 14 code examples of scipy.spatial.distance.hamming(). You can go to the original project or source file by following the link above each example, or check out all the available functions/classes of the module scipy.spatial.distance.
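Before the examples, a quick note on semantics: scipy.spatial.distance.hamming(u, v) returns the proportion of positions at which the two 1-D arrays disagree, not the raw count of mismatches. A minimal sketch (array values invented for illustration):

import numpy as np
from scipy.spatial.distance import hamming

u = np.array([1, 0, 1, 1])
v = np.array([1, 1, 1, 0])

# Two of the four positions differ, so the result is 2/4 = 0.5,
# a fraction rather than a raw count of differing entries.
assert hamming(u, v) == 0.5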
Example #1
Source File: test_classification.py From Mastering-Elasticsearch-7.0 with MIT License
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1 - y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
    assert_warns_message(DeprecationWarning,
                         "The labels parameter is unused. It was"
                         " deprecated in version 0.21 and"
                         " will be removed in version 0.23",
                         hamming_loss, y1, y2, labels=[0, 1])
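As the sp_hamming assertion above indicates, sklearn's hamming_loss and scipy's hamming agree on 1-D binary label vectors; a minimal check (label values invented for illustration):

import numpy as np
from scipy.spatial.distance import hamming
from sklearn.metrics import hamming_loss

a = np.array([0, 1, 1, 0])
b = np.array([0, 0, 1, 1])

# Both report the fraction of mismatched labels: 2 of 4, i.e. 0.5.
assert hamming_loss(a, b) == hamming(a, b) == 0.5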
Example #2
Source File: cost_sensitive_reference_pair_encoding.py From libact with BSD 2-Clause "Simplified" License
def make_query(self):
    dataset = self.dataset
    unlabeled_entry_ids, X_pool = dataset.get_unlabeled_entries()
    X_pool = np.asarray(X_pool)

    self.csrpe_.train(dataset)
    self.model_.train(dataset)

    predY = self.model_.predict(X_pool)
    Z = self.csrpe_.predicted_code(X_pool)
    predZ = self.csrpe_.encode(predY)

    dist = paired_distances(Z, predZ, metric=hamming)  # z1 z2
    dist2 = self.csrpe_.predict_dist(X_pool)  # z1 zt
    #dist3 = self.csrpe.distance(predZ) # z2 zt
    dist = dist + dist2
    #dist = dist + dist3

    ask_id = self.random_state_.choice(
        np.where(np.isclose(dist, np.max(dist)))[0])
    return unlabeled_entry_ids[ask_id]
Example #3
Source File: classification.py From brainiak with Apache License 2.0
def example_of_aggregating_sim_matrix(raw_data, labels, num_subjects,
                                      num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    rearranged_data = raw_data[num_epochs_per_subj:] + raw_data[0:num_epochs_per_subj]
    rearranged_labels = labels[num_epochs_per_subj:] + labels[0:num_epochs_per_subj]
    clf.fit(list(zip(rearranged_data, rearranged_data)), rearranged_labels,
            num_training_samples=num_epochs_per_subj*(num_subjects-1))
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[0:num_epochs_per_subj]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(clf.score(None, test_labels))
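Note the idiom hamming(predict, ...) * num_epochs_per_subj in this and the following brainiak examples: since hamming() returns the fraction of mismatched predictions, scaling by the number of test epochs converts it back into an error count. A toy check (values invented for illustration):

import numpy as np
from scipy.spatial.distance import hamming

predict = np.array([0, 1, 1, 0])
truth = np.array([0, 1, 0, 1])

# Half of the four predictions are wrong: 0.5 * 4 = 2 errors.
assert hamming(predict, truth) * len(truth) == 2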
Example #4
Source File: classification.py From brainiak with Apache License 2.0
def example_of_correlating_two_components(raw_data, raw_data2, labels,
                                          num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)
    clf.fit(list(zip(raw_data[0:num_training_samples],
                     raw_data2[0:num_training_samples])),
            labels[0:num_training_samples])
    X = list(zip(raw_data[num_training_samples:], raw_data2[num_training_samples:]))
    predict = clf.predict(X)
    print(predict)
    print(clf.decision_function(X))
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(clf.score(X, test_labels))
Example #5
Source File: classification.py From brainiak with Apache License 2.0
def example_of_correlating_two_components_aggregating_sim_matrix(
        raw_data, raw_data2, labels, num_subjects, num_epochs_per_subj):
    # aggregate the kernel matrix to save memory
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    clf = Classifier(svm_clf, num_processed_voxels=1000,
                     epochs_per_subj=num_epochs_per_subj)
    num_training_samples = num_epochs_per_subj*(num_subjects-1)
    clf.fit(list(zip(raw_data, raw_data2)), labels,
            num_training_samples=num_training_samples)
    predict = clf.predict()
    print(predict)
    print(clf.decision_function())
    test_labels = labels[num_training_samples:]
    incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
    logger.info(
        'when aggregating the similarity matrix to save memory, '
        'the accuracy is %d / %d = %.2f' %
        (num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
         (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
    )
    # when the kernel matrix is computed in portion, the test data is already in
    print(clf.score(None, test_labels))

# python3 classification.py face_scene bet.nii.gz face_scene/prefrontal_top_mask.nii.gz face_scene/fs_epoch_labels.npy
Example #6
Source File: test_libdist.py From enspara with GNU General Public License v3.0
def test_hamming_distance():
    dtypes = ['|S1']
    for elem_size in ['8', '16', '32', '64']:
        for int_type in ['int', 'uint']:
            dtypes.append(int_type + elem_size)

    for dtype in dtypes:
        X = np.array([[1, 3, 8],
                      [3, 1, 8],
                      [1, 1, 7]]).astype(dtype)
        y = np.array([1, 2, 3]).astype(dtype)

        d_expected = np.zeros((len(X)))
        for i in range(len(X)):
            d_expected[i] = scipy_hamming(X[i], y)

        d_enspara = libdist.hamming(X, y)

        assert_array_equal(d_expected, d_enspara)
Example #7
Source File: test_classification.py From twitter-stock-recommendation with MIT License
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1 - y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
    assert_warns(DeprecationWarning, hamming_loss, y1, y2, classes=[0, 1])
Example #8
Source File: plotting.py From kvae with MIT License
def plot_trajectory_uncertainty(true, gen, filter, smooth, filename):
    sequences, timesteps, h, w = true.shape
    errors = dict(Generated=list(), Filtered=list(), Smoothed=list())
    for label, var in zip(('Generated', 'Filtered', 'Smoothed'), (gen, filter, smooth)):
        for step in range(timesteps):
            errors[label].append(hamming(true[:, step].ravel() > 0.5,
                                         var[:, step].ravel() > 0.5))
        plt.plot(np.linspace(1, timesteps, num=timesteps).astype(int),
                 errors[label], linewidth=3, ms=20, label=label)
    plt.xlabel('Steps', fontsize=20)
    plt.ylabel('Hamming distance', fontsize=20)
    plt.legend(fontsize=20)
    plt.savefig(filename)
    plt.close()
Example #9
Source File: classification.py From brainiak with Apache License 2.0
def example_of_cross_validation_with_detailed_info(raw_data, labels, num_subjects,
                                                   num_epochs_per_subj):
    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1, gamma='auto')
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    for i in range(num_subjects):
        leave_start = i * num_epochs_per_subj
        leave_end = (i+1) * num_epochs_per_subj
        training_data = raw_data[0:leave_start] + raw_data[leave_end:]
        test_data = raw_data[leave_start:leave_end]
        training_labels = labels[0:leave_start] + labels[leave_end:]
        test_labels = labels[leave_start:leave_end]
        clf.fit(list(zip(training_data, training_data)), training_labels)
        # joblib can be used for saving and loading models
        #joblib.dump(clf, 'model/logistic.pkl')
        #clf = joblib.load('model/svm.pkl')
        predict = clf.predict(list(zip(test_data, test_data)))
        print(predict)
        print(clf.decision_function(list(zip(test_data, test_data))))
        incorrect_predict = hamming(predict, np.asanyarray(test_labels)) * num_epochs_per_subj
        logger.info(
            'when leaving subject %d out for testing, the accuracy is %d / %d = %.2f' %
            (i, num_epochs_per_subj-incorrect_predict, num_epochs_per_subj,
             (num_epochs_per_subj-incorrect_predict) * 1.0 / num_epochs_per_subj)
        )
        print(clf.score(list(zip(test_data, test_data)), test_labels))
Example #10
Source File: utils.py From teneto with GNU General Public License v3.0
def get_distance_function(requested_metric):
    """
    This function returns a specified distance function.

    Parameters
    ----------
    requested_metric : str
        Distance function. Can be any function in:
        https://docs.scipy.org/doc/scipy/reference/spatial.distance.html.

    Returns
    -------
    requested_metric : distance function
    """
    distance_options = {
        'braycurtis': distance.braycurtis,
        'canberra': distance.canberra,
        'chebyshev': distance.chebyshev,
        'cityblock': distance.cityblock,
        'correlation': distance.correlation,
        'cosine': distance.cosine,
        'euclidean': distance.euclidean,
        'sqeuclidean': distance.sqeuclidean,
        'dice': distance.dice,
        'hamming': distance.hamming,
        'jaccard': distance.jaccard,
        'kulsinski': distance.kulsinski,
        'matching': distance.matching,
        'rogerstanimoto': distance.rogerstanimoto,
        'russellrao': distance.russellrao,
        'sokalmichener': distance.sokalmichener,
        'sokalsneath': distance.sokalsneath,
        'yule': distance.yule,
    }
    if requested_metric in distance_options:
        return distance_options[requested_metric]
    else:
        raise ValueError('Distance function cannot be found.')
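A usage sketch of the lookup above (metric name and input vectors chosen purely for illustration):

# Resolve a metric name to its scipy implementation, then apply it.
dist_fn = get_distance_function('hamming')
d = dist_fn([1, 0, 1, 1], [1, 1, 1, 0])  # 0.5: half the entries differ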
Example #11
Source File: utils.py From teneto with GNU General Public License v3.0
def check_distance_funciton_input(distance_func_name, netinfo):
    """
    Checks distance_func_name; if it is specified as 'default', selects a
    default distance function given the type of the network.

    Parameters
    ----------
    distance_func_name : str
        distance function name.
    netinfo : dict
        the output of utils.process_input

    Returns
    -------
    distance_func_name : str
        distance function name.
    """
    if distance_func_name == 'default' and netinfo['nettype'][0] == 'b':
        print('Default distance function specified. As network is binary, using Hamming')
        distance_func_name = 'hamming'
    elif distance_func_name == 'default' and netinfo['nettype'][0] == 'w':
        distance_func_name = 'euclidean'
        print(
            'Default distance function specified. '
            'As network is weighted, using Euclidean')
    return distance_func_name
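A quick sketch of the default-selection logic above; the 'bu'/'wd' nettype codes are assumptions inferred from the netinfo['nettype'][0] check, not taken from this file:

# Binary network ('b...') -> Hamming; weighted network ('w...') -> Euclidean.
assert check_distance_funciton_input('default', {'nettype': 'bu'}) == 'hamming'
assert check_distance_funciton_input('default', {'nettype': 'wd'}) == 'euclidean'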
Example #12
Source File: taar_similarity.py From telemetry-airflow with Mozilla Public License 2.0
def similarity_function(x, y):
    """ Similarity function for comparing user features.

    This actually really should be implemented in taar.similarity_recommender
    and then imported here for consistency.
    """
    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, return the
        # default value.
        return row[field] if row[field] is not None else default_value

    # Extract the values for the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [
        float(safe_get(k, x, 0)) for k in CONTINUOUS_FEATURES
    ]
    y_continuous_features = [
        float(safe_get(k, y, 0)) for k in CONTINUOUS_FEATURES
    ]

    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = distance.hamming(x_categorical_features, y_categorical_features)
    j_c = distance.canberra(x_continuous_features, y_continuous_features)

    # Take the product of similarities to attain a univariate similarity score.
    # Add a minimal constant to prevent zero values from categorical features.
    # Note: since both the distance functions return a Numpy type, we need to
    # call the |item| function to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item()
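For intuition on the combination step, here is a hedged sketch of the same arithmetic on invented feature lists (the feature values are not from the project):

from scipy.spatial import distance

# Hypothetical categorical and continuous features for two users.
x_cat, y_cat = ['en-US', 'Windows'], ['en-US', 'Linux']  # 1 of 2 fields differ
x_cont, y_cont = [2.0, 10.0], [3.0, 10.0]

j_d = distance.hamming(x_cat, y_cat)     # 0.5, fraction of mismatched fields
j_c = distance.canberra(x_cont, y_cont)  # 0.2, sum of |x - y| / (|x| + |y|)

# Same combination as above: the 0.001 offset keeps the score nonzero
# when the continuous features match exactly (j_c == 0) but j_d does not.
score = abs((j_c + 0.001) * j_d)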
Example #13
Source File: taar_similarity.py From python_mozetl with MIT License
def similarity_function(x, y):
    """ Similarity function for comparing user features.

    This actually really should be implemented in taar.similarity_recommender
    and then imported here for consistency.
    """
    def safe_get(field, row, default_value):
        # Safely get a value from the Row. If the value is None, return the
        # default value.
        return row[field] if row[field] is not None else default_value

    # Extract the values for the categorical and continuous features for both
    # the x and y samples. Use an empty string as the default value for missing
    # categorical fields and 0 for the continuous ones.
    x_categorical_features = [safe_get(k, x, "") for k in CATEGORICAL_FEATURES]
    x_continuous_features = [safe_get(k, x, 0) for k in CONTINUOUS_FEATURES]
    y_categorical_features = [safe_get(k, y, "") for k in CATEGORICAL_FEATURES]
    y_continuous_features = [safe_get(k, y, 0) for k in CONTINUOUS_FEATURES]

    # Here a larger distance indicates a poorer match between categorical variables.
    j_d = distance.hamming(x_categorical_features, y_categorical_features)
    j_c = distance.canberra(x_continuous_features, y_continuous_features)

    # Take the product of similarities to attain a univariate similarity score.
    # Add a minimal constant to prevent zero values from categorical features.
    # Note: since both the distance functions return a Numpy type, we need to
    # call the |item| function to get the underlying Python type. If we don't
    # do that this job will fail when performing KDE due to SPARK-20803 on
    # Spark 2.2.0.
    return abs((j_c + 0.001) * j_d).item()
Example #14
Source File: ripple_carry_adder.py From forest-benchmarking with Apache License 2.0
def get_error_hamming_distributions_from_results(results: Sequence[Sequence[Sequence[int]]]) \
        -> Sequence[Sequence[float]]:
    """
    Get the distribution of the hamming weight of the error vector (number of bits flipped
    between output and expected answer) for each possible pair of two n_bit summands using
    results output by get_n_bit_adder_results

    :param results: a list of results output from a call to get_n_bit_adder_results
    :return: the relative frequency of observing each hamming weight, 0 to n_bits+1, for the
        error that occurred when adding each pair of two n_bit summands
    """
    num_shots = len(results[0])
    n_bits = len(results[0][0]) - 1

    hamming_wt_distrs = []
    # loop over all binary strings of length n_bits
    for result, bits in zip(results, all_bitstrings(2 * n_bits)):
        # Input nums are written from (MSB .... LSB) = (a_n, ..., a_1, a_0)
        num_a = bit_array_to_int(bits[:n_bits])
        num_b = bit_array_to_int(bits[n_bits:])

        # add the numbers
        ans = num_a + num_b
        ans_bits = int_to_bit_array(ans, n_bits + 1)

        # record the fraction of shots that resulted in an error of the given weight
        hamming_wt_distr = [0. for _ in range(len(ans_bits) + 1)]
        for shot in result:
            # multiply relative hamming distance by the length of the output for the weight
            wt = len(ans_bits) * hamming(ans_bits, shot)
            hamming_wt_distr[int(wt)] += 1. / num_shots

        hamming_wt_distrs.append(hamming_wt_distr)

    return hamming_wt_distrs