Python sklearn.cluster.AffinityPropagation() Examples
The following are 13 code examples of sklearn.cluster.AffinityPropagation(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
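Before the project examples, here is a minimal, self-contained sketch of the typical fit/predict pattern. It is not taken from any of the projects below; the synthetic data and the preference value are purely illustrative.

# Minimal usage sketch (illustrative, not from any project below):
# cluster synthetic blobs and inspect the exemplars chosen by affinity propagation.
from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, cluster_std=0.5, random_state=0)

ap = AffinityPropagation(preference=-50)   # lower preference -> fewer exemplars/clusters
labels = ap.fit_predict(X)

print('estimated clusters:', len(ap.cluster_centers_indices_))
print('first ten labels:', labels[:10])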
Example #1
Source File: document_clustering.py From text-analytics-with-python with Apache License 2.0 | 6 votes |
def affinity_propagation(feature_matrix):
    sim = feature_matrix * feature_matrix.T
    sim = sim.todense()
    ap = AffinityPropagation()
    ap.fit(sim)
    clusters = ap.labels_
    return ap, clusters

# get clusters using affinity propagation
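A hedged sketch of how this helper might be driven, assuming affinity_propagation and its AffinityPropagation import are in scope, and assuming the older scikit-learn stack the book targets (the helper passes the np.matrix returned by .todense() to fit(), which newer scikit-learn releases reject). The corpus is illustrative, not the book's data.

# Illustrative only: build a sparse TF-IDF matrix and feed it to the helper above.
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = ['the cat sat on the mat',
          'dogs and cats living together',
          'the stock market fell sharply',
          'investors sold shares in a panic']

feature_matrix = TfidfVectorizer().fit_transform(corpus)   # sparse matrix, one row per document
ap_model, clusters = affinity_propagation(feature_matrix)
print(clusters)   # one cluster label per document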
Example #2
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_affinity_propagation_class(self):
    from sklearn.datasets.samples_generator import make_blobs

    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(n_samples=300, centers=centers,
                                cluster_std=0.5, random_state=0)

    df = pdml.ModelFrame(data=X, target=labels_true)
    af = df.cluster.AffinityPropagation(preference=-50)
    df.fit(af)

    af2 = cluster.AffinityPropagation(preference=-50).fit(X)

    tm.assert_numpy_array_equal(af.cluster_centers_indices_,
                                af2.cluster_centers_indices_)
    tm.assert_numpy_array_equal(af.labels_, af2.labels_)
Example #3
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_Classifications(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    models = ['AffinityPropagation', 'MeanShift']
    for model in models:
        mod1 = getattr(df.cluster, model)()
        mod2 = getattr(cluster, model)()

        df.fit(mod1)
        mod2.fit(iris.data)

        result = df.predict(mod1)
        expected = mod2.predict(iris.data)

        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
Example #4
Source File: cluster.py From poem with MIT License | 5 votes |
def cluster(X):
    X = preprocessing.normalize(X, norm='l2')
    distance = X.dot(X.transpose())
    c = AffinityPropagation(affinity="precomputed")
    y = c.fit_predict(distance)
    return y
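A note on the design choice: with affinity="precomputed", fit_predict expects a square matrix in which larger entries mean more similar, which is why the rows are L2-normalized first so that X.dot(X.T) yields cosine similarities. Below is a hedged usage sketch, assuming the cluster() helper above and its imports (preprocessing, AffinityPropagation) are in scope; the synthetic data is purely illustrative.

# Illustrative call to the cluster() helper above on synthetic blob data.
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=60, centers=3, cluster_std=0.3, random_state=0)
labels = cluster(X)   # builds the cosine-similarity matrix internally
print(labels)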
Example #5
Source File: sense_clusterer.py From yelp with GNU Lesser General Public License v2.1 | 5 votes |
def cluster_affinity_propagation(similarity_matrix, desired_keys=None):
    numpy_matrix = similarity_matrix_to_numpy(similarity_matrix, desired_keys)
    clusterer = AffinityPropagation()
    return clusterer.fit_predict(numpy_matrix)
Example #6
Source File: compare_clustering_algs.py From mmvt with GNU General Public License v3.0 | 5 votes |
def compare(data, n_groups, output_fol):
    # plot_clusters(data.astype(np.float), scipy.cluster.vq.kmeans, 'scipy.cluster.vq.kmeans', output_fol, (n_groups,), {})
    plot_clusters(data, cluster.KMeans, 'KMeans', output_fol, (), {'n_clusters': n_groups})
    for ct in ['spherical', 'tied', 'diag', 'full']:
        plot_clusters(data, mixture.GaussianMixture, 'GMM_{}'.format(ct), output_fol, (),
                      {'n_components': n_groups, 'covariance_type': ct})
    plot_clusters(data, cluster.AffinityPropagation, 'AffinityPropagation', output_fol, (),
                  {'preference': -5.0, 'damping': 0.95})
    plot_clusters(data, cluster.MeanShift, 'MeanShift', output_fol, (0.175,), {'cluster_all': False})
    plot_clusters(data, cluster.SpectralClustering, 'SpectralClustering', output_fol, (),
                  {'n_clusters': n_groups})
    plot_clusters(data, cluster.AgglomerativeClustering, 'AgglomerativeClustering', output_fol, (),
                  {'n_clusters': n_groups, 'linkage': 'ward'})
    plot_clusters(data, cluster.DBSCAN, 'DBSCAN', output_fol, (), {'eps': 0.025})
    # plot_clusters(data, hdbscan.HDBSCAN, 'HDBSCAN', output_fol, (), {'min_cluster_size': 15})
Example #7
Source File: lexrankr.py From lexrankr with MIT License | 5 votes |
def __init__(self, similarity='cosine', decay_window=20, decay_alpha=0.25, clustering='dbscan', tagger='twitter',
             useful_tags=['Noun', 'Verb', 'Adjective', 'Determiner', 'Adverb', 'Conjunction', 'Josa', 'PreEomi', 'Eomi', 'Suffix', 'Alpha', 'Number'],
             delimiters=['. ', '\n', '.\n'], min_token_length=2, stopwords=stopwords_ko,
             no_below_word_count=2, no_above_word_portion=0.85, max_dictionary_size=None,
             min_cluster_size=2, similarity_threshold=0.85, matrix_smoothing=False,
             n_clusters=None, compactify=True, **kwargs):
    self.decay_window = decay_window
    self.decay_alpha = decay_alpha
    if similarity == 'cosine':  # very, very slow :(
        self.vectorizer = DictVectorizer()
        self.uniform_sim = self._sim_cosine
    elif similarity == 'jaccard':
        self.uniform_sim = self._sim_jaccard
    elif similarity == 'normalized_cooccurrence':
        self.uniform_sim = self._sim_normalized_cooccurrence
    else:
        raise LexRankError("available similarity functions are: cosine, jaccard, normalized_cooccurrence")
    self.sim = lambda sentence1, sentence2: self.decay(sentence1, sentence2) * self.uniform_sim(sentence1, sentence2)
    self.factory = SentenceFactory(tagger=tagger, useful_tags=useful_tags, delimiters=delimiters,
                                   min_token_length=min_token_length, stopwords=stopwords, **kwargs)
    if clustering == 'birch':
        self._birch = Birch(threshold=0.99, n_clusters=n_clusters)
        self._clusterer = lambda matrix: self._birch.fit_predict(1 - matrix)
    elif clustering == 'dbscan':
        self._dbscan = DBSCAN()
        self._clusterer = lambda matrix: self._dbscan.fit_predict(1 - matrix)
    elif clustering == 'affinity':
        self._affinity = AffinityPropagation()
        self._clusterer = lambda matrix: self._affinity.fit_predict(1 - matrix)
    elif clustering is None:
        self._clusterer = lambda matrix: [0 for index in range(matrix.shape[0])]
    else:
        raise LexRankError("available clustering algorithms are: birch, markov, no-clustering(use `None`)")
    self.no_below_word_count = no_below_word_count
    self.no_above_word_portion = no_above_word_portion
    self.max_dictionary_size = max_dictionary_size
    self.similarity_threshold = similarity_threshold
    self.min_cluster_size = min_cluster_size
    self.matrix_smoothing = matrix_smoothing
    self.compactify = compactify
Example #8
Source File: song2vec_operator.py From MusicTaster with MIT License | 4 votes |
def cluster_song_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
    """
    Get song-clustering information for a single playlist
    Args:
        playlist_id: playlist id
        cluster_n: number of clusters
        is_detailed: whether the returned result includes details
    Returns:
        the clustered list
    """
    playlist_obj = playlist_detail(playlist_id)
    song_list = []
    vec_list = []
    song_info_dict = {}
    ap_cluster = AffinityPropagation()
    data_process_logger.info('clustering playlist: %s' % playlist_obj['name'])
    for item in playlist_obj['tracks']:
        song = item['name'].lower()
        song_info_dict[song] = {
            'name': song,
            'artist': item['artists'][0]['name'],
            'id': item['id'],
            'album_img_url': item['album']['picUrl'],
            'site_url': 'http://music.163.com/#/song?id=%s' % item['id']
        }
        # print song
        if song not in song_list:
            song_list.append(song)
            # print self.song2vec_model.vocab.get(song)
            # print self.song2vec_model.syn0norm == None
            if self.song2vec_model.vocab.get(song) and len(self.song2vec_model.syn0norm):
                song_vec = self.song2vec_model.syn0norm[self.song2vec_model.vocab[song].index]
            else:
                data_process_logger.warn(
                    'The song %s of playlist-%s is not in dataset' % (song, playlist_obj['name']))
                song_vec = [0 for i in range(self.song2vec_model.vector_size)]
            vec_list.append(song_vec)
    # song_list = list(song_list)
    if len(vec_list) > 1:
        cluster_result = ap_cluster.fit(vec_list, song_list)
        cluster_array = [[] for i in range(len(cluster_result.cluster_centers_indices_))]
        for i in range(len(cluster_result.labels_)):
            label = cluster_result.labels_[i]
            index = i
            cluster_array[label].append(song_list[i])
        return cluster_array, playlist_obj['name'], song_info_dict
    else:
        return [song_list], playlist_obj['name'], song_info_dict
Example #9
Source File: song2vec_operator.py From MusicTaster with MIT License | 4 votes |
def cluster_artist_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
    """
    Get artist-clustering information for a single playlist
    Args:
        playlist_id: playlist id
        cluster_n: number of clusters
        is_detailed: whether to include detail information
    Returns:
        the clustered list
    """
    playlist_obj = playlist_detail(playlist_id)
    artist_list = []
    vec_list = []
    ap_cluster = AffinityPropagation()
    data_process_logger.info('clustering playlist: %s' % playlist_obj['name'])
    for item in playlist_obj['tracks']:
        artist = item['artists'][0]['name'].lower()
        # print artist
        if artist not in artist_list:
            artist_list.append(artist)
            # print self.song2vec_model.vocab.get(artist)
            # print self.song2vec_model.syn0norm == None
            if self.artist2vec_model.vocab.get(artist) and len(self.artist2vec_model.syn0norm):
                artist_vec = self.artist2vec_model.syn0norm[self.artist2vec_model.vocab[artist].index]
            else:
                data_process_logger.warn(
                    'The artist %s of playlist-%s is not in dataset' % (artist, playlist_obj['name']))
                artist_vec = [0 for i in range(self.artist2vec_model.vector_size)]
            vec_list.append(artist_vec)
    # artist_list = list(artist_list)
    # vec_list = list(vec_list)
    if len(vec_list) > 1:
        cluster_result = ap_cluster.fit(vec_list, artist_list)
        cluster_array = [[] for i in range(len(cluster_result.cluster_centers_indices_))]
        for i in range(len(cluster_result.labels_)):
            label = cluster_result.labels_[i]
            index = i
            cluster_array[label].append(artist_list[i])
        return cluster_array, playlist_obj['name'], {}
    else:
        return [artist_list], playlist_obj['name'], {}
Example #10
Source File: context_utils.py From yelp with GNU Lesser General Public License v2.1 | 4 votes |
def build_groups2(nouns):
    print('building groups', time.strftime("%H:%M:%S"))
    all_senses = set()
    sense_word_map = {}
    for noun in nouns:
        senses = wordnet.synsets(noun, pos='n')
        all_senses.update(senses)
        for sense in senses:
            if sense.name() not in sense_word_map:
                sense_word_map[sense.name()] = []
            sense_word_map[sense.name()].append(noun)
    all_senses = list(all_senses)
    all_senses_names = [sense.name() for sense in all_senses]
    print('number of senses:', len(all_senses))

    sense_similarity_matrix, sense_similarity_matrix_columns =\
        get_sense_similarity_submatrix(all_senses_names)
    print('submatrix ready', time.strftime("%H:%M:%S"))

    # affinity_propagation = AffinityPropagation()
    # labels1 = affinity_propagation.fit_predict(sense_similarity_matrix)
    # print('affinity propagation ready', time.strftime("%H:%M:%S"))

    grouper = BaumanSensesGrouper(sense_similarity_matrix, 0.7)
    groups = grouper.group_senses()
    print('groups')
    # print(groups)
    new_groups = []
    for group in groups:
        new_group = set()
        for element in group:
            sense_name = sense_similarity_matrix_columns[element]
            new_group.add(sense_name)
        new_groups.append(new_group)
    print('finished groups', time.strftime("%H:%M:%S"))
    # print(groups)
    # print(new_groups)
    print('num groups: %d' % len(groups))

    sense_groups = []
    for group in new_groups:
        sense_group = SenseGroup(group)
        for sense in sense_group.senses:
            sense_group.nouns |= set(sense_word_map[sense])
        sense_groups.append(sense_group)
    return sense_groups
Example #11
Source File: context_utils.py From yelp with GNU Lesser General Public License v2.1 | 4 votes |
def evaluate_clustering():
    similarity_matrix = get_sense_similarity_submatrix(range(10000))
    matrix_size = len(similarity_matrix)
    print('got matrix')

    affinity_propagation = AffinityPropagation()
    labels1 = affinity_propagation.fit_predict(similarity_matrix)
    print('affinity propagation')

    dbscan = DBSCAN(min_samples=1)
    labels2 = dbscan.fit_predict(similarity_matrix)
    print('print dbscan')

    distance_matrix = np.ndarray((matrix_size, matrix_size))
    for i in range(matrix_size):
        for j in range(matrix_size):
            distance_matrix[i, j] = 1 - similarity_matrix[i, j]
    print(distance_matrix[1, 2])
    print(distance_matrix[1, 1])
    print('created distance matrix')

    cluster_map1 = cluster_evaluation.fpena_get_clusters(labels1)
    cluster_map2 = cluster_evaluation.fpena_get_clusters(labels2)
    print(cluster_map1)
    print(cluster_map2)

    sc1 = sklearn.metrics.silhouette_score(distance_matrix, labels1, metric='euclidean')
    sc2 = sklearn.metrics.silhouette_score(distance_matrix, labels2, metric='euclidean')
    sc5 = cluster_evaluation.fpena_evaluate(cluster_map1, distance_matrix)
    sc6 = cluster_evaluation.fpena_evaluate(cluster_map2, distance_matrix)

    num_elements1 = [len(values) for values in cluster_map1.values()]
    num_elements2 = [len(values) for values in cluster_map2.values()]
    print(num_elements1)
    print(num_elements2)

    print('Number of clusters Affinity Propagation: %f' % len(cluster_map1))
    print('Number of clusters DBSCAN: %f' % len(cluster_map2))
    print('Average elements per cluster Affinity Propagation: %f' % np.mean(num_elements1))
    print('Average elements per cluster DBSCAN: %f' % np.mean(num_elements2))
    print('Standard deviation per cluster Affinity Propagation: %f' % np.std(num_elements1))
    print('Standard deviation per cluster DBSCAN: %f' % np.std(num_elements2))
    print('Silouhette score Affinity Propagation (distance matrix): %f' % sc1)
    print('Silouhette score DBSCAN (distance matrix): %f' % sc2)
    print('Dunn index Affinity Propagation (distance matrix): %f' % sc5)
    print('Dunn index DBSCAN (distance matrix): %f' % sc6)

# start = time.time()
# main()
# evaluate_clustering()
# get_similarity_submatrix()
# end = time.time()
# total_time = end - start
# print("Total time = %f seconds" % total_time)
Example #12
Source File: exposons.py From enspara with GNU General Public License v3.0 | 4 votes |
def exposons_from_sasas(sasas, damping, weights, threshold):
    """Compute exposons for an MDTraj trajectory.

    This function is a convenience wrapper to compute exposons using
    other functions already existing in MDTraj, sklearn, and elsewhere
    in enspara.

    Parameters
    ----------
    sasas: np.ndarray, shape=(n_conformations, n_sidechains)
        SASAs to use in the calculations.
    damping: float
        Damping parameter to use for affinity propagation. Goes from
        0.5 to <1.0. Empirically, values between 0.85 and 0.95 tend to
        work best.
    weights: ndarray, shape=(len(trj),), default=None
        Weight of each frame in the simulation for the mutual
        information calculation. Useful if `trj` represents cluster
        centers of an MSM rather than a full trajectory. If None,
        frames will be weighted equally.
    threshold: float, default=0.02
        Sidechains with greater than this amount of total SASA will
        count as exposed for the purposes of the exposed/buried
        dichotomy used in mutual information calculations.

    Returns
    -------
    sasa_mi: np.ndarray, shape=(n_res, n_res)
        Mutual information of each sidechain with each other sidechain
        computed for the purposes of clustering exposons.
    exposons: np.ndarray, shape=(n_res,)
        Assignment of residues to exposons. Residues in the same
        exposon share the same number in this array.
    """

    sasa_mi = weighted_mi(sasas > threshold, weights)

    c = AffinityPropagation(
        damping=damping,
        affinity='precomputed',
        preference=0,
        max_iter=10000)
    c.fit(sasa_mi)

    return sasa_mi, c.labels_
Example #13
Source File: test_cluster.py From pandas-ml with BSD 3-Clause "New" or "Revised" License | 3 votes |
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.cluster.AffinityPropagation, cluster.AffinityPropagation)
    self.assertIs(df.cluster.AgglomerativeClustering, cluster.AgglomerativeClustering)
    self.assertIs(df.cluster.Birch, cluster.Birch)
    self.assertIs(df.cluster.DBSCAN, cluster.DBSCAN)
    self.assertIs(df.cluster.FeatureAgglomeration, cluster.FeatureAgglomeration)
    self.assertIs(df.cluster.KMeans, cluster.KMeans)
    self.assertIs(df.cluster.MiniBatchKMeans, cluster.MiniBatchKMeans)
    self.assertIs(df.cluster.MeanShift, cluster.MeanShift)
    self.assertIs(df.cluster.SpectralClustering, cluster.SpectralClustering)
    self.assertIs(df.cluster.bicluster.SpectralBiclustering,
                  cluster.bicluster.SpectralBiclustering)
    self.assertIs(df.cluster.bicluster.SpectralCoclustering,
                  cluster.bicluster.SpectralCoclustering)