Python sklearn.cluster.AgglomerativeClustering() Examples
The following are 30 code examples of sklearn.cluster.AgglomerativeClustering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module sklearn.cluster, or try the search function.
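Before the examples, here is a minimal, self-contained sketch of the basic fit/labels_ workflow; the synthetic data and parameter values below are illustrative and not taken from any of the projects that follow.

import numpy as np
from sklearn.cluster import AgglomerativeClustering

# Two well-separated synthetic blobs
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(20, 2), rng.randn(20, 2) + 5])

# Fit the hierarchical clustering and read off the flat cluster assignment
model = AgglomerativeClustering(n_clusters=2, linkage='ward')
model.fit(X)
print(model.labels_)  # one integer label per sample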
Example #1
Source File: graph_eval.py From nodevectors with MIT License | 6 votes |
def print_labeled_tests(w, y, test_size=0.2, seed=42):
    """
    Clustering and label prediction tests
    """
    X_train, X_test, y_train, y_test = train_test_split(
        w, y, test_size=test_size, random_state=seed)
    # Print label prediction tests
    res = LabelPrediction(w, y, test_size=test_size, seed=seed)
    # Can only cluster on single-label (not multioutput) targets
    if len(y.shape) < 2:
        n_clusters = np.unique(y).size
        umpagglo = cluster.AgglomerativeClustering(
            n_clusters=n_clusters,
            affinity='cosine',
            linkage='average'
        ).fit(w).labels_
        x = evalClusteringOnLabels(umpagglo, y, verbose=True)
        res = {**res, **x}
    return res
Example #2
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering with a callable affinity
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Example #3
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_compute_full_tree():
    # Test that the full tree is computed if n_clusters is small
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    connectivity = kneighbors_graph(X, 5, include_self=False)

    # When n_clusters is small, the full tree should be built,
    # i.e. the number of merges should be n_samples - 1
    agc = AgglomerativeClustering(n_clusters=2, connectivity=connectivity)
    agc.fit(X)
    n_samples = X.shape[0]
    n_nodes = agc.children_.shape[0]
    assert_equal(n_nodes, n_samples - 1)

    # When n_clusters is large (greater than the max of 100 and
    # 0.02 * n_samples), merging should stop once n_clusters remain.
    n_clusters = 101
    X = rng.randn(200, 2)
    connectivity = kneighbors_graph(X, 10, include_self=False)
    agc = AgglomerativeClustering(n_clusters=n_clusters,
                                  connectivity=connectivity)
    agc.fit(X)
    n_samples = X.shape[0]
    n_nodes = agc.children_.shape[0]
    assert_equal(n_nodes, n_samples - n_clusters)
Example #4
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)

    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1)
Example #5
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_identical_points():
    # Ensure identical points are handled correctly when using mst with
    # a sparse connectivity matrix
    X = np.array([[0, 0, 0], [0, 0, 0],
                  [1, 1, 1], [1, 1, 1],
                  [2, 2, 2], [2, 2, 2]])
    true_labels = np.array([0, 0, 1, 1, 2, 2])
    connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
    connectivity = 0.5 * (connectivity + connectivity.T)
    connectivity, n_components = _fix_connectivity(X, connectivity,
                                                   'euclidean')

    # 'average' appeared twice in the original list; 'complete' restores
    # coverage of all four linkage options
    for linkage in ('single', 'average', 'complete', 'ward'):
        clustering = AgglomerativeClustering(n_clusters=3,
                                             linkage=linkage,
                                             connectivity=connectivity)
        clustering.fit(X)

        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                         true_labels), 1)
Example #6
Source File: agglomerative.py From Python-Machine-Learning-Cookbook-Second-Edition with MIT License | 6 votes |
def perform_clustering(X, connectivity, title, num_clusters=3, linkage='ward'):
    plt.figure()
    model = AgglomerativeClustering(linkage=linkage,
                                    connectivity=connectivity,
                                    n_clusters=num_clusters)
    model.fit(X)

    # Extract labels
    labels = model.labels_

    # Specify marker shapes for different clusters
    markers = '.vx'

    for i, marker in zip(range(num_clusters), markers):
        # Plot the points belonging to the current cluster
        plt.scatter(X[labels==i, 0], X[labels==i, 1], s=50,
                    marker=marker, color='k', facecolors='none')

    plt.title(title)
Example #7
Source File: cluster_manager.py From texta with GNU General Public License v3.0 | 6 votes |
def _cluster_documents(self):
    method = self.params['cluster_method']
    n_clusters = int(self.params['cluster_n_clusters'])
    n_samples = len(self.document_vectors)

    if n_clusters > n_samples:
        n_clusters = n_samples

    if method == 'kmeans':
        clusterer = KMeans(n_clusters=n_clusters, init='k-means++',
                           max_iter=100, n_init=1)
    else:
        clusterer = AgglomerativeClustering(n_clusters=n_clusters,
                                            linkage='complete',
                                            affinity='cosine')

    clustering = clusterer.fit(self.document_vectors)
    cluster_labels = clustering.labels_
    clustering_dict = clustering.__dict__

    clusters = {}
    for document_id, cluster_label in enumerate(cluster_labels):
        if cluster_label not in clusters:
            clusters[cluster_label] = []
        clusters[cluster_label].append(document_id)

    return clusters
Example #8
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_compute_full_tree():
    # Test that the full tree is computed if n_clusters is small
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    connectivity = kneighbors_graph(X, 5, include_self=False)

    # When n_clusters is small, the full tree should be built,
    # i.e. the number of merges should be n_samples - 1
    agc = AgglomerativeClustering(n_clusters=2, connectivity=connectivity)
    agc.fit(X)
    n_samples = X.shape[0]
    n_nodes = agc.children_.shape[0]
    assert_equal(n_nodes, n_samples - 1)

    # When n_clusters is large (greater than the max of 100 and
    # 0.02 * n_samples), merging should stop once n_clusters remain.
    n_clusters = 101
    X = rng.randn(200, 2)
    connectivity = kneighbors_graph(X, 10, include_self=False)
    agc = AgglomerativeClustering(n_clusters=n_clusters,
                                  connectivity=connectivity)
    agc.fit(X)
    n_samples = X.shape[0]
    n_nodes = agc.children_.shape[0]
    assert_equal(n_nodes, n_samples - n_clusters)
Example #9
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 6 votes |
def test_linkage_misc():
    # Misc tests on linkage
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # test hierarchical clustering on a precomputed distances matrix
    dis = cosine_distances(X)
    res = linkage_tree(dis, affinity="precomputed")
    assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])

    # test hierarchical clustering with a callable affinity
    res = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
Example #10
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cluster_distances_with_distance_threshold():
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.randint(-10, 10, size=(n_samples, 3))
    # check the distances within the clusters and with other clusters
    distance_threshold = 4
    clustering = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
        linkage="single").fit(X)
    labels = clustering.labels_
    D = pairwise_distances(X, metric="minkowski", p=2)
    # to avoid taking the 0 diagonal in min()
    np.fill_diagonal(D, np.inf)
    for label in np.unique(labels):
        in_cluster_mask = labels == label
        max_in_cluster_distance = (D[in_cluster_mask][:, in_cluster_mask]
                                   .min(axis=0).max())
        min_out_cluster_distance = (D[in_cluster_mask][:, ~in_cluster_mask]
                                    .min(axis=0).min())
        # single data point clusters only have that inf diagonal here
        if in_cluster_mask.sum() > 1:
            assert max_in_cluster_distance < distance_threshold
            assert min_out_cluster_distance >= distance_threshold
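As a reference for the distance_threshold mode exercised by this test: passing n_clusters=None together with a threshold lets the estimator determine the number of clusters itself. A minimal sketch on synthetic data (values illustrative, not taken from the test):

import numpy as np
from sklearn.cluster import AgglomerativeClustering

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(15, 2), rng.randn(15, 2) + 8])

# Merging stops once clusters are farther apart than the threshold,
# so the number of clusters is inferred from the data
model = AgglomerativeClustering(n_clusters=None, distance_threshold=5.0,
                                linkage='single')
labels = model.fit_predict(X)
print(model.n_clusters_)  # number of clusters found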
Example #11
Source File: predicting.py From ImageSetCleaner with GNU General Public License v3.0 | 6 votes |
def detection_with_agglomaritve_clustering(image_set):
    """
    Really good if the classes you are analyzing are close to what the
    network learned.

    :param image_set: The bottleneck values of the relevant images.
    :return: Predictions vector

    N.B.: The detector breaks with a full black image.
    """
    # http://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_clustering.html#sphx-glr-auto-examples-cluster-plot-agglomerative-clustering-py
    clf = cluster.AgglomerativeClustering(n_clusters=2, affinity="l2",
                                          linkage="complete")
    clf.fit(image_set)

    predictions = clf.labels_
    predictions = normalize_predictions(predictions)

    return predictions
Example #12
Source File: clusterings.py From parcellation_fragmenter with BSD 3-Clause "New" or "Revised" License | 5 votes |
def ward(n_clusters, samples):
    """
    Run Ward clustering on vertex coordinates.

    Parameters:
    - - - - -
    n_clusters : int
        number of clusters to generate
    samples : array
        Euclidean-space coordinates of vertices
    """

    # Generate KNN graph
    knn_graph = neighbors.kneighbors_graph(
        samples, n_neighbors=20, mode='connectivity', metric='minkowski',
        p=2, include_self=False, n_jobs=-1)

    # Apply Ward-agglomerative clustering
    ward = cluster.AgglomerativeClustering(
        n_clusters=n_clusters, affinity='euclidean',
        connectivity=knn_graph, linkage='ward')
    ward.fit(samples)
    labels = ward.labels_.copy()
    labels = labels.astype(np.int32) + 1

    return labels
Example #13
Source File: __init__.py From dials with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _agglomerative_clustering(self):
    X = self.coords.as_numpy_array()

    # Perform cluster analysis
    from sklearn.cluster import AgglomerativeClustering
    import numpy as np

    model = AgglomerativeClustering(
        n_clusters=self.params.cluster.n_clusters,
        linkage="average",
        affinity="cosine",
    )
    model.fit(X)
    return flex.int(model.labels_.astype(np.int32))
Example #14
Source File: agglomerative.py From trajminer with MIT License | 5 votes |
def __init__(self, n_clusters, linkage='ward', measure='precomputed',
             n_jobs=1):
    # Forward the requested linkage (the original dropped this argument);
    # note that 'ward' only accepts euclidean distances, so a precomputed
    # matrix should be paired with e.g. 'complete' or 'average'
    self.agglomerative = skAgglomerative(n_clusters=n_clusters,
                                         linkage=linkage,
                                         affinity='precomputed')
    self.n_clusters = n_clusters
    self.measure = measure
    self.n_jobs = n_jobs
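Because affinity='precomputed' means fit() receives a square distance matrix rather than raw features, the underlying scikit-learn pattern looks roughly like the sketch below (data illustrative; 'ward' linkage requires euclidean feature vectors, so a precomputed matrix must be paired with another linkage, and newer scikit-learn releases rename affinity to metric):

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(10, 4), rng.randn(10, 4) + 3])

# Precompute an (n_samples, n_samples) distance matrix
D = pairwise_distances(X, metric='euclidean')

# fit_predict() takes the distance matrix itself; 'ward' is not allowed
# with a precomputed affinity, so use 'average' or 'complete'
model = AgglomerativeClustering(n_clusters=2, affinity='precomputed',
                                linkage='average')
labels = model.fit_predict(D)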
Example #15
Source File: cluster_images.py From NucleiDetectron with Apache License 2.0 | 5 votes |
def create_color_cluster_agglomerative_clustering(in_df, num_clusters):
    cluster_maker = AgglomerativeClustering(linkage='average',
                                            n_clusters=num_clusters)
    cluster_maker.fit(in_df[color_features_names])
    in_df['cluster-id'] = cluster_maker.labels_
    in_df['cluster-id'] = in_df['cluster-id'].map(lambda x: str(x))
    return in_df
Example #16
Source File: create_endpoints_mask_with_clustering.py From TractSeg with Apache License 2.0 | 5 votes |
def cluster(points, algorithm="DBSCAN"):
    # default must be the string "DBSCAN", not the class, to match the
    # string comparisons below
    print("Running {}...".format(algorithm))
    if algorithm == "KMeans":
        # not good at finding clusters if close together
        labels = KMeans(n_clusters=2, random_state=0, n_jobs=-1).fit_predict(points)
    elif algorithm == "DBSCAN":
        # no fixed number of labels; slow with high eps
        labels = DBSCAN(eps=3.0, n_jobs=-1).fit_predict(points)

    # labels = SpectralClustering(n_clusters=2, n_jobs=-1).fit_predict(points)  # slow (> 1min)
    # labels = AgglomerativeClustering(n_clusters=2).fit_predict(points)  # fast

    points_start, points_end = select_two_biggest_clusters(labels, points)
    return points_start, points_end
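The AgglomerativeClustering alternative left commented out above could look like the following minimal sketch (a hypothetical variant: points is assumed to be an (n, 3) coordinate array, and the project-specific select_two_biggest_clusters step is omitted):

import numpy as np
from sklearn.cluster import AgglomerativeClustering

def cluster_endpoints_agglomerative(points, n_clusters=2):
    # fit_predict returns one cluster label per point
    return AgglomerativeClustering(n_clusters=n_clusters).fit_predict(points)

# Example usage on two synthetic endpoint blobs
rng = np.random.RandomState(0)
points = np.vstack([rng.randn(50, 3), rng.randn(50, 3) + 10])
labels = cluster_endpoints_agglomerative(points)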
Example #17
Source File: compare_clustering_algs.py From mmvt with GNU General Public License v3.0 | 5 votes |
def compare(data, n_groups, output_fol):
    # plot_clusters(data.astype(np.float), scipy.cluster.vq.kmeans,
    #               'scipy.cluster.vq.kmeans', output_fol, (n_groups,), {})
    plot_clusters(data, cluster.KMeans, 'KMeans', output_fol, (),
                  {'n_clusters': n_groups})
    for ct in ['spherical', 'tied', 'diag', 'full']:
        plot_clusters(data, mixture.GaussianMixture, 'GMM_{}'.format(ct),
                      output_fol, (),
                      {'n_components': n_groups, 'covariance_type': ct})
    plot_clusters(data, cluster.AffinityPropagation, 'AffinityPropagation',
                  output_fol, (), {'preference': -5.0, 'damping': 0.95})
    plot_clusters(data, cluster.MeanShift, 'MeanShift', output_fol, (0.175,),
                  {'cluster_all': False})
    plot_clusters(data, cluster.SpectralClustering, 'SpectralClustering',
                  output_fol, (), {'n_clusters': n_groups})
    plot_clusters(data, cluster.AgglomerativeClustering,
                  'AgglomerativeClustering', output_fol, (),
                  {'n_clusters': n_groups, 'linkage': 'ward'})
    plot_clusters(data, cluster.DBSCAN, 'DBSCAN', output_fol, (),
                  {'eps': 0.025})
    # plot_clusters(data, hdbscan.HDBSCAN, 'HDBSCAN', output_fol, (),
    #               {'min_cluster_size': 15})
Example #18
Source File: sklearn_cluster.py From learn-to-cluster with MIT License | 5 votes |
def hierarchy(feat, n_clusters, knn, **kwargs):
    from sklearn.neighbors import kneighbors_graph
    knn_graph = kneighbors_graph(feat, knn, include_self=False)
    hierarchy = cluster.AgglomerativeClustering(n_clusters=n_clusters,
                                                connectivity=knn_graph,
                                                linkage='ward').fit(feat)
    return hierarchy.labels_
Example #19
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_agglomerative_clustering_wrong_arg_memory():
    # Test that an error is raised when memory is neither
    # a str nor a joblib.Memory instance
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    memory = 5
    clustering = AgglomerativeClustering(memory=memory)
    assert_raises(ValueError, clustering.fit, X)
Example #20
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_connectivity_propagation():
    # Check that connectivity in the ward tree is propagated correctly
    # during merging.
    X = np.array([(.014, .120), (.014, .099), (.014, .097),
                  (.017, .153), (.017, .153), (.018, .153),
                  (.018, .153), (.018, .153), (.018, .153),
                  (.018, .153), (.018, .153), (.018, .153),
                  (.018, .152), (.018, .149), (.018, .144)])
    connectivity = kneighbors_graph(X, 10, include_self=False)
    ward = AgglomerativeClustering(
        n_clusters=4, connectivity=connectivity, linkage='ward')
    # If changes are not propagated correctly, fit crashes with an
    # IndexError
    ward.fit(X)
Example #21
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_connectivity_fixing_non_lil():
    # Check non-regression of a bug if a non item-assignable connectivity
    # is provided with more than one component.
    # create dummy data
    x = np.array([[0, 0], [1, 1]])
    # create a mask with several components to force connectivity fixing
    m = np.array([[True, False], [False, True]])
    c = grid_to_graph(n_x=2, n_y=2, mask=m)
    w = AgglomerativeClustering(connectivity=c, linkage='ward')
    assert_warns(UserWarning, w.fit, x)
Example #22
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_connectivity_callable():
    rng = np.random.RandomState(0)
    X = rng.rand(20, 5)
    connectivity = kneighbors_graph(X, 3, include_self=False)
    aglc1 = AgglomerativeClustering(connectivity=connectivity)
    aglc2 = AgglomerativeClustering(
        connectivity=partial(kneighbors_graph, n_neighbors=3,
                             include_self=False))
    aglc1.fit(X)
    aglc2.fit(X)
    assert_array_equal(aglc1.labels_, aglc2.labels_)
Example #23
Source File: test_hierarchical.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_agg_n_clusters():
    # Test that an error is raised when n_clusters <= 0
    rng = np.random.RandomState(0)
    X = rng.rand(20, 10)
    for n_clus in [-1, 0]:
        agc = AgglomerativeClustering(n_clusters=n_clus)
        msg = ("n_clusters should be an integer greater than 0."
               " %s was provided." % str(agc.n_clusters))
        assert_raise_message(ValueError, msg, agc.fit, X)
Example #24
Source File: stitch_patches_page.py From ScanSSD with MIT License | 5 votes |
def clustering(math_regions, char_data, image, algorithm, thresh_votes):
    centers = []
    for math_region in math_regions:
        center = [(math_region[0] + math_region[2]) / 2,
                  (math_region[1] + math_region[3]) / 2]
        centers.append(center)

    clustering = AgglomerativeClustering().fit(centers)
    labels = np.unique(clustering.labels_)

    for label in labels:
        # select the regions assigned to the current cluster; the mask
        # must index the per-sample labels_, not the unique labels
        regions = np.array(math_regions)[clustering.labels_ == label]
        pass
Example #25
Source File: baseline_clustering.py From cdp with MIT License | 5 votes |
def hierarchy(feat, n_clusters=2, knn=30):
    from sklearn.neighbors import kneighbors_graph
    knn_graph = kneighbors_graph(feat, knn, include_self=False)
    hierarchy = cluster.AgglomerativeClustering(n_clusters=n_clusters,
                                                connectivity=knn_graph,
                                                linkage='ward').fit(feat)
    return hierarchy.labels_
Example #26
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_agglomerative_clustering_wrong_arg_memory():
    # Test that an error is raised when memory is neither
    # a str nor a joblib.Memory instance
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    memory = 5
    clustering = AgglomerativeClustering(memory=memory)
    assert_raises(ValueError, clustering.fit, X)
Example #27
Source File: diarization.py From VBDiarization with Apache License 2.0 | 5 votes |
def run_ahc(self, n_clusters, embeddings, scores_matrix):
    """ Run agglomerative hierarchical clustering.

        Returns:
            np.array: means of clusters
    """
    # Rescale scores to [0, 1] and negate them, so that higher similarity
    # maps to lower 'distance' in the precomputed affinity matrix
    scores_matrix = -((scores_matrix - np.min(scores_matrix)) /
                      (np.max(scores_matrix) - np.min(scores_matrix)))
    ahc = AgglomerativeClustering(affinity='precomputed', linkage='complete',
                                  n_clusters=n_clusters)
    labels = ahc.fit_predict(scores_matrix)
    return np.array([np.mean(embeddings[np.where(labels == i)], axis=0)
                     for i in range(n_clusters)])
Example #28
Source File: infer.py From NLP_Toolkit with Apache License 2.0 | 5 votes |
def n_cluster_embeddings(self, features=None, n_clusters=3, method='ac'):
    '''
    Clusters the nodes based on embedding features.
    features = None (use DGI-generated embeddings)
    '''
    if method == 'ac':
        clustering = AgglomerativeClustering(n_clusters=n_clusters,
                                             affinity='euclidean',
                                             linkage='ward')
        clustering.fit(self.embeddings if features is None else features)
        self.labels = clustering.labels_
        self.score = silhouette_score(self.embeddings if features is None
                                      else features, self.labels)
    return {'labels': self.labels, 'score': self.score}
Example #29
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_connectivity_propagation():
    # Check that connectivity in the ward tree is propagated correctly
    # during merging.
    X = np.array([(.014, .120), (.014, .099), (.014, .097),
                  (.017, .153), (.017, .153), (.018, .153),
                  (.018, .153), (.018, .153), (.018, .153),
                  (.018, .153), (.018, .153), (.018, .153),
                  (.018, .152), (.018, .149), (.018, .144)])
    connectivity = kneighbors_graph(X, 10, include_self=False)
    ward = AgglomerativeClustering(
        n_clusters=4, connectivity=connectivity, linkage='ward')
    # If changes are not propagated correctly, fit crashes with an
    # IndexError
    ward.fit(X)
Example #30
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def test_n_components_deprecation():
    # Test that a DeprecationWarning is raised when the n_components_
    # attribute is accessed
    X = np.array([[1, 2], [1, 4], [1, 0], [4, 2]])
    agc = AgglomerativeClustering().fit(X)

    match = ("``n_components_`` attribute was deprecated "
             "in favor of ``n_connected_components_``")
    with pytest.warns(DeprecationWarning, match=match):
        n = agc.n_components_
    assert n == agc.n_connected_components_