Python scipy.cluster.hierarchy.linkage() Examples
The following are 30 code examples of scipy.cluster.hierarchy.linkage().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.hierarchy, or try the search function.
Example #1
Source File: clustering.py From anvio with GNU General Public License v3.0 | 6 votes |
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path,
                                        linkage=constants.linkage_method_default,
                                        distance=constants.distance_metric_default,
                                        norm='l1', progress=progress, transpose=False,
                                        items_order_file_path=None):
    """Cluster a TAB-delimited matrix file and write the resulting newick tree.

    Parameters
    ----------
    observation_matrix_path : path of the TAB-delimited input matrix.
    output_file_path : path the newick string is written to (skipped if falsy).
    linkage, distance, norm : forwarded to ``get_newick_from_matrix``.
    progress : accepted for interface compatibility; not used in this function.
    transpose : forwarded to ``utils.get_vectors_from_TAB_delim_matrix``.
    items_order_file_path : optional path; when given, the item names in newick
        order are written there, one per line.
    """
    # Sanity checks are delegated to project helpers.
    is_distance_and_linkage_compatible(distance, linkage)
    filesnpaths.is_file_exists(observation_matrix_path)
    filesnpaths.is_file_tab_delimited(observation_matrix_path)
    filesnpaths.is_output_file_writable(output_file_path)

    if items_order_file_path:
        filesnpaths.is_output_file_writable(items_order_file_path)

    id_to_sample_dict, sample_to_id_dict, header, vectors = \
        utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)

    vectors = np.array(vectors)

    newick = get_newick_from_matrix(vectors, distance, linkage, norm, id_to_sample_dict)

    # Use context managers so the file handles are flushed and closed
    # deterministically instead of relying on garbage collection.
    if output_file_path:
        with open(output_file_path, 'w') as newick_file:
            newick_file.write(newick.strip() + '\n')

    if items_order_file_path:
        with open(items_order_file_path, 'w') as order_file:
            order_file.write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
Example #2
Source File: subroutines.py From SigProfilerExtractor with BSD 2-Clause "Simplified" License | 6 votes |
def dendrogram(data, threshold, layer_directory):
    """Cluster the columns of ``data`` and save the dendrogram as a PDF.

    Parameters
    ----------
    data : object exposing ``.columns`` and convertible with ``np.array``
        (presumably a pandas DataFrame -- confirm against callers).
    threshold : distance used both as the dendrogram colour threshold and as
        the ``fcluster`` cut-off.
    layer_directory : directory in which ``dendrogram.pdf`` is written.

    Returns
    -------
    pandas.DataFrame indexed by "Sample Names" with a single "Cluster" column
    holding the flat cluster id of every column of ``data``.
    """
    colnames = data.columns
    data = np.array(data)

    # Single-linkage clustering of the columns using cosine distance.
    Z = hierarchy.linkage(data.T, 'single', 'cosine')

    plt.figure(figsize=(15, 9))
    hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures")
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # ``figsize`` is not a savefig() parameter (the size is fixed by
    # plt.figure above); depending on the Matplotlib version the stray keyword
    # is ignored or rejected, so it is no longer passed.
    plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)

    # Flat cluster assignment: which column belongs to which cluster.
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster": Y, "Sample Names": list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    return dataframe

######################################## Plot the reconstruction error vs stabilities and select the optimum number of signature ####################################################
Example #3
Source File: agglomerative.py From aggregation with Apache License 2.0 | 6 votes |
def __agglomerative__(self,markings):
    """
    runs an initial agglomerative clustering over the given markings

    :param markings: sequence of equally sized numeric markings (one per row)
    :return: the scipy linkage matrix produced by Ward clustering of the
        Euclidean distances between markings
    """
    observations = np.array(markings)

    # linkage() interprets any 2-D input as raw observation vectors, so handing
    # it the *square* distance matrix clusters the rows of that matrix rather
    # than the markings themselves.  Pass the condensed pdist output instead.
    condensed_dist = pdist(observations, metric='euclidean')

    # use ward metric to do the actual clustering
    row_clusters = linkage(condensed_dist, method='ward')
    return row_clusters
Example #4
Source File: marker.py From scprep with GNU General Public License v3.0 | 6 votes |
def _cluster_tissues(tissue_names, cluster_names, tissue_labels, cluster_labels, s, c):
    """Order tissues by hierarchical clustering of their mean size and color.

    For every (tissue, cluster) pair the mean of ``s`` and of ``c`` over the
    matching points is computed; the per-tissue feature vector is the
    concatenation of those means over all clusters.

    Returns the leaf order (indices into ``tissue_names``) of the linkage
    built from the normalized feature matrix.
    """
    # Convert the label sequences once instead of on every inner iteration.
    tissue_label_arr = np.array(tissue_labels)
    cluster_label_arr = np.array(cluster_labels)

    tissue_features = []
    for tissue in tissue_names:
        in_tissue = tissue_label_arr == tissue
        tissue_data = []
        for cluster in cluster_names:
            tissue_cluster_idx = np.where(in_tissue & (cluster_label_arr == cluster))
            # Row 0 is size, row 1 is color; mean over the selected points.
            tissue_data.append(
                np.vstack([s[tissue_cluster_idx], c[tissue_cluster_idx]]).mean(axis=1)
            )
        tissue_features.append(np.concatenate(tissue_data))
    tissue_features = np.array(tissue_features)

    # normalize by the global (Frobenius) norm of the feature matrix
    tissue_features = tissue_features / np.sqrt(np.sum(tissue_features ** 2))
    tissues_order = hierarchy.leaves_list(hierarchy.linkage(tissue_features))
    return tissues_order
Example #5
Source File: subroutines.py From SigProfilerExtractor with BSD 2-Clause "Simplified" License | 6 votes |
def dendrogram(data, threshold, layer_directory):
    """Cluster the columns of ``data`` and save the dendrogram as a PDF.

    Parameters
    ----------
    data : object exposing ``.columns`` and convertible with ``np.array``
        (presumably a pandas DataFrame -- confirm against callers).
    threshold : distance used both as the dendrogram colour threshold and as
        the ``fcluster`` cut-off.
    layer_directory : directory in which ``dendrogram.pdf`` is written.

    Returns
    -------
    pandas.DataFrame indexed by "Sample Names" with a single "Cluster" column
    holding the flat cluster id of every column of ``data``.
    """
    colnames = data.columns
    data = np.array(data)

    # Single-linkage clustering of the columns using cosine distance.
    Z = hierarchy.linkage(data.T, 'single', 'cosine')

    plt.figure(figsize=(15, 9))
    hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures")
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # ``figsize`` is not a savefig() parameter (the size is fixed by
    # plt.figure above); depending on the Matplotlib version the stray keyword
    # is ignored or rejected, so it is no longer passed.
    plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)

    # Flat cluster assignment: which column belongs to which cluster.
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster": Y, "Sample Names": list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    return dataframe

######################################## Plot the reconstruction error vs stabilities and select the optimum number of signature ####################################################
Example #6
Source File: plot.py From pypath with GNU General Public License v3.0 | 6 votes |
def make_plot(self):
    """Draw a left-oriented, single-colour dendrogram of ``self.data``.

    Stores the linkage matrix in ``self.z``, the axes in ``self.ax`` and the
    dendrogram dictionary in ``self.dendro``, then styles the axes (tick label
    fonts, white x-grid, hidden spines and ticks, light background).
    """
    self.z = hc.linkage(self.data, method='average')
    self.ax = self.fig.add_subplot(1, 1, 1)

    # NOTE(review): linkage runs on the rows of self.data while the labels come
    # from its columns -- presumably the data is square; confirm with callers.
    self.dendro = hc.dendrogram(
        self.z,
        labels=self.data.columns,
        color_threshold=0,
        orientation='left',
        ax=self.ax,
        link_color_func=lambda x: self.color,
    )

    # Apply the tick-label font to both axes (y first, then x).
    for tick_label in self.ax.get_yticklabels():
        tick_label.set_fontproperties(self.fp_ticklabel)
    for tick_label in self.ax.get_xticklabels():
        tick_label.set_fontproperties(self.fp_ticklabel)

    self.ax.xaxis.grid(True, color='#FFFFFF', lw=1, ls='solid')
    self.ax.yaxis.grid(False)
    self.ax.set_axisbelow(True)
    self.ax.set_facecolor('#EAEAF2')

    # Hide the frame by giving every spine zero line width.
    for spine in self.ax.spines.values():
        spine.set_lw(0)

    self.ax.tick_params(which='both', length=0)
Example #7
Source File: cluster.py From cesi with Apache License 2.0 | 6 votes |
def getClusters(self, embed):
    """Group embedding indices into flat hierarchical clusters.

    ``embed`` is indexed with ``0 .. len(embed) - 1``; each entry is copied
    into a float32 matrix with ``self.p.embed_dims`` columns.  The matrix is
    clustered with the metric, linkage method and distance threshold taken
    from ``self.p``.

    Returns a list of clusters, each a list of indices into ``embed``.
    """
    n_items = len(embed)
    X = np.empty((n_items, self.p.embed_dims), np.float32)
    for row in range(n_items):
        X[row, :] = embed[row]

    condensed = pdist(X, metric=self.p.metric)
    tree = linkage(condensed, method=self.p.linkage)
    # fcluster labels start at 1; shift them to 0-based list positions.
    labels = fcluster(tree, t=self.p.thresh_val, criterion='distance') - 1

    clusters = [[] for _ in range(max(labels) + 1)]
    for item, label in enumerate(labels):
        clusters[label].append(item)

    return clusters
Example #8
Source File: diarizationFunctions.py From pyBK with MIT License | 6 votes |
def performClusteringLinkage(segmentBKTable, segmentCVTable, N_init, linkageCriterion,linkageMetric ):
    """Build a table of flat clusterings for N_init, N_init-1, ..., 1 clusters.

    The 'jaccard' metric clusters ``segmentBKTable``; every other metric
    clusters ``segmentCVTable``.  Column ``i`` of the returned table holds the
    1-based cluster label of every segment when the tree is cut into
    ``N_init - i`` clusters.

    Returns ``(clusteringTable, k)`` with ``k == N_init``.
    """
    from scipy.cluster.hierarchy import linkage
    from scipy import cluster

    observations = segmentBKTable if linkageMetric == 'jaccard' else segmentCVTable

    n_segments = np.size(segmentCVTable, 0)
    clusteringTable = np.zeros([n_segments, N_init])
    Z = linkage(observations, method=linkageCriterion, metric=linkageMetric)

    for col in range(N_init):
        # cut_tree labels are 0-based; store them 1-based.
        clusteringTable[:, col] = cluster.hierarchy.cut_tree(Z, N_init - col).T + 1

    k = N_init
    print('done')
    return clusteringTable, k
Example #9
Source File: allocation.py From finance_ml with MIT License | 6 votes |
def get_hrp(cov, corr):
    """Build a hierarchical portfolio from covariance and correlation data.

    Params
    ------
    cov: pd.DataFrame
    corr: pd.DataFrame

    Returns
    -------
    pd.Series
        Result of ``get_rec_bipart`` sorted by index.
    """
    # Single-linkage tree on the correlation-derived distances.
    link = sch.linkage(get_corr_dist(corr), 'single')

    # Map the quasi-diagonal positions back to the labels of ``corr``.
    ordered_labels = corr.index[get_quasi_diag(link)].tolist()

    allocation = get_rec_bipart(cov, ordered_labels)
    return allocation.sort_index()
Example #10
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_cluster_distances_with_distance_threshold():
    """Single-linkage clusters must respect ``distance_threshold``.

    Within a cluster every point has a neighbour closer than the threshold;
    every point of another cluster is at least the threshold away.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.randint(-10, 10, size=(n_samples, 3))

    distance_threshold = 4
    model = AgglomerativeClustering(
        n_clusters=None,
        distance_threshold=distance_threshold,
        linkage="single")
    labels = model.fit(X).labels_

    D = pairwise_distances(X, metric="minkowski", p=2)
    # Put inf on the diagonal so min() never picks a point's distance to itself.
    np.fill_diagonal(D, np.inf)

    for label in np.unique(labels):
        members = labels == label
        member_rows = D[members]
        nearest_inside = member_rows[:, members].min(axis=0)
        nearest_outside = member_rows[:, ~members].min(axis=0)
        max_in_cluster_distance = nearest_inside.max()
        min_out_cluster_distance = nearest_outside.min()
        # A singleton cluster only contains the inf diagonal entry.
        if members.sum() > 1:
            assert max_in_cluster_distance < distance_threshold
        assert min_out_cluster_distance >= distance_threshold
Example #11
Source File: ttclust.py From TTClust with GNU General Public License v3.0 | 6 votes |
def generate_graphs(clusters_list, output, size, linkage, cutoff, distances, traj):
    """
    DESCRIPTION
    Produce the clustering figures: bar plot, dendrogram, histogram and,
    for fewer than 10000 frames, the distance matrix plot.

    Args:
        clusters_list (list): list of cluster
        output (string): output name for graph
        size (int): number of frames
        linkage (numpy array): matrix linkage
        cutoff (float): cutoff distance value for clustering (in the dendogram)
        distances(numpy array): distance matrix
        traj (Trajectory): trajectory for time usage in axis barplot
    Return:
        colors_list (list) to be used with 2D distance projection graph
    """
    colors_list = plot_barplot(clusters_list, output, size, traj)
    plot_dendro(linkage, output, cutoff, colors_list, clusters_list)
    plot_hist(clusters_list, output, colors_list)

    n_frames = distances.shape[0]
    if n_frames >= 10000:
        # The distance matrix plot is skipped for large trajectories.
        printScreenLogfile("Too many frames! The RMSD distance matrix will not be generated")
    else:
        plot_distmat(distances, output)

    return colors_list
Example #12
Source File: common.py From plastering with MIT License | 6 votes |
def hier_clustering(d, threshold=3):
    """Cluster source ids by TF-IDF similarity of their token lists.

    Parameters
    ----------
    d : dict mapping a source id to a list of string tokens.
    threshold : distance cut-off handed to ``fcluster`` (criterion 'distance').

    Returns
    -------
    OrderedDict mapping the cluster id (as ``str``) to the list of source ids
    in that cluster, ordered from the largest cluster to the smallest.
    """
    # Validate the input shape before it is used (previously these checks ran
    # only after ``d.keys()`` had already been called).
    assert isinstance(d, dict)
    assert isinstance(list(d.values())[0], list)
    assert isinstance(list(d.values())[0][0], str)

    # Snapshot the keys so documents and cluster labels stay aligned.
    srcids = list(d.keys())

    # Tokens are already split; re-join them and let the vectorizer split on
    # whitespace again.
    vectorizer = TfidfVectorizer(tokenizer=lambda x: x.split())
    doc = [' '.join(d[srcid]) for srcid in srcids]
    vect = vectorizer.fit_transform(doc)

    # TODO: Make vect aligned to the required format
    z = linkage(vect.toarray(), metric='cityblock', method='complete')
    b = hier.fcluster(z, threshold, criterion='distance')

    cluster_dict = defaultdict(list)
    for srcid, cluster_id in zip(srcids, b):
        cluster_dict[str(cluster_id)].append(srcid)

    return OrderedDict(
        sorted(cluster_dict.items(), key=lambda item: len(item[1]), reverse=True))
Example #13
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_identical_points():
    """Identical points must be clustered together with a sparse connectivity.

    Ensures identical points are handled correctly when using mst with a
    sparse connectivity matrix, for every linkage criterion.
    """
    X = np.array([[0, 0, 0], [0, 0, 0],
                  [1, 1, 1], [1, 1, 1],
                  [2, 2, 2], [2, 2, 2]])
    true_labels = np.array([0, 0, 1, 1, 2, 2])
    connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
    # Symmetrize the k-neighbours graph.
    connectivity = 0.5 * (connectivity + connectivity.T)
    connectivity, n_components = _fix_connectivity(X,
                                                   connectivity,
                                                   'euclidean')

    # 'average' used to be listed twice, which left 'complete' untested.
    for linkage in ('single', 'complete', 'average', 'ward'):
        clustering = AgglomerativeClustering(n_clusters=3,
                                             linkage=linkage,
                                             connectivity=connectivity)
        clustering.fit(X)

        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                         true_labels), 1)
Example #14
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_single_linkage_clustering():
    """Single linkage must recover the two moons and the two circles exactly."""
    # Two emblematic cases, generated and checked one after the other.
    datasets = (
        (make_moons, dict(noise=0.05, random_state=42)),
        (make_circles, dict(factor=0.5, noise=0.025, random_state=42)),
    )
    for make_dataset, params in datasets:
        points, expected_labels = make_dataset(**params)
        clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
        clustering.fit(points)
        score = normalized_mutual_info_score(clustering.labels_, expected_labels)
        assert_almost_equal(score, 1)
Example #15
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_unstructured_linkage_tree():
    """Unstructured linkage trees must have the expected number of nodes.

    ``ward_tree`` is checked first, then every builder in ``_TREE_BUILDERS``.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(50, 100)
    expected_n_nodes = 2 * X.shape[1] - 1

    builders = [ward_tree]
    builders.extend(_TREE_BUILDERS.values())

    for tree_builder in builders:
        for this_X in (X, X[0]):
            # A number of clusters is specified only to raise the warning and
            # exercise the warning code path.
            with ignore_warnings():
                children, _, n_leaves, _ = assert_warns(
                    UserWarning, tree_builder, this_X.T, n_clusters=10)
            assert_equal(len(children) + n_leaves, expected_n_nodes)
Example #16
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_structured_linkage_tree():
    """Structured linkage trees: node count and invalid-input errors."""
    # Check that we obtain the correct solution for structured linkage trees.
    rng = np.random.RandomState(0)
    # ``np.bool`` was a deprecated alias of the builtin and has been removed
    # from NumPy; the builtin ``bool`` is the supported dtype spelling.
    mask = np.ones([10, 10], dtype=bool)
    # Avoiding a mask with only 'True' entries
    mask[4:7, 4:7] = 0
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    for tree_builder in _TREE_BUILDERS.values():
        children, n_components, n_leaves, parent = \
            tree_builder(X.T, connectivity)
        n_nodes = 2 * X.shape[1] - 1
        assert len(children) + n_leaves == n_nodes
        # Check that the builder raises a ValueError with a connectivity
        # matrix of the wrong shape
        assert_raises(ValueError,
                      tree_builder, X.T, np.ones((4, 4)))
        # Check that fitting with no samples raises an error
        assert_raises(ValueError,
                      tree_builder, X.T[:0], connectivity)
Example #17
Source File: test_hierarchical.py From Mastering-Elasticsearch-7.0 with MIT License | 6 votes |
def test_linkage_misc():
    """Miscellaneous checks on linkage: bad arguments and affinity variants."""
    rng = np.random.RandomState(42)
    X = rng.normal(size=(5, 5))

    # Unknown linkage names and a wrongly shaped connectivity are rejected.
    assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
    assert_raises(ValueError, linkage_tree, X, linkage='foo')
    assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))

    # Smoke test FeatureAgglomeration
    FeatureAgglomeration().fit(X)

    # A precomputed cosine distance matrix must give the same children as
    # affinity="cosine" on the raw data.
    precomputed = linkage_tree(cosine_distances(X), affinity="precomputed")
    assert_array_equal(precomputed[0], linkage_tree(X, affinity="cosine")[0])

    # A callable affinity must give the same children as its string name.
    from_callable = linkage_tree(X, affinity=manhattan_distances)
    assert_array_equal(from_callable[0], linkage_tree(X, affinity="manhattan")[0])
Example #18
Source File: _clustergram.py From dash-bio with MIT License | 6 votes |
def _get_clusters(self):
    """Compute linkage matrices for the requested dimensions.

    Columns are clustered when ``self._cluster`` is "col" or "all"; rows are
    clustered when it is "row" or "all".

    Returns:
    - tuple: ``(Zcol, Zrow)``; an entry is None when that dimension was not
      clustered.
    """
    Zcol = Zrow = None

    if self._cluster in ("col", "all"):
        # Columns are clustered by treating them as rows of the transpose.
        col_dists = self._dist_fun(np.transpose(self._data), metric=self._col_dist)
        Zcol = self._link_fun(col_dists, optimal_ordering=self._optimal_leaf_order)

    if self._cluster in ("row", "all"):
        row_dists = self._dist_fun(self._data, metric=self._row_dist)
        Zrow = self._link_fun(row_dists, optimal_ordering=self._optimal_leaf_order)

    return (Zcol, Zrow)
Example #19
Source File: sqtl.py From pancanatlas_code_public with MIT License | 6 votes |
def get_col_linkage(combined_df, method='ward', metric='cosine'):
    """Return the column linkage of ``combined_df``, cached on disk.

    The linkage matrix is stored under ``~/cache/alt_splice_heatmap/sqtl`` in
    a file named after ``method`` and ``metric``, next to the index and column
    names it was computed for.  When the cache file exists it is loaded and
    the stored index/columns are asserted to match ``combined_df``.

    Parameters
    ----------
    combined_df : pandas.DataFrame whose columns are clustered.
    method : linkage method passed to ``hc.linkage``.
    metric : distance metric used for the pairwise column distances.

    Returns
    -------
    numpy.ndarray linkage matrix.
    """
    CACHE_DIR = os.path.expanduser('~/cache/alt_splice_heatmap/sqtl')
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)
    col_linkage_cache_path = os.path.join(CACHE_DIR, 'col_linkage_%s_%s.npy' % (method, metric))
    idx_linkage_cache_path = os.path.join(CACHE_DIR, 'idx.npy')
    col_name_cache_path = os.path.join(CACHE_DIR, 'col_names.npy')

    if os.path.exists(col_linkage_cache_path):
        # print() with a single argument behaves the same on Python 2 and 3.
        print("Loading linkage from %s" % col_linkage_cache_path)
        col_linkage = np.load(col_linkage_cache_path)
        assert np.array_equal(np.load(idx_linkage_cache_path), combined_df.index)
        assert np.array_equal(np.load(col_name_cache_path), combined_df.columns)
    else:
        print("Calculating linkage")
        # linkage() ignores ``metric`` when it is given condensed distances,
        # so the metric has to be applied in pdist(); previously the distances
        # were always Euclidean regardless of ``metric``.
        # NOTE(review): cache files written before this fix were computed with
        # Euclidean distances under the same file name -- clear them if needed.
        col_dists = sp.distance.pdist(combined_df.values.T, metric=metric)
        col_linkage = hc.linkage(col_dists, method=method)
        np.save(col_linkage_cache_path, col_linkage)
        np.save(idx_linkage_cache_path, combined_df.index)
        np.save(col_name_cache_path, combined_df.columns)
    return col_linkage
Example #20
Source File: zodiac.py From plastering with MIT License | 6 votes |
def create_cluster_map(self, bow, srcids):
    """Cluster bag-of-words rows and map each cluster id to its source ids.

    Parameters
    ----------
    bow : 2-D array with one row per source id.
    srcids : sequence of source ids aligned with the rows of ``bow``.

    Returns
    -------
    dict mapping a flat cluster id to the list of source ids in that cluster.

    Raises
    ------
    IndexError when the linkage contains fewer than three distinct merge
    distances.
    """
    z = linkage(bow, metric='cityblock', method='complete')

    # A set has no defined order, so indexing it picked arbitrary distances.
    # Sort the distinct merge distances and cut halfway between the second
    # and third smallest.
    dists = sorted(set(z[:, 2]))
    thresh = (dists[1] + dists[2]) / 2
    self.logger.info('Threshold: {0}'.format(thresh))

    b = hier.fcluster(z, thresh, criterion='distance')
    assert bow.shape[0] == len(b)
    assert len(b) == len(srcids)

    cluster_map = {}
    for cid, srcid in zip(b, srcids):
        # Append in place instead of rebuilding the list on every insert.
        cluster_map.setdefault(cid, []).append(srcid)

    # Report the number of clusters (previously the number of samples).
    self.logger.info('# of clusters: {0}'.format(len(cluster_map)))
    self.logger.info('sizes of clustsers:{0}'.format(sorted(map(len, cluster_map.values()))))
    return cluster_map
Example #21
Source File: sdm.py From scedar with MIT License | 5 votes |
def sort_x_by_d(x, dmat=None, metric="cosine", linkage="auto",
                n_eval_rounds=None, optimal_ordering=False,
                nprocs=None, verbose=False):
    """Return the leaf ids of a hierarchical clustering tree built on ``x``.

    Parameters
    ----------
    x, dmat, metric, nprocs : forwarded to ``SampleDistanceMatrix`` to obtain
        the pairwise distance matrix.
    linkage : linkage method forwarded to ``HClustTree.hclust_tree``.
    optimal_ordering : forwarded to ``HClustTree.hclust_tree``.
    n_eval_rounds, verbose : accepted for interface compatibility; not
        forwarded by this function.

    Returns
    -------
    The value of ``leaf_ids()`` of the constructed tree.
    """
    dmat = SampleDistanceMatrix(x, d=dmat, metric=metric,
                                nprocs=nprocs)._d
    # Honour the caller's ``linkage`` choice; it used to be overridden by a
    # hard-coded "auto" (which is still the default).
    xhct = HClustTree.hclust_tree(dmat, linkage=linkage,
                                  is_euc_dist=(metric == "euclidean"),
                                  optimal_ordering=optimal_ordering)
    return xhct.leaf_ids()
Example #22
Source File: precluster.py From texta with GNU General Public License v3.0 | 5 votes |
def __init__(self, words, vectors, number_of_steps=21, metric="cosine", linkage="complete"):
    """Store the clustering inputs and configuration on the instance.

    ``words`` and ``vectors`` are the data; ``number_of_steps``, ``metric``
    and ``linkage`` are kept as configuration attributes of the same names.
    """
    # Data to cluster.
    self.words, self.vectors = words, vectors
    # Clustering configuration.
    self.number_of_steps = number_of_steps
    self.metric, self.linkage = metric, linkage
Example #23
Source File: utils.py From lens with Apache License 2.0 | 5 votes |
def hierarchical_ordering_indices(columns, correlation_matrix):
    """Order columns by hierarchical clustering of their correlations.

    Parameters
    ----------
    columns: iterable of str
        Names of columns.
    correlation_matrix: np.ndarray
        Matrix of correlation coefficients between columns.

    Returns
    -------
    indices: iterable of int
        Column indices in dendrogram leaf order; the natural order when
        there are two columns or fewer.
    """
    n_columns = len(columns)
    if n_columns <= 2:
        return list(range(n_columns))

    # NaN correlations are treated as zero before computing distances.
    cleaned = np.where(np.isnan(correlation_matrix), 0, correlation_matrix)
    pairwise_dists = distance.pdist(cleaned, metric="euclidean")
    tree = hierarchy.linkage(pairwise_dists, method="average")
    # no_plot=True: only the leaf ordering is needed, nothing is drawn.
    layout = hierarchy.dendrogram(tree, no_plot=True, color_threshold=-np.inf)
    return layout["leaves"]
Example #24
Source File: precluster.py From texta with GNU General Public License v3.0 | 5 votes |
def __call__(self):
    """Cluster the stored words and return the selected clustering.

    Returns a list of clusters, each a list of ``(label, vector)`` tuples;
    an empty list when there are no words or no vectors.
    """
    if not (len(self.words) != 0 and len(self.vectors) != 0):
        return []

    if len(self.words) == 1:
        # A lone word is duplicated in place so a pairwise distance exists.
        self.words.append(self.words[0])
        self.vectors.append(self.vectors[0])

    condensed = scidist.pdist(np.array(self.vectors), self.metric)
    tree = hier.linkage(condensed, self.linkage)
    dendrogram = self._linkage_matrix_to_dendrogram(tree, self.words, self.vectors)
    clusterings = self._create_clusterings(dendrogram)

    result = []
    for cluster in self._find_optimal_clustering(clusterings):
        members = [(node.label, node.vector) for node in _get_cluster_nodes(cluster)]
        result.append(members)
    return result
Example #25
Source File: corClust.py From Kitsune-py with MIT License | 5 votes |
def cluster(self,maxClust):
    """Cluster using the correlation distances and cap the cluster size.

    ``maxClust`` is clamped to the range ``[1, self.n]`` before the tree is
    handed to ``__breakClust__``, whose result is returned.
    """
    D = self.corrDist()
    # linkage expects the condensed form: the strict upper triangle of D.
    upper_triangle = D[np.triu_indices(self.n, 1)]
    Z = linkage(upper_triangle)

    maxClust = min(max(maxClust, 1), self.n)

    feature_map = self.__breakClust__(to_tree(Z), maxClust)
    return feature_map

# a recursive helper function which breaks down the dendrogram branches until all clusters have no more than maxClust elements
Example #26
Source File: sdm.py From scedar with MIT License | 5 votes |
def hclust_linkage(dmat, linkage="complete", n_eval_rounds=None,
                   is_euc_dist=False, optimal_ordering=False, verbose=False):
    """Compute a scipy linkage matrix from a square distance matrix.

    With ``linkage="auto"`` several linkage methods are tried and the one with
    the largest round-weighted sum of bipartition MDLs is used.  The methods
    that are only offered for Euclidean distances ("centroid", "median",
    "ward") are tried only when ``is_euc_dist`` is true.

    Returns the linkage matrix produced by ``sch.linkage``.
    """
    dmat = SampleDistanceMatrix.num_correct_dist_mat(np.array(dmat, dtype="float"))
    n = dmat.shape[0]

    if linkage == "auto":
        try_linkages = ("single", "complete", "average", "weighted")
        if is_euc_dist:
            try_linkages += ("centroid", "median", "ward")

        # Evaluate at least ceil(log2(n)) bipartition rounds.
        if n_eval_rounds is None:
            rounds = np.log2(n)
        else:
            rounds = max(np.log2(n), n_eval_rounds)
        n_eval_rounds = int(np.ceil(rounds))
        round_weights = np.arange(1, n_eval_rounds + 1)

        ltype_mdl_list = []
        for candidate in try_linkages:
            candidate_tree = HClustTree.hclust_tree(dmat, linkage=candidate)
            bipar_counts = candidate_tree.n_round_bipar_cnt(n_eval_rounds)
            round_mdls = np.array(
                [MultinomialMdl(np.array(counts)).mdl for counts in bipar_counts])
            # Later rounds contribute less: round r is weighted by 1 / r.
            ltype_mdl_list.append(np.sum(round_mdls / round_weights))

        linkage = try_linkages[ltype_mdl_list.index(max(ltype_mdl_list))]
        if verbose:
            print(linkage, tuple(zip(try_linkages, ltype_mdl_list)),
                  sep="\n")

    # sch.linkage takes the condensed form of the distance matrix.
    dmat_sf = spspatial.distance.squareform(dmat)
    return sch.linkage(dmat_sf, method=linkage,
                       optimal_ordering=optimal_ordering)
Example #27
Source File: ontobio-assoc.py From ontobio with BSD 3-Clause "New" or "Revised" License | 5 votes |
def plot_subject_term_matrix(ont, aset, args):
    """Print the association dataframe and the dendrogram of its rows.

    ``aset.as_dataframe`` is called with ``args.subjects``; the rows are
    clustered with complete linkage on the default ``pdist`` distances and
    the dendrogram dictionary is printed.  ``ont`` is not used here.
    """
    import numpy as np
    import pandas as pd
    from scipy.cluster.hierarchy import dendrogram, linkage
    from scipy.spatial.distance import pdist

    df = aset.as_dataframe(subjects=args.subjects)
    print('DF={}'.format(df))

    row_distances = pdist(df)
    tree = linkage(row_distances, method='complete')
    layout = dendrogram(tree)
    print(layout)
Example #28
Source File: LogClustering.py From loglizer with MIT License | 5 votes |
def _offline_clustering(self, X):
    """Run complete-linkage clustering on ``X`` and extract representatives.

    Distances use ``self._distance_metric``; the tree is cut at
    ``self.max_dist``.  The flat cluster index is handed to
    ``self._extract_representatives`` together with ``X``.
    """
    print('Starting offline clustering...')

    condensed = pdist(X, metric=self._distance_metric)
    tree = linkage(condensed, 'complete')
    cluster_index = fcluster(tree, self.max_dist, criterion='distance')
    self._extract_representatives(X, cluster_index)

    n_instances = X.shape[0]
    n_clusters = len(self.representatives)
    print('Processed {} instances.'.format(n_instances))
    print('Found {} clusters offline.\n'.format(n_clusters))
    # print('The representive vectors are:')
    # pprint.pprint(self.representatives.tolist())
Example #29
Source File: common.py From plastering with MIT License | 5 votes |
def get_word_clusters(sentence_dict):
    """Cluster source ids by the lowercase words found in their metadata.

    Parameters
    ----------
    sentence_dict : dict mapping a source id to a dict of
        ``metadata_type -> sequence of strings``.

    Returns
    -------
    dict mapping a flat cluster id to the list of source ids in that cluster.

    Raises
    ------
    IndexError when the linkage contains fewer than four distinct merge
    distances.  Errors raised by ``linkage`` propagate to the caller.
    """
    srcids = list(sentence_dict.keys())
    sentences = []
    for srcid in srcids:
        # Join the pieces of each metadata type, one type per line, then keep
        # only the runs of lowercase letters as words.
        parts = [''.join(sent) for sent in sentence_dict[srcid].values()]
        text = '\n'.join(parts)
        sentences.append(' '.join(re.findall('[a-z]+', text)))

    vect = TfidfVectorizer()
    bow = vect.fit_transform(sentences).toarray()

    # The former bare ``except: pdb.set_trace()`` was a debugging leftover
    # that blocked non-interactive runs and then failed on an unbound ``z``;
    # let the original error surface instead.
    z = linkage(bow, metric='cityblock', method='complete')

    # A set has no defined order, so indexing it picked arbitrary distances.
    # Sort the distinct merge distances and cut halfway between the third
    # and fourth smallest.
    dists = sorted(set(z[:, 2]))
    thresh = (dists[2] + dists[3]) / 2
    print("Threshold: ", thresh)

    b = hier.fcluster(z, thresh, criterion='distance')
    cluster_dict = defaultdict(list)
    for srcid, cluster_id in zip(srcids, b):
        cluster_dict[cluster_id].append(srcid)
    return dict(cluster_dict)
Example #30
Source File: rep_dists.py From pancanatlas_code_public with MIT License | 5 votes |
def heatmap_dists(data, norm=False, labels=None, metric='euclidean', method='ward'):
    """Plot a pairwise-distance heatmap with rows/columns in dendrogram order.

    Parameters
    ----------
    data : 2-D array-like with one observation per row; when ``labels`` is
        None and ``data`` has an ``index`` attribute, that index is used.
    norm : accepted for interface compatibility; not used in this function.
    labels : optional sequence of tick labels, one per row of ``data``.
    metric : distance metric passed to ``pdist``.
    method : linkage method passed to ``sch.linkage``.

    Returns
    -------
    ``(fig, (ax, cax))``: the figure, the heatmap axes and the colorbar axes.
    """
    fig, (ax, cax) = plt.subplots(ncols=2, figsize=(7 * 1.05, 7),
                                  gridspec_kw={"width_ratios": [1, 0.05]})
    if labels is None:
        try:
            labels = data.index
        except AttributeError:
            pass
    n = data.shape[0]
    assert labels is None or len(labels) == n

    dists = ssd.pdist(data, metric=metric)
    # ``metric`` only matters to linkage() for raw observations; the input is
    # already condensed distances, so it is not passed again.
    linkage = sch.linkage(dists, method=method)
    dendro = sch.dendrogram(linkage, no_plot=True)
    order = dendro['leaves']

    # Reorder both axes of the square distance matrix by leaf order.
    sq_form_dists = ssd.squareform(dists)[order][:, order]
    assert sq_form_dists.shape == (n, n)

    hmap = ax.imshow(sq_form_dists, aspect='auto')
    ax.set_xticks(np.arange(n))
    ax.set_yticks(np.arange(n))
    if labels is not None:
        # A plain list cannot be indexed with a list of positions; go through
        # an array so lists, arrays and pandas indexes all work.
        ordered_labels = np.asarray(labels)[order]
        ax.set_xticklabels(ordered_labels, rotation=90)
        ax.set_yticklabels(ordered_labels)

    cb = plt.colorbar(hmap, cax=cax)
    return fig, (ax, cax)

# Tasks