Python scipy.cluster.hierarchy.dendrogram() Examples
The following are 30 code examples of scipy.cluster.hierarchy.dendrogram().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scipy.cluster.hierarchy, or try the search function.
Example #1
Source File: subroutines.py From SigProfilerExtractor with BSD 2-Clause "Simplified" License | 6 votes |
def dendrogram(data, threshold, layer_directory):
    """Cluster the columns of ``data`` and save the dendrogram as a PDF.

    The columns are clustered with single linkage on cosine distance, the
    tree is drawn on a new 15x9 inch figure, and the figure is written to
    ``<layer_directory>/dendrogram.pdf``.

    Parameters
    ----------
    data : object with ``.columns`` that ``np.array`` can convert
        Presumably a pandas DataFrame with one sample per column -- TODO
        confirm against the caller.
    threshold : float
        Distance used both as the colour threshold of the plot and as the
        cut height for the flat clusters.
    layer_directory : str
        Directory that receives ``dendrogram.pdf``.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Sample Names" with one "Cluster" column holding the flat
        cluster id of every column of ``data``.
    """
    colnames = data.columns
    data = np.array(data)
    Z = hierarchy.linkage(data.T, 'single', 'cosine')

    plt.figure(figsize=(15, 9))
    hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures")
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # ``figsize`` is a figure-creation option, not a savefig option; current
    # Matplotlib rejects unknown savefig keywords, so it is not passed here.
    plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)

    # Which data point goes to which cluster: the flat cluster ids are
    # reported against the original column names.
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster": Y, "Sample Names": list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    return dataframe

######################################## Plot the reconstruction error vs stabilities and select the optimum number of signature ####################################################
Example #2
Source File: subroutines.py From SigProfilerExtractor with BSD 2-Clause "Simplified" License | 6 votes |
def dendrogram(data, threshold, layer_directory):
    """Cluster the columns of ``data`` and save the dendrogram as a PDF.

    The columns are clustered with single linkage on cosine distance, the
    tree is drawn on a new 15x9 inch figure, and the figure is written to
    ``<layer_directory>/dendrogram.pdf``.

    Parameters
    ----------
    data : object with ``.columns`` that ``np.array`` can convert
        Presumably a pandas DataFrame with one sample per column -- TODO
        confirm against the caller.
    threshold : float
        Distance used both as the colour threshold of the plot and as the
        cut height for the flat clusters.
    layer_directory : str
        Directory that receives ``dendrogram.pdf``.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Sample Names" with one "Cluster" column holding the flat
        cluster id of every column of ``data``.
    """
    colnames = data.columns
    data = np.array(data)
    Z = hierarchy.linkage(data.T, 'single', 'cosine')

    plt.figure(figsize=(15, 9))
    hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures")
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # ``figsize`` is a figure-creation option, not a savefig option; current
    # Matplotlib rejects unknown savefig keywords, so it is not passed here.
    plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)

    # Which data point goes to which cluster: the flat cluster ids are
    # reported against the original column names.
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster": Y, "Sample Names": list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    return dataframe

######################################## Plot the reconstruction error vs stabilities and select the optimum number of signature ####################################################
Example #3
Source File: regions.py From TOBIAS with MIT License | 6 votes |
def assign_colors(self):
    """Colour every tree node according to the cluster it belongs to.

    Reads ``self.linkage_clusters`` (cluster id -> list of node ids) and
    ``self.n`` and stores the result in ``self.node_color``, a list with
    ``2 * self.n - 1`` entries. Nodes of clusters with more than one member
    share one colour from a four-colour palette that is reused cyclically in
    ascending cluster-id order; every other node stays "black".
    """
    palette = ["blue", "green", "red", "orange"]
    members_by_cluster = self.linkage_clusters
    colors = ["black"] * (2 * self.n - 1)

    used = 0  # number of multi-member clusters coloured so far
    for cluster_id in sorted(members_by_cluster.keys()):
        members = members_by_cluster[cluster_id]
        if len(members) <= 1:
            continue
        shade = palette[used % len(palette)]
        for node in members:
            colors[node] = shade
        used += 1

    # One entry per possible node (leaves and merges) of the tree.
    self.node_color = colors
Example #4
Source File: plot.py From pypath with GNU General Public License v3.0 | 6 votes |
def make_plot(self):
    """Cluster ``self.data`` and draw a left-oriented dendrogram on ``self.fig``.

    Stores the average-linkage matrix in ``self.z``, the new axes in
    ``self.ax`` and the dictionary returned by ``hc.dendrogram`` in
    ``self.dendro``. Every link is drawn in ``self.color`` and the tick
    labels use ``self.fp_ticklabel``.
    """
    self.z = hc.linkage(self.data, method='average')
    self.ax = self.fig.add_subplot(1, 1, 1)
    axes = self.ax

    self.dendro = hc.dendrogram(
        self.z,
        labels=self.data.columns,
        color_threshold=0,
        orientation='left',
        ax=axes,
        link_color_func=lambda x: self.color,
    )

    # Apply the tick-label font to the y labels first, then the x labels.
    for tick_label in axes.get_yticklabels():
        tick_label.set_fontproperties(self.fp_ticklabel)
    for tick_label in axes.get_xticklabels():
        tick_label.set_fontproperties(self.fp_ticklabel)

    # White vertical grid on a light background, without spines or ticks.
    axes.xaxis.grid(True, color='#FFFFFF', lw=1, ls='solid')
    axes.yaxis.grid(False)
    axes.set_axisbelow(True)
    axes.set_facecolor('#EAEAF2')
    for spine in axes.spines.values():
        spine.set_lw(0)
    axes.tick_params(which='both', length=0)
Example #5
Source File: env_corr.py From glosim with MIT License | 6 votes |
def plotdendro(Z, ncluster, filename, rep_ind):
    """Draw a truncated dendrogram of ``Z`` and save it to ``filename``.

    Parameters
    ----------
    Z : linkage matrix accepted by ``sc.dendrogram``.
    ncluster : int
        Number of non-singleton clusters kept by the ``'lastp'`` truncation.
    filename : str
        Output path handed to ``plt.savefig``.
    rep_ind :
        Not used by this function; kept so existing callers keep working.
    """
    plt.figure(figsize=(10, 15))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    sc.dendrogram(Z, truncate_mode='lastp', p=ncluster, orientation='right',
                  leaf_rotation=90., leaf_font_size=20., show_contracted=False)
    # ``papertype`` and ``frameon`` were removed from Matplotlib's savefig and
    # now raise TypeError, so they are no longer passed. The leaf-coordinate
    # list the original built here was never read and has been dropped.
    plt.savefig(filename, dpi=100, facecolor='w', edgecolor='w',
                orientation='portrait', format=None, transparent=True,
                bbox_inches=None, pad_inches=0.1)
Example #6
Source File: ontobio-assoc.py From ontobio with BSD 3-Clause "New" or "Revised" License | 6 votes |
def run_query_associations(ont, aset, args):
    """Print the queried associations and plot them as a plotly heatmap.

    When ``args.dendrogram`` is set the work is delegated to
    ``plot_subject_term_matrix`` and nothing else happens. Otherwise every
    (subject, term) pair returned by ``aset.query_associations`` is printed,
    the pairs are turned into a matrix with ``tuple_to_matrix``, both axes are
    passed through ``mk_axis`` and the heatmap is sent to plotly under the
    name 'labelled-heatmap'.
    """
    if args.dendrogram:
        plot_subject_term_matrix(ont, aset, args)
        return

    import plotly.plotly as py
    import plotly.graph_objs as go

    pairs = aset.query_associations(subjects=args.subjects)
    for (subject, term) in pairs:
        print("{} {}".format(subject, term))

    z, xaxis, yaxis = tuple_to_matrix(pairs)
    xaxis = mk_axis(xaxis, aset, args)
    yaxis = mk_axis(yaxis, aset, args)
    logging.info("PLOTTING: {} x {} = {}".format(xaxis, yaxis, z))

    heatmap = go.Heatmap(z=z, x=xaxis, y=yaxis)
    py.plot([heatmap], filename='labelled-heatmap')
    #plot_dendrogram(z, xaxis, yaxis)

# TODO: fix this really dumb implementation
Example #7
Source File: agglomerative.py From atap with Apache License 2.0 | 6 votes |
def plot_dendrogram(self, **kwargs):
    """Plot a dendrogram built from ``self.children``.

    ``self.children`` supplies the first two linkage columns (the merged
    pair of each step). The merge index ``0 .. n_merges - 1`` is used as a
    stand-in for both the third and the fourth linkage column, so link heights
    reflect merge order only.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``dendrogram``.
    """
    n_merges = self.children.shape[0]
    distance = np.arange(n_merges)
    position = np.arange(n_merges)

    # Create the linkage matrix expected by scipy.
    linkage_matrix = np.column_stack(
        [self.children, distance, position]
    ).astype(float)

    # Plot the corresponding dendrogram on a fresh 15x7 figure.
    plt.subplots(figsize=(15, 7))
    dendrogram(linkage_matrix, **kwargs)

    # Hide the x ticks and their labels. Booleans are required here: the
    # string 'off' is truthy in Matplotlib >= 3.0 and would leave them visible.
    plt.tick_params(axis='x', bottom=False, top=False, labelbottom=False)
    plt.tight_layout()
    plt.show()
Example #8
Source File: construction.py From FinanceHub with MIT License | 6 votes |
def plot_dendrogram(self, show_chart=True, save_path=None, figsize=(8, 8), threshold=None):
    """
    Draw the dendrogram of ``self.link`` with scipy's own plotting routine.
    :param show_chart: If True, the chart is shown with ``plt.show()``.
    :param save_path: file path to save the figure to; nothing is saved when None.
    :param figsize: tuple with the figure dimensions.
    :param threshold: height of the dendrogram used to color the nodes. If None,
                      scipy's standard behaviour applies, which cuts the dendrogram
                      at 70% of its height (0.7*max(self.link[:,2])).
    """
    plt.figure(figsize=figsize)
    sch.dendrogram(
        self.link,
        orientation='left',
        labels=self.sort_ix,
        color_threshold=threshold,
    )
    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path, pad_inches=1, dpi=400)

    if show_chart:
        plt.show()

    plt.close()
Example #9
Source File: KEGG_clustering.py From BioData with MIT License | 5 votes |
def hClust_euclidean(genome_df):
    """Return ``genome_df`` with its rows reordered by hierarchical clustering.

    Rows are clustered with average linkage on Euclidean distance; the
    dendrogram is computed without plotting and its leaf labels (the index
    values of ``genome_df``) give the new row order.
    """
    tree = linkage(genome_df, method='average', metric='euclidean')
    row_names = genome_df.index.tolist()
    layout = dendrogram(tree, no_plot=True, labels=row_names, get_leaves=True)
    # 'ivl' holds the leaf labels in the order they appear in the dendrogram.
    ordered_names = list(layout['ivl'])
    return genome_df.reindex(ordered_names)
Example #10
Source File: __init__.py From pyani with MIT License | 5 votes |
def clean_axis(axis):
    """Strip ticks, tick labels and the frame from an axis.

    :param axis: object exposing ``get_xaxis()``, ``get_yaxis()`` and
        ``spines`` (presumably a matplotlib Axes -- confirm against callers)
    """
    x_axis = axis.get_xaxis()
    x_axis.set_ticks([])
    y_axis = axis.get_yaxis()
    y_axis.set_ticks([])
    for frame_edge in axis.spines.values():
        frame_edge.set_visible(False)

# Add dendrogram and axes to passed figure
Example #11
Source File: heatmap.py From CompareM with GNU General Public License v3.0 | 5 votes |
def plotDendrogram(self, matrix, axis, clusteringThreshold, orientation):
    """Draw a black dendrogram for the rows of ``matrix`` and cut it into flat clusters.

    Parameters
    ----------
    matrix : observations accepted by ``dist.pdist``.
    axis : axes object whose x and y ticks are removed after plotting.
    clusteringThreshold : float
        Fraction of the largest merge height at which the tree is cut.
    orientation : str
        Passed straight to ``cluster.dendrogram``.

    Returns
    -------
    tuple
        ``(flat cluster id per row, leaf order of the dendrogram)``.
    """
    pairwise = dist.pdist(matrix)
    # NOTE(review): the square distance matrix is handed to linkage together
    # with a metric, so its rows are treated as observation vectors; confirm
    # this is intended rather than clustering on ``pairwise`` directly.
    tree = cluster.linkage(dist.squareform(pairwise), method='average', metric='cityblock')
    layout = cluster.dendrogram(tree, orientation=orientation, link_color_func=lambda k: 'k')

    cut_height = clusteringThreshold * max(tree[:, 2])
    flat_ids = cluster.fcluster(tree, cut_height, 'distance')

    axis.set_xticks([])
    axis.set_yticks([])
    return flat_ids, layout['leaves']
Example #12
Source File: utils.py From lens with Apache License 2.0 | 5 votes |
def hierarchical_ordering_indices(columns, correlation_matrix):
    """Return the hierarchical-cluster ordering of the columns.

    Parameters
    ----------
    columns: iterable of str
        Names of columns.
    correlation_matrix: np.ndarray
        Matrix of correlation coefficients between columns. NaN entries are
        treated as 0 before distances are computed.

    Returns
    -------
    indices: iterable of int
        Indices with order of columns. With two or fewer columns the
        original order ``0 .. len(columns) - 1`` is returned unchanged.
    """
    n_columns = len(columns)
    if n_columns <= 2:
        # Nothing meaningful to cluster: keep the input order.
        return list(range(n_columns))

    filled = np.where(np.isnan(correlation_matrix), 0, correlation_matrix)
    row_distances = distance.pdist(filled, metric="euclidean")
    tree = hierarchy.linkage(row_distances, method="average")
    layout = hierarchy.dendrogram(tree, no_plot=True, color_threshold=-np.inf)
    return layout["leaves"]
Example #13
Source File: cluster_line_markings.py From Data-digging with MIT License | 5 votes |
def plot_dendrogram(the_linkage, theid, d_max):
    """Save a two-panel diagnostic figure for one hierarchical clustering.

    The left panel shows the dendrogram of ``the_linkage``; the right panel
    shows the merge distance (third linkage column) per iteration. Both panels
    get a grey horizontal line at ``d_max``. The figure is written to
    ``dendrograms/dendrogram_<theid>_<cluster_method>.png``, where
    ``cluster_method`` is a module-level name read by this function.

    Parameters
    ----------
    the_linkage : linkage matrix (indexed as ``the_linkage[:, 2]``).
    theid : value interpolated into the output file name.
    d_max : float
        Height of the reference line drawn on both panels.
    """
    fig = plt.figure(figsize=(12, 5))

    axL = fig.add_subplot(1, 2, 1)
    axL.set_title('Hierarchical %s Clustering Dendrogram' % cluster_method)
    axL.set_xlabel('sample index')
    axL.set_ylabel('distance')
    dendrogram(
        the_linkage,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=8.,  # font size for the x axis labels
    )
    # Freeze the current x-limits so the reference line does not rescale the axis.
    xlimits = axL.get_xlim()
    axL.plot(xlimits, np.array([d_max, d_max]), linestyle='-', color="#777777")
    axL.set_xlim(xlimits)

    axR = fig.add_subplot(1, 2, 2)
    axR.plot(the_linkage[:, 2])
    # Freeze the current x-limits here as well.
    xlimits = axR.get_xlim()
    axR.plot(xlimits, np.array([d_max, d_max]), linestyle='-', color="#777777")
    axR.set_xlim(xlimits)
    axR.set_xlabel('iteration')
    axR.set_ylabel('distance')

    plt.tight_layout()
    plt.savefig('dendrograms/dendrogram_%s_%s.png' % (theid, cluster_method),
                facecolor='None', edgecolor='None')

    # Close exactly the figure created above. pyplot.close() treats any string
    # other than 'all' as a figure label, so the previous close('') and
    # close('All') calls did not reliably release this figure.
    plt.close(fig)
Example #14
Source File: document_clustering.py From text-analytics-with-python with Apache License 2.0 | 5 votes |
def plot_hierarchical_clusters(linkage_matrix, movie_data, figure_size=(8,12)):
    """Plot a left-oriented dendrogram labelled with movie titles and save it.

    Parameters
    ----------
    linkage_matrix : linkage matrix accepted by ``dendrogram``.
    movie_data : object indexable by ``'Title'`` whose result exposes
        ``.values.tolist()`` (presumably a pandas DataFrame -- confirm).
    figure_size : tuple
        Figure size handed to ``plt.subplots``.

    The figure is written to 'ward_hierachical_clusters.png' at 200 dpi.
    """
    # set size
    plt.subplots(figsize=figure_size)
    movie_titles = movie_data['Title'].values.tolist()

    # plot dendrogram
    dendrogram(linkage_matrix, orientation="left", labels=movie_titles)

    # Hide the x ticks and their labels. Booleans are required here: the
    # string 'off' is truthy in Matplotlib >= 3.0 and would leave them visible.
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.tight_layout()
    plt.savefig('ward_hierachical_clusters.png', dpi=200)

# build ward's linkage matrix
Example #15
Source File: heatmap.py From SqueezeMeta with GNU General Public License v3.0 | 5 votes |
def plotDendrogram(self, matrix, axis, clusteringThreshold, orientation):
    """Draw a black dendrogram for the rows of ``matrix`` and cut it into flat clusters.

    Parameters
    ----------
    matrix : observations accepted by ``dist.pdist``.
    axis : axes object whose x and y ticks are removed after plotting.
    clusteringThreshold : float
        Fraction of the largest merge height at which the tree is cut.
    orientation : str
        Passed straight to ``cluster.dendrogram``.

    Returns
    -------
    tuple
        ``(flat cluster id per row, leaf order of the dendrogram)``.
    """
    pairwise = dist.pdist(matrix)
    # NOTE(review): the square distance matrix is handed to linkage together
    # with a metric, so its rows are treated as observation vectors; confirm
    # this is intended rather than clustering on ``pairwise`` directly.
    tree = cluster.linkage(dist.squareform(pairwise), method='average', metric='cityblock')
    layout = cluster.dendrogram(tree, orientation=orientation, link_color_func=lambda k: 'k')

    cut_height = clusteringThreshold * max(tree[:, 2])
    flat_ids = cluster.fcluster(tree, cut_height, 'distance')

    axis.set_xticks([])
    axis.set_yticks([])
    return flat_ids, layout['leaves']
Example #16
Source File: KEGG_clustering.py From BioData with MIT License | 5 votes |
def hClust_correlation(genome_df):
    """Return ``genome_df`` with its rows reordered by hierarchical clustering.

    Rows are clustered with single linkage on correlation distance; the
    dendrogram is computed without plotting and its leaf labels (the index
    values of ``genome_df``) give the new row order.
    """
    tree = linkage(genome_df, method='single', metric='correlation')
    row_names = genome_df.index.tolist()
    layout = dendrogram(tree, no_plot=True, labels=row_names, get_leaves=True)
    # 'ivl' holds the leaf labels in the order they appear in the dendrogram.
    ordered_names = list(layout['ivl'])
    return genome_df.reindex(ordered_names)
Example #17
Source File: __init__.py From xai with MIT License | 5 votes |
def _plot_correlation_dendogram(
        corr: pd.DataFrame,
        cols: List[str],
        plt_kwargs={}):
    """
    Plot a dendrogram of a correlation matrix, labelled with the columns
    provided. The chart links variables hierarchically by average linkage on
    the distance ``1 - correlation``, so the most correlated variables are
    joined first (closest to the label side of the left-oriented plot).

    :Example:

    columns_to_include=["age", "loan", "gender"]
    xai._plot_correlation_dendogram(df, cols=columns_to_include)

    :param corr: square correlation matrix; rounded to 4 decimals before use
    :param cols: leaf labels, one per row of ``corr``
    :param plt_kwargs: keyword arguments forwarded to ``plt.figure``
    :returns: Null
    :rtype: None
    """
    rounded = np.round(corr, 4)
    # Condensed form of the distance matrix 1 - correlation.
    condensed = hc.distance.squareform(1 - rounded)
    tree = hc.linkage(condensed, method="average")

    plt.figure(**plt_kwargs)
    hc.dendrogram(
        tree,
        labels=cols,
        orientation="left",
        leaf_font_size=16,
    )
    plt.show()
Example #18
Source File: heatmap.py From traitar with GNU General Public License v3.0 | 5 votes |
def exportFlatClusterData(filename, new_row_header, new_column_header, xt, ind1, ind2):
    """Export the clustered results as text files, flat clusters only (no tree).

    Two tab-separated files are written next to ``filename``:

    * ``<filename with '.pdf' replaced by '.txt'>`` -- a header row, a row of
      column flat-cluster ids (``ind2``) and one row per data row holding the
      row name, its flat-cluster id and the values.
    * the same path with ``'.txt'`` replaced by ``'.cdt'`` -- a header row, an
      ``EWEIGHT`` row of ones and one row per data row holding the row name
      twice, a weight of ``'1'`` and the values.

    Parameters
    ----------
    filename : str
        Path of the plot the data belongs to.
    new_row_header : sequence of str
        Row names in plotting order.
    new_column_header : sequence of str
        Column names in plotting order.
    xt : sequence of rows (list of lists or 2-D array)
        Clustered data matrix in plotting order.
    ind1 : sequence
        Row flat-cluster ids; indexed in the reversed row order, exactly as the
        original implementation did.
    ind2 : sequence
        Column flat-cluster ids.
    """
    column_names = list(new_column_header)

    # The clusters, dendrogram and flat clusters are drawn bottom-up, so the
    # rows are reversed to match.
    row_names = new_row_header[::-1]
    rows = xt[::-1]

    # str methods replace the Python-2-only ``string.replace`` / ``string.join``
    # helpers, and ``map`` results are materialised before list concatenation,
    # so the function runs on Python 3 as well.
    txt_path = filename.replace('.pdf', '.txt')
    with open(txt_path, 'w') as export_text:
        export_text.write('\t'.join(['UID', 'row_clusters-flat'] + column_names) + '\n')
        export_text.write('\t'.join(['column_clusters-flat', ''] + [str(c) for c in ind2]) + '\n')
        for i, row in enumerate(rows):
            fields = [row_names[i], str(ind1[i])] + [str(v) for v in row]
            export_text.write('\t'.join(fields) + '\n')

    # Export as CDT file
    cdt_path = txt_path.replace('.txt', '.cdt')
    with open(cdt_path, 'w') as export_cdt:
        export_cdt.write('\t'.join(['UNIQID', 'NAME', 'GWEIGHT'] + column_names) + '\n')
        export_cdt.write('\t'.join(['EWEIGHT', '', ''] + ['1'] * len(column_names)) + '\n')
        for i, row in enumerate(rows):
            fields = [row_names[i]] * 2 + ['1'] + [str(v) for v in row]
            export_cdt.write('\t'.join(fields) + '\n')

################# Create Custom Color Gradients #################
#http://matplotlib.sourceforge.net/examples/pylab_examples/custom_cmap.html
Example #19
Source File: regions.py From TOBIAS with MIT License | 5 votes |
def cluster(self, threshold=0.5, method="average"):
    """Cluster the overlap dictionary and record the resulting clusters.

    Calls ``self.overlap_to_distance()`` first. With more than one name, the
    linkage matrix (``self.linkage_mat``) and flat labels (``self.labels``)
    are computed from ``self.distance_mat``; merges at or below ``threshold``
    are collected in ``self.linkage_clusters`` and each surviving cluster gets
    a ``member_idx`` / ``member_names`` entry in ``self.clusters``. With a
    single name, one trivial cluster is recorded. Finally
    ``self.get_cluster_names()`` and ``self.assign_colors()`` are called.
    """
    self.overlap_to_distance()

    if len(self.names) > 1:
        self.linkage_mat = linkage(squareform(self.distance_mat), method)
        # ordering of the dendrogram
        self.labels = fcluster(self.linkage_mat, threshold, criterion="distance")

        # Find clusters below threshold: start from singletons and fold in
        # every merge whose height does not exceed the threshold.
        self.linkage_clusters = {leaf: [leaf] for leaf in range(self.n)}
        for step, merge in enumerate(self.linkage_mat):
            left = int(merge[0])
            right = int(merge[1])
            merged_id = self.n + step
            height = merge[2]
            if height <= threshold:
                combined = self.linkage_clusters[left] + self.linkage_clusters[right] + [merged_id]
                self.linkage_clusters[merged_id] = combined
                del self.linkage_clusters[left]
                del self.linkage_clusters[right]

        # Add member-names to clusters; ids >= self.n are internal merge
        # nodes, so only ids below self.n are kept as members.
        for cluster_id in self.linkage_clusters:
            member_idx = [idx for idx in self.linkage_clusters[cluster_id] if idx < self.n]
            self.clusters[cluster_id] = {"member_idx": member_idx}
            self.clusters[cluster_id]["member_names"] = [self.names[idx] for idx in member_idx]
    else:
        # only one TF
        self.linkage_clusters = {0: [0]}
        self.linkage_mat = np.array([[0]])
        self.clusters[0] = {"member_idx": [0]}
        self.clusters[0]["member_names"] = [self.names[idx] for idx in self.clusters[0]["member_idx"]]

    self.get_cluster_names()  # Set names of clusters
    self.assign_colors()
Example #20
Source File: motif_clust.py From TOBIAS with MIT License | 5 votes |
def plot_dendrogram(label, linkage, font_size, out, title, threshold, dpi):
    """Plot a right-oriented dendrogram with the threshold marked in red.

    Parameter:
    ----------
    label : list
        List of labels
    linkage : ndarray
        The hierarchical clustering of rows or cols encoded as a linkage matrix.
    font_size : int
        Leaf label font size
    out : String
        Output path
    title : String
        Plot title
    threshold : float
        Dendrogram cluster threshold; also the position of the red line
    dpi : int
        dpi of the saved plot

    A ValueError raised while laying out or saving the figure is reported on
    stdout and otherwise ignored.
    """
    width = 10.0
    # Height grows with the number of labels to keep a good aspect ratio.
    height = width * len(label)/(width*3)

    plt.figure(figsize=(width, height))
    plt.title(title, fontsize=20)
    plt.axvline(x=threshold, color="red")

    dendrogram(
        linkage,
        color_threshold=threshold,
        labels=label,
        leaf_font_size=font_size,
        orientation="right",
    )

    try:
        plt.tight_layout()
        plt.savefig(out, dpi=dpi)
    except ValueError as err:
        print("Skipped plotting of dendrogram.")
        print("Error: " + str(err))

#--------------------------------------------------------------------------------------------------------#
Example #21
Source File: __init__.py From EDeN with MIT License | 5 votes |
def dendrogram(data, vectorizer, method="ward", color_threshold=1, size=10, filename=None):
    """Plot a right-oriented dendrogram of the vectorized ``data``.

    ``method`` is one of
    "median","centroid","weighted","single","ward","complete","average".

    Each item's label is its truthy ``graph['id']`` attribute; when no item
    provides one, the row numbers are used instead. The figure is saved to
    ``filename`` when given and shown otherwise.
    """
    from sklearn import metrics
    from scipy.cluster.hierarchy import linkage, dendrogram

    data = list(data)

    # Collect the ids that are present and truthy.
    candidate_ids = (graph.graph.get('id', None) for graph in data)
    labels = [graph_id for graph_id in candidate_ids if graph_id]

    # transform input into sparse vectors
    data_matrix = vectorizer.transform(data)

    # Fall back to row numbers when no graph carried an id.
    if not labels:
        labels = [str(row) for row in range(data_matrix.shape[0])]

    # NOTE(review): linkage receives the square pairwise-distance matrix, so
    # its rows are treated as observation vectors -- confirm this is intended.
    distance_matrix = metrics.pairwise.pairwise_distances(data_matrix)
    linkage_matrix = linkage(distance_matrix, method=method)

    plt.figure(figsize=(size, size))
    dendrogram(
        linkage_matrix,
        color_threshold=color_threshold,
        labels=labels,
        orientation='right',
    )

    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
Example #22
Source File: hierarchy.py From malss with MIT License | 5 votes |
def dendrogram(self):
    """Return the dendrogram of ``self.model``, truncated to its last merges.

    At most 12 clusters are shown ('lastp' truncation); fewer when
    ``self.model`` has fewer than 12 rows.
    """
    shown = min(12, len(self.model))
    return dendrogram(self.model, truncate_mode='lastp', p=shown)
Example #23
Source File: cluster.py From glosim with MIT License | 5 votes |
def plotdendro(Z, ncluster, filename, rep_ind):
    """Draw a truncated dendrogram of ``Z`` and save it to ``filename``.

    Parameters
    ----------
    Z : linkage matrix accepted by ``sc.dendrogram``.
    ncluster : int
        Number of non-singleton clusters kept by the ``'lastp'`` truncation.
    filename : str
        Output path handed to ``plt.savefig``.
    rep_ind :
        Not used by this function; kept so existing callers keep working. It
        was only referenced by leaf-annotation code that had been commented
        out.
    """
    plt.figure(figsize=(10, 15))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    sc.dendrogram(Z, truncate_mode='lastp', p=ncluster, orientation='right',
                  leaf_rotation=90., leaf_font_size=20., show_contracted=False)
    # ``papertype`` and ``frameon`` were removed from Matplotlib's savefig and
    # now raise TypeError, so they are no longer passed. The ``coord`` / ``num``
    # bookkeeping that fed the disabled annotation code was never read and has
    # been dropped.
    plt.savefig(filename, dpi=100, facecolor='w', edgecolor='w',
                orientation='portrait', format=None, transparent=True,
                bbox_inches=None, pad_inches=0.1)
Example #24
Source File: ontobio-assoc.py From ontobio with BSD 3-Clause "New" or "Revised" License | 5 votes |
def plot_subject_term_matrix(ont, aset, args):
    """Cluster the subject/term dataframe and print the dendrogram data.

    The dataframe comes from ``aset.as_dataframe(subjects=args.subjects)``.
    Its rows are clustered with complete linkage on ``pdist`` distances, and
    both the dataframe and the dictionary returned by ``sch.dendrogram`` are
    printed. ``ont`` is not used here.
    """
    import numpy as np
    import pandas as pd
    import scipy.cluster.hierarchy as sch
    import scipy.spatial as scs

    frame = aset.as_dataframe(subjects=args.subjects)
    print('DF={}'.format(frame))

    pairwise = scs.distance.pdist(frame)
    tree = sch.linkage(pairwise, method='complete')
    layout = sch.dendrogram(tree)
    print(layout)
Example #25
Source File: env_corr.py From glosim with MIT License | 5 votes |
def clusterdistmatfull(distmatrixfile, sim, mode='average', plot=False):
    """Build the full hierarchical clustering tree of ``sim`` and plot it.

    Parameters
    ----------
    distmatrixfile :
        Not used by this function.
    sim : array-like supporting element-wise ``*``
        Values that are squared before being handed to ``sc.linkage``.
    mode : str
        Linkage method passed to ``sc.linkage``.
    plot : bool
        Not used by this function; the dendrogram is always drawn.

    Returns
    -------
    tuple
        ``(c_list, Z)`` where ``c_list`` is the array of dendrogram leaf ids in
        plotting order and ``Z`` is the linkage matrix.
    """
    # Compute the clustering on dist^2 so that the average distance of a
    # cluster with another is the RMS distance.
    sim2 = sim * sim
    Z = sc.linkage(sim2, mode)

    # Get the full tree.
    plt.figure(figsize=(10, 15))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    dendo = sc.dendrogram(Z, orientation='right', leaf_rotation=90.,
                          leaf_font_size=20., show_contracted=False)

    c_list = np.array(dendo['leaves'])
    # The count is the number of distinct leaf ids in the dendrogram.
    nbclst = len(Counter(c_list))
    # Formatted call instead of the Python 2 print statement: same output on
    # Python 2 and valid syntax on Python 3.
    print("Number of clusters {}".format(nbclst))
    return c_list, Z
Example #26
Source File: rep_dists.py From pancanatlas_code_public with MIT License | 5 votes |
def heatmap_dists(data, norm=False, labels=None, metric='euclidean', method='ward'):
    """Plot the pairwise-distance heatmap of ``data`` in clustered order.

    Parameters
    ----------
    data : 2-D observations with a ``.shape``; ``data.index`` supplies the
        labels when it exists and ``labels`` is None.
    norm : bool
        Not used by this function.
    labels : sequence or None
        One tick label per row of ``data``. Any sequence is accepted,
        including a plain list.
    metric : str
        Distance metric for ``ssd.pdist`` (also forwarded to ``sch.linkage``).
    method : str
        Linkage method.

    Returns
    -------
    tuple
        ``(fig, (ax, cax))`` -- the figure, the heatmap axes and the colorbar axes.
    """
    fig, (ax, cax) = plt.subplots(ncols=2, figsize=(7 * 1.05, 7),
                                  gridspec_kw={"width_ratios": [1, 0.05]})

    if labels is None:
        try:
            labels = data.index
        except AttributeError:
            pass  # data carries no index: leave the ticks unlabelled

    n = data.shape[0]
    assert labels is None or len(labels) == n

    dists = ssd.pdist(data, metric=metric)
    linkage = sch.linkage(dists, metric=metric, method=method)
    dendro = sch.dendrogram(linkage, no_plot=True)
    order = dendro['leaves']

    # Reorder both rows and columns of the square distance matrix.
    sq_form_dists = ssd.squareform(dists)[order][:, order]
    assert sq_form_dists.shape == (n, n)

    hmap = ax.imshow(sq_form_dists, aspect='auto')
    ax.set_xticks(np.arange(n))
    ax.set_yticks(np.arange(n))
    if labels is not None:
        # np.asarray allows a plain list to be indexed with the leaf order.
        ordered_labels = np.asarray(labels)[order]
        ax.set_xticklabels(ordered_labels, rotation=90)
        ax.set_yticklabels(ordered_labels)
    plt.colorbar(hmap, cax=cax)
    return fig, (ax, cax)

# Tasks
Example #27
Source File: Plotter.py From CAN_Reverse_Engineering with GNU General Public License v3.0 | 5 votes |
def plot_dendrogram(a_timer: PipelineTimer, linkage_matrix, threshold: float, vehicle_number: str, force: bool = False):
    """Plot the clustering dendrogram for one vehicle and save it to disk.

    The output file is ``dendrogram_<vehicle_number>.<figure_format>``, where
    ``figure_format`` (and ``figure_transp``) are module-level names read by
    this function.

    Parameters
    ----------
    a_timer : PipelineTimer
        Not used by this function.
    linkage_matrix : linkage matrix passed to ``dendrogram`` as ``Z``.
    threshold : float
        Height of the dashed horizontal 'cluster threshold' line.
    vehicle_number : str
        Used in the file name and the plot title.
    force : bool
        When the output file already exists: True deletes and re-plots it,
        False prints a message and returns without plotting.
    """
    dendrogram_filename = "dendrogram_" + vehicle_number + "." + figure_format
    if path.isfile(dendrogram_filename):
        if force:
            remove(dendrogram_filename)
        else:
            print("Dendrogram already plotted. Skipping...")
            return

    plt.figure(figsize=(7, 7), dpi=600)
    dendrogram(Z=linkage_matrix, orientation='top', distance_sort='ascending', no_labels=True)
    plt.title("Dendrogram of Agglomerative Clustering for Vehicle " + vehicle_number)
    plt.xlabel("Signals Observed")
    plt.ylabel("Single Linkage Cluster Merge Distance")

    xmin, xmax = plt.xlim()
    # Add a 25% opacity dashed black line across the plot at the cluster threshold.
    plt.hlines(y=threshold, xmin=xmin, xmax=xmax, alpha=0.25, colors='black',
               linestyle='dashed', label='cluster threshold')
    plt.legend(loc='upper right')

    print("\tPlotting dendrogram and saving to " + dendrogram_filename)
    # The keyword is ``bbox_inches``; the previous spelling ``bbox_iches`` is
    # not a savefig argument.
    savefig(dendrogram_filename,
            bbox_inches='tight',
            pad_inches=0.0,
            dpi=600,
            format=figure_format,
            transparent=figure_transp)
    plt.close()
    print("\t\tComplete...")
Example #28
Source File: _clustergram.py From dash-bio with MIT License | 5 votes |
def _sort_traces(self, rdt, cdt):
    """Order the dendrogram cluster traces with the background trace first.

    The background trace (above threshold) becomes trace 0; the remaining
    traces are ordered top-to-bottom (row dendrogram) or left-to-right
    (column dendrogram).

    Parameters:
    - rdt (list[dict]): The row dendrogram cluster traces. Sorted in place
        by descending maximum 'x' as a side effect.
    - cdt (list[dict]): The column dendrogram cluster traces. Sorted in
        place by descending maximum 'y' as a side effect.

    Returns:
    - tuple: The sorted row dendrogram clusters and column dendrogram
        clusters.
    """
    def _background_first(traces, background_key, remainder_key):
        # Sort ``traces`` in place so the background comes first, then order
        # a copy of the remaining traces independently.
        if len(traces) == 0:
            return []
        traces.sort(key=background_key)
        remainder = traces[1:]
        remainder.sort(key=remainder_key)
        return [traces[0]] + remainder

    # Row dendrogram: background has the largest 'x'; the rest go top-to-bottom.
    sorted_rows = _background_first(
        rdt,
        lambda trace: -1 * max(trace["x"]),
        lambda trace: -1 * min(trace["y"]),
    )
    # Column dendrogram: background has the largest 'y'; the rest go left-to-right.
    sorted_cols = _background_first(
        cdt,
        lambda trace: -1 * max(trace["y"]),
        lambda trace: min(trace["x"]),
    )
    return (sorted_rows, sorted_cols)
Example #29
Source File: data_viewing.py From lumin with Apache License 2.0 | 5 votes |
def plot_rank_order_dendrogram(df:pd.DataFrame, threshold:float=0.8, savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) \
        -> Dict[str,Union[List[str],float]]:
    r'''
    Plots a dendrogram of the features in df, clustered via Spearman's rank correlation coefficient.
    Also returns the sets of features whose correlation coefficients exceed the threshold.

    Arguments:
        df: Pandas DataFrame containing data
        threshold: Threshold on correlation coefficient
        savename: Optional name of file to which to save the plot
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance

    Returns:
        Dict of sets of features with correlation coefficients greater than the threshold and cluster distance.
        Each entry is keyed by the integer id of the merged cluster and holds 'children' and 'distance'.
    '''

    corr = np.round(scipy.stats.spearmanr(df).correlation, 4)
    # Abs because the negative of a feature is a trivial transformation: information unaffected
    corr_condensed = hc.distance.squareform(1-np.abs(corr))
    z = hc.linkage(corr_condensed, method='average', optimal_ordering=True)

    with sns.axes_style('white'), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_large, (0.5*len(df.columns))))
        hc.dendrogram(z, labels=df.columns, orientation='left', leaf_font_size=settings.lbl_sz, color_threshold=1-threshold)
        plt.xlabel("Distance (1 - |Spearman's Rank Correlation Coefficient|)", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        if savename is not None:
            plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()

    feats = df.columns
    n_merges = len(z)
    sets = {}

    def _members(node_id):
        # Ids up to n_merges are original features; larger ids refer to an
        # earlier merge, whose entry is consumed by the new, larger set.
        if node_id <= n_merges:
            return [feats[int(node_id)]]
        return sets.pop(int(node_id))['children']

    for step, merge in enumerate(z):
        if merge[2] > 1-threshold:
            continue
        left = _members(merge[0])
        right = _members(merge[1])
        sets[1 + step + n_merges] = {'children': [*left, *right], 'distance': merge[2]}
    return sets
Example #30
Source File: rep_dists.py From pancanatlas_code_public with MIT License | 4 votes |
def heatmap_dists_with_dendro(data, norm=False, labels=None, metric='euclidean', method='ward'):
    """Plot the clustered pairwise-distance heatmap of ``data`` under its dendrogram.

    Parameters
    ----------
    data : 2-D observations with a ``.shape``; ``data.index`` supplies the
        labels when it exists and ``labels`` is None.
    norm : bool
        When True the reordered distances are z-scored over the whole matrix,
        negated, and drawn with the 'cubehelix' colormap clipped to [-4, 4].
    labels : sequence or None
        One tick label per row of ``data``. Any sequence is accepted,
        including a plain list.
    metric : str
        Distance metric for ``ssd.pdist`` (also forwarded to ``sch.linkage``).
    method : str
        Linkage method.

    Returns
    -------
    None
        The figure is left as the current pyplot figure.
    """
    fig = plt.figure(figsize=(7 * 1.30, 7 * 1.25))
    gs = gridspec.GridSpec(ncols=3, nrows=2, height_ratios=[.25, 1],
                           width_ratios=[.25, 1, .05], hspace=0)
    dend_top_ax = fig.add_subplot(gs[0, 1])
    hmap_ax = fig.add_subplot(gs[1, 1])
    cbar_ax = fig.add_subplot(gs[1, 2])
    dend_top_ax.set_axis_off()

    if labels is None:
        try:
            labels = data.index
        except AttributeError:
            pass  # data carries no index: leave the ticks unlabelled

    n = data.shape[0]
    assert labels is None or len(labels) == n

    dists = ssd.pdist(data, metric=metric)
    linkage = sch.linkage(dists, metric=metric, method=method)
    dendro = sch.dendrogram(linkage, ax=dend_top_ax, color_threshold=0,
                            above_threshold_color='black')
    order = dendro['leaves']

    # Reorder both rows and columns of the square distance matrix.
    sq_form_dists = ssd.squareform(dists)[order][:, order]
    assert sq_form_dists.shape == (n, n)

    if norm:
        sq_form_dists = spst.zscore(sq_form_dists, axis=None)
        sq_form_dists *= -1
        cmap = plt.get_cmap('cubehelix')
        vmin = -4
        vmax = 4
    else:
        cmap = plt.get_cmap()
        vmin = None
        vmax = None

    hmap = hmap_ax.imshow(sq_form_dists, aspect='auto', cmap=cmap, vmin=vmin, vmax=vmax)
    hmap_ax.set_xticks(np.arange(n))
    hmap_ax.set_yticks(np.arange(n))
    if labels is not None:
        # np.asarray allows a plain list to be indexed with the leaf order.
        ordered_labels = np.asarray(labels)[order]
        hmap_ax.set_xticklabels(ordered_labels, rotation=90)
        hmap_ax.set_yticklabels(ordered_labels)
    plt.colorbar(hmap, cax=cbar_ax)
    return