Python seaborn.clustermap() Examples
The following are 30 code examples of seaborn.clustermap().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: RnaseqqcReport.py From CGATPipelines with MIT License | 6 votes |
def __call__(self, data, path):
    """Render a clustered heatmap of ``data`` and wrap it in a result block.

    Only the first ``data.shape[0]`` columns are plotted, so the plotted
    matrix never has more columns than rows.  The colour vector returned by
    ``self.getColorBar`` is drawn along both margins and a legend with one
    entry per unique label is placed on the column-dendrogram axes.

    :param data: DataFrame to cluster (sliced positionally with ``iloc``).
    :param path: not used by this method.
    :return: ``ResultBlocks`` wrapping one ``ResultBlock`` whose text
        references the matplotlib figure number of the plot.
    """
    colorbar, _factors, unique, xkcd = self.getColorBar(data)
    n_samples = data.shape[0]
    data = data.iloc[:, :n_samples]
    label_to_colour = dict(zip(unique, xkcd))
    print(data.head())

    seaborn.set(font_scale=.5)
    grid = seaborn.clustermap(data, row_colors=colorbar, col_colors=colorbar)

    # set_visible() returns None, so wrapping it in plt.setp() (as before)
    # handed setp nothing to act on.  Call it directly.
    grid.ax_heatmap.yaxis.set_visible(False)

    # Zero-sized bars draw nothing but register one legend handle per label.
    for label in unique:
        grid.ax_col_dendrogram.bar(
            0, 0,
            color=seaborn.xkcd_rgb[label_to_colour[label]],
            label=label,
            linewidth=0)
    grid.ax_col_dendrogram.legend(loc="center", ncol=len(unique))

    return ResultBlocks(ResultBlock(
        '''#$mpl %i$#\n''' % grid.cax.figure.number,
        title='ClusterMapPlot'))
Example #2
Source File: basenji_test_genes.py From basenji with Apache License 2.0 | 6 votes |
def clustermap(gene_values, out_pdf, color=None, table=False):
    """Generate a clustered heatmap using seaborn and save it to ``out_pdf``.

    Args:
        gene_values: matrix passed to ``sns.clustermap`` (and to ``np.save``
            when ``table`` is true).
        out_pdf: output path of the figure.
        color: colormap forwarded as ``cmap``.
        table: if true, also save ``gene_values`` with ``np.save`` next to
            the figure, using ``out_pdf`` without its extension as the path.
    """
    import os

    if table:
        # np.save appends ".npy" itself, so hand it the extension-less path.
        np.save(os.path.splitext(out_pdf)[0], gene_values)

    # sns.clustermap creates its own figure; a preceding plt.figure() would
    # only open an empty figure that is never closed.
    grid = sns.clustermap(
        gene_values,
        metric='euclidean',
        cmap=color,
        xticklabels=False,
        yticklabels=False)
    grid.ax_heatmap.set_xlabel('Experiments')
    grid.ax_heatmap.set_ylabel('Genes')

    figure = grid.ax_heatmap.figure
    figure.savefig(out_pdf)
    plt.close(figure)
Example #3
Source File: construction.py From FinanceHub with MIT License | 6 votes |
def plot_corr_matrix(self, save_path=None, show_chart=True, cmap='vlag', linewidth=0, figsize=(10, 10)):
    """
    Draws the correlation matrix as a clustered heatmap, reusing the
    precomputed linkage ``self.link`` for both rows and columns.
    :param save_path: local path for the output image. If provided, the figure is saved there.
    :param show_chart: If True, shows the chart.
    :param cmap: matplotlib colormap.
    :param linewidth: width of the grid lines of the correlation matrix.
    :param figsize: tuple with figsize dimensions.
    """
    corr = self.corr
    clustermap_options = dict(
        method=self.method,
        metric=self.metric,
        cmap=cmap,
        figsize=figsize,
        linewidths=linewidth,
        col_linkage=self.link,
        row_linkage=self.link,
    )
    sns.clustermap(corr, **clustermap_options)
    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path, pad_inches=1, dpi=400)

    if show_chart:
        plt.show()

    plt.close()
Example #4
Source File: heatmap.py From django-djangui with GNU General Public License v3.0 | 6 votes |
def main():
    """Read a delimited table, cluster it and write a heatmap PNG plus the
    reordered data as TSV.

    Command-line options come from the module-level ``parser``.  Output file
    names are derived from the base name of the input file.

    Raises:
        ValueError: if more than 50 columns remain after column selection.
    """
    args = parser.parse_args()
    data = pd.read_table(args.tsv, index_col=args.row if args.row else 0,
                         sep=args.delimiter, encoding='utf-8')

    if args.cols:
        requested = args.cols.split(',')
        try:
            data = data.loc[:, requested]
        except KeyError:
            # Not column labels: interpret the values as 1-based positions.
            data = data.iloc[:, [int(i) - 1 for i in requested]]

    n_cols = len(data.columns)
    if n_cols > 50:
        raise ValueError('Too many columns')

    if args.log_normalize:
        data = np.log2(data)
    # log2(0) is -inf; replace those cells with the smallest remaining value.
    data[data == -np.inf] = data[data != -np.inf].min().min()

    # Base size plus a growth term above the cutoff.  The previous
    # `5+0 if n<50 else (n-50)/100` form dropped the base size in the else
    # branch, giving width 0 at exactly 50 columns and a tiny height at or
    # just above 1000 rows.
    width = 5 + max(0, n_cols - 50) / 100.0
    row_cutoff = 1000
    height = 15 + max(0, len(data) - row_cutoff) / 75.0

    seaborn_map = sns.clustermap(data, figsize=(width, height))
    out_stem = os.path.split(args.tsv.name)[1]
    seaborn_map.savefig('{}_heatmap.png'.format(out_stem))
    seaborn_map.data2d.to_csv('{}_heatmap.tsv'.format(out_stem), sep='\t')
Example #5
Source File: heatmap.py From Wooey with BSD 3-Clause "New" or "Revised" License | 6 votes |
def main():
    """Read a delimited table, cluster it and write a heatmap PNG plus the
    reordered data as TSV.

    Command-line options come from the module-level ``parser``.  Output file
    names are derived from the base name of the input file.

    Raises:
        ValueError: if more than 50 columns remain after column selection.
    """
    args = parser.parse_args()
    data = pd.read_table(args.tsv, index_col=args.row if args.row else 0,
                         sep=args.delimiter, encoding='utf-8')

    if args.cols:
        requested = args.cols.split(',')
        try:
            data = data.loc[:, requested]
        except KeyError:
            # Not column labels: interpret the values as 1-based positions.
            data = data.iloc[:, [int(i) - 1 for i in requested]]

    n_cols = len(data.columns)
    if n_cols > 50:
        raise ValueError('Too many columns')

    if args.log_normalize:
        data = np.log2(data)
    # log2(0) is -inf; replace those cells with the smallest remaining value.
    data[data == -np.inf] = data[data != -np.inf].min().min()

    # Base size plus a growth term above the cutoff.  The previous
    # `5+0 if n<50 else (n-50)/100` form dropped the base size in the else
    # branch, giving width 0 at exactly 50 columns and a tiny height at or
    # just above 1000 rows.
    width = 5 + max(0, n_cols - 50) / 100.0
    row_cutoff = 1000
    height = 15 + max(0, len(data) - row_cutoff) / 75.0

    seaborn_map = sns.clustermap(data, figsize=(width, height))
    out_stem = os.path.split(args.tsv.name)[1]
    seaborn_map.savefig('{}_heatmap.png'.format(out_stem))
    seaborn_map.data2d.to_csv('{}_heatmap.tsv'.format(out_stem), sep='\t')
Example #6
Source File: plot_heatmaps.py From pancanatlas_code_public with MIT License | 6 votes |
def plot_heatmap(outpath, df, sample_linkage, sample_colors, event_linkage, desc, sample_color_lut):
    '''Draw a clustermap of ``df.T`` with precomputed linkages and save it.

    outpath: path handed to ``plot_utils.save``
    df: data matrix; it is transposed before plotting
    sample_linkage: linkage used for the columns of the plot
    sample_colors: colours drawn along the column margin
    event_linkage: linkage used for the rows of the plot
    desc: description used for the title; must start with 'altsplice' or
        'expression' (case-insensitive)
    sample_color_lut: lookup table passed to ``add_legend``
    '''
    desc_lower = desc.lower()
    is_altsplice = desc_lower.startswith('altsplice')
    assert is_altsplice or desc_lower.startswith('expression')

    # NOTE(review): presumably raised because dendrogram drawing recurses
    # deeply on large trees -- confirm.
    sys.setrecursionlimit(100000)

    # Function-call form prints the same text on Python 2 and parses on
    # Python 3, matching the print(...) calls used elsewhere in this module.
    print("Plotting data ... ")
    graph = sns.clustermap(df.T,
                           col_colors=sample_colors,
                           col_linkage=sample_linkage,
                           row_linkage=event_linkage,
                           cmap=sns.cubehelix_palette(as_cmap=True))
    graph.ax_heatmap.axis('off')

    title = ' '.join(desc.split('_')).title()
    graph.ax_col_dendrogram.set_title("%s Clustering" % title)
    graph.ax_heatmap.set_xlabel("Events")
    graph.ax_heatmap.set_ylabel("Samples")
    graph.cax.set_title("psi" if is_altsplice else "log(counts)")

    add_legend(graph, sample_color_lut)
    plot_utils.save(outpath)
Example #7
Source File: __init__.py From pyani with MIT License | 6 votes |
def get_clustermap(dfr, params, title=None, annot=True):
    """Build and return a Seaborn clustermap for the passed dataframe.

    :param dfr: data to cluster and draw
    :param params: object providing ``cmap``, ``vmin``, ``vmax``,
        ``colorbar``, ``figsize`` and ``linewidths``; ``figsize`` is used
        for both width and height
    :param title: str, label for the colour-bar axis (skipped when falsy)
    :param annot: Boolean, add text for cell values?
    """
    clustermap_kwargs = dict(
        cmap=params.cmap,
        vmin=params.vmin,
        vmax=params.vmax,
        col_colors=params.colorbar,
        row_colors=params.colorbar,
        figsize=(params.figsize, params.figsize),
        linewidths=params.linewidths,
        annot=annot,
    )
    grid = sns.clustermap(dfr, **clustermap_kwargs)

    # Label each of the input genomes.
    add_labels(grid, params)

    colour_axis = grid.cax
    colour_axis.yaxis.set_label_position("left")
    if title:
        colour_axis.set_ylabel(title)

    return grid


# Generate Seaborn heatmap output
Example #8
Source File: heatmap.py From mmvec with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _parse_taxonomy_strings(taxonomy_series, level):
    '''
    Pick one rank out of each semicolon-delimited taxonomy string.

    taxonomy_series: pd.Series of semicolon-delimited taxonomy strings
    level: int
        taxonomic level for annotating clustermap.

    Returns
    -------
    pd.Series with the (whitespace-stripped) name at the specified level,
    or the last available name when a string has fewer than ``level`` ranks.
    '''
    def _name_at_level(taxonomy):
        ranks = taxonomy.split(';')
        up_to_level = ranks[:level]
        return up_to_level[-1].strip()

    return taxonomy_series.apply(_name_at_level)
Example #9
Source File: plotlib.py From mCaller with MIT License | 5 votes |
def plot_correlation_matrix(curmat,elevenmer,labels,outdir):
    """Draw a clustered heatmap of ``curmat``, save it as PDF, then show it.

    curmat: matrix handed to ``sns.clustermap``
    elevenmer: string used as plot title and in the output file name
    labels: tick labels for both axes
    outdir: prefix of the output path; it is concatenated as-is, so it must
        already end with a path separator
    """
    # clustermap opens its own figure, so the size has to be passed here; a
    # separate plt.figure(figsize=...) would only create an unused figure.
    cg = sns.clustermap(curmat, metric='euclidean', xticklabels=labels,
                        yticklabels=labels, figsize=(7, 6))
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.title(elevenmer)

    # Save before show(): once an interactive window has been closed the
    # current figure is gone and savefig would write an empty page.
    plt.savefig(outdir+'correlation_matrix_'+elevenmer+'.pdf',dpi=500,transparent=True)
    plt.show()
Example #10
Source File: utils.py From pysster with MIT License | 5 votes |
def _plot_heatmap(file_path, data, class_id, classes = None):
    """Cluster ``data.T`` and save the heatmap, with a class legend, to ``file_path``.

    file_path: output path passed to ``plt.savefig``
    data: 2-D matrix; its transpose is plotted
    class_id: one integer per column of the plotted matrix, used as an index
        into the palette to colour the column margin
    classes: optional sequence of class names for the legend; defaults to
        ``0 .. n_classes-1``
    """
    import seaborn as sns

    _set_sns_context(data.shape[1])
    n_classes = len(set(class_id))
    palette = _get_colors(n_classes)
    colors = [palette[x] for x in class_id]

    g = sns.clustermap(data = data.T, method = "ward", metric = "euclidean",
                       cmap = "RdBu_r", xticklabels = False, yticklabels = True,
                       figsize = (30,25), row_cluster = True, col_cluster = True,
                       linewidths = 0, col_colors = colors, robust = True,
                       z_score = 0, cbar_kws={"ticks":[-1.5,0,+1.5]})
    # NOTE(review): presumably shrinks the view so the column dendrogram is
    # not visible and the axes can host the legend below -- confirm.
    g.ax_col_dendrogram.set_xlim([0,1e-10])
    g.ax_col_dendrogram.set_ylim([0,1e-10])
    plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0)
    sns.set(font_scale=2.8)

    # Identity test, not `== None`: with an array-like `classes` the equality
    # comparison is element-wise and cannot be used as a condition.
    if classes is None:
        classes = list(range(n_classes))

    # Zero-sized bars draw nothing but register one legend handle per class.
    for x in range(n_classes):
        g.ax_col_dendrogram.bar(0, 0, color=palette[x],
                                label="class_{}".format(classes[x]), linewidth=0)
    g.ax_col_dendrogram.legend(loc = "center", ncol = min(6, n_classes))

    # The last axes of the current figure is taken to be the colour bar.
    cax = plt.gcf().axes[-1]
    cax.tick_params(labelsize=25)

    plt.savefig(file_path, bbox_inches = 'tight')
    plt.close('all')
    sns.reset_orig()
Example #11
Source File: heatmap.py From XenonPy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def draw(self, y=None):
    """Draw the clustered heatmap of ``self.desc``.

    When ``y`` is given, the heatmap is narrowed and ``y.values`` is plotted
    as a line in an extra axes to its right, labelled with ``y.name``.
    The figure is saved with ``plt.savefig(**self.save)`` when ``self.save``
    is truthy.
    """
    grid = sb.clustermap(
        self.desc,
        cmap="RdBu",
        method=self.method,
        figsize=self.figsize,
        row_cluster=self.row_cluster,
        col_cluster=self.col_cluster,
        **self.kwargs)
    grid.cax.set_visible(False)

    heat_ax = grid.ax_heatmap
    heat_ax.yaxis.set_ticks_position('left')
    heat_ax.yaxis.set_label_position('left')

    with_side_plot = y is not None
    dendro_width, heat_width = (0.83, 0.84) if with_side_plot else (0.9, 0.9)
    grid.ax_col_dendrogram.set_position((0.1, 0.8, dendro_width, 0.1))
    heat_ax.set_position((0.1, 0.2, heat_width, 0.6))

    if with_side_plot:
        side_ax = plt.axes([0.95, 0.2, 0.05, 0.6])
        values = y.values
        positions = np.arange(len(values))[::-1]
        side_ax.plot(values, positions, lw=4)
        side_ax.get_yaxis().set_visible(False)
        for edge in ('top', 'right'):
            side_ax.spines[edge].set_visible(False)
        side_ax.set_xlabel('{:s}'.format(y.name), fontsize='large')

    if self.save:
        plt.savefig(**self.save)
Example #12
Source File: parkDataVisulization.py From python-urbanPlanning with MIT License | 5 votes |
def heatmap_pData(df):
    """Draw a clustered heatmap of the correlations between selected columns.

    :param df: DataFrame containing every column named in ``columnsList``;
        a missing column raises ``KeyError`` from the column selection.
    """
    import pandas as pd
    import seaborn as sns

    sns.set()

    columnsList=['shapelyArea', 'shapelyLength','shapeIdx', 'FRAC', 'popu_mean', 'popu_std','SVFW_mean', 'SVFW_std', 'SVFep_std', 'SVFep_median','SVFep_majority', 'SVFep_minority', 'facilityFre', 'HVege_mean','HVege_count','MVege_mean', 'MVege_count','LVege_mean', 'LVege_count', 'cla_treeCanopy', 'cla_grassShrub', 'cla_bareSoil','cla_buildings', 'cla_roadsRailraods', 'cla_otherPavedSurfaces','cla_water', ]
    df=df[columnsList]

    # One margin colour per plotted column.  The earlier lookup table was
    # keyed by the strings '1', '5', ... (left over from seaborn's
    # brain-networks example), so no column name ever matched it and the
    # margin colours were all missing.
    column_pal = sns.husl_palette(len(columnsList), s=.45)
    column_lut = dict(zip(columnsList, column_pal))

    # Convert the palette to vectors that will be drawn on the side of the matrix
    column_colors = pd.Series(df.columns, index=df.columns).map(column_lut)

    # Draw the full plot
    sns.clustermap(df.corr(), center=0, cmap="vlag",
                   row_colors=column_colors, col_colors=column_colors,
                   linewidths=.75, figsize=(13, 13))
Example #13
Source File: plot_figure.py From pyHSICLasso with MIT License | 5 votes |
def plot_heatmap(X, row_linkage, featname, filepath):
    """Draw a clustered heatmap of ``X`` with a given row linkage and save it.

    X: 2-D data; wrapped in a DataFrame whose index is set to ``featname``
    row_linkage: precomputed linkage for the rows
    featname: row labels, one per row of ``X``
    filepath: output path passed to ``plt.savefig``
    """
    df = pd.DataFrame(X)
    df.index = featname
    cg = sns.clustermap(df, center=0, row_linkage=row_linkage, method='ward', cmap=microarray_cmap)

    # An empty list states "no x tick labels" explicitly (a string argument
    # is iterated character by character).
    cg.ax_heatmap.set_xticklabels([])
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    plt.title('Heatmap')
    plt.savefig(filepath)

    # Close rather than clear: clf() keeps the figure registered with
    # pyplot, so repeated calls would accumulate open figures.
    plt.close(cg.ax_heatmap.figure)
Example #14
Source File: csv2heatmap.py From amptk with BSD 2-Clause "Simplified" License | 5 votes |
def drawClustermap(df, output, args=False):
    """Draw a clustermap of ``df`` and save it to ``output``.

    ``args`` supplies ``scaling``, ``cluster_method``, ``distance_metric``,
    ``color``, the axis font sizes, ``font`` and ``format``.  ``scaling``
    selects row-wise ``z_score``, row-wise ``standard_scale`` or no scaling.
    The module-level names ``cluster`` and ``figSize`` are read as well.
    """
    scaling = args.scaling
    options = dict(
        method=args.cluster_method,
        metric=args.distance_metric,
        linewidths=0.5,
        cmap=args.color,
        col_cluster=cluster,
        figsize=figSize,
    )
    if scaling == 'z_score':
        options['z_score'] = 0
    elif scaling == 'standard':
        options['standard_scale'] = 0
    g = sns.clustermap(df, **options)

    heat_ax = g.ax_heatmap
    plt.setp(heat_ax.get_yticklabels(), rotation=0,
             size=int(args.yaxis_fontsize), family=args.font)
    plt.setp(heat_ax.get_xticklabels(), rotation=90,
             size=int(args.xaxis_fontsize), family=args.font, weight='bold')
    g.savefig(output, format=args.format, dpi=1000, bbox_inches='tight')
Example #15
Source File: plots.py From cdlib with BSD 2-Clause "Simplified" License | 5 votes |
def plot_sim_matrix(clusterings, scoring):
    """
    Plot a similarity matrix between a list of clusterings, using the provided scoring function.

    Every ordered pair of clusterings is scored, and the scores are arranged
    in a square table indexed by each clustering's ``get_description()``.

    :param clusterings: list of clusterings to compare
    :param scoring: the scoring function to use; its result must expose ``.score``
    :return: a ClusterGrid instance

    Example:

    >>> from cdlib import algorithms, viz, evaluation
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> coms2 = algorithms.walktrap(g)
    >>> clustermap = viz.plot_sim_matrix([coms,coms2],evaluation.adjusted_mutual_information)
    """
    # Describe each clustering once instead of once per pair.
    described = [(c.get_description(), c) for c in clusterings]
    rows = [
        [id1, id2, scoring(c1, c2).score]
        for id1, c1 in described
        for id2, c2 in described
    ]

    df = pd.DataFrame(columns=["com1", "com2", "score"], data=rows)
    # Keyword arguments: positional arguments to pivot() are rejected by
    # pandas >= 2.0.
    df = df.pivot(index="com1", columns="com2", values="score")
    return sns.clustermap(df)
Example #16
Source File: __init__.py From pyani with MIT License | 5 votes |
def heatmap(dfr, outfilename=None, title=None, params=None):
    """Return seaborn heatmap with cluster dendrograms.

    :param dfr: pandas DataFrame with relevant data
    :param outfilename: path to output file (indicates output format);
        nothing is written when falsy
    :param title: forwarded to ``get_clustermap``
    :param params: parameter object; ``classes`` is read from it and
        ``colorbar``, ``figsize`` and ``linewidths`` are set on it
    """
    # Figure side length scales with the number of rows, clamped to
    # [8, 120]: a minimum for aesthetics, a maximum to avoid core dumps on
    # rendering.  At the maximum the font is scaled down instead.
    maxfigsize = 120
    calcfigsize = dfr.shape[0] * 1.1
    figsize = min(max(8, calcfigsize), maxfigsize)
    if figsize == maxfigsize:
        sns.set_context("notebook", font_scale=maxfigsize / calcfigsize)

    # Colour bar only when class information is available.
    classes = params.classes
    col_cb = get_colorbar(dfr, classes) if classes is not None else None

    # Hand the derived settings to the drawing routine via the params object.
    params.colorbar = col_cb
    params.figsize = figsize
    params.linewidths = 0.25
    grid = get_clustermap(dfr, params, title=title)

    if outfilename:
        grid.savefig(outfilename)

    return grid
Example #17
Source File: word_coocurrence.py From guesswhat with Apache License 2.0 | 5 votes |
def __init__(self, path, games, logger, suffix):
    """Plot how often the most frequent question words occur together.

    path, suffix: forwarded to the parent constructor together with the
        class name
    games: iterable of objects whose ``questions`` attribute holds question
        strings
    logger: not used by this constructor
    """
    super(WordCoocurence, self).__init__(path, self.__class__.__name__, suffix)

    NO_WORDS_TO_DISPLAY = 50

    # Split every question into lower-cased words once.  The token lists are
    # kept for the co-occurrence pass: iterating over the raw question string
    # there would walk over single characters instead of words.
    tokenised_questions = []
    word_counter = collections.Counter()
    for game in games:
        for q in game.questions:
            words = [w.lower() for w in re.findall(r'\w+', re.sub('[?]', '', q))]
            tokenised_questions.append(words)
            word_counter.update(words)

    common_words = [word for word, _ in word_counter.most_common(NO_WORDS_TO_DISPLAY)]
    word_index = {word: i for i, word in enumerate(common_words)}

    # Size the matrix by the words actually found so that it always matches
    # the labels, even when fewer than NO_WORDS_TO_DISPLAY distinct words exist.
    n_words = len(common_words)
    corrmat = np.zeros((n_words, n_words))

    # Count, per question, every ordered pair of distinct common words.
    for words in tokenised_questions:
        positions = [word_index[w] for w in words if w in word_index]
        for row in positions:
            for col in positions:
                if row != col:
                    corrmat[row, col] += 1.

    # Display the co-occurrence matrix
    df = pd.DataFrame(data=corrmat, index=common_words, columns=common_words)
    f = sns.clustermap(df, standard_scale=0, col_cluster=False, row_cluster=True,
                       cbar_kws={"label": "co-occurence"})
    f.ax_heatmap.xaxis.tick_top()
    plt.setp(f.ax_heatmap.get_xticklabels(), rotation=90)
    plt.setp(f.ax_heatmap.get_yticklabels(), rotation=0)
Example #18
Source File: sf_heatmap.py From pancanatlas_code_public with MIT License | 5 votes |
def plot_heatmap(psi_df, meta_df, outpath):
    '''Plot an unclustered heatmap of ``psi_df`` with row/column colour bars.

    psi_df: data matrix; rows are reordered by ``meta_df['cnc']`` and columns
        by the second element of their decoded event name
    meta_df: metadata with a 'cnc' column, indexed like ``psi_df``
    outpath: output image path
    '''
    # Sort by cancer type
    psi_df = psi_df.copy().loc[meta_df['cnc'].sort_values().index]
    psi_df = psi_df.iloc[:, psi_df.columns.map(lambda x: _decode_event_name(x)[1]).argsort()]

    col_colors, col_cmap_lut = _get_heatmap_col_colors(psi_df)
    row_colors, row_cmap_lut = _get_heatmap_row_colors(meta_df, psi_df.index)

    # Clustering is switched off: clustermap is used for its colour margins.
    graph = sns.clustermap(psi_df, cmap='Purples',
                           row_colors=row_colors, col_colors=col_colors,
                           row_cluster=False, col_cluster=False,
                           xticklabels=psi_df.columns.map(lambda x: _decode_event_name(x)[2]),
                           linewidths=0, mask=psi_df.isnull())
    _override_sns_row_colors(graph, row_colors.values)

    graph.ax_heatmap.set_yticks([])
    graph.ax_heatmap.set_xlabel("Events")
    graph.ax_heatmap.set_ylabel("Samples")
    graph.cax.set_title("psi")

    tumor_only_row_cmap_lut = {key: val for key, val in row_cmap_lut.items() if 'Normal' not in key}
    plotter.add_legend(graph, tumor_only_row_cmap_lut)
    plotter.add_col_legend(graph, col_cmap_lut)

    # Function-call form of print: same output on Python 2, valid on Python 3.
    print("Writing: %s" % outpath)
    plt.savefig(outpath, bbox_inches='tight')

    # Escape the dot so only a literal ".png" suffix is rewritten.
    pdf_outpath = re.sub(r'\.png$', '.pdf', outpath)
    print("Writing: %s" % pdf_outpath)
    # NOTE(review): the PDF save below is disabled although the message above
    # is still printed -- confirm which of the two is intended.
    #plt.savefig(pdf_outpath, bbox_inches='tight')
    plt.close()
Example #19
Source File: de.py From smallrnaseq with GNU General Public License v3.0 | 5 votes |
def cluster_map(data, names):
    """Cluster map of genes.

    Selects the rows labelled ``names``, takes the natural log, replaces
    missing values with 0 and centres every row on its mean before
    clustering.

    data: DataFrame indexed by gene name
    names: row labels to plot
    Returns the seaborn ClusterGrid.
    """
    import seaborn as sns
    import pylab as plt

    # Label-based selection; DataFrame.ix no longer exists in pandas >= 1.0.
    data = data.loc[names]
    X = np.log(data).fillna(0)
    X = X.apply(lambda x: x - x.mean(), axis=1)

    cg = sns.clustermap(X, cmap='RdYlBu_r', figsize=(8, 10), lw=.5, linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg
Example #20
Source File: plotting.py From smallrnaseq with GNU General Public License v3.0 | 5 votes |
def expression_clustermap(counts, freq=0.8):
    """Cluster map of log-transformed counts.

    counts: DataFrame with a 'name' column plus the columns reported by
        ``base.get_column_names``
    freq: a row is kept only if more than this fraction of its
        log-transformed values is non-null (default 0.8, i.e. fewer than
        20% nulls)
    Returns the seaborn ClusterGrid.
    """
    _scols, ncols = base.get_column_names(counts)
    X = counts.set_index('name')[ncols]
    X = np.log(X)

    # Honour `freq`; the threshold used to be hard-coded as "< 0.2 null",
    # which is what the default of 0.8 still gives.
    present = X.notnull().sum(axis=1) / len(X.columns)
    X = X[present > freq]
    X = X.fillna(0)

    cg = sns.clustermap(X, cmap='YlGnBu', figsize=(12, 12), lw=0, linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=9)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg
Example #21
Source File: plot.py From retentioneering-tools with Mozilla Public License 2.0 | 5 votes |
def cluster_heatmap(data, clusters, target, plot_name=None, **kwargs):
    """
    Visualizes feature usage with a row-clustered heatmap.

    Parameters
    --------
    data: pd.DataFrame
        Feature matrix.
    clusters: np.array
        Array of cluster IDs. Not used by this function.
    target: np.array
        Boolean vector. Not used by this function.
    plot_name: str, optional
        Name of plot to save. Default: ``'cluster_heatmap_{timestamp}.svg'``

    Returns
    -------
    Tuple ``(heatmap axes, plot path, None, retention config)``; the path is
    ``plot_name`` prefixed with the configured ``experiments_folder``.
    """
    grid = sns.clustermap(
        data.values,
        cmap="BrBG",
        xticklabels=data.columns,
        yticklabels=False,
        row_cluster=True,
        col_cluster=False,
    )
    grid.ax_row_dendrogram.set_visible(False)
    heatmap_axes = grid.ax_heatmap

    if not plot_name:
        stamped = 'cluster_heatmap_{}'.format(datetime.now())
        plot_name = stamped.replace(':', '_').replace('.', '_') + '.svg'
    plot_name = data.retention.retention_config['experiments_folder'] + '/' + plot_name

    return heatmap_axes, plot_name, None, data.retention.retention_config
Example #22
Source File: basenji_motifs.py From basenji with Apache License 2.0 | 5 votes |
def plot_target_corr(filter_outs, seq_targets, filter_names, target_names, out_pdf, seq_op='mean'):
    """Plot a clustered heatmap of filter/target Spearman correlations.

    filter_outs: 3-D array; reduced over axis 2 with ``seq_op``
        (assumed sequence x filter x position -- TODO confirm)
    seq_targets: 2-D array of targets; only its first
        ``filter_outs.shape[0]`` rows are used
    filter_names: one name per filter
    target_names: one name per target column
    out_pdf: output path of the figure
    seq_op: 'mean' to average over axis 2, anything else takes the max
    """
    num_seqs = filter_outs.shape[0]
    num_targets = len(target_names)

    if seq_op == 'mean':
        filter_outs_seq = filter_outs.mean(axis=2)
    else:
        filter_outs_seq = filter_outs.max(axis=2)

    # std is sequence by filter; drop filters whose output never varies.
    live = filter_outs_seq.std(axis=0) > 0
    filter_outs_seq = filter_outs_seq[:, live]
    # asarray so that a plain list of names accepts the boolean mask too.
    filter_names_live = np.asarray(filter_names)[live]

    filter_target_cors = np.zeros((len(filter_names_live), num_targets))
    for fi in range(len(filter_names_live)):
        for ti in range(num_targets):
            cor, _p = spearmanr(filter_outs_seq[:, fi], seq_targets[:num_seqs, ti])
            filter_target_cors[fi, ti] = cor

    cor_df = pd.DataFrame(
        filter_target_cors, index=filter_names_live, columns=target_names)

    sns.set(font_scale=0.3)
    # clustermap opens its own figure; a preceding plt.figure() would leave
    # an empty figure open after every call.
    grid = sns.clustermap(cor_df, cmap='BrBG', center=0, figsize=(8, 10))
    figure = grid.ax_heatmap.figure
    figure.savefig(out_pdf)
    plt.close(figure)


################################################################################
# plot_filter_seq_heat
#
# Plot a clustered heatmap of filter activations in
#
# Input
#  param_matrix: np.array of the filter's parameter matrix
#  out_pdf:
################################################################################
Example #23
Source File: plots.py From cgpm with Apache License 2.0 | 5 votes |
def plot_clustermap(D, xticklabels=None, yticklabels=None):
    """Draw a clustered heatmap of the 2-D matrix ``D``.

    D: 2-D array-like with a ``shape`` attribute
    xticklabels: labels for the columns; defaults to ``0 .. D.shape[1]-1``
    yticklabels: labels for the rows; defaults to ``0 .. D.shape[0]-1``
    Returns the seaborn ClusterGrid.
    """
    import seaborn as sns

    # The x axis runs over columns (shape[1]) and the y axis over rows
    # (shape[0]); the defaults used to be swapped, which only matched for
    # square matrices.
    if xticklabels is None:
        xticklabels = list(range(D.shape[1]))
    if yticklabels is None:
        yticklabels = list(range(D.shape[0]))

    zmat = sns.clustermap(
        D, yticklabels=yticklabels, xticklabels=xticklabels,
        linewidths=0.2, cmap='BuGn')
    plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
    plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
    return zmat
Example #24
Source File: heatmap.py From mmvec with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _normalize_table(table, method):
    '''
    Normalize column data in a dataframe for plotting in clustermap.

    table: pd.DataFrame
        Input data.
    method: str
        Normalization method to use. A name containing 'z_score' or 'rel'
        must also contain 'col' or 'row' to select the axis; 'log10'
        applies log10(x + 1) to every value.

    Returns normalized table as pd.DataFrame, with missing values (e.g.
    from a zero standard deviation or zero sum) replaced by 0.

    Raises ValueError for an unrecognised method, or when a z_score/rel
    method names neither 'col' nor 'row'.
    '''
    def _z_score(x):
        return (x - x.mean()) / x.std()

    def _relative(x):
        return x / x.sum()

    if 'col' in method:
        axis = 0
    elif 'row' in method:
        axis = 1
    else:
        axis = None

    if 'z_score' in method:
        transform = _z_score
    elif 'rel' in method:
        transform = _relative
    elif method == 'log10':
        return table.apply(lambda x: np.log10(x + 1)).fillna(0)
    else:
        # Previously fell through to an UnboundLocalError on `res`.
        raise ValueError('Unknown normalization method: {!r}'.format(method))

    if axis is None:
        raise ValueError(
            "Normalization method {!r} must specify 'col' or 'row'".format(method))

    return table.apply(transform, axis=axis).fillna(0)
Example #25
Source File: heatmap.py From mmvec with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _parse_heatmap_metadata_annotations(metadata_column, margin_palette):
    '''
    Transform feature or sample metadata into color vector for annotating
    margin of clustermap.

    Parameters
    ----------
    metadata_column: pd.Series of metadata for annotating plots
        (values are converted to strings)
    margin_palette: str
        Name of color palette to use for annotating metadata
        along margin(s) of clustermap. The special name 'colorhelix'
        selects a cubehelix palette.

    Returns
    -------
    Returns vector of colors for annotating clustermap and dict mapping
    classes to colors.
    '''
    labels = metadata_column.astype(str)
    classes = sorted(labels.unique())
    n_classes = len(classes)

    # One colour per distinct class, in sorted class order.
    if margin_palette != 'colorhelix':
        palette = sns.color_palette(margin_palette, n_classes)
    else:
        palette = sns.cubehelix_palette(
            n_classes, start=2, rot=3, dark=0.3, light=0.8, reverse=True)
    class_colors = dict(zip(classes, palette))

    # Per-entry colour vector that will be drawn on the matrix margin.
    return labels.map(class_colors), class_colors
Example #26
Source File: misc.py From mriqc with BSD 3-Clause "New" or "Revised" License | 4 votes |
def plot_corrmat(in_csv, out_file=None):
    """Plot a clustered correlation matrix of the columns of a CSV file.

    in_csv: path of the input CSV; the columns 'subject_id', 'site' and
        'modality' are excluded from the correlation when present
    out_file: output path; defaults to 'corr_matrix.svg'. An extension other
        than pdf/svg/png is replaced by '.svg'.
    Returns the seaborn ClusterGrid.
    """
    import seaborn as sn

    sn.set(style="whitegrid")
    dataframe = pd.read_csv(in_csv, index_col=False, na_values="n/a", na_filter=False)

    excluded = ("subject_id", "site", "modality")
    colnames = [col for col in dataframe.columns.tolist() if col not in excluded]

    # Correlation matrix, without rows/columns that are entirely NaN.
    # dropna() no longer accepts a tuple of axes, so drop one axis at a time.
    corr = dataframe[colnames].corr()
    corr = corr.dropna(axis=0, how="all").dropna(axis=1, how="all")

    # The upper-triangle mask built here previously was never passed to the
    # plot (and relied on the removed np.bool alias), so it is gone.

    # Generate a custom diverging colormap
    cmap = sn.diverging_palette(220, 10, as_cmap=True)

    corrplot = sn.clustermap(
        corr, cmap=cmap, center=0.0, method="average", square=True, linewidths=0.5
    )
    plt.setp(corrplot.ax_heatmap.yaxis.get_ticklabels(), rotation="horizontal")

    if out_file is None:
        out_file = "corr_matrix.svg"

    fname, ext = op.splitext(out_file)
    if ext[1:] not in ["pdf", "svg", "png"]:
        ext = ".svg"
        out_file = fname + ".svg"

    corrplot.savefig(
        out_file, format=ext[1:], bbox_inches="tight", pad_inches=0, dpi=100
    )
    return corrplot
Example #27
Source File: nested_heatmap.py From Wooey with BSD 3-Clause "New" or "Revised" License | 4 votes |
def main():
    """Plot nested heatmaps: major-index rows with their minor-index rows below.

    Reads a table indexed by (major, minor), keeps major-index values that
    pass ``args.minor_cutoff``, ranks them by mean variance and writes one
    PNG per chunk of 10 major-index values (at most 11 chunks).  Options
    come from the module-level ``parser``.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns

    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index], sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples to our lowest detection
    df[df == -np.inf] = df[df != -np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our
    # addition and makes no biological sense)
    if args.translate:
        df += abs(df.min().min())

    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    multi = df[df.index.get_level_values(major_index).isin(
        major_counts[major_counts >= cutoff].dropna().index)]

    # Let's select the most variable minor axis elements.
    # sort_values replaces Series.order, which no longer exists in pandas.
    most_variable = multi.groupby(level=major_index).var().mean(axis=1).sort_values(ascending=False)

    # and group by 10s
    for i in range(11):
        chunk = most_variable.index[10 * i:10 * (i + 1)]
        if len(chunk) == 0:
            # Nothing left to plot; clustering an empty table would fail.
            break
        dat = multi[multi.index.get_level_values(major_index).isin(chunk)]
        # we want to cluster by our major index, and then under these plot
        # the values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        seaborn_map = sns.clustermap(major_dat, row_cluster=True, col_cluster=True)

        # now we keep this clustering, but recreate our data to fit the above
        # clustering, with our minor index below the major index (you can
        # think of transcript levels under gene levels if you are a biologist)
        clustered_columns = seaborn_map.data2d.columns
        merged_dat = pd.DataFrame(columns=[clustered_columns])
        for major_val in seaborn_map.data2d.index:
            minor_rows = multi[multi.index.get_level_values(major_index) == major_val][clustered_columns]
            major_row = major_dat.loc[major_val, ][clustered_columns]
            # NOTE(review): DataFrame.append was removed in pandas 2.0; port
            # to pd.concat with care, the row/column alignment here is subtle.
            # (A first append whose result was discarded has been dropped.)
            merged_dat = merged_dat.append(major_row).append(minor_rows)
        merged_map = sns.clustermap(merged_dat, row_cluster=False, col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack
        # but it works
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)
        # for rows, I imagine at some point it will fail to fall within the
        # major axis but fortunately for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(os.path.split(args.tsv.name)[1], i))
Example #28
Source File: plot_heatmaps.py From pancanatlas_code_public with MIT License | 4 votes |
def main(df, outdir, desc, color_loader, run_representative):
    '''Cluster one data matrix and write its clustermap plot(s) to ``outdir``.

    df: data matrix, indexed with ``.loc``/``.iloc`` (may be clipped in place)
    outdir: directory for the output PNGs; created if it does not exist
    desc: identifies the data (used for e.g. plot titles); must start with
        'altsplice' or 'expression' (case-insensitive)
    color_loader: callable taking ``df`` and returning ``(cat_series, color_lut)``
    run_representative: if true, also plot the output of ``collapse_to_median``
    '''
    print("clustermap: %s" %desc)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    assert desc.lower().startswith('altsplice') or desc.lower().startswith('expression')
    is_altsplice = desc.lower().startswith('altsplice')
    # NOTE(review): the source was flattened onto one line; the clipping is
    # assumed to belong to the non-altsplice branch -- confirm against the
    # original file.
    if not is_altsplice:
        assert df.values.max() > 1, "99.999999% Sure this is not psi data"
        print("Clipping 99th percentile")
        # Cap every column at its own 99th percentile; this writes into df.
        df.iloc[:] = np.minimum(df.values, np.percentile(df.values, 99, axis=0))
    method = 'ward'
    metric = 'cosine'

    # get colors, and keep only the rows that have one
    cat_series, color_lut = color_loader(df)
    colors = cat_series.map(color_lut)
    df = df.loc[colors.index]

    # filter to high var (MAX_EVENTS is a module-level constant)
    keep_cols = filter_to_high_var(df.values, df.columns, MAX_EVENTS)
    df = df.iloc[:, keep_cols]

    # cluster_desc names the output file; sample_desc is passed to get_linkage
    cluster_desc = '_'.join(['%d_high_var_events'%MAX_EVENTS, method, metric])
    sample_desc = desc.strip().replace(' ', '_').lower() + '_' + cluster_desc
    sample_linkage, event_linkage = get_linkage(df, sample_desc, method=method, metric=metric)
    outpath = os.path.join(outdir, cluster_desc + '_clustermap.png')
    plot_heatmap(outpath, df, sample_linkage, colors, event_linkage, desc, color_lut)

    # Optional second plot on the collapsed ("representative") data
    if run_representative:
        medians = collapse_to_median(df, cat_series)
        rep_colors = colors.loc[medians.index]
        rep_cluster_desc = cluster_desc + '_reps'
        rep_desc = desc.strip().replace(' ', '_').lower() + '_' + rep_cluster_desc
        print("clustermap: %s" %desc)
        rep_sample_linkage, rep_event_linkage = get_linkage(medians, rep_desc, method=method, metric=metric)
        rep_outpath = os.path.join(outdir, rep_cluster_desc + '_clustermap.png')
        plot_heatmap(rep_outpath, medians, rep_sample_linkage, rep_colors, rep_event_linkage, rep_desc, color_lut)
    return
Example #29
Source File: nested_heatmap.py From django-djangui with GNU General Public License v3.0 | 4 votes |
def main():
    """Plot nested heatmaps: major-index rows with their minor-index rows below.

    Reads a table indexed by (major, minor), keeps major-index values that
    pass ``args.minor_cutoff``, ranks them by mean variance and writes one
    PNG per chunk of 10 major-index values (at most 11 chunks).  Options
    come from the module-level ``parser``.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns

    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index], sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples to our lowest detection
    df[df == -np.inf] = df[df != -np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our
    # addition and makes no biological sense)
    if args.translate:
        df += abs(df.min().min())

    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    multi = df[df.index.get_level_values(major_index).isin(
        major_counts[major_counts >= cutoff].dropna().index)]

    # Let's select the most variable minor axis elements.
    # sort_values replaces Series.order, which no longer exists in pandas.
    most_variable = multi.groupby(level=major_index).var().mean(axis=1).sort_values(ascending=False)

    # and group by 10s
    for i in range(11):
        chunk = most_variable.index[10 * i:10 * (i + 1)]
        if len(chunk) == 0:
            # Nothing left to plot; clustering an empty table would fail.
            break
        dat = multi[multi.index.get_level_values(major_index).isin(chunk)]
        # we want to cluster by our major index, and then under these plot
        # the values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        seaborn_map = sns.clustermap(major_dat, row_cluster=True, col_cluster=True)

        # now we keep this clustering, but recreate our data to fit the above
        # clustering, with our minor index below the major index (you can
        # think of transcript levels under gene levels if you are a biologist)
        clustered_columns = seaborn_map.data2d.columns
        merged_dat = pd.DataFrame(columns=[clustered_columns])
        for major_val in seaborn_map.data2d.index:
            minor_rows = multi[multi.index.get_level_values(major_index) == major_val][clustered_columns]
            major_row = major_dat.loc[major_val,][clustered_columns]
            # NOTE(review): DataFrame.append was removed in pandas 2.0; port
            # to pd.concat with care, the row/column alignment here is subtle.
            # (A first append whose result was discarded has been dropped.)
            merged_dat = merged_dat.append(major_row).append(minor_rows)
        merged_map = sns.clustermap(merged_dat, row_cluster=False, col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack
        # but it works
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)
        # for rows, I imagine at some point it will fail to fall within the
        # major axis but fortunately for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(os.path.split(args.tsv.name)[1], i))
Example #30
Source File: basenji_motifs.py From basenji with Apache License 2.0 | 4 votes |
def plot_filter_seq_heat(filter_outs, out_pdf, whiten=True, drop_dead=True):
    """Plot a clustered heatmap of mean filter outputs for 500 sampled sequences.

    filter_outs: 3-D array, averaged over axis 2
        (assumed sequence x filter x position -- TODO confirm)
    out_pdf: output path of the figure
    whiten: if true, standardise with ``preprocessing.scale`` before plotting
    drop_dead: if true, drop filters whose (transposed) row has zero
        standard deviation
    """
    # compute filter output means per sequence
    filter_seqs = filter_outs.mean(axis=2)

    # whiten
    if whiten:
        filter_seqs = preprocessing.scale(filter_seqs)

    # transpose
    filter_seqs = np.transpose(filter_seqs)

    if drop_dead:
        filter_stds = filter_seqs.std(axis=1)
        filter_seqs = filter_seqs[filter_stds > 0]

    # downsample sequences
    # NOTE(review): randint draws with replacement, so columns can repeat
    # and inputs with fewer than 500 sequences are up-sampled -- confirm
    # this is intended.
    seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)
    sampled = filter_seqs[:, seqs_i]

    # Colour limits at the 0.1 / 99.9 percentiles of the sampled values.
    hmin = np.percentile(sampled, 0.1)
    hmax = np.percentile(sampled, 99.9)

    sns.set(font_scale=0.3)

    # clustermap opens its own figure; a preceding plt.figure() would leave
    # an empty figure open after every call.
    grid = sns.clustermap(
        sampled,
        row_cluster=True,
        col_cluster=True,
        linewidths=0,
        xticklabels=False,
        vmin=hmin,
        vmax=hmax)
    figure = grid.ax_heatmap.figure
    figure.savefig(out_pdf)
    plt.close(figure)


################################################################################
# plot_filter_seq_heat
#
# Plot a clustered heatmap of filter activations in sequence segments.
#
# Mean doesn't work well for the smaller segments for some reason, but taking
# the max looks OK. Still, similar motifs don't cluster quite as well as you
# might expect.
#
# Input
#  filter_outs
################################################################################