Python seaborn.clustermap() Examples

The following are 30 code examples of seaborn.clustermap(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: RnaseqqcReport.py    From CGATPipelines with MIT License 6 votes vote down vote up
def __call__(self, data, path):
        """Render a clustered heatmap of ``data`` and wrap it in a result block.

        ``data`` is cut down to its first ``n_samples`` columns, where
        ``n_samples`` is its number of rows, and the colour bar returned by
        ``self.getColorBar`` is drawn along both rows and columns.  A legend
        mapping every unique label to its xkcd colour is placed in the column
        dendrogram axes.  ``path`` is accepted but not used here.

        Returns a ``ResultBlocks`` holding a single ``ResultBlock`` that
        refers to the matplotlib figure number of the plot.
        """
        colorbar, _factors, unique, xkcd = self.getColorBar(data)
        n_samples = data.shape[0]
        data = data.iloc[:, :n_samples]
        col_dict = dict(zip(unique, xkcd))

        print(data.head())
        seaborn.set(font_scale=.5)
        ax = seaborn.clustermap(data,
                                row_colors=colorbar, col_colors=colorbar)
        # set_visible() returns None, so it must not be wrapped in plt.setp():
        # recent matplotlib raises on plt.setp(None).
        ax.ax_heatmap.yaxis.set_visible(False)

        # Zero-sized bars exist only to create one legend entry per label.
        for label in unique:
            ax.ax_col_dendrogram.bar(
                0, 0, color=seaborn.xkcd_rgb[col_dict[label]],
                label=label, linewidth=0)
        ax.ax_col_dendrogram.legend(loc="center", ncol=len(unique))

        return ResultBlocks(ResultBlock(
            '''#$mpl %i$#\n''' % ax.cax.figure.number,
            title='ClusterMapPlot'))
Example #2
Source File: basenji_test_genes.py    From basenji with Apache License 2.0 6 votes vote down vote up
def clustermap(gene_values, out_pdf, color=None, table=False):
  """ Generate a clustered heatmap using seaborn.

  Args:
    gene_values: matrix passed to sns.clustermap (and np.save).
    out_pdf: output path handed to plt.savefig.
    color: colormap forwarded as ``cmap``.
    table: if True, also save gene_values with np.save to out_pdf with
      its last four characters removed.
  """

  if table:
    np.save(out_pdf[:-4], gene_values)

  # sns.clustermap creates its own figure and makes it current. Opening
  # another one with plt.figure() first would leave an empty figure that
  # is never saved or closed.
  g = sns.clustermap(
      gene_values,
      metric='euclidean',
      cmap=color,
      xticklabels=False,
      yticklabels=False)
  g.ax_heatmap.set_xlabel('Experiments')
  g.ax_heatmap.set_ylabel('Genes')
  plt.savefig(out_pdf)
  plt.close()
Example #3
Source File: construction.py    From FinanceHub with MIT License 6 votes vote down vote up
def plot_corr_matrix(self, save_path=None, show_chart=True, cmap='vlag', linewidth=0, figsize=(10, 10)):
        """
        Draws ``self.corr`` as a clustered heatmap, reusing ``self.link`` as
        both the row and the column linkage.
        :param save_path: if not None, the figure is saved to this path.
        :param show_chart: if True, the chart is displayed with plt.show().
        :param cmap: matplotlib colormap.
        :param linewidth: width of the grid lines of the correlation matrix.
        :param figsize: tuple with figsize dimensions.
        """
        clustermap_options = dict(
            method=self.method,
            metric=self.metric,
            cmap=cmap,
            figsize=figsize,
            linewidths=linewidth,
            col_linkage=self.link,
            row_linkage=self.link,
        )
        sns.clustermap(self.corr, **clustermap_options)
        plt.tight_layout()

        if save_path is not None:
            plt.savefig(save_path, pad_inches=1, dpi=400)

        if show_chart:
            plt.show()

        # Always release the figure, whether or not it was shown or saved.
        plt.close()
Example #4
Source File: heatmap.py    From django-djangui with GNU General Public License v3.0 6 votes vote down vote up
def main():
    """Read a delimited table, cluster it and write a heatmap PNG plus the
    reordered data as TSV.

    All inputs come from the module-level ``parser``: ``tsv`` (open file),
    ``row`` (index column, defaults to 0), ``delimiter``, ``cols`` (optional
    comma-separated column labels, or 1-based positions as a fallback) and
    ``log_normalize``.

    Raises BaseException when more than 50 columns remain after selection.
    """
    args = parser.parse_args()
    data = pd.read_table(args.tsv, index_col=args.row if args.row else 0, sep=args.delimiter, encoding='utf-8')
    if args.cols:
        requested = args.cols.split(',')
        try:
            data = data.loc[:, requested]
        except KeyError:
            # Not column labels: interpret the entries as 1-based positions.
            data = data.iloc[:, [int(i) - 1 for i in requested]]
    n_cols = len(data.columns)
    if n_cols > 50:
        # NOTE(review): BaseException is broader than needed; the type is kept
        # so existing callers see the same exception.
        raise BaseException('Too many columns')
    data = np.log2(data) if args.log_normalize else data
    # log2(0) is -inf; replace it with the smallest finite value in the table.
    data[data == -1 * np.inf] = data[data != -1 * np.inf].min().min()
    # The base size must always be included.  Without the parentheses the
    # conditional expression swallowed the "5+" / "15+" and produced a size
    # of 0 exactly at the 50-column / 1000-row boundary.
    width = 5 + (0 if n_cols < 50 else (n_cols - 50) / 100.0)
    row_cutoff = 1000
    n_rows = len(data)
    height = 15 + (0 if n_rows < row_cutoff else (n_rows - row_cutoff) / 75.0)
    seaborn_map = sns.clustermap(data, figsize=(width, height))
    out_base = os.path.split(args.tsv.name)[1]
    seaborn_map.savefig('{}_heatmap.png'.format(out_base))
    seaborn_map.data2d.to_csv('{}_heatmap.tsv'.format(out_base), sep='\t')
Example #5
Source File: heatmap.py    From Wooey with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def main():
    """Read a delimited table, cluster it and write a heatmap PNG plus the
    reordered data as TSV.

    All inputs come from the module-level ``parser``: ``tsv`` (open file),
    ``row`` (index column, defaults to 0), ``delimiter``, ``cols`` (optional
    comma-separated column labels, or 1-based positions as a fallback) and
    ``log_normalize``.

    Raises BaseException when more than 50 columns remain after selection.
    """
    args = parser.parse_args()
    data = pd.read_table(args.tsv, index_col=args.row if args.row else 0, sep=args.delimiter, encoding='utf-8')
    if args.cols:
        requested = args.cols.split(',')
        try:
            data = data.loc[:, requested]
        except KeyError:
            # Not column labels: interpret the entries as 1-based positions.
            data = data.iloc[:, [int(i) - 1 for i in requested]]
    n_cols = len(data.columns)
    if n_cols > 50:
        # NOTE(review): BaseException is broader than needed; the type is kept
        # so existing callers see the same exception.
        raise BaseException('Too many columns')
    data = np.log2(data) if args.log_normalize else data
    # log2(0) is -inf; replace it with the smallest finite value in the table.
    data[data == -1 * np.inf] = data[data != -1 * np.inf].min().min()
    # The base size must always be included.  Without the parentheses the
    # conditional expression swallowed the "5+" / "15+" and produced a size
    # of 0 exactly at the 50-column / 1000-row boundary.
    width = 5 + (0 if n_cols < 50 else (n_cols - 50) / 100.0)
    row_cutoff = 1000
    n_rows = len(data)
    height = 15 + (0 if n_rows < row_cutoff else (n_rows - row_cutoff) / 75.0)
    seaborn_map = sns.clustermap(data, figsize=(width, height))
    out_base = os.path.split(args.tsv.name)[1]
    seaborn_map.savefig('{}_heatmap.png'.format(out_base))
    seaborn_map.data2d.to_csv('{}_heatmap.tsv'.format(out_base), sep='\t')
Example #6
Source File: plot_heatmaps.py    From pancanatlas_code_public with MIT License 6 votes vote down vote up
def plot_heatmap(outpath, df, sample_linkage, sample_colors, event_linkage, desc, sample_color_lut):
    '''Draw a clustermap of ``df.T`` with precomputed linkages and save it.

    outpath: destination handed to plot_utils.save
    df: table that is transposed before plotting, so its rows become the
        heatmap columns (annotated with sample_colors / sample_linkage)
    sample_linkage, event_linkage: column and row linkages for the plot
    sample_colors: column colour annotations
    desc: must start with 'altsplice' or 'expression' (case-insensitive);
        also used for the plot title
    sample_color_lut: passed to add_legend
    '''
    desc_lower = desc.lower()
    is_altsplice = desc_lower.startswith('altsplice')
    assert is_altsplice or desc_lower.startswith('expression')

    sys.setrecursionlimit(100000)
    # Call form of print behaves the same on Python 2 and Python 3.
    print("Plotting data ... ")
    graph = sns.clustermap(df.T,
                       col_colors=sample_colors,
                       col_linkage=sample_linkage, row_linkage=event_linkage,
                       cmap = sns.cubehelix_palette(as_cmap=True))
    graph.ax_heatmap.axis('off')
    pretty_desc = ' '.join(desc.split('_')).title()
    graph.ax_col_dendrogram.set_title("%s Clustering" % pretty_desc)
    # Columns of df.T carry the sample colours/linkage, so samples run along
    # x and events along y (the labels were previously the other way round).
    graph.ax_heatmap.set_xlabel("Samples")
    graph.ax_heatmap.set_ylabel("Events")
    if is_altsplice:
        graph.cax.set_title("psi")
    else:
        graph.cax.set_title("log(counts)")
    add_legend(graph, sample_color_lut)
    plot_utils.save(outpath)
    return
Example #7
Source File: __init__.py    From pyani with MIT License 6 votes vote down vote up
def get_clustermap(dfr, params, title=None, annot=True):
    """Build and return a Seaborn clustermap for the passed dataframe.

    :param dfr:  data passed to sns.clustermap
    :param params:  object providing cmap, vmin, vmax, colorbar, figsize
        and linewidths attributes; figsize is used for both dimensions
    :param title:  str, label placed on the colour-bar axis when truthy
    :param annot:  Boolean, add text for cell values?
    """
    side = params.figsize
    margin_colors = params.colorbar
    grid = sns.clustermap(
        dfr,
        cmap=params.cmap,
        vmin=params.vmin,
        vmax=params.vmax,
        col_colors=margin_colors,
        row_colors=margin_colors,
        figsize=(side, side),
        linewidths=params.linewidths,
        annot=annot,
    )

    # Label each of the input genomes.
    add_labels(grid, params)

    colorbar_axes = grid.cax
    colorbar_axes.yaxis.set_label_position("left")
    if title:
        colorbar_axes.set_ylabel(title)

    return grid


# Generate Seaborn heatmap output 
Example #8
Source File: heatmap.py    From mmvec with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _parse_taxonomy_strings(taxonomy_series, level):
    '''
    Extract one taxonomic name per entry of a taxonomy series.

    taxonomy_series: pd.Series of semicolon-delimited taxonomy strings
    level: int
        taxonomic level for annotating clustermap.

    Returns
    -------
    pd.Series with the whitespace-stripped name at the requested level, or
    the last available name when a string has fewer levels.
    '''
    def _name_at_level(taxonomy):
        # Truncate to `level` entries, then take whatever is deepest.
        ranks = taxonomy.split(';')
        return ranks[:level][-1].strip()

    return taxonomy_series.apply(_name_at_level)
Example #9
Source File: plotlib.py    From mCaller with MIT License 5 votes vote down vote up
def plot_correlation_matrix(curmat,elevenmer,labels,outdir):
    """Cluster ``curmat``, save the plot as a PDF and then display it.

    curmat: matrix passed to sns.clustermap
    elevenmer: string used as plot title and in the output file name
    labels: tick labels for both axes
    outdir: prefix that is concatenated as-is with the file name
    """
    # clustermap opens its own figure, so the size has to be passed to it; a
    # separate plt.figure(figsize=...) would only create an empty extra figure.
    cg = sns.clustermap(curmat,metric='euclidean',xticklabels=labels,yticklabels=labels,figsize=(7,6))
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.title(elevenmer)
    # Save before show(): once the window is closed the figure is gone and
    # savefig would write an empty canvas.
    plt.savefig(outdir+'correlation_matrix_'+elevenmer+'.pdf',dpi=500,transparent=True)
    plt.show()
Example #10
Source File: utils.py    From pysster with MIT License 5 votes vote down vote up
def _plot_heatmap(file_path, data, class_id, classes = None):
    """Save a z-scored, ward-clustered heatmap of ``data.T`` to ``file_path``.

    file_path: destination passed to plt.savefig
    data: 2-D table; it is transposed, so its rows become heatmap columns
    class_id: one palette index per heatmap column, used for col_colors
    classes: optional labels for the legend; defaults to 0..n_classes-1
    """
    import seaborn as sns
    _set_sns_context(data.shape[1])
    n_classes = len(set(class_id))
    palette = _get_colors(n_classes)
    colors = [palette[x] for x in class_id]
    g = sns.clustermap(data = data.T, method = "ward", metric = "euclidean",
                       cmap = "RdBu_r", xticklabels = False, yticklabels = True,
                       figsize = (30,25), row_cluster = True, col_cluster = True,
                       linewidths = 0, col_colors = colors, robust = True,
                       z_score = 0, cbar_kws={"ticks":[-1.5,0,+1.5]})
    # Collapse the column dendrogram axes; they only host the legend below.
    g.ax_col_dendrogram.set_xlim([0,1e-10])
    g.ax_col_dendrogram.set_ylim([0,1e-10])
    plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0)
    sns.set(font_scale=2.8)
    # Identity test: `== None` is evaluated element-wise for array-likes and
    # then cannot be used as a condition.
    if classes is None:
        classes = list(range(n_classes))
    # Zero-sized bars exist only to create one legend entry per class.
    for x in range(n_classes):
        g.ax_col_dendrogram.bar(0, 0, color=palette[x],
                                label="class_{}".format(classes[x]), linewidth=0)
    g.ax_col_dendrogram.legend(loc = "center", ncol = min(6, n_classes))
    cax = plt.gcf().axes[-1]
    cax.tick_params(labelsize=25)
    plt.savefig(file_path, bbox_inches = 'tight')
    plt.close('all')
    sns.reset_orig()
Example #11
Source File: heatmap.py    From XenonPy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def draw(self, y=None):
        """Draw ``self.desc`` as a clustermap, optionally with a side profile.

        When ``y`` is given, its values are plotted (top to bottom) in a
        narrow axes to the right of the heatmap and ``y.name`` labels that
        axes.  If ``self.save`` is truthy it is expanded as keyword arguments
        to ``plt.savefig``.
        """
        grid = sb.clustermap(
            self.desc,
            cmap="RdBu",
            method=self.method,
            figsize=self.figsize,
            row_cluster=self.row_cluster,
            col_cluster=self.col_cluster,
            **self.kwargs)
        grid.cax.set_visible(False)
        heat_yaxis = grid.ax_heatmap.yaxis
        heat_yaxis.set_ticks_position('left')
        heat_yaxis.set_label_position('left')

        if y is not None:
            # Narrower layout to leave room for the profile axes on the right.
            grid.ax_col_dendrogram.set_position((0.1, 0.8, 0.83, 0.1))
            grid.ax_heatmap.set_position((0.1, 0.2, 0.84, 0.6))
            profile_ax = plt.axes([0.95, 0.2, 0.05, 0.6])
            profile_x = y.values
            profile_y = np.arange(len(profile_x))[::-1]
            profile_ax.plot(profile_x, profile_y, lw=4)
            profile_ax.get_yaxis().set_visible(False)
            for side in ('top', 'right'):
                profile_ax.spines[side].set_visible(False)
            profile_ax.set_xlabel('{:s}'.format(y.name), fontsize='large')
        else:
            grid.ax_col_dendrogram.set_position((0.1, 0.8, 0.9, 0.1))
            grid.ax_heatmap.set_position((0.1, 0.2, 0.9, 0.6))

        if self.save:
            plt.savefig(**self.save)
Example #12
Source File: parkDataVisulization.py    From python-urbanPlanning with MIT License 5 votes vote down vote up
def heatmap_pData(df):
    """Draw a clustered correlation heatmap for a fixed subset of df columns.

    df must contain every column named in the list below; the function
    draws the plot and returns nothing.
    """
    import pandas as pd
    import seaborn as sns
    sns.set()

    selected_columns = [
        'shapelyArea', 'shapelyLength', 'shapeIdx', 'FRAC',
        'popu_mean', 'popu_std', 'SVFW_mean', 'SVFW_std',
        'SVFep_std', 'SVFep_median', 'SVFep_majority', 'SVFep_minority',
        'facilityFre',
        'HVege_mean', 'HVege_count', 'MVege_mean', 'MVege_count', 'LVege_mean', 'LVege_count',
        'cla_treeCanopy', 'cla_grassShrub', 'cla_bareSoil', 'cla_buildings',
        'cla_roadsRailraods', 'cla_otherPavedSurfaces', 'cla_water',
    ]
    df = df[selected_columns]

    # Categorical palette keyed by the string form of eight network ids.
    used_networks = [1, 5, 6, 7, 8, 12, 13, 17]
    palette = sns.husl_palette(8, s=.45)
    color_by_network = dict(zip(map(str, used_networks), palette))

    # NOTE(review): the lookup keys are '1', '5', ... while the values being
    # mapped are the column names above, so no entry appears to match --
    # confirm whether these margin colours are intended.
    margin_colors = pd.Series(df.columns, index=df.columns).map(color_by_network)

    sns.clustermap(df.corr(), center=0, cmap="vlag",
                   row_colors=margin_colors, col_colors=margin_colors,
                   linewidths=.75, figsize=(13, 13))
Example #13
Source File: plot_figure.py    From pyHSICLasso with MIT License 5 votes vote down vote up
def plot_heatmap(X, row_linkage, featname, filepath):
    """Cluster ``X`` with a given row linkage and save the figure.

    X: data wrapped in a DataFrame whose index is replaced by featname
    row_linkage: precomputed linkage for the rows
    featname: row labels
    filepath: destination passed to plt.savefig
    """
    frame = pd.DataFrame(X)
    frame.index = featname
    grid = sns.clustermap(frame, center=0, row_linkage=row_linkage,
                          method='ward', cmap=microarray_cmap)
    heat_ax = grid.ax_heatmap
    heat_ax.set_xticklabels("")
    # Horizontal row labels, vertical column labels.
    for axis, angle in ((heat_ax.yaxis, 0), (heat_ax.xaxis, 90)):
        plt.setp(axis.get_majorticklabels(), rotation=angle)
    plt.title('Heatmap')
    plt.savefig(filepath)
    plt.clf()
Example #14
Source File: csv2heatmap.py    From amptk with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def drawClustermap(df, output, args=False):
    """Draw a clustermap of df and save it to output.

    args supplies scaling, cluster_method, distance_metric, color, font,
    xaxis_fontsize, yaxis_fontsize and format.  scaling 'z_score' adds
    z_score=0, 'standard' adds standard_scale=0, anything else adds neither.
    """
    # Only the scaling keyword differs between the three supported modes.
    scaling_options = {}
    if args.scaling == 'z_score':
        scaling_options['z_score'] = 0
    elif args.scaling == 'standard':
        scaling_options['standard_scale'] = 0

    g = sns.clustermap(df, method=args.cluster_method, metric=args.distance_metric,
                       linewidths=0.5, cmap=args.color, col_cluster=cluster,
                       figsize=figSize, **scaling_options)

    heat_ax = g.ax_heatmap
    plt.setp(heat_ax.get_yticklabels(), rotation=0,
             size=int(args.yaxis_fontsize), family=args.font)
    plt.setp(heat_ax.get_xticklabels(), rotation=90,
             size=int(args.xaxis_fontsize), family=args.font, weight='bold')
    g.savefig(output, format=args.format, dpi=1000, bbox_inches='tight')
Example #15
Source File: plots.py    From cdlib with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def plot_sim_matrix(clusterings, scoring):
    """
    Plot a similarity matrix between a list of clusterings, using the provided scoring function.

    Every ordered pair of clusterings (including each one with itself) is
    scored, and the ``.score`` attribute of the result fills the matrix cell
    addressed by the two clusterings' ``get_description()`` values.

    :param clusterings: list of clusterings to compare
    :param scoring: the scoring function to use
    :return: a ClusterGrid instance

    Example:

    >>> from cdlib import algorithms, viz, evaluation
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> coms2 = algorithms.walktrap(g)
    >>> clustermap = viz.plot_sim_matrix([coms,coms2],evaluation.adjusted_mutual_information)

    """
    forDF = []
    for c in clusterings:
        cID = c.get_description()
        for c2 in clusterings:
            c2ID = c2.get_description()
            forDF.append([cID, c2ID, scoring(c, c2).score])
    df = pd.DataFrame(columns=["com1", "com2", "score"], data=forDF)
    # Keyword arguments: pandas >= 2.0 no longer accepts them positionally.
    df = df.pivot(index="com1", columns="com2", values="score")
    return sns.clustermap(df)
Example #16
Source File: __init__.py    From pyani with MIT License 5 votes vote down vote up
def heatmap(dfr, outfilename=None, title=None, params=None):
    """Draw a clustered seaborn heatmap for ``dfr`` and return it.

    :param dfr:  pandas DataFrame with relevant data
    :param outfilename:  path to output file (indicates output format);
        nothing is written when it is falsy
    :param title:  forwarded to get_clustermap
    :param params:  parameter object; its ``classes`` attribute is read and
        its ``colorbar``, ``figsize`` and ``linewidths`` attributes are set
        here, so an object must be supplied despite the None default
    """
    # The figure side scales with the number of rows, clamped between a
    # minimum that keeps the plot readable and a maximum that keeps rendering
    # feasible.
    size_cap = 120
    wanted_size = dfr.shape[0] * 1.1
    side = min(max(8, wanted_size), size_cap)
    if side == size_cap:
        # At the cap, shrink the fonts by the same ratio as the figure.
        sns.set_context("notebook", font_scale=size_cap / wanted_size)

    # Class colour bar is optional.
    if params.classes is not None:
        margin_colors = get_colorbar(dfr, params.classes)
    else:
        margin_colors = None

    # Hand the layout decisions to the plotting helper through params.
    params.colorbar = margin_colors
    params.figsize = side
    params.linewidths = 0.25
    grid = get_clustermap(dfr, params, title=title)

    if outfilename:
        grid.savefig(outfilename)

    return grid
Example #17
Source File: word_coocurrence.py    From guesswhat with Apache License 2.0 5 votes vote down vote up
def __init__(self, path, games, logger, suffix):
        """Plot a clustered co-occurrence heatmap of the most frequent words.

        Every question of every game is split into lower-cased words; the (at
        most) 50 most frequent words form the axes of the matrix.  For each
        question, every ordered pair of distinct frequent words occurring in
        it adds one to the corresponding cell.  ``logger`` is accepted but
        not used here.
        """
        super(WordCoocurence, self).__init__(path, self.__class__.__name__, suffix)

        NO_WORDS_TO_DISPLAY = 50

        word_counter = collections.Counter()
        tokenized_questions = []

        for game in games:
            for q in game.questions:
                # Drop question marks, then keep the lower-cased words so the
                # counting and the co-occurrence pass see the same tokens.
                words = [w.lower() for w in re.findall(r'\w+', re.sub('[?]', '', q))]
                tokenized_questions.append(words)
                word_counter.update(words)

        common_words = [word for word, _ in word_counter.most_common(NO_WORDS_TO_DISPLAY)]
        word_index = {word: i for i, word in enumerate(common_words)}
        # Size by the words actually found: the vocabulary may hold fewer
        # than NO_WORDS_TO_DISPLAY entries.
        n_words = len(common_words)
        corrmat = np.zeros((n_words, n_words))

        # Iterate over words, not over the characters of the raw question.
        for words in tokenized_questions:
            hits = [word_index[w] for w in words if w in word_index]
            for i in hits:
                for j in hits:
                    if i != j:
                        corrmat[i][j] += 1.

        # Display the co-occurrence matrix
        df = pd.DataFrame(data=corrmat, index=common_words, columns=common_words)
        f = sns.clustermap(df, standard_scale=0, col_cluster=False, row_cluster=True, cbar_kws={"label": "co-occurence"})
        f.ax_heatmap.xaxis.tick_top()

        plt.setp(f.ax_heatmap.get_xticklabels(), rotation=90)
        plt.setp(f.ax_heatmap.get_yticklabels(), rotation=0)
Example #18
Source File: sf_heatmap.py    From pancanatlas_code_public with MIT License 5 votes vote down vote up
def plot_heatmap(psi_df, meta_df, outpath):
    '''Plot psi_df as an unclustered heatmap with colour margins and save it.

    psi_df: table whose rows are reordered by meta_df['cnc'] and whose
        columns are reordered by the second field of the decoded event name
    meta_df: metadata providing the 'cnc' column used for sorting
    outpath: destination passed to plt.savefig
    '''
    # Sort by cancer type
    psi_df = psi_df.copy().loc[meta_df['cnc'].sort_values().index]
    psi_df = psi_df.iloc[:, psi_df.columns.map(lambda x: _decode_event_name(x)[1]).argsort()]
    col_colors, col_cmap_lut = _get_heatmap_col_colors(psi_df)
    row_colors, row_cmap_lut = _get_heatmap_row_colors(meta_df, psi_df.index)
    graph = sns.clustermap(psi_df, cmap='Purples',
                           row_colors=row_colors, col_colors=col_colors,
                           row_cluster=False, col_cluster=False,
                           xticklabels=psi_df.columns.map(lambda x:_decode_event_name(x)[2]),
                           linewidths=0,
                           mask=psi_df.isnull())
    _override_sns_row_colors(graph, row_colors.values)
    graph.ax_heatmap.set_yticks([])
    graph.ax_heatmap.set_xlabel("Events")
    graph.ax_heatmap.set_ylabel("Samples")
    graph.cax.set_title("psi")
    tumor_only_row_cmap_lut = {key: val for key, val in row_cmap_lut.items() if 'Normal' not in key}
    plotter.add_legend(graph, tumor_only_row_cmap_lut)
    plotter.add_col_legend(graph, col_cmap_lut)
    # Call form of print behaves the same on Python 2 and Python 3.
    print("Writing: %s" % outpath)
    plt.savefig(outpath, bbox_inches='tight')
    # Escaped dot: only a literal ".png" suffix is rewritten.
    pdf_outpath = re.sub(r'\.png$', '.pdf', outpath)
    # NOTE(review): the message is still printed although the PDF export
    # below is disabled.
    print("Writing: %s" % pdf_outpath)
    #plt.savefig(pdf_outpath, bbox_inches='tight')
    plt.close()
    return
Example #19
Source File: de.py    From smallrnaseq with GNU General Public License v3.0 5 votes vote down vote up
def cluster_map(data, names):
    """Cluster map of genes

    data: table from which the rows labelled ``names`` are selected
    names: row labels to plot

    The selected values are log-transformed, missing values are set to 0 and
    every row is centred on its own mean before clustering.  Returns the
    seaborn ClusterGrid.
    """

    import seaborn as sns
    import pylab as plt
    # .ix was removed from pandas; .loc is the label-based replacement.
    # Note that .loc raises KeyError for labels missing from the index.
    data = data.loc[names]
    X = np.log(data).fillna(0)
    X = X.apply(lambda x: x-x.mean(), 1)
    cg = sns.clustermap(X,cmap='RdYlBu_r',figsize=(8,10),lw=.5,linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg
Example #20
Source File: plotting.py    From smallrnaseq with GNU General Public License v3.0 5 votes vote down vote up
def expression_clustermap(counts, freq=0.8):
    """Clustermap of log counts for rows with few missing values.

    counts: table with a 'name' column plus the columns reported by
        base.get_column_names
    freq: rows are kept when their fraction of missing log values is below
        ``1 - freq``; the default 0.8 gives the 0.2 cut-off that used to be
        hard-coded while this parameter was ignored

    Remaining missing values are set to 0 before clustering.  Returns the
    seaborn ClusterGrid.
    """

    _, ncols = base.get_column_names(counts)
    X = counts.set_index('name')[ncols]
    X = np.log(X)
    null_fraction = X.isnull().sum(1) / len(X.columns)
    X = X[null_fraction < 1 - freq]
    X = X.fillna(0)
    cg = sns.clustermap(X,cmap='YlGnBu',figsize=(12,12),lw=0,linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=9)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg
Example #21
Source File: plot.py    From retentioneering-tools with Mozilla Public License 2.0 5 votes vote down vote up
def cluster_heatmap(data, clusters, target, plot_name=None, **kwargs):
    """
    Visualizes feature usage with a row-clustered heatmap.

    Parameters
    --------
    data: pd.DataFrame
        Feature matrix; its ``retention`` accessor supplies the config.
    clusters: np.array
        Accepted but not used by this function.
    target: np.array
        Accepted but not used by this function.
    plot_name: str, optional
        File name of the plot. Default: ``'cluster_heatmap_{timestamp}.svg'``
        with ``:`` and ``.`` in the timestamp replaced by ``_``.

    Returns
    -------
    tuple
        ``(heatmap axes, plot path, None, retention config)`` where the plot
        path is the config's ``experiments_folder`` joined with the name.
    """
    grid = sns.clustermap(data.values,
                          cmap="BrBG",
                          xticklabels=data.columns,
                          yticklabels=False,
                          row_cluster=True,
                          col_cluster=False)
    grid.ax_row_dendrogram.set_visible(False)
    heatmap_axes = grid.ax_heatmap

    if not plot_name:
        # The extension is only appended to the generated default name.
        stamped = 'cluster_heatmap_{}'.format(datetime.now())
        plot_name = stamped.replace(':', '_').replace('.', '_') + '.svg'

    config = data.retention.retention_config
    plot_name = config['experiments_folder'] + '/' + plot_name
    return heatmap_axes, plot_name, None, config
Example #22
Source File: basenji_motifs.py    From basenji with Apache License 2.0 5 votes vote down vote up
def plot_target_corr(filter_outs, seq_targets, filter_names, target_names, out_pdf, seq_op='mean'):
  """ Plot a clustered heatmap of filter-target Spearman correlations.

  Args:
    filter_outs: array reduced over axis 2 (mean or max) to one value per
      sequence and filter
    seq_targets: array whose first filter_outs.shape[0] rows are compared
      against the filter values, one column per target
    filter_names: array of filter names, indexed with a boolean mask
    target_names: one name per target
    out_pdf: output path handed to plt.savefig
    seq_op: 'mean' to average over axis 2; anything else takes the max
  """
  num_seqs = filter_outs.shape[0]
  num_targets = len(target_names)

  if seq_op == 'mean':
    filter_outs_seq = filter_outs.mean(axis=2)
  else:
    filter_outs_seq = filter_outs.max(axis=2)

  # drop filters whose value never varies across sequences
  filter_seqs_std = filter_outs_seq.std(axis=0)
  filter_outs_seq = filter_outs_seq[:, filter_seqs_std > 0]
  filter_names_live = filter_names[filter_seqs_std > 0]

  filter_target_cors = np.zeros((len(filter_names_live), num_targets))
  for fi in range(len(filter_names_live)):
    for ti in range(num_targets):
      cor, _ = spearmanr(filter_outs_seq[:, fi], seq_targets[:num_seqs, ti])
      filter_target_cors[fi, ti] = cor

  cor_df = pd.DataFrame(
      filter_target_cors, index=filter_names_live, columns=target_names)

  sns.set(font_scale=0.3)
  # sns.clustermap creates its own figure and makes it current. Opening
  # another one with plt.figure() first would leave an empty figure that
  # is never saved or closed.
  sns.clustermap(cor_df, cmap='BrBG', center=0, figsize=(8, 10))
  plt.savefig(out_pdf)
  plt.close()


################################################################################
# plot_filter_seq_heat
#
# Plot a clustered heatmap of filter activations in
#
# Input
#  param_matrix: np.array of the filter's parameter matrix
#  out_pdf:
################################################################################ 
Example #23
Source File: plots.py    From cgpm with Apache License 2.0 5 votes vote down vote up
def plot_clustermap(D, xticklabels=None, yticklabels=None):
    """Draw D as a seaborn clustermap and return the ClusterGrid.

    D: 2-D matrix to cluster
    xticklabels: column labels; defaults to 0..D.shape[1]-1
    yticklabels: row labels; defaults to 0..D.shape[0]-1
    """
    import seaborn as sns
    # x tick labels name the columns and y tick labels name the rows, so the
    # defaults must be sized from shape[1] and shape[0] respectively (they
    # were swapped, which only worked for square matrices).
    if xticklabels is None:
        xticklabels = range(D.shape[1])
    if yticklabels is None:
        yticklabels = range(D.shape[0])
    zmat = sns.clustermap(
        D, yticklabels=yticklabels, xticklabels=xticklabels,
        linewidths=0.2, cmap='BuGn')
    plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
    plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
    return zmat
Example #24
Source File: heatmap.py    From mmvec with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _normalize_table(table, method):
    '''
    Normalize column data in a dataframe for plotting in clustermap.

    table: pd.DataFrame
        Input data.
    method: str
        Normalization method to use. A name containing 'z_score' or 'rel'
        must also contain 'col' (normalize each column) or 'row' (normalize
        each row); 'log10' applies log10(x + 1) to every value.

    Returns normalized table as pd.DataFrame, with missing values set to 0.

    Raises ValueError if the method is not supported, or if it needs a
    direction and names neither 'col' nor 'row'.
    '''
    if 'col' in method:
        axis = 0
    elif 'row' in method:
        axis = 1
    else:
        axis = None

    if 'z_score' in method or 'rel' in method:
        if axis is None:
            # Fail with a clear message instead of an unbound local variable.
            raise ValueError(
                "Normalization method %r must contain 'col' or 'row'"
                % (method,))
        if 'z_score' in method:
            res = table.apply(lambda x: (x - x.mean()) / x.std(), axis=axis)
        else:
            res = table.apply(lambda x: x / x.sum(), axis=axis)
    elif method == 'log10':
        res = table.apply(lambda x: np.log10(x + 1))
    else:
        raise ValueError("Unknown normalization method %r" % (method,))
    # Constant or all-zero series produce NaN above; plot them as 0.
    return res.fillna(0)
Example #25
Source File: heatmap.py    From mmvec with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _parse_heatmap_metadata_annotations(metadata_column, margin_palette):
    '''
    Turn a metadata column into per-entry colors for a clustermap margin.

    Parameters
    ----------
    metadata_column: pd.Series of metadata for annotating plots
        Values are converted to strings before being treated as classes.
    margin_palette: str
        Name of color palette to use for annotating metadata
        along margin(s) of clustermap. 'colorhelix' selects a cubehelix
        palette; any other name is passed to sns.color_palette.

    Returns
    -------
    Tuple of (pd.Series of colors aligned with metadata_column,
    dict mapping each class name to its color).
    '''
    labels = metadata_column.astype(str)
    class_names = sorted(labels.unique())
    n_classes = len(class_names)

    # One color per class, from the requested palette.
    if margin_palette != 'colorhelix':
        palette = sns.color_palette(margin_palette, n_classes)
    else:
        palette = sns.cubehelix_palette(
            n_classes, start=2, rot=3, dark=0.3, light=0.8, reverse=True)
    class_colors = dict(zip(class_names, palette))

    # Per-entry colors that will be drawn on the matrix margin.
    return labels.map(class_colors), class_colors
Example #26
Source File: misc.py    From mriqc with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def plot_corrmat(in_csv, out_file=None):
    """Plot a clustered correlation matrix of the columns of a CSV file.

    in_csv: path of the CSV file to read; the columns "subject_id", "site"
        and "modality" are left out of the correlation when present
    out_file: output path; defaults to "corr_matrix.svg".  If its extension
        is not pdf, svg or png, the extension is replaced by ".svg"

    Returns the seaborn ClusterGrid after saving it.
    """
    import seaborn as sn

    sn.set(style="whitegrid")

    dataframe = pd.read_csv(in_csv, index_col=False, na_values="n/a", na_filter=False)
    excluded = ("subject_id", "site", "modality")
    colnames = [col for col in dataframe.columns.tolist() if col not in excluded]

    # Correlation matrix without rows/columns that are entirely missing.
    # dropna no longer accepts a tuple of axes, so drop rows, then columns.
    corr = dataframe[colnames].corr()
    corr = corr.dropna(axis=0, how="all").dropna(axis=1, how="all")

    # Generate a custom diverging colormap
    cmap = sn.diverging_palette(220, 10, as_cmap=True)

    # Draw the clustered heatmap
    corrplot = sn.clustermap(
        corr, cmap=cmap, center=0.0, method="average", square=True, linewidths=0.5
    )
    plt.setp(corrplot.ax_heatmap.yaxis.get_ticklabels(), rotation="horizontal")

    if out_file is None:
        out_file = "corr_matrix.svg"

    fname, ext = op.splitext(out_file)
    if ext[1:] not in ["pdf", "svg", "png"]:
        ext = ".svg"
        out_file = fname + ".svg"

    corrplot.savefig(
        out_file, format=ext[1:], bbox_inches="tight", pad_inches=0, dpi=100
    )
    return corrplot
Example #27
Source File: nested_heatmap.py    From Wooey with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def main():
    """Plot nested heatmaps: clustered major-index sums with their
    minor-index rows listed underneath, in 11 batches of 10 major values.

    All inputs come from the module-level ``parser``: ``tsv`` (open file),
    ``delimiter``, ``major_index``, ``minor_index``, ``log_normalize``,
    ``translate`` and ``minor_cutoff``.  One PNG is written per batch.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns
    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index], sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples to our lowest detection
    df[df==-1*np.inf] = df[df!=-1*np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our addition and makes no biological sense)
    if args.translate:
        df+=abs(df.min().min())
    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    multi = df[df.index.get_level_values(major_index).isin(major_counts[major_counts>=cutoff].dropna().index)]

    # Let's select the most variable minor axis elements
    # (Series.order was removed from pandas; sort_values replaces it)
    most_variable = multi.groupby(level=major_index).var().mean(axis=1).sort_values(ascending=False)
    # and group by 10s
    for i in range(11):
        dat = multi[multi.index.get_level_values(major_index).isin(most_variable.index[10*i:10*(i+1)])]
        # we want to cluster by our major index, and then under these plot the values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        seaborn_map = sns.clustermap(major_dat, row_cluster=True, col_cluster=True)
        # now we keep this clustering, but recreate our data to fit the above clustering, with our minor
        # index below the major index (you can think of transcript levels under gene levels if you are
        # a biologist)
        merged_dat = pd.DataFrame(columns=[seaborn_map.data2d.columns])
        for major_val in seaborn_map.data2d.index:
            minor_rows = multi[multi.index.get_level_values(major_index)==major_val][seaborn_map.data2d.columns]
            major_row = major_dat.loc[major_val, ][seaborn_map.data2d.columns]
            # append() returns a new frame, so only the reassigned chain
            # below has an effect (a stray call discarding its result was
            # removed here).
            # NOTE(review): DataFrame.append itself was removed in pandas 2.0;
            # confirm the supported pandas range before porting to pd.concat.
            merged_dat = merged_dat.append(major_row).append(minor_rows)
        merged_map = sns.clustermap(merged_dat, row_cluster=False, col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack but it works
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)

        # for rows, I imagine at some point it will fail to fall within the major axis but fortunately
        # for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(os.path.split(args.tsv.name)[1], i))
Example #28
Source File: plot_heatmaps.py    From pancanatlas_code_public with MIT License 4 votes vote down vote up
def main(df, outdir, desc, color_loader, run_representative):
    '''Cluster one data set and plot its heatmap(s).

    df: data table; for non-altsplice data its values are clipped in place
        to the per-column 99th percentile
    outdir: directory to write plots (created if missing)
    desc: identifies the data set (used for e.g. plot titles); must start
        with 'altsplice' or 'expression', case-insensitively
    color_loader: callable returning (category series, colour lookup) for df
    run_representative: if truthy, also plot the per-category medians
    '''
    print("clustermap: %s" %desc)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    desc_key = desc.lower()
    is_altsplice = desc_key.startswith('altsplice')
    assert is_altsplice or desc_key.startswith('expression')
    if not is_altsplice:
        assert df.values.max() > 1, "99.999999% Sure this is not psi data"
        print("Clipping 99th percentile")
        column_caps = np.percentile(df.values, 99, axis=0)
        df.iloc[:] = np.minimum(df.values, column_caps)

    method, metric = 'ward', 'cosine'

    # colours per row, and rows restricted to those with a colour
    cat_series, color_lut = color_loader(df)
    colors = cat_series.map(color_lut)
    df = df.loc[colors.index]

    # keep only the high-variance columns
    df = df.iloc[:, filter_to_high_var(df.values, df.columns, MAX_EVENTS)]

    desc_slug = desc.strip().replace(' ', '_').lower()
    cluster_desc = '_'.join(['%d_high_var_events'%MAX_EVENTS, method, metric])
    sample_desc = desc_slug + '_' + cluster_desc

    sample_linkage, event_linkage = get_linkage(df, sample_desc, method=method, metric=metric)
    outpath = os.path.join(outdir, cluster_desc + '_clustermap.png')
    plot_heatmap(outpath, df, sample_linkage, colors, event_linkage, desc, color_lut)

    if not run_representative:
        return

    # same plot again on the per-category medians
    medians = collapse_to_median(df, cat_series)
    rep_colors = colors.loc[medians.index]
    rep_cluster_desc = cluster_desc + '_reps'
    rep_desc = desc_slug + '_' + rep_cluster_desc
    print("clustermap: %s" %desc)
    rep_sample_linkage, rep_event_linkage = get_linkage(medians, rep_desc, method=method, metric=metric)
    rep_outpath = os.path.join(outdir, rep_cluster_desc + '_clustermap.png')
    plot_heatmap(rep_outpath, medians, rep_sample_linkage, rep_colors, rep_event_linkage, rep_desc, color_lut)
    return
Example #29
Source File: nested_heatmap.py    From django-djangui with GNU General Public License v3.0 4 votes vote down vote up
def main():
    """Plot nested clustered heatmaps for the most variable major-index groups.

    Reads the table given by ``args.tsv`` with a two-level row index
    (``args.major_index``, ``args.minor_index``), optionally log2-transforms
    and translates it, keeps the major-index groups that have at least
    ``args.minor_cutoff`` values, ranks those groups by their mean
    within-group variance, and then, for successive groups of 10 (up to 11
    groups), writes one PNG per group named
    ``<basename of args.tsv.name>_heatmap_<i>.png``.

    Each PNG shows every major-index row (the sum over its minor rows)
    followed by its minor rows, laid out in the row/column order taken from a
    clustering of the major-level sums.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns
    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index], sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples (log2(0) == -inf) to our lowest detection
    df[df == -np.inf] = df[df != -np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our
    # addition and makes no biological sense)
    if args.translate:
        df += abs(df.min().min())
    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    kept_majors = major_counts[major_counts >= cutoff].dropna().index
    multi = df[df.index.get_level_values(major_index).isin(kept_majors)]

    # Let's select the most variable major-index groups.
    # Series.order() no longer exists in pandas; sort_values() is its replacement.
    most_variable = (
        multi.groupby(level=major_index).var().mean(axis=1).sort_values(ascending=False)
    )

    # and plot them in groups of 10
    group_size = 10
    n_groups = 11
    for i in range(n_groups):
        group_labels = most_variable.index[group_size * i:group_size * (i + 1)]
        dat = multi[multi.index.get_level_values(major_index).isin(group_labels)]
        if dat.empty:
            # The ranking is exhausted; later slices would be empty as well and
            # clustering an empty frame fails.
            break
        # we want to cluster by our major index, and then under these plot the
        # values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        seaborn_map = sns.clustermap(major_dat, row_cluster=True, col_cluster=True)
        # now we keep this clustering, but recreate our data to fit the above
        # clustering, with our minor index below the major index (you can think
        # of transcript levels under gene levels if you are a biologist)
        ordered_cols = seaborn_map.data2d.columns
        pieces = []
        for major_val in seaborn_map.data2d.index:
            is_member = multi.index.get_level_values(major_index) == major_val
            # one-row frame for the major-level sum, then the minor rows under it
            pieces.append(major_dat.loc[[major_val], ordered_cols])
            pieces.append(multi[is_member][ordered_cols])
        # Concatenate once rather than growing a frame row by row
        # (DataFrame.append is gone from current pandas).
        merged_dat = pd.concat(pieces)
        merged_map = sns.clustermap(merged_dat, row_cluster=False, col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack but it works
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)

        # for rows, I imagine at some point it will fail to fall within the
        # major axis but fortunately for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(os.path.split(args.tsv.name)[1], i))
Example #30
Source File: basenji_motifs.py    From basenji with Apache License 2.0 4 votes vote down vote up
def plot_filter_seq_heat(filter_outs, out_pdf, whiten=True, drop_dead=True):
  """Plot a clustered heatmap of per-sequence mean filter activations.

  Args:
    filter_outs: 3-D array; it is averaged over axis 2 and the result is
      transposed before plotting. Presumably laid out as
      (sequences, filters, positions) -- TODO confirm against the caller.
    out_pdf: path passed to plt.savefig for the rendered figure.
    whiten: if True, standardize the averaged matrix with
      preprocessing.scale before transposing.
    drop_dead: if True, drop rows of the transposed matrix whose standard
      deviation is zero.
  """
  # compute filter output means per sequence
  filter_seqs = filter_outs.mean(axis=2)

  # whiten
  if whiten:
    filter_seqs = preprocessing.scale(filter_seqs)

  # transpose so the former axis 1 becomes the heatmap rows
  filter_seqs = np.transpose(filter_seqs)

  if drop_dead:
    filter_stds = filter_seqs.std(axis=1)
    filter_seqs = filter_seqs[filter_stds > 0]

  # downsample sequences; randint draws with replacement, so a column can be
  # picked more than once
  seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)
  sampled = filter_seqs[:, seqs_i]

  # clip the color scale to the 0.1th / 99.9th percentiles of the sampled values
  hmin = np.percentile(sampled, 0.1)
  hmax = np.percentile(sampled, 99.9)

  sns.set(font_scale=0.3)

  # clustermap opens its own figure, which becomes the current one; opening
  # another figure beforehand would leave a blank figure that plt.close()
  # below never releases.
  sns.clustermap(
      sampled,
      row_cluster=True,
      col_cluster=True,
      linewidths=0,
      xticklabels=False,
      vmin=hmin,
      vmax=hmax)
  plt.savefig(out_pdf)
  plt.close()


################################################################################
# plot_filter_seq_heat
#
# Plot a clustered heatmap of filter activations in sequence segments.
#
# Mean doesn't work well for the smaller segments for some reason, but taking
# the max looks OK. Still, similar motifs don't cluster quite as well as you
# might expect.
#
# Input
#  filter_outs
################################################################################