Python Examples of seaborn.clustermap

Source File: RnaseqqcReport.py From CGATPipelines with MIT License

6 votes

def __call__(self, data, path):
    """Render a clustered heatmap of ``data`` with colour-bar annotations.

    ``self.getColorBar(data)`` supplies the colour vector used for both the
    row and the column annotation strips, the distinct labels (``unique``)
    and the xkcd colour name that belongs to each label (``xkcd``).  Only
    the first ``data.shape[0]`` columns of ``data`` are plotted.

    :param data: pandas DataFrame to cluster.
    :param path: not used by this method.
    :return: ``ResultBlocks`` wrapping a single ``ResultBlock`` whose text
        references the matplotlib figure number of the plot.
    """
    colorbar, factors, unique, xkcd = self.getColorBar(data)
    n_samples = data.shape[0]
    # keep as many columns as there are rows
    # NOTE(review): presumably to obtain a square sample-by-sample matrix
    data = data.iloc[:, :n_samples]
    col_dict = dict(zip(unique, xkcd))

    print(data.head())
    seaborn.set(font_scale=.5)
    grid = seaborn.clustermap(data,
                              row_colors=colorbar, col_colors=colorbar)
    # set_visible() returns None.  The original wrapped this call in
    # plt.setp(), which then had no artist to work on (and fails on
    # matplotlib versions that try to iterate the argument).
    grid.ax_heatmap.yaxis.set_visible(False)

    # Zero-sized bars draw nothing; they only register one legend entry
    # per label on the column-dendrogram axes.
    for label in unique:
        grid.ax_col_dendrogram.bar(
            0, 0, color=seaborn.xkcd_rgb[col_dict[label]],
            label=label, linewidth=0)
    grid.ax_col_dendrogram.legend(loc="center", ncol=len(unique))

    return ResultBlocks(ResultBlock(
        '''#$mpl %i$#\n''' % grid.cax.figure.number,
        title='ClusterMapPlot'))

Source File: basenji_test_genes.py From basenji with Apache License 2.0

6 votes

def clustermap(gene_values, out_pdf, color=None, table=False):
  """ Generate a clustered heatmap using seaborn.

  Args:
    gene_values: 2-D data passed straight to ``sns.clustermap``.
    out_pdf: path of the figure file to write.
    color: colormap forwarded as ``cmap`` (seaborn default when None).
    table: if True, also save ``gene_values`` with ``np.save`` next to the
      figure, using ``out_pdf`` without its extension as the file stem.
  """
  import os

  if table:
    # np.save appends '.npy' itself; splitext strips whatever extension
    # out_pdf carries instead of blindly cutting four characters.
    np.save(os.path.splitext(out_pdf)[0], gene_values)

  # sns.clustermap creates its own figure, which becomes the current one.
  # The extra plt.figure() the original opened first was never drawn on and
  # never closed, so every call leaked an empty figure.
  g = sns.clustermap(
      gene_values,
      metric='euclidean',
      cmap=color,
      xticklabels=False,
      yticklabels=False)
  g.ax_heatmap.set_xlabel('Experiments')
  g.ax_heatmap.set_ylabel('Genes')
  plt.savefig(out_pdf)
  plt.close()

Source File: construction.py From FinanceHub with MIT License

6 votes

def plot_corr_matrix(self, save_path=None, show_chart=True, cmap='vlag', linewidth=0, figsize=(10, 10)):
    """
    Draws the correlation matrix ``self.corr`` as a clustered heatmap.

    The same precomputed linkage (``self.link``) orders both rows and columns.

    :param save_path: file path for the image. When given, the figure is saved there.
    :param show_chart: If True, displays the chart.
    :param cmap: matplotlib colormap.
    :param linewidth: width of the grid lines of the correlation matrix.
    :param figsize: tuple with figsize dimensions.
    """
    cluster_options = dict(
        method=self.method,
        metric=self.metric,
        cmap=cmap,
        figsize=figsize,
        linewidths=linewidth,
        col_linkage=self.link,
        row_linkage=self.link,
    )
    sns.clustermap(self.corr, **cluster_options)

    plt.tight_layout()

    if save_path is not None:
        plt.savefig(save_path, pad_inches=1, dpi=400)

    if show_chart:
        plt.show()

    plt.close()

Source File: heatmap.py From django-djangui with GNU General Public License v3.0

6 votes

def main():
    """Cluster a delimited table and write the heatmap plus the reordered data.

    Reads the table named by ``args.tsv``, optionally restricts it to the
    columns listed in ``args.cols`` (names, or 1-based positions when the
    names are not found), optionally log2-transforms it, and writes
    ``<input name>_heatmap.png`` and ``<input name>_heatmap.tsv`` to the
    current directory.

    Raises:
        ValueError: if more than 50 columns remain after selection.
    """
    args = parser.parse_args()
    data = pd.read_table(args.tsv, index_col=args.row if args.row else 0, sep=args.delimiter, encoding='utf-8')
    if args.cols:
        wanted = args.cols.split(',')
        try:
            data = data.loc[:, wanted]
        except KeyError:
            # Not column labels: interpret the entries as 1-based positions.
            data = data.iloc[:, [int(i) - 1 for i in wanted]]
    if len(data.columns) > 50:
        # ValueError instead of BaseException, so ordinary
        # ``except Exception`` handlers see the failure.
        raise ValueError('Too many columns')
    data = np.log2(data) if args.log_normalize else data
    # log2(0) is -inf; replace those cells with the smallest finite value.
    data[data == -1 * np.inf] = data[data != -1 * np.inf].min().min()

    # The original wrote ``5+0 if cond else extra``, which Python parses as
    # ``(5+0) if cond else extra``: once the cutoff was reached the base size
    # was dropped and the figure collapsed to ``extra`` (0 at the cutoff).
    # The base size is now always kept and only the increment is conditional.
    col_cutoff = 50
    n_cols = len(data.columns)
    width = 5 + (0 if n_cols < col_cutoff else (n_cols - col_cutoff) / 100.0)
    row_cutoff = 1000
    n_rows = len(data)
    height = 15 + (0 if n_rows < row_cutoff else (n_rows - row_cutoff) / 75.0)

    seaborn_map = sns.clustermap(data, figsize=(width, height))
    out_stem = '{}_heatmap'.format(os.path.split(args.tsv.name)[1])
    seaborn_map.savefig(out_stem + '.png')
    # data2d holds the matrix in clustered row/column order.
    seaborn_map.data2d.to_csv(out_stem + '.tsv', sep='\t')

Source File: heatmap.py From Wooey with BSD 3-Clause "New" or "Revised" License

6 votes

def main():
    """Cluster a delimited table and write the heatmap plus the reordered data.

    Reads the table named by ``args.tsv``, optionally restricts it to the
    columns listed in ``args.cols`` (names, or 1-based positions when the
    names are not found), optionally log2-transforms it, and writes
    ``<input name>_heatmap.png`` and ``<input name>_heatmap.tsv`` to the
    current directory.

    Raises:
        ValueError: if more than 50 columns remain after selection.
    """
    args = parser.parse_args()
    data = pd.read_table(args.tsv, index_col=args.row if args.row else 0, sep=args.delimiter, encoding='utf-8')
    if args.cols:
        wanted = args.cols.split(',')
        try:
            data = data.loc[:, wanted]
        except KeyError:
            # Not column labels: interpret the entries as 1-based positions.
            data = data.iloc[:, [int(i) - 1 for i in wanted]]
    if len(data.columns) > 50:
        # ValueError instead of BaseException, so ordinary
        # ``except Exception`` handlers see the failure.
        raise ValueError('Too many columns')
    data = np.log2(data) if args.log_normalize else data
    # log2(0) is -inf; replace those cells with the smallest finite value.
    data[data == -1 * np.inf] = data[data != -1 * np.inf].min().min()

    # The original wrote ``5+0 if cond else extra``, which Python parses as
    # ``(5+0) if cond else extra``: once the cutoff was reached the base size
    # was dropped and the figure collapsed to ``extra`` (0 at the cutoff).
    # The base size is now always kept and only the increment is conditional.
    col_cutoff = 50
    n_cols = len(data.columns)
    width = 5 + (0 if n_cols < col_cutoff else (n_cols - col_cutoff) / 100.0)
    row_cutoff = 1000
    n_rows = len(data)
    height = 15 + (0 if n_rows < row_cutoff else (n_rows - row_cutoff) / 75.0)

    seaborn_map = sns.clustermap(data, figsize=(width, height))
    out_stem = '{}_heatmap'.format(os.path.split(args.tsv.name)[1])
    seaborn_map.savefig(out_stem + '.png')
    # data2d holds the matrix in clustered row/column order.
    seaborn_map.data2d.to_csv(out_stem + '.tsv', sep='\t')

Source File: plot_heatmaps.py From pancanatlas_code_public with MIT License

6 votes

def plot_heatmap(outpath, df, sample_linkage, sample_colors, event_linkage, desc, sample_color_lut):
    '''Draw a clustered heatmap of ``df.T`` with precomputed linkages and save it.

    outpath: destination handed to ``plot_utils.save``
    df: DataFrame that is transposed before plotting, so its rows end up as
        heatmap columns
    sample_linkage: linkage used for the heatmap columns
    sample_colors: colours for the column annotation strip
    event_linkage: linkage used for the heatmap rows
    desc: description; must start with 'altsplice' or 'expression'
        (case-insensitive) and is used for the title
    sample_color_lut: mapping passed to ``add_legend``
    '''
    kind = desc.lower()
    assert kind.startswith('altsplice') or kind.startswith('expression')
    is_altsplice = kind.startswith('altsplice')

    # NOTE(review): presumably raised because dendrogram construction
    # recurses deeply on large linkages -- confirm before lowering.
    sys.setrecursionlimit(100000)
    # print() with a single argument behaves identically on Python 2 and 3.
    print("Plotting data ... ")
    graph = sns.clustermap(df.T,
                           col_colors=sample_colors,
                           col_linkage=sample_linkage, row_linkage=event_linkage,
                           cmap=sns.cubehelix_palette(as_cmap=True))
    graph.ax_heatmap.axis('off')
    graph.ax_col_dendrogram.set_title("%s Clustering" % ' '.join(desc.split('_')).title())
    # df is transposed above: the heatmap columns (x) are the entries that
    # carry sample_colors/sample_linkage, the rows (y) use event_linkage.
    # The original labels were the other way round.  They are hidden anyway
    # while the axis is switched off.
    graph.ax_heatmap.set_xlabel("Samples")
    graph.ax_heatmap.set_ylabel("Events")
    if is_altsplice:
        graph.cax.set_title("psi")
    else:
        graph.cax.set_title("log(counts)")
    add_legend(graph, sample_color_lut)
    plot_utils.save(outpath)
    return

Source File: __init__.py From pyani with MIT License

6 votes

def get_clustermap(dfr, params, title=None, annot=True):
    """Build and return a seaborn clustermap (ClusterGrid) for ``dfr``.

    :param dfr:  data passed to ``sns.clustermap``
    :param params:  object providing ``cmap``, ``vmin``, ``vmax``,
        ``colorbar``, ``figsize`` and ``linewidths``
    :param title:  str, label for the colour-bar axis (skipped when falsy)
    :param annot:  Boolean, write the cell values into the heatmap?
    """
    options = {
        "cmap": params.cmap,
        "vmin": params.vmin,
        "vmax": params.vmax,
        # the same colour strip annotates columns and rows
        "col_colors": params.colorbar,
        "row_colors": params.colorbar,
        # square figure: one edge length used for both dimensions
        "figsize": (params.figsize, params.figsize),
        "linewidths": params.linewidths,
        "annot": annot,
    }
    grid = sns.clustermap(dfr, **options)

    # label each of the input genomes
    add_labels(grid, params)

    colorbar_axes = grid.cax
    colorbar_axes.yaxis.set_label_position("left")
    if title:
        colorbar_axes.set_ylabel(title)

    return grid


# Generate Seaborn heatmap output

Source File: heatmap.py From mmvec with BSD 3-Clause "New" or "Revised" License

5 votes

def _parse_taxonomy_strings(taxonomy_series, level):
    '''
    Pick one rank out of each semicolon-delimited taxonomy string.

    taxonomy_series: pd.Series of semicolon-delimited taxonomy strings
    level: int
        taxonomic level for annotating clustermap (1 = first field).

    Returns
    -------
    pd.Series with the whitespace-stripped name at ``level``; strings with
    fewer than ``level`` fields yield their last (terminal) field.
    '''
    def _name_at_level(taxonomy):
        ranks = taxonomy.split(';')
        # slicing first makes short lineages fall back to their last rank
        return ranks[:level][-1].strip()

    return taxonomy_series.apply(_name_at_level)

Source File: plotlib.py From mCaller with MIT License

5 votes

def plot_correlation_matrix(curmat,elevenmer,labels,outdir):
    """Cluster ``curmat``, save the plot as a PDF and display it.

    curmat: matrix passed to ``sns.clustermap``
    elevenmer: string used as plot title and as part of the file name
    labels: tick labels for both axes
    outdir: prefix the file name is appended to verbatim (no separator is
        inserted, so it must already end with one)
    """
    # clustermap always creates its own figure.  The original opened a
    # separate plt.figure(figsize=(7,6)) first, which stayed empty and was
    # never closed; the requested size is now given to clustermap itself.
    cg = sns.clustermap(curmat,metric='euclidean',xticklabels=labels,
                        yticklabels=labels,figsize=(7,6))
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.title(elevenmer)
    # Save before show(): once an interactive window has been closed the
    # figure may be gone and savefig would write an empty page.
    plt.savefig(outdir+'correlation_matrix_'+elevenmer+'.pdf',dpi=500,transparent=True)
    plt.show()

Source File: utils.py From pysster with MIT License

5 votes

def _plot_heatmap(file_path, data, class_id, classes = None):
    """Save a clustered, z-scored heatmap of ``data.T`` with class colours.

    file_path: destination passed to ``plt.savefig``
    data: table that is transposed before plotting, so its rows become the
        heatmap columns
    class_id: one class index per heatmap column; selects the colour from
        the palette
    classes: optional names used in the legend entries
        (``"class_<name>"``); defaults to ``0 .. n_classes-1``
    """
    import seaborn as sns
    _set_sns_context(data.shape[1])
    n_classes = len(set(class_id))
    palette = _get_colors(n_classes)
    colors = [palette[x] for x in class_id]
    g = sns.clustermap(data = data.T, method = "ward", metric = "euclidean",
                       cmap = "RdBu_r", xticklabels = False, yticklabels = True,
                       figsize = (30,25), row_cluster = True, col_cluster = True,
                       linewidths = 0, col_colors = colors, robust = True,
                       z_score = 0, cbar_kws={"ticks":[-1.5,0,+1.5]})
    # Shrink the visible range of the column-dendrogram axes to (almost)
    # nothing; the legend below is drawn on these axes.
    # NOTE(review): presumably this hides the dendrogram lines -- confirm.
    g.ax_col_dendrogram.set_xlim([0,1e-10])
    g.ax_col_dendrogram.set_ylim([0,1e-10])
    plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0)
    sns.set(font_scale=2.8)
    # Identity test: ``classes == None`` is evaluated element-wise for
    # array-like input and then cannot be used as a condition.
    if classes is None:
        classes = list(range(n_classes))
    # Zero-sized bars draw nothing; they only create the legend entries.
    for x in range(n_classes):
        g.ax_col_dendrogram.bar(0, 0, color=palette[x],
                                label="class_{}".format(classes[x]), linewidth=0)
    g.ax_col_dendrogram.legend(loc = "center", ncol = min(6, n_classes))
    # last axes of the current figure
    # NOTE(review): presumably the colour bar -- verify with the seaborn
    # version in use.
    cax = plt.gcf().axes[-1]
    cax.tick_params(labelsize=25)
    plt.savefig(file_path, bbox_inches = 'tight')
    plt.close('all')
    sns.reset_orig()

Source File: heatmap.py From XenonPy with BSD 3-Clause "New" or "Revised" License

5 votes

def draw(self, y=None):
    """Draw ``self.desc`` as a clustermap, optionally with a side curve.

    :param y: optional pandas Series. When given, its values are plotted
        top-to-bottom in a narrow axes to the right of the heatmap and its
        ``name`` labels that axes.

    The figure is saved with ``plt.savefig(**self.save)`` when ``self.save``
    is truthy.
    """
    grid = sb.clustermap(
        self.desc,
        cmap="RdBu",
        method=self.method,
        figsize=self.figsize,
        row_cluster=self.row_cluster,
        col_cluster=self.col_cluster,
        **self.kwargs)
    grid.cax.set_visible(False)
    heatmap_yaxis = grid.ax_heatmap.yaxis
    heatmap_yaxis.set_ticks_position('left')
    heatmap_yaxis.set_label_position('left')

    if y is not None:
        # leave a strip on the right-hand side for the extra axes
        grid.ax_col_dendrogram.set_position((0.1, 0.8, 0.83, 0.1))
        grid.ax_heatmap.set_position((0.1, 0.2, 0.84, 0.6))
        side_axes = plt.axes([0.95, 0.2, 0.05, 0.6])
        values = y.values
        # descending positions: the first value is drawn at the top
        heights = np.arange(len(values))[::-1]
        side_axes.plot(values, heights, lw=4)
        side_axes.get_yaxis().set_visible(False)
        for edge in ('top', 'right'):
            side_axes.spines[edge].set_visible(False)
        side_axes.set_xlabel('{:s}'.format(y.name), fontsize='large')
    else:
        grid.ax_col_dendrogram.set_position((0.1, 0.8, 0.9, 0.1))
        grid.ax_heatmap.set_position((0.1, 0.2, 0.9, 0.6))

    if self.save:
        plt.savefig(**self.save)

Source File: parkDataVisulization.py From python-urbanPlanning with MIT License

5 votes

def heatmap_pData(df):
    """Draw a clustered correlation heatmap for a fixed set of columns.

    df: DataFrame that contains every column named in ``columnsList``.

    The correlation matrix of the selected columns is clustered and drawn
    with seaborn; nothing is returned or saved.
    """
    import pandas as pd
    import seaborn as sns
    sns.set()

    columnsList = [
        'shapelyArea', 'shapelyLength', 'shapeIdx', 'FRAC',
        'popu_mean', 'popu_std', 'SVFW_mean', 'SVFW_std',
        'SVFep_std', 'SVFep_median', 'SVFep_majority', 'SVFep_minority',
        'facilityFre',
        'HVege_mean', 'HVege_count', 'MVege_mean', 'MVege_count', 'LVege_mean', 'LVege_count',
        'cla_treeCanopy', 'cla_grassShrub', 'cla_bareSoil', 'cla_buildings', 'cla_roadsRailraods', 'cla_otherPavedSurfaces', 'cla_water',
    ]
    selected = df[columnsList]

    # Palette lookup keyed by the strings '1', '5', ... '17'.
    # NOTE(review): none of the selected column names equals one of these
    # keys, so the mapping below yields NaN for every column and the colour
    # strips carry no information -- decide which grouping is intended.
    used_networks = [1, 5, 6, 7, 8, 12, 13, 17]
    palette = sns.husl_palette(8, s=.45)
    colour_by_key = dict(zip(map(str, used_networks), palette))

    column_names = selected.columns
    strip_colours = pd.Series(column_names, index=selected.columns).map(colour_by_key)

    # Draw the full plot
    sns.clustermap(selected.corr(), center=0, cmap="vlag",
                   row_colors=strip_colours, col_colors=strip_colours,
                   linewidths=.75, figsize=(13, 13))

Source File: plot_figure.py From pyHSICLasso with MIT License

5 votes

def plot_heatmap(X, row_linkage, featname, filepath):
    """Save a clustered heatmap of ``X`` with one labelled row per feature.

    X: 2-D data turned into a DataFrame
    row_linkage: precomputed linkage for the rows
    featname: row labels, one per row of ``X``
    filepath: destination passed to ``plt.savefig``
    """
    df = pd.DataFrame(X)
    df.index = featname
    # Rows follow row_linkage; ``method`` still applies to the columns,
    # which are clustered by seaborn.
    cg = sns.clustermap(df, center=0, row_linkage=row_linkage,
                        method='ward', cmap=microarray_cmap)
    # blank out the column labels (rotating them afterwards, as the
    # original did, had nothing left to act on)
    cg.ax_heatmap.set_xticklabels("")
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.title('Heatmap')
    plt.savefig(filepath)
    # Close rather than clear: plt.clf() keeps the figure registered with
    # pyplot, so repeated calls accumulated open figures.
    plt.close()

Source File: csv2heatmap.py From amptk with BSD 2-Clause "Simplified" License

5 votes

def drawClustermap(df, output, args=False):
    """Cluster ``df`` according to the command-line options and save it.

    df: data passed to ``sns.clustermap``
    output: destination passed to ``savefig``
    args: options object; ``scaling``, ``cluster_method``,
        ``distance_metric``, ``color``, ``yaxis_fontsize``,
        ``xaxis_fontsize``, ``font`` and ``format`` are read from it.
        NOTE(review): the ``False`` default cannot work because attributes
        are read from it unconditionally -- callers must pass it.

    Also reads the module-level names ``cluster`` and ``figSize``.
    """
    # only the scaling keyword differs between the supported modes
    if args.scaling == 'z_score':
        scaling_kwargs = {'z_score': 0}
    elif args.scaling == 'standard':
        scaling_kwargs = {'standard_scale': 0}
    else:
        scaling_kwargs = {}

    g = sns.clustermap(
        df,
        method=args.cluster_method,
        metric=args.distance_metric,
        linewidths=0.5,
        cmap=args.color,
        col_cluster=cluster,
        figsize=figSize,
        **scaling_kwargs)

    heatmap_axes = g.ax_heatmap
    plt.setp(heatmap_axes.get_yticklabels(), rotation=0, size=int(args.yaxis_fontsize), family=args.font)
    plt.setp(heatmap_axes.get_xticklabels(), rotation=90, size=int(args.xaxis_fontsize), family=args.font, weight='bold')
    g.savefig(output, format=args.format, dpi=1000, bbox_inches='tight')

Source File: plots.py From cdlib with BSD 2-Clause "Simplified" License

5 votes

def plot_sim_matrix(clusterings, scoring):
    """
    Plot a similarity matrix between a list of clusterings, using the provided scoring function.

    Every ordered pair of clusterings (including each clustering with itself)
    is scored; the ``.score`` attribute of each result fills the matrix, whose
    rows and columns are labelled with ``get_description()``.

    :param clusterings: list of clusterings to compare
    :param scoring: the scoring function to use
    :return: a ClusterGrid instance

    Example:

    >>> from cdlib import algorithms, viz, evaluation
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> coms2 = algorithms.walktrap(g)
    >>> clustermap = viz.plot_sim_matrix([coms,coms2],evaluation.adjusted_mutual_information)

    """
    descriptions = [c.get_description() for c in clusterings]
    records = [
        [first_id, second_id, scoring(first, second).score]
        for first, first_id in zip(clusterings, descriptions)
        for second, second_id in zip(clusterings, descriptions)
    ]
    df = pd.DataFrame(columns=["com1", "com2", "score"], data=records)
    # pivot() only accepts keyword arguments on current pandas
    df = df.pivot(index="com1", columns="com2", values="score")
    return sns.clustermap(df)

Source File: __init__.py From pyani with MIT License

5 votes

def heatmap(dfr, outfilename=None, title=None, params=None):
    """Draw a clustered seaborn heatmap for ``dfr`` and return it.

    :param dfr:  pandas DataFrame with relevant data
    :param outfilename:  path to output file (indicates output format);
        nothing is written when falsy
    :param title:  forwarded to ``get_clustermap``
    :param params:  parameter object; ``classes`` is read from it and
        ``colorbar``, ``figsize`` and ``linewidths`` are set on it.
        NOTE(review): the ``None`` default cannot work because attributes
        are accessed unconditionally -- callers must supply it.
    """
    # The figure edge grows with the number of rows, but is kept between a
    # minimum (aesthetics) and a maximum (rendering very large figures can
    # crash).  At the maximum the fonts are scaled down instead.
    largest_edge = 120
    wanted_edge = dfr.shape[0] * 1.1
    figsize = min(max(8, wanted_edge), largest_edge)
    if figsize == largest_edge:
        sns.set_context("notebook", font_scale=largest_edge / wanted_edge)

    # Colour bar only when classes are defined
    col_cb = None if params.classes is None else get_colorbar(dfr, params.classes)

    # The drawing helper reads these settings from the parameter object
    params.colorbar = col_cb
    params.figsize = figsize
    params.linewidths = 0.25
    grid = get_clustermap(dfr, params, title=title)

    if outfilename:
        grid.savefig(outfilename)

    return grid

Source File: word_coocurrence.py From guesswhat with Apache License 2.0

5 votes

def __init__(self, path, games, logger, suffix):
    """Plot how often the most frequent question words occur together.

    path, suffix: forwarded to the parent constructor together with the
        class name
    games: iterable of objects with a ``questions`` list of strings
    logger: not used here

    The (up to) 50 most frequent lower-cased words are selected; for every
    question each pair of distinct selected words found in it adds one to
    the co-occurrence matrix, which is then drawn as a row-clustered
    heatmap.
    """
    super(WordCoocurence, self).__init__(path, self.__class__.__name__, suffix)

    NO_WORDS_TO_DISPLAY = 50

    # Tokenise every question once and keep the word lists.  The original
    # stored the raw strings and later iterated over them, which walks
    # single characters rather than words.
    tokenized_questions = []
    word_counter = collections.Counter()
    for game in games:
        for q in game.questions:
            q = re.sub('[?]', '', q)
            words = [w.lower() for w in re.findall(r'\w+', q)]
            tokenized_questions.append(words)
            word_counter.update(words)

    # compute word co-occurrence
    common_words = [word for word, _ in word_counter.most_common(NO_WORDS_TO_DISPLAY)]
    # word -> matrix position, replacing repeated list.index() scans
    position = {word: i for i, word in enumerate(common_words)}
    # Size by the words actually found: with fewer distinct words than
    # NO_WORDS_TO_DISPLAY a fixed-size matrix would not match the labels.
    n_words = len(common_words)
    corrmat = np.zeros((n_words, n_words))

    for words in tokenized_questions:
        hits = [position[w] for w in words if w in position]
        for i in hits:
            for j in hits:
                if i != j:
                    corrmat[i][j] += 1.

    # Display the co-occurrence matrix
    df = pd.DataFrame(data=corrmat, index=common_words, columns=common_words)
    f = sns.clustermap(df, standard_scale=0, col_cluster=False, row_cluster=True, cbar_kws={"label": "co-occurence"})
    f.ax_heatmap.xaxis.tick_top()

    plt.setp(f.ax_heatmap.get_xticklabels(), rotation=90)
    plt.setp(f.ax_heatmap.get_yticklabels(), rotation=0)

Source File: sf_heatmap.py From pancanatlas_code_public with MIT License

5 votes

def plot_heatmap(psi_df, meta_df, outpath):
    '''Draw an unclustered, annotated heatmap of ``psi_df`` and save it.

    psi_df: DataFrame; rows are re-ordered by ``meta_df['cnc']`` and columns
        by the second element of the decoded event name
    meta_df: DataFrame with a 'cnc' column, indexed like ``psi_df``
    outpath: destination passed to ``plt.savefig``
    '''
    # Sort by cancer type
    psi_df = psi_df.copy().loc[meta_df['cnc'].sort_values().index]
    psi_df = psi_df.iloc[:, psi_df.columns.map(lambda x: _decode_event_name(x)[1]).argsort()]
    col_colors, col_cmap_lut = _get_heatmap_col_colors(psi_df)
    row_colors, row_cmap_lut = _get_heatmap_row_colors(meta_df, psi_df.index)
    # No clustering on either axis: the order established above is kept.
    graph = sns.clustermap(psi_df, cmap='Purples',
                           row_colors=row_colors, col_colors=col_colors,
                           row_cluster=False, col_cluster=False,
                           xticklabels=psi_df.columns.map(lambda x: _decode_event_name(x)[2]),
                           linewidths=0,
                           mask=psi_df.isnull())
    _override_sns_row_colors(graph, row_colors.values)
    graph.ax_heatmap.set_yticks([])
    graph.ax_heatmap.set_xlabel("Events")
    graph.ax_heatmap.set_ylabel("Samples")
    graph.cax.set_title("psi")
    # the row legend lists every entry whose key does not contain 'Normal'
    tumor_only_row_cmap_lut = {key: val for key, val in row_cmap_lut.items() if 'Normal' not in key}
    plotter.add_legend(graph, tumor_only_row_cmap_lut)
    plotter.add_col_legend(graph, col_cmap_lut)
    # print() with one argument behaves the same on Python 2 and 3
    print("Writing: %s" % outpath)
    plt.savefig(outpath, bbox_inches='tight')
    # escaped dot: only a literal '.png' suffix is rewritten
    pdf_outpath = re.sub(r'\.png$', '.pdf', outpath)
    # NOTE(review): the PDF export below is disabled, yet the message is
    # still printed -- re-enable the save or drop the message.
    print("Writing: %s" % pdf_outpath)
    #plt.savefig(pdf_outpath, bbox_inches='tight')
    plt.close()
    return

Source File: de.py From smallrnaseq with GNU General Public License v3.0

5 votes

def cluster_map(data, names):
    """Cluster map of genes.

    data: DataFrame indexed by gene name
    names: row labels to plot; every label must be present in ``data``
        (``.loc`` raises ``KeyError`` otherwise)

    Values are log-transformed, missing results are set to 0 and each row is
    centred on its mean before clustering.  Returns the seaborn ClusterGrid.
    """

    import seaborn as sns
    import pylab as plt
    # label-based selection; the ``.ix`` indexer used previously no longer
    # exists in pandas
    data = data.loc[names]
    X = np.log(data).fillna(0)
    # centre every row (axis 1) on its own mean
    X = X.apply(lambda x: x-x.mean(), 1)
    cg = sns.clustermap(X,cmap='RdYlBu_r',figsize=(8,10),lw=.5,linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg

Source File: plotting.py From smallrnaseq with GNU General Public License v3.0

5 votes

def expression_clustermap(counts, freq=0.8):
    """Cluster map of log-transformed counts.

    counts: DataFrame with a 'name' column plus the columns reported by
        ``base.get_column_names``
    freq: currently unused; rows are kept when fewer than 20% of their
        values are missing.
        NOTE(review): the fixed 0.2 looks like it was meant to be
        ``1 - freq`` -- confirm before wiring it up.

    Returns the seaborn ClusterGrid.
    """

    scols,ncols = base.get_column_names(counts)
    X = counts.set_index('name')[ncols]
    X = np.log(X)
    # (a per-row standard deviation ranking was computed here before but
    # never used; it has been dropped)
    # keep rows whose fraction of missing values is below 0.2
    X = X[X.isnull().sum(1)/len(X.columns)<0.2]
    X = X.fillna(0)
    cg = sns.clustermap(X,cmap='YlGnBu',figsize=(12,12),lw=0,linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=9)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    return cg

Source File: plot.py From retentioneering-tools with Mozilla Public License 2.0

5 votes

def cluster_heatmap(data, clusters, target, plot_name=None, **kwargs):
    """
    Visualizes feature usage with a row-clustered heatmap.

    Parameters
    --------
    data: pd.DataFrame
        Feature matrix; its columns label the x axis.
    clusters: np.array
        Array of cluster IDs (not used when drawing).
    target: np.array
        Boolean vector (not used when drawing).
    plot_name: str, optional
        File name for the plot. Default: ``'cluster_heatmap_{timestamp}.svg'``
        with ``:`` and ``.`` in the timestamp replaced by ``_``.

    Returns
    -------
    tuple
        ``(heatmap axes, plot path, None, retention config)`` where the plot
        path is ``plot_name`` prefixed with the configured
        ``experiments_folder``. This function itself does not write the file.
    """
    grid = sns.clustermap(data.values,
                          cmap="BrBG",
                          xticklabels=data.columns,
                          yticklabels=False,
                          row_cluster=True,
                          col_cluster=False)

    # rows are still ordered by the clustering, only the tree is hidden
    grid.ax_row_dendrogram.set_visible(False)
    heatmap_axes = grid.ax_heatmap

    if not plot_name:
        stamp = 'cluster_heatmap_{}'.format(datetime.now())
        plot_name = stamp.replace(':', '_').replace('.', '_') + '.svg'
    config = data.retention.retention_config
    plot_name = config['experiments_folder'] + '/' + plot_name
    return heatmap_axes, plot_name, None, config

Source File: basenji_motifs.py From basenji with Apache License 2.0

5 votes

def plot_target_corr(filter_outs, seq_targets, filter_names, target_names, out_pdf, seq_op='mean'):
  """Plot a clustered heatmap of filter/target Spearman correlations.

  Args:
    filter_outs: array reduced over axis 2 to one value per
      (axis 0, axis 1) entry
      NOTE(review): presumably (sequences, filters, positions) -- confirm.
    seq_targets: 2-D array; the first ``filter_outs.shape[0]`` rows are used.
    filter_names: array of filter labels, indexable with a boolean mask.
    target_names: labels for the target columns.
    out_pdf: path of the figure file to write.
    seq_op: 'mean' averages over axis 2, any other value takes the maximum.
  """
  num_seqs = filter_outs.shape[0]
  num_targets = len(target_names)

  if seq_op == 'mean':
    filter_outs_seq = filter_outs.mean(axis=2)
  else:
    filter_outs_seq = filter_outs.max(axis=2)

  # drop filters whose value never varies across sequences
  filter_seqs_std = filter_outs_seq.std(axis=0)
  live = filter_seqs_std > 0
  filter_outs_seq = filter_outs_seq[:, live]
  filter_names_live = filter_names[live]

  filter_target_cors = np.zeros((len(filter_names_live), num_targets))
  for fi in range(len(filter_names_live)):
    for ti in range(num_targets):
      cor, p = spearmanr(filter_outs_seq[:, fi], seq_targets[:num_seqs, ti])
      filter_target_cors[fi, ti] = cor

  cor_df = pd.DataFrame(
      filter_target_cors, index=filter_names_live, columns=target_names)

  sns.set(font_scale=0.3)
  # clustermap opens its own figure, which becomes the current one; the
  # extra plt.figure() the original created first was never closed.
  sns.clustermap(cor_df, cmap='BrBG', center=0, figsize=(8, 10))
  plt.savefig(out_pdf)
  plt.close()


################################################################################
# plot_filter_seq_heat
#
# Plot a clustered heatmap of filter activations in
#
# Input
#  param_matrix: np.array of the filter's parameter matrix
#  out_pdf:
################################################################################

Source File: plots.py From cgpm with Apache License 2.0

5 votes

def plot_clustermap(D, xticklabels=None, yticklabels=None):
    """Draw a clustermap of the 2-D array ``D`` and return the ClusterGrid.

    xticklabels: labels for the columns of ``D``; defaults to their indices
    yticklabels: labels for the rows of ``D``; defaults to their indices
    """
    import seaborn as sns
    # x labels belong to the columns (shape[1]) and y labels to the rows
    # (shape[0]); the original defaults had the two axes swapped, which only
    # went unnoticed for square matrices.
    if xticklabels is None: xticklabels = list(range(D.shape[1]))
    if yticklabels is None: yticklabels = list(range(D.shape[0]))
    zmat = sns.clustermap(
        D, yticklabels=yticklabels, xticklabels=xticklabels,
        linewidths=0.2, cmap='BuGn')
    plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
    plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
    return zmat

Source File: heatmap.py From mmvec with BSD 3-Clause "New" or "Revised" License

5 votes

def _normalize_table(table, method):
    '''
    Normalize column data in a dataframe for plotting in clustermap.

    table: pd.DataFrame
        Input data.
    method: str
        Normalization method to use. A name containing 'z_score' or 'rel'
        must also contain 'col' or 'row' to select the direction; 'log10'
        applies log10(x + 1) to every value.

    Returns normalized table as pd.DataFrame, with missing results
    (e.g. from a zero standard deviation or a zero sum) replaced by 0.

    Raises ValueError for an unknown method, or when a z-score / relative
    method names neither 'col' nor 'row'.
    '''
    if 'col' in method:
        axis = 0
    elif 'row' in method:
        axis = 1
    else:
        axis = None

    needs_axis = 'z_score' in method or 'rel' in method
    if needs_axis and axis is None:
        # previously surfaced as an UnboundLocalError further down
        raise ValueError(
            "normalization method %r must specify 'col' or 'row'" % (method,))

    if 'z_score' in method:
        res = table.apply(lambda x: (x - x.mean()) / x.std(), axis=axis)
    elif 'rel' in method:
        res = table.apply(lambda x: x / x.sum(), axis=axis)
    elif method == 'log10':
        res = table.apply(lambda x: np.log10(x + 1))
    else:
        raise ValueError("unknown normalization method %r" % (method,))
    return res.fillna(0)

Source File: heatmap.py From mmvec with BSD 3-Clause "New" or "Revised" License

5 votes

def _parse_heatmap_metadata_annotations(metadata_column, margin_palette):
    '''
    Turn feature or sample metadata into a colour vector for annotating
    the margin of a clustermap.
    Parameters
    ----------
    metadata_column: pd.Series of metadata for annotating plots
    margin_palette: str
        Name of color palette to use for annotating metadata
        along margin(s) of clustermap. The special name 'colorhelix'
        selects a cubehelix palette.
    Returns
    -------
    Tuple of (pd.Series of colours, one per entry of metadata_column;
    dict mapping each class name to its colour).
    '''
    # every value is treated as a string category
    labels = metadata_column.astype(str)
    class_names = sorted(labels.unique())
    n_classes = len(class_names)

    # one colour per class
    if margin_palette == 'colorhelix':
        palette = sns.cubehelix_palette(
            n_classes, start=2, rot=3, dark=0.3, light=0.8, reverse=True)
    else:
        palette = sns.color_palette(margin_palette, n_classes)
    class_colors = {name: colour for name, colour in zip(class_names, palette)}

    # colour of the class each entry belongs to
    return labels.map(class_colors), class_colors

Source File: misc.py From mriqc with BSD 3-Clause "New" or "Revised" License

4 votes

def plot_corrmat(in_csv, out_file=None):
    """Plot a clustered correlation matrix of the columns of a CSV file.

    in_csv: path of the CSV file to read
    out_file: destination; defaults to "corr_matrix.svg". Extensions other
        than pdf, svg or png are replaced by ".svg".

    The columns "subject_id", "site" and "modality" are excluded when
    present. Returns the seaborn ClusterGrid.
    """
    import seaborn as sn

    sn.set(style="whitegrid")

    dataframe = pd.read_csv(in_csv, index_col=False, na_values="n/a", na_filter=False)
    excluded = ("subject_id", "site", "modality")
    colnames = [col for col in dataframe.columns.tolist() if col not in excluded]

    # Correlation matrix, without rows/columns that are entirely missing.
    # Two single-axis calls: current pandas does not accept a tuple of axes.
    corr = dataframe[colnames].corr()
    corr = corr.dropna(axis=0, how="all").dropna(axis=1, how="all")

    # (An upper-triangle mask was built here with the removed ``np.bool``
    # alias but never passed to the plot; it has been dropped.)

    # Generate a custom diverging colormap
    cmap = sn.diverging_palette(220, 10, as_cmap=True)

    # Draw the clustered heatmap with square cells
    corrplot = sn.clustermap(
        corr, cmap=cmap, center=0.0, method="average", square=True, linewidths=0.5
    )
    plt.setp(corrplot.ax_heatmap.yaxis.get_ticklabels(), rotation="horizontal")

    if out_file is None:
        out_file = "corr_matrix.svg"

    fname, ext = op.splitext(out_file)
    if ext[1:] not in ["pdf", "svg", "png"]:
        ext = ".svg"
        out_file = fname + ".svg"

    corrplot.savefig(
        out_file, format=ext[1:], bbox_inches="tight", pad_inches=0, dpi=100
    )
    return corrplot

Source File: nested_heatmap.py From Wooey with BSD 3-Clause "New" or "Revised" License

4 votes

def main():
    """Plot nested heatmaps: clustered major entries with their minor rows.

    The table named by ``args.tsv`` is read with a two-level row index
    (``args.major_index``, ``args.minor_index``).  Major entries are ranked
    by the mean variance of their rows; for up to eleven consecutive groups
    of ten entries the per-entry sums are clustered, and a second heatmap is
    drawn in which every major row is followed by its minor rows, re-using
    the dendrograms of the first clustering.  Each group is written to
    ``<input name>_heatmap_<group>.png``.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns
    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index], sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples (log2(0) == -inf) to our lowest detection
    df[df==-1*np.inf] = df[df!=-1*np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our addition and makes no biological sense)
    if args.translate:
        df+=abs(df.min().min())
    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    multi = df[df.index.get_level_values(major_index).isin(major_counts[major_counts>=cutoff].dropna().index)]

    # Let's select the most variable major entries
    # (Series.order() no longer exists; sort_values() is its replacement)
    most_variable = multi.groupby(level=major_index).var().mean(axis=1).sort_values(ascending=False)
    # and group them by tens
    for i in range(11):
        dat = multi[multi.index.get_level_values(major_index).isin(most_variable.index[10*i:10*(i+1)])]
        # we want to cluster by our major index, and then under these plot the values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        if len(major_dat) < 2:
            # ran out of entries: fewer than two rows cannot be clustered
            break
        seaborn_map = sns.clustermap(major_dat, row_cluster=True, col_cluster=True)
        ordered_columns = seaborn_map.data2d.columns
        # now we keep this clustering, but recreate our data to fit the above clustering, with our minor
        # index below the major index (you can think of transcript levels under gene levels if you are
        # a biologist)
        # Pieces are collected and concatenated once; DataFrame.append()
        # has been removed from pandas.
        pieces = []
        for major_val in seaborn_map.data2d.index:
            minor_rows = multi[multi.index.get_level_values(major_index)==major_val][ordered_columns]
            major_row = major_dat.loc[major_val, ][ordered_columns]
            # one-row frame labelled with the major value
            pieces.append(major_row.to_frame().T)
            pieces.append(minor_rows)
        merged_dat = pd.concat(pieces)
        merged_map = sns.clustermap(merged_dat, row_cluster=False, col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack but it works
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)

        # for rows, I imagine at some point it will fail to fall within the major axis but fortunately
        # for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(os.path.split(args.tsv.name)[1], i))

Source File: plot_heatmaps.py From pancanatlas_code_public with MIT License

4 votes

def main(df, outdir, desc, color_loader, run_representative):
    '''Plots the clustermap(s) for a single data set.

    df: DataFrame to plot; expression data is clipped in place at the
        per-column 99th percentile
    outdir: directory to write plots (created when missing)
    desc: identifies the data set (used for e.g. plot titles); must start
        with 'altsplice' or 'expression' (case-insensitive)
    color_loader: callable returning (category series, colour lookup) for df
    run_representative: if true, a second plot is made from the result of
        ``collapse_to_median``
    '''
    print("clustermap: %s" % desc)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    kind = desc.lower()
    assert kind.startswith(('altsplice', 'expression'))
    if not kind.startswith('altsplice'):
        assert df.values.max() > 1, "99.999999% Sure this is not psi data"
        print("Clipping 99th percentile")
        column_caps = np.percentile(df.values, 99, axis=0)
        df.iloc[:] = np.minimum(df.values, column_caps)

    method, metric = 'ward', 'cosine'

    # colours per row; rows are restricted to those that have a category
    cat_series, color_lut = color_loader(df)
    colors = cat_series.map(color_lut)
    df = df.loc[colors.index]

    # keep only the high-variance columns
    keep_cols = filter_to_high_var(df.values, df.columns, MAX_EVENTS)
    df = df.iloc[:, keep_cols]

    desc_slug = desc.strip().replace(' ', '_').lower()
    cluster_desc = '_'.join(['%d_high_var_events' % MAX_EVENTS, method, metric])
    sample_desc = desc_slug + '_' + cluster_desc

    sample_linkage, event_linkage = get_linkage(df, sample_desc, method=method, metric=metric)
    outpath = os.path.join(outdir, cluster_desc + '_clustermap.png')
    plot_heatmap(outpath, df, sample_linkage, colors, event_linkage, desc, color_lut)

    if not run_representative:
        return

    medians = collapse_to_median(df, cat_series)
    rep_colors = colors.loc[medians.index]
    rep_cluster_desc = cluster_desc + '_reps'
    rep_desc = desc_slug + '_' + rep_cluster_desc
    # NOTE(review): this repeats the message printed at the top with
    # ``desc``; ``rep_desc`` may have been intended.
    print("clustermap: %s" % desc)
    rep_sample_linkage, rep_event_linkage = get_linkage(medians, rep_desc, method=method, metric=metric)
    rep_outpath = os.path.join(outdir, rep_cluster_desc + '_clustermap.png')
    plot_heatmap(rep_outpath, medians, rep_sample_linkage, rep_colors, rep_event_linkage, rep_desc, color_lut)
    return

Source File: nested_heatmap.py From django-djangui with GNU General Public License v3.0

4 votes

def main():
    """Plot nested heatmaps: clustered major entries with their minor rows.

    The table named by ``args.tsv`` is read with a two-level row index
    (``args.major_index``, ``args.minor_index``).  Major entries are ranked
    by the mean variance of their rows; for up to eleven consecutive groups
    of ten entries the per-entry sums are clustered, and a second heatmap is
    drawn in which every major row is followed by its minor rows, re-using
    the dendrograms of the first clustering.  Each group is written to
    ``<input name>_heatmap_<group>.png``.
    """
    args = parser.parse_args()
    import numpy as np
    import pandas as pd
    import seaborn as sns
    major_index = args.major_index
    minor_index = args.minor_index
    df = pd.read_table(args.tsv, index_col=[major_index, minor_index], sep=args.delimiter)
    df = np.log2(df) if args.log_normalize else df
    # set our undetected samples (log2(0) == -inf) to our lowest detection
    df[df==-1*np.inf] = df[df!=-1*np.inf].min().min()
    # translate our data so we have no negatives (which would screw up our addition and makes no biological sense)
    if args.translate:
        df+=abs(df.min().min())
    major_counts = df.groupby(level=[major_index]).count()
    # we only want to plot samples with multiple values in the minor index
    cutoff = args.minor_cutoff
    multi = df[df.index.get_level_values(major_index).isin(major_counts[major_counts>=cutoff].dropna().index)]

    # Let's select the most variable major entries
    # (Series.order() no longer exists; sort_values() is its replacement)
    most_variable = multi.groupby(level=major_index).var().mean(axis=1).sort_values(ascending=False)
    # and group them by tens
    for i in range(11):
        dat = multi[multi.index.get_level_values(major_index).isin(most_variable.index[10*i:10*(i+1)])]
        # we want to cluster by our major index, and then under these plot the values of our minor index
        major_dat = dat.groupby(level=major_index).sum()
        if len(major_dat) < 2:
            # ran out of entries: fewer than two rows cannot be clustered
            break
        seaborn_map = sns.clustermap(major_dat, row_cluster=True, col_cluster=True)
        ordered_columns = seaborn_map.data2d.columns
        # now we keep this clustering, but recreate our data to fit the above clustering, with our minor
        # index below the major index (you can think of transcript levels under gene levels if you are
        # a biologist)
        # Pieces are collected and concatenated once; DataFrame.append()
        # has been removed from pandas.
        pieces = []
        for major_val in seaborn_map.data2d.index:
            minor_rows = multi[multi.index.get_level_values(major_index)==major_val][ordered_columns]
            major_row = major_dat.loc[major_val,][ordered_columns]
            # one-row frame labelled with the major value
            pieces.append(major_row.to_frame().T)
            pieces.append(minor_rows)
        merged_dat = pd.concat(pieces)
        merged_map = sns.clustermap(merged_dat, row_cluster=False, col_cluster=False)

        # recreate our dendrogram, this is undocumented and probably a hack but it works
        seaborn_map.dendrogram_col.plot(merged_map.ax_col_dendrogram)

        # for rows, I imagine at some point it will fail to fall within the major axis but fortunately
        # for this dataset it is not true
        seaborn_map.dendrogram_row.plot(merged_map.ax_row_dendrogram)
        merged_map.savefig('{}_heatmap_{}.png'.format(os.path.split(args.tsv.name)[1], i))

Source File: basenji_motifs.py From basenji with Apache License 2.0

4 votes

def plot_filter_seq_heat(filter_outs, out_pdf, whiten=True, drop_dead=True):
  """Plot a clustered heatmap of mean filter activations per sequence.

  Args:
    filter_outs: array averaged over axis 2
      NOTE(review): presumably (sequences, filters, positions) -- confirm.
    out_pdf: path of the figure file to write.
    whiten: standardize the averaged values with ``preprocessing.scale``.
    drop_dead: drop rows (after transposing) whose standard deviation is 0.
  """
  # compute filter output means per sequence
  filter_seqs = filter_outs.mean(axis=2)

  # whiten
  if whiten:
    filter_seqs = preprocessing.scale(filter_seqs)

  # transpose
  filter_seqs = np.transpose(filter_seqs)

  if drop_dead:
    filter_stds = filter_seqs.std(axis=1)
    filter_seqs = filter_seqs[filter_stds > 0]

  # draw 500 column indices at random (with replacement)
  seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)
  sampled = filter_seqs[:, seqs_i]

  # clip the colour scale at the 0.1th / 99.9th percentile
  hmin = np.percentile(sampled, 0.1)
  hmax = np.percentile(sampled, 99.9)

  sns.set(font_scale=0.3)

  # clustermap opens its own figure, which becomes the current one; the
  # extra plt.figure() the original created first was never closed.
  sns.clustermap(
      sampled,
      row_cluster=True,
      col_cluster=True,
      linewidths=0,
      xticklabels=False,
      vmin=hmin,
      vmax=hmax)
  plt.savefig(out_pdf)
  plt.close()


################################################################################
# plot_filter_seq_heat
#
# Plot a clustered heatmap of filter activations in sequence segments.
#
# Mean doesn't work well for the smaller segments for some reason, but taking
# the max looks OK. Still, similar motifs don't cluster quite as well as you
# might expect.
#
# Input
#  filter_outs
################################################################################

Python seaborn.clustermap() Examples