Python Examples of seaborn.violinplot

Source File: timeplots.py From NanoPlot with GNU General Public License v3.0

7 votes

def quality_over_time(dfs, path, figformat, title, plot_settings=None):
    """Draw a violin plot of basecall quality per time bin and save it.

    Parameters
    ----------
    dfs : table-like (presumably a pandas DataFrame -- confirm with caller)
        Must expose the "timebin" and "quals" columns used for x and y.
    path : str
        Prefix to which "TimeQualityViolinPlot.<figformat>" is appended.
    figformat : str
        File extension; also passed to ``Plot.save`` as ``format``.
    title : str or None
        Plot title; a falsy value falls back to the Plot's own title.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    Plot
        The Plot object, with ``fig`` set to the figure that was saved.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    settings = {} if plot_settings is None else plot_settings
    time_qual = Plot(path=path + "TimeQualityViolinPlot." + figformat,
                     title="Violin plot of quality over time")
    sns.set(style="white", **settings)
    ax = sns.violinplot(x="timebin",
                        y="quals",
                        data=dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Basecall quality",
           title=title or time_qual.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_qual.fig = ax.get_figure()
    time_qual.save(format=figformat)
    # Close every open figure once the plot has been written.
    plt.close("all")
    return time_qual

Source File: timeplots.py From NanoPlot with GNU General Public License v3.0

7 votes

def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings=None):
    """Draw a violin plot of sequencing speed per time bin and save it.

    Speed is computed as ``lengths / duration`` for rows whose duration is
    non-zero.

    Parameters
    ----------
    dfs : table-like (presumably a pandas DataFrame -- confirm with caller)
        Must expose "duration" and "lengths". If "timebin" is missing it is
        computed with ``add_time_bins`` and stored on ``dfs`` in place.
    path : str
        Prefix to which "TimeSequencingSpeed_ViolinPlot.<figformat>" is
        appended.
    figformat : str
        File extension; also passed to ``Plot.save`` as ``format``.
    title : str or None
        Plot title; a falsy value falls back to the Plot's own title.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    Plot
        The Plot object, with ``fig`` set to the figure that was saved.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    settings = {} if plot_settings is None else plot_settings
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot." + figformat,
                         title="Violin plot of sequencing speed over time")
    sns.set(style="white", **settings)
    if "timebin" not in dfs:
        # NOTE: this writes the new column onto the caller's object.
        dfs['timebin'] = add_time_bins(dfs)
    # Skip zero-duration rows so the division below never divides by zero.
    mask = dfs['duration'] != 0
    ax = sns.violinplot(x=dfs.loc[mask, "timebin"],
                        y=dfs.loc[mask, "lengths"] / dfs.loc[mask, "duration"],
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or time_duration.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_duration.fig = ax.get_figure()
    time_duration.save(format=figformat)
    plt.close("all")
    return time_duration

Source File: brute_force_plotter.py From brute-force-plotter with MIT License

7 votes

def bar_box_violin_dot_plots(data, category_col, numeric_col, axes, file_name=None):
    """Draw bar, strip, box and violin plots of one numeric column by category.

    Parameters
    ----------
    data : table-like passed to seaborn as ``data``
        Must contain ``category_col`` and ``numeric_col``.
    category_col, numeric_col : str
        Column names used for the x and y axes respectively.
    axes : sequence of matplotlib axes
        ``axes[0]`` gets the bar plot, ``axes[1]`` the strip plot,
        ``axes[2]`` the box plot and ``axes[3]`` the violin plot.
    file_name : optional
        Accepted for interface compatibility; not used by this function.
    """
    # x/y are passed by keyword: recent seaborn releases only accept ``data``
    # positionally, so the positional form fails there.
    sns.barplot(x=category_col, y=numeric_col, data=data, ax=axes[0])
    # The box plot is fed only the rows whose numeric value is present.
    sns.boxplot(
        x=category_col,
        y=numeric_col,
        data=data[data[numeric_col].notnull()],
        ax=axes[2],
    )
    # ``kind`` is a catplot option, not a violinplot one, so it is not passed.
    sns.violinplot(
        x=category_col,
        y=numeric_col,
        data=data,
        inner="quartile",
        scale="count",
        split=True,
        ax=axes[3],
    )
    sns.stripplot(x=category_col, y=numeric_col, data=data, jitter=True, ax=axes[1])
    sns.despine(left=True)

Source File: plotlib.py From mCaller with MIT License

6 votes

def plot_change_by_pos(diffs_by_context,plottype='box'):
    """Plot per-position current differences, grouped by label, and save a PDF.

    ``diffs_by_context`` is indexed as ``[label][context]`` and yields
    entries; every element of an entry except the last is recorded with its
    1-based position. ``plottype`` selects 'box' or 'violin'; any other value
    draws no distribution. The figure is always written to
    'change_by_position_box.pdf'.
    """
    plt.figure(figsize=(6,4))

    positions, bases, diffs = [], [], []
    for lab in diffs_by_context:
        per_label = diffs_by_context[lab]
        for context in per_label:
            for entry in per_label[context]:
                # The last element of each entry is deliberately left out.
                for pos, diff in enumerate(entry[:-1], start=1):
                    positions.append(pos)
                    bases.append(lab)
                    diffs.append(diff)
    dPos = pd.DataFrame({'position': positions, 'base': bases, 'diff': diffs})

    draw = {'box': sns.boxplot, 'violin': sns.violinplot}.get(plottype)
    if draw is not None:
        # The palette is looked up only when something is actually drawn.
        draw(x="position", y="diff", hue="base", data=dPos,
             palette=[cols[base], cols[methbase]])

    sns.despine(trim=False)
    plt.xlabel('Adenine Position in 6-mer')
    plt.ylabel('Measured - Expected Current (pA)')
    plt.ylim([-20,20])
    plt.legend(title='', loc='upper center', bbox_to_anchor=(0.5, 1.05),
               ncol=3, fancybox=True)
    plt.savefig('change_by_position_box.pdf', transparent=True, dpi=500,
                bbox_inches='tight')

Source File: figure.py From DrugEx with MIT License

6 votes

def fig9():
    """Violin plots comparing physicochemical properties, saved as Figure_9.tif.

    Top panel (211): property table built from ``mol_paths + real_path``.
    Bottom panel (212): property table built from ``mol_paths1 + real_path``.
    Both panels use the same labels, y-limits and seaborn style.
    """
    fig = plt.figure(figsize=(12, 12))
    for position, paths in ((211, mol_paths), (212, mol_paths1)):
        axis = fig.add_subplot(position)
        sns.set(style="white", palette="pastel", color_codes=True)
        df = properties(paths + real_path, labels + real_label, is_active=True)
        sns.violinplot(x='Property', y='Number', hue='Set', data=df, linewidth=1, bw=0.8)
        sns.despine(left=True)
        axis.set(ylim=[0.0, 15.0], xlabel='Structural Properties')
    fig.tight_layout()
    fig.savefig('Figure_9.tif', dpi=300)

Source File: figure.py From DrugEx with MIT License

6 votes

def fig6():
    """Split violin plots comparing physicochemical properties (Figure_6.tif).

    Left panel (121): 'data/ZINC_B.txt' vs. 'mol_p.txt'.
    Right panel (122): 'data/CHEMBL251.txt' vs. 'mol_ex.txt'.
    """
    panels = (
        (121, ['data/ZINC_B.txt', 'mol_p.txt'], ['ZINC Dataset', 'Pre-trained Model']),
        (122, ['data/CHEMBL251.txt', 'mol_ex.txt'], ['A2AR Dataset', 'Fine-tuned Model']),
    )
    plt.figure(figsize=(12, 6))
    for position, paths, set_names in panels:
        plt.subplot(position)
        sns.set(style="white", palette="pastel", color_codes=True)
        df = properties(paths, set_names)
        sns.violinplot(x='Property', y='Number', hue='Set', data=df, linewidth=1, split=True, bw=1)
        sns.despine(left=True)
        plt.ylim([0.0, 18.0])
        plt.xlabel('Structural Properties')
    plt.tight_layout()
    plt.savefig('Figure_6.tif', dpi=300)

Source File: structureViewer.py From mmtf-pyspark with Apache License 2.0

6 votes

def metal_distance_widget(df_concat):
    '''Build an interactive violin plot of metal-element distances.

    Parameters
    ----------
    df_concat : Dataframe
       dataframe with "Metal", "Element" and "Distance" columns

    Returns
    -------
    The result of ``interact`` wired to a dropdown of the unique metals.
    '''
    metal_choices = df_concat['Metal'].unique().tolist()
    selector = Dropdown(options=metal_choices, description="Metals")

    def metal_distance_violinplot(metal):
        # Work on a copy so the relabelling below never touches df_concat.
        selected = df_concat[df_concat["Metal"] == metal].copy()

        def with_metal_prefix(element):
            return metal + "-" + element

        selected['Element'] = selected['Element'].apply(with_metal_prefix)

        fig, ax = plt.subplots()
        fig.set_size_inches(15, 6)
        violin_ax = sns.violinplot(x="Element", y="Distance", palette="muted",
                                   data=selected, ax=ax)
        violin_ax.set(xlabel="Metal Interactions",
                      ylabel="Distance",
                      title=f"{metal} to Elements Distances Violin Plot")

    return interact(metal_distance_violinplot, metal=selector)

Source File: pltfile.py From CatLearn with GNU General Public License v3.0

6 votes

def featselect_featvar_plot(p_error_select, number_feat):
    """Show prediction error against ``number_feat`` as violin + point plots.

    Parameters
    ----------
    p_error_select : list
        Values plotted on the y axis (labelled 'Prediction error').
    number_feat : list
        Values plotted on the x axis (labelled 'Data size').
    """
    plt.figure().add_subplot(111)
    # Both layers share the same x/y data and land on the current axes.
    shared = {"x": number_feat, "y": p_error_select}
    sns.violinplot(scale="count", **shared)
    sns.pointplot(**shared)
    plt.legend(loc='upper right')
    plt.ylabel('Prediction error')
    plt.xlabel('Data size')
    plt.show()

Source File: pltfile.py From CatLearn with GNU General Public License v3.0

6 votes

def violinplot(set_size, p_error, subplot, i):
    """Draw one panel of a 2x2 grid of learning curves on figure 1.

    Parameters
    ----------
    set_size : list
       Values for the x axis (labelled 'Data size').
    p_error : list
       Values for the y axis (labelled 'Prediction error').
    subplot : int
        Panel number; it is appended to "22" to form the subplot code.
    i : int
       Value shown in the panel title ('Feature size <i>').

    The figure is shown once panel 4 has been drawn.
    """
    plt.figure(1)
    panel_code = int("22" + str(subplot))
    panel = plt.subplot(panel_code)
    panel.set_title('Feature size ' + str(i), loc='left')
    plt.legend(loc='upper right')
    plt.ylabel('Prediction error')
    plt.xlabel('Data size')
    sns.violinplot(x=set_size, y=p_error, scale="count")
    sns.pointplot(x=set_size, y=p_error, ci=100, capsize=.2)
    is_last_panel = subplot == 4
    if is_last_panel:
        plt.show()

Source File: umbilical.py From geosketch with MIT License

6 votes

def violin_jitter(X, genes, gene, labels, focus, background=None,
                  xlabels=None):
    """Save a violin + strip plot of one gene for focus vs. background cells.

    Parameters
    ----------
    X : matrix supporting ``X[:, j].toarray()`` (presumably scipy sparse)
        Expression values; columns correspond to ``genes``.
    genes : iterable
        Gene names; ``gene`` must be present (otherwise ``ValueError``).
    gene : str
        Gene whose column is plotted.
    labels : array-like
        Per-row labels, compared element-wise against ``focus``/``background``.
    focus : label value
        Rows with this label form the first violin.
    background : label value, optional
        Rows with this label form the second violin; by default every row
        whose label differs from ``focus``.
    xlabels : list of two str, optional
        Tick labels in plotting order, i.e. ``[focus, background]``.
        Defaults to ``['Focus', 'Background']``.

    The figure is written to '<NAMESPACE>_violin_<gene>.png'.
    """
    gidx = list(genes).index(gene)

    focus_idx = focus == labels
    if background is None:
        background_idx = focus != labels
    else:
        background_idx = background == labels

    if xlabels is None:
        # Data below is plotted as [focus, background]; the default labels
        # must follow the same order (they used to be swapped).
        xlabels = ['Focus', 'Background']

    x_gene = X[:, gidx].toarray().flatten()
    samples = [x_gene[focus_idx], x_gene[background_idx]]

    plt.figure()
    sns.violinplot(data=samples, scale='width', cut=0)
    sns.stripplot(data=samples, jitter=True, color='black', size=1)
    plt.xticks([0, 1], xlabels)
    plt.savefig('{}_violin_{}.png'.format(NAMESPACE, gene))

Source File: mouse_brain_astrocyte.py From geosketch with MIT License

5 votes

def astro_oligo_violin(X, genes, gene, labels, name):
    """Save a violin + strip plot of one gene for three label groups.

    Rows of ``X`` labelled 'astro', 'oligo' and 'both' are plotted in that
    order; the figure is written to '<name>_violin_<gene>.svg'.
    """
    dense = X.toarray()
    column = list(genes).index(gene)

    # One vector of expression values per group, in plotting order.
    samples = [dense[labels == tag, column] for tag in ('astro', 'oligo', 'both')]

    plt.figure()
    sns.violinplot(data=samples, scale='width', cut=0)
    sns.stripplot(data=samples, jitter=True, color='black', size=1)
    plt.xticks([0, 1, 2], ['Astrocytes', 'Oligodendrocytes', 'Both'])
    plt.savefig('{}_violin_{}.svg'.format(name, gene))

Source File: timeplots.py From NanoPlot with GNU General Public License v3.0

5 votes

def length_over_time(dfs, path, figformat, title, log_length=False, plot_settings=None):
    """Draw a violin plot of read length per time bin and save it.

    Parameters
    ----------
    dfs : table-like (presumably a pandas DataFrame -- confirm with caller)
        Must expose "timebin" and "lengths" (plus "log_lengths" when
        ``log_length`` is true). If a "length_filter" column exists, only
        rows where it is true are plotted.
    path : str
        Prefix to which "TimeLengthViolinPlot.<figformat>" or
        "TimeLogLengthViolinPlot.<figformat>" is appended.
    figformat : str
        File extension; also passed to ``Plot.save`` as ``format``.
    title : str or None
        Plot title; a falsy value falls back to the Plot's own title.
    log_length : bool
        Plot "log_lengths" and label the y axis with powers of ten.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    Plot
        The Plot object, with ``fig`` set to the figure that was saved.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    settings = {} if plot_settings is None else plot_settings

    if log_length:
        file_stem = "TimeLogLengthViolinPlot."
        default_title = "Violin plot of log read lengths over time"
        length_column = "log_lengths"
    else:
        file_stem = "TimeLengthViolinPlot."
        default_title = "Violin plot of read lengths over time"
        length_column = "lengths"
    time_length = Plot(path=path + file_stem + figformat, title=default_title)
    sns.set(style="white", **settings)

    if "length_filter" in dfs:  # produced by NanoPlot filtering of too long reads
        temp_dfs = dfs[dfs["length_filter"]]
    else:
        temp_dfs = dfs

    ax = sns.violinplot(x="timebin",
                        y=length_column,
                        data=temp_dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Read length",
           title=title or time_length.title)
    if log_length:
        # Computed once instead of once per candidate tick. The limit uses
        # the unfiltered lengths, exactly as before.
        upper = 10 * np.amax(dfs["lengths"])
        ticks = [10**i for i in range(10) if not 10**i > upper]
        ax.set(yticks=np.log10(ticks),
               yticklabels=ticks)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_length.fig = ax.get_figure()
    time_length.save(format=figformat)
    plt.close("all")
    return time_length

Source File: plots.py From cdlib with BSD 2-Clause "Simplified" License

5 votes

def plot_com_stat(com_clusters, com_fitness):
    """
    Plot the distribution of a property among all communities for a clustering, or a list of clusterings (violin-plots)

    :param com_clusters: list of clusterings to compare, or a single clustering
    :param com_fitness: the fitness/community property to use
    :return: the violin-plots

    Example:

    >>> from cdlib import algorithms, viz, evaluation
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> coms2 = algorithms.walktrap(g)
    >>> violinplot = viz.plot_com_stat([coms,coms2],evaluation.size)

    """
    if isinstance(com_clusters, cdlib.classes.clustering.Clustering):
        com_clusters = [com_clusters]

    all_vals = []
    all_names = []
    for c in com_clusters:
        # summary=False requests the per-community values, not an aggregate.
        prop = com_fitness(c.graph, c, summary=False)
        all_vals.extend(prop)
        all_names.extend([c.get_description()] * len(prop))

    # x/y are passed by keyword: recent seaborn releases only accept ``data``
    # positionally, so the positional form fails there.
    ax = sns.violinplot(x=all_names, y=all_vals, cut=0, saturation=0.5, palette="Set3")
    for tick in ax.get_xticklabels():
        tick.set_rotation(90)

    plt.ylabel("%s" % com_fitness.__name__)
    plt.xlabel("Algorithm")
    plt.tight_layout()

    return ax

Source File: mouse_brain_subcluster.py From geosketch with MIT License

5 votes

def astro_oligo_violin(X, genes, gene, labels, name):
    """Save a violin + strip plot of one gene for three label groups.

    Rows of ``X`` labelled 'astro', 'oligo' and 'both' are plotted in that
    order; the figure is written to '<name>_violin_<gene>.svg'.
    """
    dense = X.toarray()
    column = list(genes).index(gene)

    # One vector of expression values per group, in plotting order.
    samples = [dense[labels == tag, column] for tag in ('astro', 'oligo', 'both')]

    plt.figure()
    sns.violinplot(data=samples, scale='width', cut=0)
    sns.stripplot(data=samples, jitter=True, color='black', size=1)
    plt.xticks([0, 1, 2], ['Astrocytes', 'Oligodendrocytes', 'Both'])
    plt.savefig('{}_violin_{}.svg'.format(name, gene))

Source File: plot_kmer_evenness.py From EdwardsLab with MIT License

5 votes

def plot_shannon(df, output, verbose=False):
    """Save a violin + swarm plot of 'Shannon' by 'kmer' to '<output>.shannon.png'.

    With ``verbose`` a coloured progress message goes to stderr. The current
    figure is cleared after saving.
    """
    if verbose:
        print(f"{bcolors.GREEN}Plotting swarmed shannon{bcolors.ENDC}", file=sys.stderr)
    # Violin first, swarm on top, both on the current axes.
    for draw in (sns.violinplot, sns.swarmplot):
        draw(data=df, x='kmer', y='Shannon')
    sns.despine(offset=10, trim=True)
    plt.savefig(f"{output}.shannon.png")
    plt.clf()

Source File: plot_kmer_evenness.py From EdwardsLab with MIT License

5 votes

def plot_swarm_evenness(df, output, verbose=False):
    """Save a violin + swarm plot of 'Evenness' by 'kmer' to '<output>.swarm.evenness.png'.

    With ``verbose`` a coloured progress message goes to stderr. The current
    figure is cleared after saving.
    """
    if verbose:
        print(f"{bcolors.GREEN}Plotting swarmed evenness{bcolors.ENDC}", file=sys.stderr)
    # Violin first, swarm on top, both on the current axes.
    for draw in (sns.violinplot, sns.swarmplot):
        draw(data=df, x='kmer', y='Evenness')
    sns.despine(offset=10, trim=True)
    plt.savefig(f"{output}.swarm.evenness.png")
    plt.clf()

Source File: plot_kmer_evenness.py From EdwardsLab with MIT License

5 votes

def plot_evenness(df, output, verbose=False):
    """Save a violin plot of 'Evenness' by 'kmer' to '<output>.evenness.png'.

    With ``verbose`` a coloured progress message goes to stderr. The current
    figure is cleared after saving.
    """
    if verbose:
        print(f"{bcolors.GREEN}Plotting evenness{bcolors.ENDC}", file=sys.stderr)
    sns.violinplot(data=df, x='kmer', y='Evenness')
    sns.despine(offset=10, trim=True)
    target = f"{output}.evenness.png"
    plt.savefig(target)
    plt.clf()

Source File: brute_force_plotter.py From brute-force-plotter with MIT License

5 votes

def histogram_violin_plots(data, axes, file_name=None):
    """Draw a histogram on ``axes[0]`` and a violin plot on ``axes[1]``.

    Parameters
    ----------
    data : one-dimensional values (assumed -- both plots receive it as a
        single vector)
    axes : sequence of matplotlib axes
        At least two axes.
    file_name : optional
        Accepted for interface compatibility; not used by this function.
    """
    # histogram
    sns.distplot(data, ax=axes[0], axlabel="")
    # The vector is passed explicitly as ``x``: the first positional
    # parameter of violinplot was ``x`` in older seaborn and is ``data`` in
    # newer releases, so the positional call is version-dependent.
    sns.violinplot(x=data, ax=axes[1], inner="quartile", scale="count")
    sns.despine(left=True)

Source File: plot.py From speedml with MIT License

5 votes

def ordinal(self, y):
        """
        Show a violin plot of the ordinal feature ``y`` against the target feature.

        Calls ``Base.data_n()`` first, then plots column ``y`` of
        ``Base.train_n`` grouped by ``Base.target``. Useful for spotting
        outliers of an ordinal feature across target values.
        """
        Base.data_n()
        target = Base.target
        plt.figure(figsize=(8,4))
        sns.violinplot(x=target, y=y, data=Base.train_n)
        # Same font size on both axis labels.
        for set_label, text in ((plt.xlabel, target), (plt.ylabel, y)):
            set_label(text, fontsize=12)
        plt.show()

Source File: adjacency.py From nltools with MIT License

5 votes

def plot_label_distance(self, labels=None, ax=None):
        ''' Create a violin plot indicating within and between label distance

            Args:
                labels (np.array):  numpy array of labels to plot; defaults
                                    to a copy of ``self.labels``
                ax: matplotlib axes to draw on (optional)

            Returns:
                f: violin plot handles (the axes returned by seaborn)

            Raises:
                ValueError: if this is not a single matrix, or if ``labels``
                            does not match the size of the distance matrix

        '''

        if not self.is_single_matrix:
            raise ValueError('This function only works on single adjacency '
                             'matrices.')

        distance = pd.DataFrame(self.squareform())

        if labels is None:
            labels = np.array(deepcopy(self.labels))
        else:
            if len(labels) != distance.shape[0]:
                raise ValueError('Labels must be same length as distance matrix')
            # A plain list compared with ``==`` gives one bool, not a mask.
            labels = np.asarray(labels)

        columns = ['Distance', 'Group', 'Type']
        frames = []
        for group in np.unique(labels):
            in_group = labels == group
            # Within: upper triangle (no diagonal) of the group's own block.
            within = distance.loc[in_group, in_group].values[
                np.triu_indices(in_group.sum(), k=1)]
            # Between: every distance from a group member to a non-member.
            # (Previously this indexed the non-member block with the group's
            # triangle indices, which mixed up the values and could run out
            # of bounds.)
            between = distance.loc[in_group, ~in_group].values.flatten()
            for kind, values in (('Within', within), ('Between', between)):
                frames.append(pd.DataFrame({'Distance': values,
                                            'Group': group,
                                            'Type': kind}))
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        out = pd.concat(frames) if frames else pd.DataFrame(columns=columns)

        f = sns.violinplot(x="Group", y="Distance", hue="Type", data=out, split=True, inner='quartile',
                           palette={"Within": "lightskyblue", "Between": "red"}, ax=ax)
        f.set_ylabel('Average Distance')
        f.set_title('Average Group Distance')
        return f

Source File: plots.py From Comparative-Annotation-Toolkit with Apache License 2.0

5 votes

def horizontal_violin_plot(data, ordered_genomes, title, xlabel, pdf, hue=None, x=None, y=None, xlim=None):
    """Draw one horizontal violin plot, ordered by genome, onto a PDF page.

    ``ordered_genomes`` fixes both the category order and the palette;
    ``xlim`` is applied only when given. The page is finished with
    ``multipage_close(pdf, tight_layout=False)``.
    """
    fig, ax = plt.subplots()
    violin_options = dict(
        order=ordered_genomes,
        palette=choose_palette(ordered_genomes),
        saturation=boxplot_saturation,
        orient='h',
        cut=0,
        scale='count',
        ax=ax,
    )
    sns.violinplot(data=data, x=x, y=y, hue=hue, **violin_options)
    fig.suptitle(title)
    ax.set_xlabel(xlabel)
    if xlim is not None:
        ax.set_xlim(xlim)
    multipage_close(pdf, tight_layout=False)

Source File: analyze_hostguest.py From SAMPL6 with MIT License

5 votes

def generate_molecules_plot(self):
        """Save a horizontal violin plot of the free-energy error per system.

        Reads ``self.data`` (columns 'system_id' and the error column) and
        writes to ``self.MOLECULE_CORRELATION_PLOT_PATH`` inside
        ``self.output_directory_path``. The figure height grows with the
        number of distinct systems.
        """
        # Correlation plot by molecules.
        plt.close('all')
        n_rows = len(self.data.system_id.unique())
        fig, ax = plt.subplots(figsize=(6, 0.4*n_rows))
        # Raw string: '\D' is an invalid escape in a normal literal and
        # triggers a warning on newer Pythons. The column name is unchanged.
        sns.violinplot(y='system_id', x=r'$\Delta\Delta$G error (calc - expt)  [kcal/mol]',
                       data=self.data, linewidth=1.0, inner='point', cut=0, ax=ax)
        plt.tight_layout(pad=0.2)
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.MOLECULE_CORRELATION_PLOT_PATH))

Source File: sct_compute_hausdorff_distance.py From spinalcordtoolbox with MIT License

5 votes

def show_results(self):
        """Save a split violin plot of the two distance distributions per slice.

        Distances are scaled by ``self.dim_pix``. For 2D data
        (``self.dim_im == 2``) everything is assigned to slice 0; for 3D data
        one pair of distributions is added per entry of ``self.distances``.
        The figure is written to 'violin_plot.png'.
        """
        import seaborn as sns
        import matplotlib.pyplot as plt
        import pandas as pd
        # plt.hold() was removed in matplotlib 3.0 (axes always hold now), so
        # the former plt.hold(True) call is gone.
        sns.set(style="whitegrid", palette="pastel", color_codes=True)
        plt.figure(figsize=(35, 20))

        data_dist = {"distances": [], "image": [], "slice": []}

        def add_distribution(distribution, image_id, slice_id):
            # One row per distance, tagged with its image and slice.
            data_dist["distances"].extend(dist * self.dim_pix for dist in distribution)
            data_dist["image"].extend(len(distribution) * [image_id])
            data_dist["slice"].extend(len(distribution) * [slice_id])

        if self.dim_im == 2:
            add_distribution(self.dist1_distribution, 1, 0)
            add_distribution(self.dist2_distribution, 2, 0)

        if self.dim_im == 3:
            for i in range(len(self.distances)):
                add_distribution(self.dist1_distribution[i], 1, i)
                add_distribution(self.dist2_distribution[i], 2, i)

        data_dist = pd.DataFrame(data_dist)
        sns.violinplot(x="slice", y="distances", hue="image", data=data_dist, split=True, inner="point", cut=0)
        plt.savefig('violin_plot.png')
        # plt.show()


# ----------------------------------------------------------------------------------------------------------------------

Source File: typeI_analysis.py From SAMPL6 with MIT License

5 votes

def generate_molecules_plot(self):
        """Save a horizontal violin plot of the pKa error per pKa ID.

        Reads ``self.data``, sorts it by 'pKa ID' ascending, and writes to
        ``self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR`` inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a real bool: the former ["True"] string only
        # worked through truthiness and is rejected by newer pandas.
        data_ordered_by_pKa_ID = self.data.sort_values(["pKa ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string avoids the invalid '\D' escape; the value is unchanged.
        sns.violinplot(y='pKa ID', x=r'$\Delta$pKa error (calc - exp)', data=data_ordered_by_pKa_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR))

Source File: typeIII_analysis.py From SAMPL6 with MIT License

5 votes

def generate_molecules_plot(self):
        """Save a horizontal violin plot of the pKa error per pKa ID.

        Reads ``self.data``, sorts it by 'pKa ID' ascending, and writes to
        ``self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR`` inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a real bool: the former ["True"] string only
        # worked through truthiness and is rejected by newer pandas.
        data_ordered_by_pKa_ID = self.data.sort_values(["pKa ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string avoids the invalid '\D' escape; the value is unchanged.
        sns.violinplot(y='pKa ID', x=r'$\Delta$pKa error (calc - exp)', data=data_ordered_by_pKa_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR))

Source File: logP_analysis.py From SAMPL6 with MIT License

5 votes

def generate_molecules_plot(self):
        """Save a horizontal violin plot of the logP error per molecule.

        Reads ``self.data``, sorts it by 'Molecule ID' ascending, and writes
        to ``self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR`` inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a real bool: the former ["True"] string only
        # worked through truthiness and is rejected by newer pandas.
        data_ordered_by_mol_ID = self.data.sort_values(["Molecule ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string avoids the invalid '\D' escape; the value is unchanged.
        sns.violinplot(y='Molecule ID', x=r'$\Delta$logP error (calc - exp)', data=data_ordered_by_mol_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR))

Source File: utils.py From dl-eeg-review with MIT License

5 votes

def run_kruskal(df, condition_col, value_col='acc_diff', min_n_obs=6,
                plot=False):
    """Run Kruskal-Wallis analysis of variance test.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run the
            test on.
        min_n_obs (int): minimum number of observations in each sample in order
            to run the test.
        plot (bool): if True, also draw a violin plot of the conditions that
            have at least `min_n_obs` rows.

    Returns:
        (dict): keys 'test' (always 'kruskal'), 'pvalue', 'stat' and 'fig'.
            'pvalue' and 'stat' are NaN unless more than two samples
            qualify; 'fig' is None unless `plot` is True.
    """
    grouped_values = df.groupby(condition_col)[value_col]
    samples = [values for _, values in grouped_values
               if len(values) >= min_n_obs]

    # The test only runs when more than two samples are large enough.
    if len(samples) <= 2:
        stat, p = np.nan, np.nan
        print('Not enough samples with more than {} observations.'.format(min_n_obs))
    else:
        stat, p = kruskal(*samples)

    fig = None
    if plot:
        has_enough = df[condition_col].value_counts() >= min_n_obs
        kept_conditions = has_enough.index[has_enough].tolist()
        fig, ax = plt.subplots()
        sns.violinplot(
            data=df[df[condition_col].isin(kept_conditions)], x=condition_col,
            y=value_col, ax=ax)
        ax.set_title('Kruskal-Wallis for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))

    return {'test': 'kruskal', 'pvalue': p, 'stat': stat, 'fig': fig}

Source File: logP_analysis.py From SAMPL6 with MIT License

5 votes

def generate_molecules_plot(self):
        """Save a horizontal violin plot of the logP error per molecule.

        Reads ``self.data``, sorts it by 'Molecule ID' ascending, and writes
        to ``self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR`` inside
        ``self.output_directory_path``.
        """
        # Correlation plot by molecules.
        plt.close('all')
        # ``ascending`` must be a real bool: the former ["True"] string only
        # worked through truthiness and is rejected by newer pandas.
        data_ordered_by_mol_ID = self.data.sort_values(["Molecule ID"], ascending=True)
        sns.set(rc={'figure.figsize': (8.27,11.7)})
        # Raw string avoids the invalid '\D' escape; the value is unchanged.
        sns.violinplot(y='Molecule ID', x=r'$\Delta$logP error (calc - exp)', data=data_ordered_by_mol_ID,
                           inner='point', linewidth=1, width=1.2)
        plt.tight_layout()
        # plt.show()
        plt.savefig(os.path.join(self.output_directory_path, self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR))

Source File: plotting.py From QUANTAXIS with MIT License

4 votes

def plot_quantile_returns_violin(return_by_q, ylim_percentiles=None, ax=None):
    """Violin plot of returns (scaled by DECIMAL_TO_BPS) per factor quantile.

    ``return_by_q`` is copied, scaled, and reshaped to long form with one
    row per (index, forward period). ``ylim_percentiles``, when given,
    supplies the lower and upper percentiles used as y-limits. A new
    18x6 figure is created when ``ax`` is None. Returns the axes drawn on.
    """
    return_by_q = return_by_q.copy()

    ymin = ymax = None
    if ylim_percentiles is not None:
        raw_values = return_by_q.values
        ymin = np.nanpercentile(raw_values, ylim_percentiles[0]) * DECIMAL_TO_BPS
        ymax = np.nanpercentile(raw_values, ylim_percentiles[1]) * DECIMAL_TO_BPS

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(18, 6))

    # Wide -> long: the column level becomes "forward_periods" and the
    # stacked values become the "return" column.
    scaled = return_by_q.multiply(DECIMAL_TO_BPS)
    scaled.columns = scaled.columns.set_names("forward_periods")
    long_returns = scaled.stack().rename("return").reset_index()

    sns.violinplot(
        data=long_returns,
        x="factor_quantile",
        hue="forward_periods",
        y="return",
        orient="v",
        cut=0,
        inner="quartile",
        ax=ax,
    )
    ax.set(
        xlabel="",
        ylabel="Return (bps)",
        title="Period Wise Return By Factor Quantile",
        ylim=(ymin, ymax),
    )

    # Zero-return reference line.
    ax.axhline(0.0, linestyle="-", color="black", lw=0.7, alpha=0.6)

    return ax

Source File: utils.py From dl-eeg-review with MIT License

4 votes

def run_mannwhitneyu(df, condition_col, conditions, value_col='acc_diff',
                     min_n_obs=10, plot=False):
    """Run Mann-Whitney rank-sum test.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.
        conditions (list): list of two strings containing the values of the
            condition to compare.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run the
            test on.
        min_n_obs (int): minimum number of observations in each sample in order
            to run the test.
        plot (bool): if True, also draw a violin plot of the two conditions.

    Returns:
        (dict): keys 'test' (always 'mannwhitneyu'), 'pvalue', 'stat' and
            'fig'. 'pvalue' and 'stat' are NaN when either sample has fewer
            than `min_n_obs` rows; 'fig' is None unless `plot` is True.
    """
    n_conditions = len(conditions)
    assert n_conditions == 2, '`conditions` must be of length 2, got {}'.format(
        n_conditions)
    first_cond, second_cond = conditions[0], conditions[1]
    data1 = df[df[condition_col] == first_cond][value_col]
    data2 = df[df[condition_col] == second_cond][value_col]

    too_small = len(data1) < min_n_obs or len(data2) < min_n_obs
    if too_small:
        stat, p = np.nan, np.nan
        print('Not enough observations in each sample ({} and {}).'.format(
            len(data1), len(data2)))
    else:
        stat, p = mannwhitneyu(data1, data2)

    fig = None
    if plot:
        fig, ax = plt.subplots()
        sns.violinplot(
            data=df[df[condition_col].isin(conditions)], x=condition_col,
            y=value_col, ax=ax)
        ax.set_title('Mann-Whitney for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))

    return {'test': 'mannwhitneyu', 'pvalue': p, 'stat': stat, 'fig': fig}

Python seaborn.violinplot() Examples