Python seaborn.violinplot() Examples
The following are 30 code examples of seaborn.violinplot().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: timeplots.py From NanoPlot with GNU General Public License v3.0 | 7 votes |
def quality_over_time(dfs, path, figformat, title, plot_settings=None):
    """Draw a violin plot of basecall quality per time bin and save it.

    Parameters
    ----------
    dfs : DataFrame
        Must provide the "timebin" and "quals" columns that are plotted.
    path : str
        Prefix prepended to the output file name.
    figformat : str
        Used both as the file extension and as the format passed to save().
    title : str or None
        Plot title; when falsy the default title of the Plot object is used.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    Plot
        The Plot object holding the saved figure.
    """
    # A None default avoids sharing one mutable dict between calls.
    settings = plot_settings or {}
    time_qual = Plot(path=path + "TimeQualityViolinPlot." + figformat,
                     title="Violin plot of quality over time")
    sns.set(style="white", **settings)
    ax = sns.violinplot(x="timebin",
                        y="quals",
                        data=dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Basecall quality",
           title=title or time_qual.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_qual.fig = ax.get_figure()
    time_qual.save(format=figformat)
    plt.close("all")
    return time_qual
Example #2
Source File: timeplots.py From NanoPlot with GNU General Public License v3.0 | 7 votes |
def sequencing_speed_over_time(dfs, path, figformat, title, plot_settings=None):
    """Draw a violin plot of sequencing speed per time bin and save it.

    Speed is computed as ``lengths / duration`` for rows whose duration is
    non-zero.

    Parameters
    ----------
    dfs : DataFrame
        Must provide "duration" and "lengths" columns. If it has no
        "timebin" column one is added IN PLACE via ``add_time_bins``.
    path : str
        Prefix prepended to the output file name.
    figformat : str
        Used both as the file extension and as the format passed to save().
    title : str or None
        Plot title; when falsy the default title of the Plot object is used.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    Plot
        The Plot object holding the saved figure.
    """
    # A None default avoids sharing one mutable dict between calls.
    settings = plot_settings or {}
    time_duration = Plot(path=path + "TimeSequencingSpeed_ViolinPlot." + figformat,
                         title="Violin plot of sequencing speed over time")
    sns.set(style="white", **settings)
    if "timebin" not in dfs:
        dfs['timebin'] = add_time_bins(dfs)
    # Exclude zero durations so the division below never divides by zero.
    mask = dfs['duration'] != 0
    speed = dfs.loc[mask, "lengths"] / dfs.loc[mask, "duration"]
    ax = sns.violinplot(x=dfs.loc[mask, "timebin"],
                        y=speed,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Sequencing speed (nucleotides/second)",
           title=title or time_duration.title)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_duration.fig = ax.get_figure()
    time_duration.save(format=figformat)
    plt.close("all")
    return time_duration
Example #3
Source File: brute_force_plotter.py From brute-force-plotter with MIT License | 7 votes |
def bar_box_violin_dot_plots(data, category_col, numeric_col, axes, file_name=None):
    """Draw bar, strip, box and violin plots of one numeric column by category.

    Parameters
    ----------
    data : DataFrame
        Source table containing both columns.
    category_col : str
        Column placed on the x axis.
    numeric_col : str
        Column placed on the y axis.
    axes : sequence of Axes
        At least four axes: [0] bar, [1] strip, [2] box, [3] violin.
    file_name : optional
        Accepted for interface compatibility; not used by this function.
    """
    # x/y are passed by keyword: recent seaborn releases no longer accept
    # them positionally.
    sns.barplot(x=category_col, y=numeric_col, data=data, ax=axes[0])
    sns.boxplot(
        x=category_col,
        y=numeric_col,
        # the box plot is fed only the rows where the value is present
        data=data[data[numeric_col].notnull()],
        ax=axes[2],
    )
    # `kind="violin"` was dropped: it is a catplot option, not a violinplot
    # parameter, and unknown keywords are forwarded to matplotlib by
    # current seaborn.
    # NOTE(review): `split=True` without `hue` is kept as written; confirm
    # how the installed seaborn version renders it.
    sns.violinplot(
        x=category_col,
        y=numeric_col,
        data=data,
        inner="quartile",
        scale="count",
        split=True,
        ax=axes[3],
    )
    sns.stripplot(x=category_col, y=numeric_col, data=data, jitter=True, ax=axes[1])
    sns.despine(left=True)
Example #4
Source File: plotlib.py From mCaller with MIT License | 6 votes |
def plot_change_by_pos(diffs_by_context, plottype='box'):
    """Plot measured-minus-expected current by position and save it as PDF.

    Parameters
    ----------
    diffs_by_context : mapping
        Indexed as ``diffs_by_context[label][context]`` to obtain an iterable
        of entries; every element of an entry except the last is plotted,
        with positions numbered from 1.
    plottype : {'box', 'violin'}
        Which seaborn plot to draw. Any other value draws no distribution.

    The figure is written to ``change_by_position_<plottype>.pdf`` for the
    two recognised plot types, so a violin plot no longer overwrites the
    box-plot file. Unrecognised types keep the legacy ``..._box.pdf`` name.
    """
    plt.figure(figsize=(6, 4))
    changes_by_position = {'position': [], 'base': [], 'diff': []}
    for lab in diffs_by_context:
        for context in diffs_by_context[lab]:
            for entry in diffs_by_context[lab][context]:
                # the last element of each entry is not a per-position value
                for pos, diff in enumerate(entry[:-1], start=1):
                    changes_by_position['position'].append(pos)
                    changes_by_position['base'].append(lab)
                    changes_by_position['diff'].append(diff)
    dPos = pd.DataFrame(changes_by_position)

    plotters = {'box': sns.boxplot, 'violin': sns.violinplot}
    draw = plotters.get(plottype)
    if draw is not None:
        draw(x="position", y="diff", hue="base", data=dPos,
             palette=[cols[base], cols[methbase]])
        suffix = plottype
    else:
        suffix = 'box'

    sns.despine(trim=False)
    plt.xlabel('Adenine Position in 6-mer')
    plt.ylabel('Measured - Expected Current (pA)')
    plt.ylim([-20, 20])
    plt.legend(title='', loc='upper center', bbox_to_anchor=(0.5, 1.05),
               ncol=3, fancybox=True)
    plt.savefig('change_by_position_{}.pdf'.format(suffix),
                transparent=True, dpi=500, bbox_inches='tight')
Example #5
Source File: figure.py From DrugEx with MIT License | 6 votes |
def fig9():
    """Violin plots comparing physicochemical properties (Figure 9).

    Panel 1: molecules generated by DrugEx with the pre-trained model as
    exploration network.
    Panel 2: molecules generated by DrugEx with the fine-tuned model as
    exploration network.

    The two panels are stacked vertically and saved to 'Figure_9.tif'.
    """
    fig = plt.figure(figsize=(12, 12))
    panels = ((211, mol_paths), (212, mol_paths1))
    for position, generated_paths in panels:
        axis = fig.add_subplot(position)
        sns.set(style="white", palette="pastel", color_codes=True)
        frame = properties(generated_paths + real_path,
                           labels + real_label,
                           is_active=True)
        sns.violinplot(x='Property', y='Number', hue='Set', data=frame,
                       linewidth=1, bw=0.8, ax=axis)
        sns.despine(left=True)
        axis.set(ylim=[0.0, 15.0], xlabel='Structural Properties')
    fig.tight_layout()
    fig.savefig('Figure_9.tif', dpi=300)
Example #6
Source File: figure.py From DrugEx with MIT License | 6 votes |
def fig6():
    """Violin plots comparing physicochemical properties (Figure 6).

    A: molecules generated by the pre-trained model vs. the ZINC set.
    B: molecules generated by the fine-tuned model vs. the A2AR set.

    The two panels sit side by side and are saved to 'Figure_6.tif'.
    """
    panels = (
        (121, ['data/ZINC_B.txt', 'mol_p.txt'],
         ['ZINC Dataset', 'Pre-trained Model']),
        (122, ['data/CHEMBL251.txt', 'mol_ex.txt'],
         ['A2AR Dataset', 'Fine-tuned Model']),
    )
    plt.figure(figsize=(12, 6))
    for position, paths, set_names in panels:
        plt.subplot(position)
        sns.set(style="white", palette="pastel", color_codes=True)
        frame = properties(paths, set_names)
        sns.violinplot(x='Property', y='Number', hue='Set', data=frame,
                       linewidth=1, split=True, bw=1)
        sns.despine(left=True)
        plt.ylim([0.0, 18.0])
        plt.xlabel('Structural Properties')
    plt.tight_layout()
    plt.savefig('Figure_6.tif', dpi=300)
Example #7
Source File: structureViewer.py From mmtf-pyspark with Apache License 2.0 | 6 votes |
def metal_distance_widget(df_concat):
    '''Plot a violinplot of metal-element distances with ipywidgets

    Parameters
    ----------
    df_concat : Dataframe
       dataframe of metal-element distances; the "Metal", "Element" and
       "Distance" columns are read

    Returns
    -------
    Whatever ``interact`` returns for the plotting callback bound to a
    dropdown of the distinct metals.
    '''
    metal_choices = df_concat['Metal'].unique().tolist()
    dropdown = Dropdown(options=metal_choices, description="Metals")

    def metal_distance_violinplot(metal):
        # Work on a copy so relabelling elements leaves df_concat untouched.
        selected = df_concat[df_concat["Metal"] == metal].copy()
        selected['Element'] = selected['Element'].apply(
            lambda element: metal + "-" + element)

        fig, ax = plt.subplots()
        fig.set_size_inches(15, 6)
        violin_ax = sns.violinplot(x="Element", y="Distance", palette="muted",
                                   data=selected, ax=ax)
        violin_ax.set(xlabel="Metal Interactions",
                      ylabel="Distance",
                      title=f"{metal} to Elements Distances Violin Plot")

    return interact(metal_distance_violinplot, metal=dropdown)
Example #8
Source File: pltfile.py From CatLearn with GNU General Public License v3.0 | 6 votes |
def featselect_featvar_plot(p_error_select, number_feat):
    """Show prediction error against feature count as violins with a point plot.

    Parameters
    ----------
    p_error_select : list
        Values plotted on the y axis (labelled 'Prediction error').
    number_feat : list
        Values plotted on the x axis; same length as `p_error_select`.
    """
    shared = dict(x=number_feat, y=p_error_select)
    plt.figure().add_subplot(111)
    sns.violinplot(scale="count", **shared)
    sns.pointplot(**shared)
    plt.legend(loc='upper right')
    plt.ylabel('Prediction error')
    plt.xlabel('Data size')
    plt.show()
Example #9
Source File: pltfile.py From CatLearn with GNU General Public License v3.0 | 6 votes |
def violinplot(set_size, p_error, subplot, i):
    """Make learning curves with violinplot.

    Parameters
    ----------
    set_size : list
        Size of sub-set of data/features which the model is based on.
    p_error : list
        The prediction error for plain vanilla ridge.
    subplot : int
        Which subplot is being produced (cell of a 2x2 grid). The figure is
        shown once subplot 4 has been drawn.
    i : int
        Which iteration in the feature selection; used in the panel title.
    """
    plt.figure(1)
    # "22" + subplot gives the three-digit matplotlib subplot code, e.g. 221.
    grid_code = int("22" + str(subplot))
    panel = plt.subplot(grid_code)
    panel.set_title('Feature size ' + str(i), loc='left')
    plt.legend(loc='upper right')
    plt.ylabel('Prediction error')
    plt.xlabel('Data size')
    sns.violinplot(x=set_size, y=p_error, scale="count")
    sns.pointplot(x=set_size, y=p_error, ci=100, capsize=.2)
    if subplot != 4:
        return
    plt.show()
Example #10
Source File: umbilical.py From geosketch with MIT License | 6 votes |
def violin_jitter(X, genes, gene, labels, focus, background=None, xlabels=None):
    """Save a violin + strip plot of one gene for a focus vs. background group.

    Parameters
    ----------
    X : sparse matrix
        Column `gene` is extracted with ``X[:, idx].toarray()``.
    genes : iterable
        Gene names; the position of `gene` selects the column of X.
    gene : str
        Gene to plot.
    labels : array-like
        Per-row labels compared against `focus` and `background`.
    focus : label
        Rows equal to this label form the first violin.
    background : label, optional
        Rows equal to this label form the second violin. When None, every
        row that is not `focus` is used.
    xlabels : list of str, optional
        Tick labels in plotting order (focus first, background second).
        Defaults to ['Focus', 'Background'].

    The figure is saved to '<NAMESPACE>_violin_<gene>.png'.
    """
    gidx = list(genes).index(gene)

    focus_idx = focus == labels
    if background is None:
        background_idx = focus != labels
    else:
        background_idx = background == labels

    if xlabels is None:
        # Data below is drawn as [focus, background]; the previous default
        # ['Background', 'Focus'] labelled the two violins the wrong way round.
        xlabels = ['Focus', 'Background']

    x_gene = X[:, gidx].toarray().flatten()
    samples = [x_gene[focus_idx], x_gene[background_idx]]

    plt.figure()
    sns.violinplot(data=samples, scale='width', cut=0)
    sns.stripplot(data=samples, jitter=True, color='black', size=1)
    plt.xticks([0, 1], xlabels)
    plt.savefig('{}_violin_{}.png'.format(NAMESPACE, gene))
Example #11
Source File: mouse_brain_astrocyte.py From geosketch with MIT License | 5 votes |
def astro_oligo_violin(X, genes, gene, labels, name):
    """Save a violin + strip plot of one gene for the three label groups.

    Rows of the densified `X` are split by `labels` into 'astro', 'oligo'
    and 'both'; the column belonging to `gene` is plotted for each group
    and the figure is written to '<name>_violin_<gene>.svg'.
    """
    dense = X.toarray()
    column = list(genes).index(gene)
    groups = [dense[labels == tag, column] for tag in ('astro', 'oligo', 'both')]

    plt.figure()
    sns.violinplot(data=groups, scale='width', cut=0)
    sns.stripplot(data=groups, jitter=True, color='black', size=1)
    plt.xticks([0, 1, 2], ['Astrocytes', 'Oligodendrocytes', 'Both'])
    plt.savefig('{}_violin_{}.svg'.format(name, gene))
Example #12
Source File: timeplots.py From NanoPlot with GNU General Public License v3.0 | 5 votes |
def length_over_time(dfs, path, figformat, title, log_length=False, plot_settings=None):
    """Draw a violin plot of read length per time bin and save it.

    Parameters
    ----------
    dfs : DataFrame
        Must provide "timebin" and "lengths" (plus "log_lengths" when
        `log_length` is true). If a boolean "length_filter" column exists,
        only rows where it is true are plotted.
    path : str
        Prefix prepended to the output file name.
    figformat : str
        Used both as the file extension and as the format passed to save().
    title : str or None
        Plot title; when falsy the default title of the Plot object is used.
    log_length : bool
        Plot the "log_lengths" column and label the y axis with powers of 10.
    plot_settings : dict, optional
        Extra keyword arguments forwarded to ``sns.set``.

    Returns
    -------
    Plot
        The Plot object holding the saved figure.
    """
    # A None default avoids sharing one mutable dict between calls.
    settings = plot_settings or {}
    if log_length:
        length_column = "log_lengths"
        time_length = Plot(path=path + "TimeLogLengthViolinPlot." + figformat,
                           title="Violin plot of log read lengths over time")
    else:
        length_column = "lengths"
        time_length = Plot(path=path + "TimeLengthViolinPlot." + figformat,
                           title="Violin plot of read lengths over time")
    sns.set(style="white", **settings)

    if "length_filter" in dfs:  # produced by NanoPlot filtering of too long reads
        temp_dfs = dfs[dfs["length_filter"]]
    else:
        temp_dfs = dfs

    ax = sns.violinplot(x="timebin",
                        y=length_column,
                        data=temp_dfs,
                        inner=None,
                        cut=0,
                        linewidth=0)
    ax.set(xlabel='Interval (hours)',
           ylabel="Read length",
           title=title or time_length.title)
    if log_length:
        # Powers of ten up to 10x the longest read (taken from the
        # unfiltered frame), placed at their log10 positions.
        upper = 10 * np.amax(dfs["lengths"])
        ticks = [10**i for i in range(10) if not 10**i > upper]
        ax.set(yticks=np.log10(ticks), yticklabels=ticks)
    plt.xticks(rotation=45, ha='center', fontsize=8)
    time_length.fig = ax.get_figure()
    time_length.save(format=figformat)
    plt.close("all")
    return time_length
Example #13
Source File: plots.py From cdlib with BSD 2-Clause "Simplified" License | 5 votes |
def plot_com_stat(com_clusters, com_fitness):
    """
    Plot the distribution of a property among all communities for a clustering, or a list of clusterings (violin-plots)

    :param com_clusters: list of clusterings to compare, or a single clustering
    :param com_fitness: the fitness/community property to use
    :return: the violin-plots

    Example:

    >>> from cdlib import algorithms, viz, evaluation
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> coms2 = algorithms.walktrap(g)
    >>> violinplot = viz.plot_com_stat([coms,coms2],evaluation.size)

    """
    if isinstance(com_clusters, cdlib.classes.clustering.Clustering):
        com_clusters = [com_clusters]

    allVals = []
    allNames = []
    for c in com_clusters:
        # one value per community, labelled with the clustering's description
        prop = com_fitness(c.graph, c, summary=False)
        allVals.extend(prop)
        allNames.extend([c.get_description()] * len(prop))

    # x/y are passed by keyword: recent seaborn releases no longer accept
    # them positionally.
    ax = sns.violinplot(x=allNames, y=allVals, cut=0, saturation=0.5,
                        palette="Set3")
    for tick in ax.get_xticklabels():
        tick.set_rotation(90)

    plt.ylabel("%s" % com_fitness.__name__)
    plt.xlabel("Algorithm")
    plt.tight_layout()

    return ax
Example #14
Source File: mouse_brain_subcluster.py From geosketch with MIT License | 5 votes |
def astro_oligo_violin(X, genes, gene, labels, name):
    """Plot one gene's values for astrocyte, oligodendrocyte and mixed cells.

    `X` is densified, the column matching `gene` in `genes` is selected, and
    rows are grouped by `labels` ('astro', 'oligo', 'both'). Violins with an
    overlaid strip plot are saved to '<name>_violin_<gene>.svg'.
    """
    tick_names = {
        'astro': 'Astrocytes',
        'oligo': 'Oligodendrocytes',
        'both': 'Both',
    }
    matrix = X.toarray()
    gene_col = list(genes).index(gene)
    per_group = [matrix[labels == key, gene_col] for key in tick_names]

    plt.figure()
    sns.violinplot(data=per_group, scale='width', cut=0)
    sns.stripplot(data=per_group, jitter=True, color='black', size=1)
    plt.xticks([0, 1, 2], list(tick_names.values()))
    plt.savefig('{}_violin_{}.svg'.format(name, gene))
Example #15
Source File: plot_kmer_evenness.py From EdwardsLab with MIT License | 5 votes |
def plot_shannon(df, output, verbose=False):
    """Save a violin + swarm plot of 'Shannon' per 'kmer'.

    :param df: data frame with 'kmer' and 'Shannon' columns
    :param output: file name prefix; the plot goes to '<output>.shannon.png'
    :param verbose: write a progress message to stderr
    """
    if verbose:
        message = f"{bcolors.GREEN}Plotting swarmed shannon{bcolors.ENDC}\n"
        sys.stderr.write(message)

    columns = dict(data=df, x='kmer', y='Shannon')
    sns.violinplot(**columns)
    sns.swarmplot(**columns)
    sns.despine(offset=10, trim=True)
    plt.savefig(f"{output}.shannon.png")
    # clear the current figure so later plots start empty
    plt.clf()
Example #16
Source File: plot_kmer_evenness.py From EdwardsLab with MIT License | 5 votes |
def plot_swarm_evenness(df, output, verbose=False):
    """Save a violin + swarm plot of 'Evenness' per 'kmer'.

    :param df: data frame with 'kmer' and 'Evenness' columns
    :param output: file name prefix; the plot goes to
        '<output>.swarm.evenness.png'
    :param verbose: write a progress message to stderr
    """
    if verbose:
        message = f"{bcolors.GREEN}Plotting swarmed evenness{bcolors.ENDC}\n"
        sys.stderr.write(message)

    for draw in (sns.violinplot, sns.swarmplot):
        draw(data=df, x='kmer', y='Evenness')
    sns.despine(offset=10, trim=True)
    plt.savefig(f"{output}.swarm.evenness.png")
    # clear the current figure so later plots start empty
    plt.clf()
Example #17
Source File: plot_kmer_evenness.py From EdwardsLab with MIT License | 5 votes |
def plot_evenness(df, output, verbose=False):
    """Save a violin plot of 'Evenness' per 'kmer'.

    :param df: data frame with 'kmer' and 'Evenness' columns
    :param output: file name prefix; the plot goes to '<output>.evenness.png'
    :param verbose: write a progress message to stderr
    """
    if verbose:
        message = f"{bcolors.GREEN}Plotting evenness{bcolors.ENDC}\n"
        sys.stderr.write(message)

    target = f"{output}.evenness.png"
    sns.violinplot(x='kmer', y='Evenness', data=df)
    sns.despine(offset=10, trim=True)
    plt.savefig(target)
    # clear the current figure so later plots start empty
    plt.clf()
Example #18
Source File: brute_force_plotter.py From brute-force-plotter with MIT License | 5 votes |
def histogram_violin_plots(data, axes, file_name=None):
    """Draw a histogram and a violin plot of one variable.

    Parameters
    ----------
    data : array-like
        Values of a single variable.
    axes : sequence of Axes
        axes[0] receives the histogram, axes[1] the violin plot.
    file_name : optional
        Accepted for interface compatibility; not used by this function.
    """
    # histogram
    sns.distplot(data, ax=axes[0], axlabel="")
    # The first positional argument of violinplot was `x` in older seaborn
    # and is `data` in current releases. Naming it keeps the original
    # meaning on every version.
    sns.violinplot(x=data, ax=axes[1], inner="quartile", scale="count")
    sns.despine(left=True)
Example #19
Source File: plot.py From speedml with MIT License | 5 votes |
def ordinal(self, y):
    """
    Plot ordinal features (categorical numeric) using a violin plot against
    the target feature. Use this to spot outliers within ordinal features
    spread across the associated target feature values.

    ``Base.data_n()`` is called first; the plot then reads ``Base.train_n``
    with ``Base.target`` on the x axis and ``y`` on the y axis.
    """
    Base.data_n()
    target = Base.target
    plt.figure(figsize=(8, 4))
    sns.violinplot(data=Base.train_n, x=target, y=y)
    for set_label, text in ((plt.xlabel, target), (plt.ylabel, y)):
        set_label(text, fontsize=12)
    plt.show()
Example #20
Source File: adjacency.py From nltools with MIT License | 5 votes |
def plot_label_distance(self, labels=None, ax=None):
    ''' Create a violin plot indicating within and between label distance

        Args:
            labels (np.array):  numpy array of labels to plot; defaults to
                                a copy of self.labels
            ax: matplotlib axes to draw on (optional)

        Returns:
            f: violin plot handles (the axes returned by seaborn)

        Raises:
            ValueError: if this is not a single adjacency matrix, or if
                        `labels` does not match the matrix size

    '''

    if not self.is_single_matrix:
        raise ValueError('This function only works on single adjacency '
                         'matrices.')

    distance = pd.DataFrame(self.squareform())

    if labels is None:
        labels = np.array(deepcopy(self.labels))
    else:
        if len(labels) != distance.shape[0]:
            raise ValueError('Labels must be same length as distance matrix')
        # A plain list would make `labels == i` a scalar, not a mask.
        labels = np.asarray(labels)

    # Rows are collected in lists and framed once; DataFrame.append (used
    # previously) no longer exists in pandas 2.
    records = {'Distance': [], 'Group': [], 'Type': []}
    for i in np.unique(labels):
        members = labels == i
        # Within: each unordered pair inside the group once (upper triangle).
        within = distance.loc[members, members].values[
            np.triu_indices(int(members.sum()), k=1)]
        # Between: every member against every non-member. The old code took
        # the non-member x non-member block and indexed it with the group's
        # own triangle, which measured the wrong pairs and could go out of
        # bounds.
        between = distance.loc[members, ~members].values.ravel()
        for kind, values in (('Within', within), ('Between', between)):
            records['Distance'].extend(values)
            records['Group'].extend([i] * len(values))
            records['Type'].extend([kind] * len(values))

    out = pd.DataFrame(records, columns=['Distance', 'Group', 'Type'])
    f = sns.violinplot(x="Group", y="Distance", hue="Type", data=out,
                       split=True, inner='quartile',
                       palette={"Within": "lightskyblue", "Between": "red"},
                       ax=ax)
    f.set_ylabel('Average Distance')
    f.set_title('Average Group Distance')
    # The docstring always promised the handle; it was never returned.
    return f
Example #21
Source File: plots.py From Comparative-Annotation-Toolkit with Apache License 2.0 | 5 votes |
def horizontal_violin_plot(data, ordered_genomes, title, xlabel, pdf, hue=None, x=None, y=None, xlim=None):
    """not so generic function that specifically produces a paired boxplot/violinplot

    Draws a horizontal violin plot ordered by `ordered_genomes`, titles and
    labels it, optionally clamps the x range to `xlim`, then hands `pdf` to
    multipage_close.
    """
    fig, ax = plt.subplots()
    violin_options = dict(
        order=ordered_genomes,
        palette=choose_palette(ordered_genomes),
        saturation=boxplot_saturation,
        orient='h',
        cut=0,
        scale='count',
    )
    sns.violinplot(data=data, x=x, y=y, hue=hue, ax=ax, **violin_options)
    fig.suptitle(title)
    ax.set_xlabel(xlabel)
    if xlim is not None:
        ax.set_xlim(xlim)
    multipage_close(pdf, tight_layout=False)
Example #22
Source File: analyze_hostguest.py From SAMPL6 with MIT License | 5 votes |
def generate_molecules_plot(self):
    """Save a horizontal violin plot of the binding free energy error per system.

    Reads ``self.data`` (columns 'system_id' and the error column named
    below) and writes the figure to ``self.MOLECULE_CORRELATION_PLOT_PATH``
    inside ``self.output_directory_path``.
    """
    # Correlation plot by molecules.
    plt.close('all')
    # Figure height grows with the number of distinct systems (one row each).
    n_rows = len(self.data.system_id.unique())
    fig, ax = plt.subplots(figsize=(6, 0.4*n_rows))
    # Raw string: same column name as before, without relying on the
    # invalid escape sequence '\D' being passed through unchanged.
    sns.violinplot(y='system_id', x=r'$\Delta\Delta$G error (calc - expt) [kcal/mol]',
                   data=self.data, linewidth=1.0, inner='point', cut=0, ax=ax)
    plt.tight_layout(pad=0.2)
    plt.savefig(os.path.join(self.output_directory_path,
                             self.MOLECULE_CORRELATION_PLOT_PATH))
Example #23
Source File: sct_compute_hausdorff_distance.py From spinalcordtoolbox with MIT License | 5 votes |
def show_results(self):
    """Save a split violin plot of the two distance distributions per slice.

    For 2D data (``self.dim_im == 2``) both distributions go in slice 0.
    For 3D data one pair of distributions is added per entry of
    ``self.distances``. Distances are scaled by ``self.dim_pix``. The figure
    is written to 'violin_plot.png'.
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    import pandas as pd

    # The former plt.hold(True) call was dropped: pyplot.hold was removed in
    # Matplotlib 3.0 and raised AttributeError there. Axes always keep
    # earlier artists now.
    sns.set(style="whitegrid", palette="pastel", color_codes=True)
    plt.figure(figsize=(35, 20))

    data_dist = {"distances": [], "image": [], "slice": []}

    def add_samples(distribution, image_id, slice_id):
        # Append one distribution as flat rows tagged with image and slice.
        data_dist["distances"].extend(dist * self.dim_pix for dist in distribution)
        data_dist["image"].extend(len(distribution) * [image_id])
        data_dist["slice"].extend(len(distribution) * [slice_id])

    if self.dim_im == 2:
        add_samples(self.dist1_distribution, 1, 0)
        add_samples(self.dist2_distribution, 2, 0)

    if self.dim_im == 3:
        for i in range(len(self.distances)):
            add_samples(self.dist1_distribution[i], 1, i)
            add_samples(self.dist2_distribution[i], 2, i)

    frame = pd.DataFrame(data_dist)
    sns.violinplot(x="slice", y="distances", hue="image", data=frame,
                   split=True, inner="point", cut=0)
    plt.savefig('violin_plot.png')
    # plt.show()

# ----------------------------------------------------------------------------------------------------------------------
Example #24
Source File: typeI_analysis.py From SAMPL6 with MIT License | 5 votes |
def generate_molecules_plot(self):
    """Save a horizontal violin plot of the pKa error for each pKa ID.

    Reads ``self.data`` sorted by 'pKa ID' and writes the figure to
    ``self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR`` inside
    ``self.output_directory_path``.
    """
    # Correlation plot by molecules.
    plt.close('all')
    # `ascending` must be a real boolean: the former ["True"] was a string
    # and is rejected by recent pandas releases.
    data_ordered_by_pKa_ID = self.data.sort_values(["pKa ID"], ascending=True)
    # presumably A4 portrait in inches (confirm)
    sns.set(rc={'figure.figsize': (8.27, 11.7)})
    # Raw string: same column name as before, without the invalid '\D' escape.
    sns.violinplot(y='pKa ID', x=r'$\Delta$pKa error (calc - exp)',
                   data=data_ordered_by_pKa_ID,
                   inner='point', linewidth=1, width=1.2)
    plt.tight_layout()
    plt.savefig(os.path.join(self.output_directory_path,
                             self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR))
Example #25
Source File: typeIII_analysis.py From SAMPL6 with MIT License | 5 votes |
def generate_molecules_plot(self):
    """Save a horizontal violin plot of the pKa error for each pKa ID.

    ``self.data`` is sorted by 'pKa ID' before plotting. The output file is
    ``self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR`` joined onto
    ``self.output_directory_path``.
    """
    # Correlation plot by molecules.
    plt.close('all')

    # The sort direction is now a boolean. The original passed the string
    # "True", which recent pandas releases refuse.
    ordered = self.data.sort_values(["pKa ID"], ascending=True)

    sns.set(rc={'figure.figsize': (8.27, 11.7)})
    # The column name is a raw string so that '\D' is no longer an invalid
    # escape sequence; the resulting text is unchanged.
    sns.violinplot(
        y='pKa ID',
        x=r'$\Delta$pKa error (calc - exp)',
        data=ordered,
        inner='point',
        linewidth=1,
        width=1.2,
    )
    plt.tight_layout()

    target = os.path.join(self.output_directory_path,
                          self.PKA_CORRELATION_PLOT_BY_PKA_PATH_DIR)
    plt.savefig(target)
Example #26
Source File: logP_analysis.py From SAMPL6 with MIT License | 5 votes |
def generate_molecules_plot(self):
    """Save a horizontal violin plot of the logP error for each molecule.

    Reads ``self.data`` sorted by 'Molecule ID' and writes the figure to
    ``self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR`` inside
    ``self.output_directory_path``.
    """
    # Correlation plot by molecules.
    plt.close('all')
    # `ascending` must be a real boolean: the former ["True"] was a string
    # and is rejected by recent pandas releases.
    data_ordered_by_mol_ID = self.data.sort_values(["Molecule ID"], ascending=True)
    # presumably A4 portrait in inches (confirm)
    sns.set(rc={'figure.figsize': (8.27, 11.7)})
    # Raw string: same column name as before, without the invalid '\D' escape.
    sns.violinplot(y='Molecule ID', x=r'$\Delta$logP error (calc - exp)',
                   data=data_ordered_by_mol_ID,
                   inner='point', linewidth=1, width=1.2)
    plt.tight_layout()
    plt.savefig(os.path.join(self.output_directory_path,
                             self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR))
Example #27
Source File: utils.py From dl-eeg-review with MIT License | 5 votes |
def run_kruskal(df, condition_col, value_col='acc_diff', min_n_obs=6,
                plot=False):
    """Run Kruskal-Wallis analysis of variance test.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run
            the test on.
        min_n_obs (int): minimum number of observations in each sample in
            order to run the test.
        plot (bool): if True, also draw a violin plot of the conditions that
            have at least `min_n_obs` rows.

    Returns:
        (dict): keys 'test' (always 'kruskal'), 'pvalue', 'stat' and 'fig'.
            'pvalue' and 'stat' are NaN when the test was not run; 'fig' is
            None unless `plot` is True.
    """
    samples = [
        values
        for _, values in df.groupby(condition_col)[value_col]
        if len(values) >= min_n_obs
    ]

    # The test only runs when more than two samples are large enough.
    if len(samples) <= 2:
        stat = p = np.nan
        print('Not enough samples with more than {} observations.'.format(min_n_obs))
    else:
        stat, p = kruskal(*samples)

    fig = None
    if plot:
        counts = df[condition_col].value_counts()
        kept_conditions = counts.index[counts >= min_n_obs].tolist()
        plotted = df[df[condition_col].isin(kept_conditions)]
        fig, ax = plt.subplots()
        sns.violinplot(data=plotted, x=condition_col, y=value_col, ax=ax)
        ax.set_title('Kruskal-Wallis for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))

    return {'test': 'kruskal', 'pvalue': p, 'stat': stat, 'fig': fig}
Example #28
Source File: logP_analysis.py From SAMPL6 with MIT License | 5 votes |
def generate_molecules_plot(self):
    """Save a horizontal violin plot of the logP error for each molecule.

    ``self.data`` is sorted by 'Molecule ID' before plotting. The output
    file is ``self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR`` joined onto
    ``self.output_directory_path``.
    """
    # Correlation plot by molecules.
    plt.close('all')

    # The sort direction is now a boolean. The original passed the string
    # "True", which recent pandas releases refuse.
    ordered = self.data.sort_values(["Molecule ID"], ascending=True)

    sns.set(rc={'figure.figsize': (8.27, 11.7)})
    # The column name is a raw string so that '\D' is no longer an invalid
    # escape sequence; the resulting text is unchanged.
    sns.violinplot(
        y='Molecule ID',
        x=r'$\Delta$logP error (calc - exp)',
        data=ordered,
        inner='point',
        linewidth=1,
        width=1.2,
    )
    plt.tight_layout()

    target = os.path.join(self.output_directory_path,
                          self.LOGP_CORRELATION_PLOT_BY_LOGP_PATH_DIR)
    plt.savefig(target)
Example #29
Source File: plotting.py From QUANTAXIS with MIT License | 4 votes |
def plot_quantile_returns_violin(return_by_q, ylim_percentiles=None, ax=None):
    """Violin plot of per-period returns, in basis points, by factor quantile.

    Parameters
    ----------
    return_by_q : pd.DataFrame
        Returns whose columns are forward periods; after stacking and
        resetting the index it must expose a 'factor_quantile' column.
    ylim_percentiles : pair of numbers, optional
        Lower and upper percentiles of the data used as y-axis limits.
    ax : matplotlib Axes, optional
        Axes to draw on; a new 18x6 figure is created when omitted.

    Returns
    -------
    ax : matplotlib Axes
    """
    returns = return_by_q.copy()

    if ylim_percentiles is None:
        ymin = ymax = None
    else:
        ymin = np.nanpercentile(returns.values, ylim_percentiles[0]) * DECIMAL_TO_BPS
        ymax = np.nanpercentile(returns.values, ylim_percentiles[1]) * DECIMAL_TO_BPS

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(18, 6))

    # Long format: one row per (index entry, forward period) with its return.
    in_bps = returns.multiply(DECIMAL_TO_BPS)
    in_bps.columns = in_bps.columns.set_names("forward_periods")
    long_form = in_bps.stack()
    long_form.name = "return"
    long_form = long_form.reset_index()

    sns.violinplot(
        data=long_form,
        x="factor_quantile",
        hue="forward_periods",
        y="return",
        orient="v",
        cut=0,
        inner="quartile",
        ax=ax,
    )
    ax.set(
        xlabel="",
        ylabel="Return (bps)",
        title="Period Wise Return By Factor Quantile",
        ylim=(ymin, ymax),
    )
    ax.axhline(0.0, linestyle="-", color="black", lw=0.7, alpha=0.6)

    return ax
Example #30
Source File: utils.py From dl-eeg-review with MIT License | 4 votes |
def run_mannwhitneyu(df, condition_col, conditions, value_col='acc_diff',
                     min_n_obs=10, plot=False):
    """Run Mann-Whitney rank-sum test.

    Args:
        df (pd.DataFrame): dataframe where each row is a paper.
        condition_col (str): name of column to use as condition.
        conditions (list): list of two strings containing the values of the
            condition to compare.

    Keyword Args:
        value_col (str): name of column to use as the numerical value to run
            the test on.
        min_n_obs (int): minimum number of observations in each sample in
            order to run the test.
        plot (bool): if True, also draw a violin plot of the two conditions.

    Returns:
        (dict): keys 'test' (always 'mannwhitneyu'), 'pvalue', 'stat' and
            'fig'. 'pvalue' and 'stat' are NaN when either sample is too
            small; 'fig' is None unless `plot` is True.
    """
    assert len(conditions) == 2, '`conditions` must be of length 2, got {}'.format(
        len(conditions))

    first, second = (
        df[df[condition_col] == condition][value_col]
        for condition in (conditions[0], conditions[1])
    )

    if len(first) < min_n_obs or len(second) < min_n_obs:
        stat = p = np.nan
        print('Not enough observations in each sample ({} and {}).'.format(
            len(first), len(second)))
    else:
        stat, p = mannwhitneyu(first, second)

    fig = None
    if plot:
        fig, ax = plt.subplots()
        selected = df[df[condition_col].isin(conditions)]
        sns.violinplot(data=selected, x=condition_col, y=value_col, ax=ax)
        ax.set_title('Mann-Whitney for {} vs. {}\n(pvalue={:0.4f})'.format(
            condition_col, value_col, p))

    return {'test': 'mannwhitneyu', 'pvalue': p, 'stat': stat, 'fig': fig}