Python seaborn.boxplot() Examples
The following are 28 code examples of seaborn.boxplot().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module seaborn, or try the search function.
Example #1
Source File: plot_errors_boxplot.py From MDI with MIT License | 7 votes |
def plot(params_dir):
    """Box-plot the best validation objective of every model under ``params_dir``.

    Every sub-directory of ``params_dir`` is treated as one model.  Each
    ``*.h5`` file inside it contributes one value, read from
    ``['best_epoch']['validate_objective']``.  The collected values are written
    to a CSV file named after the last component of ``params_dir`` (relative to
    the current working directory) and drawn as one box per model, saved as
    ``<IMAGES_DIRECTORY>/<name>_errors_box_plot.png``.

    Parameters:
        params_dir: Directory containing one sub-directory per model.
    """
    run_name = os.path.basename(os.path.normpath(params_dir))
    model_dirs = [name for name in os.listdir(params_dir)
                  if os.path.isdir(os.path.join(params_dir, name))]

    scores = defaultdict(list)
    for model_dir in model_dirs:
        label = re.sub('_bin_scaled_mono_True_ratio', '', model_dir)
        scores[label] = [
            dd.io.load(path)['best_epoch']['validate_objective']
            for path in glob.glob(os.path.join(params_dir, model_dir) + '/*.h5')]

    # Wrapping each list in a Series lets models with different numbers of
    # runs share one frame (shorter columns are padded with NaN).
    # ``.items()`` works on Python 2 and 3; ``.iteritems()`` is Python 2 only.
    df = pd.DataFrame({label: pd.Series(values)
                       for label, values in scores.items()})
    df.to_csv(run_name)

    plt.figure(figsize=(16, 4), dpi=300)
    # Pass the frame as ``data=`` so it is read as wide-form input (one box per
    # column) regardless of seaborn's positional-argument order, which has
    # changed between releases.
    g = sns.boxplot(data=df)
    g.set_xticklabels(df.columns, rotation=45)
    plt.tight_layout()
    plt.savefig('{}_errors_box_plot.png'.format(
        os.path.join(IMAGES_DIRECTORY, run_name)))
Example #2
Source File: brute_force_plotter.py From brute-force-plotter with MIT License | 7 votes |
def bar_box_violin_dot_plots(data, category_col, numeric_col, axes, file_name=None):
    """Draw four views of one numeric column grouped by one categorical column.

    The plots go onto the first four entries of ``axes``:
    bar plot -> ``axes[0]``, strip plot -> ``axes[1]``,
    box plot -> ``axes[2]``, violin plot -> ``axes[3]``.

    Parameters:
        data: DataFrame holding both columns.
        category_col: Name of the column used on the x axis.
        numeric_col: Name of the column used on the y axis.
        axes: Indexable collection of at least four matplotlib Axes.
        file_name: Accepted for interface compatibility; not used here.
    """
    # x/y are passed by keyword: seaborn >= 0.12 takes ``data`` as the only
    # positional argument, so positional x/y would collide with ``data=``.
    sns.barplot(x=category_col, y=numeric_col, data=data, ax=axes[0])
    # The box plot is the only view that drops rows with a missing value.
    sns.boxplot(
        x=category_col,
        y=numeric_col,
        data=data[data[numeric_col].notnull()],
        ax=axes[2],
    )
    # ``kind="violin"`` was dropped: it is not a violinplot parameter (it
    # belongs to figure-level functions) and was only swallowed by **kwargs.
    sns.violinplot(
        x=category_col,
        y=numeric_col,
        data=data,
        inner="quartile",
        scale="count",
        split=True,
        ax=axes[3],
    )
    sns.stripplot(x=category_col, y=numeric_col, data=data, jitter=True, ax=axes[1])
    sns.despine(left=True)
Example #3
Source File: stock_visualizer.py From stock-analysis with MIT License | 6 votes |
def boxplot(self, column, **kwargs):
    """
    Draw one box per group for a single column of the visualizer's data.

    The grouping key is `self.group_by` and the data source is `self.data`.

    Parameters:
        - column: The name of the column to visualize.
        - kwargs: Additional keyword arguments to pass down
                  to the plotting function.

    Returns:
        A matplotlib Axes object.
    """
    axes = sns.boxplot(x=self.group_by, y=column, data=self.data, **kwargs)
    return axes
Example #4
Source File: plotlib.py From mCaller with MIT License | 6 votes |
def plot_change_by_pos(diffs_by_context,plottype='box'):
    """Plot the current differences per position, split by base label.

    ``diffs_by_context`` is indexed as ``[label][context]`` and yields entries
    (sequences of differences); the last element of every entry is skipped.
    ``plottype`` selects ``'box'`` or ``'violin'``; any other value draws no
    distribution but still produces the decorated, saved figure.  The output
    file is always ``change_by_position_box.pdf``.
    """
    plt.figure(figsize=(6,4))

    # One (1-based position, label, difference) record per plotted value.
    records = [(idx + 1, label, delta)
               for label in diffs_by_context
               for ctx in diffs_by_context[label]
               for row in diffs_by_context[label][ctx]
               for idx, delta in enumerate(row[:-1])]
    changes_by_position = {
        'position': [rec[0] for rec in records],
        'base': [rec[1] for rec in records],
        'diff': [rec[2] for rec in records],
    }
    dPos = pd.DataFrame(changes_by_position)

    if plottype == 'box':
        draw = sns.boxplot
    elif plottype == 'violin':
        draw = sns.violinplot
    else:
        draw = None
    if draw is not None:
        # `cols`, `base` and `methbase` come from module scope.
        draw(x="position", y="diff", hue="base", data=dPos,
             palette=[cols[base],cols[methbase]])

    sns.despine(trim=False)
    plt.xlabel('Adenine Position in 6-mer')
    plt.ylabel('Measured - Expected Current (pA)')
    plt.ylim([-20,20])
    plt.legend(title='',loc='upper center', bbox_to_anchor=(0.5, 1.05),
               ncol=3, fancybox=True)
    plt.savefig('change_by_position_box.pdf',transparent=True,dpi=500,
                bbox_inches='tight')
Example #5
Source File: TargetAnalysisCategorical.py From exploripy with MIT License | 6 votes |
def BoxPlot(self, feature):
    """Save a box plot of ``self.df[feature]`` as a PNG and return its path.

    The box is filled and outlined with two colours picked at random from
    ``self.SelectedColors`` (the lower-indexed one is the fill).  The image is
    written to ``HTMLTemplate/dist/output/<feature>.png`` next to this module.

    Parameters:
        feature: Name of the column in ``self.df`` to plot.

    Returns:
        The path of the written PNG file.
    """
    fig, ax = plt.subplots()
    ax = sns.boxplot(y=self.df[feature], ax=ax)

    # Older Matplotlib exposes the box through ``ax.artists``; from 3.5 on the
    # box lives in ``ax.patches`` and ``ax.artists`` is empty.
    boxes = list(ax.artists) or list(ax.patches)
    box = boxes[0]

    fill_idx, edge_idx = sorted(random.sample(range(len(self.SelectedColors)), 2))
    box.set_facecolor(self.SelectedColors[fill_idx])
    box.set_edgecolor(self.SelectedColors[edge_idx])
    sns.despine(offset=10, trim=True)

    this_dir = os.path.dirname(__file__)
    # The former Linux-only branch stored an alternative path in a variable
    # that was never read, so this path was always the one used.
    OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/'+feature + '.png')
    fig.savefig(OutFileName)
    # Close the figure so repeated calls do not pile up open figures.
    plt.close(fig)
    return OutFileName
Example #6
Source File: EDA.py From exploripy with MIT License | 6 votes |
def BoxPlot(self,var):
    """Save a box plot of ``self.df[var]`` as a PNG and return its path.

    The box is filled and outlined with two colours picked at random from
    ``self.SelectedColors`` (the lower-indexed one is the fill).  The image is
    written to ``HTMLTemplate/dist/output/<var>.png`` next to this module.
    When ``self.debug == 'YES'`` the elapsed time is printed.

    Parameters:
        var: Name of the column in ``self.df`` to plot.

    Returns:
        The path of the written PNG file.
    """
    start = time.time()
    fig, ax = plt.subplots()
    ax = sns.boxplot(y=self.df[var], ax=ax)

    # Older Matplotlib exposes the box through ``ax.artists``; from 3.5 on the
    # box lives in ``ax.patches`` and ``ax.artists`` is empty.
    boxes = list(ax.artists) or list(ax.patches)
    box = boxes[0]

    fill_idx, edge_idx = sorted(random.sample(range(len(self.SelectedColors)), 2))
    box.set_facecolor(self.SelectedColors[fill_idx])
    box.set_edgecolor(self.SelectedColors[edge_idx])
    sns.despine(offset=10, trim=True)

    this_dir = os.path.dirname(__file__)
    OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/'+var + '.png')
    fig.savefig(OutFileName)
    # Close the figure so repeated calls do not pile up open figures.
    plt.close(fig)

    end = time.time()
    if self.debug == 'YES':
        print('BoxPlot',end-start)
    return OutFileName
Example #7
Source File: TargetAnalysisContinuous.py From exploripy with MIT License | 6 votes |
def BoxPlot(self, feature):
    """Save a box plot of ``self.df[feature]`` as a PNG and return its path.

    The box is filled and outlined with two colours picked at random from
    ``self.SelectedColors`` (the lower-indexed one is the fill).  The image is
    written to ``HTMLTemplate/dist/output/<feature>.png`` next to this module.

    Parameters:
        feature: Name of the column in ``self.df`` to plot.

    Returns:
        The path of the written PNG file.
    """
    fig, ax = plt.subplots()
    ax = sns.boxplot(y=self.df[feature], ax=ax)

    # Older Matplotlib exposes the box through ``ax.artists``; from 3.5 on the
    # box lives in ``ax.patches`` and ``ax.artists`` is empty.
    boxes = list(ax.artists) or list(ax.patches)
    box = boxes[0]

    fill_idx, edge_idx = sorted(random.sample(range(len(self.SelectedColors)), 2))
    box.set_facecolor(self.SelectedColors[fill_idx])
    box.set_edgecolor(self.SelectedColors[edge_idx])
    sns.despine(offset=10, trim=True)

    this_dir = os.path.dirname(__file__)
    # The former Linux-only branch recomputed exactly this same path, so a
    # single assignment covers every platform.
    OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/'+feature + '.png')
    fig.savefig(OutFileName)
    # Close the figure so repeated calls do not pile up open figures.
    plt.close(fig)
    return OutFileName
Example #8
Source File: metrics_acdc.py From acdc_segmenter with Apache License 2.0 | 6 votes |
def boxplot_metrics(df, eval_dir):
    """
    Write a three-row figure of boxplots (dice, hd, assd) to ``boxplots.eps``.

    Each row shows one measure per structure (column ``struc``), split by the
    ``phase`` column.

    :param df: data frame with columns ``struc``, ``phase``, ``dice``, ``hd`` and ``assd``
    :param eval_dir: directory the ``boxplots.eps`` file is written to
    :return: always 0
    """
    target = os.path.join(eval_dir, 'boxplots.eps')

    fig, axes = plt.subplots(3, 1)
    fig.set_figheight(14)
    fig.set_figwidth(7)

    # One measure per row, top to bottom.
    for row, measure in enumerate(('dice', 'hd', 'assd')):
        sns.boxplot(x='struc', y=measure, hue='phase', data=df,
                    palette="PRGn", ax=axes[row])

    plt.savefig(target)
    plt.close()

    return 0
Example #9
Source File: stats.py From temci with GNU General Public License v3.0 | 6 votes |
def boxplot(self, fig_width: Number, fig_height: Number = None):
    """
    Draws a horizontal box plot of the data frame returned by ``get_data_frame()``
    into a freshly created figure, which is stored in ``self._fig``.

    :param fig_width: width of the figure in cm
    :param fig_height: height of the figure in cm, if None it is derived from the figure width
                       via ``_height_for_width``
    """
    import seaborn as sns
    import matplotlib.pyplot as plt
    self.reset_plt()
    height = self._height_for_width(fig_width) if fig_height is None else fig_height
    size_in_inches = self._fig_size_cm_to_inch(fig_width, height)
    self._fig = plt.figure(figsize=size_in_inches)
    sns.boxplot(data=self.get_data_frame(), orient="h")
Example #10
Source File: plotlib.py From mCaller with MIT License | 5 votes |
def plot_training_probabilities(prob_scores,tb):
    """Box-plot the training probabilities of the two bases and save as PDF.

    ``prob_scores`` maps a base label to a list of probabilities, e.g.
    ``{'m6A':[0.9,0.4,...],'A':[0.1,0.5,0.2,...]}``.  The two labels plotted
    are the module-level names ``base`` and ``modbase``.  The figure is saved
    as ``training_probability_<tb>_model_boxplot.pdf`` and then shown.
    """
    sns.set_style('darkgrid')
    sns.set_palette(['#55B196','#B4656F'])
    fig = plt.figure(figsize=(3,4))
    prob_dict = {
        'probability': prob_scores[base]+prob_scores[modbase],
        'base': [base]*len(prob_scores[base])+[modbase]*len(prob_scores[modbase]),
    }
    prob_db = pd.DataFrame(prob_dict)
    sns.boxplot(x="base", y="probability", data=prob_db)
    sns.despine()
    # Save BEFORE showing: once plt.show() returns the figure may already be
    # closed, and a later plt.savefig() would write an empty canvas.
    fig.savefig('training_probability_'+tb+'_model_boxplot.pdf',
                transparent=True,dpi=500,bbox_inches='tight')
    plt.show()
Example #11
Source File: stock_visualizer.py From stock-analysis with MIT License | 5 votes |
def boxplot(self, **kwargs):
    """Placeholder for boxplot generation; always raises NotImplementedError."""
    message = 'To be implemented by subclasses!'
    raise NotImplementedError(message)
Example #12
Source File: stats.py From temci with GNU General Public License v3.0 | 5 votes |
def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]:
    """
    Calculates the upper and the lower whisker for a boxplot.
    I.e. the minimum and the maximum value of the data set that lie in the range
    (Q1 - whis * IQR, Q3 + whis * IQR).
    IQR being the interquartile distance, Q1 the lower and Q3 the upper quartile.

    Adapted from http://stackoverflow.com/a/20096945

    :param whis: multiple of the IQR that the whiskers may extend beyond the quartiles
    :return: tuple (lower whisker, upper whisker)
    """
    q1, _, q3 = self.quartiles()
    iqr = self.iqr()
    # get high extreme: the fence is measured from the UPPER quartile
    # (measuring it from q1 cut the upper whisker short)
    hi_val = q3 + whis * iqr
    whisk_hi = np.compress(self.array <= hi_val, self.array)
    if len(whisk_hi) == 0 or np.max(whisk_hi) < q3:
        # no data point between q3 and the fence: the whisker collapses onto q3
        whisk_hi = q3
    else:
        whisk_hi = np.max(whisk_hi)
    # get low extreme
    lo_val = q1 - whis * iqr
    whisk_lo = np.compress(self.array >= lo_val, self.array)
    if len(whisk_lo) == 0 or np.min(whisk_lo) > q1:
        whisk_lo = q1
    else:
        whisk_lo = np.min(whisk_lo)
    return whisk_lo, whisk_hi
Example #13
Source File: c5.py From abu with GNU General Public License v3.0 | 5 votes |
def sample_54_1():
    """
    5.4 Visualizing data with seaborn

    Shows three figures built from the module-level ``tsla_df`` frame: the
    distribution of ``p_change``, a boxplot of ``p_change`` per ``date_week``,
    and a joint plot of ``high`` against ``low``.

    :return:
    """
    # NOTE(review): distplot is deprecated in recent seaborn releases; it is
    # kept because its replacements do not draw an identical plot.
    sns.distplot(tsla_df['p_change'], bins=80)
    plt.show()

    sns.boxplot(x='date_week', y='p_change', data=tsla_df)
    plt.show()

    # x/y by keyword: seaborn >= 0.12 no longer accepts them positionally.
    sns.jointplot(x=tsla_df['high'], y=tsla_df['low'])
    plt.show()
Example #14
Source File: plot.py From gumpy with MIT License | 5 votes |
def accuracy_results_plot(data_path):
    """Show one box per column of the CSV file at ``data_path``.

    The first CSV column is used as the index.  The axis labels are taken from
    the module-level names ``x_label`` and ``y_label``.

    Parameters:
        data_path: Path of the CSV file to read.
    """
    data = pd.read_csv(data_path,index_col=0)
    # Configure the figure size BEFORE drawing so it applies to the figure the
    # boxplot creates.  Previously the boxes were drawn once before this call
    # and once after it, on top of each other in an already-sized figure.
    sns.set(rc={"figure.figsize": (9, 6)})
    ax = sns.boxplot(data=data)
    ax.set_xlabel(x_label,fontsize=15)
    ax.set_ylabel(y_label,fontsize=15)
    plt.show()
Example #15
Source File: visualization.py From default-credit-card-prediction with MIT License | 5 votes |
def visualize_feature_boxplot(X,y,selected_feature,features):
    """
    Draw and save the boxplot of one feature, one box per target value

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    selected_feature -- The feature whose distribution is plotted
    features -- Column names for X followed by the target; the target
                column must be named 'Y'
    """
    # feature columns and target side by side in one frame
    frame=pd.DataFrame(data=np.column_stack((X,y)),columns=features)

    sea.boxplot(data=frame,x='Y',y=selected_feature,hue="Y",palette="husl")
    plt.title('BoxPlot Distribution of '+selected_feature)

    # save fig
    target_dir = "img"
    save_fig(target_dir,'{}/{}_boxplot.png'.format(target_dir,selected_feature))
Example #16
Source File: features.py From bartpy with MIT License | 5 votes |
def plot_null_feature_importance_distributions(null_distributions: Mapping[int, List[float]], ax=None) -> "plt.Axes":
    """Draw one box per variable showing its null feature-importance values.

    Parameters:
        null_distributions: Maps each variable to its list of null
            importance values.
        ax: Axes to draw on; a new single-axes figure is created when None.

    Returns:
        The Axes that was drawn on (the annotation used to say ``None``
        although the Axes has always been returned).
    """
    if ax is None:
        _, ax = plt.subplots(1, 1)

    # Wide (one column per variable) -> long (variable, p) layout.
    df = pd.DataFrame(null_distributions)
    df = pd.DataFrame(df.unstack()).reset_index().drop("level_1", axis=1)
    df.columns = ["variable", "p"]

    sns.boxplot(x="variable", y="p", data=df, ax=ax)
    ax.set_title("Null Feature Importance Distribution")
    return ax
Example #17
Source File: alignment_evaluation.py From policy_diffusion with MIT License | 5 votes |
def plot_grid(self):
    """Show how the alignment score depends on each scoring parameter.

    Rebuilds ``self.grid_df`` and caps its ``score`` column at 500 (in place).
    Then shows three facet grids of score-by-``match`` boxplots, one per
    parameter (``match_score``, ``mismatch_score``, ``gap_score``), each with
    the other two parameters pinned to fixed values.
    """
    # Function-scope import: pyplot used to be reached through ``sns.plt``,
    # an alias that current seaborn releases no longer provide.
    import matplotlib.pyplot as plt

    self._create_grid_df()
    df = self.grid_df

    #make maximum possible 500
    df.loc[df['score']>500,'score'] = 500

    # (parameter varied across facets, the two parameters held fixed)
    panels = [
        ('match_score', (('mismatch_score', -2), ('gap_score', -1))),
        ('mismatch_score', (('match_score', 3), ('gap_score', -1))),
        ('gap_score', (('match_score', 3), ('mismatch_score', -2))),
    ]
    for facet_col, ((col_a, val_a), (col_b, val_b)) in panels:
        subset = df[(df[col_a] == val_a) & (df[col_b] == val_b)]
        g = sns.FacetGrid(subset, col=facet_col)
        g.map(sns.boxplot, "match", "score")
        plt.ylim(0,400)
        plt.show()
Example #18
Source File: alignment_evaluation.py From policy_diffusion with MIT License | 5 votes |
def plot_num_matches(self):
    """Show a histogram and a boxplot of the number of matches per bill pair.

    Every ordered pair of keys of ``self.bills`` is visited.  Pairs whose entry
    in ``self.scores`` is zero are skipped.  A pair counts as a match when
    ``i < j`` and its result is flagged ``'match'``; every other pair counts as
    a non-match.  Values are capped at 200.
    """
    matchScores = []
    nonMatchScores = []

    for i in self.bills.keys():
        for j in self.bills.keys():
            if self.scores[i,j] == 0:
                #ignore if score zero because url is broken
                continue
            entry = self.results[(i,j)]
            is_match = i < j and entry['match']
            capped = min(entry['features'][0]['num_matches'],200)
            if is_match:
                matchScores.append(capped)
            else:
                nonMatchScores.append(capped)

    combined = nonMatchScores + matchScores
    bins = np.linspace(min(combined), max(combined), 100)
    plt.hist(nonMatchScores, bins, alpha=0.5, label='Non-Matches')
    plt.hist(matchScores, bins, alpha=0.5, label='Matches')
    plt.legend(loc='upper right')
    plt.xlabel('Alignment Score')
    plt.ylabel('Number of Bill Pairs')
    plt.title('Distribution of Alignment Scores')
    plt.show()

    #make boxplot
    fig = plt.figure(1, figsize=(9, 6))
    ax = fig.add_subplot(111)
    ax.boxplot([matchScores, nonMatchScores])
    ax.set_xticklabels(['Match Scores', 'Non-Match Scores'])
    fig.show()
Example #19
Source File: alignment_evaluation.py From policy_diffusion with MIT License | 5 votes |
def plot_scores(self):
    """Show a histogram and a boxplot of the alignment score per bill pair.

    Every ordered pair of keys of ``self.bills`` is visited.  Pairs missing
    from ``self.results`` or with a zero score are skipped.  A pair counts as a
    match when ``i < j`` and its result is flagged ``'match'``; every other
    pair counts as a non-match.  Scores are capped at 200.
    """
    matchScores = []
    nonMatchScores = []

    for i in self.bills.keys():
        for j in self.bills.keys():
            if (i,j) not in self.results:
                continue
            entry = self.results[(i,j)]
            if entry['score'] == 0:
                #ignore if score zero because url is broken
                continue
            capped = min(entry['score'],200)
            if i < j and entry['match']:
                matchScores.append(capped)
            else:
                nonMatchScores.append(capped)

    combined = nonMatchScores + matchScores
    bins = np.linspace(min(combined), max(combined), 100)
    plt.hist(nonMatchScores, bins, alpha=0.5, label='Non-Matches')
    plt.hist(matchScores, bins, alpha=0.5, label='Matches')
    plt.legend(loc='upper right')
    plt.xlabel('Alignment Score')
    plt.ylabel('Number of Bill Pairs')
    plt.title('Distribution of Alignment Scores')
    plt.show()

    #make boxplot
    fig = plt.figure(1, figsize=(9, 6))
    ax = fig.add_subplot(111)
    ax.boxplot([matchScores, nonMatchScores])
    ax.set_xticklabels(['Match Scores', 'Non-Match Scores'])
    fig.show()
Example #20
Source File: stock_visualizer.py From stock-analysis with MIT License | 5 votes |
def boxplot(self, **kwargs):
    """
    Draw a box plot of `self.data` through its own `plot` method.

    Parameters:
        - kwargs: Additional keyword arguments to pass down
                  to the plotting function.

    Returns:
        A matplotlib Axes object.
    """
    draw = self.data.plot
    return draw(kind='box', **kwargs)
Example #21
Source File: plotting.py From fmridenoise with Apache License 2.0 | 4 votes |
def motion_plot(group_conf_summary):
    """Plot three motion measures per task and return the figure.

    Draws one panel each for ``mean_fd``, ``max_fd`` and ``perc_spikes``.
    Every panel is a swarm plot coloured by the ``include`` column, overlaid
    with an unfilled boxplot and a dashed line at the panel's threshold
    (0.2, 5 and 20 respectively).

    Parameters:
        group_conf_summary: DataFrame with the columns ``task``, ``include``,
            ``mean_fd``, ``max_fd`` and ``perc_spikes``.

    Returns:
        The matplotlib Figure holding the three panels.
    """
    # Plot style setup
    plt.style.use('seaborn-white')
    plt.rcParams['font.family'] = 'Helvetica'
    colour = ["#fe6863", "#00a074"]
    # set_palette() returns None; it only installs the colours as the default
    # cycle, which the hue-coloured swarm plot then picks up.
    sns.set_palette(colour)
    small = 15
    plt.rc('font', size=small)  # controls default text sizes
    plt.rc('axes', titlesize=small)  # fontsize of the axes title
    plt.rc('axes', linewidth=2.2)
    plt.rc('axes', labelsize=small)  # fontsize of the x and y labels
    plt.rc('xtick', labelsize=small)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=small)  # fontsize of the tick labels
    plt.rc('legend', fontsize=small)  # legend fontsize
    plt.rc('lines', linewidth=2.2, color='gray')
    # ------------------------------------------

    motion_dict = {'Mean FD': ['mean_fd', 0.2],
                   'Max FD': ['max_fd', 5],
                   'Percent of outlier dataframes (%)': ['perc_spikes', 20]}

    fig, axes = plt.subplots(1, 3, figsize=(16, 7))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)

    # Everything is drawn on the prepared axes.  No extra plt.figure() per
    # panel: those figures stayed empty and were never closed.
    for ax, (label, (column, threshold)) in zip(axes, motion_dict.items()):
        sns.swarmplot(y=column, x="task", hue="include",
                      data=group_conf_summary,
                      alpha=0.8, s=10, ax=ax)
        sns.boxplot(y=column, x="task", data=group_conf_summary,
                    showcaps=False, boxprops={'facecolor': 'None'},
                    showfliers=False, ax=ax)

        ax.title.set_text(f"Threshold = {threshold}")
        ax.axhline(threshold, ls='--', color="#fe6863")
        ax.set(xlabel='')
        ax.set(ylabel=label)
        legend = ax.get_legend()
        if legend is not None:  # no legend is created when nothing is drawn
            legend.set_visible(False)
        ax.tick_params(axis='both', which='both', length=6, width=2.2)

    fig.suptitle("Excluding high motion subjects", va="top")

    return fig
Example #22
Source File: visualization.py From default-credit-card-prediction with MIT License | 4 votes |
def visualize_boxplots(X,y):
    """
    Visualize the boxplots of the features, one figure per feature group

    Keyword arguments:
    X -- The feature vectors (2-D array, at least 23 columns)
    y -- The target vector (not used by the plots)
    """
    # (first column, one-past-last column, column names, figure title)
    panels = [
        (0, 1, ["Credit"],
         'BoxPlot Distribution of Credit'),
        (1, 4, ["Gender","Education","Marital Status"],
         'BoxPlot Distribution of Features: Gender, Education and Marital Status'),
        (4, 5, ["Age"],
         'BoxPlot Distribution of Age'),
        (5, 11, ["X6","X7","X8","X9","X10","X11"],
         'BoxPlot Distribution of Features: History of Payment'),
        (11, 17, ["X12","X13","X14","X15","X16","X17"],
         'BoxPlot Distribution of Features: Amount of Bill Statements'),
        # This panel used to re-plot columns 11..16 (the bill statements)
        # under the X18..X23 labels; it now plots columns 17..22.
        (17, 23, ["X18","X19","X20","X21","X22","X23"],
         'BoxPlot Distribution of Features: Amount of Previous Payments'),
    ]

    for start, stop, names, title in panels:
        df=pd.DataFrame(data=X[:,start:stop],columns=names)
        sea.boxplot(data=df, orient="h",palette="husl")
        plt.title(title)
        plt.show()
Example #23
Source File: plots.py From AlphaPy with Apache License 2.0 | 4 votes |
def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Draw a seaborn box plot and hand its figure to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the ``x`` and ``y`` features.
    x : str
        Variable name in ``df`` to display along the x-axis.
    y : str
        Variable name in ``df`` to display along the y-axis.
    hue : str
        Variable name to be used as hue, i.e., another data dimension.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """

    logger.info("Generating Box Plot")

    # Draw the boxes, then offset and trim the spines

    axes = sns.boxplot(x=x, y=y, hue=hue, data=df)
    sns.despine(offset=10, trim=True)

    # Save the figure that owns the axes

    write_plot('seaborn', axes.get_figure(), 'box_plot', tag, directory)


#
# Function plot_swarm
#
Example #24
Source File: benchmarks.py From datawig with Apache License 2.0 | 4 votes |
def plot_results(results):
    """Plot the relative MSE of every imputer per missingness mechanism.

    Reads ``benchmark_results.csv`` from the module-level ``dir_path``,
    normalises each MSE by the largest MSE observed for the same
    (data, missingness, percent_missing) group and draws one boxplot panel each
    for MCAR, MAR and MNAR.  Only the last panel keeps its legend, placed
    outside the axes.  The figure is saved as ``benchmarks_datawig.pdf`` in the
    current working directory.

    Parameters:
        results: Accepted for interface compatibility; the data is always
            re-read from the CSV file.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Hand read_csv the path so pandas opens AND closes the file itself.  The
    # previous open(...) call had an unbalanced parenthesis and would have
    # leaked the file handle.
    df = pd.read_csv(os.path.join(dir_path, 'benchmark_results.csv'))
    group_max = df.groupby(['data','missingness','percent_missing'])['mse'].transform('max')
    df['mse_percent'] = df.mse / group_max

    sns.set_style("whitegrid")
    sns.set_palette(sns.color_palette("RdBu_r", 7))
    sns.set_context("notebook", font_scale=1.3, rc={"lines.linewidth": 1.5})
    plt.figure(figsize=(12,3))

    # (missingness value in the CSV, panel title, y-axis label)
    panels = [
        ('MCAR', "Missing completely at random", "Relative MSE"),
        ('MAR', "Missing at random", ''),
        ('MNAR', "Missing not at random", ""),
    ]
    for position, (mechanism, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        sns.boxplot(hue='imputer',
                    y='mse_percent',
                    x='percent_missing',
                    data=df[df['missingness']==mechanism])
        plt.title(title)
        plt.xlabel('Percent Missing')
        plt.ylabel(ylabel)
        if position < len(panels):
            # Only the last panel carries the shared legend.
            plt.gca().get_legend().remove()

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig('benchmarks_datawig.pdf')


experiment()
Example #25
Source File: analysis.py From dl-eeg-review with MIT License | 4 votes |
def _plot_results_accuracy_per_domain(results_df, diff_df, save_cfg):
    """Make scatterplot + boxplot to show accuracy difference by domain.

    The top panel shows every accuracy difference in ``results_df`` as a
    jittered point, grouped by 'Main domain'.  The bottom panel summarises
    ``diff_df['acc_diff']`` as a boxplot overlaid with a swarm plot.  The
    median, the interquartile range and up to three largest improvements are
    logged.

    Args:
        results_df: DataFrame with columns 'Main domain' and 'acc_diff'.
            Its 'Main domain' column is overwritten with the wrapped labels.
        diff_df: DataFrame with columns 'acc_diff' and 'Citation'.
        save_cfg: dict with 'text_width', 'text_height', 'savepath' and
            'format', or None to use the default figure size and skip saving.

    Returns:
        The array of the two Axes.
    """
    # Read the figure size only when a config exists, so the
    # ``save_cfg is not None`` check further down is actually reachable.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'], save_cfg['text_height'] / 3)
    fig, axes = plt.subplots(
        nrows=2, ncols=1, sharex=True, figsize=figsize,
        gridspec_kw={'height_ratios': [5, 1]})

    results_df['Main domain'] = results_df['Main domain'].apply(
        ut.wrap_text, max_char=20)

    # Axes-level stripplot: the figure-level catplot used before does not
    # accept ``ax`` and drew into a new figure, leaving this panel empty.
    sns.stripplot(y='Main domain', x='acc_diff', size=3, jitter=True,
                  data=results_df, ax=axes[0])
    axes[0].set_xlabel('')
    axes[0].set_ylabel('')
    axes[0].axvline(0, c='k', alpha=0.2)

    sns.boxplot(x='acc_diff', data=diff_df, ax=axes[1])
    sns.swarmplot(x='acc_diff', data=diff_df, color="0", size=2, ax=axes[1])
    axes[1].axvline(0, c='k', alpha=0.2)
    axes[1].set_xlabel('Accuracy difference')

    fig.subplots_adjust(wspace=0, hspace=0.02)
    plt.tight_layout()

    logger.info('Number of studies included in the accuracy improvement analysis: {}'.format(
        results_df.shape[0]))
    median = diff_df['acc_diff'].median()
    iqr = diff_df['acc_diff'].quantile(.75) - diff_df['acc_diff'].quantile(.25)
    logger.info('Median gain in accuracy: {:.6f}'.format(median))
    logger.info('Interquartile range of the gain in accuracy: {:.6f}'.format(iqr))

    # zip() stops at the shorter input, so fewer than three studies no longer
    # raise an IndexError.
    best_improvement = diff_df.nlargest(3, 'acc_diff')
    ranks = ('Best', 'Second best', 'Third best')
    for rank, gain, citation in zip(ranks,
                                    best_improvement['acc_diff'].values,
                                    best_improvement['Citation'].values):
        logger.info('{} improvement in accuracy: {}, in {}'.format(
            rank, gain, citation))

    if save_cfg is not None:
        savename = 'reported_accuracy_per_domain'
        fname = os.path.join(save_cfg['savepath'], savename)
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return axes
Example #26
Source File: interpretation.py From lumin with Apache License 2.0 | 4 votes |
def plot_bottleneck_weighted_inputs(model:AbsModel, bottleneck_idx:int, inputs:Union[np.ndarray,Tensor], log_y:bool=True,
                                    savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) -> None:
    r'''
    Show which input features a single-neuron bottleneck depends on. The input data is passed through the model,
    and for every feature feeding the bottleneck, the absolute value of feature times weight is shown as a box plot,
    with features ordered by their mean weighted value.

    Arguments:
        model: model to interpret
        bottleneck_idx: index of the bottleneck to interpret, i.e. model.body.bottleneck_blocks[bottleneck_idx]
        inputs: input data to use for interpretation
        log_y: whether to plot a log scale for the y-axis
        savename: Optional name of file to which to save the plot of feature importances
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
    '''

    first_layer = model.body.bottleneck_blocks[bottleneck_idx][0]
    assert first_layer.weight.shape[0] == 1, 'This function currently only supports bottlenecks whose width is one neuron'

    # The hook captures what reaches the bottleneck's first layer during predict
    hook = FowardHook(first_layer)
    model.predict(inputs)
    weighted_input = to_np(torch.abs(hook.input[0]*first_layer.weight[0]))

    # Reverse feature map: input index -> feature name (suffixed when a feature spans several inputs)
    feat_map = model.head.feat_map
    rfm = {}
    for name in feat_map:
        idxs = feat_map[name]
        if len(idxs) == 1:
            rfm[idxs[0]] = name
        else:
            rfm.update({idx: f'{name}_{pos}' for pos, idx in enumerate(idxs)})

    pairs = [(rfm[feat], weighted_input[:, col])
             for col, feat in enumerate(model.body.bottleneck_masks[bottleneck_idx])]
    names = np.array([pair[0] for pair in pairs])
    values = np.array([pair[1] for pair in pairs])
    order = np.argsort(values.mean(axis=1))  # ascending mean weighted value
    x, y = list(names[order]), list(values[order])

    with sns.axes_style(**settings.style), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_mid, settings.h_mid))
        sns.boxplot(x=x, y=y)
        plt.xlabel("Features", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.ylabel(r"$|w_i\times x_i|$", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.yticks(fontsize=settings.tk_sz, color=settings.tk_col)
        if log_y:
            # NOTE(review): newer Matplotlib spells this keyword `nonpositive` - confirm the supported version
            plt.yscale('log', nonposy='clip')
        plt.xticks(rotation=90)
        plt.title(settings.title, fontsize=settings.title_sz, color=settings.title_col, loc=settings.title_loc)
        if savename is not None:
            plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()
Example #27
Source File: interpretation.py From lumin with Apache License 2.0 | 4 votes |
def plot_multibody_weighted_outputs(model:AbsModel, inputs:Union[np.ndarray,Tensor], block_names:Optional[List[str]]=None, use_mean:bool=False,
                                    savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) -> None:
    r'''
    Show how strongly the tail of a model depends on each body block. The input data is passed through the model,
    each block's outputs are multiplied with the matching slice of the first tail layer's weights, and the absolute
    weighted sum per datum is shown as one box per block. Only tails whose first dense layer has a single neuron
    are supported.

    Arguments:
        model: model to interpret
        inputs: input data to use for interpretation
        block_names: names for each block to use when plotting
        use_mean: if True, will average the weighted outputs over the number of output neurons in each block
        savename: Optional name of file to which to save the plot of feature importances
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
    '''

    tail_layer = model.tail[0]
    assert tail_layer.weight.shape[0] == 1, 'This function currently only supports models whose tail block contains a single neuron in the first dense layer'
    if block_names is None:
        block_names = [str(i) for i in range(len(model.body.blocks))]
    else:
        assert len(block_names) == len(model.body.blocks), 'block_names passed, but number of names does not match number of blocks'

    # The hook captures what reaches the first tail layer during predict
    hook = FowardHook(tail_layer)
    model.predict(inputs)

    y = []
    offset = 0  # start of the current block's slice in the concatenated body output
    for block in model.body.blocks:
        width = block.get_out_size()
        stop = offset + width
        outputs = hook.input[0][:, offset:stop]
        weights = tail_layer.weight[0][offset:stop]
        magnitude = torch.abs(outputs@weights)
        if use_mean:
            magnitude = magnitude/width
        y.append(to_np(magnitude))
        offset = stop

    with sns.axes_style(**settings.style), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_mid, settings.h_mid))
        sns.boxplot(x=block_names, y=y)
        plt.xlabel("Block", fontsize=settings.lbl_sz, color=settings.lbl_col)
        y_label = r"Mean $|\bar{w}\cdot\bar{x}|$" if use_mean else r"$|\bar{w}\cdot\bar{x}|$"
        plt.ylabel(y_label, fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.yticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.title(settings.title, fontsize=settings.title_sz, color=settings.title_col, loc=settings.title_loc)
        if savename is not None:
            plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()
Example #28
Source File: similarity_scores_time_benchmark.py From dirty_cat with BSD 3-Clause "New" or "Revised" License | 4 votes |
def plot(bench, title=''):
    """Plot benchmark scores (boxplot) and timings (barplot) and save both.

    Every entry of ``bench`` holds two results, the first labelled 'k-means'
    and the second 'most-frequent'.  In each result, index 1 is a sequence of
    scores, and indices 0 and 2 are timings (the latter is divided by 20).
    Entries are labelled with the vectorizer names in ``hash_dims``, cycling
    every five entries.  The figures are written to ``<title>_score.png`` and
    ``<title>_time.png``, where ``<title>`` is lower-cased with spaces and
    commas replaced by underscores and colons by hyphens.
    """
    sns.set(style='ticks', palette='muted')
    hash_dims = ['Count', '2 ** 14', '2 ** 16', '2 ** 18', '2 ** 20']

    # ---- scores -----------------------------------------------------------
    score_cols = {'vectorizer': [], 'strategy': [], 'score': []}
    for pos, entry in enumerate(bench):
        kmeans, most_frequent = entry[0], entry[1]
        score_cols['vectorizer'] += [hash_dims[pos % 5]] * (2 * len(kmeans[1]))
        score_cols['strategy'] += ['k-means'] * len(kmeans[1])
        score_cols['strategy'] += ['most-frequent'] * len(most_frequent[1])
        score_cols['score'] += list(kmeans[1])
        score_cols['score'] += list(most_frequent[1])
    df = pd.DataFrame(score_cols, columns=['vectorizer', 'strategy', 'score'])

    first = plt.figure()
    ax = sns.boxplot(x='vectorizer', y='score', hue='strategy', data=df)
    ax.set(title=title, xlabel='Vectorizer used', ylabel='Mean score on 10 cross validations')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    first.tight_layout()

    # ---- timings ----------------------------------------------------------
    operations = ['K-means vect', 'K-means X-val', 'MF vect', 'MF X-val']
    time_cols = {'vectorizer': [], 'strategy/operation': [], 'time': []}
    for pos, entry in enumerate(bench):
        kmeans, most_frequent = entry[0], entry[1]
        time_cols['vectorizer'] += [hash_dims[pos % 5]] * 4
        time_cols['strategy/operation'] += operations
        time_cols['time'] += [kmeans[0], kmeans[2] / 20,
                              most_frequent[0], most_frequent[2] / 20]
    df = pd.DataFrame(time_cols, columns=['vectorizer', 'strategy/operation', 'time'])

    second = plt.figure()
    ax1 = sns.barplot(x='vectorizer', y='time', hue='strategy/operation', data=df)
    ax1.set(title=title, xlabel='Vectorizer used', ylabel='Time in seconds')
    ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    second.tight_layout()

    # File-system friendly variant of the title
    title = title.replace(' ', '_').replace(':', '-').replace(',', '_').lower()
    first.savefig(title + '_score.png')
    second.savefig(title + '_time.png')