Python Examples of seaborn.boxplot

Source File: plot_errors_boxplot.py From MDI with MIT License

7 votes

def plot(params_dir):
    """Box-plot the best validation objective of every model in ``params_dir``.

    Every sub-directory of ``params_dir`` is treated as one model. Each
    ``*.h5`` file inside it is loaded with ``dd.io.load`` and contributes its
    ``['best_epoch']['validate_objective']`` entry. The collected table is
    written as CSV to a file named after the last component of ``params_dir``
    (relative to the current working directory) and the box plot is saved as
    ``<name>_errors_box_plot.png`` under ``IMAGES_DIRECTORY``.

    Parameters:
        params_dir: path of the directory holding one sub-directory per model.
    """
    run_name = os.path.basename(os.path.normpath(params_dir))
    model_dirs = [name for name in os.listdir(params_dir)
                  if os.path.isdir(os.path.join(params_dir, name))]

    objectives = {}
    for model_dir in model_dirs:
        label = re.sub('_bin_scaled_mono_True_ratio', '', model_dir)
        objectives[label] = [
            dd.io.load(path)['best_epoch']['validate_objective']
            for path in glob.glob(os.path.join(params_dir, model_dir) + '/*.h5')]

    # Wrapping each list in a Series lets models with different numbers of
    # runs share one frame. ``items()`` replaces the Python-2-only
    # ``iteritems()`` and works on both major versions.
    df = pd.DataFrame({label: pd.Series(values)
                       for label, values in objectives.items()})
    df.to_csv(run_name)

    plt.figure(figsize=(16, 4), dpi=300)
    # Pass the wide-form frame as ``data=``; the first positional slot of
    # ``sns.boxplot`` is ``x``, not the data set.
    g = sns.boxplot(data=df)
    g.set_xticklabels(df.columns, rotation=45)
    plt.tight_layout()
    plt.savefig('{}_errors_box_plot.png'.format(
        os.path.join(IMAGES_DIRECTORY, run_name)))

Source File: brute_force_plotter.py From brute-force-plotter with MIT License

7 votes

def bar_box_violin_dot_plots(data, category_col, numeric_col, axes, file_name=None):
    """Draw four views of ``numeric_col`` grouped by ``category_col``.

    The panels go to fixed slots of ``axes``: bar plot on ``axes[0]``, strip
    plot on ``axes[1]``, box plot on ``axes[2]`` and violin plot on
    ``axes[3]``. The box plot only sees rows whose ``numeric_col`` is not
    null. ``file_name`` is accepted but not used by this function.
    """
    sns.barplot(category_col, numeric_col, data=data, ax=axes[0])

    # Only the box plot is fed the null-filtered rows.
    observed = data[data[numeric_col].notnull()]
    sns.boxplot(category_col, numeric_col, data=observed, ax=axes[2])

    violin_options = dict(
        data=data,
        kind="violin",
        inner="quartile",
        scale="count",
        split=True,
        ax=axes[3],
    )
    sns.violinplot(category_col, numeric_col, **violin_options)

    sns.stripplot(category_col, numeric_col, data=data, jitter=True, ax=axes[1])
    sns.despine(left=True)

Source File: stock_visualizer.py From stock-analysis with MIT License

6 votes

def boxplot(self, column, **kwargs):
        """
        Draw box plots of one column, grouped by ``self.group_by``.

        Parameters:
            - column: The name of the column in ``self.data`` to plot
                      on the y-axis.
            - kwargs: Extra keyword arguments forwarded unchanged to
                      ``sns.boxplot``.

        Returns:
            Whatever ``sns.boxplot`` returns (a matplotlib Axes object).
        """
        plot_args = dict(x=self.group_by, y=column, data=self.data)
        return sns.boxplot(**plot_args, **kwargs)

Source File: plotlib.py From mCaller with MIT License

6 votes

def plot_change_by_pos(diffs_by_context,plottype='box'):
    """Plot per-position current differences, split by base label.

    ``diffs_by_context`` is indexed as ``[label][context]`` and yields
    sequences of differences; the last element of every sequence is skipped.
    Positions are numbered from 1. ``plottype`` selects ``'box'`` or
    ``'violin'``; any other value draws no distribution at all. The figure is
    always written to ``change_by_position_box.pdf``.
    """
    fig = plt.figure(figsize=(6,4))

    positions, bases, diffs = [], [], []
    for lab in diffs_by_context:
        by_context = diffs_by_context[lab]
        for context in by_context:
            for entry in by_context[context]:
                for offset, diff in enumerate(entry[:-1]):
                    positions.append(offset + 1)
                    bases.append(lab)
                    diffs.append(diff)
    dPos = pd.DataFrame({'position': positions, 'base': bases, 'diff': diffs})

    plotters = {'box': sns.boxplot, 'violin': sns.violinplot}
    if plottype in plotters:
        # The palette is only looked up when something is actually drawn.
        plotters[plottype](x="position", y="diff", hue="base", data=dPos,
                           palette=[cols[base],cols[methbase]])

    sns.despine(trim=False)
    plt.xlabel('Adenine Position in 6-mer')
    plt.ylabel('Measured - Expected Current (pA)')
    plt.ylim([-20,20])
    plt.legend(title='',loc='upper center', bbox_to_anchor=(0.5, 1.05),
          ncol=3, fancybox=True)
    plt.savefig('change_by_position_box.pdf',transparent=True,dpi=500, bbox_inches='tight')

Source File: TargetAnalysisCategorical.py From exploripy with MIT License

6 votes

def BoxPlot(self, feature):
		"""
		Save a single-box plot of ``self.df[feature]`` and return its path.

		Two distinct entries of ``self.SelectedColors`` are picked at random;
		in palette order, the first becomes the box face colour and the second
		its edge colour. The image is written next to this module, under
		``HTMLTemplate/dist/output/<feature>.png``.

		Returns the path of the written PNG.
		"""
		fig, ax = plt.subplots()
		try:
			ax = sns.boxplot(y=self.df[feature], ax=ax)
			# NOTE(review): assumes seaborn registers the box in ax.artists;
			# confirm against the pinned seaborn/matplotlib versions.
			box = ax.artists[0]
			indices = random.sample(range(len(self.SelectedColors)), 2)
			colors=[self.SelectedColors[i] for i in sorted(indices)]
			box.set_facecolor(colors[0])
			box.set_edgecolor(colors[1])
			sns.despine(offset=10, trim=True)
			this_dir, this_filename = os.path.split(__file__)
			# The former Linux-only branch assigned an unused variable and
			# never changed the output path, so it has been dropped.
			OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/'+feature + '.png')
			plt.savefig(OutFileName)
		finally:
			# pyplot keeps every figure alive until it is closed explicitly.
			plt.close(fig)

		return OutFileName

Source File: EDA.py From exploripy with MIT License

6 votes

def BoxPlot(self,var):
		"""
		Save a single-box plot of ``self.df[var]`` and return the file path.

		The box is recoloured with two distinct, randomly chosen entries of
		``self.SelectedColors`` (face colour first, edge colour second, in
		palette order). When ``self.debug`` equals ``'YES'`` the elapsed
		time is printed.
		"""
		started = time.time()

		fig, ax = plt.subplots()
		ax = sns.boxplot(y=self.df[var], ax=ax)
		box = ax.artists[0]

		# sorted() keeps the two picks in palette order.
		face_idx, edge_idx = sorted(random.sample(range(len(self.SelectedColors)), 2))
		face_colour = self.SelectedColors[face_idx]
		edge_colour = self.SelectedColors[edge_idx]
		box.set_facecolor(face_colour)
		box.set_edgecolor(edge_colour)
		sns.despine(offset=10, trim=True)

		module_dir = os.path.split(__file__)[0]
		OutFileName = os.path.join(module_dir, 'HTMLTemplate/dist/output/'+var + '.png')
		plt.savefig(OutFileName)

		finished = time.time()
		if self.debug == 'YES':
			print('BoxPlot',finished-started)

		return OutFileName

Source File: TargetAnalysisContinuous.py From exploripy with MIT License

6 votes

def BoxPlot(self, feature):
		"""
		Save a single-box plot of ``self.df[feature]`` and return its path.

		Two distinct entries of ``self.SelectedColors`` are picked at random;
		in palette order, the first becomes the box face colour and the second
		its edge colour. The image is written next to this module, under
		``HTMLTemplate/dist/output/<feature>.png``.

		Returns the path of the written PNG.
		"""
		fig, ax = plt.subplots()
		try:
			ax = sns.boxplot(y=self.df[feature], ax=ax)
			# NOTE(review): assumes seaborn registers the box in ax.artists;
			# confirm against the pinned seaborn/matplotlib versions.
			box = ax.artists[0]
			indices = random.sample(range(len(self.SelectedColors)), 2)
			colors=[self.SelectedColors[i] for i in sorted(indices)]
			box.set_facecolor(colors[0])
			box.set_edgecolor(colors[1])
			sns.despine(offset=10, trim=True)
			this_dir, this_filename = os.path.split(__file__)
			# The former Linux-only branch rebuilt exactly the same path, so
			# a single assignment is enough.
			OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/'+feature + '.png')
			plt.savefig(OutFileName)
		finally:
			# pyplot keeps every figure alive until it is closed explicitly.
			plt.close(fig)

		return OutFileName

Source File: metrics_acdc.py From acdc_segmenter with Apache License 2.0

6 votes

def boxplot_metrics(df, eval_dir):
    """
    Write one figure with three stacked box plots to ``eval_dir``.

    The panels show the ``dice``, ``hd`` and ``assd`` columns of ``df``
    (top to bottom), each grouped by ``struc`` on the x-axis and split by
    ``phase`` as hue.

    :param df: table with the columns named above
    :param eval_dir: directory that receives ``boxplots.eps``
    :return: always 0
    """
    target_path = os.path.join(eval_dir, 'boxplots.eps')

    fig, axes = plt.subplots(3, 1)
    fig.set_figheight(14)
    fig.set_figwidth(7)

    for row, measure in enumerate(('dice', 'hd', 'assd')):
        sns.boxplot(x='struc', y=measure, hue='phase', data=df,
                    palette="PRGn", ax=axes[row])

    plt.savefig(target_path)
    plt.close()

    return 0

Source File: stats.py From temci with GNU General Public License v3.0

6 votes

def boxplot(self, fig_width: Number, fig_height: Number = None):
        """
        Draw a horizontal box plot of the frame returned by ``self.get_data_frame()``.

        The new figure is stored in ``self._fig``.

        :param fig_width: width of the figure in cm
        :param fig_height: height of the figure in cm; when None it is derived
                           from the width via ``self._height_for_width``
        """
        import seaborn as sns
        import matplotlib.pyplot as plt
        self.reset_plt()
        height_cm = self._height_for_width(fig_width) if fig_height is None else fig_height
        size_inch = self._fig_size_cm_to_inch(fig_width, height_cm)
        self._fig = plt.figure(figsize=size_inch)
        frame = self.get_data_frame()
        sns.boxplot(data=frame, orient="h")

Source File: plotlib.py From mCaller with MIT License

5 votes

def plot_training_probabilities(prob_scores,tb):
    """Box-plot the training probabilities of the two base classes.

    ``prob_scores`` maps a base label to a list of probabilities, e.g.
    ``{'m6A': [0.9, 0.4, ...], 'A': [0.1, 0.5, 0.2, ...]}``; the entries
    for the module-level ``base`` and ``modbase`` labels are plotted side by
    side. The figure is saved as
    ``training_probability_<tb>_model_boxplot.pdf`` and then displayed.
    """
    sns.set_style('darkgrid')
    sns.set_palette(['#55B196','#B4656F'])
    plt.figure(figsize=(3,4))
    prob_db = pd.DataFrame({
        'probability': prob_scores[base]+prob_scores[modbase],
        'base': [base]*len(prob_scores[base])+[modbase]*len(prob_scores[modbase]),
    })
    sns.boxplot(x="base", y="probability", data=prob_db)
    sns.despine()
    # Save before showing: once a blocking show() returns, the figure may
    # already be gone and savefig() would write an empty canvas.
    plt.savefig('training_probability_'+tb+'_model_boxplot.pdf',transparent=True,dpi=500,bbox_inches='tight')
    plt.show()

Source File: stock_visualizer.py From stock-analysis with MIT License

5 votes

def boxplot(self, **kwargs):
        """Placeholder: subclasses must override this to draw boxplots."""
        message = 'To be implemented by subclasses!'
        raise NotImplementedError(message)

Source File: stats.py From temci with GNU General Public License v3.0

5 votes

def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]:
        """
        Calculates the upper and the lower whisker for a boxplot.
        I.e. the minimum and the maximum value of the data set
        that lie in the range (Q1 - whis * IQR, Q3 + whis * IQR).
        IQR being the interquartile distance, Q1 the lower and Q3 the upper quartile.

        Adapted from http://stackoverflow.com/a/20096945

        :param whis: multiple of the IQR that the fences extend beyond the quartiles
        :return: tuple ``(lower whisker, upper whisker)``
        """
        q1, _, q3 = self.quartiles()
        iqr = self.iqr()

        # get high extreme: the upper fence is measured from Q3 (not Q1)
        hi_val = q3 + whis * iqr
        whisk_hi = np.compress(self.array <= hi_val, self.array)
        if len(whisk_hi) == 0 or np.max(whisk_hi) < q3:
            # no data point between Q3 and the fence: whisker collapses onto the box
            whisk_hi = q3
        else:
            whisk_hi = max(whisk_hi)

        # get low extreme
        lo_val = q1 - whis * iqr
        whisk_lo = np.compress(self.array >= lo_val, self.array)
        if len(whisk_lo) == 0 or np.min(whisk_lo) > q1:
            whisk_lo = q1
        else:
            whisk_lo = min(whisk_lo)
        return whisk_lo, whisk_hi

Source File: c5.py From abu with GNU General Public License v3.0

5 votes

def sample_54_1():
    """
    5.4 Visualizing data with seaborn.

    Shows three plots of the module-level ``tsla_df`` frame in turn: the
    distribution of ``p_change``, a box plot of ``p_change`` per
    ``date_week``, and a joint plot of ``high`` against ``low``.
    :return:
    """
    daily_change = tsla_df['p_change']
    sns.distplot(daily_change, bins=80)
    plt.show()

    sns.boxplot(x='date_week', y='p_change', data=tsla_df)
    plt.show()

    highs, lows = tsla_df['high'], tsla_df['low']
    sns.jointplot(highs, lows)
    plt.show()

Source File: plot.py From gumpy with MIT License

5 votes

def accuracy_results_plot(data_path):
    """Show a box plot with one box per column of the CSV at ``data_path``.

    The first CSV column is used as the index. Axis labels come from the
    module-level ``x_label`` and ``y_label`` names.
    """
    data = pd.read_csv(data_path,index_col=0)
    # Configure the figure size before anything is drawn: the rc value only
    # affects figures created afterwards. The box plot is drawn once; drawing
    # it both before and after sns.set() stacked two copies on the same axes.
    sns.set(rc={"figure.figsize": (9, 6)})
    ax = sns.boxplot(data=data)
    ax.set_xlabel(x_label,fontsize=15)
    ax.set_ylabel(y_label,fontsize=15)
    plt.show()

Source File: visualization.py From default-credit-card-prediction with MIT License

5 votes

def visualize_feature_boxplot(X,y,selected_feature,features):
	"""
	Save a box plot of one feature, split by the value of column 'Y'.

	Keyword arguments:
	X -- The feature vectors
	y -- The target vector, stacked as the last column next to X
	selected_feature -- Name of the column to plot on the y-axis
	features -- Column names for the stacked data; the plot reads a
	            column called 'Y' from it
	"""

	# stack features and target side by side and label the columns
	frame=pd.DataFrame(data=np.column_stack((X,y)),columns=features)

	sea.boxplot(data=frame,x='Y',y=selected_feature,hue="Y",palette="husl")
	plt.title('BoxPlot Distribution of '+selected_feature)

	# write the figure below the "img" directory
	output_dir = "img"
	target='{}/{}_boxplot.png'.format(output_dir,selected_feature)
	save_fig(output_dir,target)
	# plt.show()

Source File: features.py From bartpy with MIT License

5 votes

def plot_null_feature_importance_distributions(null_distributions: Mapping[int, List[float]], ax=None) -> "plt.Axes":
    """Box-plot the null importance distribution of every variable.

    Parameters
    ----------
    null_distributions
        Maps a variable index to the list of importance values observed for it.
    ax
        Axes to draw on; a new single-axes figure is created when omitted.

    Returns
    -------
    The axes that were drawn on (the annotation previously claimed ``None``).
    """
    if ax is None:
        _, ax = plt.subplots(1, 1)
    df = pd.DataFrame(null_distributions)
    # Wide (one column per variable) -> long (variable, value) layout;
    # "level_1" is the per-variable row counter and is not needed.
    df = pd.DataFrame(df.unstack()).reset_index().drop("level_1", axis=1)
    df.columns = ["variable", "p"]
    sns.boxplot(x="variable", y="p", data=df, ax=ax)
    ax.set_title("Null Feature Importance Distribution")
    return ax

Source File: alignment_evaluation.py From policy_diffusion with MIT License

5 votes

def plot_grid(self):
        """
        Show box plots of ``score`` by ``match`` across the parameter grid.

        Calls ``self._create_grid_df()`` and then reads ``self.grid_df``;
        scores above 500 are clipped to 500 in place. Three facet grids are
        shown one after another. Each varies one of ``match_score``,
        ``mismatch_score`` and ``gap_score`` across its columns while the
        other two are held at their fixed values (3, -2 and -1 respectively).
        """
        # Use pyplot directly: the ``sns.plt`` alias is not available in
        # recent seaborn releases.
        import matplotlib.pyplot as plt

        self._create_grid_df()

        df = self.grid_df
        #make maximum possible 500
        df.loc[df['score']>500,'score'] = 500

        fixed_values = {'match_score': 3, 'mismatch_score': -2, 'gap_score': -1}
        for varied in ('match_score', 'mismatch_score', 'gap_score'):
            # keep only rows where every *other* parameter sits at its fixed value
            keep = None
            for name, value in fixed_values.items():
                if name == varied:
                    continue
                condition = df[name] == value
                keep = condition if keep is None else keep & condition

            g = sns.FacetGrid(df[keep], col=varied)
            g.map(sns.boxplot, "match", "score")
            plt.ylim(0,400)
            plt.show()

Source File: alignment_evaluation.py From policy_diffusion with MIT License

5 votes

def plot_num_matches(self):
        """
        Plot the distribution of ``num_matches`` for matching and non-matching bill pairs.

        Every ordered pair of keys of ``self.bills`` is visited. Pairs whose
        entry in ``self.scores`` is 0 are skipped. A pair counts as a match
        only when ``i < j`` and its result is flagged ``'match'``; every other
        pair is counted as a non-match. Values are capped at 200. An overlaid
        histogram is shown first, followed by a two-box matplotlib box plot.
        """
        matchScores = []
        nonMatchScores = []

        for i in self.bills.keys():
            for j in self.bills.keys():
                if self.scores[i,j] == 0:
                    #ignore if score zero because url is broken
                    continue
                is_match = i < j and self.results[(i,j)]['match']
                bucket = matchScores if is_match else nonMatchScores
                bucket.append(min(self.results[(i,j)]['features'][0]['num_matches'],200))

        lowest = min(nonMatchScores + matchScores)
        highest = max(nonMatchScores + matchScores)
        bins = np.linspace(lowest, highest, 100)
        for values, label in ((nonMatchScores, 'Non-Matches'), (matchScores, 'Matches')):
            plt.hist(values, bins, alpha=0.5, label=label)
        plt.legend(loc='upper right')
        plt.xlabel('Alignment Score')
        plt.ylabel('Number of Bill Pairs')
        plt.title('Distribution of Alignment Scores')
        plt.show()

        #make boxplot
        fig = plt.figure(1, figsize=(9, 6))
        ax = fig.add_subplot(111)
        ax.boxplot([matchScores, nonMatchScores])
        ax.set_xticklabels(['Match Scores', 'Non-Match Scores'])
        fig.show()

Source File: alignment_evaluation.py From policy_diffusion with MIT License

5 votes

def plot_scores(self):
        """
        Plot the distribution of alignment scores for matching and non-matching bill pairs.

        Every ordered pair of keys of ``self.bills`` is visited. Pairs that
        are missing from ``self.results`` or whose ``'score'`` is 0 are
        skipped. A pair counts as a match only when ``i < j`` and its result
        is flagged ``'match'``; every other pair is counted as a non-match.
        Scores are capped at 200. An overlaid histogram is shown first,
        followed by a two-box matplotlib box plot.
        """
        matchScores = []
        nonMatchScores = []

        for i in self.bills.keys():
            for j in self.bills.keys():
                if (i,j) not in self.results or self.results[(i,j)]['score'] == 0:
                    #ignore if score zero because url is broken
                    continue
                is_match = i < j and self.results[(i,j)]['match']
                bucket = matchScores if is_match else nonMatchScores
                bucket.append(min(self.results[(i,j)]['score'],200))

        lowest = min(nonMatchScores + matchScores)
        highest = max(nonMatchScores + matchScores)
        bins = np.linspace(lowest, highest, 100)
        for values, label in ((nonMatchScores, 'Non-Matches'), (matchScores, 'Matches')):
            plt.hist(values, bins, alpha=0.5, label=label)
        plt.legend(loc='upper right')
        plt.xlabel('Alignment Score')
        plt.ylabel('Number of Bill Pairs')
        plt.title('Distribution of Alignment Scores')
        plt.show()

        #make boxplot
        fig = plt.figure(1, figsize=(9, 6))
        ax = fig.add_subplot(111)
        ax.boxplot([matchScores, nonMatchScores])
        ax.set_xticklabels(['Match Scores', 'Non-Match Scores'])
        fig.show()

Source File: stock_visualizer.py From stock-analysis with MIT License

5 votes

def boxplot(self, **kwargs):
        """
        Draw a box plot of ``self.data`` through its own ``plot`` method.

        Parameters:
            - kwargs: Extra keyword arguments forwarded unchanged to
                      ``self.data.plot``.

        Returns:
            Whatever ``self.data.plot(kind='box', ...)`` returns
            (a matplotlib Axes object).
        """
        draw = self.data.plot
        return draw(kind='box', **kwargs)

Source File: plotting.py From fmridenoise with Apache License 2.0

4 votes

def motion_plot(group_conf_summary):
    """Build a three-panel figure of motion metrics with their thresholds.

    One panel each is drawn for the ``mean_fd``, ``max_fd`` and
    ``perc_spikes`` columns of ``group_conf_summary``: a swarm plot per
    ``task`` coloured by ``include``, overlaid with an unfilled box plot and
    a dashed horizontal line at the metric's threshold (0.2, 5 and 20).

    Note that this changes global matplotlib/seaborn settings (style, font,
    rc values and colour palette).

    Returns the matplotlib figure holding the three panels.
    """
    # Plot style setup
    plt.style.use('seaborn-white')
    plt.rcParams['font.family'] = 'Helvetica'
    colour = ["#fe6863", "#00a074"]
    # set_palette() returns None; it only installs the colours globally, and
    # the swarm plot below picks them up through the default colour cycle.
    sns.set_palette(colour)

    small = 15
    plt.rc('font', size=small)  # controls default text sizes
    plt.rc('axes', titlesize=small)  # fontsize of the axes title
    plt.rc('axes', linewidth=2.2)
    plt.rc('axes', labelsize=small)  # fontsize of the x and y labels
    plt.rc('xtick', labelsize=small)  # fontsize of the tick labels
    plt.rc('ytick', labelsize=small)  # fontsize of the tick labels
    plt.rc('legend', fontsize=small)  # legend fontsize
    plt.rc('lines', linewidth=2.2, color='gray')
    # ------------------------------------------

    # y-axis label -> [column name, exclusion threshold]
    motion_dict = {'Mean FD': ['mean_fd', 0.2],
                   'Max FD': ['max_fd', 5],
                   'Percent of outlier dataframes (%)': ['perc_spikes', 20]}

    fig, axes = plt.subplots(1, 3, figsize=(16, 7))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)

    # Everything is drawn on the pre-made axes, so no extra figure is opened
    # per metric (that only left empty figures behind).
    for ax, (key, (column, threshold)) in zip(axes, motion_dict.items()):
        sns.swarmplot(y=column,
                      x="task",
                      hue="include",
                      data=group_conf_summary,
                      alpha=0.8,
                      s=10,
                      ax=ax
                      )

        p = sns.boxplot(y=column,
                        x="task",
                        data=group_conf_summary,
                        showcaps=False,
                        boxprops={'facecolor': 'None'},
                        showfliers=False, ax=ax)

        p.title.set_text(f"Threshold = {threshold}")
        p.axhline(threshold, ls='--', color="#fe6863")
        p.set(xlabel='')
        p.set(ylabel=key)
        p.get_legend().set_visible(False)
        p.tick_params(axis='both', which='both', length=6, width=2.2)
    fig.suptitle("Excluding high motion subjects", va="top")

    return fig

Source File: visualization.py From default-credit-card-prediction with MIT License

4 votes

def visualize_boxplots(X,y):
	"""
	Show horizontal box plots for six groups of feature columns, one
	figure after another.

	Keyword arguments:
	X -- The feature vectors (2-D, sliced by column; columns 0 to 22 are read)
	y -- The target vector (not used by this function)
	"""

	# (column slice of X, column names, plot title)
	groups=[
		(slice(0,1),["Credit"],
		 'BoxPlot Distribution of Credit'),
		(slice(1,4),["Gender","Education","Marital Status"],
		 'BoxPlot Distribution of Features: Gender, Education and Marital Status'),
		(slice(4,5),["Age"],
		 'BoxPlot Distribution of Age'),
		(slice(5,11),["X6","X7","X8","X9","X10","X11"],
		 'BoxPlot Distribution of Features: History of Payment'),
		(slice(11,17),["X12","X13","X14","X15","X16","X17"],
		 'BoxPlot Distribution of Features: Amount of Bill Statements'),
		# This panel used to re-plot columns 11-16 under the X18..X23 names.
		(slice(17,23),["X18","X19","X20","X21","X22","X23"],
		 'BoxPlot Distribution of Features: Amount of Previous Payments'),
	]

	for columns,names,title in groups:
		df=pd.DataFrame(data=X[:,columns],columns=names)
		sea.boxplot(data=df, orient="h",palette="husl")
		plt.title(title)
		plt.show()

Source File: plots.py From AlphaPy with Apache License 2.0

4 votes

def plot_box(df, x, y, hue, tag='eda', directory=None):
    r"""Draw a seaborn box plot and hand the figure to ``write_plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data set holding the ``x``, ``y`` and ``hue`` columns.
    x : str
        Column of ``df`` shown along the x-axis.
    y : str
        Column of ``df`` shown along the y-axis.
    hue : str
        Column of ``df`` used to split each box by colour.
    tag : str
        Identifier passed on to ``write_plot``.
    directory : str, optional
        Plot location passed on to ``write_plot``.

    Returns
    -------
    None : None.

    References
    ----------

    http://seaborn.pydata.org/generated/seaborn.boxplot.html

    """

    logger.info("Generating Box Plot")

    # Draw, then trim the spines of the resulting axes
    axes = sns.boxplot(x=x, y=y, hue=hue, data=df)
    sns.despine(offset=10, trim=True)

    # Persist the figure that owns the axes
    write_plot('seaborn', axes.get_figure(), 'box_plot', tag, directory)


#
# Function plot_swarm
#

Source File: benchmarks.py From datawig with Apache License 2.0

4 votes

def plot_results(results):
    """Plot relative MSE per imputer for the three missingness mechanisms.

    Reads ``benchmark_results.csv`` from the module-level ``dir_path``,
    scales every ``mse`` by the largest ``mse`` within its
    (data, missingness, percent_missing) group, and draws one box-plot panel
    each for the ``MCAR``, ``MAR`` and ``MNAR`` rows. Only the last panel
    keeps a legend. The figure is saved as ``benchmarks_datawig.pdf``.

    ``results`` is accepted but not used; the data always comes from the CSV.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    # read_csv opens and closes the file itself; this also repairs the
    # unbalanced parenthesis that made the original line a syntax error.
    df = pd.read_csv(os.path.join(dir_path, 'benchmark_results.csv'))
    group_max = df.groupby(['data','missingness','percent_missing'])['mse'].transform('max')
    df['mse_percent'] = df.mse / group_max
    # A per-imputer median table was computed here and immediately
    # discarded; it had no effect on the plot and has been removed.

    sns.set_style("whitegrid")
    sns.set_palette(sns.color_palette("RdBu_r", 7))
    sns.set_context("notebook",
                    font_scale=1.3,
                    rc={"lines.linewidth": 1.5})
    plt.figure(figsize=(12,3))

    # (value in the 'missingness' column, panel title, y-axis label)
    panels = [('MCAR', "Missing completely at random", "Relative MSE"),
              ('MAR', "Missing at random", ''),
              ('MNAR', "Missing not at random", "")]
    for position, (mechanism, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        sns.boxplot(hue='imputer',
                    y='mse_percent',
                    x='percent_missing',
                    data=df[df['missingness'] == mechanism])
        plt.title(title)
        plt.xlabel('Percent Missing')
        plt.ylabel(ylabel)
        if position < len(panels):
            # one shared legend on the last panel is enough
            plt.gca().get_legend().remove()

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    plt.tight_layout()
    plt.savefig('benchmarks_datawig.pdf')

# NOTE(review): module-level call that runs on import; `experiment` is not
# defined in the visible source - confirm where it comes from.
experiment()

Source File: analysis.py From dl-eeg-review with MIT License

4 votes

def _plot_results_accuracy_per_domain(results_df, diff_df, save_cfg):
    """Make scatterplot + boxplot to show accuracy difference by domain.

    The top panel shows ``acc_diff`` of ``results_df`` per (wrapped)
    ``Main domain``; the bottom panel shows a box plot and swarm plot of
    ``acc_diff`` of ``diff_df``. The median, the interquartile range and up
    to three largest improvements are logged. The ``Main domain`` column of
    ``results_df`` is overwritten with its wrapped text.

    Args:
        results_df: table with ``Main domain`` and ``acc_diff`` columns.
        diff_df: table with ``acc_diff`` and ``Citation`` columns.
        save_cfg: mapping read for ``text_width``, ``text_height``,
            ``savepath`` and ``format``; it is also forwarded as keyword
            arguments to ``fig.savefig``.

    Returns:
        The array of the two axes.
    """
    # NOTE(review): save_cfg is dereferenced here, so the `is not None`
    # check further down can never be False - confirm the intended contract.
    fig, axes = plt.subplots(
        nrows=2, ncols=1, sharex=True,
        figsize=(save_cfg['text_width'], save_cfg['text_height'] / 3),
        gridspec_kw = {'height_ratios':[5, 1]})

    results_df['Main domain'] = results_df['Main domain'].apply(
        ut.wrap_text, max_char=20)

    # stripplot is the axes-level function behind catplot's default kind;
    # catplot itself is figure-level and does not draw on a supplied `ax`,
    # which left the top panel without points.
    sns.stripplot(y='Main domain', x='acc_diff', size=3, jitter=True,
                  data=results_df, ax=axes[0])
    axes[0].set_xlabel('')
    axes[0].set_ylabel('')
    axes[0].axvline(0, c='k', alpha=0.2)

    sns.boxplot(x='acc_diff', data=diff_df, ax=axes[1])
    sns.swarmplot(x='acc_diff', data=diff_df, color="0", size=2, ax=axes[1])
    axes[1].axvline(0, c='k', alpha=0.2)
    axes[1].set_xlabel('Accuracy difference')

    fig.subplots_adjust(wspace=0, hspace=0.02)
    plt.tight_layout()

    logger.info('Number of studies included in the accuracy improvement analysis: {}'.format(
        results_df.shape[0]))
    median = diff_df['acc_diff'].median()
    iqr = diff_df['acc_diff'].quantile(.75) - diff_df['acc_diff'].quantile(.25)
    logger.info('Median gain in accuracy: {:.6f}'.format(median))
    logger.info('Interquartile range of the gain in accuracy: {:.6f}'.format(iqr))

    # zip() stops early when fewer than three rows exist instead of raising
    # an IndexError.
    best_improvement = diff_df.nlargest(3, 'acc_diff')
    ranks = ('Best', 'Second best', 'Third best')
    for rank, gain, citation in zip(ranks,
                                    best_improvement['acc_diff'].values,
                                    best_improvement['Citation'].values):
        logger.info('{} improvement in accuracy: {}, in {}'.format(
            rank, gain, citation))

    if save_cfg is not None:
        savename = 'reported_accuracy_per_domain'
        fname = os.path.join(save_cfg['savepath'], savename)
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return axes

Source File: interpretation.py From lumin with Apache License 2.0

4 votes

def plot_bottleneck_weighted_inputs(model:AbsModel, bottleneck_idx:int, inputs:Union[np.ndarray,Tensor], log_y:bool=True,
                                    savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) -> None:
    r'''
    Box-plot, per input feature, the absolute value of (feature times weight) entering a single-neuron bottleneck, evaluated on `inputs`.
    Features are ordered by increasing mean weighted value.

    Arguments:
        model: model to interpret
        bottleneck_idx: index of the bottleneck to interpret, i.e. model.body.bottleneck_blocks[bottleneck_idx]
        inputs: data passed through `model.predict` to produce the values that are plotted
        log_y: if True, the y-axis uses a log scale
        savename: Optional name of file to which to save the plot
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
    '''

    first_layer = model.body.bottleneck_blocks[bottleneck_idx][0]
    assert first_layer.weight.shape[0] == 1, 'This function currently only supports bottlenecks whose width is one neuron'

    # Capture what enters the bottleneck layer during a forward pass
    hook = FowardHook(first_layer)
    model.predict(inputs)
    weighted_input = to_np(torch.abs(hook.input[0]*first_layer.weight[0]))

    # Reverse lookup: input index -> feature name (suffixed when a feature spans several inputs)
    feat_map = model.head.feat_map
    rfm = {}
    for f in feat_map:
        idxs = feat_map[f]
        if len(idxs) == 1:
            rfm[idxs[0]] = f
            continue
        for i, idx in enumerate(idxs): rfm[idx] = f'{f}_{i}'

    pairs = [(rfm[f], weighted_input[:, i]) for i, f in enumerate(model.body.bottleneck_masks[bottleneck_idx])]
    x = np.array([name for name, _ in pairs])
    y = np.array([vals for _, vals in pairs])
    order = np.argsort(y.mean(axis=1))
    x, y = list(x[order]), list(y[order])

    with sns.axes_style(**settings.style), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_mid, settings.h_mid))
        sns.boxplot(x=x, y=y)
        plt.xlabel("Features", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.ylabel(r"$|w_i\times x_i|$", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.yticks(fontsize=settings.tk_sz, color=settings.tk_col)
        if log_y:
            plt.yscale('log', nonposy='clip')
        plt.xticks(rotation=90)
        plt.title(settings.title, fontsize=settings.title_sz, color=settings.title_col, loc=settings.title_loc)
        if savename is not None:
            plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()

Source File: interpretation.py From lumin with Apache License 2.0

4 votes

def plot_multibody_weighted_outputs(model:AbsModel, inputs:Union[np.ndarray,Tensor], block_names:Optional[List[str]]=None, use_mean:bool=False,
                                    savename:Optional[str]=None, settings:PlotSettings=PlotSettings()) -> None:
    r'''
    Box-plot, per body block, the absolute value of that block's outputs dotted with the matching slice of the tail's first-layer weights.
    Only models whose tail has a single neuron in its first dense layer are supported.
    `inputs` are passed through `model.predict`; one value per datum and block is plotted, optionally divided by the block's output size.

    Arguments:
        model: model to interpret
        inputs: input data to use for interpretation
        block_names: names for each block to use when plotting; defaults to the block indices as strings
        use_mean: if True, will divide each weighted output by the number of output neurons in the block
        savename: Optional name of file to which to save the plot
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
    '''

    assert model.tail[0].weight.shape[0] == 1, 'This function currently only supports models whose tail block contains a single neuron in the first dense layer'
    if block_names is None:
        block_names = [f'{i}' for i in range(len(model.body.blocks))]
    else:
        assert len(block_names) == len(model.body.blocks), 'block_names passed, but number of names does not match number of blocks'

    # Capture what enters the tail's first layer during a forward pass
    hook = FowardHook(model.tail[0])
    model.predict(inputs)

    y, start = [], 0
    for b in model.body.blocks:
        width = b.get_out_size()
        stop = start+width
        # Columns [start, stop) of the tail input belong to this block
        contribution = torch.abs(hook.input[0][:,start:stop]@model.tail[0].weight[0][start:stop])
        if use_mean: contribution = contribution/width
        y.append(to_np(contribution))
        start = stop

    ylabel = r"Mean $|\bar{w}\cdot\bar{x}|$" if use_mean else r"$|\bar{w}\cdot\bar{x}|$"
    with sns.axes_style(**settings.style), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_mid, settings.h_mid))
        sns.boxplot(x=block_names, y=y)
        plt.xlabel("Block", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.ylabel(ylabel, fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.yticks(fontsize=settings.tk_sz, color=settings.tk_col)
        plt.title(settings.title, fontsize=settings.title_sz, color=settings.title_col, loc=settings.title_loc)
        if savename is not None:
            plt.savefig(settings.savepath/f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()

Source File: similarity_scores_time_benchmark.py From dirty_cat with BSD 3-Clause "New" or "Revised" License

4 votes

def plot(bench, title=''):
    """Save a score box plot and a timing bar plot for a benchmark run.

    Each element ``e`` of ``bench`` is read as ``e[0]`` (k-means) and
    ``e[1]`` (most-frequent); index 1 of each holds the scores, index 0 and
    index 2 hold timings (the latter divided by 20). The x-axis label of an
    entry is picked from five fixed vectorizer names by its position
    modulo 5. The figures are written to ``<title>_score.png`` and
    ``<title>_time.png``, with ``title`` lower-cased and spaces, colons and
    commas replaced.
    """
    sns.set(style='ticks', palette='muted')
    hash_dims = ['Count', '2 ** 14', '2 ** 16', '2 ** 18', '2 ** 20']

    # --- scores -----------------------------------------------------------
    score_vectorizer, score_strategy, score_values = [], [], []
    for pos, entry in enumerate(bench):
        kmeans_scores = entry[0][1]
        frequent_scores = entry[1][1]
        score_vectorizer += [hash_dims[pos % 5]] * (2 * len(kmeans_scores))
        score_strategy += ['k-means'] * len(kmeans_scores)
        score_strategy += ['most-frequent'] * len(frequent_scores)
        score_values += kmeans_scores
        score_values += frequent_scores

    df = pd.DataFrame(columns=['vectorizer', 'strategy', 'score'])
    df['vectorizer'] = score_vectorizer
    df['strategy'] = score_strategy
    df['score'] = score_values

    first = plt.figure()
    ax = sns.boxplot(x='vectorizer', y='score', hue='strategy', data=df)
    ax.set(title=title, xlabel='Vectorizer used', ylabel='Mean score on 10 cross validations')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    first.tight_layout()

    # --- timings ----------------------------------------------------------
    operations = ['K-means vect', 'K-means X-val', 'MF vect', 'MF X-val']
    time_vectorizer, time_operation, times = [], [], []
    for pos, entry in enumerate(bench):
        time_vectorizer += [hash_dims[pos % 5]] * 4
        time_operation += operations
        times += [entry[0][0], entry[0][2] / 20, entry[1][0], entry[1][2] / 20]

    df = pd.DataFrame(columns=['vectorizer', 'strategy/operation', 'time'])
    df['vectorizer'] = time_vectorizer
    df['strategy/operation'] = time_operation
    df['time'] = times

    second = plt.figure()
    ax1 = sns.barplot(x='vectorizer', y='time', hue='strategy/operation', data=df)
    ax1.set(title=title, xlabel='Vectorizer used', ylabel='Time in seconds')
    ax1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    second.tight_layout()

    # Turn the title into a file-name stem
    title = title.replace(' ', '_').replace(':', '-').replace(',', '_').lower()
    first.savefig(title + '_score.png')
    second.savefig(title + '_time.png')

Python seaborn.boxplot() Examples