Python Examples of seaborn.countplot

Source File: analysis.py From dl-eeg-review with MIT License

7 votes

def plot_country(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the country of the first author's affiliation.

    Countries are placed on the x-axis in the order returned by
    ``df['Country'].value_counts()``, and the first three entries of that
    ordering are logged.

    Args:
        df: DataFrame with a 'Country' column.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'; the whole mapping is also forwarded as keyword
            arguments to ``fig.savefig``. Pass None to skip saving and use
            matplotlib's default figure size.

    Returns:
        The matplotlib axes containing the plot.
    """
    # Computed once: drives both the bar order and the top-3 log message.
    country_counts = df['Country'].value_counts()

    # Only read the figure size from the config when one is given, so that
    # save_cfg=None (no saving) does not fail before reaching the guard below.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4 * 3,
                   save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(x=df['Country'], ax=ax, order=country_counts.index)
    ax.set_ylabel('Number of papers')
    ax.set_xlabel('')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    plt.tight_layout()

    top3 = country_counts.index[:3]
    logger.info('Top 3 countries of first author affiliation: {}'.format(top3.values))

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'country')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax

Source File: analysis.py From dl-eeg-review with MIT License

6 votes

def plot_model_comparison(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the types of baseline models used.

    Also logs the percentage of studies per baseline category. Percentages
    are computed relative to the total number of rows in ``df``.

    Args:
        df: DataFrame with a 'Baseline model type' column. The logging code
            indexes the categories 'Traditional pipeline', 'DL',
            'DL & Trad.' and 'None', so all four must be present.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'; the whole mapping is also forwarded as keyword
            arguments to ``fig.savefig``. Pass None to skip saving and use
            matplotlib's default figure size.

    Returns:
        The matplotlib axes containing the plot.
    """
    # Only read the figure size from the config when one is given, so that
    # save_cfg=None (no saving) does not fail before reaching the guard below.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4 * 2,
                   save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    # Rows without a baseline entry are left out of the plot.
    sns.countplot(y=df['Baseline model type'].dropna(axis=0), ax=ax)
    ax.set_xlabel('Number of papers')
    ax.set_ylabel('')
    plt.tight_layout()

    model_prcts = df['Baseline model type'].value_counts() / df.shape[0] * 100
    logger.info('% of studies that used at least one traditional baseline: {}'.format(
        model_prcts['Traditional pipeline'] + model_prcts['DL & Trad.']))
    logger.info('% of studies that used at least one deep learning baseline: {}'.format(
        model_prcts['DL'] + model_prcts['DL & Trad.']))
    logger.info('% of studies that did not report baseline comparisons: {}'.format(
        model_prcts['None']))

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'model_comparison')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax

Source File: analysis.py From dl-eeg-review with MIT License

6 votes

def plot_cross_validation(df, save_cfg=cfg.saving_config):
    """Plot bar graph of cross validation approaches.

    Missing values in the 'Cross validation (clean)' column are shown as
    'N/M'. Entries are split on ';\\n' via
    ``ut.split_column_with_multiple_entries`` before counting.

    Args:
        df: DataFrame with 'Cross validation (clean)' and 'Citation'
            columns. It is not modified.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'; the whole mapping is also forwarded as keyword
            arguments to ``fig.savefig``. Pass None to skip saving and use
            matplotlib's default figure size.

    Returns:
        The matplotlib axes containing the plot.
    """
    col = 'Cross validation (clean)'

    # Fill missing values on a copy so the caller's DataFrame is left
    # untouched by a plotting call.
    filled_df = df.copy()
    filled_df[col] = filled_df[col].fillna('N/M')
    cv_df = ut.split_column_with_multiple_entries(
        filled_df, col, ref_col='Citation', sep=';\n', lower=False)

    # Only read the figure size from the config when one is given, so that
    # save_cfg=None (no saving) does not fail before reaching the guard below.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 2, save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(y=cv_df[col], order=cv_df[col].value_counts().index, ax=ax)
    ax.set_xlabel('Number of papers')
    ax.set_ylabel('')

    plt.tight_layout()

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'cross_validation')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax

Source File: eda.py From AI_in_Medicine_Clinical_Imaging_Classification with MIT License

6 votes

def plot_classification_frequency(df, category, file_name, convert_labels = False):
    '''
    Plots the frequency at which labels occur

    INPUT
        df: Pandas DataFrame holding the labels; must contain the column
            named by `category` (and 'level' when convert_labels is set)
        category: name of the column in df whose values are counted
        file_name: path the figure is written to with plt.savefig
        convert_labels: when truthy, the 'level' column of df is replaced
            by the result of change_labels(df, 'level') before plotting
            (this modifies df in place)

    OUTPUT
        None; the count plot is saved to file_name
    '''
    # Operate on the DataFrame that was passed in rather than on a
    # module-level `labels` variable, so the function honours its argument.
    if convert_labels:
        df['level'] = change_labels(df, 'level')

    sns.set(style="whitegrid", color_codes=True)
    sns.countplot(x=category, data=df)
    plt.title('Retinopathy vs Frequency')
    plt.savefig(file_name)

Source File: eda.py From eyenet with MIT License

6 votes

def plot_classification_frequency(df, category, file_name, convert_labels = False):
    '''
    Plots the frequency at which labels occur

    INPUT
        df: Pandas DataFrame holding the labels; must contain the column
            named by `category` (and 'level' when convert_labels is set)
        category: name of the column in df whose values are counted
        file_name: path the figure is written to with plt.savefig
        convert_labels: when truthy, the 'level' column of df is replaced
            by the result of change_labels(df, 'level') before plotting
            (this modifies df in place)

    OUTPUT
        None; the count plot is saved to file_name
    '''
    # Use the DataFrame argument instead of relying on a global named
    # `labels`, which the function previously read and ignored `df`.
    if convert_labels:
        df['level'] = change_labels(df, 'level')

    sns.set(style="whitegrid", color_codes=True)
    sns.countplot(x=category, data=df)
    plt.title('Retinopathy vs Frequency')
    plt.savefig(file_name)

Source File: dataframe_explorer.py From pandasgui with MIT License

6 votes

def update_plot(self):
    """Redraw the figure viewer for the column currently chosen in the picker.

    Columns whose dtype name is 'object', 'bool' or 'category' get a
    horizontal count plot limited to the first ten entries of
    ``value_counts()``; every other column gets a distribution plot.
    Missing values are dropped before plotting.
    """
    plt.ioff()
    selected = self.picker.currentText()

    plt.figure()

    values = self.df[selected].dropna()
    categorical_dtypes = ['object', 'bool', 'category']
    if self.df[selected].dtype.name not in categorical_dtypes:
        hist_style = {'color': 'grey', 'alpha': 1}
        ax = sns.distplot(values, color='black', hist_kws=hist_style)
    else:
        top_levels = values.value_counts().iloc[:10].index
        ax = sns.countplot(y=values, color='grey', order=top_levels)

    self.figure_viewer.setFigure(ax.figure)


# Examples

Source File: analysis.py From perses with MIT License

5 votes

def plot_chemical_trajectory(self, environment, filename):
    """
    Plot the trajectory through chemical space.

    Writes a single-page PDF with two panels: on the left, the index of the
    visited chemical state against the iteration number (y ticks are
    labelled with the states themselves); on the right, a count plot of how
    often each state index occurs in the trajectory.

    Parameters
    ----------
    environment : str
        the name of the environment for which the chemical space trajectory is desired
    filename : str
        path of the PDF file to write (passed to ``PdfPages``)
    """
    chemical_state_trajectory = self.extract_state_trajectory(environment)

    # Unique states in first-visit order. Unlike list(set(...)) this gives
    # the same tick order on every run.
    visited_states = list(dict.fromkeys(chemical_state_trajectory))

    # Map each state to its index once instead of calling list.index for
    # every step of the trajectory.
    state_to_index = {state: idx for idx, state in enumerate(visited_states)}
    state_trajectory = np.array(
        [state_to_index[state] for state in chemical_state_trajectory],
        dtype=float)

    with PdfPages(filename) as pdf:
        sns.set(font_scale=2)
        fig = plt.figure(figsize=(28, 12))
        plt.subplot2grid((1,2), (0,0))
        # x and y are passed by keyword: recent seaborn releases no longer
        # accept the data vectors positionally.
        sns.scatterplot(x=np.arange(len(state_trajectory)), y=state_trajectory)
        plt.yticks(np.arange(len(visited_states)), visited_states)

        plt.title("Trajectory through chemical space in {}".format(environment))
        plt.xlabel("iteration")
        plt.ylabel("chemical state")
        plt.tight_layout()

        plt.subplot2grid((1,2), (0,1))
        sns.countplot(y=state_trajectory)

        pdf.savefig(fig)
        plt.close(fig)

Source File: analysis.py From dl-eeg-review with MIT License

5 votes

def plot_type_of_paper(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the type of each paper (journal, conference, etc.).

    Also logs the number of journal papers, conference papers and preprints,
    and the number of non-preprint papers whose 'Preprint first' value is
    'Yes'. The logged counts come from the unmodified ``df``.

    Args:
        df: DataFrame with 'Type of paper' and 'Preprint first' columns.
            It is not modified.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'; the whole mapping is also forwarded as keyword
            arguments to ``fig.savefig``. Pass None to skip saving and use
            matplotlib's default figure size.

    Returns:
        The matplotlib axes containing the plot.
    """
    # Move supplements to journal paper category for the plot (a value of one is
    # not visible on a bar graph). Only the plotted column is relabelled; the
    # other columns of those rows are left as they are.
    df_plot = df.copy()
    is_supplement = df['Type of paper'] == 'Supplement'
    df_plot.loc[is_supplement, 'Type of paper'] = 'Journal'

    # Only read the figure size from the config when one is given, so that
    # save_cfg=None (no saving) does not fail before reaching the guard below.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4,
                   save_cfg['text_height'] / 5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(x=df_plot['Type of paper'], ax=ax)
    ax.set_xlabel('')
    ax.set_ylabel('Number of papers')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    plt.tight_layout()

    counts = df['Type of paper'].value_counts()
    logger.info('Number of journal papers: {}'.format(counts['Journal']))
    logger.info('Number of conference papers: {}'.format(counts['Conference']))
    logger.info('Number of preprints: {}'.format(counts['Preprint']))
    logger.info('Number of papers that were initially published as preprints: '
                '{}'.format(df[df['Type of paper'] != 'Preprint'][
                    'Preprint first'].value_counts()['Yes']))

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'type_of_paper')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax

Source File: analysis.py From dl-eeg-review with MIT License

5 votes

def plot_hardware(df, save_cfg=cfg.saving_config):
    """Plot bar graph showing the hardware used in the study.

    The 'EEG Hardware' column is split on ',' via
    ``ut.split_column_with_multiple_entries``; 'N/M' entries are dropped and
    each remaining device is coloured by whether it appears in the list of
    low-cost devices defined below.

    Args:
        df: DataFrame with 'EEG Hardware' and 'Citation' columns.
        save_cfg: mapping providing 'text_width', 'text_height', 'savepath'
            and 'format'; the whole mapping is also forwarded as keyword
            arguments to ``fig.savefig``. Pass None to skip saving and use
            matplotlib's default figure size.

    Returns:
        The matplotlib axes containing the plot.
    """
    col = 'EEG Hardware'
    hardware_df = ut.split_column_with_multiple_entries(
        df, col, ref_col='Citation', sep=',', lower=False)

    # Remove N/Ms because they make it hard to see anything. Copy the
    # filtered rows so the column added below is written to an independent
    # frame rather than to a slice of the split result.
    hardware_df = hardware_df[hardware_df[col] != 'N/M'].copy()

    # Add low cost column
    low_cost_devices = ['EPOC (Emotiv)', 'OpenBCI (OpenBCI)', 'Muse (InteraXon)',
                        'Mindwave Mobile (Neurosky)', 'Mindset (NeuroSky)']
    hardware_df['Low-cost'] = hardware_df[col].isin(low_cost_devices)

    # Only read the figure size from the config when one is given, so that
    # save_cfg=None (no saving) does not fail before reaching the guard below.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'] / 4 * 2,
                   save_cfg['text_height'] / 5 * 2)

    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(hue=hardware_df['Low-cost'], y=hardware_df[col], ax=ax,
                  order=hardware_df[col].value_counts().index,
                  dodge=False)
    ax.set_xlabel('Number of papers')
    ax.set_ylabel('')
    plt.tight_layout()

    if save_cfg is not None:
        fname = os.path.join(save_cfg['savepath'], 'hardware')
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax

Source File: DataPrep.py From Fake_News_Detection with MIT License

5 votes

def create_distribution(dataFile):
    """Draw a count plot of the 'Label' column of ``dataFile``.

    Uses the 'hls' palette and returns whatever ``sb.countplot`` returns.
    """
    label_plot = sb.countplot(data=dataFile, palette='hls', x='Label')
    return label_plot
    

# Calling the function below shows that the training, test and validation data are fairly evenly distributed between the classes.

Source File: visualize_traindata.py From Supply-demand-forecasting with MIT License

5 votes

def weather_distribution(self):
    """Print summary statistics of the 'weather' column and draw its count plot.

    The weather DataFrame is loaded from the training directory with
    ``self.load_weatherdf`` and stored on ``self.gapdf``. Nothing is
    returned; the plot is drawn with seaborn/pyplot on the current figure.
    """
    data_dir = g_singletonDataFilePath.getTrainDir()
    self.gapdf = self.load_weatherdf(data_dir)
    # A parenthesised single-argument print produces the same output under
    # Python 2 and is valid syntax under Python 3.
    print(self.gapdf['weather'].describe())

    sns.countplot(x="weather", data=self.gapdf, palette="Greens_d")
    plt.title('Countplot of Weather')

Source File: utils.py From Machine-Learning-with-Python with MIT License

5 votes

def plot_data(data):
    """Show a series of count plots for columns of ``data``.

    First the 'y' column is plotted with the 'hls' palette, then the number
    of null values per column is printed, and finally one count plot is shown
    for each of 'job', 'marital', 'default', 'housing', 'loan' and 'poutcome'.
    Each plot is displayed with ``plt.show()`` before the next one is drawn.
    """
    # Bar plot for the dependent variable.
    sns.countplot(x='y', data=data, palette='hls')
    plt.show()

    # Report the number of missing values in every column.
    print(data.isnull().sum())

    # Remaining plots, in display order. Each entry names the axis keyword
    # and the column to count: job (horizontal bars), marital status, credit
    # in default, housing loan, personal loan, previous campaign outcome.
    remaining_plots = (
        {'y': 'job'},
        {'x': 'marital'},
        {'x': 'default'},
        {'x': 'housing'},
        {'x': 'loan'},
        {'x': 'poutcome'},
    )
    for axis_kwargs in remaining_plots:
        sns.countplot(data=data, **axis_kwargs)
        plt.show()

Source File: brute_force_plotter.py From brute-force-plotter with MIT License

5 votes

def bar_plot(data, col, hue=None, file_name=None):
    """Draw a count plot of ``data[col]`` and label the bars of every subplot.

    Args:
        data: DataFrame to plot; it is sorted by ``col`` before plotting.
        col: name of the column whose values are counted (x-axis).
        hue: optional grouping variable forwarded to ``sns.countplot``.
        file_name: not used inside this function body.
            NOTE(review): presumably consumed by a decorator or by the
            caller -- confirm.
    """
    # x is passed by keyword: recent seaborn releases treat the first
    # positional argument as `data`, which would clash with data= below.
    sns.countplot(x=col, hue=hue, data=data.sort_values(col))
    sns.despine(left=True)

    subplots = [
        child
        for child in plt.gcf().get_children()
        if isinstance(child, matplotlib.axes.Subplot)
    ]
    for plot in subplots:
        rectangles = [
            patch
            for patch in plot.get_children()
            if isinstance(patch, matplotlib.patches.Rectangle)
        ]
        # Label inside the loop so every subplot is annotated, and so that a
        # figure without subplots does not reach autolabel with an undefined
        # `rectangles`.
        autolabel(rectangles)

Source File: plots.py From compose with BSD 3-Clause "New" or "Revised" License

5 votes

def distribution(self, **kwargs):
    """Plots the label distribution.

    A count plot is drawn when the single target column is flagged as
    discrete in ``self._label_times.is_discrete``; otherwise a distribution
    plot with a KDE is drawn.

    Args:
        **kwargs: forwarded to the seaborn plotting function that is used.

    Returns:
        The axes returned by seaborn, titled 'Label Distribution' and with
        the y-axis labelled 'Count'.
    """
    self._label_times._assert_single_target()
    target_column = self._label_times.target_columns[0]
    dist = self._label_times[target_column]
    is_discrete = self._label_times.is_discrete[target_column]

    if is_discrete:
        # Pass the values as x explicitly: recent seaborn releases bind the
        # first positional argument to `data` instead of `x`.
        ax = sns.countplot(x=dist, palette=COLOR, **kwargs)
    else:
        ax = sns.distplot(dist, kde=True, color=COLOR[1], **kwargs)

    ax.set_title('Label Distribution')
    ax.set_ylabel('Count')
    return ax

Source File: coco_stats.py From COCO-Assistant with MIT License

4 votes

def cat_count(anns, names, show_count=False, save=False):
    """Plot the number of annotation instances per category, one subplot per dataset.

    Args:
        anns: sequence of annotation objects exposing ``.anns`` (used to
            build a DataFrame) and ``.cats`` (indexed by category id to look
            up a 'name').
        names: titles for the subplots, paired with ``anns`` by position.
        show_count: when truthy, write each bar's height above the bar.
        save: when truthy, save the figure as 'cat_dist.png' under
            ``<current working directory>/results/plots``, creating the
            directory if necessary.

    The figure is always displayed with ``plt.show()`` at the end.
    """
    fig, axes = plt.subplots(1, len(anns), sharey=False)

    # Making axes iterable if only single annotation is present
    if len(anns) == 1:
        axes = [axes]

    for ann, name, ax in zip(anns, names, axes):
        ann_df = pd.DataFrame(ann.anns).transpose()

        # Derive the category name from the category id when the
        # annotations do not already carry it.
        if 'category_name' not in ann_df.columns:
            ann_df['category_name'] = ann_df.apply(
                lambda row: ann.cats[row.category_id]['name'], axis=1)

        chart = sns.countplot(data=ann_df,
                              x='category_name',
                              order=ann_df['category_name'].value_counts().index,
                              palette='Set1',
                              ax=ax)

        chart.set_title(name)
        chart.set_xticklabels(chart.get_xticklabels(), rotation=90)

        if show_count:
            for p in chart.patches:
                height = p.get_height()
                chart.text(p.get_x() + p.get_width() / 2.,
                           height + 0.9,
                           height,
                           ha="center")

    plt.suptitle('Instances per category', fontsize=14, fontweight='bold')
    plt.tight_layout()

    fig.set_size_inches(11, 11)

    if save:
        out_dir = os.path.join(os.getcwd(), 'results', 'plots')
        # makedirs also creates the intermediate 'results' directory, which
        # a plain mkdir of the nested path would fail on.
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, "cat_dist" + ".png"),
                    bbox_inches='tight',
                    pad_inches=0,
                    dpi=fig.dpi)

    plt.show()

Python seaborn.countplot() Examples