Python Examples of seaborn.scatterplot

Source File: noisy_observations.py From adversarial-policies with MIT License

6 votes

def noisy_adversary_opponent_subset_plot(
    original_df, subset_specs, transform_specs, logistic=True, plot_line=True, savefile=None
):
    """Plot agent-0 win percentage against log noise for one subset of results.

    The data is first narrowed with ``subset`` and then reshaped with
    ``transform`` (both helpers are defined elsewhere; their exact semantics
    are not visible here). Nothing is drawn when the subset is empty.

    :param original_df: full results table passed to ``subset``.
    :param subset_specs: subset specification; must contain the keys ``"env"``
        and ``"agent0_path"``, which are used in the title.
    :param transform_specs: specification forwarded to ``transform``.
    :param logistic: forwarded to the regression fit when ``plot_line`` is true.
    :param plot_line: draw a scatter plus fitted line instead of a bare scatter.
    :param savefile: path to save the figure to; when ``None`` the figure is shown.
    :return: None
    """
    subset_df = subset(original_df, subset_specs)
    if len(subset_df) == 0:
        return
    transformed_df = transform(subset_df, transform_specs)

    fig, ax = plt.subplots(figsize=(10, 7))
    if plot_line:
        # lmplot is figure-level: it opens its own figure, which ignores the
        # size requested above and leaves that figure open. regplot is the
        # axes-level equivalent and draws on the axes we created.
        sns.regplot(
            data=transformed_df, x="log_noise", y="agent0_win_perc", logistic=logistic, ax=ax
        )
    else:
        sns.scatterplot(data=transformed_df, x="log_noise", y="agent0_win_perc", ax=ax)
    ax.set_title(
        "{}: Noisy Zoo{} Observations vs Adversary".format(
            subset_specs["env"], subset_specs["agent0_path"]
        )
    )
    if savefile is not None:
        fig.savefig(savefile)
    else:
        plt.show()
    plt.close(fig)

Source File: prop_sim_plotting.py From causal-text-embeddings with MIT License

6 votes

def make_reddit_prop_plt():
    """Scatter the propensity-experiment estimates against exogeneity and save a PDF.

    Reads the experiment results from ``att.process_propensity_experiment()``,
    keeps the four estimate columns, gives them display names, indexes the
    table by exogeneity and writes the plot to
    ``../output/figures/reddit_propensity.pdf`` (creating the directory if
    needed).
    """
    sns.set()

    display_names = {
        'exog': 'Exogeneity',
        'very_naive': 'Unadjusted',
        'plugin': 'Plug-in',
        'one_step_tmle': 'TMLE',
    }
    estimates = pd.DataFrame(att.process_propensity_experiment())
    estimates = estimates[['exog', 'plugin', 'one_step_tmle', 'very_naive']]
    estimates = estimates.rename(index=str, columns=display_names)
    estimates = estimates.set_index('Exogeneity')

    plt.figure(figsize=(4.75, 3.00))
    # Wide-form input: each remaining column becomes its own series.
    sns.scatterplot(data=estimates, legend='brief', s=75)
    plt.xlabel("Exogeneity", fontfamily='monospace')
    plt.ylabel("NDE Estimate", fontfamily='monospace')
    plt.tight_layout()

    out_dir = '../output/figures'
    os.makedirs(out_dir, exist_ok=True)
    target = os.path.join(out_dir, 'reddit_propensity.pdf')
    plt.savefig(target)

Source File: det.py From diffxpy with BSD 3-Clause "New" or "Revised" License

6 votes

def plot_vs_ttest(self, log10=False):
        """Scatter this test's p-values against those of a t-test on the same data.

        A t-test is run on ``self.x`` with ``self.grouping`` and
        ``self.gene_ids``; its p-values go on the x axis and this object's
        p-values on the y axis.

        :param log10: when true, plot the values returned by
            ``log10_pval_clean()`` of both tests instead of the raw ``pval``.
        :return: tuple ``(fig, ax)`` of the created matplotlib figure and axes.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns
        from .tests import t_test

        grouping = self.grouping
        reference = t_test(data=self.x, grouping=grouping, gene_names=self.gene_ids)

        if log10:
            x_values = reference.log10_pval_clean()
            y_values = self.log10_pval_clean()
        else:
            x_values = reference.pval
            y_values = self.pval

        fig, ax = plt.subplots()
        sns.scatterplot(x=x_values, y=y_values, ax=ax)
        ax.set(xlabel="t-test", ylabel='rank test')
        return fig, ax

Source File: summary_generator.py From assistant-dialog-skill-analysis with Apache License 2.0

6 votes

def scatter_plot_intent_dist(workspace_pd):
    """
    Draw a scatter plot of the number of user examples per intent, sorted
    from the least to the most frequent intent.

    Intents are shown by rank (1..N) rather than by name.
    :param workspace_pd: table with an "intent" column
    :return: None
    """

    # most_common() is descending; flip it so the rarest intent comes first.
    ascending_counts = Counter(workspace_pd["intent"]).most_common()[::-1]
    df = pd.DataFrame(data=ascending_counts, columns=["Intent", "Number of User Examples"])
    # Replace the intent names with their 1-based rank.
    df["Intent"] = list(range(1, len(ascending_counts) + 1))

    sns.set(rc={"figure.figsize": (15, 10)})
    heading = Markdown(
        '## <p style="text-align: center;">Sorted Distribution of User Examples \
                     per Intent</p>'
    )
    display(heading)

    plt.ylabel("Number of User Examples", fontdict=LABEL_FONT)
    plt.xlabel("Intent", fontdict=LABEL_FONT)
    sns.scatterplot(x="Intent", y="Number of User Examples", data=df, s=100)

Source File: plot_phage_tsne.py From EdwardsLab with MIT License

6 votes

def plot_2d_sz(tsne, fnar, palette, outpng, verbose):
    """
    Create the 2d plot with marker sizes taken from the third tSNE column
    :param tsne: tSNE array; columns 0 and 1 are the coordinates, column 2 the marker size
    :param fnar: functions list, used to colour the points
    :param palette: colour palette passed to seaborn
    :param outpng: base name for the png output (".sz.png" is appended)
    :param verbose: write a progress message to stderr
    :return: nothing
    """

    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting 2D tSNE by size{bcolors.ENDC}\n")

    xs, ys, marker_sizes = tsne[:,0], tsne[:,1], tsne[:,2]
    sns.scatterplot(x=xs, y=ys, s=marker_sizes, legend="full", hue=fnar, palette=palette)
    # Put the legend outside the axes, to the right.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(outpng + ".sz.png")

Source File: analysis.py From dl-eeg-review with MIT License

6 votes

def _plot_results_accuracy_comparison(results_df, save_cfg):
    """Plot the comparison between the best model and best baseline.

    Draws 'Proposed' against 'Baseline (traditional)' from ``results_df`` on a
    square [0, 1.1] x [0, 1.1] plot together with the identity line.

    Args:
        results_df: table with the columns 'Baseline (traditional)' and
            'Proposed'.
        save_cfg: dict with the keys 'text_width', 'text_height', 'savepath'
            and 'format', or None. When None, the figure uses matplotlib's
            default size and is not saved.

    Returns:
        The matplotlib axes the plot was drawn on.
    """
    # The original subscripted save_cfg before its None check, so passing
    # None crashed; fall back to the default figure size instead.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'], save_cfg['text_height'] * 0.5)

    fig, ax = plt.subplots(figsize=figsize)
    sns.scatterplot(data=results_df, x='Baseline (traditional)', y='Proposed',
                    ax=ax)
    ax.plot([0, 1.1], [0, 1.1], c='k', alpha=0.2)  # identity line
    ax.axis('square')
    ax.set_xlim([0, 1.1])
    ax.set_ylim([0, 1.1])
    fig.tight_layout()

    if save_cfg is not None:
        savename = 'reported_accuracy_comparison'
        fname = os.path.join(save_cfg['savepath'], savename)
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return ax

Source File: plot_phage_tsne.py From EdwardsLab with MIT License

6 votes

def plot_2d(tsne, fnar, palette, outpng, verbose):
    """
    Create the 2d plot
    :param tsne: tSNE array; columns 0 and 1 are the coordinates
    :param fnar: functions list, used to colour the points
    :param palette: colour palette passed to seaborn
    :param outpng: base name for the png output (".png" is appended)
    :param verbose: write a progress message to stderr
    :return: nothing
    """

    if verbose:
        sys.stderr.write(f"{bcolors.GREEN}Plotting 2D tSNE{bcolors.ENDC}\n")

    # x and y must be passed by keyword: current seaborn releases no longer
    # accept them positionally.
    sns.scatterplot(x=tsne[:,0], y=tsne[:,1], legend="full", hue=fnar, palette=palette)
    # Put the legend outside the axes, to the right.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(outpng + ".png")

Source File: callbacks.py From ivis with GNU General Public License v2.0

6 votes

def plot_embeddings(self, embeddings):
        """Render the current embeddings as a PNG wrapped in a ``tf.Summary.Image``.

        NOTE(review): the ``embeddings`` argument is never read; the plot is
        always built from ``self.embeddings``. Confirm whether that is intended.

        The first two embedding columns are min-max scaled to [0, 1] and
        coloured by ``self.labels`` using ``self.n_classes`` hues.

        :param embeddings: unused (see note above).
        :return: ``tf.Summary.Image`` holding the encoded PNG bytes.
        """
        scaled = MinMaxScaler((0, 1)).fit_transform(self.embeddings)
        class_colours = sns.color_palette("hls", self.n_classes)

        fig = plt.figure()
        png_buffer = io.BytesIO()
        sns.scatterplot(x=scaled[:, 0], y=scaled[:, 1], s=1,
                        hue=self.labels,
                        palette=class_colours,
                        linewidth=0)

        plt.savefig(png_buffer, format='png', dpi=300)
        plt.close(fig)
        png_buffer.seek(0)

        return tf.Summary.Image(encoded_image_string=png_buffer.getvalue())

Source File: n2d.py From n2d with GNU General Public License v3.0

6 votes

def plot(x, y, plot_id, names=None):
    """Save a labelled scatter plot (and its data as CSV) of the first 5000 points.

    Output paths are derived from the module-level ``args`` object
    (``args.save_dir``, ``args.dataset``); ``args.n_clusters`` sets the palette
    size and the number of legend columns.

    :param x: point coordinates; columns 0 and 1 are plotted.
    :param y: label per point.
    :param plot_id: suffix inserted into the PNG file name.
    :param names: optional mapping applied to the labels before plotting.
    """
    limit = 5000
    viz_df = pd.DataFrame(data=x[:limit])
    viz_df['Label'] = y[:limit]
    if names is not None:
        viz_df['Label'] = viz_df['Label'].map(names)

    out_base = args.save_dir + '/' + args.dataset
    viz_df.to_csv(out_base + '.csv')

    plt.subplots(figsize=(8, 5))
    label_order = sorted(viz_df['Label'].unique())
    cluster_colours = sns.color_palette("hls", n_colors=args.n_clusters)
    sns.scatterplot(x=0, y=1, hue='Label', legend='full', hue_order=label_order,
                    palette=cluster_colours,
                    alpha=.5,
                    data=viz_df)
    # Single-row legend stretched across the top of the axes.
    legend = plt.legend(bbox_to_anchor=(-.1, 1.00, 1.1, .5), loc="lower left", markerfirst=True,
                        mode="expand", borderaxespad=0, ncol=args.n_clusters + 1, handletextpad=0.01, )

    # Blank the first legend text (presumably the hue title entry -- confirm).
    legend.texts[0].set_text("")
    plt.ylabel("")
    plt.xlabel("")
    plt.tight_layout()
    plt.savefig(out_base + '-' + plot_id + '.png', dpi=300)
    plt.clf()

Source File: plotUtils.py From pyodds with MIT License

5 votes

def visualize_distribution_static(X,prediction,score, path=None):
    """
    Visualize the distribution of the data in a 2-dimensional t-SNE embedding,
    with the points coloured by their outlier score.

    Parameters
    ----------
    X: pandas object exposing ``to_numpy()``, shape (n_test, n_features)
        Test data.
    prediction: numpy array of shape (n_test, )
        The prediction result of the test data; 1 is labelled 'inlier',
        anything else 'outlier'.
    score: numpy array of shape (n_test, )
        The outlier score of the test data.
    path: string
        The saving path for result figures. Nothing is saved when empty/None.
    """
    sns.set(style="darkgrid")

    embedding = TSNE(n_components=2).fit_transform(X.to_numpy())

    labels = [
        'inlier' if prediction[idx] == 1 else 'outlier'
        for idx in range(len(embedding))
    ]
    frame = pd.DataFrame({
        'x_emb': embedding[:, 0],
        'y_emb': embedding[:, 1],
        'outlier_label': np.array(labels),
        'score': np.array(score),
    })
    # NOTE(review): `sizes` is given without a `size` variable; `s=20` may have
    # been intended -- confirm before changing.
    axes = sns.scatterplot(x="x_emb", y="y_emb", hue="score", sizes=20,
                           palette='BuGn_r', legend=False, data=frame)
    if path:
        axes.get_figure().savefig(path+'/distribution_withoutlier.png')
    plt.show()

Source File: utils.py From dynamo-release with BSD 3-Clause "New" or "Revised" License

5 votes

def scatter_with_legend(
        fig, ax, df, font_color, x, y, c, cmap, legend, **scatter_kwargs
):
    """Draw a hue-coloured scatter on ``ax`` with either on-data labels or a legend.

    Parameters
    ----------
    fig, ax:
        Figure and axes to draw on; both are returned unchanged as objects.
    df:
        Table whose first two columns are used as coordinates when placing
        on-data labels.
    font_color:
        Colour of the on-data label text and its outline.
    x, y:
        Point coordinates passed to ``sns.scatterplot``.
    c:
        Label per point, used as the hue.
    cmap:
        Palette passed to ``sns.scatterplot``.
    legend:
        ``"on data"`` writes each label at the median position of its points;
        any other value is used as the ``loc`` of a regular legend.
    **scatter_kwargs:
        Forwarded to ``sns.scatterplot``.

    Returns
    -------
    tuple
        ``(fig, ax)``.
    """
    import seaborn as sns
    import matplotlib.patheffects as PathEffects

    unique_labels = np.unique(c)

    if legend == "on data":
        # x/y are passed by keyword: current seaborn rejects positional x/y.
        sns.scatterplot(
            x=x, y=y, hue=c, palette=cmap, ax=ax, legend=False, **scatter_kwargs
        )

        for label in unique_labels:
            # Median of the first two columns over the rows carrying this label.
            center = np.nanmedian(df.iloc[np.where(c == label)[0], :2], 0)
            txt = ax.text(
                center[0],
                center[1],
                str(label),
                color=font_color,
                zorder=1000,
                verticalalignment="center",
                horizontalalignment="center",
                weight="bold",
            )
            txt.set_path_effects(
                [
                    PathEffects.Stroke(
                        linewidth=1.5, foreground=font_color, alpha=0.8
                    ),
                    PathEffects.Normal(),
                ]
            )
    else:
        sns.scatterplot(
            x=x, y=y, hue=c, palette=cmap, ax=ax, legend="full", **scatter_kwargs
        )
        # The column count must be a positive integer derived from the NUMBER
        # of labels; the original floor-divided the label array itself.
        ax.legend(loc=legend, ncol=max(1, len(unique_labels) // 15))

    return fig, ax

Source File: create_graph_appendix.py From experiment-impact-tracker with MIT License

5 votes

def create_scatterplot_from_df(
    df, x: str, y: str, output_path: str = ".", fig_x: int = 16, fig_y: int = 8
):
    """Save a scatterplot of two columns of ``df``, coloured by "Experiment".

    The two columns are converted to float in place on ``df``. The image is
    written to ``<output_path>/<fig_x>_<fig_y>/<x>v<y>.png``; note that
    ``fig_x``/``fig_y`` only name that sub-directory, the figure itself is
    always created at 14 x 9 inches.

    Args:
        df: table containing the columns ``x``, ``y`` and "Experiment".
        x (str): name of the column plotted on the x axis.
        y (str): name of the column plotted on the y axis.
        output_path (str, optional): base output directory. Defaults to '.'.
        fig_x (int, optional): first part of the sub-directory name. Defaults to 16.
        fig_y (int, optional): second part of the sub-directory name. Defaults to 8.

    Returns:
        str: path of the saved PNG file.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    out_dir = os.path.join(output_path, "{}_{}".format(fig_x, fig_y))
    for column in (x, y):
        df[column] = df[column].astype(float)
    os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(14, 9))
    sns.scatterplot(
        ax=ax, x=x, y=y, data=df, s=325, alpha=0.5, hue="Experiment", legend="brief"
    )
    # Redraw the legend with enlarged markers.
    plt.legend(markerscale=2)

    path_name = os.path.join(out_dir, "{}v{}.png".format(x, y))
    plt.savefig(path_name)
    plt.close("all")
    return path_name

Source File: callbacks.py From ivis with GNU General Public License v2.0

5 votes

def plot_embeddings(self, filename):
        """Save a scatter plot of ``self.embeddings`` to ``self.log_dir``.

        The first two embedding columns are min-max scaled to [0, 1] and
        coloured by ``self.labels`` using ``self.n_classes`` hues.

        :param filename: file name of the image, joined onto ``self.log_dir``.
        """
        scaled = MinMaxScaler((0, 1)).fit_transform(self.embeddings)
        class_colours = sns.color_palette("hls", self.n_classes)

        fig = plt.figure()
        sns.scatterplot(x=scaled[:, 0], y=scaled[:, 1], s=1,
                        hue=self.labels,
                        palette=class_colours,
                        linewidth=0)

        target = os.path.join(self.log_dir, filename)
        plt.savefig(target, dpi=300)
        plt.close(fig)

Source File: dialogs.py From pandasgui with MIT License

5 votes

def finish(self):
        """Draw a scatter plot from the variables chosen in the dialog.

        Reads the selections from ``self.getDestinationItems()`` (a mapping
        with the keys 'X Variable', 'Y Variable' and 'Color By', each a
        sequence) and the data from ``self.getDataFrame()``.

        :raises ValueError: when no X or no Y variable has been selected.
        """
        selections = self.getDestinationItems()
        df = self.getDataFrame()

        # X and Y are mandatory. The original wrapped all three lookups in one
        # try block, so a missing X or Y surfaced later as a NameError.
        try:
            x = selections['X Variable'][0]
            y = selections['Y Variable'][0]
        except IndexError as err:
            raise ValueError(
                "Both an X variable and a Y variable must be selected"
            ) from err

        # The colour variable is optional.
        try:
            hue = selections['Color By'][0]
        except IndexError:
            hue = None

        # Keywords are required: current seaborn rejects positional x/y/hue.
        sns.scatterplot(x=x, y=y, hue=hue, data=df)
        plt.show()

Source File: analysis.py From perses with MIT License

5 votes

def plot_chemical_trajectory(self, environment, filename):
        """
        Plot the trajectory through chemical space.

        The left panel shows the visited state at each iteration, the right
        panel how often each state was visited. Both are written to a single
        PDF page.

        Parameters
        ----------
        environment : str
            the name of the environment for which the chemical space trajectory is desired
        filename : str
            path of the PDF file to write
        """
        chemical_state_trajectory = self.extract_state_trajectory(environment)

        visited_states = list(set(chemical_state_trajectory))
        # Map each state to its position once instead of calling list.index()
        # for every step of the trajectory.
        state_index = {state: idx for idx, state in enumerate(visited_states)}
        state_trajectory = np.array(
            [state_index[state] for state in chemical_state_trajectory], dtype=float
        )

        with PdfPages(filename) as pdf:
            sns.set(font_scale=2)
            fig = plt.figure(figsize=(28, 12))
            plt.subplot2grid((1,2), (0,0))
            # x and y by keyword: current seaborn rejects positional x/y.
            sns.scatterplot(x=np.arange(len(state_trajectory)), y=state_trajectory)
            plt.yticks(np.arange(len(visited_states)), visited_states)

            plt.title("Trajectory through chemical space in {}".format(environment))
            plt.xlabel("iteration")
            plt.ylabel("chemical state")
            plt.tight_layout()

            plt.subplot2grid((1,2), (0,1))
            sns.countplot(y=state_trajectory)

            pdf.savefig(fig)
            plt.close(fig)

Source File: plot.py From retentioneering-tools with Mozilla Public License 2.0

5 votes

def cluster_tsne(data, clusters, target, plot_name=None, **kwargs):
    """
    Plots TSNE projection of user stories colored by clusters. Each point represents a user session or whole user trajectory.

    A cached projection (``data.retention._tsne``) is reused unless
    ``refit`` is passed as a truthy keyword argument. With more than 10
    distinct clusters a colour-bar scatter is drawn instead of a legend.

    Parameters
    --------
    data: pd.DataFrame
        Feature matrix.
    clusters: np.array
        Array of cluster IDs.
    target: np.array
        Boolean vector, if ``True``, then user has `positive_target_event` in trajectory.
        Not used by this function.
    plot_name: str, optional
        Name of plot to save. Default: ``'cluster_tsne_{timestamp}.svg'``
    **kwargs:
        Forwarded to ``data.retention.learn_tsne`` when the projection is (re)computed.

    Returns
    -------
    tuple
        ``(scatter, plot_name, tsne2, retention_config)``: the plot object,
        the output path inside ``retention_config['experiments_folder']``,
        the projection table and the retention config. This function itself
        does not write the file.
    """

    if hasattr(data.retention, '_tsne') and not kwargs.get('refit'):
        tsne2 = data.retention._tsne.copy()
    else:
        tsne2 = data.retention.learn_tsne(clusters, **kwargs)
    tsne = tsne2.values
    n_clusters = np.unique(clusters).shape[0]
    if n_clusters > 10:
        f, ax = sns.mpl.pyplot.subplots()
        points = ax.scatter(tsne[:, 0], tsne[:, 1], c=clusters, cmap="BrBG")
        f.colorbar(points)
        scatter = ___FigureWrapper__(f)
    else:
        # x and y by keyword: current seaborn rejects positional x/y.
        scatter = sns.scatterplot(x=tsne[:, 0], y=tsne[:, 1], hue=clusters, legend='full',
                                  palette=sns.color_palette("bright")[0:n_clusters])
    plot_name = plot_name or 'cluster_tsne_{}'.format(datetime.now()).replace(':', '_').replace('.', '_') + '.svg'
    plot_name = data.retention.retention_config['experiments_folder'] + '/' + plot_name
    return scatter, plot_name, tsne2, data.retention.retention_config

Source File: graphics.py From rl-agents with MIT License

4 votes

def plot_frontier(frontier, all_points, writer=None, epoch=0, title="", beta=None, mixture=None, figsize=(8, 6),
                  verbose=True, clamp_qc=None):
    """
        Plot the hull of all Qc, Qr points for different (action, budget).

        If a threshold beta and corresponding mixture is provided, plot them.
    :param frontier: points of the Pareto frontier (needs "qc" and "qr" fields)
    :param all_points: all points (Qc, Qr) (needs "qc", "qr" and "action" fields)
    :param SummaryWriter writer: will log the image to tensorboard if not None
    :param epoch: timestep for tensorboard log
    :param title: figure title
    :param beta: a budget threshold used at decision time
    :param mixture: the optimal mixture corresponding to this budget beta
    :param figsize: figure size, inches
    :param verbose: should the legend be displayed
    :param clamp_qc: if qc is clamped, use these values at x axis limits
    :return: the raw RGB bytes of the rendered image, and its (width, height)
    """
    # Figure creation
    dfa, dfh = pd.DataFrame(all_points), pd.DataFrame(frontier)
    fig = plt.figure(figsize=figsize, tight_layout=True)
    sns.scatterplot(data=dfa, x="qc", y="qr", hue="action", legend="full")
    sns.lineplot(data=dfh, x="qc", y="qr", marker="x", label="hull")
    if clamp_qc:  # known limits
        plt.xlim(clamp_qc[0]-0.1, clamp_qc[1]+0.1)
    if beta is not None:
        plt.axvline(x=beta)
    if mixture:
        sns.lineplot(x=[mixture.inf.qc, mixture.sup.qc], y=[mixture.inf.qr, mixture.sup.qr],
                     color="red", marker="o")
    plt.title(title)
    leg = plt.legend(loc='upper right')
    if not verbose:
        leg.remove()
        plt.xlabel('')
        plt.ylabel('')

    # Figure export
    canvas = fig.canvas
    canvas.draw()
    size = canvas.get_width_height()
    if hasattr(canvas, "tostring_rgb"):
        data_str = canvas.tostring_rgb()
    else:
        # Newer Matplotlib no longer provides tostring_rgb(); rebuild the same
        # RGB byte string from the RGBA buffer by dropping the alpha channel.
        data_str = np.asarray(canvas.buffer_rgba())[..., :3].tobytes()
    if writer:
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        data = np.frombuffer(data_str, dtype=np.uint8)
        # (height, width, 3) -> (3, height, width)
        data = np.rollaxis(data.reshape(size[::-1] + (3,)), 2, 0)
        writer.add_image(clean_tag(title), data, epoch)
    plt.close(fig)
    return data_str, size

Source File: analysis.py From dl-eeg-review with MIT License

4 votes

def _plot_results_accuracy_per_domain(results_df, diff_df, save_cfg):
    """Make scatterplot + boxplot to show accuracy difference by domain.

    The top panel is a jittered strip plot of 'acc_diff' per 'Main domain'
    (from ``results_df``); the bottom panel is a box plot with a swarm overlay
    of 'acc_diff' (from ``diff_df``). Summary statistics and up to the three
    largest improvements are logged.

    Note: the 'Main domain' column of ``results_df`` is rewritten in place
    with wrapped text.

    Args:
        results_df: table with the columns 'Main domain' and 'acc_diff'.
        diff_df: table with the columns 'acc_diff' and 'Citation'.
        save_cfg: dict with the keys 'text_width', 'text_height', 'savepath'
            and 'format', or None. When None, the figure uses matplotlib's
            default size and is not saved.

    Returns:
        The array of the two matplotlib axes.
    """
    # The original subscripted save_cfg before its None check, so passing
    # None crashed; fall back to the default figure size instead.
    figsize = None
    if save_cfg is not None:
        figsize = (save_cfg['text_width'], save_cfg['text_height'] / 3)

    fig, axes = plt.subplots(
        nrows=2, ncols=1, sharex=True,
        figsize=figsize,
        gridspec_kw={'height_ratios': [5, 1]})

    results_df['Main domain'] = results_df['Main domain'].apply(
        ut.wrap_text, max_char=20)

    # catplot is figure-level and opens its own figure instead of drawing on
    # axes[0]; stripplot is the axes-level plot it produces by default.
    sns.stripplot(y='Main domain', x='acc_diff', s=3, jitter=True,
                  data=results_df, ax=axes[0])
    axes[0].set_xlabel('')
    axes[0].set_ylabel('')
    axes[0].axvline(0, c='k', alpha=0.2)

    sns.boxplot(x='acc_diff', data=diff_df, ax=axes[1])
    sns.swarmplot(x='acc_diff', data=diff_df, color="0", size=2, ax=axes[1])
    axes[1].axvline(0, c='k', alpha=0.2)
    axes[1].set_xlabel('Accuracy difference')

    fig.subplots_adjust(wspace=0, hspace=0.02)
    fig.tight_layout()

    logger.info('Number of studies included in the accuracy improvement analysis: {}'.format(
        results_df.shape[0]))
    median = diff_df['acc_diff'].median()
    iqr = diff_df['acc_diff'].quantile(.75) - diff_df['acc_diff'].quantile(.25)
    logger.info('Median gain in accuracy: {:.6f}'.format(median))
    logger.info('Interquartile range of the gain in accuracy: {:.6f}'.format(iqr))

    # zip() stops at the shorter input, so fewer than three rows no longer
    # raises an IndexError.
    best_improvement = diff_df.nlargest(3, 'acc_diff')
    ranks = ('Best', 'Second best', 'Third best')
    for rank, gain, citation in zip(ranks,
                                    best_improvement['acc_diff'].values,
                                    best_improvement['Citation'].values):
        logger.info('{} improvement in accuracy: {}, in {}'.format(
            rank, gain, citation))

    if save_cfg is not None:
        savename = 'reported_accuracy_per_domain'
        fname = os.path.join(save_cfg['savepath'], savename)
        fig.savefig(fname + '.' + save_cfg['format'], **save_cfg)

    return axes

Source File: action.py From insightconnect-plugins with MIT License

4 votes

def run(self, params=None):
        """Create a scatter plot from base64-encoded CSV data.

        Reads these keys from ``params``: 'csv_data' (base64-encoded CSV),
        'x_value' and 'y_value' (column names, required), 'hue' (column name,
        optional), 'color_palette' and 'margin_style' (seaborn settings).

        The plot is written to 'plot.png' in the working directory and read
        back to be base64-encoded.

        :param params: dict of action inputs; treated as empty when omitted.
        :return: dict with 'csv' (the input CSV data, unchanged) and 'plot'
            (the PNG image as a base64 string).
        :raises Exception: when the CSV data cannot be decoded or a requested
            column is not in the data set.
        """
        params = {} if params is None else params

        # Set styles. The style goes first: sns.set() also resets the palette
        # to its own default, so the original order discarded the palette.
        sns.set(style=params.get('margin_style'))
        sns.set_palette(params.get('color_palette'))

        # Process the data and create the plot
        try:
            decoded_data = base64.b64decode(params.get('csv_data'))
        except Exception as e:
            error = f"Failed to decode base64 encoded CSV data with error: {e}"
            self.logger.error(error)
            raise e

        df = pd.read_csv(BytesIO(decoded_data))
        x = params.get('x_value')
        y = params.get('y_value')
        hue = params.get('hue')

        # Validation failures are raised; the original returned the Exception
        # object as if it were a normal result.
        if not x or (x not in df):
            error = f"Column ({x}) not in data set, cannot create plot..."
            self.logger.error(error)
            raise Exception(error)
        if not y or (y not in df):
            error = f"Column ({y}) not in data set, cannot create plot..."
            self.logger.error(error)
            raise Exception(error)

        args = {
            "data": df,
            "x": x,
            "y": y
        }

        if hue:
            if hue not in df:
                error = f"Column for hue ({hue}) not in data set, cannot create plot..."
                self.logger.error(error)
                raise Exception(error)
            args['hue'] = hue

        # AxesSubplots (the plot object returned) don't have the savefig method, get the figure, then save it
        self.logger.info("Creating plot...")
        fig = sns.scatterplot(**args).get_figure()

        # bbox_inches is required to ensure that labels are not cut off
        fig.savefig('plot.png', bbox_inches="tight")
        with open('plot.png', 'rb') as f:
            encoded_plot = base64.b64encode(f.read())

        return {
            "csv": params.get('csv_data'),
            "plot": encoded_plot.decode('utf-8')
        }

Python seaborn.scatterplot() Examples