Python Examples of seaborn.pairplot

Source File: stock_visualizer.py From stock-analysis with MIT License

6 votes

def jointplot(self, other, column, **kwargs):
        """
        Draw a seaborn jointplot comparing one column of this asset with
        the same column of another asset.

        Parameters:
            - other: The other asset's dataframe
            - column: The column name to use for the comparison.
            - kwargs: Keyword arguments to pass down to `sns.jointplot()`

        Returns:
            A seaborn jointplot
        """
        # This asset goes on the x axis, the other asset on the y axis.
        own_values = self.data[column]
        other_values = other[column]
        return sns.jointplot(x=own_values, y=other_values, **kwargs)

Source File: stock_visualizer.py From stock-analysis with MIT License

6 votes

def pairplot(self, **kwargs):
        """
        Draw a seaborn pairplot of the closing prices in this asset group.

        The data is pivoted first so that the 'close' values are spread
        across one column per 'name'; the diagonal panels use KDEs.

        Parameters:
            - kwargs: Keyword arguments to pass down to `sns.pairplot()`

        Returns:
            A seaborn pairplot
        """
        closes_by_name = self.data.pivot_table(
            values='close', index=self.data.index, columns='name'
        )
        return sns.pairplot(closes_by_name, diag_kind='kde', **kwargs)

Source File: clustering_kmeans_search_alternative.py From practicalDataAnalysisCookbook with GNU General Public License v2.0

6 votes

def plotInteractions(data, n_clusters):
    '''
        Cluster the data and plot the pairwise interactions
        between variables, coloured by cluster label.

        data -- the dataset to cluster; a 'clus' column holding
                the cluster labels is added to it in place
        n_clusters -- number of clusters requested from
                findClusters_kmeans; also used in the name of
                the saved figure
    '''
    # cluster the data
    cluster = findClusters_kmeans(data, n_clusters)

    # append the labels to the dataset for ease of plotting
    data['clus'] = cluster.labels_

    # prepare the plot from the dataset that was just labelled
    # (the function argument, not a module-level variable)
    grid = sns.pairplot(data, hue='clus')

    # and save the figure
    grid.savefig(
        '../../Data/Chapter04/k_means_{0}_clusters.png'
        .format(n_clusters)
    )


# the file name of the dataset

Source File: document.py From DQLearning-Toolbox with MIT License

5 votes

def savePair(df,samplesize=20000):
    """Save a pair plot of a random sample of ``df`` to ``pair_path``.

    Parameters:
        df: the frame to sample rows from.
        samplesize: maximum number of rows to sample (default 20000).

    ``pair_path`` is not defined in this function; it is expected to be
    available at module level.
    """
    # Sampling without replacement raises ValueError when more rows are
    # requested than exist, so cap the request at the size of the frame.
    if samplesize is not None:
        samplesize = min(samplesize, len(df))
    df1 = df.sample(samplesize)
    sns.set(style="ticks")
    sns.set_context("paper")
    sns.pairplot(df1)
    plt.title('Pair Graph')
    plt.savefig(pair_path)

# Plot the moving-average chart (default order: 12)

Source File: EDA.py From exploripy with MIT License

5 votes

def ScatterPlot(self):
		"""Save a regression pair plot of the continuous features.

		Rows with missing values are dropped before plotting. The image is
		written to 'HTMLTemplate/dist/output/Scatter.png' next to this
		source file, and the elapsed time is printed when self.debug is
		'YES'.

		Returns:
			The path of the saved image.
		"""
		start = time.time()
		sns.set(style="ticks", color_codes=True)
		this_dir = os.path.dirname(__file__)
		OutFileName = os.path.join(this_dir, 'HTMLTemplate/dist/output/Scatter.png')
		# pairplot creates its own figure, which becomes the current one,
		# so no figure needs to be created beforehand.
		sns.pairplot(self.df[self.ContinuousFeatures].dropna(),markers="+",palette="husl",kind="reg", plot_kws={'line_kws':{'color':'orange'}})
		plt.savefig(OutFileName)
		end = time.time()
		if self.debug == 'YES':
			print('ScatterPlot',end-start)
		return OutFileName

Source File: poiRegression.py From python-urbanPlanning with MIT License

5 votes

def basicStat(dataBunch):
    """Show a pair plot and a correlation heat map of the POI attributes.

    Parameters:
        dataBunch: object whose ``data`` attribute holds one row per
            record, with the eleven values listed in ``cols`` below in
            that order.

    Two figures are displayed one after the other; nothing is returned.
    """
    sns.set(style='whitegrid',context='notebook')
    cols=['lat','lng','price','overall_rating','service_rating','facility_rating','hygiene_rating','image_num','comment_num','favorite_num','checkin_num']  # column labels for the data frame
    frame=pd.DataFrame(dataBunch.data[:],columns=cols)  # convert to a pandas DataFrame for easier inspection and extraction
    # `height` is the current name of the keyword formerly called `size`
    sns.pairplot(frame[cols],height=2.5)  # pairwise scatter plots, to look at the relationships between variables
    plt.show()

    cm=np.corrcoef(frame[cols].values.T)  # pairwise correlation coefficients
    sns.set(font_scale=1.3)
    sns.heatmap(cm,cbar=True,annot=True,square=True,fmt='.2f',annot_kws={'size':13},yticklabels=cols,xticklabels=cols)  # heat map of the correlation coefficients
    # call show(); the bare attribute `plt.show` does nothing
    plt.show()

Source File: reduce_iris_sample_size_lvq.py From neupy with MIT License

5 votes

def plot_scattermatrix(data, target):
    """Draw a pair plot of ``data`` with points coloured by ``target``.

    ``data`` is wrapped in a DataFrame and ``target`` is attached as a
    'target' column that drives the hue; the diagonal panels are
    histograms. Returns the object produced by ``sns.pairplot``.
    """
    frame = pd.DataFrame(data)
    frame['target'] = target
    grid = sns.pairplot(frame, hue='target', diag_kind='hist')
    return grid

Source File: scrap_log.py From ffjord with MIT License

5 votes

def plot_pairplot(csv_filename, fig_filename, top=None):
    """Read a CSV file and save a regression pair plot of its columns.

    csv_filename -- path of the CSV file to read
    fig_filename -- where the figure is saved
    top -- when given, only the rows selected by the slice ``[:top]``
           are plotted
    """
    import seaborn as sns
    import pandas as pd

    sns.set(style="ticks", color_codes=True)
    table = pd.read_csv(csv_filename)
    rows = table if top is None else table[:top]

    grid = sns.pairplot(rows, kind='reg', diag_kind='kde', markers='.')
    grid.savefig(fig_filename)

Source File: visualization.py From default-credit-card-prediction with MIT License

5 votes

def visualize_hist_pairplot(X,y,selected_feature1,selected_feature2,features,diag_kind):
	"""
	Draw the pairwise relationship between two selected attributes, coloured by class, and save it under "img"

	Keyword arguments:
	X -- The feature vectors
	y -- The target vector
	selected_feature1 -- First feature
	selected_feature2 -- Second feature
	features -- Column names for X and y stacked side by side; the class column must be named "Y"
	diag_kind -- Type of plot in the diagonal (Histogram or Density Function)
	"""

	#stack features and target into one labelled table
	df=pd.DataFrame(data=np.column_stack((X,y)),columns=features)

	#class 0 and class 1 each get a fixed colour from the HLS palette
	hls_colors = sea.hls_palette()
	class_colors = {0:hls_colors[2],1:hls_colors[0]}
	feature_pair = [selected_feature1,selected_feature2]

	#plot
	splot=sea.pairplot(df, hue="Y", palette=class_colors,vars=feature_pair,diag_kind=diag_kind)
	splot.fig.suptitle('Pairwise relationship: '+" vs ".join(feature_pair))
	splot.set(xticklabels=[])

	#save fig
	target_dir = "img"
	save_fig(target_dir,'{}/{}_{}_hist_pairplot.png'.format(target_dir,selected_feature1,selected_feature2))

Source File: plotfunctions.py From DataScience-webapp-with-flask with MIT License

5 votes

def plot_correlations(ds, corr, corrcat):
    """Render a pair plot of ``ds[corr]`` and return it as base64 PNG data.

    Parameters:
        ds: the dataset to plot from.
        corr: the column selection passed to ``ds[...]``.
        corrcat: name of the column used for the hue, or '' for no hue.

    Returns:
        The PNG image, base64-encoded, as bytes.
    """
    import base64
    from io import BytesIO

    sns.set()
    plt.gcf().clear()
    if corrcat != '':
        grid = sns.pairplot(ds[corr], hue=corrcat)
    else:
        grid = sns.pairplot(ds[corr])

    figfile = BytesIO()
    try:
        grid.fig.savefig(figfile, format='png')
    finally:
        # pairplot opens a new figure on every call; close it so figures
        # do not pile up across repeated calls.
        plt.close(grid.fig)
    return base64.b64encode(figfile.getvalue())

Source File: vis_corex.py From bio_corex with Apache License 2.0

5 votes

def plot_pairplots(data, labels, alpha, mis, column_label, topk=5, prefix='', focus=''):
    """Save regression and hue-coloured pair plots for each latent factor.

    For every row ``j`` of ``mis``, the variables with positive
    ``alpha[j]`` and positive ``mis[j]`` are ranked by ``alpha * mis`` and
    the best ``topk`` are kept. If ``focus`` is one of ``column_label`` it
    is put at the front when not already selected. Groups with fewer than
    two variables are skipped.

    Two PDFs are written per group:
    ``{prefix}/pairplots_regress/group_num={j}.pdf`` (regression panels) and
    ``{prefix}/pairplots/group_num={j}.pdf`` (scatter panels coloured by
    ``labels[:, j]``). Missing directories are created.

    Plotting is best effort: a failure for one group is reported with a
    warning and the remaining groups are still processed.

    Parameters:
        data: indexed as ``data[:, inds]`` to pick the plotted columns.
        labels: indexed as ``labels[:, j]`` to get the hue values.
        alpha, mis: indexed as ``[j]`` and ``[j, inds]``; ``mis`` must be 2-D.
        column_label: list of column names, indexed by variable number.
        topk: maximum number of ranked variables per group.
        prefix: output directory prefix.
        focus: optional column name to always include.
    """
    import warnings

    plt.rcParams.update({'font.size': 32})
    m, _ = mis.shape
    for j in range(m):
        inds = np.where(np.logical_and(alpha[j] > 0, mis[j] > 0.))[0]
        inds = inds[np.argsort(- alpha[j, inds] * mis[j, inds])][:topk]
        if focus in column_label:
            ifocus = column_label.index(focus)
            if ifocus not in inds:
                inds = np.insert(inds, 0, ifocus)
        if len(inds) < 2:
            continue

        plt.clf()
        columns = [column_label[i] for i in inds]
        subdata = pd.DataFrame(data=data[:, inds], columns=columns)

        try:
            sns.pairplot(subdata, kind="reg", diag_kind="kde", height=5, dropna=True)
            filename = '{}/pairplots_regress/group_num={}.pdf'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.suptitle("Latent factor {}".format(j), y=1.01)
            plt.savefig(filename, bbox_inches='tight')
        except Exception as exc:
            # Keep going with the other plots, but do not hide the failure.
            warnings.warn("Regression pairplot for group {} failed: {}".format(j, exc))
        finally:
            plt.clf()

        subdata['Latent factor'] = labels[:,j]
        try:
            sns.pairplot(subdata, kind="scatter", dropna=True, vars=subdata.columns.drop('Latent factor'), hue="Latent factor", diag_kind="kde", height=5)
            filename = '{}/pairplots/group_num={}.pdf'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.suptitle("Latent factor {}".format(j), y=1.01)
            plt.savefig(filename, bbox_inches='tight')
        except Exception as exc:
            warnings.warn("Pairplot for group {} failed: {}".format(j, exc))
        finally:
            # Release every figure opened for this group, even on failure.
            plt.close('all')

Source File: plotting.py From kvae with MIT License

5 votes

def plot_auxiliary(all_vars, filename, table_size=4):
    """Plot a set of variables and save the figure to ``filename``.

    Each entry of ``all_vars`` is treated as an array laid out as
    (batch_size, sequence_length, dimension); 2-D entries get a leading
    axis of length one.

    * When the last dimension is 2, a ``table_size`` x ``table_size`` grid
      is drawn. Panel number ``idx`` (row-major) shows item ``idx`` of
      every variable as a line, with its first point marked in red.
      Variables with fewer than ``idx + 1`` items are left out of that panel.
    * Otherwise every variable is flattened to rows of ``dimension``
      values, tagged with its position in ``all_vars`` as 'class', and
      drawn as a seaborn pair plot coloured by class.

    The caller's ``all_vars`` sequence is not modified.
    """
    # Build a new list instead of assigning back into the caller's sequence.
    all_vars = [np.expand_dims(a, 0) if a.ndim == 2 else a for a in all_vars]

    dim = all_vars[0].shape[-1]
    if dim == 2:
        # squeeze=False keeps `ax` two-dimensional even when table_size is 1
        f, ax = plt.subplots(table_size, table_size, sharex='col', sharey='row',
                             figsize=[12, 12], squeeze=False)
        idx = 0
        for x in range(table_size):
            for y in range(table_size):
                for a in all_vars:
                    if idx >= a.shape[0]:
                        # fewer items than panels: leave this panel empty
                        continue
                    ax[x, y].plot(a[idx, :, 0], a[idx, :, 1], linestyle='-', marker='o', markersize=3)
                    # Plot starting point of the trajectory
                    ax[x, y].plot(a[idx, 0, 0], a[idx, 0, 1], 'r.', ms=12)
                idx += 1
        plt.savefig(filename, format='png', bbox_inches='tight', dpi=80)
    else:
        df_list = []
        for i, a in enumerate(all_vars):
            df = pd.DataFrame(a.reshape(-1, dim))
            df['class'] = i
            df_list.append(df)

        df_all = pd.concat(df_list)
        sns_plot = sns.pairplot(df_all, hue="class", vars=range(dim))
        sns_plot.savefig(filename)
    # Close the figure drawn above exactly once; a second close here would
    # hit whatever other figure happens to be current.
    plt.close()

Source File: atlas3.py From ssbio with MIT License

5 votes

def make_pairplot(self, num_components_to_plot=4, outpath=None, dpi=150):
        """Draw a pair plot of the leading columns of the principal observations.

        Parameters:
            num_components_to_plot: how many columns, taken from the start
                of ``self.principal_observations_df``, to include.
            outpath: where to save the figure; when empty the figure is
                shown instead.
            dpi: resolution used when saving.

        Points are coloured by ``self.observation_colname`` and the figure
        is titled with ``self.plot_title``. The figure is closed afterwards.
        """
        # Get columns
        components_to_plot = [self.principal_observations_df.columns[x] for x in range(num_components_to_plot)]

        # Plot (`height` is the current name of the keyword formerly called `size`)
        plot = sns.pairplot(data=self.principal_observations_df, hue=self.observation_colname,
                                vars=components_to_plot, markers=self.markers, height=4)
        plt.subplots_adjust(top=.95)
        plt.suptitle(self.plot_title)

        if outpath:
            plot.fig.savefig(outpath, dpi=dpi)
        else:
            plt.show()
        plt.close()

Source File: stock_visualizer.py From stock-analysis with MIT License

5 votes

def pairplot(self, **kwargs):
        """
        Draw a seaborn pairplot of this asset's data.

        Parameters:
            - kwargs: Keyword arguments to pass down to `sns.pairplot()`

        Returns:
            A seaborn pairplot
        """
        asset_data = self.data
        return sns.pairplot(asset_data, **kwargs)

Source File: stock_visualizer.py From stock-analysis with MIT License

5 votes

def pairplot(self, **kwargs):
        """Placeholder for pairplot generation; subclasses must override it."""
        message = 'To be implemented by subclasses!'
        raise NotImplementedError(message)

Source File: plots.py From AlphaPy with Apache License 2.0

4 votes

def plot_scatter(df, features, target, tag='eda', directory=None):
    r"""Plot a scatterplot matrix, also known as a pair plot.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the features.
    features: list of str
        The features to compare in the scatterplot. The list is not
        modified.
    target : str
        The target variable for contrast.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------

    https://seaborn.pydata.org/examples/scatterplot_matrix.html

    """

    logger.info("Generating Scatter Plot")

    # Get the feature subset. Work on a copy so the caller's list is left
    # alone, and add the target only once so the column is not duplicated.

    columns = list(features)
    if target not in columns:
        columns.append(target)
    df = df[columns]

    # Generate the pair plot

    sns.set()
    sns_plot = sns.pairplot(df, hue=target)

    # Save the plot
    write_plot('seaborn', sns_plot, 'scatter_plot', tag, directory)


#
# Function plot_facet_grid
#

Source File: visualize.py From pipelines with Apache License 2.0

4 votes

def datahtml(
    bucket_name,
    commit_sha,
    train_file_path
):
    """Publish a pair plot of the training data as a small HTML page.

    The CSV at ``train_file_path`` is read, a pair plot of seven fixed
    columns is saved locally as 'visualization.png' and copied to
    ``{bucket_name}/{commit_sha}/visualization.png``. An HTML page that
    embeds the image through its storage.googleapis.com URL is written to
    ``{bucket_name}/kaggle.html``, and a metadata file pointing at that
    page is written to '/mlpipeline-ui-metadata.json'.

    Parameters:
        bucket_name: destination bucket, with or without a 'gs://' prefix.
        commit_sha: sub-directory name used for the image.
        train_file_path: path of the training CSV.
    """
    # NOTE(review): the imports are kept inside the function as in the
    # original, presumably so it is self-contained as a pipeline step.
    import json
    import seaborn as sns
    import matplotlib.pyplot as plt
    import os
    image_path = os.path.join(bucket_name, commit_sha, 'visualization.png')
    # str.lstrip('gs://') removes any leading run of the characters
    # 'g', 's', ':' and '/', so it also ate the start of bucket names such
    # as "gs://staging". Remove only the exact scheme prefix instead.
    gcs_scheme = 'gs://'
    bucket_path = bucket_name
    if bucket_path.startswith(gcs_scheme):
        bucket_path = bucket_path[len(gcs_scheme):]
    bucket_path = bucket_path.lstrip('/')
    image_url = os.path.join('https://storage.googleapis.com', bucket_path, commit_sha, 'visualization.png')
    html_path = os.path.join(bucket_name, 'kaggle.html')
    # output visualization to a file

    import pandas as pd
    df_train = pd.read_csv(train_file_path)
    sns.set()
    cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
    # `height` is the current name of the keyword formerly called `size`
    sns.pairplot(df_train[cols], height = 3)
    plt.savefig('visualization.png')
    from tensorflow.python.lib.io import file_io
    file_io.copy('visualization.png', image_path)
    rendered_template = """
    <html>
        <head>
            <title>correlation image</title>
        </head>
        <body>
            <img src={}>
        </body>
    </html>""".format(image_url)
    file_io.write_string_to_file(html_path, rendered_template)

    metadata = {
        'outputs' : [{
        'type': 'web-app',
        'storage': 'gcs',
        'source': html_path,
        }]
    }
    with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:
        json.dump(metadata, f)

Source File: plotUtils.py From pyodds with MIT License

4 votes

def visualize_outlierscore(value,label,contamination,path=None):
    """
    Visualize the predicted outlier score.

    Every score is drawn against its position in ``value`` and coloured by
    whether its label marks it as an inlier (label equal to 1) or an
    outlier (any other label). Two horizontal lines are added at the
    sorted scores found at positions ``(1 - contamination) * n`` and
    ``contamination * n``.

    Parameters
    ----------
    value: numpy array of shape (n_test, )
        The outlier score of the test data.
    label: numpy array of shape (n_test, )
        The label of test data produced by the algorithm.
    contamination : float
        The proportion of outliers in the data set; used here to pick
        the two threshold lines.
    path: string
        The saving path for result figures. Nothing is saved when empty.
    """

    sns.set(style="darkgrid")

    ts = np.arange(len(value))
    outlier_label = [
        'inlier' if label[i]==1 else 'outlier'
        for i in range(len(ts))
    ]
    X_outlier = pd.DataFrame({'ts':ts,'Outlier_score':value,'outlier_label':np.array(outlier_label)})
    pal = dict(inlier="#4CB391", outlier="gray")
    g = sns.FacetGrid(X_outlier, hue="outlier_label", palette=pal, height=5)
    g.map(plt.scatter, "ts", "Outlier_score", s=30, alpha=.7, linewidth=.5, edgecolor="white")

    ranking = np.sort(value)
    n_scores = len(ranking)
    last_x = len(X_outlier)-1
    # upper line first, then the lower one
    for fraction, legend in ((1 - contamination, 'Threshold'), (contamination, 'Threshold2')):
        threshold = ranking[int(fraction * n_scores)]
        plt.hlines(threshold, xmin=0, xmax=last_x, colors="g", zorder=100, label=legend)
    if path:
        plt.savefig(path+'/visualize_outlierscore.png')
    plt.show()



# def visualize_outlierresult(X,label,path=None):
#     """
#     Visualize the predicted outlier result.
#
#     Parameters
#     ----------
#     X: numpy array of shape (n_test, n_features)
#         The test data.
#     label: numpy array of shape (n_test, )
#         The label of test data produced by the algorithm.
#
#     """
#     X['outlier']=pd.Series(label)
#     pal = dict(inlier="#4CB391", outlier="gray")
#     g = sns.pairplot(X, hue="outlier", palette=pal)
#     if path:
#         plt.savefig(path+'/visualize_outlierresult.png')
#     plt.show()

Source File: sampling.py From pyPESTO with BSD 3-Clause "New" or "Revised" License

4 votes

def sampling_scatter(
        result: Result,
        i_chain: int = 0,
        stepsize: int = 1,
        suptitle: str = None,
        size: Tuple[float, float] = None):
    """Parameter scatter plot.

    Draws a seaborn pair plot of the sampled values, leaving out the
    'logPosterior' and 'iteration' columns.

    Parameters
    ----------
    result:
        The pyPESTO result object with filled sample result.
    i_chain:
        Which chain to plot. Default: First chain.
    stepsize:
        Only one in `stepsize` values is plotted.
    suptitle:
        Figure super title.
    size:
        Figure size in inches.

    Returns
    -------
    ax:
        The object returned by `sns.pairplot`.
    """

    # get data which should be plotted; only the value table is needed here
    _, params_fval, _, _ = get_data_to_plot(
        result=result, i_chain=i_chain, stepsize=stepsize)

    sns.set(style="ticks")

    parameter_columns = params_fval.drop(
        ['logPosterior', 'iteration'], axis=1)
    ax = sns.pairplot(parameter_columns)

    figure = ax.fig
    if size is not None:
        figure.set_size_inches(size)

    if suptitle:
        figure.suptitle(suptitle)

    return ax

Source File: action.py From insightconnect-plugins with MIT License

4 votes

def run(self, params={}):
        """Create a seaborn pair plot from base64-encoded CSV data.

        Values read from ``params``:
            - 'csv_data': base64-encoded CSV text (required)
            - 'kind': passed to ``sns.pairplot`` as ``kind``
            - 'hue': optional column name used for colouring
            - 'margin_style': passed to ``sns.set`` as ``style``
            - 'color_palette': passed to ``sns.set_palette``

        Returns:
            A dict with the original 'csv' data and the 'plot' as a
            base64-encoded PNG string.

        Raises:
            Exception: when the CSV data cannot be base64-decoded, or when
                the requested hue column is not in the data set.
        """
        # Set styles. sns.set() also resets the palette to its own default,
        # so the requested palette must be applied after it, not before.
        sns.set(style=params.get('margin_style'))
        sns.set_palette(params.get('color_palette'))

        # Process the data and create the plot
        try:
            decoded_data = base64.b64decode(params.get('csv_data'))
        except Exception as e:
            error = f"Failed to decode base64 encoded CSV data with error: {e}"
            self.logger.error(error)
            raise

        df = pd.read_csv(BytesIO(decoded_data))
        kind = params.get('kind')
        hue = params.get('hue')

        args = {
            "kind": kind
        }

        if hue and (len(hue) > 0):
            if hue not in df:
                error = f"Column for hue ({hue}) not in data set, cannot create plot..."
                self.logger.error(error)
                # Raise the error; returning the exception object would
                # hand it to the caller as if it were a normal result.
                raise Exception(error)

            args['hue'] = hue

        # Pairgrids have the savefig method, call it directly
        self.logger.info("Creating plot...")
        grid = sns.pairplot(df, **args)

        # bbox_inches is required to ensure that labels are not cut off
        grid.savefig('plot.png', bbox_inches="tight")
        with open('plot.png', 'rb') as f:
            encoded_plot = base64.b64encode(f.read())

        return {
            "csv": params.get('csv_data'),
            "plot": encoded_plot.decode('utf-8')
        }

Python seaborn.pairplot() Examples