Python Examples of pandas.DataFrame

Source File: trade_api.py From TradeApi with Apache License 2.0

6 votes

def query_account(self, format=""):
        """
        Call the "oms.query_account" RPC and return the extracted result.

        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Account" (presumably a pandas DataFrame for the "pandas"
            format -- confirm against utils.extract_result).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        data_format = self._get_format(format, "pandas")
        # The server is asked for column-oriented data only for pandas output.
        request = {"format": "columnset"} if data_format == "pandas" else {}

        response = self._remote.call("oms.query_account", request)
        return utils.extract_result(response, data_format=data_format, class_name="Account")

Source File: sample_based_voting.py From lale with Apache License 2.0

6 votes

def transform(self, X, end_index_list=None):
        """
        Reduce per-sample labels to one majority-vote label per segment.

        :param X: labels as a numpy array, a list, or a pandas DataFrame.
        :param end_index_list: exclusive end index of each consecutive
            segment in X. When None, self.end_index_list is used instead.
        :return: X unchanged when no end indices are available, otherwise a
            numpy array holding the most frequent label of each segment.
        """
        if end_index_list is None:
            # Fall back to the value that may have been set as meta data.
            end_index_list = self.end_index_list

        if end_index_list is None:
            return X

        if not isinstance(X, np.ndarray):
            if isinstance(X, list):
                X = np.array(X)
            elif isinstance(X, pd.DataFrame):
                # DataFrame.as_matrix() no longer exists; .values is the
                # equivalent ndarray view.
                X = X.values

        voted_labels = []
        prev_index = 0
        for index in end_index_list:
            labels = X[prev_index:index]
            values, counts = np.unique(labels, return_counts=True)
            # np.unique returns sorted values, so on a tie argmax picks the
            # smallest label. Append the label itself, not its position.
            voted_labels.append(values[np.argmax(counts)])
            # Advance so the next vote only covers the next segment.
            prev_index = index
        return np.array(voted_labels)

Source File: pdutils.py From pysystemtrade with GNU General Public License v3.0

6 votes

def from_dict_of_values_to_df(data_dict, ts_index, columns=None):
    """
    Build a pd.DataFrame in which every column holds one repeated value.

    :param data_dict: A dict of scalars
    :param ts_index: A timeseries index; each value is repeated len(ts_index) times
    :param columns: (optional) A list of str selecting which keys of data_dict to use
        [every entry must be a key of data_dict]; defaults to all keys
    :return: pd.DataFrame indexed by ts_index, one column per selected key
    """
    column_names = list(data_dict.keys() if columns is None else columns)

    repeated_values = {
        name: [data_dict[name]] * len(ts_index)
        for name in column_names
    }

    return pd.DataFrame(repeated_values, ts_index)

Source File: pdutils.py From pysystemtrade with GNU General Public License v3.0

6 votes

def dataframe_pad(starting_df, column_list, padwith=0.0):
    """
    Return a dataframe whose columns are exactly column_list, in that order.

    Columns already present in starting_df are reused; missing ones are
    created and filled with padwith.

    :param starting_df: A pd.DataFrame with named columns
    :param column_list: A list of column names
    :param padwith: The value to pad missing columns with
    :return: pd.DataFrame sharing the index of starting_df
    """

    def _pad_column(column_name, starting_df, padwith):
        # Reuse an existing column, otherwise build a constant one.
        if column_name in starting_df.columns:
            return starting_df[column_name]
        # Use the caller's pad value rather than a hard-coded 0.0.
        return pd.Series([padwith] * len(starting_df.index), starting_df.index)

    new_data = [
        _pad_column(column_name, starting_df, padwith)
        for column_name in column_list
    ]

    new_df = pd.concat(new_data, axis=1)
    # Padded series carry no name, so set the column labels explicitly.
    new_df.columns = column_list

    return new_df

Source File: classification.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def check_params_with_data(self, df, actual_field, predicted_field):
        """ Validate parameters against the ground-truth values.

        This implementation performs no check: it always raises, reporting
        that the scoring method named by self.scoring_name does not support
        the method. It is meant to be overwritten as needed.

        Args:
            df (pd.DataFrame): input dataframe
            actual_field (str): name of ground-truth field
            predicted_field (str): name of predicted field

        Raises:
            MLSPLNotImplementedError: always, in this implementation
        """
        raise MLSPLNotImplementedError(
            'Scoring method {} does not support "check_params_with_data" method.'.format(self.scoring_name)
        )

Source File: classification.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def score(self, df, options):
        """ Run the scoring pipeline and return its output.

        The labels come from self.prepare_input_data, the metric from
        self.scoring_function (called with self.params as keyword
        arguments), and the return value from self.create_output.

        Args:
            df (pd.DataFrame): input dataframe
            options (dict): passed options

        Returns:
            df_output (pd.DataFrame): output dataframe
        """
        labels_true, labels_pred = self.prepare_input_data(
            df, self.actual_field, self.predicted_field, options
        )
        metric_value = self.scoring_function(labels_true, labels_pred, **self.params)
        return self.create_output(self.scoring_name, metric_value)

Source File: classification.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def create_output(self, scoring_name, result):
        """ Wrap a scoring result in a single-row dataframe.

        Args:
            scoring_name (str): scoring function name
            result (float, dict or array): output of sklearn scoring function

        Returns:
            output_df (pd.DataFrame): one row holding ``result``. The columns
                are self.params['labels'] when that entry is set (e.g.
                average=None, confusion matrix), otherwise the single
                column ``scoring_name``.
        """
        labels = self.params.get('labels', None)
        column_names = labels if labels is not None else [scoring_name]
        return pd.DataFrame(data=[result], columns=column_names)

Source File: solar_equations.py From CityEnergyAnalyst with MIT License

6 votes

def calc_worst_hour(latitude, weather_data, solar_window_solstice):
    """
    Find the first hour of the solar window on the winter solstice, used for panel spacing.
    http://www.affordable-solar.com/learning-center/building-a-system/calculating-tilted-array-spacing/

    December 21 is used when latitude > 0, June 21 otherwise (including latitude == 0).

    :param latitude: latitude of the site [degree]
    :type latitude: float
    :param weather_data: weather data of the site; must provide ``month``, ``day`` and ``hour`` columns
    :type weather_data: pd.DataFrame
    :param solar_window_solstice: the desired hour of shade-free solar window on the winter solstice.
    :type solar_window_solstice: float
    :return worst_hour: index label of the first matching row in weather_data
    :rtype worst_hour: float
    """
    if latitude > 0:
        solstice_filter = 'month == 12 & day == 21'
    else:
        solstice_filter = 'month == 6 & day == 21'

    solstice_day = weather_data.query(solstice_filter)
    # The window is centred on noon, so it starts half its length before 12.
    window_start_hour = 12 - round(solar_window_solstice / 2)
    matching_rows = solstice_day[solstice_day.hour == window_start_hour]
    return matching_rows.index[0]

Source File: classification.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def create_output(self, scoring_name, result):
        """ Build the output dataframe; its layout differs from the parent's.

        When self.params has no 'labels' entry (average != None) the result
        is reshaped to one row with the columns precision, recall,
        fbeta_score and support.

        When 'labels' is set (average=None, metrics computed per target
        class) the four result arrays are stacked into one row per metric:
        a leading 'metric' column names the metric and one 'scored(<label>)'
        column follows for every label.
        """
        metric_names = ['precision', 'recall', 'fbeta_score', 'support']
        labels = self.params.get('labels', None)

        if labels is None:
            single_row = np.array(result).reshape(1, -1)  # 1 x 4
            return pd.DataFrame(data=single_row, columns=metric_names)

        per_class_values = np.vstack(result)  # one row per metric
        metric_column = np.array(metric_names).reshape(-1, 1)
        table = np.hstack((metric_column, per_class_values))
        # The 'scored(...)' naming is used for alphabetical sorting.
        headers = ['metric'] + ['scored({})'.format(i) for i in labels]
        return pd.DataFrame(data=table, columns=headers)

Source File: trade_api.py From TradeSim with Apache License 2.0

6 votes

def query_portfolio(self, format=""):
        """
        Call the "pms.query_portfolio" RPC and return the extracted result.

        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "NetPosition" with "security" as index column (presumably a
            pandas DataFrame for the "pandas" format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        data_format = self._get_format(format, "pandas")
        # The server is asked for column-oriented data only for pandas output.
        request = {"format": "columnset"} if data_format == "pandas" else {}

        response = self._remote.call("pms.query_portfolio", request)
        return utils.extract_result(response, index_column="security", data_format=data_format, class_name="NetPosition")

Source File: trade_api.py From TradeSim with Apache License 2.0

6 votes

def query_trade(self, task_id=-1, format=""):
        """
        Call the "oms.query_trade" RPC and return the extracted result.

        :param task_id: task to query; -1 means all
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Trade" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"task_id": task_id}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_trade", request)
        return utils.extract_result(response, data_format=data_format, class_name="Trade")

Source File: trade_api.py From TradeSim with Apache License 2.0

6 votes

def query_task(self, task_id=-1, format=""):
        """
        Call the "oms.query_task" RPC and return the extracted result.

        :param task_id: task to query; -1 means all
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Task" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"task_id": task_id}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_task", request)
        return utils.extract_result(response, data_format=data_format, class_name="Task")

Source File: trade_api.py From TradeSim with Apache License 2.0

6 votes

def query_net_position(self, mode="all", securities="", format=""):
        """
        Call the "oms.query_net_position" RPC and return the extracted result.

        :param mode: sent to the server unchanged as "mode"
        :param securities: securities separated by ","; sent as "security"
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "NetPosition" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"mode": mode, "security": securities}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_net_position", request)
        return utils.extract_result(response, data_format=data_format, class_name="NetPosition")

Source File: trade_api.py From TradeSim with Apache License 2.0

6 votes

def query_position(self, mode="all", securities="", format=""):
        """
        Call the "oms.query_position" RPC and return the extracted result.

        :param mode: sent to the server unchanged as "mode"
        :param securities: securities separated by ","; sent as "security"
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Position" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"mode": mode, "security": securities}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_position", request)
        return utils.extract_result(response, data_format=data_format, class_name="Position")

Source File: trade_api.py From TradeSim with Apache License 2.0

6 votes

def query_account(self, format=""):
        """
        Call the "oms.query_account" RPC and return the extracted result.

        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Account" (presumably a pandas DataFrame for the "pandas"
            format -- confirm against utils.extract_result).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        data_format = self._get_format(format, "pandas")
        # The server is asked for column-oriented data only for pandas output.
        request = {"format": "columnset"} if data_format == "pandas" else {}

        response = self._remote.call("oms.query_account", request)
        return utils.extract_result(response, data_format=data_format, class_name="Account")

Source File: model_datasets.py From AMPL with MIT License

6 votes

def load_featurized_data(self):
        """Load prefeaturized data from the filesystem.

        The full dataset is loaded first. If it already contains every
        required feature column it is returned as is and self.dataset_key is
        set to self.params.dataset_key. Otherwise a featurized CSV is read
        from the featurization's data subdirectory next to the dataset, and
        self.dataset_key is set to that file's path.

        The returned frame is meant to be passed on to
        featurization.extract_prefeaturized_data() for processing.

        Returns:
            featurized_dset_df (pd.DataFrame): dataframe of the prefeaturized data, needs further processing
        """
        full_df = self.load_full_dataset()
        if self.has_all_feature_columns(full_df):
            # The raw dataset already carries the features; use it directly.
            self.dataset_key = self.params.dataset_key
            return full_df

        # Otherwise derive the expected location of the featurized dataset.
        file_name = self.featurization.get_featurized_dset_name(self.dataset_name)
        parent_dir = os.path.dirname(self.params.dataset_key)
        featurized_dir = os.path.join(parent_dir, self.featurization.get_featurized_data_subdir())
        featurized_path = os.path.join(featurized_dir, file_name)

        loaded_df = pd.read_csv(featurized_path)
        self.dataset_key = featurized_path
        return loaded_df

    # ****************************************************************************************

Source File: trade_api.py From TradeApi with Apache License 2.0

6 votes

def query_position(self, mode="all", securities="", format=""):
        """
        Call the "oms.query_position" RPC and return the extracted result.

        :param mode: sent to the server unchanged as "mode"
        :param securities: securities separated by ","; sent as "security"
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Position" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"mode": mode, "security": securities}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_position", request)
        return utils.extract_result(response, data_format=data_format, class_name="Position")

Source File: trade_api.py From TradeApi with Apache License 2.0

6 votes

def query_net_position(self, mode="all", securities="", format=""):
        """
        Call the "oms.query_net_position" RPC and return the extracted result.

        :param mode: sent to the server unchanged as "mode"
        :param securities: securities separated by ","; sent as "security"
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "NetPosition" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"mode": mode, "security": securities}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_net_position", request)
        return utils.extract_result(response, data_format=data_format, class_name="NetPosition")

Source File: model_datasets.py From AMPL with MIT License

6 votes

def has_all_feature_columns(self, dset_df):
        """
        Check whether dset_df already contains every feature column reported
        by self.featurization.get_feature_columns().

        Args:
            dset_df (DataFrame): Feature matrix

        Returns:
            (Boolean): True when no required column is missing from dset_df,
            False otherwise
        """
        required_columns = set(self.featurization.get_feature_columns())
        available_columns = set(dset_df.columns.values)
        return required_columns.issubset(available_columns)

    # *************************************************************************************

Source File: trade_api.py From TradeApi with Apache License 2.0

6 votes

def query_task(self, task_id=-1, format=""):
        """
        Call the "oms.query_task" RPC and return the extracted result.

        :param task_id: task to query; -1 means all
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Task" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"task_id": task_id}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_task", request)
        return utils.extract_result(response, data_format=data_format, class_name="Task")

Source File: trade_api.py From TradeApi with Apache License 2.0

6 votes

def query_trade(self, task_id=-1, format=""):
        """
        Call the "oms.query_trade" RPC and return the extracted result.

        :param task_id: task to query; -1 means all
        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "Trade" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        request = {"task_id": task_id}

        data_format = self._get_format(format, "pandas")
        if data_format == "pandas":
            # Ask the server for column-oriented data.
            request.update(format="columnset")

        response = self._remote.call("oms.query_trade", request)
        return utils.extract_result(response, data_format=data_format, class_name="Trade")

Source File: trade_api.py From TradeApi with Apache License 2.0

6 votes

def query_portfolio(self, format=""):
        """
        Call the "pms.query_portfolio" RPC and return the extracted result.

        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "NetPosition" with "security" as index column (presumably a
            pandas DataFrame for the "pandas" format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        data_format = self._get_format(format, "pandas")
        # The server is asked for column-oriented data only for pandas output.
        request = {"format": "columnset"} if data_format == "pandas" else {}

        response = self._remote.call("pms.query_portfolio", request)
        return utils.extract_result(response, index_column="security", data_format=data_format, class_name="NetPosition")

Source File: bivariate.py From btgym with GNU Lesser General Public License v3.0

5 votes

def __init__(self, *args, **kwargs):
        """
        Forward all arguments to the parent constructor, fixing
        `_train_class_ref` to BivariateStateSetGenerator.

        Args:
            model_params:               dict holding generative model parameters,
                                        same as args for: bivariate_state_set_iterator_fn
            assets_filenames:           dict of two keys in form of: {'asset_name': 'data_file_name'},
                                        test data or None, ignored when `assets_dataframes` arg. is given
            assets_dataframes:          dict of two keys in form of {'asset_name': pd.DataFrame},
                                        an alternative way to provide test data as pandas.DataFrame instances,
                                        overrides `assets_filenames`
            train_episode_duration:     dict of keys {'days', 'hours', 'minutes'} - train sample duration
            test_episode_duration:      dict of keys {'days', 'hours', 'minutes'} - test sample duration
        """
        super().__init__(
            *args,
            _train_class_ref=BivariateStateSetGenerator,
            **kwargs
        )

Source File: classification.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def prepare_input_data(self, df, actual_field, predicted_field, options):
        """ Turn the input dataframe into two label arrays ready for scoring.

        Preprocesses the input data, performs parameter validation and
        handles errors. Overwritten as needed.

        Args:
            df (pd.DataFrame): input dataframe
            actual_field (str): ground-truth labels field name
            predicted_field (str): predicted labels field name
            options (dict): input options; the optional 'mlspl_limits' entry
                is passed on to the cleaning step

        Returns:
            y_actual (np.array): preprocessed ground-truth labels
            y_predicted (np.array): preprocessed predicted labels
        """
        # Remove nans and check limits.
        limits = options.get('mlspl_limits', None)
        cleaned = prepare_classification_scoring_data(df, actual_field, predicted_field, limits)
        # Convert to str if needed.
        labelled = make_categorical(cleaned, [actual_field, predicted_field])

        # Check for inconsistencies between the parameters and the data.
        self.check_params_with_data(labelled, actual_field, predicted_field)
        # Warn if the actual and predicted fields share no values.
        check_class_intersection(labelled, actual_field, predicted_field)

        # Needed when average=None or for a confusion matrix.
        if self._meta_params.get('all_labels', False):
            self.params['labels'] = get_union_of_field_values(labelled, [actual_field, predicted_field])

        return labelled[actual_field].values, labelled[predicted_field].values

Source File: trade_api.py From TradeApi with Apache License 2.0

5 votes

def query_repo_contract(self, format=""):
        """
        Call the "oms.query_repo_contract" RPC and return the extracted result.

        The request carries no parameters.

        :param format: requested output format; resolved through
            self._get_format with "pandas" as the fallback.
        :return: (None, msg) when self._check_session() reports failure,
            otherwise whatever utils.extract_result returns for class
            "RepoContract" (presumably a pandas DataFrame for the "pandas"
            format -- confirm).
        """
        session_ok, error_message = self._check_session()
        if not session_ok:
            return (None, error_message)

        response = self._remote.call("oms.query_repo_contract", {})
        # The format is resolved only after the remote call, as before.
        data_format = self._get_format(format, "pandas")
        return utils.extract_result(response, data_format=data_format, class_name="RepoContract")

Source File: feature_utils.py From 2020plus with Apache License 2.0

5 votes

def process_mutational_features(mydf):
    """Run the feature processing pipeline.

    Parameters
    ----------
    mydf : pd.DataFrame
        data frame containing the desired raw data for computation of
        features for classifier

    Returns
    -------
    proc_feat_df: pd.DataFrame
        dataframe consisting of features for classification, including the
        two missense entropy columns
    """
    # Rename columns to stay compatible with previously written code.
    column_aliases = {'Protein_Change': 'AminoAcid',
                      'DNA_Change': 'Nucleotide'}
    mydf = mydf.rename(columns=column_aliases)

    # The first row of the generated matrix is the header row.
    feature_rows = fmat.generate_feature_matrix(mydf, 2)
    header_row = feature_rows.pop(0)
    raw_feature_df = pd.DataFrame(feature_rows, columns=header_row)
    proc_feat_df = normalize_mutational_features(raw_feature_df, 0)

    # Only the missense entropy features are added; the mutation position
    # entropy features are currently disabled.
    missense_entropy_df = pentropy.missense_position_entropy(mydf[['Gene', 'AminoAcid']])
    for entropy_column in ('missense position entropy',
                           'pct of uniform missense entropy'):
        proc_feat_df[entropy_column] = missense_entropy_df[entropy_column]
    return proc_feat_df

Source File: feature_utils.py From 2020plus with Apache License 2.0

5 votes

def random_sort(df, prng=None):
    """Return a copy of a DataFrame with its rows in random order.

    NOTE: if the training data is not randomly shuffled, then
    supervised learning may find artifacts related to the order
    of the data.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe with feature information
    prng : np.random.RandomState, optional
        random number generator used for the shuffle; a fresh
        RandomState is created when omitted

    Returns
    -------
    df : pd.DataFrame
        Randomly shuffled data frame
    """
    rng = np.random.RandomState() if prng is None else prng

    # Draw every index label exactly once, in random order.
    shuffled_labels = rng.choice(df.index.values, len(df), replace=False)

    return df.loc[shuffled_labels].copy()

Source File: QualityModule.py From staramr with Apache License 2.0

5 votes

def _get_num_contigs_over_minimum_bp_feedback(self,files_contigs_lengths,minimum_contig_length,unacceptable_num_contigs_over_minimum_bp):
        """
        Goes through the files and determines whether or not they pass the quality metrics for the acceptable number of contigs equal to or above the minimum contig length
        :param files_contigs_lengths: The lengths of the contigs for the files
        :param minimum_contig_length: The minimum contig length as defined by the user for quality metrics
        :param unacceptable_num_contigs: The number of contigs in a file, equal to or above our minimum contig length, for which to raise a flag as defined by the user for quality metrics
        :return: An array where the first element is itself an array where each element is the number of contigs equal to or above the minimum contig length for
        the corresponding file. The second element is itself an array where each element is the feedback (True or False) 
        for whether the corresponding file passes the acceptable number of contigs equal to or above the minimum contig length quality metric
        """
        #This array is what we will return and will contain for each of the files the number of contigs of length greater than or equal to the minimum contig length 
        #as well as the feedback for whether or not this number of contigs is greater than or equal to the unacceptable number of contigs and thus wether it passes or fails the quality metric
        #this array will be used to construct our quality module dataframe
        feedback=[]
        #This array will contain the number of contigs of length greater than or equal to the minimum contig length for the files, it will be returned as the first element of feedback
        file_num_contigs=[]
        #This array will contain the feedback of either True or False for whether or not the corresponding files pass the quality metrics for the acceptable number of contigs equal to 
        #or above the minimum contig length, it will be returned as the second element of feedback
        contigs_over_minimum_bp_feedback=[]
        for file_contigs_lengths in files_contigs_lengths:
            num_contigs = 0

            for contig in file_contigs_lengths:
                if contig >= minimum_contig_length:
                    num_contigs = num_contigs+1

            file_num_contigs.append(num_contigs)


        for file_num_contigs_over_minimum_bp in file_num_contigs:
            if file_num_contigs_over_minimum_bp >= unacceptable_num_contigs_over_minimum_bp:
                contigs_over_minimum_bp_feedback.append(False)

            else:
                contigs_over_minimum_bp_feedback.append(True)


        feedback.append(file_num_contigs)
        feedback.append(contigs_over_minimum_bp_feedback)

        return feedback

Source File: QualityModule.py From staramr with Apache License 2.0

5 votes

def _get_genome_length_feedback(self,files_genome_lengths,lb_gsize,ub_gsize):
        """
        Goes through the files and determines whether or not they pass the quality metrics for genome length
        :param files_genome_lengths: An array where each element is the genome length of the corresponding file
        :param lb_gsize: The lower bound for the genome size as defined by the user for quality metrics
        :param ub_gsize: The upper bound for the genome size as defined by the user for quality metrics
        :return: An array where each element corresponds to the feedback (true or false) for the corresponding file in regards to the
        genome size quality metric
        """
        #The array contains the feedback of either True or false for whether or not the corresponding files pass the genome length quality metric, and
        #this feedback will be used to construc our quality module dataframe
        files_genome_feedback=[genome_length >= lb_gsize and genome_length <= ub_gsize for genome_length in files_genome_lengths]
        return files_genome_feedback

Source File: QualityModule.py From staramr with Apache License 2.0

5 votes

def create_quality_module_dataframe(self):
        """
        Builds the quality module dataframe for the files in self._files. For every file it holds the genome length, the N50 value and the number of contigs
        greater than or equal to the minimum contig length, together with the result of the quality metrics and the feedback on that result
        :return: A pd.DataFrame with one row per file, containing the genome length, N50 value, number of contigs equal to or above our user defined
        minimum contig length as well as the quality module result and the corresponding feedback
        """
        #The isolate ID of a file is its base name without the extension
        isolate_ids=[path.splitext(path.basename(file_path))[0] for file_path in self._files]

        #First element holds the contig lengths per file, second element the genome lengths
        lengths=self._get_files_contigs_and_genomes_lengths(self._files)
        contig_lengths,genome_lengths=lengths[0],lengths[1]

        genome_feedback=self._get_genome_length_feedback(genome_lengths,self._genome_size_lower_bound,self._genome_size_upper_bound)
        n50=self._get_N50_feedback(contig_lengths,genome_lengths,self._minimum_N50_value)
        contig_counts=self._get_num_contigs_over_minimum_bp_feedback(contig_lengths,self._minimum_contig_length,self._unacceptable_num_contigs)

        quality_module=self._get_quality_module(genome_feedback,n50[1],contig_counts[1])
        detailed_feedback=quality_module[0]
        overall_result=quality_module[1]

        #Frame with the quality metric values, indexed by the file names so it can be merged with the feedback frame
        contig_count_column='Number of Contigs Greater Than Or Equal To '+str(self._minimum_contig_length)+' bp'
        metric_rows=[list(row) for row in zip(isolate_ids,genome_lengths,n50[0],contig_counts[0])]
        metrics_frame=pd.DataFrame(metric_rows,
            columns=('Isolate ID','Genome Length','N50 value',contig_count_column)).set_index('Isolate ID')

        #Frame with the feedback for the quality metrics, indexed by the file names as well
        feedback_rows=[list(row) for row in zip(isolate_ids,overall_result,detailed_feedback)]
        feedback_frame=pd.DataFrame(feedback_rows,
            columns=('Isolate ID','Quality Module','Quality Module Feedback')).set_index('Isolate ID')

        #Quality metric values combined with their corresponding feedback
        return metrics_frame.merge(feedback_frame, on='Isolate ID', how='left')

Python pandas.DataFrame() Examples