Python pandas.Dataframe() Examples
The following are 30 code examples of pandas.Dataframe() (note that the pandas class itself is spelled `pandas.DataFrame`).
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: glove_preprocessor.py From interpret-text with MIT License | 7 votes |
def preprocess(self, data) -> pd.DataFrame:
    """
    Tokenize every text in ``data`` and gather the results in a dataframe.

    Each text is handed to ``self.generate_tokens``, which returns a
    ``(token_list, mask)`` pair. The word token count of a text is the sum
    of its mask.

    :param data: list of strings (e.g. sentences)
    :type data: list
    :return: tokens (pd.DataFrame): one row per text with the columns
        "tokens" (token ids), "mask" (pad/word mask) and "counts"
        (sum of the mask)
    :rtype: pandas dataframe
    """
    # One (token_list, mask) pair per input text, in input order.
    generated = [self.generate_tokens(sentence) for sentence in data]
    columns = {
        "tokens": [token_list for token_list, _ in generated],
        "mask": [mask for _, mask in generated],
        "counts": [np.sum(mask) for _, mask in generated],
    }
    return pd.DataFrame(columns)
Example #2
Source File: generic.py From pyiron with BSD 3-Clause "New" or "Revised" License | 6 votes |
def queue_table(self, project_only=True, recursive=True, full_table=False):
    """
    Display the queuing system table as pandas.DataFrame

    Thin wrapper: collects the job ids via ``self.get_job_ids`` and forwards
    everything to the module-level ``queue_table`` function.

    Args:
        project_only (bool): Query only for jobs within the current project - True by default
        recursive (bool): Include jobs from sub projects
        full_table (bool): Whether to show the entire pandas table

    Returns:
        pandas.DataFrame: Output from the queuing system - optimized for the Sun grid engine
    """
    job_ids = self.get_job_ids(recursive=recursive)
    return queue_table(
        job_ids=job_ids,
        project_only=project_only,
        full_table=full_table,
    )
Example #3
Source File: forecaster.py From prophet with MIT License | 6 votes |
def add_group_component(self, components, name, group):
    """Adds a component with given name that contains all of the components
    in group.

    Parameters
    ----------
    components: Dataframe with components (columns 'col' and 'component').
    name: Name of new group component.
    group: List of components that form the group.

    Returns
    -------
    Dataframe with components. If none of the components in ``group`` is
    present, the input dataframe is returned unchanged.
    """
    new_comp = components[components['component'].isin(set(group))].copy()
    group_cols = new_comp['col'].unique()
    if len(group_cols) > 0:
        new_comp = pd.DataFrame({'col': group_cols, 'component': name})
        # DataFrame.append was removed in pandas 2.0; pd.concat yields the
        # same result (rows appended, original index labels kept).
        components = pd.concat([components, new_comp])
    return components
Example #4
Source File: glm_reporter.py From nistats with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _dataframe_to_html(df, precision, **kwargs): """ Makes HTML table from provided dataframe. Removes HTML5 non-compliant attributes (ex: `border`). Parameters ---------- df: pandas.Dataframe Dataframe to be converted into HTML table. precision: int The display precision for float values in the table. **kwargs: keyworded arguments Supplies keyworded arguments for func: pandas.Dataframe.to_html() Returns ------- html_table: String Code for HTML table. """ with pd.option_context('display.precision', precision): html_table = df.to_html(**kwargs) html_table = html_table.replace('border="1" ', '') return html_table
Example #5
Source File: io.py From code-for-the-world with MIT License | 6 votes |
def read_selig(path):
    """Read a Selig-style airfoil file

    The first line of the file is consumed as the header row and the two
    columns are then renamed to 'x' and 'y'.

    Parameters
    -----------
    path : str
        Path to the Selig-style .dat file.

    Returns
    -------
    air_df : pd.DataFrame
        Pandas DataFrame containing x- and y-coordinates of airfoil data.
    """
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True (deprecated since pandas 2.2).
    air_df = pd.read_csv(path, sep=r"\s+", header=0)
    air_df.columns = ['x', 'y']
    return air_df
Example #6
Source File: forecaster.py From prophet with MIT License | 6 votes |
def predict_uncertainty(self, df):
    """Prediction intervals for yhat and trend.

    The interval bounds are the lower and upper percentiles (derived from
    ``self.interval_width``) of the posterior predictive samples, taken
    along axis 1.

    Parameters
    ----------
    df: Prediction dataframe.

    Returns
    -------
    Dataframe with uncertainty intervals (columns yhat_lower, yhat_upper,
    trend_lower, trend_upper).
    """
    samples = self.sample_posterior_predictive(df)

    lower_p = 100 * (1.0 - self.interval_width) / 2
    upper_p = 100 * (1.0 + self.interval_width) / 2

    intervals = {}
    for component in ('yhat', 'trend'):
        component_samples = samples[component]
        intervals['{}_lower'.format(component)] = self.percentile(
            component_samples, lower_p, axis=1)
        intervals['{}_upper'.format(component)] = self.percentile(
            component_samples, upper_p, axis=1)

    return pd.DataFrame(intervals)
Example #7
Source File: forecaster.py From prophet with MIT License | 6 votes |
def predictive_samples(self, df):
    """Sample from the posterior predictive distribution.

    Parameters
    ----------
    df: Dataframe with dates for predictions (column ds), and capacity
        (column cap) if logistic growth. A copy is prepared, so the
        caller's object is not handed to ``setup_dataframe``.

    Returns
    -------
    Dictionary with keys "trend" and "yhat" containing
    posterior predictive samples for that component.
    """
    prepared = self.setup_dataframe(df.copy())
    return self.sample_posterior_predictive(prepared)
Example #8
Source File: QADataStruct.py From QUANTAXIS with MIT License | 6 votes |
def __init__(self, DataFrame):
    """Stock Transaction

    Arguments:
        DataFrame {pd.DataFrame} -- [input is one/multi day transaction]

    If the frame has no 'amount' column, one is derived as
    volume * price * 100 from the 'vol' column (or, failing that, the
    'volume' column). Note that this column is written into the frame that
    was passed in. An '_id' column, when present, is dropped.
    """
    self.type = 'stock_transaction'
    self.data = DataFrame

    if 'amount' not in DataFrame.columns:
        # 'vol' takes precedence over 'volume'; only the first match is used.
        for volume_column in ('vol', 'volume'):
            if volume_column in DataFrame.columns:
                self.data['amount'] = (
                    self.data[volume_column] * self.data['price'] * 100
                )
                break

    if '_id' in DataFrame.columns:
        self.data = self.data.drop(["_id"], axis=1)

    self.mongo_coll = DATABASE.stock_transaction
Example #9
Source File: QADataStruct.py From QUANTAXIS with MIT License | 6 votes |
def __init__(self, DataFrame):
    """Index Transaction

    Arguments:
        DataFrame {pd.DataFrame} -- [input is one/multi day transaction]

    If the frame has no 'amount' column, one is derived as
    volume * price * 100 from the 'vol' column (or, failing that, the
    'volume' column). Note that this column is written into the frame that
    was passed in. An '_id' column, when present, is dropped.
    """
    self.type = 'index_transaction'
    self.data = DataFrame

    if 'amount' not in DataFrame.columns:
        # 'vol' takes precedence over 'volume'; only the first match is used.
        for volume_column in ('vol', 'volume'):
            if volume_column in DataFrame.columns:
                self.data['amount'] = (
                    self.data[volume_column] * self.data['price'] * 100
                )
                break

    if '_id' in DataFrame.columns:
        self.data = self.data.drop(["_id"], axis=1)

    self.mongo_coll = DATABASE.index_transaction
Example #10
Source File: generic.py From pyiron with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_jobs(self, recursive=True, columns=None):
    """
    Internal function to return the jobs as dictionary rather than a pandas.DataFrame

    When ``self.db`` is a ``FileTable`` the query is answered by the table
    itself; otherwise it is delegated to the module-level ``get_jobs``
    function together with the SQL query, user and project path.

    Args:
        recursive (bool): search subprojects [True/False]
        columns (list): by default only the columns ['id', 'project'] are selected, but the user can select a subset
                        of ['id', 'status', 'chemicalformula', 'job', 'subjob', 'project', 'projectpath',
                        'timestart', 'timestop', 'totalcputime', 'computer', 'hamilton', 'hamversion', 'parentid',
                        'masterid']

    Returns:
        dict: columns are used as keys and point to a list of the corresponding values
    """
    if isinstance(self.db, FileTable):
        return self.db.get_jobs(
            project=self.project_path, recursive=recursive, columns=columns
        )
    return get_jobs(
        database=self.db,
        sql_query=self.sql_query,
        user=self.user,
        project_path=self.project_path,
        recursive=recursive,
        columns=columns,
    )
Example #11
Source File: moment_est.py From OptimalPortfolio with MIT License | 6 votes |
def __init__(self, invariants, n, frequency=252):
    """
    Stores the invariants and their sample covariance matrix.

    :param invariants: sample data of market invariants
    :type invariants: pd.DataFrame (anything else triggers a RuntimeWarning
        and is converted with ``pd.DataFrame(invariants)``)
    :param n: number of assets
    :type n: int
    :param frequency: time horizon of projection
    :type frequency: int
    """
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants is not pd.Dataframe", RuntimeWarning)
        # Convert after warning, otherwise the .cov() call below fails on
        # plain arrays/lists.
        invariants = pd.DataFrame(invariants)
    self.invariants = invariants
    self.S = self.invariants.cov()
    self.frequency = frequency
    self.n = n
Example #12
Source File: generic.py From pyiron with BSD 3-Clause "New" or "Revised" License | 6 votes |
def queue_table_global(self, full_table=False):
    """
    Display the queuing system table as pandas.DataFrame

    Queue entries whose job name starts with "pi_" are mapped to a database
    id by stripping "pi_" and ".sh" from the name, and the matching
    database items are collected into a dataframe.

    Args:
        full_table (bool): Whether to show the entire pandas table

    Returns:
        pandas.DataFrame: Output from the queuing system - optimized for the Sun grid engine.
        None is returned when the queue table is empty or no database is attached.
    """
    df = queue_table(job_ids=[], project_only=False, full_table=full_table)
    if len(df) == 0 or self.db is None:
        return None

    job_names = (
        str(queue_ID)
        for queue_ID in df["jobname"]
        if str(queue_ID).startswith("pi_")
    )
    return pandas.DataFrame(
        [
            self.db.get_item_by_id(
                int(job_name.replace("pi_", "").replace(".sh", ""))
            )
            for job_name in job_names
        ]
    )
Example #13
Source File: hdfio.py From pyiron with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_from_table(self, path, name):
    """
    Get a specific value from a stored table

    The object loaded from ``path`` is indexed with "Parameter" and "Value";
    the "Parameter" entry must support ``in`` and ``.index()`` (e.g. a list).

    Args:
        path (str): relative path to the data object
        name (str): parameter key

    Returns:
        dict, list, float, int: the value associated to the specific parameter key

    Raises:
        ValueError: if ``name`` is not among the stored parameters
    """
    table = self.get(path)
    parameters = table["Parameter"]
    if name not in parameters:
        raise ValueError("Unknown name: {0}".format(name))
    return table["Value"][parameters.index(name)]
Example #14
Source File: schemas.py From CityEnergyAnalyst with MIT License | 6 votes |
def validate(self, df):
    """Check to make sure the Dataframe conforms to the schema

    Compares the column names of ``df`` with the columns listed in
    ``self.schema["schema"]["columns"]`` and emits a warning (it does not
    raise) listing missing and extra columns when they differ.

    :param df: the dataframe to check
    """
    expected_columns = set(self.schema["schema"]["columns"].keys())
    found_columns = set(df.columns.values)

    # handle some extra cases: when the schema lists the placeholder column
    # "PIPE0" / "NODE0", every found column starting with that prefix is
    # collapsed into the single placeholder before comparing.
    for prefix in ("PIPE", "NODE"):
        placeholder = prefix + "0"
        if placeholder in expected_columns:
            found_columns = {c for c in found_columns if not c.startswith(prefix)}
            found_columns.add(placeholder)

    if found_columns != expected_columns:
        missing_columns = expected_columns - found_columns
        extra_columns = found_columns - expected_columns

        # Message fixed: separating space before "(" and closing ")" added.
        warnings.warn("Dataframe does not conform to schemas.yml specification for {lm} "
                      "(missing: {missing_columns}, extra: {extra_columns})".format(
            lm=self.lm, missing_columns=missing_columns, extra_columns=extra_columns))
Example #15
Source File: moment_est.py From OptimalPortfolio with MIT License | 6 votes |
def __init__(self, invariants, n, dist="normal"):
    """
    Stores the inputs and initialises the moment attributes to None.

    :param invariants: sample data of market invariants
    :type invariants: pd.DataFrame
    :param n: number of assets
    :type n: int
    :param dist: choice of distribution: "normal"
    :type dist: str
    """
    self.invariants, self.dist, self.n = invariants, dist, n
    # Moment attributes start unset; nothing is estimated in the constructor.
    self.mean = self.cov = self.skew = self.kurt = None
Example #16
Source File: moment_est.py From OptimalPortfolio with MIT License | 6 votes |
def exp_cov(invariants, span=180, frequency=252):
    """
    Calculates sample exponentially weighted covariance

    The exponentially weighted covariance is computed for every row of the
    data; only the block belonging to the last row is kept and scaled by
    ``frequency``.

    :param invariants: sample data of market invariants
    :type invariants: pd.DataFrame (anything else triggers a RuntimeWarning
        and is converted with ``pd.DataFrame(invariants)``)
    :param frequency: time horizon of projection
    :type frequency: int
    :param span: the span for exponential weights
    :return: sample exponentially weighted covariance dataframe
    """
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)

    n_assets = len(invariants.columns)
    rolling_cov = invariants.ewm(span=span).cov()
    # The trailing n_assets rows hold the covariance matrix of the last row.
    latest_cov = rolling_cov.iloc[-n_assets:, -n_assets:]
    return pd.DataFrame(latest_cov * frequency)
Example #17
Source File: moment_est.py From OptimalPortfolio with MIT License | 6 votes |
def sample_moment(invariants, order, frequency=252):
    """
    Calculates nth moment of sample data.

    The moment is computed with ``moment(..., moment=order)`` and the result
    is multiplied by ``frequency``.

    :param invariants: sample data of market invariants
    :type invariants: pd.DataFrame (anything else triggers a RuntimeWarning
        and is converted with ``pd.DataFrame(invariants)``)
    :param order: order of moment
    :type order: int
    :param frequency: time horizon of projection
    :type frequency: int
    :return: nth moment of sample invariants
    """
    if isinstance(invariants, pd.DataFrame):
        frame = invariants
    else:
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        frame = pd.DataFrame(invariants)
    return moment(frame, moment=order) * frequency
Example #18
Source File: moment_est.py From OptimalPortfolio with MIT License | 6 votes |
def sample_coM4(invariants):
    """
    Calculates sample fourth order co-moment matrix
    Taps into the R package PerformanceAnalytics through rpy2 (R function
    ``M4.MM``).

    :param invariants: sample data of market invariants
    :type invariants: pd.DataFrame (anything else triggers a RuntimeWarning
        and is converted with ``pd.DataFrame(invariants)``)
    :return: the result of ``M4.MM`` wrapped in a ``np.matrix``
    """
    importr('PerformanceAnalytics')
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)

    n_assets = invariants.shape[1]
    m4_function = robjects.r('M4.MM')
    flat_values = robjects.FloatVector(np.concatenate(invariants.values))
    # NOTE(review): the R matrix is built as n_assets x n_assets although the
    # flattened data has rows * n_assets entries - confirm this is intended.
    r_matrix = robjects.r.matrix(flat_values, nrow=n_assets, ncol=n_assets)
    return np.matrix(m4_function(r_matrix))
Example #19
Source File: moment_est.py From OptimalPortfolio with MIT License | 6 votes |
def sample_coM3(invariants):
    """
    Calculates sample third order co-moment matrix
    Taps into the R package PerformanceAnalytics through rpy2 (R function
    ``M3.MM``).

    :param invariants: sample data of market invariants
    :type invariants: pd.DataFrame (anything else triggers a RuntimeWarning
        and is converted with ``pd.DataFrame(invariants)``)
    :return: the result of ``M3.MM`` wrapped in a ``np.matrix``
    """
    importr('PerformanceAnalytics')
    if not isinstance(invariants, pd.DataFrame):
        warnings.warn("invariants not a pd.Dataframe", RuntimeWarning)
        invariants = pd.DataFrame(invariants)

    n_assets = invariants.shape[1]
    m3_function = robjects.r('M3.MM')
    flat_values = robjects.FloatVector(np.concatenate(invariants.values))
    # NOTE(review): the R matrix is built as n_assets x n_assets although the
    # flattened data has rows * n_assets entries - confirm this is intended.
    r_matrix = robjects.r.matrix(flat_values, nrow=n_assets, ncol=n_assets)
    return np.matrix(m3_function(r_matrix))
Example #20
Source File: invariants.py From OptimalPortfolio with MIT License | 6 votes |
def forex_invariants(prices, no_assets):
    """
    Calculates forex price invariants, which are the compounded (log)
    returns ``log(p[t] / p[t-1])`` of the first ``no_assets`` columns.

    The column layout of the original loop-based implementation is kept:
    integer column labels with the assets in REVERSED order (column 0 holds
    the returns of asset ``no_assets - 1``).

    :param prices: price data of the various tickers, one column per asset
    :type prices: pd.DataFrame (anything else triggers a RuntimeWarning and
        is converted with ``pd.DataFrame(prices)``)
    :param no_assets: number of assets (leading columns) in data
    :type no_assets: int
    :return: dataframe of invariants with ``len(prices) - 1`` rows; an
        empty dataframe when there are fewer than two price rows or no
        assets
    :rtype: pd.DataFrame
    :raises IndexError: if ``no_assets`` exceeds the number of columns
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("prices are not a pd Dataframe", RuntimeWarning)
        # Convert after warning so the positional indexing below works.
        prices = pd.DataFrame(prices)

    # list(range(...)) selects nothing for no_assets <= 0 and raises
    # IndexError when no_assets is larger than the number of columns.
    selected = prices.iloc[:, list(range(no_assets))].to_numpy(dtype=float)
    if selected.shape[0] < 2 or selected.shape[1] == 0:
        return pd.DataFrame()

    # Vectorised replacement of the per-element .iloc loops.
    log_returns = np.log(selected[1:] / selected[:-1])
    # Reverse the columns: the original prepended each asset's returns.
    return pd.DataFrame(log_returns[:, ::-1])
Example #21
Source File: invariants.py From OptimalPortfolio with MIT License | 6 votes |
def stock_invariants(prices, no_assets):
    """
    Calculates stock price invariants, which are the compounded (log)
    returns ``log(p[t] / p[t-1])`` of the first ``no_assets`` columns.

    The column layout of the original loop-based implementation is kept:
    integer column labels with the assets in REVERSED order (column 0 holds
    the returns of asset ``no_assets - 1``).

    :param prices: stock prices data of the various tickers, one column per
        asset
    :type prices: pd.DataFrame (anything else triggers a RuntimeWarning and
        is converted with ``pd.DataFrame(prices)``)
    :param no_assets: number of assets (leading columns) in data
    :type no_assets: int
    :return: dataframe of stock invariants with ``len(prices) - 1`` rows; an
        empty dataframe when there are fewer than two price rows or no
        assets
    :rtype: pd.DataFrame
    :raises IndexError: if ``no_assets`` exceeds the number of columns
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("prices are not a pd Dataframe", RuntimeWarning)
        # Convert after warning so the positional indexing below works.
        prices = pd.DataFrame(prices)

    # list(range(...)) selects nothing for no_assets <= 0 and raises
    # IndexError when no_assets is larger than the number of columns.
    selected = prices.iloc[:, list(range(no_assets))].to_numpy(dtype=float)
    if selected.shape[0] < 2 or selected.shape[1] == 0:
        return pd.DataFrame()

    # Vectorised replacement of the per-element .iloc loops.
    log_returns = np.log(selected[1:] / selected[:-1])
    # Reverse the columns: the original prepended each asset's returns.
    return pd.DataFrame(log_returns[:, ::-1])
Example #22
Source File: pdutils.py From pysystemtrade with GNU General Public License v3.0 | 6 votes |
def dataframe_pad(starting_df, column_list, padwith=0.0):
    """
    Takes a dataframe and adds extra columns if necessary so we end up with
    columns named column_list

    Columns already present in starting_df are copied over; every other
    requested column is filled with ``padwith``. Columns of starting_df that
    are not in column_list are dropped.

    :param starting_df: A pd.DataFrame with named columns
    :param column_list: A list of column names
    :param padwith: The value to pad missing columns with
    :return: pd.DataFrame
    """

    def _pad_column(column_name, starting_df, padwith):
        if column_name in starting_df.columns:
            return starting_df[column_name]
        # Honour padwith (previously 0.0 was hard-coded here).
        return pd.Series(padwith, index=starting_df.index)

    if len(column_list) == 0:
        # pd.concat refuses an empty list; return a frame without columns.
        return pd.DataFrame(index=starting_df.index)

    new_data = [
        _pad_column(column_name, starting_df, padwith)
        for column_name in column_list
    ]

    new_df = pd.concat(new_data, axis=1)
    new_df.columns = column_list

    return new_df
Example #23
Source File: logistic_regressor.py From autoimpute with MIT License | 6 votes |
def predict_proba(self, X):
    """Predict probabilities of class membership for logistic regression.

    The pooled coefficients stored in ``self.statistics_["coefs"]`` are
    used to form the linear predictor (first coefficient is the intercept,
    the remaining ones are the slopes). The sigmoid function is then
    applied to that linear predictor, and the resulting values are returned.

    Args:
        X (pd.DataFrame): predictors to predict response

    Returns:
        np.array: prob of class membership for predicted observations.
    """
    # run validation first
    X = self._predict_strategy_validator(self, X)

    # intercept first, slopes afterwards
    coefs = self.statistics_["coefs"].values
    intercept, slopes = coefs[0], coefs[1:]
    linear_predictor = intercept + np.dot(X, slopes)
    return self._sigmoid(linear_predictor)
Example #24
Source File: parallel.py From pyiron with BSD 3-Clause "New" or "Revised" License | 6 votes |
def output_to_pandas(self, sort_by=None, h5_path="output"):
    """
    Convert output of all child jobs to a pandas DataFrame object.

    Every node found under ``h5_path`` is copied into ``self._output``
    before the dataframe is built from it.

    Args:
        sort_by (str): sort the output using pandas.DataFrame.sort_values(by=sort_by)
        h5_path (str): select child output to include - default='output'

    Returns:
        pandas.DataFrame: output as dataframe
    """
    # TODO: The output to pandas function should no longer be required
    with self.project_hdf5.open(h5_path) as hdf:
        for node_name in hdf.list_nodes():
            self._output[node_name] = hdf[node_name]

    frame = pandas.DataFrame(self._output)
    if sort_by is None:
        return frame
    return frame.sort_values(by=sort_by)

# TODO: make it more general and move it then into genericJob
Example #25
Source File: exchange_algorithm.py From catalyst with Apache License 2.0 | 5 votes |
def get_frame_stats(self):
    """
    preparing the stats before analyze

    Combines the stats pickled under ``<algo folder>/frame_stats`` with the
    in-memory ``self.frame_stats``; both are indexed by 'period_close'.

    :return: stats: pd.DataFrame
    """
    # add the last day stats which is not saved in the directory
    current_stats = pd.DataFrame(self.frame_stats)
    current_stats.set_index('period_close', drop=False, inplace=True)

    # get the location of the directory
    algo_folder = get_algo_folder(self.algo_namespace)
    folder = join(algo_folder, 'frame_stats')
    if not exists(folder):
        return current_stats

    period_stats_list = []
    for item in listdir(folder):
        filename = join(folder, item)
        if not isfile(filename):
            continue
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only safe while this folder is written by the application itself.
        with open(filename, 'rb') as handle:
            period_stats_list.extend(pickle.load(handle))

    if not period_stats_list:
        # Folder exists but holds no saved periods: an empty frame has no
        # 'period_close' column, so set_index would raise KeyError.
        return current_stats

    stats = pd.DataFrame(period_stats_list)
    stats.set_index('period_close', drop=False, inplace=True)
    return pd.concat([stats, current_stats])
Example #26
Source File: accounting.py From pysystemtrade with GNU General Public License v3.0 | 5 votes |
def to_frame(self, curve_type="net"):
    """
    Returns individual return curves as a data frame

    The attribute of ``self`` named by ``curve_type`` is looked up and its
    own ``to_frame()`` result is returned.

    :param curve_type: gross, net or costs
    :type curve_type: str

    :returns: pd.DataFrame TxN
    """
    return getattr(self, curve_type).to_frame()
Example #27
Source File: split_data.py From vae-anomaly-detector with MIT License | 5 votes |
def split_data(dataframe, split):
    """
    Split the data into a training set and a test set.

    The row positions come from ``_split_indices``; only its 'train' and
    'test' entries are used here.

    Args:
        dataframe: (pandas.DataFrame)
        split: (list of float) train/valid/test split

    Returns:
        tuple: (train_data, test_data) selected positionally from dataframe
    """
    positions = _split_indices(dataframe, split)
    return dataframe.iloc[positions['train']], dataframe.iloc[positions['test']]
Example #28
Source File: agg.py From quantipy with MIT License | 5 votes |
def make_default_cat_view(link, weights=None):
    '''
    Creates Quantipy's default categorical aggregation for a link.

    Chains get_matrix(), weight_matrix(), _default_cat_df() and
    struct.set_qp_multiindex().

    Parameters
    ----------
    link : the link definition; its ``x`` and ``y`` attributes are used to
        label the resulting multiindex.
    weights : passed through to get_matrix(), default None.

    Returns
    -------
    view_df : pd.DataFrame (multiindexed)
    '''
    matrix, xdef, ydef = get_matrix(link, weights)
    weighted = weight_matrix(matrix, xdef)
    cat_df = _default_cat_df(weighted, xdef, ydef)
    return struct.set_qp_multiindex(cat_df, link.x, link.y)
Example #29
Source File: struct.py From quantipy with MIT License | 5 votes |
def deep_drop(df, targets, axes=(0, 1)):
    '''
    Drops all labels given in the targets list from the defined axes of
    the passed dataframe. The dataframe is allowed to be multiindexed on
    both axes; labels are dropped from every second level (1, 3, ...).

    Only the axes that are actually requested are inspected, so a frame
    that is multiindexed on just one axis can be processed for that axis.
    An axis with a single level has no level 1 and is left untouched.

    Parameters
    ---------
    df : pd.DataFrame
    targets : string or sequence of strings
        Labels to be dropped.
    axes : int or list/tuple of int, default = (0, 1)
        Specification of the axes to drop from. Will perform the drop on
        both axes by default.

    Returns
    -------
    df : pd.DataFrame
    '''
    if not isinstance(targets, (list, tuple)):
        targets = [targets]
    if not isinstance(axes, (list, tuple)):
        axes = [axes]
    for axis in axes:
        # df.axes is [index, columns]; nlevels is 1 for a flat Index.
        n_levels = df.axes[axis].nlevels
        for level in range(1, n_levels, 2):
            for target in targets:
                df = df.drop(target, axis=axis, level=level)
    return df
Example #30
Source File: clean.py From cfanalytics with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _overall_percentile(self): """Add an overall percentile column. Returns ------- cfopendata : pd.Dataframe Crossfit open data with add overall percentile columns. """ col = self.df['Overall_rank'] pct = np.flip(np.round(np.linspace(0, 100, num=len(self.df)), decimals=4), 0) # Check for duplications pct = self._rm_dups(col, pct) self.cleandata.iloc[:,10] = pct return self