Python pandas.DataFrames() Examples
The following are 22 code examples of pandas.DataFrames().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: checks.py From bulwark with GNU Lesser General Public License v3.0 | 6 votes |
def is_same_as(df, df_to_compare, **kwargs):
    """Assert that two pd.DataFrames are equal.

    Args:
        df (pd.DataFrame): Any pd.DataFrame.
        df_to_compare (pd.DataFrame): A second pd.DataFrame.
        **kwargs (dict): Keyword arguments passed through to pandas'
            ``assert_frame_equal``.

    Returns:
        Original `df`.

    Raises:
        AssertionError: If the frames differ. The pandas assertion error
            is chained as the cause.
    """
    try:
        tm.assert_frame_equal(df, df_to_compare, **kwargs)
    except AssertionError as mismatch:
        # Re-raise with a short, stable message; details stay on __cause__.
        raise AssertionError("DataFrames are not equal") from mismatch
    else:
        return df
Example #2
Source File: correlations.py From pysystemtrade with GNU General Public License v3.0 | 6 votes |
def __init__(self, corr_list, column_names, fit_dates):
    """
    Store a list of correlation estimates together with their labels.

    :param corr_list: the correlation estimates, kept as ``self.corr_list``
        (presumably one entry per fitting period -- TODO confirm)

    :param column_names: labels for the correlated items, kept as
        ``self.columns``

    :param fit_dates: fitting-period information, kept as ``self.fit_dates``
        (presumably aligned with ``corr_list`` -- TODO confirm)
    """
    # Plain attribute assignment; the attribute names are fixed literals,
    # so there is no need to go through setattr().
    self.corr_list = corr_list
    self.columns = column_names
    self.fit_dates = fit_dates
Example #3
Source File: accounts_inputs.py From pysystemtrade with GNU General Public License v3.0 | 6 votes |
def get_capped_forecast(self, instrument_code, rule_variation_name):
    """
    Fetch the capped forecast for one instrument and rule variation.

    Pure delegation to the ``forecastScaleCap`` stage of the parent system.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :param rule_variation_name: name of the trading rule variation
    :type rule_variation_name: str

    :returns: whatever ``forecastScaleCap.get_capped_forecast`` returns
        (described upstream as a Tx1 pd.DataFrame)
    """
    scale_cap_stage = self.parent.forecastScaleCap
    capped_forecast = scale_cap_stage.get_capped_forecast(
        instrument_code, rule_variation_name)
    return capped_forecast
Example #4
Source File: accounts_inputs.py From pysystemtrade with GNU General Public License v3.0 | 6 votes |
def get_forecast_weights(self, instrument_code):
    """
    Fetch the forecast weights for one instrument.

    Pure delegation to the ``combForecast`` stage of the parent system.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :returns: whatever ``combForecast.get_forecast_weights`` returns
        (described upstream as a dict of Tx1 pd.DataFrames)
    """
    combine_stage = self.parent.combForecast
    weights = combine_stage.get_forecast_weights(instrument_code)
    return weights
Example #5
Source File: accounts_inputs.py From pysystemtrade with GNU General Public License v3.0 | 6 votes |
def get_daily_returns_volatility(self, instrument_code):
    """
    Return the volatility of daily returns (price units, not %).

    Uses the ``rawdata`` stage of the parent system when it has one;
    otherwise the volatility is computed here from daily price differences.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :returns: result of ``rawdata.daily_returns_volatility`` or of
        ``robust_vol_calc`` (described upstream as a Tx1 pd.DataFrame)
    """
    system = self.parent

    if not hasattr(system, "rawdata"):
        # No raw data stage available: derive the estimate from prices.
        daily_price = self.get_daily_price(instrument_code)
        return robust_vol_calc(daily_price.diff())

    return system.rawdata.daily_returns_volatility(instrument_code)
Example #6
Source File: accounts_inputs.py From pysystemtrade with GNU General Public License v3.0 | 6 votes |
def get_aligned_forecast(self, instrument_code, rule_variation_name):
    """
    Return the capped forecast re-indexed onto the daily price index.

    Dates present in the price index but missing from the forecast are
    forward-filled from the most recent forecast value.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :param rule_variation_name: name of the trading rule variation
    :type rule_variation_name: str

    :returns: the forecast aligned to ``get_daily_price(...).index``
    """
    daily_price = self.get_daily_price(instrument_code)
    capped = self.get_capped_forecast(instrument_code, rule_variation_name)

    on_price_index = capped.reindex(daily_price.index)
    return on_price_index.ffill()
Example #7
Source File: accounts_inputs.py From pysystemtrade with GNU General Public License v3.0 | 5 votes |
def get_forecast_diversification_multiplier(self, instrument_code):
    """
    Fetch the forecast diversification multiplier (f.d.m.) for one instrument.

    Pure delegation to the ``combForecast`` stage of the parent system.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :returns: whatever
        ``combForecast.get_forecast_diversification_multiplier`` returns
    """
    combine_stage = self.parent.combForecast
    multiplier = combine_stage.get_forecast_diversification_multiplier(
        instrument_code)
    return multiplier
Example #8
Source File: conftest.py From kartothek with MIT License | 5 votes |
def meta_partitions_evaluation_dataframe(metadata_version):
    """
    Create a list of four MetaPartitions for testing.

    Each partition carries a single-row in-memory pd.DataFrame under the
    ``"PRED"`` key, with columns ``P``, ``L``, ``HORIZON`` and ``PRED``.
    The partitions hold no external references, i.e. files are empty.
    """
    # (label, value used for both P and L, HORIZON, PRED), in output order
    partition_specs = [
        ("cluster_1_1", 1, 1, 10),
        ("cluster_1_2", 1, 2, 20),
        ("cluster_2_1", 2, 1, 10),
        ("cluster_2_2", 2, 2, 20),
    ]

    partitions = []
    for label, cluster, horizon, prediction in partition_specs:
        frame = pd.DataFrame(
            OrderedDict(
                [
                    ("P", [cluster]),
                    ("L", [cluster]),
                    ("HORIZON", [horizon]),
                    ("PRED", [prediction]),
                ]
            )
        )
        partitions.append(
            MetaPartition(
                label=label, data={"PRED": frame}, metadata_version=metadata_version
            )
        )
    return partitions
Example #9
Source File: conftest.py From kartothek with MIT License | 5 votes |
def meta_partitions_dataframe_function(metadata_version):
    """
    Create a list of MetaPartitions for testing.

    Thin wrapper around ``_get_meta_partitions_with_dataframe``. The
    partitions include in-memory pd.DataFrames without external references,
    i.e. files are empty.
    """
    partitions = _get_meta_partitions_with_dataframe(metadata_version)
    return partitions
Example #10
Source File: conftest.py From kartothek with MIT License | 5 votes |
def meta_partitions_dataframe(metadata_version):
    """
    Create a list of MetaPartitions for testing.

    The partitions are built inside the ``cm_frozen_time(TIME_TO_FREEZE)``
    context. They include in-memory pd.DataFrames without external
    references, i.e. files are empty.
    """
    with cm_frozen_time(TIME_TO_FREEZE):
        partitions = _get_meta_partitions_with_dataframe(metadata_version)
    return partitions
Example #11
Source File: theta.py From sktime with BSD 3-Clause "New" or "Revised" License | 5 votes |
def compute_pred_int(self, y_pred, alpha=DEFAULT_ALPHA):
    """
    Get the prediction intervals for the forecast.

    The interval for each error term is ``y_pred - error`` (column
    ``"lower"``) to ``y_pred + error`` (column ``"upper"``). If alpha is
    iterable, multiple intervals will be calculated.

    Returns a single pd.DataFrame when exactly one interval was computed,
    otherwise a list of pd.DataFrames.
    """
    errors = self._compute_pred_errors(alpha=alpha)

    # A single alpha yields one pd.Series; normalise it to a one-element
    # list so both cases share the same construction code.
    error_terms = [errors] if isinstance(errors, pd.Series) else errors

    intervals = []
    for error in error_terms:
        bounds = {"lower": y_pred - error, "upper": y_pred + error}
        intervals.append(pd.DataFrame(bounds))

    if len(intervals) == 1:
        return intervals[0]
    return intervals
Example #12
Source File: forecast_combine.py From pysystemtrade with GNU General Public License v3.0 | 5 votes |
def get_all_forecasts(self, instrument_code, rule_variation_list=None):
    """
    Return the forecasts for one instrument across a set of rule variations.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :param rule_variation_list: rule variations to fetch forecasts for;
        if None, ``get_trading_rule_list(instrument_code)`` is used
    :type rule_variation_list: list of str

    :returns: TxN pd.DataFrame; columns rule_variation_name

    >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping
    >>> from systems.basesystem import System
    >>> (fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping()
    >>> system1=System([rawdata, rules, fcs, ForecastCombineFixed()], data, config)
    >>> system1.combForecast.get_all_forecasts("EDOLLAR",["ewmac8"]).tail(2)
                  ewmac8
    2015-12-10 -0.190583
    2015-12-11  0.871231
    >>>
    >>> system2=System([rawdata, rules, fcs, ForecastCombineFixed()], data, config)
    >>> system2.combForecast.get_all_forecasts("EDOLLAR").tail(2)
                 ewmac16    ewmac8
    2015-12-10  3.134462 -0.190583
    2015-12-11  3.606243  0.871231
    """
    rules_to_use = rule_variation_list
    if rules_to_use is None:
        rules_to_use = self.get_trading_rule_list(instrument_code)

    return self.get_forecasts_given_rule_list(instrument_code, rules_to_use)
Example #13
Source File: forecast_combine.py From pysystemtrade with GNU General Public License v3.0 | 5 votes |
def get_capped_forecast(self, instrument_code, rule_variation_name):
    """
    Fetch the capped forecast from the previous (``forecastScaleCap``) stage.

    KEY INPUT

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :param rule_variation_name: name of the trading rule variation
    :type rule_variation_name: str

    :returns: whatever ``forecastScaleCap.get_capped_forecast`` returns;
        the example below shows a single-column frame named after the rule

    >>> from systems.tests.testdata import get_test_object_futures_with_rules_and_capping
    >>> from systems.basesystem import System
    >>> (fcs, rules, rawdata, data, config)=get_test_object_futures_with_rules_and_capping()
    >>> system=System([rawdata, rules, fcs, ForecastCombineFixed()], data, config)
    >>> system.combForecast.get_capped_forecast("EDOLLAR","ewmac8").tail(2)
                  ewmac8
    2015-12-10 -0.190583
    2015-12-11  0.871231
    """
    previous_stage = self.parent.forecastScaleCap
    capped_forecast = previous_stage.get_capped_forecast(
        instrument_code, rule_variation_name)
    return capped_forecast
Example #14
Source File: accounts_inputs.py From pysystemtrade with GNU General Public License v3.0 | 5 votes |
def get_daily_price(self, instrument_code):
    """
    Fetch the daily price of an instrument from the parent system's data.

    Pure delegation to ``self.parent.data.daily_prices``.

    :param instrument_code: instrument to look up
    :type instrument_code: str

    :returns: whatever ``data.daily_prices`` returns
        (described upstream as a Tx1 pd.DataFrame)
    """
    data_source = self.parent.data
    daily_prices = data_source.daily_prices(instrument_code)
    return daily_prices
Example #15
Source File: struct.py From quantipy with MIT License | 5 votes |
def set_qp_multiindex(df, x, y):
    '''
    Apply Quantipy's Question/Values layout to a pd.DataFrame by putting
    a two-level MultiIndex on both axes. The frame is modified in place.

    Parameters
    ----------
    df : pd.DataFrame
    x, y : str
        Variable names from the processed case data input,
        i.e. the link definition. When ``y`` is None or ``'@'`` the
        column axis is labelled with ``x`` instead.

    Returns
    -------
    df : pd.Dataframe (Quantipy convention, multiindexed)
    '''
    level_names = ['Question', 'Values']

    # None and '@' both mean "no separate y variable": label columns with x.
    if y is None or y == '@':
        column_question = x
    else:
        column_question = y

    df.index = pd.MultiIndex.from_product([[x], df.index], names=level_names)
    df.columns = pd.MultiIndex.from_product([[column_question], df.columns],
                                            names=level_names)
    return df
Example #16
Source File: functions.py From quantipy with MIT License | 5 votes |
def set_qp_multiindex(df, x, y):
    '''
    Takes a pd.DataFrame and applies Quantipy's Question/Values layout
    to it by creating a multiindex on both axes. The frame is modified
    in place.

    Parameters
    ----------
    df : pd.DataFrame
    x, y : str
        Variable names from the processed case data input,
        i.e. the link definition. ``y=None`` labels the columns with
        ``x``; ``y='@'`` replaces the column axis with the single entry
        ``(x, '@')``, so ``df`` must then have exactly one column.

    Returns
    -------
    df : pd.Dataframe (Quantipy convention, multiindexed)
    '''
    axis_labels = ['Question', 'Values']
    df.index = pd.MultiIndex.from_product([[x], df.index], names=axis_labels)
    if y is None:
        df.columns = pd.MultiIndex.from_product([[x], df.columns], names=axis_labels)
    elif y == '@':
        # Each level must be list-like: a bare '@' string is rejected by
        # current pandas ("Input must be list-like"), so wrap it in a list.
        df.columns = pd.MultiIndex.from_product([[x], ['@']], names=axis_labels)
    else:
        df.columns = pd.MultiIndex.from_product([[y], df.columns], names=axis_labels)
    return df
Example #17
Source File: functions.py From quantipy with MIT License | 5 votes |
def apply_viewdf_layout(df, x, y):
    '''
    Takes a pd.DataFrame and applies Quantipy's Question/Values layout
    to it by creating a multiindex on both axes. The frame is modified
    in place.

    Parameters
    ----------
    df : pd.DataFrame
    x, y : str
        Variable names from the processed case data input,
        i.e. the link definition. ``y=None`` labels the columns with
        ``x``; ``y='@'`` replaces the column axis with the single entry
        ``(x, '@')``, so ``df`` must then have exactly one column.

    Returns
    -------
    df : pd.Dataframe (multiindexed)
    '''
    axis_labels = ['Question', 'Values']
    df.index = pd.MultiIndex.from_product([[x], df.index], names=axis_labels)
    if y is None:
        df.columns = pd.MultiIndex.from_product([[x], df.columns], names=axis_labels)
    elif y == '@':
        # Each level must be list-like: a bare '@' string is rejected by
        # current pandas ("Input must be list-like"), so wrap it in a list.
        df.columns = pd.MultiIndex.from_product([[x], ['@']], names=axis_labels)
    else:
        df.columns = pd.MultiIndex.from_product([[y], df.columns], names=axis_labels)
    return df

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Example #18
Source File: load_data.py From CrypTen with MIT License | 5 votes |
def read_data(data_dir, dates):
    """Builds dataframe for model and func benchmarks

    Assumes directory is structured as
        DATA_PATH
        |_2020-02-20
            |_func_benchmarks.csv
            |_model_benchmarks.csv

    Each per-date CSV gets a ``date`` column holding its subdirectory name,
    then all dates are stacked in the order given. The func benchmarks are
    additionally passed through ``compute_runtime_gap`` and
    ``add_error_bars``.

    Args:
        data_dir (pathlib.path): path containing month subdirectories
        dates (list of str): containing dates / subdirectories available

    Returns: tuple of pd.DataFrames containing func and model benchmarks
        with dates
    """
    func_frames, model_frames = [], []
    for date in dates:
        path = os.path.join(data_dir, date)
        tmp_func_df = pd.read_csv(os.path.join(path, "func_benchmarks.csv"))
        tmp_model_df = pd.read_csv(os.path.join(path, "model_benchmarks.csv"))
        tmp_func_df["date"], tmp_model_df["date"] = date, date
        func_frames.append(tmp_func_df)
        model_frames.append(tmp_model_df)

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration; one concat at the end gives the same rows
    # and index. With no dates the frames stay None, as before.
    func_df = pd.concat(func_frames) if func_frames else None
    model_df = pd.concat(model_frames) if model_frames else None

    func_df = compute_runtime_gap(func_df)
    func_df = add_error_bars(func_df)
    return func_df, model_df
Example #19
Source File: transform_problem.py From estimagic with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _check_params(params):
    """Check params has a unique index and contains no columns to be created internally.

    Args:
        params (pd.DataFrame or list of pd.DataFrames): See :ref:`params`.

    Raises:
        AssertionError: The index contains duplicates.
        ValueError: The DataFrame contains internal columns.

    """
    # Raised explicitly rather than via ``assert`` so the documented check
    # still runs when Python is started with -O.
    if params.index.duplicated().any():
        raise AssertionError("No duplicates allowed in the index of params.")

    invalid_names = [
        "_fixed",
        "_fixed_value",
        "_is_fixed_to_value",
        "_is_fixed_to_other",
    ]

    # Column labels need not be strings (e.g. integers); only string labels
    # can carry the reserved "_internal" prefix.
    invalid_present_columns = [
        col
        for col in params.columns
        if col in invalid_names
        or (isinstance(col, str) and col.startswith("_internal"))
    ]

    if invalid_present_columns:
        msg = (
            "Column names starting with '_internal' and as well as any other of the "
            f"following columns are not allowed in params:\n{invalid_names}."
            f"This is violated for:\n{invalid_present_columns}."
        )
        raise ValueError(msg)
Example #20
Source File: data_processing.py From AIAlpha with MIT License | 4 votes |
def make_train_test(self, df_x, df_y, window, csv_path, has_y=False, binary_y=False, save_csv=False):
    """
    Builds the target values and splits features/targets into train and test parts.

    The train part is the first ``int(len(y_values) * self.split)`` rows; the
    test part starts one row after that position.

    :param df_x: dataframe of x variables
    :type df_x: pd.DataFrame
    :param df_y: dataframe of y values (only read when ``has_y`` is True)
    :type df_y: pd.DataFrame
    :param window: the prediction window; when ``has_y`` is False and
        ``window`` is non-zero the target is
        ``log(close[t] / close[t + window])`` and the last ``window`` rows
        of ``df_x`` are dropped; ``window == 0`` uses ``close`` itself
    :type window: int
    :param csv_path: sub-directory of ``data/processed_data/`` that the CSV
        files are written to when ``save_csv`` is True
    :type csv_path: str
    :param has_y: whether the targets are supplied separately in ``df_y``;
        when False they are derived from the ``'close'`` column of ``df_x``
    :type has_y: boolean
    :param binary_y: whether to replace the targets by their sign (-1, 0, 1)
    :type binary_y: boolean
    :param save_csv: whether to write all six results to CSV
    :type save_csv: boolean
    :return: fulldata, y_values, train_x, train_y, test_x, test_y
    :rtype: tuple
    """
    if has_y:
        y_values = df_y.copy()
        y_values.columns = ['y_values']
        fulldata = df_x.copy()
    else:
        if window == 0:
            y_values = df_x['close'].copy()
            # NOTE(review): df_x['close'] is presumably a Series, in which
            # case this only sets a plain attribute and does not rename
            # anything -- confirm.
            y_values.columns = ['y_values']
            fulldata = df_x.copy()
        else:
            # shift(-window) pulls the close from `window` rows ahead onto
            # the current row; the trailing rows without a future close
            # become NaN and are dropped.
            y_values = np.log(df_x['close'].copy()/df_x['close'].copy().shift(-window)).dropna()
            y_values.columns = ['y_values']
            # drop the same trailing rows from the features
            fulldata = df_x.iloc[:-window, :].copy()

    if binary_y:
        # NOTE(review): indexing by 'y_values' presumably requires y_values
        # to be a DataFrame with that column (the has_y=True path) -- confirm
        # this also works for the targets derived from 'close'.
        y_values.loc[y_values['y_values']<0] = -1
        y_values.loc[y_values['y_values']>0] = 1
        y_values.loc[y_values['y_values']==0] = 0

    print(y_values.shape)
    print(fulldata.shape)

    # NOTE(review): the test slices start at split position + 1, so the row
    # at the split position lands in neither train nor test -- confirm this
    # gap is intended.
    train_y = y_values.iloc[:int(len(y_values)*self.split)]
    test_y = y_values.iloc[int(len(y_values)*self.split)+1:]
    train_x = fulldata.iloc[:int(len(y_values)*self.split), :]
    test_x = fulldata.iloc[int(len(y_values)*self.split)+1:len(y_values), :]

    print(train_y.shape)
    print(train_x.shape)

    if save_csv:
        train_x.to_csv(f'data/processed_data/{csv_path}/train_x.csv')
        train_y.to_csv(f'data/processed_data/{csv_path}/train_y.csv', header=['y_values'])
        test_x.to_csv(f'data/processed_data/{csv_path}/test_x.csv')
        test_y.to_csv(f'data/processed_data/{csv_path}/test_y.csv', header=['y_values'])
        fulldata.to_csv(f'data/processed_data/{csv_path}/full_x.csv')
        y_values.to_csv(f'data/processed_data/{csv_path}/full_y.csv', header=['y_values'])

    return fulldata, y_values, train_x, train_y, test_x, test_y
Example #21
Source File: pdutils.py From pysystemtrade with GNU General Public License v3.0 | 4 votes |
def find_dates_when_label_changes(original_data, new_data, col_names=dict(data='PRICE', label='PRICE_CONTRACT')):
    """
    For two pd.DataFrames with 2 columns, including a label column, find the date after which the labelling
    is consistent across columns

    >>> s1=pd.DataFrame(dict(PRICE=[1,2,3,np.nan], PRICE_CONTRACT = ["a", "a", "b", "b"]), index=['a1','a2','a3','a4'])
    >>> s2=pd.DataFrame(dict(PRICE=[ 2,3,4], PRICE_CONTRACT = [ "b", "b", "b"]), index=['a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    ('a3', 'a2')
    >>> s2=pd.DataFrame(dict(PRICE=[ 2,3,4], PRICE_CONTRACT = [ "a", "b", "b"]), index=['a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    ('a2', 'a1')
    >>> s2=pd.DataFrame(dict(PRICE=[ 2,3,4], PRICE_CONTRACT = [ "c", "c", "c"]), index=['a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    mismatch_on_last_day
    >>> find_dates_when_label_changes(s1, s1)
    original index matches new
    >>> s2=pd.DataFrame(dict(PRICE=[1, 2,3,4], PRICE_CONTRACT = ["a","c", "c", "c"]), index=['a1','a2','a3','a4'])
    >>> find_dates_when_label_changes(s1, s2)
    mismatch_on_last_day

    :param original_data: some data
    :param new_data: some new data
    :param col_names: dict of str; only the ``'label'`` entry is read here
    :return: tuple (first date after the series mismatch, last date of mismatch),
        or one of the marker objects ``mismatch_on_last_day`` /
        ``original_index_matches_new`` if the match didn't work out
    """
    label_column = col_names['label']

    # Line up both label columns on a common, sorted index
    joint_labels = pd.concat(
        [original_data[label_column], new_data[label_column]], axis=1)
    joint_labels.columns = ['current', 'new']
    joint_labels = joint_labels.sort_index()

    # Only the period covered by the new data matters; gaps are forward filled
    new_data_start = new_data.index[0]
    existing_labels_in_new_period = joint_labels['current'][new_data_start:].ffill()
    new_labels_in_new_period = joint_labels['new'][new_data_start:].ffill()

    # Find the last date when the labels didn't match, and the first date after that
    match_data = find_dates_when_series_starts_matching(
        existing_labels_in_new_period, new_labels_in_new_period)

    if match_data is mismatch_on_last_day:
        # Can't use any of the new data
        return mismatch_on_last_day

    if match_data is all_labels_match:
        # Can use the entire series because all labels match
        if new_data.index[0] == original_data.index[0]:
            # Both start on the same date, so have to use whole of original data
            return original_index_matches_new

        # All the new data matches
        first_date_after_series_mismatch = new_data_start
        earlier_dates = original_data.index[original_data.index < new_data_start]
        last_date_when_series_mismatch = earlier_dates[-1]
        return first_date_after_series_mismatch, last_date_when_series_mismatch

    first_date_after_series_mismatch, last_date_when_series_mismatch = match_data
    return first_date_after_series_mismatch, last_date_when_series_mismatch
Example #22
Source File: transform_problem.py From estimagic with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _pre_process_arguments(
    params, algorithm, algo_options, logging, dashboard, dash_options
):
    """Process user supplied arguments without affecting the optimization problem.

    Args:
        params (pd.DataFrame or list of pd.DataFrames): See :ref:`params`.
        algorithm (str or list of strings): Identifier of the optimization algorithm.
            See :ref:`list_of_algorithms` for supported values.
        algo_options (dict or list of dicts):
            algorithm specific configurations for the optimization
        logging: Returned unchanged as ``database_path`` when ``dashboard``
            is truthy (presumably a path to the database -- TODO confirm).
        dashboard (bool): Whether to create and show a dashboard, default is False.
            See :ref:`dashboard` for details.
        dash_options (dict or list of dict, optional): Options passed to the
            dashboard. Supported keys are:
                - port (int): port where to display the dashboard
                - no_browser (bool): whether to display the dashboard in a browser
                - rollover (int): how many iterations to keep in the monitoring plots

    Returns:
        optim_kwargs (dict): dictionary collecting the arguments that are going to be
            passed to _internal_minimize
        params (pd.DataFrame): The expanded params DataFrame with all needed columns.
            See :ref:`params`.
        dash_options (dict): the supplied options merged over the defaults.
        database_path (str or pathlib.Path or None): path to the database.

    """
    dash_defaults = {"no_browser": False, "port": None, "rollover": 500}
    # user-supplied options are unpacked last so they override the defaults
    dash_options = {**dash_defaults, **dash_options}

    origin, algo_name = _process_algorithm(algorithm)
    optim_kwargs = dict(
        origin=origin, algo_name=algo_name, algo_options=algo_options
    )

    params = _set_params_defaults_if_missing(params)
    _check_params(params)

    if dashboard:
        database_path = logging
    else:
        database_path = None

    return optim_kwargs, params, dash_options, database_path