Python pandas.read_feather() Examples
The following are 30 code examples of pandas.read_feather().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: arrow.py From spectre with Apache License 2.0 | 6 votes |
def __init__(self, path: str = None, keep_in_memory: bool = True) -> None:
    """Initialize the loader from ``path`` and its ``.meta`` side file.

    The file ``path + '.meta'`` is read as a feather frame; the values of its
    ``ohlcv`` column and the first two values of its ``adjustments`` column
    are passed to the parent initializer together with ``path``.

    Args:
        path: Base path of the data; ``'.meta'`` is appended to locate the
            metadata file.
        keep_in_memory: Stored on the instance as ``keep_in_memory``.

    Raises:
        FileNotFoundError: If the ``.meta`` file does not exist.
    """
    meta_path = path + '.meta'
    if not os.path.exists(meta_path):
        raise FileNotFoundError(os.path.abspath(meta_path))

    # pandas 0.22 has the fastest MultiIndex
    if pd.__version__.startswith('0.22'):
        import feather
        meta = feather.read_dataframe(meta_path)
    else:
        meta = pd.read_feather(meta_path)

    ohlcv_names = meta.ohlcv.values
    leading_adjustments = meta.adjustments.values[:2]
    # A leading None is treated as "no adjustment columns at all".
    adjustment_names = None if leading_adjustments[0] is None else leading_adjustments

    super().__init__(path, ohlcv_names, adjustment_names)
    self.keep_in_memory = keep_in_memory
    self._cache = None
Example #2
Source File: from_kkbox.py From pycox with BSD 2-Clause "Simplified" License | 6 votes |
def _make_train_test_split(self, seed=1234):
    """Split the covariates into train/val/test feather files by customer.

    Reads ``covariates.feather`` from ``self._path_dir``, splits on the
    ``'msno'`` column so that no customer appears in more than one split,
    and writes ``train.feather``, ``test.feather`` and ``val.feather`` back
    to the same directory.

    Args:
        seed: Value passed to ``np.random.seed`` before splitting.
    """
    from sklearn.model_selection import train_test_split

    def train_test_split_customer(df, col_customer, test_size):
        # Split the unique customers, then pull each customer's rows back in.
        unique_customers = df[[col_customer]].drop_duplicates()
        tr, te = train_test_split(unique_customers, test_size=test_size)
        first_part = df.merge(tr, how='right', on=col_customer)
        second_part = df.merge(te, how='right', on=col_customer)
        return first_part, second_part

    np.random.seed(seed)
    covariates = pd.read_feather(self._path_dir / 'covariates.feather')

    train, test = train_test_split_customer(covariates, 'msno', 0.25)
    train, val = train_test_split_customer(train, 'msno', 0.1)

    # No customer may be shared between any two of the splits.
    for left, right in ((train, test), (train, val), (test, val)):
        assert left.merge(right, how='inner', on='msno').shape[0] == 0

    for stem, frame in (('train', train), ('test', test), ('val', val)):
        frame.to_feather(self._path_dir / (stem + '.feather'))
Example #3
Source File: run.py From talkingdata-adtracking-fraud-detection with MIT License | 6 votes |
def load_dataset(paths, index=None) -> pd.DataFrame:
    """Load several feather feature files and join them column-wise.

    Args:
        paths: Non-empty sequence of feather file paths.
        index: Optional row selector; when given, each loaded frame is
            reduced with ``.loc[index]`` before being kept.

    Returns:
        The loaded frames concatenated along ``axis=1``.

    Raises:
        AssertionError: If ``paths`` is empty, or if the loaded frames do
            not all share the same index.
    """
    assert len(paths) > 0

    feature_datasets = []
    for path in paths:
        frame = pd.read_feather(path)
        if index is not None:
            frame = frame.loc[index]
        feature_datasets.append(frame)
        gc.collect()

    # check if all of feature dataset share the same index
    # (kept in its own name so the ``index`` argument is not rebound, and
    # reached through ``pd`` like every other pandas call in this function)
    reference_index = feature_datasets[0].index
    for feature_dataset in feature_datasets[1:]:
        pd.testing.assert_index_equal(reference_index, feature_dataset.index)

    return pd.concat(feature_datasets, axis=1)
Example #4
Source File: feature_store.py From nyaggle with MIT License | 6 votes |
def load_feature(feature_name: Union[int, str], directory: str = './features/',
                 ignore_columns: List[str] = None) -> pd.DataFrame:
    """
    Read a stored feature back as a pandas DataFrame.

    Args:
        feature_name:
            The name of the feature (used in ``save_feature``); the file read
            is ``<feature_name>.f`` inside ``directory``.
        directory:
            The directory where the feature is stored.
        ignore_columns:
            Columns to drop from the loaded dataframe. Names that are not
            present in the dataframe are skipped.
    Returns:
        The feature dataframe
    """
    feature_file = os.path.join(directory, str(feature_name) + '.f')
    loaded = pd.read_feather(feature_file)

    if not ignore_columns:
        return loaded

    droppable = [name for name in ignore_columns if name in loaded.columns]
    return loaded.drop(droppable, axis=1)
Example #5
Source File: atlas3.py From ssbio with MIT License | 6 votes |
def get_proteome_percentages(counts_df, outpath, force_rerun=False):
    """Convert per-strain counts into fractions of their matching totals.

    When ``ssbio.utils.force_rerun`` says the output must be (re)built, every
    row label ending in ``'total'`` is used as a denominator for the other
    rows sharing its prefix (the label up to its last ``'_'``); results are
    stored under the row label with ``'count'`` replaced by ``'%'`` and
    written to ``outpath`` as feather. Otherwise the frame is read back from
    ``outpath``.

    Args:
        counts_df: Frame with one column per strain.
        outpath: Feather file used to store / reload the result.
        force_rerun: Forwarded to ``ssbio.utils.force_rerun``.

    Returns:
        The percentages dataframe.
    """
    if not ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        cached = pd.read_feather(outpath).set_index('index')
        cached.index.name = None
        return cached

    percents = pd.DataFrame(columns=counts_df.columns)
    for strain in counts_df.columns:
        row_labels = counts_df[strain].index
        total_labels = [label for label in row_labels if label.endswith('total')]
        for total_label in total_labels:
            prefix = total_label.rsplit('_', 1)[0]
            members = [
                label for label in row_labels
                if label.startswith(prefix) and label not in total_labels
            ]
            for member in members:
                ratio = counts_df[strain][member] / counts_df[strain][total_label]
                percents.at[member.replace('count', '%'), strain] = ratio

    # Feather cannot store the index, so it is written out as a column.
    percents.astype(float).reset_index().to_feather(outpath)
    return percents
Example #6
Source File: parsers.py From modin with Apache License 2.0 | 5 votes |
def parse(fname, **kwargs):
    """Read a feather file, optionally splitting the result.

    ``num_splits`` is removed from ``kwargs``; the remaining keyword
    arguments are forwarded to the reader. Without ``num_splits`` the plain
    ``pandas.read_feather`` result is returned. Otherwise the frame is passed
    to ``_split_result_for_readers`` and the index length and dtypes are
    appended to that result.
    """
    from pyarrow import feather

    split_count = kwargs.pop("num_splits", None)
    if split_count is None:
        return pandas.read_feather(fname, **kwargs)

    frame = feather.read_feather(fname, **kwargs)
    pieces = _split_result_for_readers(0, split_count, frame)
    # Append the length of the index here to build it externally
    return pieces + [len(frame.index), frame.dtypes]
Example #7
Source File: test_feather.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_path_localpath(self):
    """Round-trip a frame through feather via ``tm.round_trip_localpath``."""
    expected = tm.makeDataFrame().reset_index()
    actual = tm.round_trip_localpath(expected.to_feather, pd.read_feather)
    tm.assert_frame_equal(expected, actual)
Example #8
Source File: arrow.py From spectre with Apache License 2.0 | 5 votes |
def _load(self) -> pd.DataFrame:
    """Return the data frame indexed by ``['date', 'asset']``.

    A previously cached frame is returned as-is. Otherwise the file at
    ``self._path`` is read, indexed, and cached when ``self.keep_in_memory``
    is truthy.
    """
    cached = self._cache
    if cached is not None:
        return cached

    on_pandas_022 = pd.__version__.startswith('0.22')
    if on_pandas_022:
        import feather
        frame = feather.read_dataframe(self._path)
    else:
        frame = pd.read_feather(self._path)

    frame.set_index(['date', 'asset'], inplace=True)
    if self.keep_in_memory:
        self._cache = frame
    return frame
Example #9
Source File: protocols.py From bionic with Apache License 2.0 | 5 votes |
def read(self, path):
    """Open ``path`` in binary mode and read it as a feather DataFrame."""
    with path.open("rb") as handle:
        frame = pd.read_feather(handle)
    return frame
Example #10
Source File: test_feather.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def check_round_trip(self, df, **kwargs):
    """Write ``df`` to feather, read it back, and assert both frames match.

    Extra keyword arguments are forwarded to ``read_feather``.
    """
    with ensure_clean() as feather_path:
        to_feather(df, feather_path)
        reloaded = read_feather(feather_path, **kwargs)
        assert_frame_equal(reloaded, df)
Example #11
Source File: test_feather.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_path_pathlib(self):
    """Round-trip a frame through feather via ``tm.round_trip_pathlib``."""
    expected = tm.makeDataFrame().reset_index()
    actual = tm.round_trip_pathlib(expected.to_feather, pd.read_feather)
    tm.assert_frame_equal(expected, actual)
Example #12
Source File: test_feather.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_path_localpath(self):
    """Check that writing and re-reading through ``tm.round_trip_localpath`` preserves the frame."""
    source_frame = tm.makeDataFrame().reset_index()
    round_tripped = tm.round_trip_localpath(source_frame.to_feather, pd.read_feather)
    tm.assert_frame_equal(source_frame, round_tripped)
Example #13
Source File: io.py From modin with Apache License 2.0 | 5 votes |
def read_feather(cls, path, columns=None, use_threads=True):
    """Read a feather file with pandas and wrap it via ``cls.from_pandas``.

    ``ErrorMessage.default_to_pandas`` is called first to report the
    fallback to the pandas implementation.
    """
    ErrorMessage.default_to_pandas("`read_feather`")
    pandas_frame = pandas.read_feather(
        path,
        columns=columns,
        use_threads=use_threads,
    )
    return cls.from_pandas(pandas_frame)
Example #14
Source File: test_feather.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def check_round_trip(self, df, expected=None, **kwargs):
    """Write ``df`` to feather, read it back, and compare with ``expected``.

    When ``expected`` is ``None`` the re-read frame is compared with ``df``
    itself. Extra keyword arguments are forwarded to ``read_feather``.
    """
    target = df if expected is None else expected

    with ensure_clean() as feather_path:
        to_feather(df, feather_path)
        reloaded = read_feather(feather_path, **kwargs)
        assert_frame_equal(reloaded, target)
Example #15
Source File: test_io.py From modin with Apache License 2.0 | 5 votes |
def test_from_feather():
    """Compare ``pd.read_feather`` against ``pandas.read_feather`` on a fixture file.

    The fixture is created with ``setup_feather_file`` and always removed
    with ``teardown_feather_file``, including when the comparison fails.
    NOTE(review): ``pd`` is presumably the library under test rather than
    plain pandas, given the ``modin_df`` name; confirm against the file's
    imports.
    """
    setup_feather_file(SMALL_ROW_SIZE)
    try:
        pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME)
        modin_df = pd.read_feather(TEST_FEATHER_FILENAME)
        df_equals(modin_df, pandas_df)
    finally:
        # Without this, a failed comparison would leave the fixture file
        # behind for later tests.
        teardown_feather_file()
Example #16
Source File: Utils.py From Kaggle-Competition-Favorita with MIT License | 5 votes |
def load_data():
    """Load the sales, promotion, item and store tables from the working directory.

    Reads ``train.csv``, ``test.csv``, ``items.csv`` and ``stores.csv`` using
    relative paths. ``unit_sales`` is transformed with ``log1p`` when
    positive and set to 0 otherwise. Only training rows dated on or after
    2016-01-01 are kept (the ``*_2017`` variable names notwithstanding).

    Returns:
        A tuple ``(df_2017, promo_2017, items, stores)``:

        * ``df_2017``: unit sales, one row per ``(store_nbr, item_nbr)`` and
          one column per date, missing entries filled with 0.
        * ``promo_2017``: ``onpromotion`` flags in the same layout, with the
          test dates appended as extra columns and missing entries filled
          with ``False``.
        * ``items``: ``items.csv`` indexed by ``item_nbr``.
        * ``stores``: ``stores.csv`` indexed by ``store_nbr``.
    """
    df_train = pd.read_csv(
        'train.csv', usecols=[1, 2, 3, 4, 5],
        dtype={'onpromotion': bool},
        converters={'unit_sales': lambda u: np.log1p(float(u)) if float(u) > 0 else 0},
        parse_dates=["date"],
    )
    df_test = pd.read_csv(
        "test.csv", usecols=[0, 1, 2, 3, 4],
        dtype={'onpromotion': bool},
        parse_dates=["date"],
    ).set_index(['store_nbr', 'item_nbr', 'date'])

    # subset data
    # ``pd.datetime`` was removed in pandas 2.0; ``pd.Timestamp`` compares
    # against the parsed date column the same way on old and new versions.
    df_2017 = df_train.loc[df_train.date >= pd.Timestamp(2016, 1, 1)]

    # promo
    promo_2017_train = df_2017.set_index(
        ["store_nbr", "item_nbr", "date"])[["onpromotion"]].unstack(
            level=-1).fillna(False)
    promo_2017_train.columns = promo_2017_train.columns.get_level_values(1)
    promo_2017_test = df_test[["onpromotion"]].unstack(level=-1).fillna(False)
    promo_2017_test.columns = promo_2017_test.columns.get_level_values(1)
    # Align the test promotions to the (store, item) pairs seen in training.
    promo_2017_test = promo_2017_test.reindex(promo_2017_train.index).fillna(False)
    promo_2017 = pd.concat([promo_2017_train, promo_2017_test], axis=1)
    del promo_2017_test, promo_2017_train

    df_2017 = df_2017.set_index(
        ["store_nbr", "item_nbr", "date"])[["unit_sales"]].unstack(
            level=-1).fillna(0)
    df_2017.columns = df_2017.columns.get_level_values(1)

    # items
    items = pd.read_csv("items.csv").set_index("item_nbr")
    stores = pd.read_csv("stores.csv").set_index("store_nbr")

    return df_2017, promo_2017, items, stores
Example #17
Source File: Utils.py From Kaggle-Competition-Favorita with MIT License | 5 votes |
def load_unstack(filename):
    """Load pre-unstacked sales and promotion frames plus the lookup tables.

    Reads the feather files ``df_<filename>_raw`` and
    ``promo_<filename>_raw``, indexes each by ``store_nbr`` / ``item_nbr``
    and converts their column labels to datetimes. ``items.csv`` and
    ``stores.csv`` are read from the working directory.

    Returns:
        A tuple ``(df_2017, promo_2017, items, stores)``.
    """
    df_name = 'df_' + filename + '_raw'
    promo_name = 'promo_' + filename + '_raw'

    def _read_unstacked(feather_name):
        # Restore the (store, item) index and turn the column labels into dates.
        frame = pd.read_feather(feather_name).set_index(['store_nbr','item_nbr'])
        frame.columns = pd.to_datetime(frame.columns)
        return frame

    df_2017 = _read_unstacked(df_name)
    promo_2017 = _read_unstacked(promo_name)

    items = pd.read_csv("items.csv").set_index("item_nbr")
    stores = pd.read_csv("stores.csv").set_index("store_nbr")
    return df_2017, promo_2017, items, stores

# Create validation and test data
Example #18
Source File: _dataset_loader.py From pycox with BSD 2-Clause "Simplified" License | 5 votes |
def read_df(self):
    """Return the dataset as a DataFrame, downloading it first if missing.

    If ``self.path`` does not exist, ``self._download()`` is called and
    progress is printed. The loaded frame is passed through
    ``self._label_cols_at_end`` before being returned.
    """
    already_local = self.path.exists()
    if not already_local:
        print(f"Dataset '{self.name}' not locally available. Downloading...")
        self._download()
        print("Done")

    return self._label_cols_at_end(pd.read_feather(self.path))
Example #19
Source File: dataset_view.py From QCFractal with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _deserialize(data: bytes, msgpacked_cols: List[str]) -> pd.DataFrame:
    """
    Rebuild a pandas DataFrame from its feather-packed bytes.

    Due to limitations in pyarrow, some objects are msgpacked inside the
    DataFrame; every column listed in ``msgpacked_cols`` is decoded element
    by element with ``deserialize(..., "msgpack-ext")``.
    """
    import pyarrow

    def _unpack(element):
        return deserialize(element, "msgpack-ext")

    frame = pd.read_feather(pyarrow.BufferReader(data))
    for column in msgpacked_cols:
        frame[column] = frame[column].apply(_unpack)

    # pandas.to_feather does not support indexes, so frames are sent over
    # the wire without one and the index is restored here.
    if "index" in frame.columns:
        frame.set_index("index", inplace=True)
    return frame
Example #20
Source File: test_feather.py From twitter-stock-recommendation with MIT License | 5 votes |
def check_round_trip(self, df, **kwargs):
    """Write ``df`` to feather, read it back, and assert both frames match.

    The read runs inside ``catch_warnings(record=True)``; extra keyword
    arguments are forwarded to ``read_feather``.
    """
    with ensure_clean() as feather_path:
        to_feather(df, feather_path)

        with catch_warnings(record=True):
            reloaded = read_feather(feather_path, **kwargs)
        assert_frame_equal(reloaded, df)
Example #21
Source File: test_feather.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_path_pathlib(self):
    """Check that writing and re-reading through ``tm.round_trip_pathlib`` preserves the frame."""
    source_frame = tm.makeDataFrame().reset_index()
    round_tripped = tm.round_trip_pathlib(source_frame.to_feather, pd.read_feather)
    tm.assert_frame_equal(source_frame, round_tripped)
Example #22
Source File: test_feather.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_path_localpath(self):
    """Feather write/read through ``tm.round_trip_localpath`` must return an equal frame."""
    written = tm.makeDataFrame().reset_index()
    read_back = tm.round_trip_localpath(written.to_feather, pd.read_feather)
    tm.assert_frame_equal(written, read_back)
Example #23
Source File: test_pandas.py From docker-python with Apache License 2.0 | 5 votes |
def test_read_feather(self):
    """The bundled feather fixture must load with a total of 10 elements."""
    fixture_path = "/input/tests/data/feather-0_3_1.feather"
    frame = pd.read_feather(fixture_path)
    self.assertEqual(10, frame.size)
Example #24
Source File: test_feather.py From recruit with Apache License 2.0 | 5 votes |
def test_path_pathlib(self):
    """Feather write/read through ``tm.round_trip_pathlib`` must return an equal frame."""
    written = tm.makeDataFrame().reset_index()
    read_back = tm.round_trip_pathlib(written.to_feather, pd.read_feather)
    tm.assert_frame_equal(written, read_back)
Example #25
Source File: test_feather.py From recruit with Apache License 2.0 | 5 votes |
def test_path_localpath(self):
    """Verify the feather round trip performed by ``tm.round_trip_localpath``."""
    frame_in = tm.makeDataFrame().reset_index()
    frame_out = tm.round_trip_localpath(frame_in.to_feather, pd.read_feather)
    tm.assert_frame_equal(frame_in, frame_out)
Example #26
Source File: base.py From ml-competition-template-titanic with MIT License | 5 votes |
def load(self):
    """Read the train and test feather files into ``self.train`` / ``self.test``."""
    train_file = str(self.train_path)
    self.train = pd.read_feather(train_file)

    test_file = str(self.test_path)
    self.test = pd.read_feather(test_file)
Example #27
Source File: __init__.py From ml-competition-template-titanic with MIT License | 5 votes |
def load_datasets(feats):
    """Load and column-concatenate the train and test frames of each feature.

    For every name in ``feats`` the files ``features/<name>_train.feather``
    and ``features/<name>_test.feather`` are read.

    Returns:
        A tuple ``(X_train, X_test)``.
    """
    def _concat_split(split):
        frames = [pd.read_feather(f'features/{f}_{split}.feather') for f in feats]
        return pd.concat(frames, axis=1, sort=False)

    X_train = _concat_split('train')
    X_test = _concat_split('test')
    return X_train, X_test
Example #28
Source File: test_feather.py From vnpy_crypto with MIT License | 5 votes |
def check_round_trip(self, df, **kwargs):
    """Round-trip ``df`` through a feather file and assert it is unchanged.

    Keyword arguments are forwarded to ``read_feather``, which is called
    inside ``catch_warnings(record=True)``.
    """
    with ensure_clean() as tmp_file:
        to_feather(df, tmp_file)

        with catch_warnings(record=True):
            read_back = read_feather(tmp_file, **kwargs)
        assert_frame_equal(read_back, df)
Example #29
Source File: test_feather.py From vnpy_crypto with MIT License | 5 votes |
def test_path_pathlib(self):
    """Verify the feather round trip performed by ``tm.round_trip_pathlib``."""
    frame_in = tm.makeDataFrame().reset_index()
    frame_out = tm.round_trip_pathlib(frame_in.to_feather, pd.read_feather)
    tm.assert_frame_equal(frame_in, frame_out)
Example #30
Source File: test_feather.py From vnpy_crypto with MIT License | 5 votes |
def test_path_localpath(self):
    """Write and re-read a frame with ``tm.round_trip_localpath`` and compare the two."""
    before = tm.makeDataFrame().reset_index()
    after = tm.round_trip_localpath(before.to_feather, pd.read_feather)
    tm.assert_frame_equal(before, after)