Python pandas.read_feather() Examples

The following are 30 code examples of pandas.read_feather(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: arrow.py    From spectre with Apache License 2.0 6 votes vote down vote up
def __init__(self, path: str = None, keep_in_memory: bool = True) -> None:
        """Read the ``.meta`` sidecar of a feather file and initialise the base class.

        Args:
            path: Location of the data file; column metadata is read from
                ``path + '.meta'``.
            keep_in_memory: Stored unchanged on the instance as
                ``keep_in_memory``.

        Raises:
            FileNotFoundError: If the ``.meta`` sidecar does not exist. The
                exception carries the absolute path that was looked up.
        """
        meta_path = path + '.meta'
        if not os.path.exists(meta_path):
            raise FileNotFoundError(os.path.abspath(meta_path))

        # pandas 0.22 has the fastest MultiIndex
        if pd.__version__.startswith('0.22'):
            import feather
            read_meta = feather.read_dataframe
        else:
            read_meta = pd.read_feather
        meta = read_meta(meta_path)

        ohlcv_names = meta.ohlcv.values
        # Only the first two entries of the adjustments column are used; a
        # leading None means no adjustments are passed to the base class.
        adjustment_names = meta.adjustments.values[:2]
        if adjustment_names[0] is None:
            adjustment_names = None

        super().__init__(path, ohlcv_names, adjustment_names)
        self.keep_in_memory = keep_in_memory
        self._cache = None
Example #2
Source File: from_kkbox.py    From pycox with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _make_train_test_split(self, seed=1234):
        """Split the covariates by customer and write train/test/val feather files.

        ``covariates.feather`` is read from ``self._path_dir``. Unique ``msno``
        values are split 75/25 into train/test, and the resulting train part is
        split again 90/10 into train/val. The three frames are written next to
        the input as ``train.feather``, ``test.feather`` and ``val.feather``.

        Args:
            seed: Value passed to ``np.random.seed`` before splitting.

        Raises:
            AssertionError: If any ``msno`` ends up in more than one split.
        """
        from sklearn.model_selection import train_test_split
        np.random.seed(seed)
        covariates = pd.read_feather(self._path_dir / 'covariates.feather')

        def split_by_customer(frame, key, holdout_fraction):
            # Split on the distinct key values, then pull the matching rows back in.
            unique_keys = frame[[key]].drop_duplicates()
            kept_keys, held_keys = train_test_split(unique_keys, test_size=holdout_fraction)
            kept = frame.merge(kept_keys, how='right', on=key)
            held = frame.merge(held_keys, how='right', on=key)
            return kept, held

        train, test = split_by_customer(covariates, 'msno', 0.25)
        train, val = split_by_customer(train, 'msno', 0.1)

        # No customer may be shared between any two of the splits.
        for left, right in ((train, test), (train, val), (test, val)):
            assert left.merge(right, how='inner', on='msno').shape[0] == 0

        for file_name, split in (('train.feather', train),
                                 ('test.feather', test),
                                 ('val.feather', val)):
            split.to_feather(self._path_dir / file_name)
Example #3
Source File: run.py    From talkingdata-adtracking-fraud-detection with MIT License 6 votes vote down vote up
def load_dataset(paths, index=None) -> pd.DataFrame:
    """Load several feather feature files and join them column-wise.

    Args:
        paths: Non-empty sequence of feather file paths.
        index: Optional row selector. When given, every loaded frame is
            restricted with ``.loc[index]`` before being collected.

    Returns:
        All loaded frames concatenated along ``axis=1``.

    Raises:
        AssertionError: If ``paths`` is empty, or if the loaded frames do not
            all share the same index.
    """
    assert len(paths) > 0

    feature_datasets = []
    for path in paths:
        frame = pd.read_feather(path)
        if index is not None:
            frame = frame.loc[index]
        feature_datasets.append(frame)
        gc.collect()

    # check if all of feature dataset share the same index
    reference_index = feature_datasets[0].index
    for feature_dataset in feature_datasets[1:]:
        # Use the same ``pd`` alias as the rest of the function; the bare
        # ``pandas`` name is not guaranteed to be imported in this module.
        pd.testing.assert_index_equal(reference_index, feature_dataset.index)

    return pd.concat(feature_datasets, axis=1)
Example #4
Source File: feature_store.py    From nyaggle with MIT License 6 votes vote down vote up
def load_feature(feature_name: Union[int, str], directory: str = './features/',
                 ignore_columns: List[str] = None) -> pd.DataFrame:
    """
    Read a stored feature file into a pandas DataFrame.

    The file is looked up as ``<directory>/<feature_name>.f``.

    Args:
        feature_name:
            The name of the feature (used in ``save_feature``).
        directory:
            The directory where the feature is stored.
        ignore_columns:
            Column names to drop from the loaded dataframe. Names that are
            not present in the dataframe are skipped.
    Returns:
        The feature dataframe
    """
    file_name = str(feature_name) + '.f'
    frame = pd.read_feather(os.path.join(directory, file_name))

    if not ignore_columns:
        return frame

    present = [name for name in ignore_columns if name in frame.columns]
    return frame.drop(present, axis=1)
Example #5
Source File: atlas3.py    From ssbio with MIT License 6 votes vote down vote up
def get_proteome_percentages(counts_df, outpath, force_rerun=False):
    """Convert per-strain counts into fractions of their matching totals.

    For every column (strain) of ``counts_df``, each index label ending in
    ``'total'`` is treated as a denominator. Every other label that starts
    with the same prefix (the total label minus its last ``_``-separated part)
    is divided by it. The result is stored under the label with ``'count'``
    replaced by ``'%'``.

    The result is cached at ``outpath`` as a feather file. Whether it is
    recomputed is decided by ``ssbio.utils.force_rerun``.

    Args:
        counts_df: DataFrame of counts, one column per strain.
        outpath: Path of the feather cache file.
        force_rerun: Forwarded to ``ssbio.utils.force_rerun`` as ``flag``.

    Returns:
        DataFrame of fractions with an unnamed index.
    """
    if not ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_percents_df = pd.read_feather(outpath).set_index('index')
    else:
        big_strain_percents_df = pd.DataFrame(columns=counts_df.columns)
        for strain in counts_df.columns:
            strain_counts = counts_df[strain]
            total_labels = [label for label in strain_counts.index if label.endswith('total')]
            for total_label in total_labels:
                prefix = total_label.rsplit('_', 1)[0]
                member_labels = [
                    label for label in strain_counts.index
                    if label.startswith(prefix) and label not in total_labels
                ]
                for member in member_labels:
                    fraction = strain_counts[member] / strain_counts[total_label]
                    big_strain_percents_df.at[member.replace('count', '%'), strain] = fraction

        # Feather cannot store the index, so it is written as a regular column.
        big_strain_percents_df.astype(float).reset_index().to_feather(outpath)

    big_strain_percents_df.index.name = None
    return big_strain_percents_df
Example #6
Source File: parsers.py    From modin with Apache License 2.0 5 votes vote down vote up
def parse(fname, **kwargs):
        """Read a feather file, optionally splitting the result for distribution.

        Args:
            fname: Path of the feather file.
            **kwargs: Reader options. ``num_splits`` is removed before the
                remaining options are forwarded to the reader.

        Returns:
            The plain ``pandas.read_feather`` result when ``num_splits`` is
            absent or ``None``. Otherwise the list from
            ``_split_result_for_readers`` followed by the index length and
            the dtypes of the frame.
        """
        from pyarrow import feather

        num_splits = kwargs.pop("num_splits", None)
        if num_splits is not None:
            df = feather.read_feather(fname, **kwargs)
            pieces = _split_result_for_readers(0, num_splits, df)
            # Append the length of the index here to build it externally
            return pieces + [len(df.index), df.dtypes]
        return pandas.read_feather(fname, **kwargs)
Example #7
Source File: test_feather.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_path_localpath(self):
        """Round-trip a frame through feather via ``tm.round_trip_localpath``."""
        expected = tm.makeDataFrame().reset_index()
        round_tripped = tm.round_trip_localpath(expected.to_feather, pd.read_feather)
        tm.assert_frame_equal(expected, round_tripped)
Example #8
Source File: arrow.py    From spectre with Apache License 2.0 5 votes vote down vote up
def _load(self) -> pd.DataFrame:
        """Return the data frame at ``self._path`` indexed by ``(date, asset)``.

        A previously cached frame is returned as-is. A freshly read frame is
        stored in ``self._cache`` only when ``self.keep_in_memory`` is truthy.
        """
        cached = self._cache
        if cached is not None:
            return cached

        if pd.__version__.startswith('0.22'):
            import feather
            reader = feather.read_dataframe
        else:
            reader = pd.read_feather
        frame = reader(self._path)
        frame.set_index(['date', 'asset'], inplace=True)

        if self.keep_in_memory:
            self._cache = frame
        return frame
Example #9
Source File: protocols.py    From bionic with Apache License 2.0 5 votes vote down vote up
def read(self, path):
        """Open ``path`` in binary mode and return its contents as a DataFrame."""
        with path.open("rb") as stream:
            frame = pd.read_feather(stream)
        return frame
Example #10
Source File: test_feather.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def check_round_trip(self, df, **kwargs):
        """Write ``df`` to a temporary feather file and assert it reads back equal.

        Args:
            df: Frame to write and compare against.
            **kwargs: Forwarded to ``read_feather``.
        """
        with ensure_clean() as tmp_path:
            to_feather(df, tmp_path)
            loaded = read_feather(tmp_path, **kwargs)
            assert_frame_equal(loaded, df)
Example #11
Source File: test_feather.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_path_pathlib(self):
        """Round-trip a frame through feather via ``tm.round_trip_pathlib``."""
        expected = tm.makeDataFrame().reset_index()
        round_tripped = tm.round_trip_pathlib(expected.to_feather, pd.read_feather)
        tm.assert_frame_equal(expected, round_tripped)
Example #12
Source File: test_feather.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_path_localpath(self):
        """Check that writing and reading via ``tm.round_trip_localpath`` preserves the frame."""
        original = tm.makeDataFrame().reset_index()
        reloaded = tm.round_trip_localpath(original.to_feather, pd.read_feather)
        tm.assert_frame_equal(original, reloaded)
Example #13
Source File: io.py    From modin with Apache License 2.0 5 votes vote down vote up
def read_feather(cls, path, columns=None, use_threads=True):
        """Read a feather file with pandas and wrap the result via ``cls.from_pandas``.

        Args:
            path: Passed to ``pandas.read_feather``.
            columns: Passed to ``pandas.read_feather`` as ``columns``.
            use_threads: Passed to ``pandas.read_feather`` as ``use_threads``.

        Returns:
            Whatever ``cls.from_pandas`` returns for the loaded frame.
        """
        ErrorMessage.default_to_pandas("`read_feather`")
        pandas_frame = pandas.read_feather(path, columns=columns, use_threads=use_threads)
        return cls.from_pandas(pandas_frame)
Example #14
Source File: test_feather.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def check_round_trip(self, df, expected=None, **kwargs):
        """Write ``df`` to a temporary feather file and compare what is read back.

        Args:
            df: Frame to write.
            expected: Frame the reloaded data must equal; defaults to ``df``.
            **kwargs: Forwarded to ``read_feather``.
        """
        target = df if expected is None else expected

        with ensure_clean() as tmp_path:
            to_feather(df, tmp_path)
            loaded = read_feather(tmp_path, **kwargs)
            assert_frame_equal(loaded, target)
Example #15
Source File: test_io.py    From modin with Apache License 2.0 5 votes vote down vote up
def test_from_feather():
    """Compare ``pd.read_feather`` with ``pandas.read_feather`` on the test file.

    The fixture file is created first and removed afterwards, including when
    reading or the comparison fails.
    """
    setup_feather_file(SMALL_ROW_SIZE)
    try:
        pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME)
        modin_df = pd.read_feather(TEST_FEATHER_FILENAME)

        df_equals(modin_df, pandas_df)
    finally:
        # Always clean up so a failing comparison does not leave the fixture
        # file behind for later tests.
        teardown_feather_file()
Example #16
Source File: Utils.py    From Kaggle-Competition-Favorita with MIT License 5 votes vote down vote up
def load_data():
    """Load the sales CSVs and pivot sales and promotion flags to wide form.

    Reads ``train.csv``, ``test.csv``, ``items.csv`` and ``stores.csv`` from
    the current working directory. Only training rows dated 2016-01-01 or
    later are kept.

    Returns:
        A 4-tuple ``(df_2017, promo_2017, items, stores)``:

        * ``df_2017``: unit sales (``log1p`` of positive values, otherwise 0)
          with one row per ``(store_nbr, item_nbr)`` and one column per date;
          missing combinations are filled with 0.
        * ``promo_2017``: ``onpromotion`` flags for the same rows, training
          dates followed by test dates; missing values are filled with False.
        * ``items``: ``items.csv`` indexed by ``item_nbr``.
        * ``stores``: ``stores.csv`` indexed by ``store_nbr``.
    """
    # df_train = pd.read_feather('train_after1608_raw')
    df_train = pd.read_csv('train.csv', usecols=[1, 2, 3, 4, 5], dtype={'onpromotion': bool},
                           converters={'unit_sales': lambda u: np.log1p(float(u)) if float(u) > 0 else 0},
                           parse_dates=["date"])
    df_test = pd.read_csv("test.csv", usecols=[0, 1, 2, 3, 4], dtype={'onpromotion': bool},
                          parse_dates=["date"]).set_index(['store_nbr', 'item_nbr', 'date'])

    # subset data
    # ``pd.datetime`` was removed in pandas 2.0; ``pd.Timestamp`` gives the
    # same cutoff on every pandas version.
    df_2017 = df_train.loc[df_train.date >= pd.Timestamp(2016, 1, 1)]

    # promo
    promo_2017_train = df_2017.set_index(
        ["store_nbr", "item_nbr", "date"])[["onpromotion"]].unstack(
            level=-1).fillna(False)
    promo_2017_train.columns = promo_2017_train.columns.get_level_values(1)
    promo_2017_test = df_test[["onpromotion"]].unstack(level=-1).fillna(False)
    promo_2017_test.columns = promo_2017_test.columns.get_level_values(1)
    # Align the test promotions to the training rows before joining them.
    promo_2017_test = promo_2017_test.reindex(promo_2017_train.index).fillna(False)
    promo_2017 = pd.concat([promo_2017_train, promo_2017_test], axis=1)
    del promo_2017_test, promo_2017_train

    df_2017 = df_2017.set_index(
        ["store_nbr", "item_nbr", "date"])[["unit_sales"]].unstack(
            level=-1).fillna(0)
    df_2017.columns = df_2017.columns.get_level_values(1)

    # items
    items = pd.read_csv("items.csv").set_index("item_nbr")
    stores = pd.read_csv("stores.csv").set_index("store_nbr")
    # items = items.reindex(df_2017.index.get_level_values(1))

    return df_2017, promo_2017, items, stores
Example #17
Source File: Utils.py    From Kaggle-Competition-Favorita with MIT License 5 votes vote down vote up
def load_unstack(filename):
    """Load pre-pivoted sales and promotion frames plus the item/store tables.

    Reads the feather files ``df_<filename>_raw`` and ``promo_<filename>_raw``,
    indexes each by ``(store_nbr, item_nbr)`` and converts the remaining
    column labels to datetimes. ``items.csv`` and ``stores.csv`` are read from
    the current working directory.

    Args:
        filename: Middle part of the two feather file names.

    Returns:
        A 4-tuple ``(df_2017, promo_2017, items, stores)``.
    """
    def read_wide(feather_name):
        # Column labels come back as strings; turn them back into dates.
        frame = pd.read_feather(feather_name).set_index(['store_nbr', 'item_nbr'])
        frame.columns = pd.to_datetime(frame.columns)
        return frame

    df_name = 'df_' + filename + '_raw'
    promo_name = 'promo_' + filename + '_raw'

    df_2017 = read_wide(df_name)
    promo_2017 = read_wide(promo_name)
    items = pd.read_csv("items.csv").set_index("item_nbr")
    stores = pd.read_csv("stores.csv").set_index("store_nbr")

    return df_2017, promo_2017, items, stores

# Create validation and test data 
Example #18
Source File: _dataset_loader.py    From pycox with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def read_df(self):
        """Return the dataset as a DataFrame, downloading it first if missing.

        When ``self.path`` does not exist, ``self._download()`` is called and
        progress is printed. The loaded frame is passed through
        ``self._label_cols_at_end`` before being returned.
        """
        dataset_missing = not self.path.exists()
        if dataset_missing:
            print(f"Dataset '{self.name}' not locally available. Downloading...")
            self._download()
            print("Done")
        return self._label_cols_at_end(pd.read_feather(self.path))
Example #19
Source File: dataset_view.py    From QCFractal with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _deserialize(data: bytes, msgpacked_cols: List[str]) -> pd.DataFrame:
        """
        Rebuild a pandas DataFrame from feather-packed bytes.

        Due to limitations in pyarrow, some objects are msgpacked inside the
        DataFrame; every column named in ``msgpacked_cols`` is unpacked
        element by element. A column called ``"index"``, when present, is
        turned back into the frame's index.
        """
        import pyarrow

        def unpack(element):
            return deserialize(element, "msgpack-ext")

        frame = pd.read_feather(pyarrow.BufferReader(data))
        for name in msgpacked_cols:
            frame[name] = frame[name].apply(unpack)

        # pandas.to_feather does not support indexes, so we have to send
        # indexless frames over the wire, and set the index here.
        if "index" in frame.columns:
            frame.set_index("index", inplace=True)
        return frame
Example #20
Source File: test_feather.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def check_round_trip(self, df, **kwargs):
        """Write ``df`` to a temporary feather file and assert it reads back equal.

        Args:
            df: Frame to write and compare against.
            **kwargs: Forwarded to ``read_feather``.
        """
        with ensure_clean() as tmp_path:
            to_feather(df, tmp_path)

            # The read runs inside ``catch_warnings(record=True)``.
            with catch_warnings(record=True):
                loaded = read_feather(tmp_path, **kwargs)
            assert_frame_equal(loaded, df)
Example #21
Source File: test_feather.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_path_pathlib(self):
        """Check that writing and reading via ``tm.round_trip_pathlib`` preserves the frame."""
        original = tm.makeDataFrame().reset_index()
        reloaded = tm.round_trip_pathlib(original.to_feather, pd.read_feather)
        tm.assert_frame_equal(original, reloaded)
Example #22
Source File: test_feather.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_path_localpath(self):
        """Feather write/read through ``tm.round_trip_localpath`` must return an equal frame."""
        source = tm.makeDataFrame().reset_index()
        restored = tm.round_trip_localpath(source.to_feather, pd.read_feather)
        tm.assert_frame_equal(source, restored)
Example #23
Source File: test_pandas.py    From docker-python with Apache License 2.0 5 votes vote down vote up
def test_read_feather(self):
        """Read the bundled feather sample and check it holds 10 elements."""
        sample_path = "/input/tests/data/feather-0_3_1.feather"
        frame = pd.read_feather(sample_path)

        self.assertEqual(10, frame.size)
Example #24
Source File: test_feather.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_path_pathlib(self):
        """Feather write/read through ``tm.round_trip_pathlib`` must return an equal frame."""
        source = tm.makeDataFrame().reset_index()
        restored = tm.round_trip_pathlib(source.to_feather, pd.read_feather)
        tm.assert_frame_equal(source, restored)
Example #25
Source File: test_feather.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_path_localpath(self):
        """Verify the feather round trip done by ``tm.round_trip_localpath``."""
        frame_in = tm.makeDataFrame().reset_index()
        frame_out = tm.round_trip_localpath(frame_in.to_feather, pd.read_feather)
        tm.assert_frame_equal(frame_in, frame_out)
Example #26
Source File: base.py    From ml-competition-template-titanic with MIT License 5 votes vote down vote up
def load(self):
        """Read the train and test feather files into ``self.train`` and ``self.test``."""
        train_file = str(self.train_path)
        self.train = pd.read_feather(train_file)
        test_file = str(self.test_path)
        self.test = pd.read_feather(test_file)
Example #27
Source File: __init__.py    From ml-competition-template-titanic with MIT License 5 votes vote down vote up
def load_datasets(feats):
    """Load and column-wise join the train and test frames of the given features.

    For every name in ``feats`` the files ``features/<name>_train.feather``
    and ``features/<name>_test.feather`` are read.

    Args:
        feats: Iterable of feature names.

    Returns:
        A pair ``(X_train, X_test)``, each the concatenation along ``axis=1``
        of the per-feature frames.
    """
    def concat_split(split):
        frames = [pd.read_feather(f'features/{name}_{split}.feather') for name in feats]
        return pd.concat(frames, axis=1, sort=False)

    X_train = concat_split('train')
    X_test = concat_split('test')
    return X_train, X_test
Example #28
Source File: test_feather.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def check_round_trip(self, df, **kwargs):
        """Round-trip ``df`` through a temporary feather file and compare.

        Args:
            df: Frame that is written and that the reloaded frame must equal.
            **kwargs: Forwarded to ``read_feather``.
        """
        with ensure_clean() as scratch_path:
            to_feather(df, scratch_path)

            # Reading happens under ``catch_warnings(record=True)``.
            with catch_warnings(record=True):
                reloaded = read_feather(scratch_path, **kwargs)
            assert_frame_equal(reloaded, df)
Example #29
Source File: test_feather.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_path_pathlib(self):
        """Verify the feather round trip done by ``tm.round_trip_pathlib``."""
        frame_in = tm.makeDataFrame().reset_index()
        frame_out = tm.round_trip_pathlib(frame_in.to_feather, pd.read_feather)
        tm.assert_frame_equal(frame_in, frame_out)
Example #30
Source File: test_feather.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_path_localpath(self):
        """Write and re-read a frame with ``tm.round_trip_localpath`` and compare."""
        written = tm.makeDataFrame().reset_index()
        read_back = tm.round_trip_localpath(written.to_feather, pd.read_feather)
        tm.assert_frame_equal(written, read_back)