Python pandas.to_pickle() Examples

The following are 30 code examples of pandas.to_pickle(), drawn from open source projects. The source file, originating project, and license are noted above each example.
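For orientation, here is a minimal sketch of the round trip most of the examples below exercise; the file name is illustrative:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# Write the frame to disk. With the default compression="infer", the codec is
# chosen from the file extension (gzip here).
pd.to_pickle(df, "frame.pkl.gz")        # df.to_pickle("frame.pkl.gz") is equivalent

# Read it back; read_pickle infers the compression from the extension as well.
df2 = pd.read_pickle("frame.pkl.gz")
assert df.equals(df2)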
Example #1
Source File: test_pickle.py    From recruit with Apache License 2.0
def test_write_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #2
Source File: update_database.py    From estimagic with BSD 3-Clause "New" or "Revised" License
def _handle_exception(statements, database, exception_info):
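    # str(database.bind.url) presumably looks like "sqlite:///<path>"; slicing off the
    # 10-character "sqlite:///" prefix leaves the on-disk location of the database file.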
    directory = Path(str(database.bind.url)[10:])
    if not directory.is_dir():
        directory = Path(".")
    directory = directory.resolve()

    for stat in statements:
        if isinstance(stat, (sqlalchemy.sql.dml.Insert, sqlalchemy.sql.dml.Update)):
            values = stat.compile().params
            timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
            filename = f"{stat.table.name}_{timestamp}.pickle"
            pd.to_pickle(values, directory / filename)

    warnings.warn(
        f"Unable to write to database. The data was saved in {directory} instead. The "
        f"traceback was:\n\n{exception_info}"
    ) 
Example #3
Source File: test_pickle.py    From vnpy_crypto with MIT License
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #4
Source File: testing.py    From Splunking-Crime with GNU Affero General Public License v3.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #5
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_write_explicit(self, compression, get_random_path):
        # issue 11666
        if compression == 'xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            self.decompress_file(p1, p2, compression=compression)

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #6
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read_explicit(self, compression, get_random_path):
        # issue 11666
        if compression == 'xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #7
Source File: testing.py    From vnpy_crypto with MIT License
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #8
Source File: testing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #9
Source File: testing.py    From elasticintel with GNU General Public License v3.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #10
Source File: testing.py    From recruit with Apache License 2.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #11
Source File: test_io.py    From modin with Apache License 2.0
def test_to_pickle():
    modin_df = create_test_modin_dataframe()
    pandas_df = create_test_pandas_dataframe()

    TEST_PICKLE_DF_FILENAME = "test_df.pkl"
    TEST_PICKLE_pandas_FILENAME = "test_pandas.pkl"

    modin_df.to_pickle(TEST_PICKLE_DF_FILENAME)
    pandas_df.to_pickle(TEST_PICKLE_pandas_FILENAME)

    assert assert_files_eq(TEST_PICKLE_DF_FILENAME, TEST_PICKLE_pandas_FILENAME)

    teardown_test_file(TEST_PICKLE_pandas_FILENAME)
    teardown_test_file(TEST_PICKLE_DF_FILENAME)

    pd.to_pickle(modin_df, TEST_PICKLE_DF_FILENAME)
    pandas.to_pickle(pandas_df, TEST_PICKLE_pandas_FILENAME)

    assert assert_files_eq(TEST_PICKLE_DF_FILENAME, TEST_PICKLE_pandas_FILENAME)

    teardown_test_file(TEST_PICKLE_pandas_FILENAME)
    teardown_test_file(TEST_PICKLE_DF_FILENAME) 
Example #12
Source File: test_pickle.py    From recruit with Apache License 2.0
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #13
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_write_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #14
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #15
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_write_explicit_bad(self, compression, get_random_path):
        with tm.assert_raises_regex(ValueError,
                                    "Unrecognized compression type"):
            with tm.ensure_clean(get_random_path) as path:
                df = tm.makeDataFrame()
                df.to_pickle(path, compression=compression) 
Example #16
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_bad_versions(self, protocol, get_random_path):
        # For Python 2, HIGHEST_PROTOCOL should be 2.
        msg = ("pickle protocol {protocol} asked for; the highest available "
               "protocol is 2").format(protocol=protocol)
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean(get_random_path) as path:
                df = tm.makeDataFrame()
                df.to_pickle(path, protocol=protocol) 
Example #17
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read(self, protocol, get_random_path):
        with tm.ensure_clean(get_random_path) as path:
            df = tm.makeDataFrame()
            df.to_pickle(path, protocol=protocol)
            df2 = pd.read_pickle(path)
            tm.assert_frame_equal(df, df2) 
Example #18
Source File: pandas.py    From ontask_b with MIT License
def df_to_string(df):
    """Transform a data frame into a b64 encoded pickled representation.

    :param df: Pandas dataframe
    :return: Base64 encoded string of its pickled representation
    """
    try:
        out_file = BytesIO()
        pd.to_pickle(df, out_file)
    except ValueError:
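        # Some pandas versions raise ValueError when asked to pickle into an
        # in-memory buffer; fall back to the standard library's pickle module.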
        out_file = BytesIO()
        pickle.dump(df, out_file)

    return base64.b64encode(out_file.getvalue()) 
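The inverse operation is not part of this example; a minimal sketch of it, with an illustrative function name and assuming a pandas version whose read_pickle accepts a file-like buffer (recent releases do), could look like this:

import base64
from io import BytesIO

import pandas as pd

def string_to_df(b64_string):
    """Recover a data frame from the base64-encoded pickle produced by df_to_string."""
    raw = base64.b64decode(b64_string)
    # read_pickle also loads objects written by the stdlib pickle module, so the
    # fallback branch in df_to_string needs no special handling here.
    return pd.read_pickle(BytesIO(raw))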
Example #19
Source File: recommend_dnn.py    From deep_learning with MIT License
def rebuild_data():
    """
    Clean the raw data and select the feature columns.
    """
    user_header = ['user_id','gender', 'age',  'job']
    user_df = pd.read_csv('./data/ml-1m/users.dat', sep='::', names=user_header, usecols=[0, 1, 2, 3], engine = 'python')
    user_df.set_index(['user_id'], inplace = False) 

    movie_header = ['movie_id', 'title','category']
    movie_df = pd.read_csv('./data/ml-1m/movies.dat', sep='::', names=movie_header, usecols=[0, 1, 2], engine = 'python')
    movie_df.set_index(['movie_id'], inplace = False) 

    rating_header = ['user_id', 'movie_id', 'rating', 'timestamp']
    rating_df = pd.read_csv('./data/ml-1m/ratings.dat',sep='::', names=rating_header, engine = 'python')[:100000]

    rating_user = [user_df[user_df['user_id'] == uid].values[0] for uid, mid, r, _ in rating_df.values]
    rating_movie = [movie_df[movie_df['movie_id'] == mid].values[0] for uid, mid, r, _ in rating_df.values]

    user_df = pd.DataFrame(rating_user, index=None, columns=['user_id', 'gender', 'age',  'job'])
    movie_df = pd.DataFrame(rating_movie, index=None, columns=['movie_id', 'title', 'category'])
    rating_df = rating_df.rating
    pd.to_pickle(user_df, './data/ml-1m/user_pick')
    pd.to_pickle(movie_df, './data/ml-1m/movie_pick')
    pd.to_pickle(rating_df, './data/ml-1m/rating_pick')

    print(user_df.shape,movie_df.shape,rating_df.shape) 
Example #20
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_infer(self, ext, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ext
        compression = None
        for c in self._compression_to_extension:
            if self._compression_to_extension[c] == ext:
                compression = c
                break

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            df2 = pd.read_pickle(p2)

            tm.assert_frame_equal(df, df2)


# ---------------------
# test pickle compression
# --------------------- 
Example #21
Source File: cli.py    From aesthetics with Apache License 2.0
def train(features):
    X, Y = ordered_dict_to_x_y(features)
    pd.DataFrame(X).to_csv('features.csv')

    clf = get_classification()
    clf.fit(X, Y)
    pd.to_pickle(clf, 'classification.pkl')
    return clf 
Example #22
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_pickle_path_pathlib():
    df = tm.makeDataFrame()
    result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result) 
Example #23
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# --------------------- 
Example #24
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# --------------------- 
Example #25
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read_infer(self, ext, get_random_path):
        if ext == '.xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ext
        compression = None
        for c in self._compression_to_extension:
            if self._compression_to_extension[c] == ext:
                compression = c
                break

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            df2 = pd.read_pickle(p2)

            tm.assert_frame_equal(df, df2)


# ---------------------
# test pickle compression
# --------------------- 
Example #26
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read(self, protocol, get_random_path):
        with tm.ensure_clean(get_random_path) as path:
            df = tm.makeDataFrame()
            df.to_pickle(path, protocol=protocol)
            df2 = pd.read_pickle(path)
            tm.assert_frame_equal(df, df2) 
Example #27
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read_bad_versions(self, protocol, get_random_path):
        # For Python 2.x (respectively 3.y with y < 4), [expected]
        # HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol
        # parameter should not exceed 2 (respectively 3).
        if sys.version_info[:2] < (3, 0):
            expect_hp = 2
        else:
            expect_hp = 3
        with tm.assert_raises_regex(ValueError,
                                    "pickle protocol %d asked for; the highest"
                                    " available protocol is %d" % (protocol,
                                                                   expect_hp)):
            with tm.ensure_clean(get_random_path) as path:
                df = tm.makeDataFrame()
                df.to_pickle(path, protocol=protocol) 
Example #28
Source File: io.py    From modin with Apache License 2.0
def to_pickle(cls, obj, path, compression="infer", protocol=4):
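        # A pickle protocol of -1 tells the pickle module to use the highest protocol
        # available in the running interpreter, so the default of 4 is replaced with
        # whatever the interpreter supports before delegating to pandas.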
        if protocol == 4:
            protocol = -1
        ErrorMessage.default_to_pandas("`to_pickle`")
        if isinstance(obj, BaseQueryCompiler):
            return pandas.to_pickle(
                obj.to_pandas(), path, compression=compression, protocol=protocol
            )
        else:
            return pandas.to_pickle(
                obj, path, compression=compression, protocol=protocol
            ) 
Example #29
Source File: test_io.py    From modin with Apache License 2.0
def setup_pickle_file(row_size, force=False):
    if os.path.exists(TEST_PICKLE_FILENAME) and not force:
        pass
    else:
        df = pandas.DataFrame(
            {"col1": np.arange(row_size), "col2": np.arange(row_size)}
        )
        df.to_pickle(TEST_PICKLE_FILENAME) 
Example #30
Source File: 1_gen_sessions.py    From DSIN with Apache License 2.0
def gen_user_hist_sessions(model, FRAC=0.25):
    if model not in ['din', 'dsin']:
        raise ValueError('model must be din or dsin')

    print("gen " + model + " hist sess", FRAC)
    name = '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl'
    data = pd.read_pickle(name)
    data = data.loc[data.time_stamp >= 1493769600]  # 0503-0513
    # 0504~1493856000
    # 0503 1493769600

    user = pd.read_pickle('../sampled_data/user_profile_' + str(FRAC) + '.pkl')

    n_samples = user.shape[0]
    print(n_samples)
    batch_size = 150000
    iters = (n_samples - 1) // batch_size + 1

    print("total", iters, "iters", "batch_size", batch_size)
    for i in range(0, iters):
        target_user = user['userid'].values[i * batch_size:(i + 1) * batch_size]
        sub_data = data.loc[data.user.isin(target_user)]
        print(i, 'iter start')
        df_grouped = sub_data.groupby('user')
        if model == 'din':
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_din, n_jobs=20, backend='loky')
        else:
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_dsin, n_jobs=20, backend='multiprocessing')
        pd.to_pickle(user_hist_session, '../sampled_data/user_hist_session_' +
                     str(FRAC) + '_' + model + '_' + str(i) + '.pkl')
        print(i, 'pickled')
        del user_hist_session
        gc.collect()
        print(i, 'del')

    print("1_gen " + model + " hist sess done")