Python pandas.to_pickle() Examples

The following are 30 code examples of pandas.to_pickle(), drawn from open source projects. The source file, originating project, and license are noted above each example.
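For orientation, here is a minimal sketch of the round trip most of the examples below exercise; the file name is illustrative:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# Write the frame to disk. With the default compression="infer", the codec is
# chosen from the file extension (gzip here).
pd.to_pickle(df, "frame.pkl.gz")        # df.to_pickle("frame.pkl.gz") is equivalent

# Read it back; read_pickle infers the compression from the extension as well.
df2 = pd.read_pickle("frame.pkl.gz")
assert df.equals(df2)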
Example #1
Source File: test_pickle.py    From recruit with Apache License 2.0
def test_write_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #2
Source File: update_database.py    From estimagic with BSD 3-Clause "New" or "Revised" License
def _handle_exception(statements, database, exception_info):
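    # str(database.bind.url) presumably looks like "sqlite:///<path>"; slicing off the
    # 10-character "sqlite:///" prefix leaves the on-disk location of the database file.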
    directory = Path(str(database.bind.url)[10:])
    if not directory.is_dir():
        directory = Path(".")
    directory = directory.resolve()

    for stat in statements:
        if isinstance(stat, (sqlalchemy.sql.dml.Insert, sqlalchemy.sql.dml.Update)):
            values = stat.compile().params
            timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
            filename = f"{stat.table.name}_{timestamp}.pickle"
            pd.to_pickle(values, directory / filename)

    warnings.warn(
        f"Unable to write to database. The data was saved in {directory} instead. The "
        f"traceback was:\n\n{exception_info}"
    ) 
Example #3
Source File: test_pickle.py    From vnpy_crypto with MIT License
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #4
Source File: testing.py    From Splunking-Crime with GNU Affero General Public License v3.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #5
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_write_explicit(self, compression, get_random_path):
        # issue 11666
        if compression == 'xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            self.decompress_file(p1, p2, compression=compression)

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #6
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read_explicit(self, compression, get_random_path):
        # issue 11666
        if compression == 'xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #7
Source File: testing.py    From vnpy_crypto with MIT License
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #8
Source File: testing.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #9
Source File: testing.py    From elasticintel with GNU General Public License v3.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #10
Source File: testing.py    From recruit with Apache License 2.0
def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """

    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path) 
Example #11
Source File: test_io.py    From modin with Apache License 2.0
def test_to_pickle():
    modin_df = create_test_modin_dataframe()
    pandas_df = create_test_pandas_dataframe()

    TEST_PICKLE_DF_FILENAME = "test_df.pkl"
    TEST_PICKLE_pandas_FILENAME = "test_pandas.pkl"

    modin_df.to_pickle(TEST_PICKLE_DF_FILENAME)
    pandas_df.to_pickle(TEST_PICKLE_pandas_FILENAME)

    assert assert_files_eq(TEST_PICKLE_DF_FILENAME, TEST_PICKLE_pandas_FILENAME)

    teardown_test_file(TEST_PICKLE_pandas_FILENAME)
    teardown_test_file(TEST_PICKLE_DF_FILENAME)

    pd.to_pickle(modin_df, TEST_PICKLE_DF_FILENAME)
    pandas.to_pickle(pandas_df, TEST_PICKLE_pandas_FILENAME)

    assert assert_files_eq(TEST_PICKLE_DF_FILENAME, TEST_PICKLE_pandas_FILENAME)

    teardown_test_file(TEST_PICKLE_pandas_FILENAME)
    teardown_test_file(TEST_PICKLE_DF_FILENAME) 
Example #12
Source File: test_pickle.py    From recruit with Apache License 2.0
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #13
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_write_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2) 
Example #14
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)

            tm.assert_frame_equal(df, df2) 
Example #15
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_write_explicit_bad(self, compression, get_random_path):
        with tm.assert_raises_regex(ValueError,
                                    "Unrecognized compression type"):
            with tm.ensure_clean(get_random_path) as path:
                df = tm.makeDataFrame()
                df.to_pickle(path, compression=compression) 
Example #16
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_bad_versions(self, protocol, get_random_path):
        # For Python 2, HIGHEST_PROTOCOL should be 2.
        msg = ("pickle protocol {protocol} asked for; the highest available "
               "protocol is 2").format(protocol=protocol)
        with pytest.raises(ValueError, match=msg):
            with tm.ensure_clean(get_random_path) as path:
                df = tm.makeDataFrame()
                df.to_pickle(path, protocol=protocol) 
Example #17
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read(self, protocol, get_random_path):
        with tm.ensure_clean(get_random_path) as path:
            df = tm.makeDataFrame()
            df.to_pickle(path, protocol=protocol)
            df2 = pd.read_pickle(path)
            tm.assert_frame_equal(df, df2) 
Example #18
Source File: pandas.py    From ontask_b with MIT License
def df_to_string(df):
    """Transform a data frame into a b64 encoded pickled representation.

    :param df: Pandas dataframe
    :return: Base64 encoded string of its pickled representation
    """
    try:
        out_file = BytesIO()
        pd.to_pickle(df, out_file)
    except ValueError:
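        # Some pandas versions raise ValueError when asked to pickle into an
        # in-memory buffer; fall back to the standard library's pickle module.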
        out_file = BytesIO()
        pickle.dump(df, out_file)

    return base64.b64encode(out_file.getvalue()) 
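The inverse operation is not part of this example; a minimal sketch of it, with an illustrative function name and assuming a pandas version whose read_pickle accepts a file-like buffer (recent releases do), could look like this:

import base64
from io import BytesIO

import pandas as pd

def string_to_df(b64_string):
    """Recover a data frame from the base64-encoded pickle produced by df_to_string."""
    raw = base64.b64decode(b64_string)
    # read_pickle also loads objects written by the stdlib pickle module, so the
    # fallback branch in df_to_string needs no special handling here.
    return pd.read_pickle(BytesIO(raw))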
Example #19
Source File: recommend_dnn.py    From deep_learning with MIT License
def rebuild_data():
    """
    Clean the raw data and select the feature columns.
    """
    user_header = ['user_id','gender', 'age',  'job']
    user_df = pd.read_csv('./data/ml-1m/users.dat', sep='::', names=user_header, usecols=[0, 1, 2, 3], engine = 'python')
    user_df.set_index(['user_id'], inplace = False) 

    movie_header = ['movie_id', 'title','category']
    movie_df = pd.read_csv('./data/ml-1m/movies.dat', sep='::', names=movie_header, usecols=[0, 1, 2], engine = 'python')
    movie_df.set_index(['movie_id'], inplace = False) 

    rating_header = ['user_id', 'movie_id', 'rating', 'timestamp']
    rating_df = pd.read_csv('./data/ml-1m/ratings.dat',sep='::', names=rating_header, engine = 'python')[:100000]

    rating_user = [user_df[user_df['user_id'] == uid].values[0] for uid, mid, r, _ in rating_df.values]
    rating_movie = [movie_df[movie_df['movie_id'] == mid].values[0] for uid, mid, r, _ in rating_df.values]

    user_df = pd.DataFrame(rating_user, index=None, columns=['user_id', 'gender', 'age',  'job'])
    movie_df = pd.DataFrame(rating_movie, index=None, columns=['movie_id', 'title', 'category'])
    rating_df = rating_df.rating
    pd.to_pickle(user_df, './data/ml-1m/user_pick')
    pd.to_pickle(movie_df, './data/ml-1m/movie_pick')
    pd.to_pickle(rating_df, './data/ml-1m/rating_pick')

    print(user_df.shape,movie_df.shape,rating_df.shape) 
Example #20
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_read_infer(self, ext, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ext
        compression = None
        for c in self._compression_to_extension:
            if self._compression_to_extension[c] == ext:
                compression = c
                break

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            df2 = pd.read_pickle(p2)

            tm.assert_frame_equal(df, df2)


# ---------------------
# test pickle compression
# --------------------- 
Example #21
Source File: cli.py    From aesthetics with Apache License 2.0
def train(features):
    X, Y = ordered_dict_to_x_y(features)
    pd.DataFrame(X).to_csv('features.csv')

    clf = get_classification()
    clf.fit(X, Y)
    pd.to_pickle(clf, 'classification.pkl')
    return clf 
Example #22
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_pickle_path_pathlib():
    df = tm.makeDataFrame()
    result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result) 
Example #23
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# --------------------- 
Example #24
Source File: test_pickle.py    From predictive-maintenance-using-machine-learning with Apache License 2.0
def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# --------------------- 
Example #25
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read_infer(self, ext, get_random_path):
        if ext == '.xz':
            tm._skip_if_no_lzma()

        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ext
        compression = None
        for c in self._compression_to_extension:
            if self._compression_to_extension[c] == ext:
                compression = c
                break

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = tm.makeDataFrame()

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            df2 = pd.read_pickle(p2)

            tm.assert_frame_equal(df, df2)


# ---------------------
# test pickle compression
# --------------------- 
Example #26
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read(self, protocol, get_random_path):
        with tm.ensure_clean(get_random_path) as path:
            df = tm.makeDataFrame()
            df.to_pickle(path, protocol=protocol)
            df2 = pd.read_pickle(path)
            tm.assert_frame_equal(df, df2) 
Example #27
Source File: test_pickle.py    From elasticintel with GNU General Public License v3.0
def test_read_bad_versions(self, protocol, get_random_path):
        # For Python 2.x (respectively 3.y with y < 4), [expected]
        # HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol
        # parameter should not exceed 2 (respectively 3).
        if sys.version_info[:2] < (3, 0):
            expect_hp = 2
        else:
            expect_hp = 3
        with tm.assert_raises_regex(ValueError,
                                    "pickle protocol %d asked for; the highest"
                                    " available protocol is %d" % (protocol,
                                                                   expect_hp)):
            with tm.ensure_clean(get_random_path) as path:
                df = tm.makeDataFrame()
                df.to_pickle(path, protocol=protocol) 
Example #28
Source File: io.py    From modin with Apache License 2.0
def to_pickle(cls, obj, path, compression="infer", protocol=4):
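        # A pickle protocol of -1 tells the pickle module to use the highest protocol
        # available in the running interpreter, so the default of 4 is replaced with
        # whatever the interpreter supports before delegating to pandas.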
        if protocol == 4:
            protocol = -1
        ErrorMessage.default_to_pandas("`to_pickle`")
        if isinstance(obj, BaseQueryCompiler):
            return pandas.to_pickle(
                obj.to_pandas(), path, compression=compression, protocol=protocol
            )
        else:
            return pandas.to_pickle(
                obj, path, compression=compression, protocol=protocol
            ) 
Example #29
Source File: test_io.py    From modin with Apache License 2.0
def setup_pickle_file(row_size, force=False):
    if os.path.exists(TEST_PICKLE_FILENAME) and not force:
        pass
    else:
        df = pandas.DataFrame(
            {"col1": np.arange(row_size), "col2": np.arange(row_size)}
        )
        df.to_pickle(TEST_PICKLE_FILENAME) 
Example #30
Source File: 1_gen_sessions.py    From DSIN with Apache License 2.0
def gen_user_hist_sessions(model, FRAC=0.25):
    if model not in ['din', 'dsin']:
        raise ValueError('model must be din or dsin')

    print("gen " + model + " hist sess", FRAC)
    name = '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl'
    data = pd.read_pickle(name)
    data = data.loc[data.time_stamp >= 1493769600]  # 0503-0513
    # 0504~1493856000
    # 0503 1493769600

    user = pd.read_pickle('../sampled_data/user_profile_' + str(FRAC) + '.pkl')

    n_samples = user.shape[0]
    print(n_samples)
    batch_size = 150000
    iters = (n_samples - 1) // batch_size + 1

    print("total", iters, "iters", "batch_size", batch_size)
    for i in range(0, iters):
        target_user = user['userid'].values[i * batch_size:(i + 1) * batch_size]
        sub_data = data.loc[data.user.isin(target_user)]
        print(i, 'iter start')
        df_grouped = sub_data.groupby('user')
        if model == 'din':
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_din, n_jobs=20, backend='loky')
        else:
            user_hist_session = applyParallel(
                df_grouped, gen_session_list_dsin, n_jobs=20, backend='multiprocessing')
        pd.to_pickle(user_hist_session, '../sampled_data/user_hist_session_' +
                     str(FRAC) + '_' + model + '_' + str(i) + '.pkl')
        print(i, 'pickled')
        del user_hist_session
        gc.collect()
        print(i, 'del')

    print("1_gen " + model + " hist sess done")