Python pandas.to_pickle() Examples
The following are 30 code examples of pandas.to_pickle().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_pickle.py From recruit with Apache License 2.0 | 7 votes |
def test_write_explicit(self, compression, get_random_path):
    """Write a compressed pickle, expand it by hand, and read it back raw.

    The frame is pickled with an explicit ``compression`` argument, the
    resulting file is expanded through ``tm.decompress_file`` into a second
    file, and that second file must load with ``compression=None`` into a
    frame equal to the one that was written.
    """
    compressed_name = get_random_path + ".compressed"
    raw_name = get_random_path + ".raw"

    with tm.ensure_clean(compressed_name) as p1:
        with tm.ensure_clean(raw_name) as p2:
            expected = tm.makeDataFrame()

            # Pickle using the compression under test.
            expected.to_pickle(p1, compression=compression)

            # Copy the decompressed byte stream into the second file.
            with tm.decompress_file(p1, compression=compression) as src, \
                    open(p2, "wb") as dst:
                dst.write(src.read())

            # The expanded copy has to be readable as an uncompressed pickle.
            result = pd.read_pickle(p2, compression=None)
            tm.assert_frame_equal(expected, result)
Example #2
Source File: update_database.py From estimagic with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _handle_exception(statements, database, exception_info):
    """Pickle the parameters of insert/update statements to disk and warn.

    For every statement that is a SQLAlchemy ``Insert`` or ``Update``, the
    compiled parameters are written with ``pd.to_pickle`` to a file named
    ``<table name>_<timestamp>.pickle``. A warning carrying
    ``exception_info`` is issued afterwards, whether or not any statement
    was actually dumped.

    Args:
        statements: Iterable of statements; anything that is not an
            ``Insert`` or ``Update`` is ignored.
        database: Object whose ``bind.url`` is used to choose the output
            directory.
        exception_info: Text interpolated into the warning message.
    """
    # NOTE(review): the slice drops the first 10 characters of the URL,
    # presumably a "sqlite:///" prefix -- confirm against the callers.
    candidate = Path(str(database.bind.url)[10:])
    # Anything that is not an existing directory falls back to the CWD.
    directory = (candidate if candidate.is_dir() else Path(".")).resolve()

    for stat in statements:
        if not isinstance(stat, (sqlalchemy.sql.dml.Insert, sqlalchemy.sql.dml.Update)):
            continue
        values = stat.compile().params
        # Microsecond-resolution timestamp keeps successive dumps apart.
        timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S-%f")
        filename = f"{stat.table.name}_{timestamp}.pickle"
        pd.to_pickle(values, directory / filename)

    warnings.warn(
        f"Unable to write to database. The data was saved in {directory} instead. The "
        f"traceback was:\n\n{exception_info}"
    )
Example #3
Source File: test_pickle.py From vnpy_crypto with MIT License | 6 votes |
def test_read_explicit(self, compression, get_random_path):
    """Read a hand-compressed pickle back with an explicit compression.

    A frame is pickled uncompressed, compressed via ``self.compress_file``,
    and the compressed copy must load into an equal frame when the same
    ``compression`` is passed to ``pd.read_pickle``.
    """
    raw_name = get_random_path + ".raw"
    compressed_name = get_random_path + ".compressed"

    with tm.ensure_clean(raw_name) as p1:
        with tm.ensure_clean(compressed_name) as p2:
            expected = tm.makeDataFrame()

            # Plain, uncompressed pickle first.
            expected.to_pickle(p1, compression=None)

            # Compress it into the second file.
            self.compress_file(p1, p2, compression=compression)

            # Load the compressed file, naming the compression explicitly.
            result = pd.read_pickle(p2, compression=compression)
            tm.assert_frame_equal(expected, result)
Example #4
Source File: testing.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def round_trip_pickle(obj, path=None):
    """
    Write an object to a pickle file and load it straight back.

    Parameters
    ----------
    obj : pandas object
        Object to serialize and then reload.
    path : str, default None
        Location of the pickle file. When omitted, a name of the form
        ``__<rands(10)>__.pickle`` is generated.

    Returns
    -------
    round_trip_pickled_object : pandas object
        Whatever ``pd.read_pickle`` returns for the file just written.
    """
    target = path
    if target is None:
        target = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))

    # ensure_clean yields the path to use for both the write and the read.
    with ensure_clean(target) as clean_path:
        pd.to_pickle(obj, clean_path)
        return pd.read_pickle(clean_path)
Example #5
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_write_explicit(self, compression, get_random_path):
    """Write a compressed pickle, decompress it, and read the raw copy.

    The frame is pickled with an explicit ``compression``, expanded with
    ``self.decompress_file``, and the expanded file must load with
    ``compression=None`` into an equal frame.
    """
    # issue 11666
    if compression == 'xz':
        tm._skip_if_no_lzma()

    compressed_name = get_random_path + ".compressed"
    raw_name = get_random_path + ".raw"

    with tm.ensure_clean(compressed_name) as p1:
        with tm.ensure_clean(raw_name) as p2:
            expected = tm.makeDataFrame()

            # Pickle using the compression under test.
            expected.to_pickle(p1, compression=compression)

            # Expand into the second file.
            self.decompress_file(p1, p2, compression=compression)

            # The expanded copy has to be readable as an uncompressed pickle.
            result = pd.read_pickle(p2, compression=None)
            tm.assert_frame_equal(expected, result)
Example #6
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def test_read_explicit(self, compression, get_random_path):
    """Read a hand-compressed pickle back with an explicit compression.

    A frame is pickled uncompressed, compressed via ``self.compress_file``,
    and the compressed copy must load into an equal frame when the same
    ``compression`` is given to ``pd.read_pickle``.
    """
    # issue 11666
    if compression == 'xz':
        tm._skip_if_no_lzma()

    raw_name = get_random_path + ".raw"
    compressed_name = get_random_path + ".compressed"

    with tm.ensure_clean(raw_name) as p1:
        with tm.ensure_clean(compressed_name) as p2:
            expected = tm.makeDataFrame()

            # Plain, uncompressed pickle first.
            expected.to_pickle(p1, compression=None)

            # Compress it into the second file.
            self.compress_file(p1, p2, compression=compression)

            # Load the compressed file, naming the compression explicitly.
            result = pd.read_pickle(p2, compression=compression)
            tm.assert_frame_equal(expected, result)
Example #7
Source File: testing.py From vnpy_crypto with MIT License | 6 votes |
def round_trip_pickle(obj, path=None):
    """
    Serialize an object with pickle and immediately deserialize it.

    Parameters
    ----------
    obj : pandas object
        Object to write out and read back.
    path : str, default None
        File used for the round trip. If None, a name of the form
        ``__<rands(10)>__.pickle`` is generated.

    Returns
    -------
    round_trip_pickled_object : pandas object
        Result of ``pd.read_pickle`` on the file that was just written.
    """
    pickle_name = path
    if pickle_name is None:
        pickle_name = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))

    # The path yielded by ensure_clean is used for both write and read.
    with ensure_clean(pickle_name) as managed_path:
        pd.to_pickle(obj, managed_path)
        return pd.read_pickle(managed_path)
Example #8
Source File: testing.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def round_trip_pickle(obj, path=None):
    """
    Pickle an object to a file, then read that file back.

    Parameters
    ----------
    obj : pandas object
        Object to be pickled and re-read.
    path : str, default None
        File to write to and read from. When None, a name of the form
        ``__<rands(10)>__.pickle`` is generated.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The object returned by ``pd.read_pickle`` for the written file.
    """
    destination = path
    if destination is None:
        destination = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))

    # Write and read through the path handed out by ensure_clean.
    with ensure_clean(destination) as tmp_path:
        pd.to_pickle(obj, tmp_path)
        return pd.read_pickle(tmp_path)
Example #9
Source File: testing.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def round_trip_pickle(obj, path=None):
    """
    Round-trip an object through a pickle file.

    Parameters
    ----------
    obj : pandas object
        Object to pickle and then load again.
    path : str, default None
        Pickle file location. When not supplied, a name of the form
        ``__<rands(10)>__.pickle`` is generated.

    Returns
    -------
    round_trip_pickled_object : pandas object
        What ``pd.read_pickle`` yields for the freshly written file.
    """
    file_name = path
    if file_name is None:
        file_name = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))

    # Both operations use the path produced by the ensure_clean context.
    with ensure_clean(file_name) as working_path:
        pd.to_pickle(obj, working_path)
        return pd.read_pickle(working_path)
Example #10
Source File: testing.py From recruit with Apache License 2.0 | 6 votes |
def round_trip_pickle(obj, path=None):
    """
    Dump an object with ``pd.to_pickle`` and reload it with ``pd.read_pickle``.

    Parameters
    ----------
    obj : pandas object
        Object to send through the pickle round trip.
    path : str, default None
        File used for the round trip. If None, a name of the form
        ``__<rands(10)>__.pickle`` is generated.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The reloaded object.
    """
    chosen = path
    if chosen is None:
        chosen = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))

    # ensure_clean supplies the path used for the write and the read.
    with ensure_clean(chosen) as scratch_path:
        pd.to_pickle(obj, scratch_path)
        return pd.read_pickle(scratch_path)
Example #11
Source File: test_io.py From modin with Apache License 2.0 | 6 votes |
def test_to_pickle():
    """Check that modin and pandas write identical pickle files.

    The comparison is made twice: once through the ``to_pickle`` method of
    each frame and once through the module-level ``to_pickle`` functions.
    After each comparison both files are removed.
    """
    modin_df = create_test_modin_dataframe()
    pandas_df = create_test_pandas_dataframe()

    modin_file = "test_df.pkl"
    pandas_file = "test_pandas.pkl"

    def compare_then_remove():
        # Compare the two files, then remove the pandas one before the modin one.
        assert assert_files_eq(modin_file, pandas_file)
        teardown_test_file(pandas_file)
        teardown_test_file(modin_file)

    # Method form.
    modin_df.to_pickle(modin_file)
    pandas_df.to_pickle(pandas_file)
    compare_then_remove()

    # Module-level function form.
    pd.to_pickle(modin_df, modin_file)
    pandas.to_pickle(pandas_df, pandas_file)
    compare_then_remove()
Example #12
Source File: test_pickle.py From recruit with Apache License 2.0 | 6 votes |
def test_read_explicit(self, compression, get_random_path):
    """Load a compressed pickle while stating its compression explicitly.

    The frame is written uncompressed, compressed by ``self.compress_file``,
    and then read with the matching ``compression`` argument; the result
    must equal the original frame.
    """
    uncompressed_name = get_random_path + ".raw"
    compressed_name = get_random_path + ".compressed"

    with tm.ensure_clean(uncompressed_name) as p1:
        with tm.ensure_clean(compressed_name) as p2:
            original = tm.makeDataFrame()

            # Step 1: uncompressed pickle.
            original.to_pickle(p1, compression=None)

            # Step 2: compress into the second file.
            self.compress_file(p1, p2, compression=compression)

            # Step 3: read it back with the compression named.
            reloaded = pd.read_pickle(p2, compression=compression)
            tm.assert_frame_equal(original, reloaded)
Example #13
Source File: test_pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_write_explicit(self, compression, get_random_path):
    """Pickle with explicit compression and verify the decompressed bytes.

    The compressed pickle is expanded through ``tm.decompress_file`` into a
    second file, which must then load with ``compression=None`` into a frame
    equal to the one written.
    """
    packed_name = get_random_path + ".compressed"
    unpacked_name = get_random_path + ".raw"

    with tm.ensure_clean(packed_name) as p1:
        with tm.ensure_clean(unpacked_name) as p2:
            original = tm.makeDataFrame()

            # Step 1: compressed pickle.
            original.to_pickle(p1, compression=compression)

            # Step 2: stream the decompressed content into the second file.
            with tm.decompress_file(p1, compression=compression) as reader, \
                    open(p2, "wb") as writer:
                writer.write(reader.read())

            # Step 3: read the expanded file as a plain pickle.
            reloaded = pd.read_pickle(p2, compression=None)
            tm.assert_frame_equal(original, reloaded)
Example #14
Source File: test_pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def test_read_explicit(self, compression, get_random_path):
    """Verify ``pd.read_pickle`` honors an explicit ``compression``.

    An uncompressed pickle is compressed with ``self.compress_file``; reading
    the compressed file with the same ``compression`` must give back a frame
    equal to the one that was pickled.
    """
    plain_name = get_random_path + ".raw"
    packed_name = get_random_path + ".compressed"

    with tm.ensure_clean(plain_name) as p1:
        with tm.ensure_clean(packed_name) as p2:
            source_frame = tm.makeDataFrame()

            # Uncompressed pickle.
            source_frame.to_pickle(p1, compression=None)

            # Compressed copy.
            self.compress_file(p1, p2, compression=compression)

            # Read the compressed copy back.
            loaded_frame = pd.read_pickle(p2, compression=compression)
            tm.assert_frame_equal(source_frame, loaded_frame)
Example #15
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_write_explicit_bad(self, compression, get_random_path):
    """Pickling with the given ``compression`` must raise ValueError.

    The error message has to match "Unrecognized compression type".
    """
    expect_error = tm.assert_raises_regex(ValueError, "Unrecognized compression type")
    # The error check is entered first so it wraps the temp-file context.
    with expect_error, tm.ensure_clean(get_random_path) as path:
        frame = tm.makeDataFrame()
        frame.to_pickle(path, compression=compression)
Example #16
Source File: test_pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_bad_versions(self, protocol, get_random_path):
    """Pickling with the given ``protocol`` must raise ValueError.

    The error text must name the requested protocol and state that the
    highest available protocol is 2.
    """
    # For Python 2, HIGHEST_PROTOCOL should be 2.
    template = ("pickle protocol {protocol} asked for; the highest available "
                "protocol is 2")
    msg = template.format(protocol=protocol)

    # The error check is entered first so it wraps the temp-file context.
    with pytest.raises(ValueError, match=msg), \
            tm.ensure_clean(get_random_path) as path:
        frame = tm.makeDataFrame()
        frame.to_pickle(path, protocol=protocol)
Example #17
Source File: test_pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read(self, protocol, get_random_path):
    """Round-trip a frame through a pickle written with ``protocol``."""
    with tm.ensure_clean(get_random_path) as path:
        written = tm.makeDataFrame()
        written.to_pickle(path, protocol=protocol)

        # Reading takes no protocol argument here.
        loaded = pd.read_pickle(path)
        tm.assert_frame_equal(written, loaded)
Example #18
Source File: pandas.py From ontask_b with MIT License | 5 votes |
def df_to_string(df):
    """Serialize a data frame to pickle and encode the bytes as base64.

    :param df: Pandas dataframe
    :return: Value returned by ``base64.b64encode`` for the pickled bytes
    """
    buffer = BytesIO()
    try:
        pd.to_pickle(df, buffer)
    except ValueError:
        # pandas rejected the in-memory target: start over with a fresh
        # buffer and use the standard pickle module instead.
        buffer = BytesIO()
        pickle.dump(df, buffer)

    pickled_bytes = buffer.getvalue()
    return base64.b64encode(pickled_bytes)
Example #19
Source File: recommend_dnn.py From deep_learning with MIT License | 5 votes |
def _rows_for_ids(table, key, wanted_ids):
    """Return one row of ``table`` per entry of ``wanted_ids``, in that order."""
    # Keep the first row per id, then index by id so that every lookup is a
    # hash probe instead of a full boolean scan of the table.
    indexed = table.drop_duplicates(subset=key).set_index(key, drop=False)
    # .loc with a list of labels raises KeyError for ids that are missing.
    return indexed.loc[wanted_ids].reset_index(drop=True)


def rebuild_data(data_dir='./data/ml-1m', max_ratings=100000):
    """Clean the raw data and select the feature columns.

    Reads ``users.dat``, ``movies.dat`` and ``ratings.dat`` (``::``-separated)
    from ``data_dir`` and expands the user and movie tables so that row *i*
    of each describes the user / movie referenced by rating *i*. The three
    row-aligned results are pickled next to the inputs as ``user_pick``,
    ``movie_pick`` and ``rating_pick``, and their shapes are printed.

    Fix over the previous version: the user rows used to be looked up with
    the *movie* id of each rating; they are now looked up with the user id.

    Parameters
    ----------
    data_dir : str
        Directory holding the ``.dat`` inputs and receiving the pickles.
    max_ratings : int or None
        Number of leading rating rows to use; ``None`` uses all of them.

    Raises
    ------
    KeyError
        If a rating refers to a user id or movie id that is absent from the
        corresponding table.
    """
    import os

    user_header = ['user_id', 'gender', 'age', 'job']
    user_df = pd.read_csv(os.path.join(data_dir, 'users.dat'), sep='::',
                          names=user_header, usecols=[0, 1, 2, 3],
                          engine='python')

    movie_header = ['movie_id', 'title', 'category']
    movie_df = pd.read_csv(os.path.join(data_dir, 'movies.dat'), sep='::',
                           names=movie_header, usecols=[0, 1, 2],
                           engine='python')

    rating_header = ['user_id', 'movie_id', 'rating', 'timestamp']
    # nrows stops parsing early instead of reading the file and slicing.
    rating_df = pd.read_csv(os.path.join(data_dir, 'ratings.dat'), sep='::',
                            names=rating_header, engine='python',
                            nrows=max_ratings)

    # One user row and one movie row per rating, aligned with rating order.
    user_df = _rows_for_ids(user_df, 'user_id', rating_df['user_id'].values)
    movie_df = _rows_for_ids(movie_df, 'movie_id', rating_df['movie_id'].values)
    rating_df = rating_df.rating

    pd.to_pickle(user_df, os.path.join(data_dir, 'user_pick'))
    pd.to_pickle(movie_df, os.path.join(data_dir, 'movie_pick'))
    pd.to_pickle(rating_df, os.path.join(data_dir, 'rating_pick'))
    print(user_df.shape, movie_df.shape, rating_df.shape)
Example #20
Source File: test_pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_read_infer(self, ext, get_random_path):
    """Read a compressed pickle with the compression inferred from ``ext``.

    The compression used to produce the file is the first key of
    ``self._compression_to_extension`` whose value equals ``ext`` (``None``
    when nothing matches). ``pd.read_pickle`` is then called without a
    ``compression`` argument and must return an equal frame.
    """
    raw_name = get_random_path + ".raw"
    compressed_name = get_random_path + ext

    # First compression whose registered extension equals ``ext``.
    compression = next(
        (c for c in self._compression_to_extension
         if self._compression_to_extension[c] == ext),
        None,
    )

    with tm.ensure_clean(raw_name) as p1:
        with tm.ensure_clean(compressed_name) as p2:
            expected = tm.makeDataFrame()

            # Plain, uncompressed pickle first.
            expected.to_pickle(p1, compression=None)

            # Compress it into the file carrying the extension under test.
            self.compress_file(p1, p2, compression=compression)

            # No compression argument: it has to be inferred.
            result = pd.read_pickle(p2)
            tm.assert_frame_equal(expected, result)

# ---------------------
# test pickle compression
# ---------------------
Example #21
Source File: cli.py From aesthetics with Apache License 2.0 | 5 votes |
def train(features):
    """Fit a classifier on ``features`` and persist the inputs and the model.

    ``features`` is split by ``ordered_dict_to_x_y``; the first part is
    written to ``features.csv``, the classifier from ``get_classification``
    is fitted on both parts, pickled to ``classification.pkl`` and returned.
    """
    X, Y = ordered_dict_to_x_y(features)

    feature_table = pd.DataFrame(X)
    feature_table.to_csv('features.csv')

    model = get_classification()
    model.fit(X, Y)

    # pd.to_pickle is used here to pickle the fitted model object.
    pd.to_pickle(model, 'classification.pkl')
    return model
Example #22
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_pickle_path_pathlib():
    """Round-trip a frame through pickle using ``tm.round_trip_pathlib``."""
    frame = tm.makeDataFrame()
    restored = tm.round_trip_pathlib(frame.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(frame, restored)
Example #23
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_pickle_path_localpath():
    """Round-trip a frame through pickle using ``tm.round_trip_localpath``."""
    frame = tm.makeDataFrame()
    restored = tm.round_trip_localpath(frame.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(frame, restored)

# ---------------------
# test pickle compression
# ---------------------
Example #24
Source File: test_pickle.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def test_pickle_path_localpath():
    """Check the pickle writer and reader via ``tm.round_trip_localpath``."""
    original = tm.makeDataFrame()
    reloaded = tm.round_trip_localpath(original.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(original, reloaded)

# ---------------------
# test pickle compression
# ---------------------
Example #25
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read_infer(self, ext, get_random_path):
    """Read a compressed pickle with the compression inferred from ``ext``.

    The compression used to produce the file is the first key of
    ``self._compression_to_extension`` whose value equals ``ext`` (``None``
    when nothing matches). ``pd.read_pickle`` is then called without a
    ``compression`` argument and must return an equal frame.
    """
    if ext == '.xz':
        tm._skip_if_no_lzma()

    raw_name = get_random_path + ".raw"
    compressed_name = get_random_path + ext

    # First compression whose registered extension equals ``ext``.
    compression = next(
        (c for c in self._compression_to_extension
         if self._compression_to_extension[c] == ext),
        None,
    )

    with tm.ensure_clean(raw_name) as p1:
        with tm.ensure_clean(compressed_name) as p2:
            expected = tm.makeDataFrame()

            # Plain, uncompressed pickle first.
            expected.to_pickle(p1, compression=None)

            # Compress it into the file carrying the extension under test.
            self.compress_file(p1, p2, compression=compression)

            # No compression argument: it has to be inferred.
            result = pd.read_pickle(p2)
            tm.assert_frame_equal(expected, result)

# ---------------------
# test pickle compression
# ---------------------
Example #26
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read(self, protocol, get_random_path):
    """Write a frame with the given pickle ``protocol`` and read it back."""
    with tm.ensure_clean(get_random_path) as path:
        original = tm.makeDataFrame()
        original.to_pickle(path, protocol=protocol)

        # The read side is called with the path only.
        reloaded = pd.read_pickle(path)
        tm.assert_frame_equal(original, reloaded)
Example #27
Source File: test_pickle.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_read_bad_versions(self, protocol, get_random_path):
    """Pickling with the given ``protocol`` must raise ValueError.

    The error text must name the requested protocol and the highest
    protocol expected for the running interpreter.
    """
    # For Python 2.x (respectively 3.y with y < 4), [expected]
    # HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol
    # parameter should not exceed 2 (respectively 3).
    expect_hp = 2 if sys.version_info[:2] < (3, 0) else 3

    pattern = ("pickle protocol %d asked for; the highest"
               " available protocol is %d" % (protocol, expect_hp))

    # The error check is entered first so it wraps the temp-file context.
    with tm.assert_raises_regex(ValueError, pattern), \
            tm.ensure_clean(get_random_path) as path:
        frame = tm.makeDataFrame()
        frame.to_pickle(path, protocol=protocol)
Example #28
Source File: io.py From modin with Apache License 2.0 | 5 votes |
def to_pickle(cls, obj, path, compression="infer", protocol=4):
    """Pickle ``obj`` to ``path`` by delegating to ``pandas.to_pickle``.

    A ``BaseQueryCompiler`` is converted with ``to_pandas()`` first; any
    other object is forwarded unchanged. The return value is whatever
    ``pandas.to_pickle`` returns.
    """
    # A protocol of 4 (this method's default) is forwarded as -1.
    forwarded_protocol = -1 if protocol == 4 else protocol

    ErrorMessage.default_to_pandas("`to_pickle`")

    payload = obj.to_pandas() if isinstance(obj, BaseQueryCompiler) else obj
    return pandas.to_pickle(
        payload, path, compression=compression, protocol=forwarded_protocol
    )
Example #29
Source File: test_io.py From modin with Apache License 2.0 | 5 votes |
def setup_pickle_file(row_size, force=False):
    """Create the pickle fixture at ``TEST_PICKLE_FILENAME`` when needed.

    Nothing is written when the file already exists, unless ``force`` is
    true. The fixture is a two-column frame (``col1`` and ``col2``), each
    column being ``np.arange(row_size)``.
    """
    already_there = os.path.exists(TEST_PICKLE_FILENAME)
    if already_there and not force:
        return

    columns = {"col1": np.arange(row_size), "col2": np.arange(row_size)}
    fixture = pandas.DataFrame(columns)
    fixture.to_pickle(TEST_PICKLE_FILENAME)
Example #30
Source File: 1_gen_sessions.py From DSIN with Apache License 2.0 | 5 votes |
def gen_user_hist_sessions(model, FRAC=0.25):
    """Build per-user history sessions batch by batch and pickle each batch.

    Loads the behavior log and the user profile for the sampling fraction
    ``FRAC`` from ``../sampled_data``, keeps log rows whose ``time_stamp`` is
    at least 1493769600, and processes the users in batches of 150000. For
    each batch the log rows are grouped by ``user``, handed to
    ``applyParallel`` with the model-specific session builder, and the
    result is written to
    ``../sampled_data/user_hist_session_<FRAC>_<model>_<batch>.pkl``.

    Fix over the previous version: the validation error named a model
    ("dmsn") that is not accepted; it now names the accepted values.

    Parameters
    ----------
    model : str
        Either ``'din'`` or ``'dsin'``.
    FRAC : float
        Sampling fraction; only used to build the input/output file names.

    Raises
    ------
    ValueError
        If ``model`` is neither ``'din'`` nor ``'dsin'``.
    """
    if model not in ['din', 'dsin']:
        raise ValueError('model must be din or dsin')

    print("gen " + model + " hist sess", FRAC)
    name = '../sampled_data/behavior_log_pv_user_filter_enc_' + str(FRAC) + '.pkl'
    data = pd.read_pickle(name)
    data = data.loc[data.time_stamp >= 1493769600]  # 0503-0513
    # 0504~1493856000
    # 0503 1493769600

    user = pd.read_pickle('../sampled_data/user_profile_' + str(FRAC) + '.pkl')

    n_samples = user.shape[0]
    print(n_samples)
    batch_size = 150000
    # Ceiling division: the last batch may be partial.
    iters = (n_samples - 1) // batch_size + 1

    # The session builder and joblib backend depend only on the model, so
    # they are chosen once rather than on every batch.
    if model == 'din':
        session_builder, backend = gen_session_list_din, 'loky'
    else:
        session_builder, backend = gen_session_list_dsin, 'multiprocessing'

    print("total", iters, "iters", "batch_size", batch_size)
    for i in range(0, iters):
        target_user = user['userid'].values[i * batch_size:(i + 1) * batch_size]
        sub_data = data.loc[data.user.isin(target_user)]
        print(i, 'iter start')
        df_grouped = sub_data.groupby('user')
        user_hist_session = applyParallel(
            df_grouped, session_builder, n_jobs=20, backend=backend)
        pd.to_pickle(user_hist_session, '../sampled_data/user_hist_session_' +
                     str(FRAC) + '_' + model + '_' + str(i) + '.pkl')
        print(i, 'pickled')
        # Release the batch before the next one is built to limit peak memory.
        del user_hist_session
        gc.collect()
        print(i, 'del')

    print("1_gen " + model + " hist sess done")