Python pandas.read_pickle() Examples

The following are 30 code examples of pandas.read_pickle(), drawn from open-source projects; the source file, project, and license are listed above each example.
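Before the project examples, here is a minimal round-trip sketch of the API the examples below exercise: DataFrame.to_pickle() writes a pickled (optionally compressed) object to disk, and pandas.read_pickle() loads it back, inferring compression from the file extension. The data and the file name round_trip.pkl.gz are illustrative placeholders, not taken from any of the projects below.

import pandas as pd

# A small illustrative DataFrame to persist.
df = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})

# Write it to disk; gzip compression is inferred from the ".gz" extension.
df.to_pickle("round_trip.pkl.gz")

# Read it back; read_pickle infers the compression the same way.
df2 = pd.read_pickle("round_trip.pkl.gz")

# The round trip should preserve the data exactly.
pd.testing.assert_frame_equal(df, df2)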
Example #1
Source File: test_pickle.py From recruit with Apache License 2.0

def test_write_explicit(self, compression, get_random_path):
    base = get_random_path
    path1 = base + ".compressed"
    path2 = base + ".raw"

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to compressed file
        df.to_pickle(p1, compression=compression)

        # decompress
        with tm.decompress_file(p1, compression=compression) as f:
            with open(p2, "wb") as fh:
                fh.write(f.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
Example #2
Source File: abstract.py From qb with MIT License

def load_guesses(directory: str, output_type='char', folds=c.GUESSER_GENERATION_FOLDS) -> pd.DataFrame:
    """
    Loads all the guesses pertaining to a guesser inferred from directory
    :param directory: where to load guesses from
    :param output_type: One of: char, full, first
    :param folds: folds to load, by default all of them
    :return: guesses across all folds for given directory
    """
    assert len(folds) > 0
    guess_df = None
    for fold in folds:
        input_path = AbstractGuesser.guess_path(directory, fold, output_type)
        if guess_df is None:
            guess_df = pd.read_pickle(input_path)
        else:
            new_guesses_df = pd.read_pickle(input_path)
            guess_df = pd.concat([guess_df, new_guesses_df])

    return guess_df
Example #3
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_write_infer(self, ext, get_random_path):
    base = get_random_path
    path1 = base + ext
    path2 = base + ".raw"
    compression = None
    for c in self._compression_to_extension:
        if self._compression_to_extension[c] == ext:
            compression = c
            break

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to compressed file by inferred compression method
        df.to_pickle(p1)

        # decompress
        with tm.decompress_file(p1, compression=compression) as f:
            with open(p2, "wb") as fh:
                fh.write(f.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
Example #4
Source File: main.py From Deep-Learning-with-TensorFlow-Second-Edition with MIT License

def user_user_pearson_corr(ratings_df, TRAINED):
    if TRAINED:
        if os.path.isfile("model/user_user_corr_train.pkl"):
            df_corr = pd.read_pickle("user_user_corr_train.pkl")
        else:
            df = pd.read_pickle("user_item_table_train.pkl")
            df = df.T
            df_corr = df.corr()
            df_corr.to_pickle("user_user_corr_train.pkl")
    else:
        if os.path.isfile("model/user_user_corr.pkl"):
            df_corr = pd.read_pickle("user_user_corr.pkl")
        else:
            df = pd.read_pickle("user_item_table.pkl")
            df = df.T
            df_corr = df.corr()
            df_corr.to_pickle("user_user_corr.pkl")
    return df_corr
Example #5
Source File: walker.py From GraphEmbedding with MIT License

def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0):

    layers_adj = pd.read_pickle(self.temp_path + 'layers_adj.pkl')
    layers_alias = pd.read_pickle(self.temp_path + 'layers_alias.pkl')
    layers_accept = pd.read_pickle(self.temp_path + 'layers_accept.pkl')
    gamma = pd.read_pickle(self.temp_path + 'gamma.pkl')
    walks = []
    initialLayer = 0

    nodes = self.idx  # list(self.g.nodes())

    results = Parallel(n_jobs=workers, verbose=verbose)(
        delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob,
                                      layers_adj, layers_accept, layers_alias, gamma)
        for num in partition_num(num_walks, workers))

    walks = list(itertools.chain(*results))

    return walks
Example #6
Source File: main.py From Deep-Learning-with-TensorFlow-Second-Edition with MIT License

def top_k_similar_items(movies, ratings_df, k, TRAINED=False):
    """
    Returns k similar movies for respective movie

    INPUTS :
    movies : list of numbers or number, list of movie ids
    ratings_df : rating dataframe, stores all users' ratings for respective movies
    k : natural number
    TRAINED : TRUE or FALSE, whether to use the trained or untrained user vs movie table

    OUTPUT:
    list of k similar movies for the respective movie
    """
    if TRAINED:
        df = pd.read_pickle("user_item_table_train.pkl")
    else:
        df = pd.read_pickle("user_item_table.pkl")

    corr_matrix = item_item_correlation(df, TRAINED)
    if type(movies) is not list:
        return corr_matrix[movies].sort_values(ascending=False).drop(movies).index.values[0:k]
    else:
        dict = {}
        for movie in movies:
            dict.update({movie: corr_matrix[movie].sort_values(ascending=False).drop(movie).index.values[0:k]})
        pd.DataFrame(dict).to_csv("movie_top_k.csv")
        return dict
Example #7
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_read_explicit(self, compression, get_random_path):
    base = get_random_path
    path1 = base + ".raw"
    path2 = base + ".compressed"

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to uncompressed file
        df.to_pickle(p1, compression=None)

        # compress
        self.compress_file(p1, p2, compression=compression)

        # read compressed file
        df2 = pd.read_pickle(p2, compression=compression)

        tm.assert_frame_equal(df, df2)
Example #8
Source File: pipeline.py From xbbg with Apache License 2.0

def daily_stats(data: (pd.Series, pd.DataFrame), **kwargs) -> pd.DataFrame:
    """
    Daily stats for given data

    Examples:
        >>> pd.set_option('precision', 2)
        >>> (
        ...     pd.concat([
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib0.pkl'),
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib1.pkl'),
        ...     ], sort=False)
        ...     .pipe(get_series, col='close')
        ...     .pipe(daily_stats)
        ... )['RMS FP Equity'].iloc[:, :5]
                                   count    mean   std    min    10%
        2020-01-16 00:00:00+00:00  434.0  711.16  1.11  708.6  709.6
        2020-01-17 00:00:00+00:00  437.0  721.53  1.66  717.0  719.0
    """
    if data.empty:
        return pd.DataFrame()
    if 'percentiles' not in kwargs:
        kwargs['percentiles'] = [.1, .25, .5, .75, .9]
    return data.groupby(data.index.floor('d')).describe(**kwargs)
Example #9
Source File: test_pickle.py From recruit with Apache License 2.0

def test_write_infer(self, ext, get_random_path):
    base = get_random_path
    path1 = base + ext
    path2 = base + ".raw"
    compression = None
    for c in self._compression_to_extension:
        if self._compression_to_extension[c] == ext:
            compression = c
            break

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to compressed file by inferred compression method
        df.to_pickle(p1)

        # decompress
        with tm.decompress_file(p1, compression=compression) as f:
            with open(p2, "wb") as fh:
                fh.write(f.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
Example #10
Source File: testing.py From vnpy_crypto with MIT License

def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """
    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path)
Example #11
Source File: test_multi.py From vnpy_crypto with MIT License

def test_legacy_pickle(self, datapath):
    if PY3:
        pytest.skip("testing for legacy pickles not support on py3")

    path = datapath('indexes', 'data', 'multiindex_v1.pickle')
    obj = pd.read_pickle(path)

    obj2 = MultiIndex.from_tuples(obj.values)
    assert obj.equals(obj2)

    res = obj.get_indexer(obj)
    exp = np.arange(len(obj), dtype=np.intp)
    assert_almost_equal(res, exp)

    res = obj.get_indexer(obj2[::-1])
    exp = obj.get_indexer(obj[::-1])
    exp2 = obj2.get_indexer(obj2[::-1])

    assert_almost_equal(res, exp)
    assert_almost_equal(exp, exp2)
Example #12
Source File: test_multi.py From vnpy_crypto with MIT License

def test_legacy_v2_unpickle(self, datapath):
    # 0.7.3 -> 0.8.0 format manage
    path = datapath('indexes', 'data', 'mindex_073.pickle')
    obj = pd.read_pickle(path)

    obj2 = MultiIndex.from_tuples(obj.values)
    assert obj.equals(obj2)

    res = obj.get_indexer(obj)
    exp = np.arange(len(obj), dtype=np.intp)
    assert_almost_equal(res, exp)

    res = obj.get_indexer(obj2[::-1])
    exp = obj.get_indexer(obj[::-1])
    exp2 = obj2.get_indexer(obj2[::-1])

    assert_almost_equal(res, exp)
    assert_almost_equal(exp, exp2)
Example #13
Source File: testing.py From recruit with Apache License 2.0

def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """
    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path)
Example #14
Source File: EDA.py From G-Bert with MIT License

def split_dataset(data_path='data-multi-visit.pkl'):
    data = pd.read_pickle(data_path)
    sample_id = data['SUBJECT_ID'].unique()

    random_number = [i for i in range(len(sample_id))]
    # shuffle(random_number)

    train_id = sample_id[random_number[:int(len(sample_id)*2/3)]]
    eval_id = sample_id[random_number[int(len(sample_id)*2/3): int(len(sample_id)*5/6)]]
    test_id = sample_id[random_number[int(len(sample_id)*5/6):]]

    def ls2file(list_data, file_name):
        with open(file_name, 'w') as fout:
            for item in list_data:
                fout.write(str(item) + '\n')

    ls2file(train_id, 'train-id.txt')
    ls2file(eval_id, 'eval-id.txt')
    ls2file(test_id, 'test-id.txt')

    print('train size: %d, eval size: %d, test size: %d' %
          (len(train_id), len(eval_id), len(test_id)))
Example #15
Source File: utils.py From bioconda-utils with MIT License

def _load_channel_dataframe_cached(self):
    if self.cache_file is not None and os.path.exists(self.cache_file):
        ts = datetime.datetime.fromtimestamp(os.path.getmtime(self.cache_file))
        seconds = (datetime.datetime.now() - ts).seconds
        if seconds <= self.cache_timeout:
            logger.info("Loading repodata from cache %s", self.cache_file)
            return pd.read_pickle(self.cache_file)
        else:
            logger.info("Repodata cache file too old. Reloading")
    res = self._load_channel_dataframe()
    if self.cache_file is not None:
        res.to_pickle(self.cache_file)
    return res
Example #16
Source File: test_timeseries_legacy.py From Computable with MIT License

def test_unpickle_legacy_len0_daterange(self):
    pth, _ = os.path.split(os.path.abspath(__file__))
    filepath = os.path.join(pth, 'data', 'series_daterange0.pickle')

    result = pd.read_pickle(filepath)

    ex_index = DatetimeIndex([], freq='B')

    self.assert_(result.index.equals(ex_index))
    tm.assert_isinstance(result.index.freq, offsets.BDay)
    self.assert_(len(result) == 0)
Example #17
Source File: keras-theano.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df
Example #18
Source File: keras-tensorflow.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df
Example #19
Source File: keras-cntk.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df

# k-fold cross validation:
# https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/
Example #20
Source File: fastai-expriments.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df
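Examples #17 through #20 repeat the same cache-on-first-read pattern across the DeepLearning-IDS scripts: parse the CSV once, pickle the cleaned DataFrame next to it, and reuse the pickle on later runs. A standalone sketch of that pattern (the function name load_cached_csv is illustrative, not from the project) looks like this:

import os
import pandas as pd

def load_cached_csv(csv_path):
    """Parse a CSV once, then serve later loads from a pickle cache beside it."""
    pickle_path = '{}.pickle'.format(csv_path)
    if os.path.exists(pickle_path):
        # Fast path: reuse the previously pickled DataFrame.
        return pd.read_pickle(pickle_path)
    # Slow path: parse the CSV, drop incomplete rows, and cache the result.
    df = pd.read_csv(csv_path).dropna()
    df.to_pickle(pickle_path)
    return df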
Example #21
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# ---------------------
Example #22
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_pickle_v0_14_1(datapath):
    cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
                         categories=['a', 'b', 'c', 'd'])
    pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle')
    # This code was executed once on v0.14.1 to generate the pickle:
    #
    # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
    #                   name='foobar')
    # with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
    #
    tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
Example #23
Source File: test_api.py From vnpy_crypto with MIT License

def _pickle_roundtrip(self, obj):
    with ensure_clean() as path:
        obj.to_pickle(path)
        unpickled = pd.read_pickle(path)
        return unpickled
Example #24
Source File: test_io.py From vnpy_crypto with MIT License

def _pickle_roundtrip_name(self, obj):
    with ensure_clean() as path:
        obj.to_pickle(path)
        unpickled = pd.read_pickle(path)
        return unpickled
Example #25
Source File: response_matrix.py From ocelot with GNU General Public License v3.0

def load(self, filename):
    self.df = pd.read_pickle(filename)
    self.df2data()
    return 1
Example #26
Source File: VideoFeatures.py From videofeatures with MIT License

def loadFeatures(self, feature_df_path=None):
    """
    loads features from pd dataframe and returns them as a matrix
    :param feature_df_path: path to pandas dataframe that holds features
    :return: (features, labels) - features as ndarray of shape
             (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor)
             and labels (list) of videos
    """
    if feature_df_path is None:
        feature_df_path = self.getDumpFileName('features')
    assert os.path.isfile(feature_df_path)

    feature_df = pd.read_pickle(feature_df_path)
    assert 'features' in feature_df and 'labels' in feature_df

    # stack video features to a 2d matrix
    features = np.concatenate(feature_df['features'], axis=0)
    labels = list(feature_df['labels'])

    if features.ndim == 3:  # assume only one feature vector is given -> insert dimension
        features = features.reshape((features.shape[0], features.shape[1], 1, features.shape[2]))

    self.logger.info(
        'Loaded {} features from {}. Features have shape {}'.format(
            self.extractor.__class__.__name__, feature_df_path, np.shape(features)))

    assert features.ndim == 4 and len(labels) == features.shape[0]
    return features, labels
Example #27
Source File: scientific-hypothesis.py From escape-from-automanual-testing with GNU Affero General Public License v3.0

def test_dataframe_round_trip(df):
    with BytesIO() as f:
        df.to_pickle(f, compression=None)
        contents = f.getvalue()
    with BytesIO(contents) as f:
        new = pd.read_pickle(f, compression=None)
    # Pandas ships testing helper functions too!
    pd.testing.assert_frame_equal(df, new)
Example #28
Source File: trainer.py From pykg2vec with MIT License

def export_embeddings(self):
    """
    Export embeddings in tsv and pandas pickled format.
    With tsvs (both label and vector files), you can:
    1) Use those pretrained embeddings for your applications.
    2) Visualize the embeddings in this website to gain insights. (https://projector.tensorflow.org/)

    Pandas dataframes can be read with pd.read_pickle('desired_file.pickle')
    """
    save_path = self.config.path_embeddings / self.model.model_name
    save_path.mkdir(parents=True, exist_ok=True)

    idx2ent = self.config.knowledge_graph.read_cache_data('idx2entity')
    idx2rel = self.config.knowledge_graph.read_cache_data('idx2relation')

    with open(str(save_path / "ent_labels.tsv"), 'w') as l_export_file:
        for label in idx2ent.values():
            l_export_file.write(label + "\n")

    with open(str(save_path / "rel_labels.tsv"), 'w') as l_export_file:
        for label in idx2rel.values():
            l_export_file.write(label + "\n")

    for named_embedding in self.model.parameter_list:
        all_ids = list(range(0, int(named_embedding.weight.shape[0])))

        stored_name = named_embedding.name

        if len(named_embedding.shape) == 2:
            all_embs = named_embedding.weight.detach().cpu().numpy()
            with open(str(save_path / ("%s.tsv" % stored_name)), 'w') as v_export_file:
                for idx in all_ids:
                    v_export_file.write("\t".join([str(x) for x in all_embs[idx]]) + "\n")
Example #29
Source File: struc2vec.py From GraphEmbedding with MIT License

def train(self, embed_size=128, window_size=5, workers=3, iter=5):

    # pd.read_pickle(self.temp_path+'walks.pkl')
    sentences = self.sentences

    print("Learning representation...")
    model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0,
                     hs=1, sg=1, workers=workers, iter=iter)
    print("Learning representation done!")
    self.w2v_model = model

    return model
Example #30
Source File: struc2vec.py From GraphEmbedding with MIT License

def prepare_biased_walk(self,):
    sum_weights = {}
    sum_edges = {}
    average_weight = {}
    gamma = {}
    layer = 0
    while os.path.exists(self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl'):
        probs = pd.read_pickle(
            self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')
        for v, list_weights in probs.items():
            sum_weights.setdefault(layer, 0)
            sum_edges.setdefault(layer, 0)
            sum_weights[layer] += sum(list_weights)
            sum_edges[layer] += len(list_weights)

        average_weight[layer] = sum_weights[layer] / sum_edges[layer]

        gamma.setdefault(layer, {})

        for v, list_weights in probs.items():
            num_neighbours = 0
            for w in list_weights:
                if w > average_weight[layer]:
                    num_neighbours += 1
            gamma[layer][v] = num_neighbours

        layer += 1

    pd.to_pickle(average_weight, self.temp_path + 'average_weight')
    pd.to_pickle(gamma, self.temp_path + 'gamma.pkl')