Python pandas.read_pickle() Examples

The following are 30 code examples of pandas.read_pickle(), drawn from open-source projects; the source file, project, and license are listed above each example.
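Before the project examples, here is a minimal round-trip sketch of the API the examples below exercise: DataFrame.to_pickle() writes a pickled (optionally compressed) object to disk, and pandas.read_pickle() loads it back, inferring compression from the file extension. The data and the file name round_trip.pkl.gz are illustrative placeholders, not taken from any of the projects below.

import pandas as pd

# A small illustrative DataFrame to persist.
df = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})

# Write it to disk; gzip compression is inferred from the ".gz" extension.
df.to_pickle("round_trip.pkl.gz")

# Read it back; read_pickle infers the compression the same way.
df2 = pd.read_pickle("round_trip.pkl.gz")

# The round trip should preserve the data exactly.
pd.testing.assert_frame_equal(df, df2)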
Example #1
Source File: test_pickle.py From recruit with Apache License 2.0

def test_write_explicit(self, compression, get_random_path):
    base = get_random_path
    path1 = base + ".compressed"
    path2 = base + ".raw"

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to compressed file
        df.to_pickle(p1, compression=compression)

        # decompress
        with tm.decompress_file(p1, compression=compression) as f:
            with open(p2, "wb") as fh:
                fh.write(f.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
Example #2
Source File: abstract.py From qb with MIT License

def load_guesses(directory: str, output_type='char', folds=c.GUESSER_GENERATION_FOLDS) -> pd.DataFrame:
    """
    Loads all the guesses pertaining to a guesser inferred from directory
    :param directory: where to load guesses from
    :param output_type: One of: char, full, first
    :param folds: folds to load, by default all of them
    :return: guesses across all folds for given directory
    """
    assert len(folds) > 0
    guess_df = None
    for fold in folds:
        input_path = AbstractGuesser.guess_path(directory, fold, output_type)
        if guess_df is None:
            guess_df = pd.read_pickle(input_path)
        else:
            new_guesses_df = pd.read_pickle(input_path)
            guess_df = pd.concat([guess_df, new_guesses_df])

    return guess_df
Example #3
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_write_infer(self, ext, get_random_path):
    base = get_random_path
    path1 = base + ext
    path2 = base + ".raw"
    compression = None
    for c in self._compression_to_extension:
        if self._compression_to_extension[c] == ext:
            compression = c
            break

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to compressed file by inferred compression method
        df.to_pickle(p1)

        # decompress
        with tm.decompress_file(p1, compression=compression) as f:
            with open(p2, "wb") as fh:
                fh.write(f.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
Example #4
Source File: main.py From Deep-Learning-with-TensorFlow-Second-Edition with MIT License

def user_user_pearson_corr(ratings_df, TRAINED):
    if TRAINED:
        if os.path.isfile("model/user_user_corr_train.pkl"):
            df_corr = pd.read_pickle("user_user_corr_train.pkl")
        else:
            df = pd.read_pickle("user_item_table_train.pkl")
            df = df.T
            df_corr = df.corr()
            df_corr.to_pickle("user_user_corr_train.pkl")
    else:
        if os.path.isfile("model/user_user_corr.pkl"):
            df_corr = pd.read_pickle("user_user_corr.pkl")
        else:
            df = pd.read_pickle("user_item_table.pkl")
            df = df.T
            df_corr = df.corr()
            df_corr.to_pickle("user_user_corr.pkl")
    return df_corr
Example #5
Source File: walker.py From GraphEmbedding with MIT License

def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0):

    layers_adj = pd.read_pickle(self.temp_path + 'layers_adj.pkl')
    layers_alias = pd.read_pickle(self.temp_path + 'layers_alias.pkl')
    layers_accept = pd.read_pickle(self.temp_path + 'layers_accept.pkl')
    gamma = pd.read_pickle(self.temp_path + 'gamma.pkl')
    walks = []
    initialLayer = 0

    nodes = self.idx  # list(self.g.nodes())

    results = Parallel(n_jobs=workers, verbose=verbose)(
        delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob,
                                      layers_adj, layers_accept, layers_alias, gamma)
        for num in partition_num(num_walks, workers))

    walks = list(itertools.chain(*results))

    return walks
Example #6
Source File: main.py From Deep-Learning-with-TensorFlow-Second-Edition with MIT License

def top_k_similar_items(movies, ratings_df, k, TRAINED=False):
    """
    Returns k similar movies for respective movie

    INPUTS :
    movies : list of numbers or number, list of movie ids
    ratings_df : rating dataframe, stores all users' ratings for respective movies
    k : natural number
    TRAINED : TRUE or FALSE, whether to use the trained or untrained user vs movie table

    OUTPUT:
    list of k similar movies for the respective movie
    """
    if TRAINED:
        df = pd.read_pickle("user_item_table_train.pkl")
    else:
        df = pd.read_pickle("user_item_table.pkl")

    corr_matrix = item_item_correlation(df, TRAINED)
    if type(movies) is not list:
        return corr_matrix[movies].sort_values(ascending=False).drop(movies).index.values[0:k]
    else:
        dict = {}
        for movie in movies:
            dict.update({movie: corr_matrix[movie].sort_values(ascending=False).drop(movie).index.values[0:k]})
        pd.DataFrame(dict).to_csv("movie_top_k.csv")
        return dict
Example #7
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_read_explicit(self, compression, get_random_path):
    base = get_random_path
    path1 = base + ".raw"
    path2 = base + ".compressed"

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to uncompressed file
        df.to_pickle(p1, compression=None)

        # compress
        self.compress_file(p1, p2, compression=compression)

        # read compressed file
        df2 = pd.read_pickle(p2, compression=compression)

        tm.assert_frame_equal(df, df2)
Example #8
Source File: pipeline.py From xbbg with Apache License 2.0

def daily_stats(data: (pd.Series, pd.DataFrame), **kwargs) -> pd.DataFrame:
    """
    Daily stats for given data

    Examples:
        >>> pd.set_option('precision', 2)
        >>> (
        ...     pd.concat([
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib0.pkl'),
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib1.pkl'),
        ...     ], sort=False)
        ...     .pipe(get_series, col='close')
        ...     .pipe(daily_stats)
        ... )['RMS FP Equity'].iloc[:, :5]
                                   count    mean   std    min    10%
        2020-01-16 00:00:00+00:00  434.0  711.16  1.11  708.6  709.6
        2020-01-17 00:00:00+00:00  437.0  721.53  1.66  717.0  719.0
    """
    if data.empty:
        return pd.DataFrame()
    if 'percentiles' not in kwargs:
        kwargs['percentiles'] = [.1, .25, .5, .75, .9]
    return data.groupby(data.index.floor('d')).describe(**kwargs)
Example #9
Source File: test_pickle.py From recruit with Apache License 2.0

def test_write_infer(self, ext, get_random_path):
    base = get_random_path
    path1 = base + ext
    path2 = base + ".raw"
    compression = None
    for c in self._compression_to_extension:
        if self._compression_to_extension[c] == ext:
            compression = c
            break

    with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
        df = tm.makeDataFrame()

        # write to compressed file by inferred compression method
        df.to_pickle(p1)

        # decompress
        with tm.decompress_file(p1, compression=compression) as f:
            with open(p2, "wb") as fh:
                fh.write(f.read())

        # read decompressed file
        df2 = pd.read_pickle(p2, compression=None)

        tm.assert_frame_equal(df, df2)
Example #10
Source File: testing.py From vnpy_crypto with MIT License

def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """
    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path)
Example #11
Source File: test_multi.py From vnpy_crypto with MIT License

def test_legacy_pickle(self, datapath):
    if PY3:
        pytest.skip("testing for legacy pickles not support on py3")

    path = datapath('indexes', 'data', 'multiindex_v1.pickle')
    obj = pd.read_pickle(path)

    obj2 = MultiIndex.from_tuples(obj.values)
    assert obj.equals(obj2)

    res = obj.get_indexer(obj)
    exp = np.arange(len(obj), dtype=np.intp)
    assert_almost_equal(res, exp)

    res = obj.get_indexer(obj2[::-1])
    exp = obj.get_indexer(obj[::-1])
    exp2 = obj2.get_indexer(obj2[::-1])

    assert_almost_equal(res, exp)
    assert_almost_equal(exp, exp2)
Example #12
Source File: test_multi.py From vnpy_crypto with MIT License

def test_legacy_v2_unpickle(self, datapath):
    # 0.7.3 -> 0.8.0 format manage
    path = datapath('indexes', 'data', 'mindex_073.pickle')
    obj = pd.read_pickle(path)

    obj2 = MultiIndex.from_tuples(obj.values)
    assert obj.equals(obj2)

    res = obj.get_indexer(obj)
    exp = np.arange(len(obj), dtype=np.intp)
    assert_almost_equal(res, exp)

    res = obj.get_indexer(obj2[::-1])
    exp = obj.get_indexer(obj[::-1])
    exp2 = obj2.get_indexer(obj2[::-1])

    assert_almost_equal(res, exp)
    assert_almost_equal(exp, exp2)
Example #13
Source File: testing.py From recruit with Apache License 2.0

def round_trip_pickle(obj, path=None):
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : pandas object
        The object to pickle and then re-read.
    path : str, default None
        The path where the pickled object is written and then read.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The original object that was pickled and then re-read.
    """
    if path is None:
        path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))
    with ensure_clean(path) as path:
        pd.to_pickle(obj, path)
        return pd.read_pickle(path)
Example #14
Source File: EDA.py From G-Bert with MIT License

def split_dataset(data_path='data-multi-visit.pkl'):
    data = pd.read_pickle(data_path)
    sample_id = data['SUBJECT_ID'].unique()

    random_number = [i for i in range(len(sample_id))]
    # shuffle(random_number)

    train_id = sample_id[random_number[:int(len(sample_id)*2/3)]]
    eval_id = sample_id[random_number[int(len(sample_id)*2/3): int(len(sample_id)*5/6)]]
    test_id = sample_id[random_number[int(len(sample_id)*5/6):]]

    def ls2file(list_data, file_name):
        with open(file_name, 'w') as fout:
            for item in list_data:
                fout.write(str(item) + '\n')

    ls2file(train_id, 'train-id.txt')
    ls2file(eval_id, 'eval-id.txt')
    ls2file(test_id, 'test-id.txt')

    print('train size: %d, eval size: %d, test size: %d' %
          (len(train_id), len(eval_id), len(test_id)))
Example #15
Source File: utils.py From bioconda-utils with MIT License

def _load_channel_dataframe_cached(self):
    if self.cache_file is not None and os.path.exists(self.cache_file):
        ts = datetime.datetime.fromtimestamp(os.path.getmtime(self.cache_file))
        seconds = (datetime.datetime.now() - ts).seconds
        if seconds <= self.cache_timeout:
            logger.info("Loading repodata from cache %s", self.cache_file)
            return pd.read_pickle(self.cache_file)
        else:
            logger.info("Repodata cache file too old. Reloading")
    res = self._load_channel_dataframe()
    if self.cache_file is not None:
        res.to_pickle(self.cache_file)
    return res
Example #16
Source File: test_timeseries_legacy.py From Computable with MIT License

def test_unpickle_legacy_len0_daterange(self):
    pth, _ = os.path.split(os.path.abspath(__file__))
    filepath = os.path.join(pth, 'data', 'series_daterange0.pickle')

    result = pd.read_pickle(filepath)

    ex_index = DatetimeIndex([], freq='B')

    self.assert_(result.index.equals(ex_index))
    tm.assert_isinstance(result.index.freq, offsets.BDay)
    self.assert_(len(result) == 0)
Example #17
Source File: keras-theano.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df
Example #18
Source File: keras-tensorflow.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df
Example #19
Source File: keras-cntk.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df

# k-fold cross validation:
# https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/
Example #20
Source File: fastai-expriments.py From DeepLearning-IDS with MIT License

def loadData(fileName):
    dataFile = os.path.join(dataPath, fileName)
    pickleDump = '{}.pickle'.format(dataFile)
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(dataFile)
        df = df.dropna()
        df = shuffle(df)
        df.to_pickle(pickleDump)
    return df
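Examples #17 through #20 repeat the same cache-on-first-read pattern across the DeepLearning-IDS scripts: parse the CSV once, pickle the cleaned DataFrame next to it, and reuse the pickle on later runs. A standalone sketch of that pattern (the function name load_cached_csv is illustrative, not from the project) looks like this:

import os
import pandas as pd

def load_cached_csv(csv_path):
    """Parse a CSV once, then serve later loads from a pickle cache beside it."""
    pickle_path = '{}.pickle'.format(csv_path)
    if os.path.exists(pickle_path):
        # Fast path: reuse the previously pickled DataFrame.
        return pd.read_pickle(pickle_path)
    # Slow path: parse the CSV, drop incomplete rows, and cache the result.
    df = pd.read_csv(csv_path).dropna()
    df.to_pickle(pickle_path)
    return df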
Example #21
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_pickle_path_localpath():
    df = tm.makeDataFrame()
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


# ---------------------
# test pickle compression
# ---------------------
Example #22
Source File: test_pickle.py From vnpy_crypto with MIT License

def test_pickle_v0_14_1(datapath):
    cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
                         categories=['a', 'b', 'c', 'd'])
    pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle')
    # This code was executed once on v0.14.1 to generate the pickle:
    #
    # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
    #                   name='foobar')
    # with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
    #
    tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
Example #23
Source File: test_api.py From vnpy_crypto with MIT License

def _pickle_roundtrip(self, obj):
    with ensure_clean() as path:
        obj.to_pickle(path)
        unpickled = pd.read_pickle(path)
        return unpickled
Example #24
Source File: test_io.py From vnpy_crypto with MIT License

def _pickle_roundtrip_name(self, obj):
    with ensure_clean() as path:
        obj.to_pickle(path)
        unpickled = pd.read_pickle(path)
        return unpickled
Example #25
Source File: response_matrix.py From ocelot with GNU General Public License v3.0

def load(self, filename):
    self.df = pd.read_pickle(filename)
    self.df2data()
    return 1
Example #26
Source File: VideoFeatures.py From videofeatures with MIT License

def loadFeatures(self, feature_df_path=None):
    """
    loads features from pd dataframe and returns them as a matrix
    :param feature_df_path: path to pandas dataframe that holds features
    :return: (features, labels) - features as ndarray of shape
             (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor)
             and labels (list) of videos
    """
    if feature_df_path is None:
        feature_df_path = self.getDumpFileName('features')
    assert os.path.isfile(feature_df_path)

    feature_df = pd.read_pickle(feature_df_path)
    assert 'features' in feature_df and 'labels' in feature_df

    # stack video features to a 2d matrix
    features = np.concatenate(feature_df['features'], axis=0)
    labels = list(feature_df['labels'])

    if features.ndim == 3:  # assume only one feature vector is given -> insert dimension
        features = features.reshape((features.shape[0], features.shape[1], 1, features.shape[2]))

    self.logger.info(
        'Loaded {} features from {}. Features have shape {}'.format(
            self.extractor.__class__.__name__, feature_df_path, np.shape(features)))

    assert features.ndim == 4 and len(labels) == features.shape[0]
    return features, labels
Example #27
Source File: scientific-hypothesis.py From escape-from-automanual-testing with GNU Affero General Public License v3.0

def test_dataframe_round_trip(df):
    with BytesIO() as f:
        df.to_pickle(f, compression=None)
        contents = f.getvalue()
    with BytesIO(contents) as f:
        new = pd.read_pickle(f, compression=None)
    # Pandas ships testing helper functions too!
    pd.testing.assert_frame_equal(df, new)
Example #28
Source File: trainer.py From pykg2vec with MIT License

def export_embeddings(self):
    """
    Export embeddings in tsv and pandas pickled format.
    With tsvs (both label and vector files), you can:
    1) Use those pretrained embeddings for your applications.
    2) Visualize the embeddings in this website to gain insights. (https://projector.tensorflow.org/)

    Pandas dataframes can be read with pd.read_pickle('desired_file.pickle')
    """
    save_path = self.config.path_embeddings / self.model.model_name
    save_path.mkdir(parents=True, exist_ok=True)

    idx2ent = self.config.knowledge_graph.read_cache_data('idx2entity')
    idx2rel = self.config.knowledge_graph.read_cache_data('idx2relation')

    with open(str(save_path / "ent_labels.tsv"), 'w') as l_export_file:
        for label in idx2ent.values():
            l_export_file.write(label + "\n")

    with open(str(save_path / "rel_labels.tsv"), 'w') as l_export_file:
        for label in idx2rel.values():
            l_export_file.write(label + "\n")

    for named_embedding in self.model.parameter_list:
        all_ids = list(range(0, int(named_embedding.weight.shape[0])))

        stored_name = named_embedding.name

        if len(named_embedding.shape) == 2:
            all_embs = named_embedding.weight.detach().cpu().numpy()
            with open(str(save_path / ("%s.tsv" % stored_name)), 'w') as v_export_file:
                for idx in all_ids:
                    v_export_file.write("\t".join([str(x) for x in all_embs[idx]]) + "\n")
Example #29
Source File: struc2vec.py From GraphEmbedding with MIT License

def train(self, embed_size=128, window_size=5, workers=3, iter=5):

    # pd.read_pickle(self.temp_path+'walks.pkl')
    sentences = self.sentences

    print("Learning representation...")
    model = Word2Vec(sentences, size=embed_size, window=window_size, min_count=0,
                     hs=1, sg=1, workers=workers, iter=iter)
    print("Learning representation done!")
    self.w2v_model = model

    return model
Example #30
Source File: struc2vec.py From GraphEmbedding with MIT License

def prepare_biased_walk(self,):
    sum_weights = {}
    sum_edges = {}
    average_weight = {}
    gamma = {}
    layer = 0
    while os.path.exists(self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl'):
        probs = pd.read_pickle(
            self.temp_path + 'norm_weights_distance-layer-' + str(layer) + '.pkl')
        for v, list_weights in probs.items():
            sum_weights.setdefault(layer, 0)
            sum_edges.setdefault(layer, 0)
            sum_weights[layer] += sum(list_weights)
            sum_edges[layer] += len(list_weights)

        average_weight[layer] = sum_weights[layer] / sum_edges[layer]

        gamma.setdefault(layer, {})

        for v, list_weights in probs.items():
            num_neighbours = 0
            for w in list_weights:
                if w > average_weight[layer]:
                    num_neighbours += 1
            gamma[layer][v] = num_neighbours

        layer += 1

    pd.to_pickle(average_weight, self.temp_path + 'average_weight')
    pd.to_pickle(gamma, self.temp_path + 'gamma.pkl')