Python pandas.HDFStore() Examples
The following are 30 code examples of pandas.HDFStore().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: minute_bars.py From catalyst with Apache License 2.0 | 6 votes |
def write(self, frames):
    """
    Store the frames in the target HDF5 file, using the format used by
    ``pd.Panel.to_hdf``, then record a version attribute on the file.

    Parameters
    ----------
    frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
        An iterable or other mapping of sid to the corresponding OHLCV
        pricing data.
    """
    store_options = {
        'complevel': self._complevel,
        'complib': self._complib,
    }
    with HDFStore(self._path, 'w', **store_options) as store:
        # ``frames`` may be an iterable of pairs; dict() turns it into a
        # mapping before the panel is built.
        pd.Panel.from_dict(dict(frames)).to_hdf(store, 'updates')

    # Reopen the same file through PyTables to set the root 'version' attr.
    with tables.open_file(self._path, mode='r+') as h5file:
        h5file.set_node_attr('/', 'version', 0)
Example #2
Source File: eq_loc.py From pykonal with GNU General Public License v3.0 | 6 votes |
def load_stations(input_file):
    """
    Read the network geometry from an HDF5 input file.

    The file must have been created using pandas.HDFStore and hold a
    "stations" table with "network", "station", "latitude", "longitude",
    and "elevation" fields. "latitude" and "longitude" are assumed to be
    in degrees and "elevation" in kilometers.

    Returns:
        pandas.DataFrame with "network", "station", "latitude",
        "longitude", and "depth" fields. "depth" is in kilometers.
    """
    output_fields = ["network", "station", "latitude", "longitude", "depth"]

    with pd.HDFStore(input_file, mode="r") as store:
        table = store["stations"]

    # Depth is the elevation with its sign flipped.
    table["depth"] = -table["elevation"]
    return table[output_fields]
Example #3
Source File: Omlette.py From OpenTrader with GNU Lesser General Public License v3.0 | 6 votes |
def iMain():
    """
    Read an hdf file generated by us to make sure we can recover its
    content and structure.
    Give the name of an hdf5 file as a command-line argument.
    """
    # sys.argv always holds at least the program name, so check for the
    # extra argument explicitly; otherwise a missing argument surfaces as
    # an IndexError instead of the usage message.
    assert len(sys.argv) > 1, iMain.__doc__
    sFile = sys.argv[1]
    assert os.path.isfile(sFile)

    oHdfStore = pandas.HDFStore(sFile, mode='r')
    try:
        print(oHdfStore.groups())
        # bug - no return value
        # oSignals = pandas.read_hdf(oHdfStore, '/servings/signals')
        mSignals = oHdfStore.select('/recipe/servings/mSignals',
                                    auto_close=False)
        print(mSignals)
        print(oHdfStore.get_node('/recipe')._v_attrs.metadata[0]['sUrl'])
    finally:
        # Release the file handle even when one of the lookups fails.
        oHdfStore.close()
Example #4
Source File: dataset.py From avocado with MIT License | 6 votes |
def write_models(self, tag=None):
    """Write the models of the light curves to disk.

    The models will be stored in the features directory using the dataset's
    name and the given features tag. Note that for now the models are stored
    as individual tables in the HDF5 file because there doesn't appear to be
    a good way to store fixed length arrays in pandas.

    WARNING: This is not the best way to implement this, and there are
    definitely much better ways. This also isn't thread-safe at all.

    Parameters
    ----------
    tag : str (optional)
        The tag for this version of the features. By default, this will use
        settings['features_tag'].
    """
    models_path = self.get_models_path(tag=tag)

    # The context manager closes the file even if one of the writes raises.
    with pd.HDFStore(models_path, "a") as store:
        for model_name, model in self.models.items():
            model.to_hdf(store, model_name, mode="a")
Example #5
Source File: utils.py From avocado with MIT License | 6 votes |
def _create_csi_index(store, key, column_name):
    """Make sure a column of an HDF5 table has a CSI index.

    The column must have been already specified in the data_columns call to
    to_hdf or it won't be stored correctly in the HDF5 file.

    Parameters
    ----------
    store : :class:`pandas.HDFStore`
        An HDF5 file opened as an instance of a :class:`pandas.HDFStore`
        object.
    key : str
        The key of the DataFrame to use.
    column_name : str
        The column to add a CSI index to.
    """
    storer = store.get_storer(key)
    mapped_name = _map_column_name(storer, column_name)
    table_column = storer.table.colinstances[mapped_name]

    # Nothing to do when the existing index is already a CSI index.
    if table_column.index.is_csi:
        return

    # Drop the current index and build a CSI one in its place.
    table_column.remove_index()
    table_column.create_csindex()
Example #6
Source File: process_ow.py From tierpsy-tracker with MIT License | 6 votes |
def ow_plate_summary(fname):
    """Build a single summary table of the features stored in `fname`.

    Event features and every column of '/features_timeseries' are passed to
    ``WormStats.getWormStats`` with ``np.nanmean``; columns listed in
    ``WormStats.extra_fields`` are dropped from the result.

    Returns a one-element list holding the summary DataFrame.
    """
    feature_data = read_feat_events(fname)

    with pd.HDFStore(fname, 'r') as store:
        timeseries = store['/features_timeseries']

    # Add each time-series column, as a plain array, to the event features.
    for column in timeseries:
        feature_data[column] = timeseries[column].values

    stats = WormStats()
    summary = pd.DataFrame(stats.getWormStats(feature_data, np.nanmean))

    kept_columns = [name for name in summary.columns
                    if name not in stats.extra_fields]
    return [summary.loc[:, kept_columns]]
    #%%
Example #7
Source File: hdf.py From vivarium with GNU General Public License v3.0 | 6 votes |
def _write_pandas_data(path: Path, entity_key: EntityKey, data: Union[PandasObj]):
    """Write data in a pandas format to an HDF file.

    Supports :class:`pandas.DataFrame` objects, with or without columns,
    and :class:`pandas.Series` objects. An ``is_empty`` flag is stored in
    the node's metadata attribute alongside the data.

    Raises
    ------
    ValueError
        If the data is still empty after its index is reset.
    """
    if not data.empty:
        metadata = {'is_empty': False}
        data_columns = None
    else:
        # Our data is indexed, sometimes with no other columns. This leaves
        # an empty dataframe that store.put will silently fail to write in
        # table format, so the index is moved into regular columns first.
        data = data.reset_index()
        if data.empty:
            raise ValueError("Cannot write an empty dataframe that does not have an index.")
        metadata = {'is_empty': True}
        data_columns = True

    node_path = entity_key.path
    with pd.HDFStore(str(path), complevel=9) as store:
        store.put(node_path, data, format="table", data_columns=data_columns)
        # NOTE: must use attrs. write this up
        store.get_storer(node_path).attrs.metadata = metadata
Example #8
Source File: burstlib_ext.py From FRETBursts with GNU General Public License v2.0 | 6 votes |
def _store_bg_data(store, base_name, min_ph_delays_us, best_bg, best_th,
                   BG_data, BG_data_e):
    """Save background data under the group `base_name` of an HDF5 file.

    The arrays are written through `store` (which must provide
    ``create_carray``, ``filename`` and ``close``). `store` is then closed
    and the same file is reopened with pandas to write `best_bg` and
    `best_th`.

    Parameters
    ----------
    store : open HDF5 file handle
        Closed by this function once the arrays are written.
    base_name : str
        Group path; a trailing '/' is added if missing.
    min_ph_delays_us : array-like
        Stored as 'min_ph_delays_us'.
    best_bg, best_th : pandas objects
        Stored at ``base_name + 'best_bg'`` and ``base_name + 'best_th'``.
    BG_data, BG_data_e : dict
        Each value is stored under ``str(key)`` (``str(key) + '_err'`` for
        `BG_data_e`).
    """
    if not base_name.endswith('/'):
        base_name = base_name + '/'
    store_name = store.filename
    group_name = '/' + base_name[:-1]

    store.create_carray(group_name, 'min_ph_delays_us', obj=min_ph_delays_us,
                        createparents=True)
    for ph_sel, values in BG_data.items():
        store.create_carray(group_name, str(ph_sel), obj=values)
    for ph_sel, values in BG_data_e.items():
        store.create_carray(group_name, str(ph_sel) + '_err', obj=values)
    store.close()

    # Reopen with pandas; the context manager closes the file even if one
    # of the assignments raises.
    with pd.HDFStore(store_name) as pd_store:
        pd_store[base_name + 'best_bg'] = best_bg
        pd_store[base_name + 'best_th'] = best_th
Example #9
Source File: panda.py From twint with MIT License | 6 votes |
def save(_filename, _dataframe, **options):
    """Save a DataFrame to ``_filename`` as HDF5 (default) or as a pickle.

    Parameters
    ----------
    _filename : str
        Path without extension; ".h5" or ".pkl" is appended.
    _dataframe : pandas.DataFrame
        The data to store.
    **options
        ``dataname``: key used inside the HDF5 file (default "twint").
        ``type``: leave unset for HDF5, or "Pickle"; any other value only
        prints a usage message.
    """
    _dataname = options.get("dataname") or "twint"
    _type = options.get("type")

    if not _type:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            # The context manager closes the file even if the write fails.
            with pd.HDFStore(_filename + ".h5") as _store:
                _store[_dataname] = _dataframe
    elif _type == "Pickle":
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _dataframe.to_pickle(_filename + ".pkl")
    else:
        print("""Please specify: filename, DataFrame, DataFrame name and type (HDF5, default, or Pickle)""")
Example #10
Source File: panda.py From twint with MIT License | 6 votes |
def read(_filename, **options):
    """Load a DataFrame saved as HDF5 (default) or as a pickle.

    Parameters
    ----------
    _filename : str
        Path without extension; ".h5" or ".pkl" is appended.
    **options
        ``dataname``: key inside the HDF5 file (default "twint").
        ``type``: leave unset for HDF5, or "Pickle"; any other value only
        prints a usage message.

    Returns
    -------
    The loaded DataFrame, or None when ``type`` is not recognised.
    """
    _dataname = options.get("dataname") or "twint"
    _type = options.get("type")

    if not _type:
        # Close the store once the frame is loaded; previously the handle
        # was never released.
        with pd.HDFStore(_filename + ".h5") as _store:
            return _store[_dataname]
    elif _type == "Pickle":
        return pd.read_pickle(_filename + ".pkl")
    else:
        print("""Please specify: DataFrame, DataFrame name (twint as default), filename and type (HDF5, default, or Pickle""")
Example #11
Source File: dc2_object.py From gcr-catalogs with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _open_hdf5(self, file_path):
    """Return a cached, open pd.HDFStore handle for an HDF5 file.

    A new read-only handle is created and cached when none is cached for
    <file_path> or when the cached one is no longer open.

    Args:
        file_path (str): The path of the desired file

    Return:
        The cached file handle
    """
    handles = self._file_handles
    usable = file_path in handles and handles[file_path].is_open
    if not usable:
        handles[file_path] = pd.HDFStore(file_path, 'r')
    return handles[file_path]
Example #12
Source File: burstlib_ext.py From FRETBursts with GNU General Public License v2.0 | 6 votes |
def _load_bg_data(store, base_name, ph_streams):
    """Load background data from the group `base_name` of an HDF5 file.

    The arrays are read through `store` (which must provide ``get_node``,
    ``filename`` and ``close``). `store` is then closed and the same file
    is reopened with pandas to read 'best_bg' and 'best_th'.

    Parameters
    ----------
    store : open HDF5 file handle
        Closed by this function once the arrays are read.
    base_name : str
        Group path; a trailing '/' is added if missing.
    ph_streams : iterable
        For each item, nodes ``str(item)`` and ``str(item) + '_err'`` are
        read.

    Returns
    -------
    tuple
        ``(best_th, best_bg, BG_data, BG_data_e, min_ph_delays)``.
    """
    if not base_name.endswith('/'):
        base_name = base_name + '/'
    store_name = store.filename
    group_name = '/' + base_name[:-1]

    min_ph_delays = store.get_node(group_name, 'min_ph_delays_us')[:]
    BG_data = {ph_sel: store.get_node(group_name, str(ph_sel))[:]
               for ph_sel in ph_streams}
    BG_data_e = {ph_sel: store.get_node(group_name, str(ph_sel) + '_err')[:]
                 for ph_sel in ph_streams}
    store.close()

    # Reopen with pandas; the context manager closes the file even if one
    # of the keys is missing.
    with pd.HDFStore(store_name) as pd_store:
        best_bg = pd_store[base_name + 'best_bg']
        best_th = pd_store[base_name + 'best_th']
    return best_th, best_bg, BG_data, BG_data_e, min_ph_delays
Example #13
Source File: hlatyper.py From OptiType with BSD 3-Clause "New" or "Revised" License | 6 votes |
def store_dataframes(out_hdf, **kwargs):
    """Write DataFrames passed as keyword arguments to an HDF file.

    DataFrames to serialize have to be passed by keyword arguments. An
    argument matrix1=DataFrame(...) will be written into table 'matrix1'
    in the HDF file.

    Parameters
    ----------
    out_hdf : str
        Path of the HDF file.
    **kwargs
        ``complevel`` (default 9) and ``complib`` (default 'zlib') are
        used as compression settings; every other keyword is stored as a
        table.
    """
    # default complevel & complib values if not explicitly asked for as
    # arguments
    complevel = kwargs.pop('complevel', 9)
    complib = kwargs.pop('complib', 'zlib')

    if VERBOSE:
        print(now(), 'Storing %d DataFrames in file %s with compression settings %d %s...' % (len(kwargs), out_hdf, complevel, complib))

    # TODO: WRITE ONLY? it probably appends now
    # The context manager closes the file even if a write raises.
    with pd.HDFStore(out_hdf, complevel=complevel, complib=complib) as store:
        for table_name, dataframe in kwargs.items():
            store[table_name] = dataframe

    if VERBOSE:
        print(now(), 'DataFrames stored in file.')
Example #14
Source File: test_orca.py From orca with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_write_tables(df, store_name):
    """Check where ``orca.write_tables`` stores a step's table.

    With ``None`` as the third argument the table is expected under the
    key 'table'; with 1969 it is expected under '1969/table'.
    """
    orca.add_table('table', df)

    # The step body is empty; it is registered only so that its table
    # names can be looked up below.
    @orca.step()
    def step(table):
        pass

    step_tables = orca.get_step_table_names(['step'])

    orca.write_tables(store_name, step_tables, None)
    with pd.HDFStore(store_name, mode='r') as store:
        assert 'table' in store
        pdt.assert_frame_equal(store['table'], df)

    orca.write_tables(store_name, step_tables, 1969)

    with pd.HDFStore(store_name, mode='r') as store:
        assert '1969/table' in store
        pdt.assert_frame_equal(store['1969/table'], df)
Example #15
Source File: test_orca.py From orca with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_run_and_write_tables_out_tables_provided(df, store_name):
    """Check which tables ``orca.run`` writes when output tables are given.

    All three tables are expected under 'base/'; only 'table' (the single
    entry of ``out_run_tables``) is expected under the iteration key '0/'.
    """
    table_names = ['table', 'table2', 'table3']
    for t in table_names:
        orca.add_table(t, df)

    # The step does nothing itself.
    @orca.step()
    def step(iter_var, table, table2):
        return

    orca.run(
        ['step'],
        iter_vars=range(1),
        data_out=store_name,
        out_base_tables=table_names,
        out_run_tables=['table'])

    with pd.HDFStore(store_name, mode='r') as store:
        for t in table_names:
            assert 'base/{}'.format(t) in store

        assert '0/table' in store
        assert '0/table2' not in store
        assert '0/table3' not in store
Example #16
Source File: helper.py From tierpsy-tracker with MIT License | 6 votes |
def calculate_bgnd_from_masked_fulldata(masked_image_file):
    """
    Build a "background" image from a masked-image HDF5 file.

    - Reads the /full_data node of masked_image_file and takes the maximum
      value of each pixel along the first axis.
    - Parses the file name to find a camera serial number.
    - Reads the microns-per-pixel ratio from masked_image_file.

    Returns the tuple (background image, camera serial, microns per pixel).
    Asserts that the file is flagged as light background.
    """
    import numpy as np
    from tierpsy.helper.params import read_unit_conversions

    # read attributes of masked_image_file
    conversions = read_unit_conversions(masked_image_file)
    _, (microns_per_pixel, xy_units), is_light_background = conversions

    # get "background" and px2um
    with pd.HDFStore(masked_image_file, 'r') as store:
        assert is_light_background, \
            'MultiWell recognition is only available for brightfield at the moment'
        full_frames = store.get_node('/full_data')
        background = np.max(full_frames, axis=0)

    serial = parse_camera_serial(masked_image_file)
    return background, serial, microns_per_pixel
Example #17
Source File: process_ow.py From tierpsy-tracker with MIT License | 5 votes |
def ow_trajectories_summary(fname):
    """Build a per-trajectory summary table of the features in `fname`.

    '/features_timeseries' is grouped by 'worm_index'. For each group the
    event features and time-series columns are passed to
    ``WormStats.getWormStats`` with ``np.nanmean``, columns listed in
    ``WormStats.extra_fields`` are dropped, and trajectory information is
    added with ``add_trajectory_info``.

    Returns a one-element list holding the concatenated DataFrame.
    """
    fps = read_fps(fname)

    with pd.HDFStore(fname, 'r') as store:
        timeseries = store['/features_timeseries']

    stats = WormStats()
    kept_columns = None
    per_worm = []
    for worm_id, worm_rows in timeseries.groupby('worm_index'):
        event_group = ['worm_{}'.format(int(worm_id))]
        features = read_feat_events(fname, event_group)
        for column in worm_rows:
            features[column] = worm_rows[column].values

        summary = pd.DataFrame(stats.getWormStats(features, np.nanmean))

        # only calculate this the first time...
        if kept_columns is None:
            kept_columns = [name for name in summary.columns
                            if name not in stats.extra_fields]

        # remove uncalculated indexes from wStats
        summary = summary.loc[:, kept_columns]
        assert 'worm_index' not in summary

        per_worm.append(add_trajectory_info(summary, worm_id, worm_rows, fps))

    return [pd.concat(per_worm, ignore_index=True)]
    #%%
Example #18
Source File: getIntensityProfile.py From tierpsy-tracker with MIT License | 5 votes |
def setIntMapIndexes(skeletons_file, min_num_skel):
    """Assign an 'int_map_id' to every valid row of '/trajectories_data'.

    Rows that are not selected get 'int_map_id' = -1. The modified table
    is saved back with ``save_modified_table`` and only the selected rows
    are returned.
    """
    # get index of valid skeletons. Let's use pandas because it is easier to
    # process.
    with pd.HDFStore(skeletons_file, 'r') as fid:
        trajectories_data = fid['/trajectories_data']

    if 'is_good_skel' in trajectories_data:
        # select rows with only valid filtered skeletons
        good = trajectories_data['is_good_skel'] == 1
    else:
        # or that at least have a skeleton
        good = trajectories_data['has_skeleton'] == 1

    trajectories_data_valid = trajectories_data[good]

    # select trajectories that have at least min_num_skel valid skeletons
    # NOTE(review): the comparison below is strict (>), so a trajectory
    # with exactly min_num_skel skeletons is dropped -- confirm intent.
    N = trajectories_data_valid.groupby(
        'worm_index_joined').agg({'has_skeleton': np.nansum})
    N = N[N > min_num_skel].dropna()
    good = trajectories_data_valid['worm_index_joined'].isin(N.index)
    trajectories_data_valid = trajectories_data_valid.loc[good]

    # assign indexes to the new rows
    tot_valid_rows = len(trajectories_data_valid)
    trajectories_data['int_map_id'] = -1
    trajectories_data.loc[
        trajectories_data_valid.index,
        'int_map_id'] = np.arange(tot_valid_rows)

    # let's save this data into the skeletons file
    save_modified_table(skeletons_file, trajectories_data, 'trajectories_data')

    # get the valid trajectories with the correct index. There is probably a
    # faster way to do this, but this is less prone to errors.
    trajectories_data_valid = trajectories_data[
        trajectories_data['int_map_id'] != -1]

    # return the reduced version with only valid rows
    return trajectories_data_valid
Example #19
Source File: eq_loc.py From pykonal with GNU General Public License v3.0 | 5 votes |
def write_events(dataframe, output_file):
    """
    Save event locations to an HDF5 file via pandas.HDFStore.

    Every field listed in DTYPES is cast, in place on `dataframe`, to its
    configured dtype before the frame is stored under the "events" key.
    Returns True.
    """
    logger.debug("Saving event locations to disk.")

    # Convert dtypes before saving event locations.
    for field in DTYPES:
        target_dtype = DTYPES[field]
        dataframe[field] = dataframe[field].astype(target_dtype)

    with pd.HDFStore(output_file, mode="w") as store:
        store.put("events", dataframe)

    return True
Example #20
Source File: io.py From PyPSA with GNU General Public License v3.0 | 5 votes |
def __init__(self, path, **kwargs):
    """Open `path` as an HDF5 store in write mode ('w').

    Extra keyword arguments are forwarded to ``pd.HDFStore``.
    """
    self.ds = pd.HDFStore(path, mode='w', **kwargs)
    # Starts empty; how it is filled is not visible in this block.
    self.index = {}
Example #21
Source File: io.py From PyPSA with GNU General Public License v3.0 | 5 votes |
def __init__(self, path):
    """Open `path` as an HDF5 store in read-only mode ('r')."""
    self.ds = pd.HDFStore(path, mode='r')
    # Starts empty; how it is filled is not visible in this block.
    self.index = {}
Example #22
Source File: seqlib.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 5 votes |
def counts_from_file_h5(self, fname):
    """
    If an HDF store containing raw counts has been specified, open the
    store, copy those counts into this store, and close the counts store.

    Copies all tables in the ``'/raw'`` group along with their metadata.

    Raises ValueError if the store has no keys under ``'/raw/'``. The
    counts store is closed on that path as well.
    """
    # The context manager closes the external store on every exit path,
    # including the ValueError below.
    with pd.HDFStore(fname) as store:
        self.logger.info(
            "Using existing HDF5 data store '{}' for raw data" "".format(fname)
        )
        # this could probably be much more efficient, but the PyTables docs
        # don't explain copying subsets of files adequately
        raw_keys = [key for key in store.keys() if key.startswith("/raw/")]
        if not raw_keys:
            raise ValueError(
                "No raw counts found in '{}' [{}]" "".format(fname, self.name)
            )
        for k in raw_keys:
            # copy the data table
            raw = store[k]
            self.store.put(k, raw, format="table", data_columns=raw.columns)
            # copy the metadata
            self.set_metadata(k, self.get_metadata(k, store=store), update=False)
            self.logger.info("Copied raw data '{}'".format(k))
Example #23
Source File: storemanager.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_metadata(self, key, store=None):
    """
    Look up the Enrich2 metadata dictionary stored for *key*.

    *key* is the name of the group or node in the HDF5 data store.

    Returns the metadata dictionary for *key*, or ``None`` if no metadata
    has been set for it.

    *store* can be an external open HDFStore (used when copying metadata
    from raw counts). If it is ``None``, this object's store is used.

    Raises AttributeError if the node lookup fails with AttributeError.
    """
    target = self.store if store is None else store
    try:
        return target.get_storer(key).attrs["enrich2"]
    except KeyError:
        # no enrich2 metadata
        return None
    except AttributeError:
        if target is self.store:
            # store parameter was None
            message = "Invalid HDF store node '{}' [{}]".format(key, self.name)
        else:
            message = "Invalid external HDF store node '{}' in '{}' [{}]".format(
                key, target.filename, self.name
            )
        raise AttributeError(message)
Example #24
Source File: dataio.py From tribeflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def save_model(out_fpath, model):
    '''
    Saves the given model to out_fpath. The model is simply a map of
    string keys to numpy array or dict values. Nothing else is supported.

    key -> array
    key -> dict

    Arrays are stored as a DataFrame of the array; dicts are stored as a
    two-column ('Name', 'Id') DataFrame of their items.

    Parameters
    ----------
    out_fpath : string
        Where to save the model
    model : dict
        The actual model
    '''
    # The context manager closes the file even if a value cannot be stored.
    with pd.HDFStore(out_fpath, 'w') as store:
        for model_key, model_val in model.items():
            if isinstance(model_val, np.ndarray):
                store[model_key] = pd.DataFrame(model_val)
            else:
                # list() so a Python 3 dict view is passed as a plain
                # sequence of (name, id) pairs.
                store[model_key] = pd.DataFrame(list(model_val.items()),
                                                columns=['Name', 'Id'])
Example #25
Source File: storemanager.py From Enrich2 with BSD 3-Clause "New" or "Revised" License | 5 votes |
def store_open(self, children=False, force_delete=True):
    """
    Open the HDF5 file associated with this object. If the
    ``force_recalculate`` option is selected and ``force_delete`` is
    ``True``, the existing tables under ``'/main'`` will be deleted upon
    opening.

    If ``children`` is true, ``store_open`` is also called on each child.

    This method needs a lot more error checking.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed
    # source -- confirm the indentation against the original file.
    if self.has_store:
        if not self.store_cfg:
            # No store configured: derive the path from the output
            # directory, the object's name and the store suffix.
            self.store_path = os.path.join(
                self.output_dir,
                "{}_{}.h5".format(fix_filename(self.name), self.store_suffix),
            )
            if os.path.exists(self.store_path):
                self.logger.info(
                    'Found existing HDF5 data store "{}"'.format(self.store_path)
                )
            else:
                self.logger.info(
                    'Creating new HDF5 data store "{}"'.format(self.store_path)
                )
        self.store = pd.HDFStore(self.store_path)
        if self.force_recalculate and force_delete:
            if "/main" in self.store:
                self.logger.info("Deleting existing calculated values")
                self.store.remove("/main")
            else:
                self.logger.warning("No existing calculated values in file")
    if children and self.children is not None:
        for child in self.children:
            # NOTE(review): force_delete is not forwarded, so children
            # always use its default (True) -- confirm this is intended.
            child.store_open(children=True)
Example #26
Source File: network.py From teneto with GNU General Public License v3.0 | 5 votes |
def hdf5_setup(self, hdf5path):
    """
    Write the network to an HDF5 file and switch this object to HDF5 mode.

    The current ``self.network`` is stored under the key 'network' in
    table format. Afterwards ``self.hdf5`` is True and ``self.network``
    holds the file path instead of the data.

    Parameters
    ----------
    hdf5path : str
        Path of the HDF5 file.
    """
    # The context manager closes the file even if the write fails, in
    # which case the object is left unchanged.
    with pd.HDFStore(hdf5path) as hdf:
        hdf.put('network', self.network, format='table', data_columns=True)
    self.hdf5 = True
    self.network = hdf5path
Example #27
Source File: bic.py From tribeflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main(model):
    """Print a score computed from a stored model and its trace file.

    Reads the model tables from the HDF5 file `model`, accumulates the
    log of a per-(destination, source) probability over the first ``to``
    lines of the trace file, and prints
    ``-2 * ll + n * log(count_z.shape[0] + sum(Psi_oz.shape))``.

    Parameters
    ----------
    model : str
        Path of the HDF5 model file.
    """
    # Read everything needed up front; the context manager closes the
    # store even if the assertion or one of the lookups fails.
    with pd.HDFStore(model) as store:
        from_ = store['from_'][0][0]
        to = store['to'][0][0]
        assert from_ == 0

        trace_fpath = store['trace_fpath'][0][0]
        Psi_oz = store['Psi_sz'].values
        count_z = store['count_z'].values[:, 0]
        obj2id = dict(store['source2id'].values)
        # Unused below; the read is kept so a missing 'Dts' table still
        # raises as before.
        mem_size = store['Dts'].values.shape[1]

    Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
    Psi_zo = (Psi_oz * count_z).T
    Psi_zo = Psi_zo / Psi_zo.sum(axis=0)

    probs = {}
    ll = 0.0
    n = 0.0
    with open(trace_fpath) as trace_file:
        for i, l in enumerate(trace_file):
            if i >= to:
                break
            n += 1
            _, _, s, d = l.strip().split('\t')
            # Cache the probability per (destination, source) id pair.
            pair = (obj2id[d], obj2id[s])
            if pair not in probs:
                probs[pair] = (Psi_oz[pair[0]] * Psi_zo[:, pair[1]]).sum()
            ll += np.log(probs[pair])

    print(-2 * ll + n * np.log(count_z.shape[0] + sum(Psi_oz.shape)))
Example #28
Source File: network.py From teneto with GNU General Public License v3.0 | 5 votes |
def drop_edge(self, edgelist):
    """
    Removes edges from the network.

    Parameters
    ----------
    edgelist : list
        a list (or list of lists) containing the i, j and t indices of
        the edges to be removed.

    Returns
    --------
    Nothing; the network is updated in place (in the HDF5 file when
    ``self.hdf5`` is set, otherwise in the ``self.network`` dataframe).
    """
    # A single edge is wrapped so that it is handled like a list of edges.
    edges = edgelist if isinstance(edgelist[0], list) else [edgelist]
    self._check_input(edges, 'edgelist')

    if not self.hdf5:
        for edge in edges:
            net = self.network
            matches = ((net['i'] == edge[0])
                       & (net['j'] == edge[1])
                       & (net['t'] == edge[2]))
            net.drop(net[matches].index, inplace=True)
        self.network.reset_index(inplace=True, drop=True)
        self._update_network()
        return

    with pd.HDFStore(self.network) as hdf:
        for edge in edges:
            where = ' & '.join([
                'i == ' + str(edge[0]),
                'j == ' + str(edge[1]),
                't == ' + str(edge[2]),
            ])
            hdf.remove('network', where)
    print('HDF5 delete warning. This will not reduce the size of the file.')
Example #29
Source File: network.py From teneto with GNU General Public License v3.0 | 5 votes |
def add_edge(self, edgelist):
    """
    Adds edges to the network.

    Parameters
    ----------
    edgelist : list
        a list (or list of lists) containing the i, j and t indices to be
        added. For weighted networks each edge should also contain the
        weight as a fourth element.

    Returns
    --------
    Nothing; the network is updated in place (in the HDF5 file when
    ``self.hdf5`` is set, otherwise in the ``self.network`` dataframe).

    Raises
    ------
    ValueError
        If the network is dense, or if the edges do not have 3 or 4
        elements.
    """
    if not self.sparse:
        raise ValueError('Add edge not compatible with dense network')
    if not isinstance(edgelist[0], list):
        edgelist = [edgelist]
    self._check_input(edgelist, 'edgelist')

    if len(edgelist[0]) == 4:
        colnames = ['i', 'j', 't', 'weight']
    elif len(edgelist[0]) == 3:
        colnames = ['i', 'j', 't']
    else:
        # Without this branch colnames stayed unbound and the failure
        # surfaced later as an UnboundLocalError.
        raise ValueError(
            'Each edge must have 3 (i, j, t) or 4 (i, j, t, weight) elements')

    if self.hdf5:
        with pd.HDFStore(self.network) as hdf:
            # New rows continue the index of the rows already stored.
            rows = hdf.get_storer('network').nrows
            hdf.append('network',
                       pd.DataFrame(edgelist, columns=colnames,
                                    index=np.arange(rows, rows + len(edgelist))),
                       format='table', data_columns=True)
        # Grow the recorded network shape if the new edges exceed it.
        edgelist = np.array(edgelist)
        if np.max(edgelist[:, :2]) > self.netshape[0]:
            self.netshape[0] = np.max(edgelist[:, :2])
        if np.max(edgelist[:, 2]) > self.netshape[1]:
            self.netshape[1] = np.max(edgelist[:, 2])
    else:
        newedges = pd.DataFrame(edgelist, columns=colnames)
        self.network = pd.concat(
            [self.network, newedges], ignore_index=True, sort=True)
        self._update_network()
Example #30
Source File: Omlette.py From OpenTrader with GNU Lesser General Public License v3.0 | 5 votes |
def __init__(self, sHdfStore="", oFd=sys.stdout):
    """Set up the object and, when a path is given, open an HDF store.

    sHdfStore: path of the HDF file to open in write mode ('w'); when
        empty, no store is opened and ``self.oHdfStore`` stays None.
    oFd: file-like object used for the informational message. The default
        is the ``sys.stdout`` object at the time the function was defined.
    """
    self.oHdfStore = None
    self.oFd = oFd
    if sHdfStore:
        # ugly - active
        self.oHdfStore = pandas.HDFStore(sHdfStore, mode='w')
        # NOTE(review): there is no separator between "hdf store" and the
        # filename in the message below -- confirm whether that is intended.
        self.oFd.write("INFO: hdf store" +self.oHdfStore.filename +'\n')

    self.oRecipe = None
    self.oChefModule = None