Python tables.open_file() Examples
The following are 30 code examples of tables.open_file(), drawn from open-source projects. The original project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the tables module.
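Before the project examples, here is a minimal sketch of the basic pattern they all build on: open an HDF5 file with tables.open_file(), create or read a node under the root group, and close the file. The file name and array contents below are illustrative placeholders, not taken from any of the projects.

import numpy as np
import tables

# Write a small array to a new HDF5 file ("example.h5" is an illustrative name),
# then reopen the file read-only and fetch the data back.
with tables.open_file("example.h5", mode="w", title="Demo file") as h5file:
    h5file.create_array(h5file.root, "values", np.arange(10), title="Sample data")

with tables.open_file("example.h5", mode="r") as h5file:
    values = h5file.root.values[:]   # read the whole array into memory
    print(values)

Using the file as a context manager guarantees it is closed even if an exception is raised; several of the examples below instead call close() explicitly.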
Example #1
Source File: hdf_saver.py From crappy with GNU General Public License v2.0 | 6 votes |
def prepare(self):
    assert self.inputs, "No input connected to the hdf_saver!"
    assert len(self.inputs) == 1, \
        "Cannot link more than one block to a hdf_saver!"
    d = path.dirname(self.filename)
    if not path.exists(d):
        # Create the folder if it does not exist
        try:
            makedirs(d)
        except OSError:
            assert path.exists(d), "Error creating " + d
    if path.exists(self.filename):
        # If the file already exists, append a number to the name
        print("[hdf_saver] WARNING!", self.filename, "already exists !")
        name, ext = path.splitext(self.filename)
        i = 1
        while path.exists(name + "_%05d" % i + ext):
            i += 1
        self.filename = name + "_%05d" % i + ext
        print("[hdf_saver] Using", self.filename, "instead!")
    self.hfile = tables.open_file(self.filename, "w")
    for name, value in self.metadata.items():
        self.hfile.create_array(self.hfile.root, name, value)
Example #2
Source File: minute_bars.py From catalyst with Apache License 2.0 | 6 votes |
def write(self, frames):
    """
    Write the frames to the target HDF5 file, using the format used by
    ``pd.Panel.to_hdf``

    Parameters
    ----------
    frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
        An iterable or other mapping of sid to the corresponding OHLCV
        pricing data.
    """
    with HDFStore(self._path, 'w',
                  complevel=self._complevel, complib=self._complib) \
            as store:
        panel = pd.Panel.from_dict(dict(frames))
        panel.to_hdf(store, 'updates')
    with tables.open_file(self._path, mode='r+') as h5file:
        h5file.set_node_attr('/', 'version', 0)
Example #3
Source File: preprocess_coco_h5.py From TFSegmentation with Apache License 2.0 | 6 votes |
def write_image_annotation_pairs_to_h5(filename_pairs, h5_filename):
    atom = tables.Int8Atom()
    h5_file = tables.open_file(h5_filename, mode='a')
    array_x = h5_file.create_earray(h5_file.root, 'X', atom, (0, 512, 1024, 3))
    array_y = h5_file.create_earray(h5_file.root, 'Y', atom, (0, 512, 1024))
    h = 512
    w = 1024
    for img_path, annotation_path in tqdm(filename_pairs):
        img = misc.imread(img_path)
        img = misc.imresize(img, (h, w))
        annotation = misc.imread(annotation_path)
        annotation = custom_ignore_labels(annotation)
        annotation = misc.imresize(annotation, (h, w), 'nearest')
        array_x.append(np.expand_dims(img, 0))
        array_y.append(np.expand_dims(annotation, 0))
    h5_file.close()
Example #4
Source File: preprocess_cityscapes_h5.py From TFSegmentation with Apache License 2.0 | 6 votes |
def write_image_annotation_pairs_to_h5(filename_pairs, h5_filename):
    atom = tables.Int8Atom()
    h5_file = tables.open_file(h5_filename, mode='a')
    array_x = h5_file.create_earray(h5_file.root, 'X', atom, (0, 512, 1024, 3))
    array_y = h5_file.create_earray(h5_file.root, 'Y', atom, (0, 512, 1024))
    h = 512
    w = 1024
    for img_path, annotation_path in tqdm(filename_pairs):
        img = misc.imread(img_path)
        img = misc.imresize(img, (h, w))
        annotation = misc.imread(annotation_path)
        annotation = custom_ignore_labels(annotation)
        annotation = misc.imresize(annotation, (h, w), 'nearest')
        array_x.append(np.expand_dims(img, 0))
        array_y.append(np.expand_dims(annotation, 0))
    h5_file.close()
Example #5
Source File: sm_table.py From gmpe-smtk with GNU Affero General Public License v3.0 | 6 votes |
def get_dbnames(filepath):
    '''Returns the database names of the given Gm database (HDF5 file).
    The file should have been created with the `GMTableParser.parse`
    method.

    :param filepath: the path to the HDF5 file
    :return: a list of strings identifying the database names in the file
    '''
    with tables.open_file(filepath, 'r') as h5file:
        root = h5file.get_node('/')
        return [group._v_name for group in  # pylint: disable=protected-access
                h5file.list_nodes(root, classname=Group.__name__)]
        # note: h5file.walk_groups() might raise a ClosedNodeError.
        # This error is badly documented (as much pytables stuff),
        # the only mention is (pytables pdf doc): "CloseNodeError: The
        # operation can not be completed because the node is closed. For
        # instance, listing the children of a closed group is not allowed".
        # I suspect it deals with groups deleted / overwritten and the way
        # hdf5 files mark portions of files to be "empty". However,
        # the list_nodes above seems not to raise anymore
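As a side note on the traversal question raised in the comment above, the sketch below contrasts list_nodes() with walk_groups(); the file name is an illustrative placeholder, not part of the gmpe-smtk project.

import tables

# "database.h5" is a placeholder; any HDF5 file with groups under root will do.
with tables.open_file("database.h5", mode="r") as h5file:
    # list_nodes: only the direct children of '/' that are Group instances
    direct_groups = [g._v_name
                     for g in h5file.list_nodes("/", classname="Group")]

    # walk_groups: recursively yields every Group in the file, starting at '/'
    all_group_paths = [g._v_pathname for g in h5file.walk_groups("/")]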
Example #6
Source File: test_hdf5.py From ctapipe with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_units():
    class WithUnits(Container):
        inverse_length = Field(5 / u.m, "foo")
        time = Field(1 * u.s, "bar", unit=u.s)
        grammage = Field(2 * u.g / u.cm ** 2, "baz", unit=u.g / u.cm ** 2)

    c = WithUnits()

    with tempfile.NamedTemporaryFile() as f:
        with HDF5TableWriter(f.name, "data") as writer:
            writer.write("units", c)

        with tables.open_file(f.name, "r") as f:
            assert f.root.data.units.attrs["inverse_length_UNIT"] == "m-1"
            assert f.root.data.units.attrs["time_UNIT"] == "s"
            assert f.root.data.units.attrs["grammage_UNIT"] == "cm-2 g"
Example #7
Source File: hdf.py From vivarium with GNU General Public License v3.0 | 6 votes |
def touch(path: Union[str, Path]):
    """Creates an HDF file, wiping an existing file if necessary.

    If the given path is valid for an HDF file, a new HDF file is created.

    Parameters
    ----------
    path
        The path to the HDF file.

    Raises
    ------
    ValueError
        If an invalid path is given.
    """
    path = _get_valid_hdf_path(path)

    with tables.open_file(str(path), mode='w'):
        pass
Example #8
Source File: test_tools.py From ctapipe with BSD 3-Clause "New" or "Revised" License | 6 votes |
def test_muon_reconstruction(tmpdir):
    from ctapipe.tools.muon_reconstruction import MuonAnalysis

    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:
        assert (
            run_tool(
                MuonAnalysis(),
                argv=[f"--input={LST_MUONS}", f"--output={f.name}", "--overwrite"],
            )
            == 0
        )

        t = tables.open_file(f.name)
        table = t.root.dl1.event.telescope.parameters.muons[:]
        assert len(table) > 20
        assert np.count_nonzero(np.isnan(table["muonring_radius"])) == 0

    assert run_tool(MuonAnalysis(), ["--help-all"]) == 0
Example #9
Source File: TM_dataset.py From GroundHog with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_files(self):
    if self.target_lfiles is not None:
        for target_lfile in self.target_lfiles:
            target_lang = tables.open_file(target_lfile, 'r')
            self.target_langs.append([target_lang.get_node(self.table_name),
                                      target_lang.get_node(self.index_name)])

    for source_lfile in self.source_lfiles:
        source_lang = tables.open_file(source_lfile, 'r')
        self.source_langs.append([source_lang.get_node(self.table_name),
                                  source_lang.get_node(self.index_name)])

    self.data_len = self.source_langs[-1][1].shape[0]
    self.idxs = np.arange(self.data_len)
    if self.shuffle:
        np.random.shuffle(self.idxs)
Example #10
Source File: model_tests_basic.py From PyDeepGP with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path, self.outputfile))
    X = np.loadtxt(os.path.join(base_path, self.inputfile))

    m = deepgp.DeepGP([Y.shape[1], 5, X.shape[1]], Y, X=X,
                      kernels=[GPy.kern.RBF(5, ARD=True),
                               GPy.kern.RBF(X.shape[1], ARD=True)],
                      num_inducing=2, back_constraint=True,
                      encoder_dims=[[3]])

    if not os.path.exists(os.path.join(base_path, self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path, self.modelfile))
        with h5py.File(os.path.join(base_path, self.modelfile), 'r+') as f:
            L = f.create_dataset("L", (1,), dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path, self.modelfile), 'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
Example #11
Source File: model_tests_basic.py From PyDeepGP with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path, self.datafile))

    m = deepgp.DeepGP([Y.shape[1], 5, 2], Y,
                      kernels=[GPy.kern.RBF(5, ARD=True),
                               GPy.kern.RBF(2, ARD=True)],
                      num_inducing=2, back_constraint=True,
                      encoder_dims=[[3], [2]])

    if not os.path.exists(os.path.join(base_path, self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path, self.modelfile))
        with h5py.File(os.path.join(base_path, self.modelfile), 'r+') as f:
            L = f.create_dataset("L", (1,), dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path, self.modelfile), 'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
Example #12
Source File: model_tests_basic.py From PyDeepGP with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path, self.outputfile))
    X = np.loadtxt(os.path.join(base_path, self.inputfile))

    m = deepgp.DeepGP([Y.shape[1], 5, X.shape[1]], Y, X=X,
                      kernels=[GPy.kern.RBF(5, ARD=True),
                               GPy.kern.RBF(X.shape[1], ARD=True)],
                      num_inducing=2, back_constraint=False)

    if not os.path.exists(os.path.join(base_path, self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path, self.modelfile))
        with h5py.File(os.path.join(base_path, self.modelfile), 'r+') as f:
            L = f.create_dataset("L", (1,), dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path, self.modelfile), 'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
Example #13
Source File: hdf.py From vivarium with GNU General Public License v3.0 | 6 votes |
def remove(path: Union[str, Path], entity_key: str):
    """Removes a piece of data from an HDF file.

    Parameters
    ----------
    path :
        The path to the HDF file to remove the data from.
    entity_key :
        A representation of the internal HDF path where the data is located.

    Raises
    ------
    ValueError
        If the path or entity_key are improperly formatted.
    """
    path = _get_valid_hdf_path(path)
    entity_key = EntityKey(entity_key)

    with tables.open_file(str(path), mode='a') as file:
        file.remove_node(entity_key.path, recursive=True)
Example #14
Source File: hdf.py From vivarium with GNU General Public License v3.0 | 6 votes |
def get_keys(path: str) -> List[str]:
    """Gets key representation of all paths in an HDF file.

    Parameters
    ----------
    path :
        The path to the HDF file.

    Returns
    -------
        A list of key representations of the internal paths in the HDF.
    """
    path = _get_valid_hdf_path(path)
    with tables.open_file(str(path)) as file:
        keys = _get_keys(file.root)
    return keys
Example #15
Source File: model_tests_basic.py From PyDeepGP with BSD 3-Clause "New" or "Revised" License | 6 votes |
def setUp(self):
    # Load data
    Y = np.loadtxt(os.path.join(base_path, self.datafile))

    m = deepgp.DeepGP([Y.shape[1], 5, 2], Y,
                      kernels=[GPy.kern.RBF(5, ARD=True),
                               GPy.kern.RBF(2, ARD=True)],
                      num_inducing=2, back_constraint=False)

    if not os.path.exists(os.path.join(base_path, self.modelfile)):
        # Create the model file
        m.randomize()
        m._trigger_params_changed()
        m.save(os.path.join(base_path, self.modelfile))
        with h5py.File(os.path.join(base_path, self.modelfile), 'r+') as f:
            L = f.create_dataset("L", (1,), dtype=np.float)
            L[:] = m._log_marginal_likelihood
            f.close()

    # Load model parameters
    with tables.open_file(os.path.join(base_path, self.modelfile), 'r') as f:
        m.param_array[:] = f.root.param_array[:]
        L = float(f.root.L[:])
        m._trigger_params_changed()
        f.close()

    self.model = m
    self.L = L
Example #16
Source File: StructuredForests.py From StructuredForests with BSD 2-Clause "Simplified" License | 6 votes |
def load_model(self):
    model_file = os.path.join(self.forest_dir, self.forest_name)

    with tables.open_file(model_file, filters=self.comp_filt) as mfile:
        self.model = {
            "thrs": mfile.get_node("/thrs")[:],
            "fids": mfile.get_node("/fids")[:],
            "cids": mfile.get_node("/cids")[:],
            "edge_bnds": mfile.get_node("/edge_bnds")[:].flatten(),
            "edge_pts": mfile.get_node("/edge_pts")[:].flatten(),
            "n_seg": mfile.get_node("/n_seg")[:].flatten(),
            "segs": mfile.get_node("/segs")[:],
        }

    self.trained = True
    return self.model
Example #17
Source File: get_sample_names.py From WASP with Apache License 2.0 | 6 votes |
def main():
    parser = argparse.ArgumentParser(description="Writes names of samples "
                                     "contained in HDF5 file to stdout")
    parser.add_argument("h5file", help="HDF5 file containing /samples table")
    options = parser.parse_args()

    h5f = tables.open_file(options.h5file)

    for node in h5f.root:
        if node.name.startswith("samples"):
            _, chr_name = node.name.split("_", 1)
            sys.stdout.write("%s:\n" % chr_name)
            for row in node:
                sys.stdout.write("  %s\n" % row['name'])
            sys.stdout.write("\n")
        else:
            sys.stderr.write("%s does not contain samples table\n"
                             % options.h5file)
            exit(2)

    h5f.close()
Example #18
Source File: logging.py From mackrl with Apache License 2.0 | 6 votes |
def __init__(self, path, name, logging_struct, T_per_file=500000):
    name = "__".join(name.split("/"))  # escape slash character in name
    try:
        from tables import open_file
        self.path = path
        self.name = name
        self.T_per_file = T_per_file
        self.hdf_path = os.path.join(path, "hdf")
        self.folder_name = os.path.join(self.hdf_path, name)
        if not os.path.isdir(self.folder_name):
            os.makedirs(self.folder_name)
        self.logging_struct = logging_struct
    except Exception as e:
        self.logging_struct.py_logger.warning(
            "Could not execute HDF logger save - no disk space, or no permissions? "
            + "Error message: {}, path: {}, name: {}".format(e, path, name))
        pass
Example #19
Source File: hdf5.py From spotpy with MIT License | 6 votes |
def __init__(self, *args, **kwargs):
    """
    Create a new datawriter for hdf5 files
    :param args:
    :param kwargs:
    """
    # init base class
    super(hdf5, self).__init__(*args, **kwargs)
    # store init item only if dbinit
    if not kwargs.get('dbappend', False):
        # Create an open file, which needs to be closed after the sampling
        self.db = tables.open_file(self.dbname + '.h5', 'w', self.dbname)
        self.table = self.db.create_table('/', self.dbname,
                                          description=self.get_table_def())
    else:
        # Continues writing file
        self.db = tables.open_file(self.dbname + '.h5', 'a')
        self.table = self.db.root[self.dbname]
Example #20
Source File: update_het_probs.py From WASP with Apache License 2.0 | 5 votes |
def main():
    error = 0.01
    args = parse_options()

    if util.is_gzipped(args.infile):
        infile = gzip.open(args.infile, "rt")
    else:
        infile = open(args.infile, "rt")

    if args.outfile.endswith(".gz"):
        outfile = gzip.open(args.outfile, "wt")
    else:
        outfile = open(args.outfile, "wt")

    ref_count_h5 = tables.open_file(args.ref_as_counts)
    alt_count_h5 = tables.open_file(args.alt_as_counts)

    snp_line = infile.readline()
    if snp_line:
        outfile.write(snp_line)
    else:
        sys.stderr.write("The input file was empty.\n")
        exit(-1)

    snp_line = infile.readline()
    while snp_line:
        snpinfo = snp_line.strip().split()
        if snpinfo[9] == "NA":
            outfile.write(snp_line)
        else:
            new_hetps = process_one_snp(snpinfo, ref_count_h5,
                                        alt_count_h5, error)
            outfile.write("\t".join(snpinfo[:10] + [";".join(new_hetps)]
                                    + snpinfo[11:]) + "\n")
        snp_line = infile.readline()

    ref_count_h5.close()
    alt_count_h5.close()
Example #21
Source File: hdf5.py From attention-lvcsr with MIT License | 5 votes |
def open_file(self, path):
    self.h5file = tables.open_file(path, mode="r")
    node = self.h5file.get_node('/', self.data_node)

    self.nodes = [getattr(node, source) for source in self.sources_in_file]
    if self.stop is None:
        self.stop = self.nodes[0].nrows
    self.num_examples = self.stop - self.start
Example #22
Source File: CheckFinished.py From tierpsy-tracker with MIT License | 5 votes |
def _checkFlagsFun(fname, field_name, test_value,
                   test_func=_isValidFlag, extra_files=[]):
    accepted_errors = (tables.exceptions.HDF5ExtError,
                       tables.exceptions.NoSuchNodeError,
                       KeyError, IOError)
    try:
        with tables.open_file(fname, mode='r') as fid:
            field = fid.get_node(field_name)
            has_finished = test_func(field, test_value)

            # check that all the extra files do exist
            has_finished = has_finished and all(os.path.exists(x)
                                                for x in extra_files)
            return has_finished
    except accepted_errors:
        return False
Example #23
Source File: h5tools.py From pywr with GNU General Public License v3.0 | 5 votes |
def __init__(self, filename, filter_kwds=None, mode="r", title='',
             metadata=None, create_directories=False):
    self._opened = False
    if isinstance(filename, (str, os.PathLike)):
        # filename is a path to open
        self.filename = filename
        # Not sure how else to deal with str / unicode requirements in pytables
        # See this issue: https://github.com/PyTables/PyTables/issues/522
        import sys
        if filter_kwds:
            if sys.version_info[0] == 2 and 'complib' in filter_kwds:
                filter_kwds['complib'] = filter_kwds['complib'].encode()
            filters = tables.Filters(**filter_kwds)
        else:
            filters = None

        # Create directories for the filename if required
        if create_directories:
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exception:
                import errno
                if exception.errno != errno.EEXIST:
                    raise

        self.file = tables.open_file(filename, mode=mode, filters=filters,
                                     title=title)
        self._opened = True
    elif isinstance(filename, tables.File):
        # filename is a pytables file
        self.file = filename
        assert(self.file.isopen)
        self.filename = self.file.filename
        self._opened = False
    else:
        raise TypeError("{} must be initalised with a filename to open or an "
                        "open tables.File".format(self.__class__.__name__))

    # now update metadata if given
    if metadata is not None and self.file.mode != 'r':
        for k, v in metadata.items():
            setattr(self.file.root._v_attrs, k, v)
Example #24
Source File: hdf5.py From spotpy with MIT License | 5 votes |
def getdata(self):
    with tables.open_file(self.dbname + '.h5', 'a') as db:
        return db.root[self.dbname][:]
Example #25
Source File: hdf5.py From attention-lvcsr with MIT License | 5 votes |
def load(self):
    self.open_file(self.path)
Example #26
Source File: test_hdf5.py From attention-lvcsr with MIT License | 5 votes |
def setUp(self):
    num_rows = 500
    filters = tables.Filters(complib='blosc', complevel=5)
    h5file = tables.open_file(
        'tmp.h5', mode='w', title='Test', filters=filters)
    group = h5file.create_group("/", 'Data')
    atom = tables.UInt8Atom()
    y = h5file.create_carray(group, 'y', atom=atom, title='Data targets',
                             shape=(num_rows, 1), filters=filters)
    for i in range(num_rows):
        y[i] = i
    h5file.flush()
    h5file.close()
    self.dataset = PytablesDataset('tmp.h5', ('y',), 20, 500)
    self.dataset_default = PytablesDataset('tmp.h5', ('y',))
Example #27
Source File: sim_pe_reads.py From WASP with Apache License 2.0 | 5 votes |
def make_hap_seqs(haps, options):
    """Makes a chromosome sequence for each haplotype"""
    seq_h5 = tables.open_file(options.seq)

    node_name = "/%s" % options.chrom
    if node_name not in seq_h5:
        raise ValueError("chromosome %s is not in sequence h5 file" %
                         options.chrom)

    seq_node = seq_h5.get_node("/%s" % options.chrom)
    seq_array1 = seq_node[:]
    seq_array2 = np.array(seq_node[:])

    is_alt = (haps.hap1 == 1)
    seq_array1[haps.pos[is_alt] - 1] = haps.alt_allele[is_alt]
    is_alt = (haps.hap2 == 1)
    seq_array2[haps.pos[is_alt] - 1] = haps.alt_allele[is_alt]

    seq1 = "".join([chr(x) for x in seq_array1])
    seq2 = "".join([chr(x) for x in seq_array2])

    seq_h5.close()

    return seq1, seq2
Example #28
Source File: test_find_intersecting_snps.py From WASP with Apache License 2.0 | 5 votes |
def write_snp_tab_h5(self):
    snp_tab_h5 = tables.open_file(self.snp_tab_filename, "w")

    class SNPTab(tables.IsDescription):
        name = tables.StringCol(16)
        pos = tables.Int64Col()
        allele1 = tables.StringCol(100)
        allele2 = tables.StringCol(100)

    chrom_tables = {}
    snp_num = 0
    for snp in self.snp_list:
        if snp[0] in chrom_tables:
            table = chrom_tables[snp[0]]
        else:
            table = snp_tab_h5.create_table(snp_tab_h5.root, snp[0], SNPTab)
            chrom_tables[snp[0]] = table

        row = table.row
        snp_num += 1
        row['name'] = "snp%d" % snp_num
        row['pos'] = snp[1]
        row['allele1'] = snp[2]
        row['allele2'] = snp[3]
        row.append()
        table.flush()

    self.write_hap_samples(snp_tab_h5)

    snp_tab_h5.close()
Example #29
Source File: test_find_intersecting_snps.py From WASP with Apache License 2.0 | 5 votes |
def write_snp_index_h5(self):
    atom = tables.Int16Atom(dflt=0)
    zlib_filter = tables.Filters(complevel=1, complib="zlib")

    snp_index_h5 = tables.open_file(self.snp_index_filename, "w")

    snp_index = 0
    chrom_arrays = {}
    chrom_lengths = self.get_chrom_lengths()

    for snp in self.snp_list:
        if snp[0] in chrom_arrays:
            carray = chrom_arrays[snp[0]]
        else:
            # create CArray for this chromosome
            shape = [chrom_lengths[snp[0]]]
            carray = snp_index_h5.create_carray(snp_index_h5.root,
                                                snp[0], atom, shape,
                                                filters=zlib_filter)
            carray[:] = -1
            chrom_arrays[snp[0]] = carray

        pos = snp[1]
        carray[pos-1] = snp_index
        snp_index += 1

    self.write_hap_samples(snp_index_h5)

    snp_index_h5.close()
Example #30
Source File: hdf5.py From attention-lvcsr with MIT License | 5 votes |
def __init__(self, path, sources, start=0, stop=None, data_node='Data',
             sources_in_file=None):
    if sources_in_file is None:
        sources_in_file = sources
    self.sources_in_file = sources_in_file
    self.provides_sources = sources
    self.path = path
    self.data_node = data_node
    self.start = start
    self.stop = stop
    self.nodes = None
    self.open_file(path)
    super(PytablesDataset, self).__init__(self.provides_sources)