Python h5py.special_dtype() Examples
The following are 30 code examples of h5py.special_dtype(), collected from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the h5py module, or try the search function.
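As a quick orientation before the examples, here is a minimal, self-contained sketch of the three common uses of h5py.special_dtype(): variable-length strings, variable-length numeric arrays, and enumerated integers. The file name is arbitrary, and on h5py 2.10 and later the named helpers h5py.string_dtype(), h5py.vlen_dtype() and h5py.enum_dtype() are the preferred spellings for the same types.

import h5py
import numpy as np

str_dt = h5py.special_dtype(vlen=str)                               # variable-length strings
vlen_dt = h5py.special_dtype(vlen=np.dtype('uint8'))                # variable-length uint8 arrays
enum_dt = h5py.special_dtype(enum=(np.uint8, {'OFF': 0, 'ON': 1}))  # enumerated integers

with h5py.File('example.h5', 'w') as f:
    names = f.create_dataset('names', (2,), dtype=str_dt)
    names[:] = ['foo', 'bar']

    blobs = f.create_dataset('blobs', (2,), dtype=vlen_dt)
    blobs[0] = np.arange(3, dtype=np.uint8)
    blobs[1] = np.arange(5, dtype=np.uint8)

    switches = f.create_dataset('switches', (2,), dtype=enum_dt)
    switches[:] = [0, 1]

# check_dtype recovers the special type information from a dtype
assert h5py.check_dtype(vlen=str_dt) is str
assert h5py.check_dtype(enum=enum_dt) == {'OFF': 0, 'ON': 1}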
Example #1
Source File: ilsvrc2010.py From attention-lvcsr with MIT License

def prepare_hdf5_file(hdf5_file, n_train, n_valid, n_test):
    """Create datasets within a given HDF5 file.

    Parameters
    ----------
    hdf5_file : :class:`h5py.File` instance
        HDF5 file handle to which to write.
    n_train : int
        The number of training set examples.
    n_valid : int
        The number of validation set examples.
    n_test : int
        The number of test set examples.

    """
    n_total = n_train + n_valid + n_test
    splits = create_splits(n_train, n_valid, n_test)
    hdf5_file.attrs['split'] = H5PYDataset.create_split_array(splits)
    vlen_dtype = h5py.special_dtype(vlen=numpy.dtype('uint8'))
    hdf5_file.create_dataset('encoded_images', shape=(n_total,), dtype=vlen_dtype)
    hdf5_file.create_dataset('targets', shape=(n_total, 1), dtype=numpy.int16)
    hdf5_file.create_dataset('filenames', shape=(n_total, 1), dtype='S32')
Example #2
Source File: million_song_dataset.py From implicit with MIT License

def _hfd5_from_dataframe(data, track_info, outputfilename):
    # create a sparse matrix of all the users/plays
    plays = coo_matrix((data['plays'].astype(np.float32),
                        (data['track'].cat.codes.copy(),
                         data['user'].cat.codes.copy()))).tocsr()

    with h5py.File(outputfilename, "w") as f:
        g = f.create_group('track_user_plays')
        g.create_dataset("data", data=plays.data)
        g.create_dataset("indptr", data=plays.indptr)
        g.create_dataset("indices", data=plays.indices)

        dt = h5py.special_dtype(vlen=str)
        dset = f.create_dataset('track', track_info.shape, dtype=dt)
        dset[:] = track_info

        user = list(data['user'].cat.categories)
        dset = f.create_dataset('user', (len(user),), dtype=dt)
        dset[:] = user
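Reading one of these variable-length string datasets back is just a matter of opening the file and slicing. A small read-back sketch (the file name is hypothetical; the dataset names follow the example above, and on h5py 3.x variable-length string data is returned as bytes unless .asstr() is used):

import h5py

with h5py.File('msd_plays.h5', 'r') as f:        # hypothetical file name
    plays_data = f['track_user_plays/data'][:]   # CSR components written above
    tracks = f['track'][:]                       # object array of track strings
    # On h5py >= 3.0 these come back as bytes; f['track'].asstr()[:] gives str.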
Example #3
Source File: sketchfab.py From implicit with MIT License

def _hfd5_from_dataframe(data, outputfilename):
    items = data['mid'].cat.codes.copy()
    users = data['uid'].cat.codes.copy()
    values = np.ones(len(items)).astype(np.float32)

    # create a sparse matrix of all the item/users/likes
    likes = coo_matrix((values, (items, users))).astype(np.float32).tocsr()

    with h5py.File(outputfilename, "w") as f:
        g = f.create_group('item_user_likes')
        g.create_dataset("data", data=likes.data)
        g.create_dataset("indptr", data=likes.indptr)
        g.create_dataset("indices", data=likes.indices)

        dt = h5py.special_dtype(vlen=str)
        item = list(data['mid'].cat.categories)
        dset = f.create_dataset('item', (len(item),), dtype=dt)
        dset[:] = item

        user = list(data['uid'].cat.categories)
        dset = f.create_dataset('user', (len(user),), dtype=dt)
        dset[:] = user
Example #4
Source File: lastfm.py From implicit with MIT License

def _hfd5_from_dataframe(data, outputfilename):
    # create a sparse matrix of all the users/plays
    plays = coo_matrix((data['plays'].astype(np.float32),
                        (data['artist'].cat.codes.copy(),
                         data['user'].cat.codes.copy()))).tocsr()

    with h5py.File(outputfilename, "w") as f:
        g = f.create_group('artist_user_plays')
        g.create_dataset("data", data=plays.data)
        g.create_dataset("indptr", data=plays.indptr)
        g.create_dataset("indices", data=plays.indices)

        dt = h5py.special_dtype(vlen=str)
        artist = list(data['artist'].cat.categories)
        dset = f.create_dataset('artist', (len(artist),), dtype=dt)
        dset[:] = artist

        user = list(data['user'].cat.categories)
        dset = f.create_dataset('user', (len(user),), dtype=dt)
        dset[:] = user
Example #5
Source File: h5_test.py From keras-image-segmentation with MIT License

def write_data(h5py_file, mode, x_paths, y_paths):
    num_data = len(x_paths)

    uint8_dt = h5py.special_dtype(vlen=np.uint8)
    string_dt = h5py.special_dtype(vlen=str)

    group = h5py_file.create_group(mode)
    h5_name = group.create_dataset('name', shape=(num_data,), dtype=string_dt)
    h5_image = group.create_dataset('image', shape=(num_data,), dtype=uint8_dt)
    h5_label = group.create_dataset('label', shape=(num_data,), dtype=uint8_dt)

    h5_image.attrs['size'] = [256, 512, 3]
    h5_label.attrs['size'] = [256, 512, 1]

    for i in range(num_data):
        x_img = cv2.imread(x_paths[i], 1)
        y_img = cv2.imread(y_paths[i], 0)

        x_img = cv2.resize(x_img, None, fx=0.25, fy=0.25, interpolation=cv2.INTER_LINEAR)
        y_img = cv2.resize(y_img, None, fx=0.25, fy=0.25, interpolation=cv2.INTER_NEAREST)

        h5_image[i] = x_img.flatten()
        h5_label[i] = y_img.flatten()
        h5_name[i] = os.path.basename(x_paths[i])
        # break
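The matching read side is not shown in this example; below is a hedged sketch of how the flattened variable-length images could be restored using the 'size' attribute stored above (the file name is hypothetical, the group and dataset names follow the example):

import h5py
import numpy as np

with h5py.File('segmentation.h5', 'r') as f:   # hypothetical file name
    group = f['train']                         # the `mode` used when writing
    size = group['image'].attrs['size']        # e.g. [256, 512, 3]
    flat = group['image'][0]                   # one variable-length uint8 vector
    image = flat.reshape(size)                 # back to height x width x channels
    name = group['name'][0]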
Example #6
Source File: pyMtsf.py From PySimulator with GNU Lesser General Public License v3.0

def WriteUnits(self):
    if len(self.units) == 0:
        return
    # maxLenTypeName = self._getMaxLength([x.name for x in self.units])
    numpyDataType = numpy.dtype({'names': ['name', 'factor', 'offset', 'mode'],
                                 'formats': [h5py.special_dtype(vlen=unicode),
                                             'double',
                                             'double',
                                             h5py.special_dtype(enum=(numpy.uint8, {'BaseUnit': 0, 'Unit': 1, 'DefaultDisplayUnit': 2}))]})  # 'uint8']})
    dataset = self.description.create_dataset('Units', (len(self.units), 1),
                                              dtype=numpyDataType,
                                              maxshape=(len(self.units), 1),
                                              compression='gzip')
    allData = []
    for unit in self.units:
        allData.append((unit.name, unit.factor, unit.offset, unit.mode))
    dataset[:, 0] = allData
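If the enum mapping ever needs to be recovered (for instance to translate the stored 'mode' integers back to their names), h5py.check_dtype() extracts it from the dtype. A minimal sketch using the same enum definition as above:

import h5py
import numpy

enum_dt = h5py.special_dtype(
    enum=(numpy.uint8, {'BaseUnit': 0, 'Unit': 1, 'DefaultDisplayUnit': 2}))
mapping = h5py.check_dtype(enum=enum_dt)      # {'BaseUnit': 0, 'Unit': 1, 'DefaultDisplayUnit': 2}
names = {v: k for k, v in mapping.items()}    # names[2] == 'DefaultDisplayUnit'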
Example #7
Source File: movielens.py From implicit with MIT License

def _hfd5_from_dataframe(ratings, movies, outputfilename):
    # transform ratings dataframe into a sparse matrix
    m = coo_matrix((ratings['rating'].astype(np.float32),
                    (ratings['movieId'], ratings['userId']))).tocsr()

    with h5py.File(outputfilename, "w") as f:
        # write out the ratings matrix
        g = f.create_group('movie_user_ratings')
        g.create_dataset("data", data=m.data)
        g.create_dataset("indptr", data=m.indptr)
        g.create_dataset("indices", data=m.indices)

        # write out the titles as a numpy array
        titles = np.empty(shape=(movies.movieId.max() + 1,), dtype=np.object)
        titles[movies.movieId] = movies.title
        dt = h5py.special_dtype(vlen=str)
        dset = f.create_dataset('movie', (len(titles),), dtype=dt)
        dset[:] = titles
Example #8
Source File: data_generator.py From GroundedTranslation with BSD 3-Clause "New" or "Revised" License

def set_predicted_description(self, split, data_key, sentence):
    '''
    Set the predicted sentence tokens in the data_key group,
    creating the group if necessary, or erasing the current value if necessary.
    '''

    if self.openmode != "r+":
        # forcefully quit when trying to write to a read-only file
        raise RuntimeError("Dataset is read-only, try again with --h5_writable")

    dataset_key = 'predicted_description'

    try:
        predicted_text = self.dataset[split][data_key].create_dataset(
            dataset_key, (1,), dtype=h5py.special_dtype(vlen=unicode))
    except RuntimeError:
        # the dataset already exists, erase it and create an empty space
        del self.dataset[split][data_key][dataset_key]
        predicted_text = self.dataset[split][data_key].create_dataset(
            dataset_key, (1,), dtype=h5py.special_dtype(vlen=unicode))

    predicted_text[0] = " ".join([x for x in sentence])
Example #9
Source File: vectors.py From text_embedding with MIT License

def text2hdf5(textfile, hdf5file, **kwargs):
    '''converts word embeddings file from text to HDF5 format
    Args:
        textfile: word embeddings file in format "word float ... float\n"
        hdf5file: output file ; will have keys 'words' and 'vectors'
        kwargs: passed to load
    Returns:
        None
    '''

    words, vectors = zip(*load(textfile, **kwargs))
    f = h5py.File(hdf5file)
    f.create_dataset('words', (len(words),), dtype=h5py.special_dtype(vlen=str))
    for i, word in enumerate(words):
        f['words'][i] = word
    f.create_dataset('vectors', data=np.vstack(vectors))
    f.close()
Example #10
Source File: hdf5.py From pyPESTO with BSD 3-Clause "New" or "Revised" License

def write_string_array(f: h5py.Group, path: str, strings: Collection) -> None:
    """
    Write string array to hdf5

    Parameters
    -------------
    f:
        h5py.Group where dataset should be created
    path:
        path of the dataset to create
    strings:
        list of strings to be written to f
    """
    dt = h5py.special_dtype(vlen=str)
    dset = f.create_dataset(path, (len(strings),), dtype=dt)
    dset[:] = [s.encode('utf8') for s in strings]
Example #11
Source File: ilsvrc2012.py From fuel with MIT License

def prepare_hdf5_file(hdf5_file, n_train, n_valid, n_test):
    """Create datasets within a given HDF5 file.

    Parameters
    ----------
    hdf5_file : :class:`h5py.File` instance
        HDF5 file handle to which to write.
    n_train : int
        The number of training set examples.
    n_valid : int
        The number of validation set examples.
    n_test : int
        The number of test set examples.

    """
    n_total = n_train + n_valid + n_test
    n_labeled = n_train + n_valid
    splits = create_splits(n_train, n_valid, n_test)
    hdf5_file.attrs['split'] = H5PYDataset.create_split_array(splits)
    vlen_dtype = h5py.special_dtype(vlen=numpy.dtype('uint8'))
    hdf5_file.create_dataset('encoded_images', shape=(n_total,), dtype=vlen_dtype)
    hdf5_file.create_dataset('targets', shape=(n_labeled, 1), dtype=numpy.int16)
    hdf5_file.create_dataset('filenames', shape=(n_total, 1), dtype='S32')
Example #12
Source File: ilsvrc2010.py From fuel with MIT License

def prepare_hdf5_file(hdf5_file, n_train, n_valid, n_test):
    """Create datasets within a given HDF5 file.

    Parameters
    ----------
    hdf5_file : :class:`h5py.File` instance
        HDF5 file handle to which to write.
    n_train : int
        The number of training set examples.
    n_valid : int
        The number of validation set examples.
    n_test : int
        The number of test set examples.

    """
    n_total = n_train + n_valid + n_test
    splits = create_splits(n_train, n_valid, n_test)
    hdf5_file.attrs['split'] = H5PYDataset.create_split_array(splits)
    vlen_dtype = h5py.special_dtype(vlen=numpy.dtype('uint8'))
    hdf5_file.create_dataset('encoded_images', shape=(n_total,), dtype=vlen_dtype)
    hdf5_file.create_dataset('targets', shape=(n_total, 1), dtype=numpy.int16)
    hdf5_file.create_dataset('filenames', shape=(n_total, 1), dtype='S32')
Example #13
Source File: hdf5_dataset_writer.py From calamari with Apache License 2.0

def finish_chunck(self):
    if len(self.text) == 0:
        return

    codec = self.compute_codec()

    filename = "{}_{:03d}{}".format(self.output_filename, self.current_chunk,
                                    DataSetType.gt_extension(DataSetType.HDF5))
    self.files.append(filename)
    file = h5py.File(filename, 'w')
    dti32 = h5py.special_dtype(vlen=np.dtype('int32'))
    dtui8 = h5py.special_dtype(vlen=np.dtype('uint8'))
    file.create_dataset('transcripts', (len(self.text),), dtype=dti32, compression='gzip')
    file.create_dataset('images_dims', data=[d.shape for d in self.data], dtype=int)
    file.create_dataset('images', (len(self.text),), dtype=dtui8, compression='gzip')
    file.create_dataset('codec', data=list(map(ord, codec)))
    file['transcripts'][...] = [list(map(codec.index, d)) for d in self.text]
    file['images'][...] = [d.reshape(-1) for d in self.data]
    file.close()

    self.current_chunk += 1
    self.data = []
    self.text = []
Example #14
Source File: test_datatype.py From GraphicDesignPatternByPython with MIT License

def test_compound_vlen(self):
    vidt = h5py.special_dtype(vlen=np.uint8)
    eidt = h5py.special_dtype(enum=(np.uint8, {'OFF': 0, 'ON': 1}))

    for np_align in (False, True):
        dt = np.dtype([
            ('a', eidt),
            ('foo', vidt),
            ('bar', vidt),
            ('switch', eidt)], align=np_align)
        np_offsets = [dt.fields[i][1] for i in dt.names]

        for logical in (False, True):
            if logical and np_align:
                # Vlen types have different size in the numpy struct
                self.assertRaises(TypeError, h5py.h5t.py_create, dt,
                                  logical=logical)
            else:
                ht = h5py.h5t.py_create(dt, logical=logical)
                offsets = [ht.get_member_offset(i)
                           for i in range(ht.get_nmembers())]
                if np_align:
                    self.assertEqual(np_offsets, offsets)
Example #15
Source File: test_datatype.py From GraphicDesignPatternByPython with MIT License

def test_vlen_enum(self):
    fname = self.mktemp()
    arr1 = [[1], [1, 2]]
    dt1 = h5py.special_dtype(vlen=h5py.special_dtype(
        enum=('i', dict(foo=1, bar=2))))

    with h5py.File(fname, 'w') as f:
        df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
        df1[:] = np.array(arr1)

    with h5py.File(fname, 'r') as f:
        df2 = f['test']
        dt2 = df2.dtype
        arr2 = [e.tolist() for e in df2[:]]

    self.assertEqual(arr1, arr2)
    self.assertEqual(h5py.check_dtype(enum=h5py.check_dtype(vlen=dt1)),
                     h5py.check_dtype(enum=h5py.check_dtype(vlen=dt2)))
Example #16
Source File: test_datatype.py From GraphicDesignPatternByPython with MIT License

def test_compound_vlen_enum(self):
    eidt = h5py.special_dtype(enum=(np.uint8, {'OFF': 0, 'ON': 1}))
    vidt = h5py.special_dtype(vlen=np.uint8)

    def a(items):
        return np.array(items, dtype=np.uint8)

    f = self.f

    dt_vve = np.dtype([
        ('foo', vidt),
        ('bar', vidt),
        ('switch', eidt)])
    vve = f.create_dataset('dt_vve', shape=(2,), dtype=dt_vve)
    data = np.array([(a([1, 2, 3]), a([1, 2]), 1),
                     (a([]), a([2, 4, 6]), 0), ], dtype=dt_vve)
    vve[:] = data
    actual = vve[:]
    self.assertVlenArrayEqual(data['foo'], actual['foo'])
    self.assertVlenArrayEqual(data['bar'], actual['bar'])
    self.assertArrayEqual(data['switch'], actual['switch'])
Example #17
Source File: _utils.py From PyINT with GNU General Public License v3.0

def write_h5(datasetDict, out_file, metadata=None, ref_file=None, compression=None):
    if os.path.isfile(out_file):
        print('delete exsited file: {}'.format(out_file))
        os.remove(out_file)

    print('create HDF5 file: {} with w mode'.format(out_file))
    dt = h5py.special_dtype(vlen=np.dtype('float64'))
    with h5py.File(out_file, 'w') as f:
        for dsName in datasetDict.keys():
            data = datasetDict[dsName]
            ds = f.create_dataset(dsName, data=data, compression=compression)

        for key, value in metadata.items():
            f.attrs[key] = str(value)
            # print(key + ': ' + value)

    print('finished writing to {}'.format(out_file))
    return out_file

######################################################################
Example #18
Source File: h5f.py From costar_plan with Apache License 2.0

def write(self, example, filename, image_types=[]):
    '''
    Write an example out to disk.

    status: success, failure or error.failure
    '''
    filename = os.path.join(self.name, filename)
    f = h5f.File(filename, 'w')
    if image_types != []:
        dt = h5f.special_dtype(vlen=bytes)
        for (img_type_str, img_format_str) in image_types:
            f.create_dataset("type_" + img_type_str, data=[img_format_str])
    for key, value in example.items():
        if self.verbose > 0:
            print('H5fDataset writing key: ' + str(key))
        f.create_dataset(key, data=value)
    f.close()
Example #19
Source File: h5ad.py From anndata with BSD 3-Clause "New" or "Revised" License

def write_series(group, key, series, dataset_kwargs=MappingProxyType({})):
    # group here is an h5py type, otherwise categoricals won’t write
    if series.dtype == object:  # Assuming it’s string
        group.create_dataset(
            key,
            data=series.values,
            dtype=h5py.special_dtype(vlen=str),
            **dataset_kwargs,
        )
    elif is_categorical_dtype(series):
        # This should work for categorical Index and Series
        categorical: pd.Categorical = series.values
        categories: np.ndarray = categorical.categories.values
        codes: np.ndarray = categorical.codes
        category_key = f"__categories/{key}"

        write_array(group, category_key, categories, dataset_kwargs=dataset_kwargs)
        write_array(group, key, codes, dataset_kwargs=dataset_kwargs)

        group[key].attrs["categories"] = group[category_key].ref
        group[category_key].attrs["ordered"] = categorical.ordered
    else:
        group[key] = series.values
Example #20
Source File: legacyapi.py From h5netcdf with BSD 3-Clause "New" or "Revised" License

def createVariable(self, varname, datatype, dimensions=(), zlib=False,
                   complevel=4, shuffle=True, fletcher32=False,
                   chunksizes=None, fill_value=None):
    if len(dimensions) == 0:  # it's a scalar
        # rip off chunk and filter options for consistency with netCDF4-python
        chunksizes = None
        zlib = False
        fletcher32 = False
        shuffle = False

    if datatype is str:
        datatype = h5py.special_dtype(vlen=unicode)

    kwds = {}
    if zlib:
        # only add compression related keyword arguments if relevant (h5py
        # chokes otherwise)
        kwds['compression'] = 'gzip'
        kwds['compression_opts'] = complevel
        kwds['shuffle'] = shuffle

    return super(Group, self).create_variable(
        varname, dimensions, dtype=datatype, fletcher32=fletcher32,
        chunks=chunksizes, fillvalue=fill_value, **kwds)
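Note that `unicode` only exists on Python 2; under Python 3 the same variable-length string type is requested with `str`. A minimal sketch of the equivalent fallback (the helper name is made up for illustration):

import h5py

def vlen_unicode_dtype():
    # Python 3 spelling of the `special_dtype(vlen=unicode)` call used above;
    # on h5py >= 2.10, h5py.string_dtype() is the preferred equivalent.
    return h5py.special_dtype(vlen=str)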
Example #21
Source File: test_dataset.py From GraphicDesignPatternByPython with MIT License

def test_convert(self):
    dt = h5py.special_dtype(vlen=int)
    ds = self.f.create_dataset('vlen', (3,), dtype=dt)
    ds[0] = np.array([1.4, 1.2])
    ds[1] = np.array([1.2])
    ds[2] = [1.2, 2, 3]
    self.assertArrayEqual(ds[0], np.array([1, 1]))
    self.assertArrayEqual(ds[1], np.array([1]))
    self.assertArrayEqual(ds[2], np.array([1, 2, 3]))

    ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)])
    self.assertArrayEqual(ds[0], np.arange(5))
    self.assertArrayEqual(ds[1], np.arange(4))

    ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]),
                        np.array([0.2, 1.2, 2.2])])
    self.assertArrayEqual(ds[0], np.arange(3))
    self.assertArrayEqual(ds[1], np.arange(3))
Example #22
Source File: test_dataset.py From GraphicDesignPatternByPython with MIT License

def test_int(self):
    dt = h5py.special_dtype(vlen=int)
    ds = self.f.create_dataset('vlen', (4,), dtype=dt)
    ds[0] = np.arange(3)
    ds[1] = np.arange(0)
    ds[2] = [1, 2, 3]
    ds[3] = np.arange(1)
    self.assertArrayEqual(ds[0], np.arange(3))
    self.assertArrayEqual(ds[1], np.arange(0))
    self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
    self.assertArrayEqual(ds[1], np.arange(0))

    ds[0:2] = np.array([np.arange(5), np.arange(4)])
    self.assertArrayEqual(ds[0], np.arange(5))
    self.assertArrayEqual(ds[1], np.arange(4))

    ds[0:2] = np.array([np.arange(3), np.arange(3)])
    self.assertArrayEqual(ds[0], np.arange(3))
    self.assertArrayEqual(ds[1], np.arange(3))
Example #23
Source File: utils.py From acerta-abide with GNU General Public License v2.0

def hdf5_handler(filename, mode="r"):
    h5py.File(filename, "a").close()
    propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
    settings = list(propfaid.get_cache())
    settings[1] = 0
    settings[2] = 0
    propfaid.set_cache(*settings)
    with contextlib.closing(h5py.h5f.open(filename, fapl=propfaid)) as fid:
        f = h5py.File(fid, mode)
        # f.attrs.create(dtype=h5py.special_dtype(vlen=str))
        return f
Example #24
Source File: test_dataset.py From keras-lambda with MIT License

def test_vlen_bytes(self):
    """ Vlen bytes dataset maps to vlen ascii in the file """
    dt = h5py.special_dtype(vlen=bytes)
    ds = self.f.create_dataset('x', (100,), dtype=dt)
    tid = ds.id.get_type()
    self.assertEqual(type(tid), h5py.h5t.TypeStringID)
    self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
Example #25
Source File: global_attribute_manager.py From loompy with BSD 2-Clause "Simplified" License

def __setattr__(self, name: str, val: Any) -> None:
    if name.startswith("!"):
        super(GlobalAttributeManager, self).__setattr__(name[1:], val)
    elif "/" in name:
        raise KeyError("Attribute name cannot contain slash (/)")
    else:
        if self.f is not None:
            if loompy.compare_loom_spec_version(self.f, "3.0.0") < 0 and "attrs" not in self.f["/"]:
                normalized = loompy.normalize_attr_values(val, False)
                self.f.attrs[name] = normalized
                self.f.flush()
                val = self.f.attrs[name]  # Read it back in to ensure it's synced and normalized
                normalized = loompy.materialize_attr_values(val)
                self.__dict__["storage"][name] = normalized
            else:
                normalized = loompy.normalize_attr_values(val, True)
                if name in self.f["attrs"]:
                    del self.f["attrs"][name]
                if not np.isscalar(normalized) and normalized.dtype == np.object_:
                    self.ds._file.create_dataset("attrs/" + name, data=normalized, dtype=h5py.special_dtype(vlen=str))
                else:
                    self.f["attrs"][name] = normalized
                self.f.flush()
                val = self.f["attrs"][name][()]  # Read it back in to ensure it's synced and normalized
                normalized = loompy.materialize_attr_values(val)
                self.__dict__["storage"][name] = normalized
Example #26
Source File: datasets.py From pysaliency with MIT License

def to_hdf5(self, target):
    """ Write FileStimuli to hdf5 file or hdf5 group """

    target.attrs['type'] = np.string_('FileStimuli')
    target.attrs['version'] = np.string_('2.1')

    import h5py
    # make sure everything is unicode
    hdf5_filename = target.file.filename
    hdf5_directory = os.path.dirname(hdf5_filename)
    relative_filenames = [os.path.relpath(filename, hdf5_directory) for filename in self.filenames]
    decoded_filenames = [decode_string(filename) for filename in relative_filenames]
    encoded_filenames = [filename.encode('utf8') for filename in decoded_filenames]

    target.create_dataset(
        'filenames',
        data=np.array(encoded_filenames),
        dtype=h5py.special_dtype(vlen=str)
    )

    shape_dataset = target.create_dataset(
        'shapes',
        (len(self), ),
        dtype=h5py.special_dtype(vlen=np.dtype('int64'))
    )
    for n, shape in enumerate(self.shapes):
        shape_dataset[n] = np.array(shape)

    for attribute_name, attribute_value in self.attributes.items():
        target.create_dataset(attribute_name, data=attribute_value)

    target.attrs['__attributes__'] = np.string_(json.dumps(self.__attributes__))

    target.attrs['size'] = len(self)
Example #27
Source File: hdf5_dataset_writer.py From aiexamples with Apache License 2.0

def store_class_labels(self, class_labels):
    dt = h5py.special_dtype(vlen=str)
    labelset = self.db.create_dataset("label_names", (len(class_labels),), dtype=dt)
    labelset[:] = class_labels
Example #28
Source File: lm_utils.py From espnet with Apache License 2.0

def load_dataset(path, label_dict, outdir=None):
    """Load and save HDF5 that contains a dataset and stats for LM

    Args:
        path (str): The path of an input text dataset file
        label_dict (dict[str, int]): dictionary that maps token label string to its ID number
        outdir (str): The path of an output dir

    Returns:
        tuple[list[np.ndarray], int, int]: Tuple of token IDs in np.int32 converted by `read_tokens`
        the number of tokens by `count_tokens`, and the number of OOVs by `count_tokens`
    """
    if outdir is not None:
        os.makedirs(outdir, exist_ok=True)
        filename = outdir + "/" + os.path.basename(path) + ".h5"
        if os.path.exists(filename):
            logging.info(f"loading binary dataset: {filename}")
            f = h5py.File(filename, "r")
            return f["data"][:], f["n_tokens"][()], f["n_oovs"][()]
    else:
        logging.info("skip dump/load HDF5 because the output dir is not specified")

    logging.info(f"reading text dataset: {path}")
    ret = read_tokens(path, label_dict)
    n_tokens, n_oovs = count_tokens(ret, label_dict["<unk>"])
    if outdir is not None:
        logging.info(f"saving binary dataset: {filename}")
        with h5py.File(filename, "w") as f:
            # http://docs.h5py.org/en/stable/special.html#arbitrary-vlen-data
            data = f.create_dataset(
                "data", (len(ret),), dtype=h5py.special_dtype(vlen=np.int32)
            )
            data[:] = ret
            f["n_tokens"] = n_tokens
            f["n_oovs"] = n_oovs
    return ret, n_tokens, n_oovs
Example #29
Source File: dbfun_lookuptable.py From ABXpy with MIT License

def get_dtype(data):
    str_dtype = h5py.special_dtype(vlen=unicode)
    # allow for the use of strings
    if isinstance(data[0], str):
        dtype = str_dtype
    # could add some checks that the dtype is one of those supported by h5 ?
    elif hasattr(data, 'dtype'):
        dtype = data.dtype
    else:
        dtype = numpy.array(data).dtype
    return dtype


# item_size given in bytes, size_in_mem given in kilobytes
Example #30
Source File: embed.py From triplet-reid-pytorch with MIT License

def write_to_h5(output_file, model, endpoints, num_augmentations, dataloader, dataset, keys=["emb"]):
    """ Writes model to h5 """
    print(len(dataloader), len(dataset))
    print("Model dimension is {}".format(model.module.dim))
    if len(keys) == 0:
        raise RuntimeError("Plase specify at least one key that should be written to file.")

    with h5py.File(output_file) as f_out:
        # Dataparallel class!
        datasets = {}
        for key in keys:
            datasets[key] = f_out.create_dataset(
                key,
                shape=(len(dataset), num_augmentations) + model.module.dimensions[key],
                dtype=np.float32)

        for key in dataset.header:
            datasets[key] = f_out.create_dataset(
                key, shape=(len(dataset),), dtype=h5py.special_dtype(vlen=str))

        start_idx = 0
        for endpoints, rows in run_forward_pass(dataloader, model, endpoints):
            # TODO this will not work if for some reason some endpoints are shorter than others
            for key in keys:
                end_idx = start_idx + len(endpoints[key])
                datasets[key][start_idx:end_idx] = endpoints[key]

            for key, values in rows.items():
                end_idx = start_idx + len(values)
                datasets[key][start_idx:end_idx] = np.asarray(values)

            start_idx = end_idx

    return output_file