Python tables.Int32Atom() Examples
The following are 4 code examples of tables.Int32Atom(). Each example is drawn from an open-source project; the original source file and license are noted above it. You may also want to check out the other available functions and classes of the tables module.
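For context before the examples: an Int32Atom describes the dtype of the cells in a chunked PyTables dataset (CArray, EArray, or VLArray). Below is a minimal, self-contained sketch of that usage; the file name, node name, and data are illustrative, not taken from any example on this page.

import numpy as np
import tables

# Int32Atom defines the cell dtype of a chunked dataset; here it backs an
# extendable, initially empty 1-D EArray of int32 labels (PyTables 3.x API).
with tables.open_file('example.h5', mode='w') as f:
    labels = f.create_earray(f.root, 'labels', tables.Int32Atom(), shape=(0,))
    labels.append(np.arange(5, dtype=np.int32))  # grow along the extendable axis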
Example #1
Source File: svhn.py from batchup, MIT License
def fetch_svhn_extra(source_paths, target_path):
    extra_path = source_paths[0]

    print('Converting {} to HDF5 (compressed)...'.format(extra_path))
    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(
        g_out, 'extra_X_u8', tables.UInt8Atom(), (0, 3, 32, 32),
        filters=filters)
    y_arr = f_out.create_earray(
        g_out, 'extra_y', tables.Int32Atom(), (0,), filters=filters)

    # Load in the extra data Matlab file
    _insert_svhn_matlab_to_h5(X_u8_arr, y_arr, extra_path)

    f_out.close()

    return target_path
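Reading the file this writes is symmetric. A minimal read-back sketch, where the node names follow the create_earray calls above and the path stands in for whatever was passed as target_path:

import tables

# Read back the arrays written by fetch_svhn_extra (sketch; path illustrative).
with tables.open_file('svhn_extra.h5', mode='r') as f:
    X_u8 = f.root.svhn.extra_X_u8[:]  # uint8 images, shape (N, 3, 32, 32)
    y = f.root.svhn.extra_y[:]        # int32 labels, shape (N,)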
Example #2
Source File: dense_design_matrix.py from TextDetector, GNU General Public License v3.0
def init_hdf5(self, path, shapes, title="Pytables Dataset", y_dtype='float'):
    """
    Initializes the hdf5 file into which the data will be stored.
    This must be called before calling fill_hdf5.

    Parameters
    ----------
    path : string
        The name of the hdf5 file.
    shapes : tuple
        The shapes of X and y.
    title : string, optional
        Name of the dataset. e.g. For SVHN, set this to "SVHN Dataset".
        "Pytables Dataset" is used as title, by default.
    y_dtype : string, optional
        Either 'float' or 'int'. Decides the type of pytables atom used to
        store the y data. By default 'float' type is used.
    """
    assert y_dtype in ['float', 'int'], (
        "y_dtype can be 'float' or 'int' only"
    )

    x_shape, y_shape = shapes

    # make pytables
    ensure_tables()
    h5file = tables.openFile(path, mode="w", title=title)
    gcolumns = h5file.createGroup(h5file.root, "Data", "Data")
    atom = (tables.Float32Atom() if config.floatX == 'float32'
            else tables.Float64Atom())
    h5file.createCArray(gcolumns, 'X', atom=atom, shape=x_shape,
                        title="Data values", filters=self.filters)

    if y_dtype != 'float':
        # For 1D ndarray of int labels, override the atom to integer
        atom = (tables.Int32Atom() if config.floatX == 'float32'
                else tables.Int64Atom())

    h5file.createCArray(gcolumns, 'y', atom=atom, shape=y_shape,
                        title="Data targets", filters=self.filters)

    return h5file, gcolumns
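This example uses the camelCase PyTables 2.x API (openFile, createGroup, createCArray); PyTables 3.x renamed these to open_file, create_group, and create_carray. A sketch of the same pattern with the modern names, where the path and shapes are illustrative and the fixed Float32Atom/Int32Atom choice stands in for the config.floatX check above:

import tables

# PyTables 3.x equivalent of the calls above (sketch; values illustrative).
h5file = tables.open_file('dataset.h5', mode='w', title='Pytables Dataset')
gcolumns = h5file.create_group(h5file.root, 'Data', 'Data')
h5file.create_carray(gcolumns, 'X', atom=tables.Float32Atom(),
                     shape=(100, 784), title='Data values')
h5file.create_carray(gcolumns, 'y', atom=tables.Int32Atom(),
                     shape=(100, 1), title='Data targets')
h5file.close()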
Example #3
Source File: dense_design_matrix.py from TextDetector, GNU General Public License v3.0
def resize(self, h5file, start, stop):
    """
    Resizes the X and y tables. This must be called before calling
    fill_hdf5.

    Parameters
    ----------
    h5file : hdf5 file handle
        Handle to an hdf5 object.
    start : int
        The start index to write data.
    stop : int
        The index of the record following the last record to be written.
    """
    ensure_tables()
    # TODO is there any smarter and more efficient way to this?
    data = h5file.getNode('/', "Data")
    try:
        gcolumns = h5file.createGroup('/', "Data_", "Data")
    except tables.exceptions.NodeError:
        h5file.removeNode('/', "Data_", 1)
        gcolumns = h5file.createGroup('/', "Data_", "Data")

    start = 0 if start is None else start
    stop = gcolumns.X.nrows if stop is None else stop

    atom = (tables.Float32Atom() if config.floatX == 'float32'
            else tables.Float64Atom())
    x = h5file.createCArray(gcolumns, 'X', atom=atom,
                            shape=((stop - start, data.X.shape[1])),
                            title="Data values", filters=self.filters)

    if np.issubdtype(data.y, int):
        # For 1D ndarray of int labels, override the atom to integer
        atom = (tables.Int32Atom() if config.floatX == 'float32'
                else tables.Int64Atom())

    y = h5file.createCArray(gcolumns, 'y', atom=atom,
                            shape=((stop - start, data.y.shape[1])),
                            title="Data targets", filters=self.filters)

    x[:] = data.X[start:stop]
    y[:] = data.y[start:stop]

    h5file.removeNode('/', "Data", 1)
    h5file.renameNode('/', "Data", "Data_")
    h5file.flush()
    return h5file, gcolumns
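The copy-then-rename dance here exists because a CArray has a fixed shape: to "resize", the method fills a fresh Data_ group with appropriately sized arrays, copies the requested slice, drops the old Data group, and renames Data_ into its place. The same swap with PyTables 3.x names, as a sketch that assumes a file already containing /Data/X (the path and slice bounds are illustrative):

import tables

# Copy-then-swap idiom with 3.x names (sketch; assumes /Data/X exists).
with tables.open_file('dataset.h5', mode='a') as h5file:
    data = h5file.get_node('/', 'Data')
    new = h5file.create_group('/', 'Data_', 'Data')
    x = h5file.create_carray(new, 'X', atom=tables.Float32Atom(),
                             shape=(10, data.X.shape[1]))
    x[:] = data.X[0:10]                              # copy the requested slice
    h5file.remove_node('/', 'Data', recursive=True)  # drop the old group
    h5file.rename_node('/', 'Data', name='Data_')    # Data_ becomes Data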
Example #4
Source File: voice.py from voice-corpus-tool, Mozilla Public License 2.0
def _hdf5(self, alphabet_path, hdf5_path, ninput=26, ncontext=9):
    skipped = []
    str_to_label = {}
    alphabet_size = 0
    with codecs.open(alphabet_path, 'r', 'utf-8') as fin:
        for line in fin:
            if line[0:2] == '\\#':
                line = '#\n'
            elif line[0] == '#':
                continue
            str_to_label[line[:-1]] = alphabet_size
            alphabet_size += 1

    def process_sample(sample):
        if len(sample.transcript) == 0:
            skipped.append(sample.original_name)
            return None
        sample.write()
        try:
            samplerate, audio = wav.read(sample.file.filename)
            transcript = np.asarray([str_to_label[c] for c in sample.transcript])
        except:
            skipped.append(sample.original_name)
            return None
        features = mfcc(audio, samplerate=samplerate, numcep=ninput)[::2]
        empty_context = np.zeros((ncontext, ninput), dtype=features.dtype)
        features = np.concatenate((empty_context, features, empty_context))
        if (2 * ncontext + len(features)) < len(transcript):
            skipped.append(sample.original_name)
            return None
        return features, len(features), transcript, len(transcript)

    out_data = self._map('Computing MFCC features...', self.samples,
                         process_sample)
    out_data = [s for s in out_data if s is not None]

    if len(skipped) > 0:
        log('WARNING - Skipped %d samples that had no transcription, '
            'had been too short for their transcription or had been '
            'missed:' % len(skipped))
        for s in skipped:
            log(' - Sample origin: "%s".' % s)
    if len(out_data) <= 0:
        log('No samples written to feature DB "%s".' % hdf5_path)
        return

    # list of tuples -> tuple of lists
    features, features_len, transcript, transcript_len = zip(*out_data)

    log('Writing feature DB...')
    with tables.open_file(hdf5_path, 'w') as file:
        features_dset = file.create_vlarray(file.root, 'features',
                                            tables.Float32Atom(),
                                            filters=tables.Filters(complevel=1))
        # VLArray atoms need to be 1D, so flatten feature array
        for f in features:
            features_dset.append(np.reshape(f, -1))

        features_len_dset = file.create_array(file.root, 'features_len',
                                              features_len)

        transcript_dset = file.create_vlarray(file.root, 'transcript',
                                              tables.Int32Atom(),
                                              filters=tables.Filters(complevel=1))
        for t in transcript:
            transcript_dset.append(t)

        transcript_len_dset = file.create_array(file.root, 'transcript_len',
                                                transcript_len)

    log('Wrote features of %d samples to feature DB "%s".' %
        (len(features), hdf5_path))
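Because VLArray rows must be one-dimensional, the feature matrices are flattened on write, so a reader has to undo that reshape. A minimal read-back sketch; the path is illustrative, and ninput must match the numcep value used by the writer:

import numpy as np
import tables

# Read the variable-length feature DB back (sketch; path illustrative).
ninput = 26  # must match the writer's numcep
with tables.open_file('features.h5', mode='r') as f:
    # Each VLArray row is a flattened (time, ninput) feature matrix.
    features = [row.reshape(-1, ninput) for row in f.root.features]
    transcripts = [np.asarray(t) for t in f.root.transcript]
    features_len = f.root.features_len[:]
    transcript_len = f.root.transcript_len[:]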