Python tables.Int32Atom() Examples

The following are 4 code examples of tables.Int32Atom(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tables , or try the search function .
Example #1
Source File: svhn.py    From batchup with MIT License 6 votes vote down vote up
def fetch_svhn_extra(source_paths, target_path):
    extra_path = source_paths[0]

    print('Converting {} to HDF5 (compressed)...'.format(extra_path))
    f_out = tables.open_file(target_path, mode='w')
    g_out = f_out.create_group(f_out.root, 'svhn', 'SVHN data')
    filters = tables.Filters(complevel=9, complib='blosc')
    X_u8_arr = f_out.create_earray(
        g_out, 'extra_X_u8', tables.UInt8Atom(), (0, 3, 32, 32),
        filters=filters)
    y_arr = f_out.create_earray(
        g_out, 'extra_y', tables.Int32Atom(), (0,), filters=filters)

    # Load in the extra data Matlab file
    _insert_svhn_matlab_to_h5(X_u8_arr, y_arr, extra_path)

    f_out.close()

    return target_path 
Example #2
Source File: dense_design_matrix.py    From TextDetector with GNU General Public License v3.0 5 votes vote down vote up
def init_hdf5(self, path, shapes,
                  title="Pytables Dataset",
                  y_dtype='float'):
        """
        Initializes the hdf5 file into which the data will be stored. This must
        be called before calling fill_hdf5.

        Parameters
        ----------
        path : string
            The name of the hdf5 file.
        shapes : tuple
            The shapes of X and y.
        title : string, optional
            Name of the dataset. e.g. For SVHN, set this to "SVHN Dataset".
            "Pytables Dataset" is used as title, by default.
        y_dtype : string, optional
            Either 'float' or 'int'. Decides the type of pytables atom
            used to store the y data. By default 'float' type is used.
        """
        assert y_dtype in ['float', 'int'], (
            "y_dtype can be 'float' or 'int' only"
        )

        x_shape, y_shape = shapes
        # make pytables
        ensure_tables()
        h5file = tables.openFile(path, mode="w", title=title)
        gcolumns = h5file.createGroup(h5file.root, "Data", "Data")
        atom = (tables.Float32Atom() if config.floatX == 'float32'
                else tables.Float64Atom())
        h5file.createCArray(gcolumns, 'X', atom=atom, shape=x_shape,
                            title="Data values", filters=self.filters)
        if y_dtype != 'float':
            # For 1D ndarray of int labels, override the atom to integer
            atom = (tables.Int32Atom() if config.floatX == 'float32'
                    else tables.Int64Atom())
        h5file.createCArray(gcolumns, 'y', atom=atom, shape=y_shape,
                            title="Data targets", filters=self.filters)
        return h5file, gcolumns 
Example #3
Source File: dense_design_matrix.py    From TextDetector with GNU General Public License v3.0 4 votes vote down vote up
def resize(self, h5file, start, stop):
        """
        Resizes the X and y tables. This must be called before calling
        fill_hdf5.

        Parameters
        ----------
        h5file : hdf5 file handle
            Handle to an hdf5 object.
        start : int
            The start index to write data.
        stop : int
            The index of the record following the last record to be written.
        """
        ensure_tables()
        # TODO is there any smarter and more efficient way to this?

        data = h5file.getNode('/', "Data")
        try:
            gcolumns = h5file.createGroup('/', "Data_", "Data")
        except tables.exceptions.NodeError:
            h5file.removeNode('/', "Data_", 1)
            gcolumns = h5file.createGroup('/', "Data_", "Data")

        start = 0 if start is None else start
        stop = gcolumns.X.nrows if stop is None else stop

        atom = (tables.Float32Atom() if config.floatX == 'float32'
                else tables.Float64Atom())
        x = h5file.createCArray(gcolumns,
                                'X',
                                atom=atom,
                                shape=((stop - start, data.X.shape[1])),
                                title="Data values",
                                filters=self.filters)
        if np.issubdtype(data.y, int):
            # For 1D ndarray of int labels, override the atom to integer
            atom = (tables.Int32Atom() if config.floatX == 'float32'
                    else tables.Int64Atom())
        y = h5file.createCArray(gcolumns,
                                'y',
                                atom=atom,
                                shape=((stop - start, data.y.shape[1])),
                                title="Data targets",
                                filters=self.filters)
        x[:] = data.X[start:stop]
        y[:] = data.y[start:stop]

        h5file.removeNode('/', "Data", 1)
        h5file.renameNode('/', "Data", "Data_")
        h5file.flush()
        return h5file, gcolumns 
Example #4
Source File: voice.py    From voice-corpus-tool with Mozilla Public License 2.0 4 votes vote down vote up
def _hdf5(self, alphabet_path, hdf5_path, ninput=26, ncontext=9):
        skipped = []
        str_to_label = {}
        alphabet_size = 0
        with codecs.open(alphabet_path, 'r', 'utf-8') as fin:
            for line in fin:
                if line[0:2] == '\\#':
                    line = '#\n'
                elif line[0] == '#':
                    continue
                str_to_label[line[:-1]] = alphabet_size
                alphabet_size += 1

        def process_sample(sample):
            if len(sample.transcript) == 0:
                skipped.append(sample.original_name)
                return None
            sample.write()
            try:
                samplerate, audio = wav.read(sample.file.filename)
                transcript = np.asarray([str_to_label[c] for c in sample.transcript])
            except:
                skipped.append(sample.original_name)
                return None
            features = mfcc(audio, samplerate=samplerate, numcep=ninput)[::2]
            empty_context = np.zeros((ncontext, ninput), dtype=features.dtype)
            features = np.concatenate((empty_context, features, empty_context))
            if (2*ncontext + len(features)) < len(transcript):
                skipped.append(sample.original_name)
                return None
            return features, len(features), transcript, len(transcript)

        out_data = self._map('Computing MFCC features...', self.samples, process_sample)
        out_data = [s for s in out_data if s is not None]
        if len(skipped) > 0:
            log('WARNING - Skipped %d samples that had no transcription, had been too short for their transcription or had been missed:' % len(skipped))
            for s in skipped:
                log(' - Sample origin: "%s".' % s)
        if len(out_data) <= 0:
            log('No samples written to feature DB "%s".' % hdf5_path)
            return
        # list of tuples -> tuple of lists
        features, features_len, transcript, transcript_len = zip(*out_data)

        log('Writing feature DB...')
        with tables.open_file(hdf5_path, 'w') as file:
            features_dset = file.create_vlarray(file.root, 'features', tables.Float32Atom(), filters=tables.Filters(complevel=1))
            # VLArray atoms need to be 1D, so flatten feature array
            for f in features:
                features_dset.append(np.reshape(f, -1))
            features_len_dset = file.create_array(file.root, 'features_len', features_len)

            transcript_dset = file.create_vlarray(file.root, 'transcript', tables.Int32Atom(), filters=tables.Filters(complevel=1))
            for t in transcript:
                transcript_dset.append(t)

            transcript_len_dset = file.create_array(file.root, 'transcript_len', transcript_len)
        log('Wrote features of %d samples to feature DB "%s".' % (len(features), hdf5_path))