Python tables.Float32Atom() Examples

The following are 13 code examples of tables.Float32Atom(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out the other available functions and classes of the tables module.
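Before the examples, here is a minimal sketch of how tables.Float32Atom() is typically used: the atom only describes the element type (32-bit float) and optional default value of a chunked dataset, and is passed to create_carray or create_earray together with a shape. The file and node names below are illustrative, not taken from any of the projects.

import numpy as np
import tables

# minimal sketch: a fixed-shape (100, 3) float32 CArray in an HDF5 file
with tables.open_file('example.h5', mode='w') as h5file:
    atom = tables.Float32Atom()                       # 32-bit float elements
    carr = h5file.create_carray(h5file.root, 'points', atom=atom, shape=(100, 3))
    carr[:] = np.random.rand(100, 3).astype(np.float32)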
Example #1
Source File: orient_pharynx.py    From tierpsy-tracker with MIT License
def init_data(ske_file_id, tot_rows):
    #create and reference all the arrays
    field = 'skeleton'
    dims = (tot_rows,2,2)
    
    if '/' + field in ske_file_id:
        ske_file_id.remove_node('/', field)
        
    skeletons = ske_file_id.create_carray('/', 
                              field, 
                              tables.Float32Atom(dflt=np.nan), 
                              dims, 
                              filters=TABLE_FILTERS)
    traj_dat = ske_file_id.get_node('/trajectories_data')
    has_skeleton = traj_dat.cols.has_skeleton
    has_skeleton[:] = np.zeros_like(has_skeleton) #delete previous
    
    return skeletons, has_skeleton 
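One detail worth noting here: Float32Atom(dflt=np.nan) sets the atom's default (fill) value, so any element of the carray that is never written should read back as NaN. A small self-contained sketch of that behaviour (file and node names are illustrative):

import numpy as np
import tables

with tables.open_file('skeletons_demo.h5', mode='w') as fid:
    arr = fid.create_carray('/', 'skeleton',
                            tables.Float32Atom(dflt=np.nan), (5, 2, 2))
    print(np.isnan(arr[0]).all())   # True: unwritten elements default to NaN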
Example #2
Source File: dense_design_matrix.py    From TextDetector with GNU General Public License v3.0
def init_hdf5(self, path, shapes,
                  title="Pytables Dataset",
                  y_dtype='float'):
        """
        Initializes the hdf5 file into which the data will be stored. This must
        be called before calling fill_hdf5.

        Parameters
        ----------
        path : string
            The name of the hdf5 file.
        shapes : tuple
            The shapes of X and y.
        title : string, optional
            Name of the dataset. e.g. For SVHN, set this to "SVHN Dataset".
            "Pytables Dataset" is used as title, by default.
        y_dtype : string, optional
            Either 'float' or 'int'. Decides the type of pytables atom
            used to store the y data. By default 'float' type is used.
        """
        assert y_dtype in ['float', 'int'], (
            "y_dtype can be 'float' or 'int' only"
        )

        x_shape, y_shape = shapes
        # make pytables
        ensure_tables()
        h5file = tables.openFile(path, mode="w", title=title)
        gcolumns = h5file.createGroup(h5file.root, "Data", "Data")
        atom = (tables.Float32Atom() if config.floatX == 'float32'
                else tables.Float64Atom())
        h5file.createCArray(gcolumns, 'X', atom=atom, shape=x_shape,
                            title="Data values", filters=self.filters)
        if y_dtype != 'float':
            # For 1D ndarray of int labels, override the atom to integer
            atom = (tables.Int32Atom() if config.floatX == 'float32'
                    else tables.Int64Atom())
        h5file.createCArray(gcolumns, 'y', atom=atom, shape=y_shape,
                            title="Data targets", filters=self.filters)
        return h5file, gcolumns 
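Note that this example uses the legacy PyTables 2.x camelCase API (openFile, createGroup, createCArray); in PyTables 3.x these calls were renamed to PEP 8 style names. A minimal sketch of the equivalent modern calls, with an illustrative file name and shapes in place of the method's parameters:

import tables

h5file = tables.open_file('dataset.h5', mode='w', title="Pytables Dataset")
gcolumns = h5file.create_group(h5file.root, 'Data', 'Data')
atom = tables.Float32Atom()
h5file.create_carray(gcolumns, 'X', atom=atom, shape=(1000, 784), title="Data values")
h5file.create_carray(gcolumns, 'y', atom=atom, shape=(1000, 10), title="Data targets")
h5file.close()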
Example #3
Source File: getSkeletonsTables.py    From tierpsy-tracker with MIT License
def _initSkeletonsArrays(ske_file_id, tot_rows, resampling_N, worm_midbody):
    '''initialize arrays to save the skeletons data.
        Used by trajectories2Skeletons
    '''

    # this is to initialize the arrays to one row; pytables does not accept empty arrays as initializers of carrays
    if tot_rows == 0:
        tot_rows = 1  
    
    #define the dimensions of each array; they are the only part that varies
    data_dims = {}
    for data_str in ['skeleton', 'contour_side1', 'contour_side2']:
        data_dims[data_str + '_length'] = (tot_rows,)
        data_dims[data_str] = (tot_rows, resampling_N, 2)
    data_dims['contour_width'] = (tot_rows, resampling_N)
    data_dims['width_midbody'] = (tot_rows,)
    data_dims['contour_area'] = (tot_rows,)
    
    #create and reference all the arrays
    def _create_array(field, dims):
        if '/' + field in ske_file_id:
            ske_file_id.remove_node('/', field)
            
        return ske_file_id.create_carray('/', 
                                  field, 
                                  tables.Float32Atom(dflt=np.nan), 
                                  dims, 
                                  filters=TABLE_FILTERS)
        
    skel_arrays = {field:_create_array(field, dims) for field, dims in data_dims.items()}
    inram_skel_arrays = {field:np.ones(dims, dtype=np.float32)*np.nan for field, dims in data_dims.items()}
    
    # flags to mark if a frame was skeletonized
    traj_dat = ske_file_id.get_node('/trajectories_data')
    has_skeleton = traj_dat.cols.has_skeleton
    has_skeleton[:] = np.zeros_like(has_skeleton) #delete previous
    
#    return skel_arrays, has_skeleton
    return skel_arrays, has_skeleton, inram_skel_arrays 
Example #4
Source File: compressVideo.py    From tierpsy-tracker with MIT License
def initMasksGroups(fid, expected_frames, im_height, im_width, 
    attr_params, save_full_interval, is_expandable=True):

    # open node to store the compressed (masked) data
    mask_dataset = createImgGroup(fid, "/mask", expected_frames, im_height, im_width, is_expandable)
    

    tot_save_full = (expected_frames // save_full_interval) + 1
    full_dataset = createImgGroup(fid, "/full_data", tot_save_full, im_height, im_width, is_expandable)
    full_dataset._v_attrs['save_interval'] = save_full_interval
    

    assert all(x in ['expected_fps', 'is_light_background', 'microns_per_pixel'] for x in attr_params)
    set_unit_conversions(mask_dataset, **attr_params)
    set_unit_conversions(full_dataset, **attr_params)

    if is_expandable:
        mean_intensity = fid.create_earray('/', 
                                        'mean_intensity',
                                        atom=tables.Float32Atom(),
                                        shape=(0,),
                                        expectedrows=expected_frames,
                                        filters=TABLE_FILTERS)
    else:
        mean_intensity = fid.create_carray('/', 
                                        'mean_intensity',
                                        atom=tables.Float32Atom(),
                                        shape=(expected_frames,),
                                        filters=TABLE_FILTERS)
    
    return mask_dataset, full_dataset, mean_intensity 
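The is_expandable flag above decides between an EArray and a CArray holding the same Float32Atom data: create_earray starts with a zero-length first dimension and grows through append(), while create_carray is allocated at its full fixed shape and is written by slicing. A small sketch of the difference (file and node names are illustrative):

import numpy as np
import tables

with tables.open_file('intensity_demo.h5', mode='w') as fid:
    # expandable: the first dimension is 0 and grows with append()
    earr = fid.create_earray('/', 'mean_intensity_e',
                             atom=tables.Float32Atom(), shape=(0,),
                             expectedrows=1000)
    earr.append(np.float32([0.1, 0.2, 0.3]))

    # fixed shape: allocated up front, written by slicing
    carr = fid.create_carray('/', 'mean_intensity_c',
                             atom=tables.Float32Atom(), shape=(1000,))
    carr[:3] = np.float32([0.1, 0.2, 0.3])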
Example #5
Source File: read_write.py    From deepQuest with BSD 3-Clause "New" or "Revised" License
def numpy2hdf5(filepath, mylist, data_name='data', permission='w'):
    if permission == 'w':
        f = tables.open_file(filepath, mode=permission)
        atom = tables.Float32Atom()
        array_c = f.create_earray(f.root, data_name, atom,
                                  tuple([0] + [mylist.shape[i] for i in range(1, len(mylist.shape))]))
        array_c.append(mylist)
        f.close()
    elif permission == 'a':
        f = tables.open_file(filepath, mode='a')
        f.root.data.append(mylist)
        f.close() 
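A hedged usage sketch for numpy2hdf5 above (file name and shapes are illustrative). Note that the 'a' branch appends to f.root.data, so appending only works when the array was originally created with the default data_name='data':

import numpy as np

feats = np.random.rand(100, 512).astype(np.float32)
numpy2hdf5('features.h5', feats, data_name='data', permission='w')   # create
more = np.random.rand(50, 512).astype(np.float32)
numpy2hdf5('features.h5', more, permission='a')                      # append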
Example #6
Source File: data.py    From 3DUnetCNN with MIT License
def create_data_file(out_file, n_channels, n_samples, image_shape):
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))
    data_storage = hdf5_file.create_earray(hdf5_file.root, 'data', tables.Float32Atom(), shape=data_shape,
                                           filters=filters, expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root, 'truth', tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine', tables.Float32Atom(), shape=(0, 4, 4),
                                             filters=filters, expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage, affine_storage 
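The three storages returned here are EArrays whose first dimension has length zero, so each sample is added later with append() on an array carrying a leading batch axis. A hedged usage sketch (the file name, channel count, and image shape are illustrative):

import numpy as np

image_shape = (144, 144, 144)
hdf5_file, data_storage, truth_storage, affine_storage = create_data_file(
    'brats_data.h5', n_channels=4, n_samples=100, image_shape=image_shape)

data_storage.append(np.zeros((1, 4) + image_shape, dtype=np.float32))
truth_storage.append(np.zeros((1, 1) + image_shape, dtype=np.uint8))
affine_storage.append(np.eye(4, dtype=np.float32)[np.newaxis])
hdf5_file.close()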
Example #7
Source File: data.py    From 3D-CNNs-for-Liver-Classification with Apache License 2.0
def create_data_file(out_file, n_channels, n_samples, image_shape):
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))
    data_storage = hdf5_file.create_earray(hdf5_file.root, 'data', tables.Float32Atom(), shape=data_shape,
                                           filters=filters, expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root, 'truth', tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine', tables.Float32Atom(), shape=(0, 4, 4),
                                             filters=filters, expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage, affine_storage 
Example #8
Source File: preprocess.py    From 3D-CNNs-for-Liver-Classification with Apache License 2.0
def create_data_file(out_file, n_channels, n_samples, image_shape):
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1])
    data_storage = hdf5_file.create_earray(hdf5_file.root, 'data', tables.Float32Atom(), shape=data_shape,
                                           filters=filters, expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root, 'truth', tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage 
Example #9
Source File: data.py    From Keras-Brats-Improved-Unet3d with MIT License
def create_data_file(out_file, n_channels, n_samples, image_shape):
    hdf5_file = tables.open_file(out_file, mode='w')
    filters = tables.Filters(complevel=5, complib='blosc')
    data_shape = tuple([0, n_channels] + list(image_shape))
    truth_shape = tuple([0, 1] + list(image_shape))
    data_storage = hdf5_file.create_earray(hdf5_file.root, 'data', tables.Float32Atom(), shape=data_shape,
                                           filters=filters, expectedrows=n_samples)
    truth_storage = hdf5_file.create_earray(hdf5_file.root, 'truth', tables.UInt8Atom(), shape=truth_shape,
                                            filters=filters, expectedrows=n_samples)
    affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine', tables.Float32Atom(), shape=(0, 4, 4),
                                             filters=filters, expectedrows=n_samples)
    return hdf5_file, data_storage, truth_storage, affine_storage 
Example #10
Source File: moving_mnist.py    From RATM with MIT License
def dump_test_set(self, h5filepath, nframes, framesize):
        # set rng to a hardcoded state, so we always have the same test set!
        self.numpy_rng.seed(1)
        with tables.openFile(h5filepath, 'w') as h5file:

            h5file.createArray(h5file.root, 'test_targets',
                               self.partitions['test']['targets'])

            vids = h5file.createCArray(
                h5file.root,
                'test_images',
                tables.Float32Atom(),
                shape=(10000,
                       nframes, framesize, framesize),
                filters=tables.Filters(complevel=5, complib='zlib'))

            pos = h5file.createCArray(
                h5file.root,
                'test_pos',
                tables.UInt16Atom(),
                shape=(10000,
                       nframes, 2),
                filters=tables.Filters(complevel=5, complib='zlib'))
            for i in range(100):
                print i
                (vids[i*100:(i+1)*100],
                 pos[i*100:(i+1)*100], _) = self.get_batch(
                     'test', 100, nframes, framesize,
                     idx=np.arange(i*100,(i+1)*100))
                h5file.flush() 
Example #11
Source File: dense_design_matrix.py    From TextDetector with GNU General Public License v3.0
def resize(self, h5file, start, stop):
        """
        Resizes the X and y tables. This must be called before calling
        fill_hdf5.

        Parameters
        ----------
        h5file : hdf5 file handle
            Handle to an hdf5 object.
        start : int
            The start index to write data.
        stop : int
            The index of the record following the last record to be written.
        """
        ensure_tables()
        # TODO is there any smarter and more efficient way to this?

        data = h5file.getNode('/', "Data")
        try:
            gcolumns = h5file.createGroup('/', "Data_", "Data")
        except tables.exceptions.NodeError:
            h5file.removeNode('/', "Data_", 1)
            gcolumns = h5file.createGroup('/', "Data_", "Data")

        start = 0 if start is None else start
        stop = gcolumns.X.nrows if stop is None else stop

        atom = (tables.Float32Atom() if config.floatX == 'float32'
                else tables.Float64Atom())
        x = h5file.createCArray(gcolumns,
                                'X',
                                atom=atom,
                                shape=((stop - start, data.X.shape[1])),
                                title="Data values",
                                filters=self.filters)
        if np.issubdtype(data.y, int):
            # For 1D ndarray of int labels, override the atom to integer
            atom = (tables.Int32Atom() if config.floatX == 'float32'
                    else tables.Int64Atom())
        y = h5file.createCArray(gcolumns,
                                'y',
                                atom=atom,
                                shape=((stop - start, data.y.shape[1])),
                                title="Data targets",
                                filters=self.filters)
        x[:] = data.X[start:stop]
        y[:] = data.y[start:stop]

        h5file.removeNode('/', "Data", 1)
        h5file.renameNode('/', "Data", "Data_")
        h5file.flush()
        return h5file, gcolumns 
Example #12
Source File: Concurrent_AP.py    From Concurrent_AP with MIT License
def check_HDF5_arrays(hdf5_file, N, convergence_iter):
    """Check that the HDF5 data structure of file handle 'hdf5_file' 
        has all the required nodes organizing the various two-dimensional 
        arrays required for Affinity Propagation clustering 
        ('Responsibility' matrix, 'Availability', etc.).
    
    Parameters
    ----------
    hdf5_file : string or file handle
        Name of the Hierarchical Data Format under consideration.
        
    N : int
        The number of samples in the data-set that will undergo Affinity Propagation
        clustering.
    
    convergence_iter : int
        Number of iterations with no change in the number of estimated clusters 
        that stops the convergence.
    """
    
    Worker.hdf5_lock.acquire()

    with tables.open_file(hdf5_file, 'r+') as fileh:
        if not hasattr(fileh.root, 'aff_prop_group'):
            fileh.create_group(fileh.root, "aff_prop_group")

        atom = tables.Float32Atom()
        filters = None
        #filters = tables.Filters(5, 'blosc')
            
        for feature in ('availabilities', 'responsibilities',
                            'similarities', 'temporaries'):
            if not hasattr(fileh.root.aff_prop_group, feature):
                fileh.create_carray(fileh.root.aff_prop_group, feature, 
                         atom, (N, N), "Matrix of {0} for affinity "
                         "propagation clustering".format(feature), 
                         filters = filters)

        if not hasattr(fileh.root.aff_prop_group, 'parallel_updates'):
            fileh.create_carray(fileh.root.aff_prop_group,
                     'parallel_updates', atom, (N, convergence_iter), 
                     "Matrix of parallel updates for affinity propagation "
                     "clustering", filters = filters)
                     
    Worker.hdf5_lock.release() 
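A hedged sketch of how the pre-allocated matrices created above might be accessed afterwards, reading and writing row blocks of the (N, N) 'similarities' CArray (the file name, slice bounds, and arithmetic are illustrative):

import numpy as np
import tables

with tables.open_file('affinity_propagation.h5', 'r+') as fileh:
    S = fileh.root.aff_prop_group.similarities
    block = S[0:256, :]              # read a block of rows into memory
    S[0:256, :] = block * 0.5        # write the updated block back to disk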
Example #13
Source File: voice.py    From voice-corpus-tool with Mozilla Public License 2.0
def _hdf5(self, alphabet_path, hdf5_path, ninput=26, ncontext=9):
        skipped = []
        str_to_label = {}
        alphabet_size = 0
        with codecs.open(alphabet_path, 'r', 'utf-8') as fin:
            for line in fin:
                if line[0:2] == '\\#':
                    line = '#\n'
                elif line[0] == '#':
                    continue
                str_to_label[line[:-1]] = alphabet_size
                alphabet_size += 1

        def process_sample(sample):
            if len(sample.transcript) == 0:
                skipped.append(sample.original_name)
                return None
            sample.write()
            try:
                samplerate, audio = wav.read(sample.file.filename)
                transcript = np.asarray([str_to_label[c] for c in sample.transcript])
            except:
                skipped.append(sample.original_name)
                return None
            features = mfcc(audio, samplerate=samplerate, numcep=ninput)[::2]
            empty_context = np.zeros((ncontext, ninput), dtype=features.dtype)
            features = np.concatenate((empty_context, features, empty_context))
            if (2*ncontext + len(features)) < len(transcript):
                skipped.append(sample.original_name)
                return None
            return features, len(features), transcript, len(transcript)

        out_data = self._map('Computing MFCC features...', self.samples, process_sample)
        out_data = [s for s in out_data if s is not None]
        if len(skipped) > 0:
            log('WARNING - Skipped %d samples that had no transcription, had been too short for their transcription or had been missed:' % len(skipped))
            for s in skipped:
                log(' - Sample origin: "%s".' % s)
        if len(out_data) <= 0:
            log('No samples written to feature DB "%s".' % hdf5_path)
            return
        # list of tuples -> tuple of lists
        features, features_len, transcript, transcript_len = zip(*out_data)

        log('Writing feature DB...')
        with tables.open_file(hdf5_path, 'w') as file:
            features_dset = file.create_vlarray(file.root, 'features', tables.Float32Atom(), filters=tables.Filters(complevel=1))
            # VLArray atoms need to be 1D, so flatten feature array
            for f in features:
                features_dset.append(np.reshape(f, -1))
            features_len_dset = file.create_array(file.root, 'features_len', features_len)

            transcript_dset = file.create_vlarray(file.root, 'transcript', tables.Int32Atom(), filters=tables.Filters(complevel=1))
            for t in transcript:
                transcript_dset.append(t)

            transcript_len_dset = file.create_array(file.root, 'transcript_len', transcript_len)
        log('Wrote features of %d samples to feature DB "%s".' % (len(features), hdf5_path))
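Because VLArray atoms must be one-dimensional, each sample's feature matrix is flattened before being appended; reading it back therefore requires reshaping with the number of cepstral coefficients used when writing. A hedged sketch of the reverse operation (the file name is illustrative, and ninput must match the value passed to _hdf5):

import numpy as np
import tables

ninput = 26
with tables.open_file('features.h5', 'r') as f:
    flat = f.root.features[0]                  # the flattened row written above
    feats = np.reshape(flat, (-1, ninput))     # back to (n_frames, ninput)
    n_frames = f.root.features_len[0]          # should equal feats.shape[0]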