Python fuel.datasets.IndexableDataset() Examples

The following are 30 code examples of fuel.datasets.IndexableDataset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module fuel.datasets, or try the search function.
Example #1
Source File: timit.py    From CTC-LSTM with Apache License 2.0 6 votes vote down vote up
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    """Build an indexable dataset and a padded, length-sorted batch stream.

    Three ``.npy`` files are loaded from ``path``; the ``valid_*`` files
    are used when ``valid`` is true, the ``train_*`` files otherwise.

    The stream first gathers ``batch_size * sort_batch_count`` shuffled
    examples, sorts each such group with the comparison returned by
    ``_balanced_batch_helper`` for the ``'input'`` source, unpacks the
    group back into single examples, re-batches them into batches of
    ``batch_size`` and finally pads the ``'input'`` and ``'output'``
    sources (adding masks for both).

    Returns the tuple ``(dataset, stream)``.
    """
    if valid:
        raw_name = 'valid_x_raw.npy'
        phn_name = 'valid_phn.npy'
        seq_name = 'valid_seq_to_phn.npy'
    else:
        raw_name = 'train_x_raw.npy'
        phn_name = 'train_phn.npy'
        seq_name = 'train_seq_to_phn.npy'

    inputs = numpy.load(os.path.join(path, raw_name))
    phonemes = numpy.load(os.path.join(path, phn_name))
    seq_bounds = numpy.load(os.path.join(path, seq_name))

    # For every row of the sequence table, take column 2 of the phoneme
    # rows lying in the half-open range [row[0], row[1]).
    outputs = []
    for bounds in seq_bounds:
        outputs.append(phonemes[bounds[0]:bounds[1], 2])

    num_examples = len(inputs)
    ds = IndexableDataset({'input': inputs, 'output': outputs})
    stream = DataStream(
        ds, iteration_scheme=ShuffledExampleScheme(num_examples))

    # Collect a large group of examples, sort inside it, then split again.
    group_scheme = ConstantScheme(batch_size * sort_batch_count)
    stream = Batch(stream, iteration_scheme=group_scheme)
    input_index = stream.sources.index('input')
    comparison = _balanced_batch_helper(input_index)
    stream = Unpack(Mapping(stream, SortMapping(comparison)))

    final_scheme = ConstantScheme(batch_size, num_examples=num_examples)
    stream = Batch(stream, iteration_scheme=final_scheme)
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream
Example #2
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def setUp(self):
    """Create a batch stream with sources 'X' and 'y' plus a Rename over it.

    ``self.stream`` iterates the four examples sequentially in batches
    of two; ``self.transformer`` renames 'X' to 'features' and 'y' to
    'targets'.
    """
    indexables = OrderedDict()
    indexables['X'] = numpy.ones((4, 2, 2))
    indexables['y'] = numpy.array([0, 1, 0, 1])
    labels = {'X': ('batch', 'width', 'height'),
              'y': ('batch',)}
    dataset = IndexableDataset(indexables, axis_labels=labels)
    scheme = SequentialScheme(4, 2)
    self.stream = DataStream(dataset, iteration_scheme=scheme)

    new_names = {'X': 'features', 'y': 'targets'}
    self.transformer = Rename(self.stream, new_names)
Example #3
Source File: test_datasets.py    From fuel with MIT License 5 votes vote down vote up
def test_batch_iteration_scheme_with_lists(self):
    """Batch schemes should work with plain lists, not only ndarrays.

    Iterates one shuffled epoch in batches of five and checks that,
    for each source, the concatenated batches contain exactly the
    original values.
    """
    sources = OrderedDict()
    sources['foo'] = list(range(50))
    sources['bar'] = list(range(1, 51))
    data = IndexableDataset(sources)

    scheme = ShuffledScheme(data.num_examples, 5)
    stream = DataStream(data, iteration_scheme=scheme)

    batches = list(stream.get_epoch_iterator())
    returned = []
    # zip(*batches) regroups the epoch per source; summing with a list
    # start value concatenates the per-batch lists.
    for per_source in zip(*batches):
        returned.append(sum(per_source, []))

    assert set(returned[0]) == set(range(50))
    assert set(returned[1]) == set(range(1, 51))
Example #4
Source File: test_streams.py    From fuel with MIT License 5 votes vote down vote up
def test_axis_labels_on_produces_batches(self):
    """Axis labels assigned on the dataset are reported by a batch stream."""
    expected_labels = {'data': ('batch', 'features')}

    dataset = IndexableDataset(numpy.eye(2))
    dataset.axis_labels = expected_labels

    scheme = SequentialScheme(2, 2)
    stream = DataStream(dataset, iteration_scheme=scheme)
    assert_equal(stream.axis_labels, expected_labels)
Example #5
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_flatten_batches(self):
    """Flatten applied to 'features' on a batch stream yields 2x4 batches.

    The expectation is two identical batches, each pairing a (2, 4)
    array of ones with the column vector [[0], [1]].
    """
    batch_stream = DataStream(IndexableDataset(self.data),
                              iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(batch_stream, which_sources=('features',))

    produced = list(wrapper.get_epoch_iterator())
    expected = [(numpy.ones((2, 4)), numpy.array([[0], [1]]))
                for _ in range(2)]
    assert_equal(produced, expected)
Example #6
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_axis_labels_on_flatten_batches(self):
    """Flatten rewrites the labels of the flattened source only.

    'features' goes from ('batch', 'width', 'height') to
    ('batch', 'feature'); the 'targets' labels are left untouched.
    """
    input_labels = {'features': ('batch', 'width', 'height'),
                    'targets': ('batch', 'index')}
    batch_stream = DataStream(IndexableDataset(self.data),
                              iteration_scheme=SequentialScheme(4, 2),
                              axis_labels=input_labels)
    wrapper = Flatten(batch_stream, which_sources=('features',))

    expected_labels = {'features': ('batch', 'feature'),
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)
Example #7
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_axis_labels_on_flatten_batches_with_none(self):
    """A ``None`` axis label on the flattened source stays ``None``."""
    input_labels = {'features': None,
                    'targets': ('batch', 'index')}
    batch_stream = DataStream(IndexableDataset(self.data),
                              iteration_scheme=SequentialScheme(4, 2),
                              axis_labels=input_labels)
    wrapper = Flatten(batch_stream, which_sources=('features',))

    expected_labels = {'features': None,
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)
Example #8
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_flatten_examples(self):
    """Flatten applied to 'features' on an example stream yields 1-D arrays.

    The expected epoch is the pair ``(ones(4), 0), (ones(4), 1)``
    repeated twice.
    """
    example_stream = DataStream(IndexableDataset(self.data),
                                iteration_scheme=SequentialExampleScheme(4))
    wrapper = Flatten(example_stream, which_sources=('features',))

    produced = list(wrapper.get_epoch_iterator())
    one_pass = [(numpy.ones(4), target) for target in (0, 1)]
    assert_equal(produced, one_pass * 2)
Example #9
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_filter_batches(self):
    """Filter keeps only batches whose first element is divisible by 3."""
    def first_is_multiple_of_three(d):
        return d[0][0] % 3 == 0

    data = [1, 2, 3, 4]
    scheme = SequentialScheme(4, 2)
    stream = DataStream(IndexableDataset(data), iteration_scheme=scheme)
    wrapper = Filter(stream, first_is_multiple_of_three)

    # Of the batches [1, 2] and [3, 4], only the second passes.
    data_filtered = [([3, 4],)]
    assert_equal(list(wrapper.get_epoch_iterator()), data_filtered)
Example #10
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_axis_labels_are_passed_through(self):
    """Filter exposes the same axis labels as the stream it wraps."""
    def first_is_multiple_of_three(d):
        return d[0][0] % 3 == 0

    dataset = IndexableDataset({'features': [1, 2, 3, 4]},
                               axis_labels={'features': ('batch',)})
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))
    wrapper = Filter(stream, first_is_multiple_of_three)

    assert_equal(wrapper.axis_labels, stream.axis_labels)
Example #11
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_value_error_on_batch_stream(self):
    """Wrapping a stream that already yields batches in Batch raises ValueError."""
    dataset = IndexableDataset([1, 2, 3, 4])
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))
    rebatch_scheme = SequentialScheme(4, 2)
    assert_raises(ValueError, Batch, stream, rebatch_scheme)
Example #12
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def test_value_error_on_different_stream_output_type(self):
    """Merging the fixture streams with an extra batch stream raises ValueError."""
    spanish_dataset = IndexableDataset(['Hola mundo!'])
    spanish_stream = DataStream(spanish_dataset,
                                iteration_scheme=SequentialScheme(2, 2))
    all_streams = self.streams + (spanish_stream,)
    source_names = ('english', 'french', 'spanish')
    assert_raises(ValueError, Merge, all_streams, source_names)
Example #13
Source File: test_datasets.py    From fuel with MIT License 5 votes vote down vote up
def test_pickling(self):
    """An IndexableDataset survives a pickle dump/load round trip."""
    dataset = IndexableDataset({'a': (1, 2)})
    serialized = cPickle.dumps(dataset)
    cPickle.loads(serialized)
Example #14
Source File: test_transformers.py    From fuel with MIT License 5 votes vote down vote up
def setUp(self):
    """Create a 'words' dataset holding one uint8 array per byte string.

    ``self.string_data`` keeps the raw byte strings and ``self.dataset``
    exposes them as a single source named 'words' with axis labels
    ('batch', 'bytes').
    """
    self.string_data = [b'Hello', b'World!']
    # numpy.fromstring is deprecated for binary input. frombuffer returns
    # a read-only view of the bytes object, so copy it to keep the
    # arrays writable and independent, as fromstring's result was.
    words = [numpy.frombuffer(s, dtype='uint8').copy()
             for s in self.string_data]
    self.dataset = IndexableDataset(
        indexables={'words': words},
        axis_labels={'words': ('batch', 'bytes')})
Example #15
Source File: test_predict.py    From blocks-extras with MIT License 5 votes vote down vote up
def test_predict():
    """Check that PredictDataStream saves the predictions to the given path.

    A mock main loop runs for one epoch over a four-example stream with a
    "network" that adds 1 to its input; the saved archive must then hold
    ``source + 1`` under the output variable's name.  The temporary file
    is removed afterwards whether or not the assertions pass.
    """
    tempfile_path = os.path.join(gettempdir(), 'test_predict.npz')

    try:
        # set up mock datastream
        source = [[1], [2], [3], [4]]
        dataset = IndexableDataset(OrderedDict([('input', source)]))
        scheme = SequentialScheme(dataset.num_examples, batch_size=2)
        data_stream = DataStream(dataset, iteration_scheme=scheme)

        # simulate small "network" that increments the input by 1
        input_tensor = tensor.matrix('input')
        output_tensor = input_tensor + 1
        output_tensor.name = 'output_tensor'

        main_loop = MockMainLoop(extensions=[
            PredictDataStream(data_stream=data_stream,
                              variables=[output_tensor],
                              path=tempfile_path,
                              after_training=True),
            FinishAfter(after_n_epochs=1)
        ])
        main_loop.run()

        # assert resulting prediction is saved; the context manager closes
        # the archive so the file can be deleted afterwards
        with numpy.load(tempfile_path) as prediction:
            assert numpy.all(
                prediction[output_tensor.name] == numpy.array(source) + 1)
    finally:
        # Best-effort cleanup: ignore only filesystem errors (e.g. the
        # file was never created) instead of swallowing every exception.
        try:
            os.remove(tempfile_path)
        except OSError:
            pass
Example #16
Source File: weibo_vest.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of ``data`` in a fuel dataset.

    The sources keep that order, and the dataset's
    ``example_iteration_scheme`` is set to a shuffled scheme over all
    of its examples.  Returns the dataset.
    """
    indexables = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #17
Source File: bst_vest.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of ``data`` in a fuel dataset.

    The sources keep that order, and the dataset's
    ``example_iteration_scheme`` is set to a shuffled scheme over all
    of its examples.  Returns the dataset.
    """
    indexables = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #18
Source File: syn_vest.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of ``data`` in a fuel dataset.

    The sources keep that order, and the dataset's
    ``example_iteration_scheme`` is set to a shuffled scheme over all
    of its examples.  Returns the dataset.
    """
    indexables = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #19
Source File: lcsts_test.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of ``data`` in a fuel dataset.

    The sources keep that order, and the dataset's
    ``example_iteration_scheme`` is set to a shuffled scheme over all
    of its examples.  Returns the dataset.
    """
    indexables = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #20
Source File: lcsts_vest.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of ``data`` in a fuel dataset.

    The sources keep that order, and the dataset's
    ``example_iteration_scheme`` is set to a shuffled scheme over all
    of its examples.  Returns the dataset.
    """
    indexables = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #21
Source File: syntest.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap five entries of ``data`` in a fuel dataset.

    The sources are, in order, 'source', 'target', 'target_c',
    'rule_id' and 'rule'.  The dataset's ``example_iteration_scheme``
    is set to a shuffled scheme over all of its examples.  Returns the
    dataset.
    """
    source_names = ('source', 'target', 'target_c', 'rule_id', 'rule')
    indexables = OrderedDict()
    for key in source_names:
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #22
Source File: copynet.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(source, target):
    """Wrap ``source`` and ``target`` in a fuel dataset.

    The dataset's ``example_iteration_scheme`` is set to a shuffled
    scheme over all of its examples.  Returns the tuple
    ``(dataset, len(source))``.
    """
    indexables = OrderedDict()
    indexables['source'] = source
    indexables['target'] = target

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset, len(source)
Example #23
Source File: lcsts_sample.py    From CopyNet with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of ``data`` in a fuel dataset.

    The sources keep that order, and the dataset's
    ``example_iteration_scheme`` is set to a shuffled scheme over all
    of its examples.  Returns the dataset.
    """
    indexables = OrderedDict()
    for key in ('source', 'target', 'target_c'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #24
Source File: build_dataset.py    From CopyNet with MIT License 5 votes vote down vote up
def build_fuel(data):
    """Wrap ``data`` in a fuel dataset with the single source 'data'.

    The dataset's ``example_iteration_scheme`` is set to a shuffled
    scheme over all of its examples.  Returns the tuple
    ``(dataset, len(data))``.
    """
    indexables = OrderedDict()
    indexables['data'] = data

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset, len(data)
Example #25
Source File: test_transformers.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def test_axis_labels_on_flatten_batches_with_none(self):
    """A ``None`` axis label on the flattened source stays ``None``."""
    input_labels = {'features': None,
                    'targets': ('batch', 'index')}
    batch_stream = DataStream(IndexableDataset(self.data),
                              iteration_scheme=SequentialScheme(4, 2),
                              axis_labels=input_labels)
    wrapper = Flatten(batch_stream, which_sources=('features',))

    expected_labels = {'features': None,
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)
Example #26
Source File: keyphrase_copynet.py    From seq2seq-keyphrase with MIT License 5 votes vote down vote up
def build_data(data):
    """Wrap the 'source' and 'target' entries of ``data`` in a fuel dataset.

    The dataset's ``example_iteration_scheme`` is set to a shuffled
    scheme over all of its examples.  Returns the dataset.
    """
    # NOTE: a 'target_c' source was commented out in the original code
    # and is not part of the dataset.
    indexables = OrderedDict()
    for key in ('source', 'target'):
        indexables[key] = data[key]

    dataset = datasets.IndexableDataset(indexables=indexables)
    shuffled = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = shuffled
    return dataset
Example #27
Source File: test_aggregation.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def test_mean_aggregator():
    """Evaluate two variables tagged with the ``Mean`` aggregation scheme.

    ``y`` is the per-column mean of the squared features and ``z`` is
    the sum of ``y``; both are evaluated over a stream of two batches
    of two examples and compared with fixed expected values.
    """
    num_examples = 4
    batch_size = 2
    float_type = theano.config.floatX

    rows = [[0, 3],
            [2, 9],
            [2, 4],
            [5, 1]]
    features = numpy.array(rows, dtype=float_type)

    dataset = IndexableDataset(OrderedDict([('features', features)]))
    scheme = SequentialScheme(num_examples, batch_size)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    x = tensor.matrix('features')
    y = (x**2).mean(axis=0)
    y.name = 'y'
    z = y.sum()
    z.name = 'z'

    y.tag.aggregation_scheme = Mean(y, 1.)
    z.tag.aggregation_scheme = Mean(z, 1.)

    y_result = DatasetEvaluator([y]).evaluate(data_stream)['y']
    assert_allclose(y_result, numpy.array([8.25, 26.75], dtype=float_type))

    z_result = DatasetEvaluator([z]).evaluate(data_stream)['z']
    assert_allclose(z_result, numpy.array([35], dtype=float_type))
Example #28
Source File: test_datasets.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def test_getattr(self):
    """A source of an IndexableDataset is reachable as an attribute."""
    dataset = IndexableDataset({'a': (1, 2)})
    assert_equal(getattr(dataset, 'a'), (1, 2))
Example #29
Source File: test_datasets.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def test_value_error_get_data_state(self):
    """``get_data`` raises ValueError when called with ``True`` as its first argument."""
    dataset = IndexableDataset([1, 2, 3])
    assert_raises(ValueError, dataset.get_data, True, [1, 2])
Example #30
Source File: test_datasets.py    From attention-lvcsr with MIT License 5 votes vote down vote up
def test_value_error_get_data_none_request(self):
    """``get_data`` raises ValueError when both of its arguments are ``None``."""
    dataset = IndexableDataset([1, 2, 3])
    assert_raises(ValueError, dataset.get_data, None, None)