Python Examples of fuel.datasets.IndexableDataset

Source File: timit.py From CTC-LSTM with Apache License 2.0

6 votes

def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    """Load the arrays stored under *path* and build a padded batch stream.

    Parameters
    ----------
    path : str
        Directory containing the ``*_x_raw.npy``, ``*_phn.npy`` and
        ``*_seq_to_phn.npy`` files.
    batch_size : int
        Number of examples in each final batch.
    sort_batch_count : int
        Number of final batches gathered together and sorted before
        being split up again.
    valid : bool
        When true the ``valid_*`` files are read, otherwise ``train_*``.

    Returns
    -------
    tuple
        ``(dataset, stream)``: the ``IndexableDataset`` and the padded
        stream built on top of it.
    """
    if valid:
        filenames = ('valid_x_raw.npy', 'valid_phn.npy',
                     'valid_seq_to_phn.npy')
    else:
        filenames = ('train_x_raw.npy', 'train_phn.npy',
                     'train_seq_to_phn.npy')
    inputs, phonemes, seq_bounds = [
        numpy.load(os.path.join(path, filename)) for filename in filenames]

    # One target per sequence: column 2 of the phoneme rows delimited by
    # the first two entries of the corresponding bounds row.
    outputs = [phonemes[bounds[0]:bounds[1], 2] for bounds in seq_bounds]
    num_examples = len(inputs)

    ds = IndexableDataset({'input': inputs, 'output': outputs})
    shuffled = ShuffledExampleScheme(num_examples)
    stream = DataStream(ds, iteration_scheme=shuffled)

    # Gather a large group of examples, sort it with the helper's
    # comparison on the 'input' source, then flatten back to examples.
    group_scheme = ConstantScheme(batch_size * sort_batch_count)
    stream = Batch(stream, iteration_scheme=group_scheme)
    input_index = stream.sources.index('input')
    comparison = _balanced_batch_helper(input_index)
    stream = Unpack(Mapping(stream, SortMapping(comparison)))

    # Re-batch to the requested size and pad both sources with masks.
    batch_scheme = ConstantScheme(batch_size, num_examples=num_examples)
    stream = Batch(stream, iteration_scheme=batch_scheme)
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream

Source File: test_transformers.py From fuel with MIT License

5 votes

def setUp(self):
    """Create a two-source batch stream and a ``Rename`` wrapper around it."""
    indexables = OrderedDict([('X', numpy.ones((4, 2, 2))),
                              ('y', numpy.array([0, 1, 0, 1]))])
    labels = {'X': ('batch', 'width', 'height'),
              'y': ('batch',)}
    dataset = IndexableDataset(indexables, axis_labels=labels)
    scheme = SequentialScheme(4, 2)
    self.stream = DataStream(dataset, iteration_scheme=scheme)

    # 'X' is exposed as 'features' and 'y' as 'targets'.
    renames = {'X': 'features', 'y': 'targets'}
    self.transformer = Rename(self.stream, renames)

Source File: test_datasets.py From fuel with MIT License

5 votes

def test_batch_iteration_scheme_with_lists(self):
    """Batch schemes should work with more than ndarrays."""
    indexables = OrderedDict([('foo', list(range(50))),
                              ('bar', list(range(1, 51)))])
    data = IndexableDataset(indexables)
    scheme = ShuffledScheme(data.num_examples, 5)
    stream = DataStream(data, iteration_scheme=scheme)

    # Transpose the batches into one group per source, then concatenate
    # each group's list-batches into a single flat list.
    batches = list(stream.get_epoch_iterator())
    returned = []
    for per_source in zip(*batches):
        returned.append(sum(per_source, []))

    # Shuffling changes the order, so compare as sets.
    assert set(returned[0]) == set(range(50))
    assert set(returned[1]) == set(range(1, 51))

Source File: test_streams.py From fuel with MIT License

5 votes

def test_axis_labels_on_produces_batches(self):
    """Axis labels set on the dataset should be reported by the stream."""
    expected_labels = {'data': ('batch', 'features')}
    dataset = IndexableDataset(numpy.eye(2))
    dataset.axis_labels = expected_labels

    scheme = SequentialScheme(2, 2)
    stream = DataStream(dataset, iteration_scheme=scheme)

    assert_equal(stream.axis_labels, expected_labels)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_flatten_batches(self):
    """Flattening 'features' on a batch stream should give two (2, 4) batches."""
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2))
    wrapper = Flatten(stream, which_sources=('features',))
    produced = list(wrapper.get_epoch_iterator())

    # Both batches are expected to look the same.
    expected = [(numpy.ones((2, 4)), numpy.array([[0], [1]]))
                for _ in range(2)]
    assert_equal(produced, expected)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_axis_labels_on_flatten_batches(self):
    """Flattened 'features' should report ('batch', 'feature') axis labels."""
    stream_labels = {'features': ('batch', 'width', 'height'),
                     'targets': ('batch', 'index')}
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2),
                        axis_labels=stream_labels)
    wrapper = Flatten(stream, which_sources=('features',))

    # Only the flattened source changes; 'targets' keeps its labels.
    expected_labels = {'features': ('batch', 'feature'),
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_axis_labels_on_flatten_batches_with_none(self):
    """A ``None`` axis label on the flattened source should stay ``None``."""
    stream_labels = {'features': None,
                     'targets': ('batch', 'index')}
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2),
                        axis_labels=stream_labels)
    wrapper = Flatten(stream, which_sources=('features',))

    expected_labels = {'features': None,
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_flatten_examples(self):
    """Flattening 'features' on an example stream should give length-4 vectors."""
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialExampleScheme(4))
    wrapper = Flatten(stream, which_sources=('features',))
    produced = list(wrapper.get_epoch_iterator())

    # Four examples are expected, with targets alternating 0, 1, 0, 1.
    one_pair = [(numpy.ones(4), 0), (numpy.ones(4), 1)]
    expected = one_pair + one_pair
    assert_equal(produced, expected)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_filter_batches(self):
    """Only the batch whose first element is a multiple of 3 should pass."""
    def first_is_multiple_of_three(batch):
        # batch[0] is the first source's batch; batch[0][0] its first item.
        return batch[0][0] % 3 == 0

    data = [1, 2, 3, 4]
    scheme = SequentialScheme(4, 2)
    stream = DataStream(IndexableDataset(data), iteration_scheme=scheme)
    wrapper = Filter(stream, first_is_multiple_of_three)

    data_filtered = [([3, 4],)]
    assert_equal(list(wrapper.get_epoch_iterator()), data_filtered)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_axis_labels_are_passed_through(self):
    """``Filter`` should report the same axis labels as the wrapped stream."""
    dataset = IndexableDataset({'features': [1, 2, 3, 4]},
                               axis_labels={'features': ('batch',)})
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))

    def first_is_multiple_of_three(batch):
        return batch[0][0] % 3 == 0

    wrapper = Filter(stream, first_is_multiple_of_three)
    assert_equal(wrapper.axis_labels, stream.axis_labels)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_value_error_on_batch_stream(self):
    """Wrapping a batch-producing stream in ``Batch`` should raise ``ValueError``."""
    dataset = IndexableDataset([1, 2, 3, 4])
    stream = DataStream(dataset, iteration_scheme=SequentialScheme(4, 2))
    rebatch_scheme = SequentialScheme(4, 2)
    assert_raises(ValueError, Batch, stream, rebatch_scheme)

Source File: test_transformers.py From fuel with MIT License

5 votes

def test_value_error_on_different_stream_output_type(self):
    """Merging ``self.streams`` with a batch stream should raise ``ValueError``."""
    spanish_dataset = IndexableDataset(['Hola mundo!'])
    spanish_stream = DataStream(spanish_dataset,
                                iteration_scheme=SequentialScheme(2, 2))
    all_streams = self.streams + (spanish_stream,)
    source_names = ('english', 'french', 'spanish')
    assert_raises(ValueError, Merge, all_streams, source_names)

Source File: test_datasets.py From fuel with MIT License

5 votes

def test_pickling(self):
    """An ``IndexableDataset`` should survive a pickle round trip without error."""
    dataset = IndexableDataset({'a': (1, 2)})
    serialized = cPickle.dumps(dataset)
    cPickle.loads(serialized)

Source File: test_transformers.py From fuel with MIT License

5 votes

def setUp(self):
    """Build a dataset whose 'words' source holds byte strings as uint8 arrays."""
    self.string_data = [b'Hello', b'World!']
    # numpy.fromstring in binary mode is deprecated; frombuffer is the
    # documented replacement. It returns a read-only view of the bytes
    # object, so copy to keep the arrays writable as fromstring did.
    words = [numpy.frombuffer(s, dtype='uint8').copy()
             for s in self.string_data]
    self.dataset = IndexableDataset(
        indexables={'words': words},
        axis_labels={'words': ('batch', 'bytes')})

Source File: test_predict.py From blocks-extras with MIT License

5 votes

def test_predict():
    """Check that ``PredictDataStream`` saves the network output to an ``.npz`` file.

    A mock main loop is run for one epoch over a four-example stream with a
    "network" that adds 1 to its input. The saved archive must then contain
    ``source + 1`` under the output variable's name.
    """
    tempfile_path = os.path.join(gettempdir(), 'test_predict.npz')

    # set up mock datastream
    source = [[1], [2], [3], [4]]
    dataset = IndexableDataset(OrderedDict([('input', source)]))
    scheme = SequentialScheme(dataset.num_examples, batch_size=2)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    # simulate small "network" that increments the input by 1
    input_tensor = tensor.matrix('input')
    output_tensor = input_tensor + 1
    output_tensor.name = 'output_tensor'

    main_loop = MockMainLoop(extensions=[
        PredictDataStream(data_stream=data_stream,
                          variables=[output_tensor],
                          path=tempfile_path,
                          after_training=True),
        FinishAfter(after_n_epochs=1)
    ])

    try:
        main_loop.run()

        # assert resulting prediction is saved; the context manager closes
        # the archive so the file can be removed afterwards on any platform
        with numpy.load(tempfile_path) as prediction:
            assert numpy.all(
                prediction[output_tensor.name] == numpy.array(source) + 1)
    finally:
        # Best-effort cleanup that also runs when the assertion fails. Only
        # filesystem errors (e.g. the file was never written) are ignored.
        try:
            os.remove(tempfile_path)
        except OSError:
            pass

Source File: weibo_vest.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target', 'target_c'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: bst_vest.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target', 'target_c'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: syn_vest.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target', 'target_c'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: lcsts_test.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target', 'target_c'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: lcsts_vest.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target', 'target_c'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: syntest.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap five entries of *data* in a shuffled-example fuel dataset.

    The sources, in order, are 'source', 'target', 'target_c', 'rule_id'
    and 'rule'. The returned ``IndexableDataset`` has a
    ``ShuffledExampleScheme`` over all of its examples set as its example
    iteration scheme.
    """
    # create fuel dataset.
    source_names = ('source', 'target', 'target_c', 'rule_id', 'rule')
    indexables = OrderedDict((key, data[key]) for key in source_names)
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: copynet.py From CopyNet with MIT License

5 votes

def build_data(source, target):
    """Pair *source* and *target* in a shuffled-example fuel dataset.

    Returns a ``(dataset, size)`` tuple where ``size`` is ``len(source)``.
    """
    # create fuel dataset.
    indexables = OrderedDict()
    indexables['source'] = source
    indexables['target'] = target
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset, len(source)

Source File: lcsts_sample.py From CopyNet with MIT License

5 votes

def build_data(data):
    """Wrap the 'source', 'target' and 'target_c' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target', 'target_c'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: build_dataset.py From CopyNet with MIT License

5 votes

def build_fuel(data):
    """Wrap *data* as the single 'data' source of a shuffled-example fuel dataset.

    Returns a ``(dataset, size)`` tuple where ``size`` is ``len(data)``.
    """
    # create fuel dataset.
    indexables = OrderedDict()
    indexables['data'] = data
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset, len(data)

Source File: test_transformers.py From attention-lvcsr with MIT License

5 votes

def test_axis_labels_on_flatten_batches_with_none(self):
    """A ``None`` axis label on the flattened source should stay ``None``."""
    stream_labels = {'features': None,
                     'targets': ('batch', 'index')}
    stream = DataStream(IndexableDataset(self.data),
                        iteration_scheme=SequentialScheme(4, 2),
                        axis_labels=stream_labels)
    wrapper = Flatten(stream, which_sources=('features',))

    expected_labels = {'features': None,
                       'targets': ('batch', 'index')}
    assert_equal(wrapper.axis_labels, expected_labels)

Source File: keyphrase_copynet.py From seq2seq-keyphrase with MIT License

5 votes

def build_data(data):
    """Wrap the 'source' and 'target' entries of *data* in a fuel dataset.

    The returned ``IndexableDataset`` has a ``ShuffledExampleScheme`` over
    all of its examples set as its example iteration scheme.
    """
    # create fuel dataset.
    # NOTE: only 'source' and 'target' are included here; 'target_c' is
    # not part of this dataset.
    indexables = OrderedDict(
        (key, data[key]) for key in ('source', 'target'))
    dataset = datasets.IndexableDataset(indexables=indexables)

    scheme = schemes.ShuffledExampleScheme(dataset.num_examples)
    dataset.example_iteration_scheme = scheme
    return dataset

Source File: test_aggregation.py From attention-lvcsr with MIT License

5 votes

def test_mean_aggregator():
    """Evaluate ``Mean``-aggregated variables over a two-batch stream.

    The per-column mean of the squared features over all four rows is
    ``[8.25, 26.75]`` and its sum is ``35``; the evaluator results are
    compared against those values.
    """
    float_type = theano.config.floatX
    num_examples, batch_size = 4, 2

    features = numpy.array([[0, 3],
                            [2, 9],
                            [2, 4],
                            [5, 1]], dtype=float_type)
    dataset = IndexableDataset(OrderedDict([('features', features)]))
    scheme = SequentialScheme(num_examples, batch_size)
    data_stream = DataStream(dataset, iteration_scheme=scheme)

    # Symbolic graph: column means of the squared input, and their sum.
    inputs = tensor.matrix('features')
    column_means = (inputs ** 2).mean(axis=0)
    column_means.name = 'y'
    total = column_means.sum()
    total.name = 'z'

    column_means.tag.aggregation_scheme = Mean(column_means, 1.)
    total.tag.aggregation_scheme = Mean(total, 1.)

    y_value = DatasetEvaluator([column_means]).evaluate(data_stream)['y']
    assert_allclose(y_value,
                    numpy.array([8.25, 26.75], dtype=float_type))

    z_value = DatasetEvaluator([total]).evaluate(data_stream)['z']
    assert_allclose(z_value,
                    numpy.array([35], dtype=float_type))

Source File: test_datasets.py From attention-lvcsr with MIT License

5 votes

def test_getattr(self):
    """A source of the dataset should be reachable as an attribute."""
    dataset = IndexableDataset({'a': (1, 2)})
    value = getattr(dataset, 'a')
    assert_equal(value, (1, 2))

Source File: test_datasets.py From attention-lvcsr with MIT License

5 votes

def test_value_error_get_data_state(self):
    """``get_data(True, [1, 2])`` should raise ``ValueError``."""
    dataset = IndexableDataset([1, 2, 3])
    get_data = dataset.get_data
    assert_raises(ValueError, get_data, True, [1, 2])

Source File: test_datasets.py From attention-lvcsr with MIT License

5 votes

def test_value_error_get_data_none_request(self):
    """``get_data(None, None)`` should raise ``ValueError``."""
    dataset = IndexableDataset([1, 2, 3])
    get_data = dataset.get_data
    assert_raises(ValueError, get_data, None, None)

Python fuel.datasets.IndexableDataset() Examples