Python fuel.transformers.Batch() Examples
The following are 14 code examples of fuel.transformers.Batch().
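All of the examples share the same basic pattern: wrap an example-level stream in Batch with a ConstantScheme so that consecutive examples are grouped into fixed-size minibatches. Here is a minimal, self-contained sketch of that pattern; the toy 'numbers' source and the batch size of 10 are illustrative and not taken from any of the projects below.

import numpy
from fuel.datasets import IterableDataset
from fuel.schemes import ConstantScheme
from fuel.transformers import Batch

# Illustrative toy dataset with a single 'numbers' source of 100 scalars.
dataset = IterableDataset({'numbers': numpy.arange(100, dtype='float32')})

# Batch groups consecutive examples from the wrapped example stream;
# ConstantScheme(10) requests 10 examples per minibatch.
stream = Batch(dataset.get_example_stream(),
               iteration_scheme=ConstantScheme(10))

for (numbers_batch,) in stream.get_epoch_iterator():
    print(len(numbers_batch))  # 10 examples per batch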
Example #1
Source File: data.py From DeepMind-Teaching-Machines-to-Read-and-Comprehend with MIT License
def setup_datastream(path, vocab_file, config):
    ds = QADataset(path, vocab_file, config.n_entities,
                   need_sep_token=config.concat_ctx_and_question)
    it = QAIterator(path, shuffle=config.shuffle_questions)

    stream = DataStream(ds, iteration_scheme=it)

    if config.concat_ctx_and_question:
        stream = ConcatCtxAndQuestion(stream, config.concat_question_before,
                                      ds.reverse_vocab['<SEP>'])

    # Sort sets of multiple batches to make batches of similar sizes
    stream = Batch(stream,
                   iteration_scheme=ConstantScheme(config.batch_size * config.sort_batch_count))
    comparison = _balanced_batch_helper(
        stream.sources.index('question' if config.concat_ctx_and_question else 'context'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)

    stream = Batch(stream, iteration_scheme=ConstantScheme(config.batch_size))
    stream = Padding(stream,
                     mask_sources=['context', 'question', 'candidates'],
                     mask_dtype='int32')

    return ds, stream
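The helper _balanced_batch_helper used above (and again in Example #2) is defined elsewhere in the project and is not shown in this excerpt. A plausible sketch, assuming it merely builds the key function that SortMapping applies to each example tuple, could look like this:

# Hypothetical reconstruction -- the real helper lives elsewhere in the project.
def _balanced_batch_helper(idx):
    def key(example):
        # Sort examples by the length of the source at position `idx`,
        # so sequences of similar length end up in the same minibatch.
        return len(example[idx])
    return key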
Example #2
Source File: timit.py From CTC-LSTM with Apache License 2.0
def setup_datastream(path, batch_size, sort_batch_count, valid=False):
    A = numpy.load(os.path.join(path, ('valid_x_raw.npy' if valid else 'train_x_raw.npy')))
    B = numpy.load(os.path.join(path, ('valid_phn.npy' if valid else 'train_phn.npy')))
    C = numpy.load(os.path.join(path, ('valid_seq_to_phn.npy' if valid else 'train_seq_to_phn.npy')))

    # One 'output' sequence per utterance: rows x[0]:x[1] of B, column 2
    D = [B[x[0]:x[1], 2] for x in C]

    ds = IndexableDataset({'input': A, 'output': D})
    stream = DataStream(ds, iteration_scheme=ShuffledExampleScheme(len(A)))

    # Group several batches together, sort by length, then re-batch,
    # so each minibatch contains sequences of similar length
    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size * sort_batch_count))
    comparison = _balanced_batch_helper(stream.sources.index('input'))
    stream = Mapping(stream, SortMapping(comparison))
    stream = Unpack(stream)

    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size, num_examples=len(A)))
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream
Example #3
Source File: build_dataset.py From seq2seq-keyphrase with MIT License
def obtain_stream(dataset, batch_size, size=1):
    if size == 1:
        data_stream = dataset.get_example_stream()
        data_stream = transformers.Batch(data_stream,
                                         iteration_scheme=schemes.ConstantScheme(batch_size))
        # add padding and masks to the dataset
        # (note: ('data') is just the string 'data', not a one-element tuple)
        data_stream = transformers.Padding(data_stream, mask_sources=('data'))
        return data_stream
    else:
        data_streams = [dataset.get_example_stream() for _ in range(size)]
        data_streams = [transformers.Batch(data_stream,
                                           iteration_scheme=schemes.ConstantScheme(batch_size))
                        for data_stream in data_streams]
        data_streams = [transformers.Padding(data_stream, mask_sources=('data'))
                        for data_stream in data_streams]
        return data_streams
Example #4
Source File: keyphrase_copynet.py From seq2seq-keyphrase with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))

    # Add padding and masks to the dataset.
    # Warning: in the multiple-output case this raises
    # "ValueError: All dimensions except length must be equal",
    # so the padding has to be done manually.
    # data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target', 'target_c'))
    # data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))

    return data_stream
Example #5
Source File: toy_dataset.py From CTC-LSTM with Apache License 2.0
def setup_datastream(batch_size, **kwargs):
    ds = ToyDataset(**kwargs)

    stream = DataStream(ds, iteration_scheme=SequentialExampleScheme(kwargs['nb_examples']))
    stream = Batch(stream, iteration_scheme=ConstantScheme(batch_size))
    stream = Padding(stream, mask_sources=['input', 'output'])

    return ds, stream
Example #6
Source File: __init__.py From blocks-examples with MIT License
def get_data_stream(iterable):
    """Returns a 'fuel.Batch' datastream of
    [x~input~numbers, y~targets~roots],
    with each iteration returning a batch of 20 training examples
    """
    numbers = numpy.asarray(iterable, dtype=floatX)
    dataset = IterableDataset(
        {'numbers': numbers, 'roots': numpy.sqrt(numbers)})
    return Batch(dataset.get_example_stream(), ConstantScheme(20))
Example #7
Source File: weibo_vest.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
    return data_stream
Example #8
Source File: bst_vest.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
    return data_stream
Example #9
Source File: syn_vest.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
    return data_stream
Example #10
Source File: lcsts_test.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target', 'target_c'))
    return data_stream
Example #11
Source File: lcsts_vest.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
    return data_stream
Example #12
Source File: syntest.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target', 'target_c'))
    return data_stream
Example #13
Source File: copynet.py From CopyNet with MIT License
def output_stream(dataset, batch_size, size=1):
    data_stream = dataset.get_example_stream()
    data_stream = transformers.Batch(data_stream,
                                     iteration_scheme=schemes.ConstantScheme(batch_size))
    # add padding and masks to the dataset
    data_stream = transformers.Padding(data_stream, mask_sources=('source', 'target'))
    return data_stream
Example #14
Source File: build_dataset.py From CopyNet with MIT License
def obtain_stream(dataset, batch_size, size=1):
    if size == 1:
        data_stream = dataset.get_example_stream()
        data_stream = transformers.Batch(data_stream,
                                         iteration_scheme=schemes.ConstantScheme(batch_size))
        # add padding and masks to the dataset
        data_stream = transformers.Padding(data_stream, mask_sources=('data'))
        return data_stream
    else:
        data_streams = [dataset.get_example_stream() for _ in xrange(size)]
        data_streams = [transformers.Batch(data_stream,
                                           iteration_scheme=schemes.ConstantScheme(batch_size))
                        for data_stream in data_streams]
        data_streams = [transformers.Padding(data_stream, mask_sources=('data'))
                        for data_stream in data_streams]
        return data_streams
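However the stream is built, it is consumed in the usual fuel way. A minimal usage sketch, in which the dataset, batch size, and process() step are placeholders rather than values from any of the examples above:

# Hypothetical usage -- `dataset` stands in for whatever fuel Dataset the
# project builds (e.g. tokenised source/target pairs).
stream = obtain_stream(dataset, batch_size=32)

# One pass over the data; with as_dict=True every minibatch is a dictionary
# mapping source names (plus the '<source>_mask' entries added by Padding)
# to padded arrays.
for batch in stream.get_epoch_iterator(as_dict=True):
    process(batch)  # placeholder for the training / evaluation step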