Python chainer.dataset() Examples
The following are 30 code examples of chainer.dataset(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chainer, or try the search function.
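Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of two chainer.dataset building blocks that many of them rely on: a custom dataset built on chainer.dataset.DatasetMixin and batch concatenation with chainer.dataset.concat_examples.

import numpy as np
import chainer

class SquaresDataset(chainer.dataset.DatasetMixin):
    # A toy dataset: example i is the pair (i, i**2).
    def __len__(self):
        return 10

    def get_example(self, i):
        return np.float32(i), np.float32(i * i)

dataset = SquaresDataset()
batch = [dataset[i] for i in range(4)]           # a list of (x, y) tuples
xs, ys = chainer.dataset.concat_examples(batch)  # two ndarrays of shape (4,)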
Example #1
Source File: convert.py From chainer with MIT License | 6 votes |
def _call_converter(converter, batch, device):
    # Calls the converter.
    # Converter can be either new-style (accepts chainer.backend.Device) or
    # old-style (accepts int as device).
    assert device is None or isinstance(device, backend.Device)

    if isinstance(converter, Converter):
        # New-style converter
        return converter(batch, device)

    # Old-style converter
    if device is None:
        return converter(batch, None)
    if device.xp is numpy:
        return converter(batch, -1)
    if device.xp is cuda.cupy:
        return converter(batch, device.device.id)

    raise RuntimeError(
        'Converter does not support ChainerX. '
        'Use chainer.dataset.converter decorator.')
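For comparison, here is a minimal sketch (assuming Chainer 6 or later, where the chainer.dataset.converter decorator exists) of a new-style converter that the first branch above would dispatch to; the function name is illustrative, not from the project:

import chainer

@chainer.dataset.converter()
def my_converter(batch, device):
    # device arrives as a chainer.backend.Device, so no int/None juggling is needed.
    return chainer.dataset.concat_examples(batch, device)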
Example #2
Source File: utils_pretrain.py From models with MIT License | 6 votes |
def __next__(self):
    # This iterator returns a list representing a mini-batch. Each item
    # indicates a different position in the original sequence. Each item is
    # represented by a pair of two word IDs. The first word is at the
    # "current" position, while the second word at the next position.
    # At each iteration, the iteration count is incremented, which pushes
    # forward the "current" position.
    length = len(self.dataset)
    if not self.repeat and self.iteration * self.batch_size >= length:
        # If not self.repeat, this iterator stops at the end of the first
        # epoch (i.e., when all words are visited once).
        raise StopIteration
    cur_words = self.get_words()
    self._previous_epoch_detail = self.epoch_detail
    self.iteration += 1
    next_words = self.get_words()

    epoch = self.iteration * self.batch_size // length
    self.is_new_epoch = self.epoch < epoch
    if self.is_new_epoch:
        self.epoch = epoch

    return list(zip(cur_words, next_words))
Example #3
Source File: train.py From portrait_matting with GNU General Public License v3.0 | 6 votes |
def setup_dataset(mode, crop_dir, mask_dir=None, mean_mask_dir=None,
                  mean_grid_dir=None, trimap_dir=None, alpha_dir=None,
                  alpha_weight_dir=None):
    # Create dataset
    dataset = datasets.create(mode, crop_dir, mask_dir, mean_mask_dir,
                              mean_grid_dir, trimap_dir, alpha_dir,
                              alpha_weight_dir)

    # Create transform function
    transform = transforms.create(mode)
    transform_random = transforms.transform_random

    # Split into train and test
    train_raw, test_raw = datasets.split_dataset(dataset)

    # Increase data variety
    train_raw = chainer.datasets.TransformDataset(train_raw, transform_random)

    # Transform for network inputs
    train = chainer.datasets.TransformDataset(train_raw, transform)
    test = chainer.datasets.TransformDataset(test_raw, transform)

    return train, test
Example #4
Source File: chain_utils.py From contextual_augmentation with MIT License | 6 votes |
def convert_sequence_chain(batch, device):
    def to_device_batch(batch):
        if device is None:
            return batch
        elif device < 0:
            return [chainer.dataset.to_device(device, x) for x in batch]
        else:
            xp = cuda.cupy.get_array_module(*batch)
            concat = xp.concatenate(batch, axis=0)
            sections = np.cumsum([len(x) for x in batch[:-1]], dtype='i')
            concat_dev = chainer.dataset.to_device(device, concat)
            batch_dev = cuda.cupy.split(concat_dev, sections)
            return batch_dev

    return [to_device_batch([x[i] for x in batch])
            for i in range(len(batch[0]))]
Example #5
Source File: single_machine_custom_loop.py From sagemaker-chainer-container with Apache License 2.0 | 6 votes |
def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype,
                      rgb_format):
    images = raw['x']
    if ndim == 2:
        images = images.reshape(-1, 28, 28)
    elif ndim == 3:
        images = images.reshape(-1, 1, 28, 28)
        if rgb_format:
            images = np.broadcast_to(images,
                                     (len(images), 3) + images.shape[2:])
    elif ndim != 1:
        raise ValueError('invalid ndim for MNIST dataset')
    images = images.astype(image_dtype)
    images *= scale / 255.

    if withlabel:
        labels = raw['y'].astype(label_dtype)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
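This helper sits behind the public chainer.datasets.get_mnist loader; a short usage sketch (argument values are illustrative):

import chainer

# Returns TupleDataset objects of (image, label) pairs; ndim=3 keeps a
# channel axis, so each image has shape (1, 28, 28) scaled to [0, 1].
train, test = chainer.datasets.get_mnist(ndim=3)
image, label = train[0]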
Example #6
Source File: imagenet1k1.py From imgclsmob with MIT License | 6 votes |
def get_val_data_iterator(data_dir, batch_size, num_workers, num_classes):
    val_dir_path = os.path.join(data_dir, 'val')
    val_dataset = DirectoryParsingLabelDataset(val_dir_path)
    val_dataset_len = len(val_dataset)
    assert(len(directory_parsing_label_names(val_dir_path)) == num_classes)
    val_iterator = iterators.MultiprocessIterator(
        dataset=val_dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=300000000)
    return val_iterator, val_dataset_len
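A hedged sketch of consuming the returned iterator; the directory path and sizes below are hypothetical:

val_iterator, val_dataset_len = get_val_data_iterator(
    data_dir='imagenet', batch_size=32, num_workers=4, num_classes=1000)

for batch in val_iterator:
    # Each batch is a list of (image, label) pairs; raw images usually still
    # need a resize/normalize transform before being fed to a model.
    for image, label in batch:
        pass  # ... preprocessing and inference would go here ...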
Example #7
Source File: utils_pretrain.py From models with MIT License | 6 votes |
def __init__(self, dataset, batch_size, repeat=True):
    self.dataset = dataset
    self.batch_size = batch_size  # batch size
    # Number of completed sweeps over the dataset. In this case, it is
    # incremented if every word is visited at least once after the last
    # increment.
    self.epoch = 0
    # True if the epoch is incremented at the last iteration.
    self.is_new_epoch = False
    self.repeat = repeat
    length = len(dataset)
    # Offsets maintain the position of each sequence in the mini-batch.
    self.offsets = [i * length // batch_size for i in range(batch_size)]
    # NOTE: this is not a count of parameter updates. It is just a count of
    # calls of ``__next__``.
    self.iteration = 0
    # use -1 instead of None internally
    self._previous_epoch_detail = -1.
Example #8
Source File: cifar1.py From imgclsmob with MIT License | 6 votes |
def get_val_data_iterator(dataset_name, batch_size, num_workers):
    if dataset_name == "CIFAR10":
        _, test_ds = cifar.get_cifar10()
    elif dataset_name == "CIFAR100":
        _, test_ds = cifar.get_cifar100()
    elif dataset_name == "SVHN":
        _, test_ds = svhn.get_svhn()
    else:
        raise Exception('Unrecognized dataset: {}'.format(dataset_name))
    val_dataset = test_ds
    val_dataset_len = len(val_dataset)
    val_iterator = iterators.MultiprocessIterator(
        dataset=val_dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=300000000)
    return val_iterator, val_dataset_len
Example #9
Source File: train_ptb_custom_loop.py From chainer with MIT License | 6 votes |
def __next__(self):
    # This iterator returns a list representing a mini-batch. Each item
    # indicates a different position in the original sequence. Each item is
    # represented by a pair of two word IDs. The first word is at the
    # "current" position, while the second word at the next position.
    # At each iteration, the iteration count is incremented, which pushes
    # forward the "current" position.
    length = len(self.dataset)
    if not self.repeat and self.iteration * self.batch_size >= length:
        # If not self.repeat, this iterator stops at the end of the first
        # epoch (i.e., when all words are visited once).
        raise StopIteration
    cur_words = self.get_words()
    self._previous_epoch_detail = self.epoch_detail
    self.iteration += 1
    next_words = self.get_words()

    epoch = self.iteration * self.batch_size // length
    self.is_new_epoch = self.epoch < epoch
    if self.is_new_epoch:
        self.epoch = epoch

    return list(zip(cur_words, next_words))
Example #10
Source File: seq2seq.py From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __call__(self, trainer):
    print('## Calculate BLEU')
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            references = []
            hypotheses = []
            for i in range(0, len(self.test_data), self.batch):
                sources, targets = zip(*self.test_data[i:i + self.batch])
                references.extend([[t.tolist()] for t in targets])
                sources = [
                    chainer.dataset.to_device(self.device, x)
                    for x in sources]
                ys = [y.tolist()
                      for y in self.model.translate(sources, self.max_length)]
                hypotheses.extend(ys)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1) * 100
    print('BLEU:', bleu)
    reporter.report({self.key: bleu})
Example #11
Source File: cifar.py From chainer with MIT License | 6 votes |
def _preprocess_cifar(images, labels, withlabel, ndim, scale, dtype):
    if ndim == 1:
        images = images.reshape(-1, 3072)
    elif ndim == 3:
        images = images.reshape(-1, 3, 32, 32)
    else:
        raise ValueError('invalid ndim for CIFAR dataset')
    dtype = chainer.get_dtype(dtype)
    images = images.astype(dtype)
    images *= scale / 255.

    if withlabel:
        labels = labels.astype(numpy.int32)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images
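As with the MNIST helper earlier, this function backs a public loader; a minimal sketch of that entry point:

import chainer

# Each element is an (image, label) pair; ndim=3 gives (3, 32, 32) images
# scaled to [0, 1].
train, test = chainer.datasets.get_cifar10(ndim=3, scale=1.0)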
Example #12
Source File: tabular_dataset.py From chainer with MIT License | 6 votes |
def convert(self, data):
    """Convert fetched data.

    This method takes data fetched by :meth:`fetch` and
    pre-process them before passing them to models.
    The default behaviour is converting each column into an ndarray.
    This behaviour can be overridden by :meth:`with_converter`.
    If the dataset is constructed by :meth:`concat` or :meth:`join`,
    the converter of the first dataset is used.

    Args:
        data (tuple or dict): Data from :meth:`fetch`.

    Returns:
        A tuple or dict.
        Each value is an ndarray.
    """
    if isinstance(data, tuple):
        return tuple(_as_array(d) for d in data)
    elif isinstance(data, dict):
        return {k: _as_array(v) for k, v in data.items()}
    else:
        return _as_array(data)
Example #13
Source File: tabular_dataset.py From chainer with MIT License | 6 votes |
def fetch(self):
    """Fetch data.

    This method fetches all data of the dataset/view.
    Note that this method returns a column-major data
    (i.e. :obj:`([a[0], ..., a[3]], ..., [c[0], ... c[3]])`,
    :obj:`{'a': [a[0], ..., a[3]], ..., 'c': [c[0], ..., c[3]]}`, or
    :obj:`[a[0], ..., a[3]]`).

    Returns:
        If :attr:`mode` is :class:`tuple`,
        this method returns a tuple of lists/arrays.
        If :attr:`mode` is :class:`dict`,
        this method returns a dict of lists/arrays.
    """
    examples = self.get_examples(None, None)
    if self.mode is tuple:
        return examples
    elif self.mode is dict:
        return dict(six.moves.zip(self.keys, examples))
    elif self.mode is None:
        return examples[0]
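A minimal sketch (assuming Chainer 7, where chainer.dataset.tabular.from_data is available) of calling fetch() and convert() on a small tabular dataset; the column names are made up:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((
    ('x', np.arange(4, dtype=np.float32)),
    ('y', np.arange(4, dtype=np.int32) * 2)))

columns = dataset.fetch()          # column-major data, here a tuple of arrays
arrays = dataset.convert(columns)  # each column converted to an ndarray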
Example #14
Source File: train_ptb_custom_loop.py From chainer with MIT License | 6 votes |
def __init__(self, dataset, batch_size, repeat=True):
    self.dataset = dataset
    self.batch_size = batch_size  # batch size
    # Number of completed sweeps over the dataset. In this case, it is
    # incremented if every word is visited at least once after the last
    # increment.
    self.epoch = 0
    # True if the epoch is incremented at the last iteration.
    self.is_new_epoch = False
    self.repeat = repeat
    length = len(dataset)
    # Offsets maintain the position of each sequence in the mini-batch.
    self.offsets = [i * length // batch_size for i in range(batch_size)]
    # NOTE: this is not a count of parameter updates. It is just a count of
    # calls of ``__next__``.
    self.iteration = 0
    # use -1 instead of None internally
    self._previous_epoch_detail = -1.
Example #15
Source File: train_ptb_custom_loop.py From chainer with MIT License | 6 votes |
def serialize(self, serializer):
    # It is important to serialize the state to be recovered on resume.
    self.iteration = serializer('iteration', self.iteration)
    self.epoch = serializer('epoch', self.epoch)
    try:
        self._previous_epoch_detail = serializer(
            'previous_epoch_detail', self._previous_epoch_detail)
    except KeyError:
        # guess previous_epoch_detail for older version
        self._previous_epoch_detail = self.epoch + \
            (self.current_position - self.batch_size) / len(self.dataset)
        if self.epoch_detail > 0:
            self._previous_epoch_detail = max(
                self._previous_epoch_detail, 0.)
        else:
            self._previous_epoch_detail = -1.
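Examples #9, #14, and #15 (and their mirrors in Examples #2 and #7) belong to the same custom iterator from Chainer's PTB training example. A usage sketch, assuming the enclosing class is named ParallelSequentialIterator as in that example:

import chainer

train, val, test = chainer.datasets.get_ptb_words()
train_iter = ParallelSequentialIterator(train, batch_size=20)

batch = next(train_iter)  # a list of (current_word, next_word) ID pairs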
Example #16
Source File: train.py From portrait_matting with GNU General Public License v3.0 | 5 votes |
def parse_arguments(argv):
    parser = argparse.ArgumentParser(description='Training Script')
    parser.add_argument('--config', '-c', default='config.json',
                        help='Configure json filepath')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--max_iteration', '-e', type=int, default=30000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpus', '-g', type=int, default=[-1], nargs='*',
                        help='GPU IDs (negative value indicates CPU)')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Initial learning rate')
    parser.add_argument('--momentum', default=0.99,
                        help='Momentum for SGD')
    parser.add_argument('--weight_decay', default=0.0005,
                        help='Weight decay')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--mode', choices=['seg', 'seg+', 'seg_tri', 'mat'],
                        help='Training mode', required=True)
    parser.add_argument('--pretrained_fcn8s', default=None,
                        help='Pretrained model path of FCN8s')
    parser.add_argument('--pretrained_n_input_ch', default=3, type=int,
                        help='Input channel number of Pretrained model')
    parser.add_argument('--pretrained_n_output_ch', default=21, type=int,
                        help='Output channel number of Pretrained model')
    parser.add_argument('--mat_scale', default=4, type=int,
                        help='Matting scale for speed up')
    args = parser.parse_args(argv)

    return args
Example #17
Source File: image_dataset.py From kiss with GNU General Public License v3.0 | 5 votes |
def __init__(self, image_size, npz_file=None, memory_manager=None, base_name=None,
             root='.', dtype=None, transform_probability=0, use_imgaug=True,
             keep_aspect_ratio=False, image_mode='RGB', full_normalize=False,
             resize_after_load=True):
    _check_pillow_availability()
    if not isinstance(image_size, Size):
        image_size = Size(*image_size)
    self.shared_buffers = []
    self.root = root
    self.dtype = chainer.get_dtype(dtype)
    self.image_size = image_size
    self.transform_probability = transform_probability
    self.use_imgaug = use_imgaug
    self.keep_aspect_ratio = keep_aspect_ratio
    self.image_mode = image_mode
    # normalize each image to be in range of [0, 1] even if brightest pixel is != 255
    self.full_normalize = full_normalize
    # resize the image to self.image_size after loading
    self.resize_after_load = resize_after_load

    if npz_file is not None:
        assert isinstance(npz_file, six.string_types), "paths must be a file name!"
        assert os.path.splitext(npz_file)[-1] == ".npz", \
            "You have to supply gt information as npz file!"

        with numpy.load(npz_file, allow_pickle=True) as gt_data:
            self.gt_data = self.copy_npz_data_to_ram(gt_data)

        self.memory_manager = None
        self.base_name = None
        self.length = len(self.gt_data['file_name'])
    else:
        assert memory_manager is not None, \
            "If you do not specify an npz file, you must specify a memory manager!"
        assert base_name is not None, \
            "If you want to use shared memory, you'll need to supply a base name for each dataset"

        self.gt_data = None
        self.memory_manager = memory_manager
        self.base_name = base_name
        self.length = self.memory_manager.get_shape(self.base_name, 'file_name').pop(0)

    self.augmentations = self.init_augmentations()
Example #18
Source File: test_delegate_dataset.py From chainer with MIT License | 5 votes |
def test_delegate_dataset(self):
    dataset = tabular.DelegateDataset(
        dummy_dataset.DummyDataset(mode=self.mode))

    self.assertIsInstance(dataset, chainer.dataset.TabularDataset)
    self.assertEqual(len(dataset), len(dataset.dataset))
    self.assertEqual(dataset.keys, dataset.dataset.keys)
    self.assertEqual(dataset.mode, dataset.dataset.mode)
    self.assertEqual(
        dataset.get_example(3), dataset.dataset.get_example(3))
Example #19
Source File: train_utils.py From see with GNU General Public License v3.0 | 5 votes |
def evaluate(self):
    iterator = self._iterators['main']
    target = self._targets['main']
    eval_func = self.eval_func or target

    if self.eval_hook:
        self.eval_hook(self)
    it = copy.copy(iterator)
    summary = reporter_module.DictSummary()

    for _ in range(min(len(iterator.dataset) // iterator.batch_size, self.num_iterations)):
        batch = next(it, None)
        if batch is None:
            break

        observation = {}
        with reporter_module.report_scope(observation), chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
            in_arrays = self.converter(batch, self.device)
            if isinstance(in_arrays, tuple):
                eval_func(*in_arrays)
            elif isinstance(in_arrays, dict):
                eval_func(**in_arrays)
            else:
                eval_func(in_arrays)

        summary.add(observation)

    return summary.compute_mean()
Example #20
Source File: utils_pretrain.py From models with MIT License | 5 votes |
def count_words(dataset, alpha=0.4):
    counts = collections.defaultdict(int)
    for w in dataset:
        counts[w] += 1
    counts = [counts[i] for i in range(len(counts))]
    counts = np.array(counts, 'f')
    counts /= counts.sum()
    counts = counts ** alpha
    counts = counts.tolist()
    return counts
Example #21
Source File: kuzushiji_mnist.py From chainer with MIT License | 5 votes |
def _retrieve_kuzushiji_mnist_training():
    base_url = 'http://codh.rois.ac.jp/'
    urls = [base_url + 'kmnist/dataset/kmnist/train-images-idx3-ubyte.gz',
            base_url + 'kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz']
    return _retrieve_kuzushiji_mnist('train.npz', urls)
Example #22
Source File: tabular_dataset.py From chainer with MIT License | 5 votes |
def transform_batch(self, keys, transform_batch):
    """Apply a transform to examples.

    Args:
        keys (tuple of strs): The keys of transformed examples.
        transform_batch (callable): A callable that takes examples
            and returns transformed examples. :attr:`mode` of
            transformed dataset is determined by the transformed examples.

    Returns:
        A transformed dataset.
    """
    return chainer.dataset.tabular._transform._TransformBatch(
        self, keys, transform_batch)
Example #23
Source File: tabular_dataset.py From chainer with MIT License | 5 votes |
def transform(self, keys, transform):
    """Apply a transform to each example.

    Args:
        keys (tuple of strs): The keys of transformed examples.
        transform (callable): A callable that takes an example
            and returns transformed example. :attr:`mode` of
            transformed dataset is determined by the transformed examples.

    Returns:
        A transformed dataset.
    """
    return chainer.dataset.tabular._transform._Transform(
        self, keys, transform)
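A hedged sketch of transform(); it assumes (as in Chainer 7's tabular API) that for a tuple-mode dataset the callable receives one example's columns as positional arguments, and that returning a dict yields a dict-mode view:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((
    ('x', np.arange(4, dtype=np.float32)),
    ('y', np.arange(4, dtype=np.float32))))

summed = dataset.transform(('sum',), lambda x, y: {'sum': x + y})
example = summed[0]  # {'sum': 0.0}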
Example #24
Source File: tabular_dataset.py From chainer with MIT License | 5 votes |
def concat(self, *datasets):
    """Stack datasets along rows.

    Args:
        datasets (iterable of :class:`TabularDataset`):
            Datasets to be concatenated.
            All datasets must have the same :attr:`keys`.

    Returns:
        A concatenated dataset.
    """
    return chainer.dataset.tabular._concat._Concat(self, *datasets)
Example #25
Source File: tabular_dataset.py From chainer with MIT License | 5 votes |
def asdict(self):
    """Return a view with dict mode.

    Returns:
        A view whose :attr:`mode` is :class:`dict`.
    """
    return chainer.dataset.tabular._asmode._Asdict(self)
Example #26
Source File: tabular_dataset.py From chainer with MIT License | 5 votes |
def astuple(self):
    """Return a view with tuple mode.

    Returns:
        A view whose :attr:`mode` is :class:`tuple`.
    """
    return chainer.dataset.tabular._asmode._Astuple(self)
Example #27
Source File: utils_pretrain.py From models with MIT License | 5 votes |
def get_words(self):
    # It returns a list of current words.
    return [self.dataset[(offset + self.iteration) % len(self.dataset)]
            for offset in self.offsets]
Example #28
Source File: utils_pretrain.py From models with MIT License | 5 votes |
def epoch_detail(self):
    # Floating point version of epoch.
    return self.iteration * self.batch_size / len(self.dataset)
Example #29
Source File: convert.py From chainer with MIT License | 5 votes |
def to_device(device, x):
    """Send an array to a given device.

    This method sends a given array to a given device. This method is
    used in :func:`~chainer.dataset.concat_examples`.
    You can also use this method in a custom converter method used in
    :class:`~chainer.training.Updater` and :class:`~chainer.training.Extension`
    such as :class:`~chainer.training.updaters.StandardUpdater` and
    :class:`~chainer.training.extensions.Evaluator`.

    See also :func:`chainer.dataset.concat_examples`.

    Args:
        device (None or int or device specifier): A device to which an
            array is sent.
            If it is a negative integer, an array is sent to CPU.
            If it is a positive integer, an array is sent to GPU with the
            given ID.
            If it is ``None``, an array is left in the original device.
            Also, any of device specifiers described at
            :class:`~chainer.backend.DeviceId` is accepted.
        x (:ref:`ndarray`): An array to send.

    Returns:
        Converted array.
    """
    device = _get_device(device)
    if device is None:
        return x
    return device.send(x)
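A small sketch of the calls described above; the arrays and devices are illustrative:

import numpy as np
from chainer.dataset import to_device

x = np.zeros((2, 3), dtype=np.float32)

x_cpu = to_device(-1, x)       # negative int: array stays on CPU as NumPy
x_same = to_device(None, x)    # None: array is returned unchanged
x_np = to_device('@numpy', x)  # device specifier string for the NumPy backend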
Example #30
Source File: tabular_dataset.py From chainer with MIT License | 5 votes |
def with_converter(self, converter):
    """Override the behaviour of :meth:`convert`.

    This method overrides :meth:`convert`.

    Args:
        converter (callable): A new converter.

    Returns:
        A dataset with the new converter.
    """
    return chainer.dataset.tabular._with_converter._WithConverter(
        self, converter)
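A hedged sketch of with_converter(); it assumes (as in Chainer 7's tabular API) that the supplied converter receives the fetched columns as positional arguments for a tuple-mode dataset:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((
    ('x', [0, 1, 2, 3]),
    ('y', [0, 2, 4, 6])))

converted = dataset.with_converter(
    lambda x, y: (np.asarray(x, np.float32), np.asarray(y, np.float32)))
arrays = converted.convert(converted.fetch())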