Python chainer.dataset Examples

The following are 30 code examples of the chainer.dataset module, drawn from open-source projects. Each example notes the project and source file it was taken from. You may also want to check out all other available functions and classes of the chainer module.
Example #1
Source File: convert.py    From chainer with MIT License
def _call_converter(converter, batch, device):
    # Calls the converter.
    # Converter can be either new-style (accepts chainer.backend.Device) or
    # old-style (accepts int as device).
    assert device is None or isinstance(device, backend.Device)

    if isinstance(converter, Converter):
        # New-style converter
        return converter(batch, device)

    # Old-style converter
    if device is None:
        return converter(batch, None)
    if device.xp is numpy:
        return converter(batch, -1)
    if device.xp is cuda.cupy:
        return converter(batch, device.device.id)
    raise RuntimeError(
        'Converter does not support ChainerX. '
        'Use chainer.dataset.converter decorator.') 
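As a quick illustration of the old-style convention this helper dispatches on, here is a minimal sketch using the built-in chainer.dataset.concat_examples converter; the toy batch is made up for illustration.

import numpy as np
from chainer.dataset import concat_examples

# A toy batch of (input, label) pairs, as produced by a TupleDataset.
batch = [(np.zeros(3, dtype=np.float32), np.int32(i)) for i in range(4)]

# concat_examples is an old-style converter: device=-1 keeps the arrays on CPU,
# a non-negative integer would send them to the corresponding GPU.
xs, ts = concat_examples(batch, device=-1)
print(xs.shape, ts.shape)  # (4, 3) (4,)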
Example #2
Source File: utils_pretrain.py    From models with MIT License
def __next__(self):
        # This iterator returns a list representing a mini-batch. Each item
        # indicates a different position in the original sequence. Each item is
        # represented by a pair of two word IDs. The first word is at the
        # "current" position, while the second word at the next position.
        # At each iteration, the iteration count is incremented, which pushes
        # forward the "current" position.
        length = len(self.dataset)
        if not self.repeat and self.iteration * self.batch_size >= length:
            # If not self.repeat, this iterator stops at the end of the first
            # epoch (i.e., when all words are visited once).
            raise StopIteration
        cur_words = self.get_words()
        self._previous_epoch_detail = self.epoch_detail
        self.iteration += 1
        next_words = self.get_words()

        epoch = self.iteration * self.batch_size // length
        self.is_new_epoch = self.epoch < epoch
        if self.is_new_epoch:
            self.epoch = epoch

        return list(zip(cur_words, next_words)) 
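A hypothetical way to drive this iterator, assuming the surrounding class is a parallel-sequential iterator like the ParallelSequentialIterator in Chainer's PTB example and that the dataset is a flat array of word IDs:

import numpy as np

# Hypothetical usage; ParallelSequentialIterator stands in for whatever class
# this __next__ belongs to, and the word IDs are made up.
train_data = np.arange(1000, dtype=np.int32)
train_iter = ParallelSequentialIterator(train_data, batch_size=20)

batch = next(train_iter)        # list of (current_word, next_word) pairs
print(len(batch))               # 20
print(train_iter.epoch_detail)  # fraction of the data seen so far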
Example #3
Source File: train.py    From portrait_matting with GNU General Public License v3.0
def setup_dataset(mode, crop_dir, mask_dir=None, mean_mask_dir=None,
                  mean_grid_dir=None, trimap_dir=None, alpha_dir=None,
                  alpha_weight_dir=None):
    # Create dataset
    dataset = datasets.create(mode, crop_dir, mask_dir, mean_mask_dir,
                              mean_grid_dir, trimap_dir, alpha_dir,
                              alpha_weight_dir)

    # Create transform function
    transform = transforms.create(mode)
    transform_random = transforms.transform_random

    # Split into train and test
    train_raw, test_raw = datasets.split_dataset(dataset)

    # Increase data variety
    train_raw = chainer.datasets.TransformDataset(train_raw, transform_random)

    # Transform for network inputs
    train = chainer.datasets.TransformDataset(train_raw, transform)
    test = chainer.datasets.TransformDataset(test_raw, transform)

    return train, test 
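The core pattern here is stacking chainer.datasets.TransformDataset wrappers so that the augmentation and the input transform are applied lazily on access. A minimal sketch with placeholder transforms (not the project's own):

import chainer

base = list(range(10))

# First wrapper plays the role of transform_random, the second of transform.
augmented = chainer.datasets.TransformDataset(base, lambda x: x * 2)
train = chainer.datasets.TransformDataset(augmented, lambda x: float(x))

print(train[3])  # 6.0 -- both transforms run only when the example is accessed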
Example #4
Source File: chain_utils.py    From contextual_augmentation with MIT License
def convert_sequence_chain(batch, device):
    def to_device_batch(batch):
        if device is None:
            return batch
        elif device < 0:
            return [chainer.dataset.to_device(device, x) for x in batch]
        else:
            xp = cuda.cupy.get_array_module(*batch)
            concat = xp.concatenate(batch, axis=0)
            sections = np.cumsum([len(x) for x in batch[:-1]], dtype='i')
            concat_dev = chainer.dataset.to_device(device, concat)
            batch_dev = cuda.cupy.split(concat_dev, sections)
            return batch_dev

    return [to_device_batch([x[i] for x in batch])
            for i in range(len(batch[0]))] 
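A small sketch of what the converter produces on CPU; the toy batch of variable-length sequences is made up, and device=None simply leaves the arrays where they are.

import numpy as np

# Two examples, each a pair of variable-length sequences.
batch = [
    (np.arange(3, dtype=np.int32), np.arange(5, dtype=np.int32)),
    (np.arange(4, dtype=np.int32), np.arange(2, dtype=np.int32)),
]

cols = convert_sequence_chain(batch, device=None)
print(len(cols), [len(c) for c in cols])  # 2 columns, each a list of 2 arrays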
Example #5
Source File: single_machine_custom_loop.py    From sagemaker-chainer-container with Apache License 2.0
def _preprocess_mnist(raw, withlabel, ndim, scale, image_dtype, label_dtype, rgb_format):
    images = raw['x']
    if ndim == 2:
        images = images.reshape(-1, 28, 28)
    elif ndim == 3:
        images = images.reshape(-1, 1, 28, 28)
        if rgb_format:
            images = np.broadcast_to(images, (len(images), 3) + images.shape[2:])
    elif ndim != 1:
        raise ValueError('invalid ndim for MNIST dataset')
    images = images.astype(image_dtype)
    images *= scale / 255.

    if withlabel:
        labels = raw['y'].astype(label_dtype)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images 
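A rough sketch of calling this preprocessor with a fabricated raw payload; in the actual script the raw dict comes from the downloaded MNIST .npz file.

import numpy as np

raw = {
    'x': np.random.randint(0, 256, (10, 784)).astype(np.uint8),
    'y': np.random.randint(0, 10, (10,)).astype(np.uint8),
}

train = _preprocess_mnist(raw, withlabel=True, ndim=3, scale=1.0,
                          image_dtype=np.float32, label_dtype=np.int32,
                          rgb_format=False)
print(train[0][0].shape, train[0][1].dtype)  # (1, 28, 28) int32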
Example #6
Source File: imagenet1k1.py    From imgclsmob with MIT License
def get_val_data_iterator(data_dir,
                          batch_size,
                          num_workers,
                          num_classes):

    val_dir_path = os.path.join(data_dir, 'val')
    val_dataset = DirectoryParsingLabelDataset(val_dir_path)
    val_dataset_len = len(val_dataset)
    assert(len(directory_parsing_label_names(val_dir_path)) == num_classes)

    val_iterator = iterators.MultiprocessIterator(
        dataset=val_dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=300000000)

    return val_iterator, val_dataset_len 
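A hypothetical call; the directory layout (a val/ folder with one subdirectory per class, as expected by chainercv's DirectoryParsingLabelDataset) and the paths are assumptions.

val_iter, val_len = get_val_data_iterator(
    data_dir='/data/imagenet', batch_size=64, num_workers=4, num_classes=1000)

for batch in val_iter:          # a single pass, because repeat=False
    images, labels = zip(*batch)
    # ... run the model on this mini-batch ...
val_iter.finalize()             # shut the worker processes down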
Example #7
Source File: utils_pretrain.py    From models with MIT License
def __init__(self, dataset, batch_size, repeat=True):
        self.dataset = dataset
        self.batch_size = batch_size  # batch size
        # Number of completed sweeps over the dataset. In this case, it is
        # incremented if every word is visited at least once after the last
        # increment.
        self.epoch = 0
        # True if the epoch is incremented at the last iteration.
        self.is_new_epoch = False
        self.repeat = repeat
        length = len(dataset)
        # Offsets maintain the position of each sequence in the mini-batch.
        self.offsets = [i * length // batch_size for i in range(batch_size)]
        # NOTE: this is not a count of parameter updates. It is just a count of
        # calls of ``__next__``.
        self.iteration = 0
        # use -1 instead of None internally
        self._previous_epoch_detail = -1. 
Example #8
Source File: cifar1.py    From imgclsmob with MIT License
def get_val_data_iterator(dataset_name,
                          batch_size,
                          num_workers):

    if dataset_name == "CIFAR10":
        _, test_ds = cifar.get_cifar10()
    elif dataset_name == "CIFAR100":
        _, test_ds = cifar.get_cifar100()
    elif dataset_name == "SVHN":
        _, test_ds = svhn.get_svhn()
    else:
        raise Exception('Unrecognized dataset: {}'.format(dataset_name))

    val_dataset = test_ds
    val_dataset_len = len(val_dataset)

    val_iterator = iterators.MultiprocessIterator(
        dataset=val_dataset,
        batch_size=batch_size,
        repeat=False,
        shuffle=False,
        n_processes=num_workers,
        shared_mem=300000000)

    return val_iterator, val_dataset_len 
Example #9
Source File: train_ptb_custom_loop.py    From chainer with MIT License
def __next__(self):
        # This iterator returns a list representing a mini-batch. Each item
        # indicates a different position in the original sequence. Each item is
        # represented by a pair of two word IDs. The first word is at the
        # "current" position, while the second word at the next position.
        # At each iteration, the iteration count is incremented, which pushes
        # forward the "current" position.
        length = len(self.dataset)
        if not self.repeat and self.iteration * self.batch_size >= length:
            # If not self.repeat, this iterator stops at the end of the first
            # epoch (i.e., when all words are visited once).
            raise StopIteration
        cur_words = self.get_words()
        self._previous_epoch_detail = self.epoch_detail
        self.iteration += 1
        next_words = self.get_words()

        epoch = self.iteration * self.batch_size // length
        self.is_new_epoch = self.epoch < epoch
        if self.is_new_epoch:
            self.epoch = epoch

        return list(zip(cur_words, next_words)) 
Example #10
Source File: seq2seq.py    From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License
def __call__(self, trainer):
        print('## Calculate BLEU')
        with chainer.no_backprop_mode():
            with chainer.using_config('train', False):
                references = []
                hypotheses = []
                for i in range(0, len(self.test_data), self.batch):
                    sources, targets = zip(*self.test_data[i:i + self.batch])
                    references.extend([[t.tolist()] for t in targets])

                    sources = [
                        chainer.dataset.to_device(self.device, x) for x in sources]
                    ys = [y.tolist()
                          for y in self.model.translate(sources, self.max_length)]
                    hypotheses.extend(ys)

        bleu = bleu_score.corpus_bleu(
            references, hypotheses,
            smoothing_function=bleu_score.SmoothingFunction().method1) * 100
        print('BLEU:', bleu)
        reporter.report({self.key: bleu}) 
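A hypothetical way to hook this extension into a trainer, assuming the surrounding class is called CalculateBleu and takes the model, held-out data, a report key, the batch size and the device, as in Chainer's seq2seq examples:

trainer.extend(
    CalculateBleu(model, test_data, key='validation/main/bleu',
                  batch=64, device=args.gpu),
    trigger=(200, 'iteration'))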
Example #11
Source File: cifar.py    From chainer with MIT License
def _preprocess_cifar(images, labels, withlabel, ndim, scale, dtype):
    if ndim == 1:
        images = images.reshape(-1, 3072)
    elif ndim == 3:
        images = images.reshape(-1, 3, 32, 32)
    else:
        raise ValueError('invalid ndim for CIFAR dataset')
    dtype = chainer.get_dtype(dtype)
    images = images.astype(dtype)
    images *= scale / 255.

    if withlabel:
        labels = labels.astype(numpy.int32)
        return tuple_dataset.TupleDataset(images, labels)
    else:
        return images 
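A quick sketch with fabricated arrays; the real images and labels come from the downloaded CIFAR batches.

import numpy as np

images = np.random.randint(0, 256, (8, 3072)).astype(np.uint8)
labels = np.random.randint(0, 10, (8,))

train = _preprocess_cifar(images, labels, withlabel=True, ndim=3,
                          scale=1.0, dtype=np.float32)
print(train[0][0].shape, train[0][0].max() <= 1.0)  # (3, 32, 32) True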
Example #12
Source File: tabular_dataset.py    From chainer with MIT License
def convert(self, data):
        """Convert fetched data.

        This method takes data fetched by :meth:`fetch` and
        pre-process them before passing them to models.
        The default behaviour is converting each column into an ndarray.
        This behaviour can be overridden by :meth:`with_converter`.
        If the dataset is constructed by :meth:`concat` or :meth:`join`,
        the converter of the first dataset is used.

        Args:
            data (tuple or dict): Data from :meth:`fetch`.

        Returns:
            A tuple or dict.
            Each value is an ndarray.
        """
        if isinstance(data, tuple):
            return tuple(_as_array(d) for d in data)
        elif isinstance(data, dict):
            return {k: _as_array(v) for k, v in data.items()}
        else:
            return _as_array(data) 
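A minimal sketch of fetch/convert on a tabular dataset, assuming Chainer v7's chainer.dataset.tabular.from_data helper:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((
    ('a', np.arange(4)),
    ('b', [0.1 * i for i in range(4)])))

data = dataset.fetch()            # column-major: one list/array per key
arrays = dataset.convert(data)    # every column becomes an ndarray
print([a.shape for a in arrays])  # [(4,), (4,)]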
Example #13
Source File: tabular_dataset.py    From chainer with MIT License
def fetch(self):
        """Fetch data.

        This method fetches all data of the dataset/view.
        Note that this method returns a column-major data
        (i.e. :obj:`([a[0], ..., a[3]], ..., [c[0], ... c[3]])`,
        :obj:`{'a': [a[0], ..., a[3]], ..., 'c': [c[0], ..., c[3]]}`, or
        :obj:`[a[0], ..., a[3]]`).

        Returns:
            If :attr:`mode` is :class:`tuple`,
            this method returns a tuple of lists/arrays.
            If :attr:`mode` is :class:`dict`,
            this method returns a dict of lists/arrays.
        """
        examples = self.get_examples(None, None)
        if self.mode is tuple:
            return examples
        elif self.mode is dict:
            return dict(six.moves.zip(self.keys, examples))
        elif self.mode is None:
            return examples[0] 
Example #14
Source File: train_ptb_custom_loop.py    From chainer with MIT License
def __init__(self, dataset, batch_size, repeat=True):
        self.dataset = dataset
        self.batch_size = batch_size  # batch size
        # Number of completed sweeps over the dataset. In this case, it is
        # incremented if every word is visited at least once after the last
        # increment.
        self.epoch = 0
        # True if the epoch is incremented at the last iteration.
        self.is_new_epoch = False
        self.repeat = repeat
        length = len(dataset)
        # Offsets maintain the position of each sequence in the mini-batch.
        self.offsets = [i * length // batch_size for i in range(batch_size)]
        # NOTE: this is not a count of parameter updates. It is just a count of
        # calls of ``__next__``.
        self.iteration = 0
        # use -1 instead of None internally
        self._previous_epoch_detail = -1. 
Example #15
Source File: train_ptb_custom_loop.py    From chainer with MIT License
def serialize(self, serializer):
        # It is important to serialize the state to be recovered on resume.
        self.iteration = serializer('iteration', self.iteration)
        self.epoch = serializer('epoch', self.epoch)
        try:
            self._previous_epoch_detail = serializer(
                'previous_epoch_detail', self._previous_epoch_detail)
        except KeyError:
            # guess previous_epoch_detail for older version
            self._previous_epoch_detail = self.epoch + \
                (self.current_position - self.batch_size) / len(self.dataset)
            if self.epoch_detail > 0:
                self._previous_epoch_detail = max(
                    self._previous_epoch_detail, 0.)
            else:
                self._previous_epoch_detail = -1. 
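Because the iterator exposes serialize(), it can be saved and restored with Chainer's standard serializers; a rough sketch (the file name is made up):

import chainer

# Snapshot and restore the iterator state (iteration, epoch, previous_epoch_detail).
chainer.serializers.save_npz('iterator_state.npz', train_iter)
chainer.serializers.load_npz('iterator_state.npz', train_iter)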
Example #16
Source File: train.py    From portrait_matting with GNU General Public License v3.0
def parse_arguments(argv):
    parser = argparse.ArgumentParser(description='Training Script')
    parser.add_argument('--config', '-c', default='config.json',
                        help='Configure json filepath')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--max_iteration', '-e', type=int, default=30000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpus', '-g', type=int, default=[-1], nargs='*',
                        help='GPU IDs (negative value indicates CPU)')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Initial learning rate')
    parser.add_argument('--momentum', type=float, default=0.99,
                        help='Momentum for SGD')
    parser.add_argument('--weight_decay', type=float, default=0.0005,
                        help='Weight decay')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--mode', choices=['seg', 'seg+', 'seg_tri', 'mat'],
                        help='Training mode', required=True)
    parser.add_argument('--pretrained_fcn8s', default=None,
                        help='Pretrained model path of FCN8s')
    parser.add_argument('--pretrained_n_input_ch', default=3, type=int,
                        help='Input channel number of Pretrained model')
    parser.add_argument('--pretrained_n_output_ch', default=21, type=int,
                        help='Output channel number of Pretrained model')
    parser.add_argument('--mat_scale', default=4, type=int,
                        help='Matting scale for speed up')
    args = parser.parse_args(argv)
    return args 
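An example invocation with an explicit argv list (paths and values are placeholders); --mode is the only required flag:

args = parse_arguments(['--config', 'config.json', '--mode', 'seg', '--gpus', '0'])
print(args.mode, args.gpus, args.batchsize)  # seg [0] 1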
Example #17
Source File: image_dataset.py    From kiss with GNU General Public License v3.0
def __init__(self, image_size, npz_file=None, memory_manager=None, base_name=None, root='.', dtype=None, transform_probability=0, use_imgaug=True, keep_aspect_ratio=False, image_mode='RGB', full_normalize=False, resize_after_load=True):
        _check_pillow_availability()

        if not isinstance(image_size, Size):
            image_size = Size(*image_size)

        self.shared_buffers = []
        self.root = root
        self.dtype = chainer.get_dtype(dtype)
        self.image_size = image_size
        self.transform_probability = transform_probability
        self.use_imgaug = use_imgaug
        self.keep_aspect_ratio = keep_aspect_ratio
        self.image_mode = image_mode
        self.full_normalize = full_normalize  # normalize each image to be in range of [0, 1] even if brightest pixel is != 255
        self.resize_after_load = resize_after_load  # resize the image to self.image_size after loading

        if npz_file is not None:
            assert isinstance(npz_file, six.string_types), "paths must be a file name!"
            assert os.path.splitext(npz_file)[-1] == ".npz", "You have to supply gt information as npz file!"

            with numpy.load(npz_file, allow_pickle=True) as gt_data:
                self.gt_data = self.copy_npz_data_to_ram(gt_data)
            self.memory_manager = None
            self.base_name = None
            self.length = len(self.gt_data['file_name'])
        else:
            assert memory_manager is not None, "If you do not specify an npz file, you must specify a memory manager!"
            assert base_name is not None, "If you want to use shared memory, you'll need to supply a base name for each dataset"
            self.gt_data = None
            self.memory_manager = memory_manager
            self.base_name = base_name
            self.length = self.memory_manager.get_shape(self.base_name, 'file_name').pop(0)

        self.augmentations = self.init_augmentations() 
Example #18
Source File: test_delegate_dataset.py    From chainer with MIT License
def test_delegate_dataset(self):
        dataset = tabular.DelegateDataset(
            dummy_dataset.DummyDataset(mode=self.mode))

        self.assertIsInstance(dataset, chainer.dataset.TabularDataset)
        self.assertEqual(len(dataset), len(dataset.dataset))
        self.assertEqual(dataset.keys, dataset.dataset.keys)
        self.assertEqual(dataset.mode, dataset.dataset.mode)
        self.assertEqual(
            dataset.get_example(3), dataset.dataset.get_example(3)) 
Example #19
Source File: train_utils.py    From see with GNU General Public License v3.0
def evaluate(self):
        iterator = self._iterators['main']
        target = self._targets['main']
        eval_func = self.eval_func or target

        if self.eval_hook:
            self.eval_hook(self)
        it = copy.copy(iterator)
        summary = reporter_module.DictSummary()

        for _ in range(min(len(iterator.dataset) // iterator.batch_size, self.num_iterations)):
            batch = next(it, None)
            if batch is None:
                break

            observation = {}
            with reporter_module.report_scope(observation), chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
                in_arrays = self.converter(batch, self.device)
                if isinstance(in_arrays, tuple):
                    eval_func(*in_arrays)
                elif isinstance(in_arrays, dict):
                    eval_func(**in_arrays)
                else:
                    eval_func(in_arrays)

            summary.add(observation)

        return summary.compute_mean() 
Example #20
Source File: utils_pretrain.py    From models with MIT License
def count_words(dataset, alpha=0.4):
    counts = collections.defaultdict(int)
    for w in dataset:
        counts[w] += 1
    counts = [counts[i] for i in range(len(counts))]
    counts = np.array(counts, 'f')
    counts /= counts.sum()
    counts = counts ** alpha
    counts = counts.tolist()
    return counts 
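This assumes the dataset is a sequence of integer word IDs starting at 0; a toy run:

import numpy as np

dataset = np.array([0, 1, 1, 2, 2, 2, 0, 3], dtype=np.int32)
weights = count_words(dataset, alpha=0.4)
print(len(weights))  # 4 -- one smoothed unigram weight per vocabulary entry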
Example #21
Source File: kuzushiji_mnist.py    From chainer with MIT License
def _retrieve_kuzushiji_mnist_training():
    base_url = 'http://codh.rois.ac.jp/'
    urls = [base_url + 'kmnist/dataset/kmnist/train-images-idx3-ubyte.gz',
            base_url + 'kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz']
    return _retrieve_kuzushiji_mnist('train.npz', urls) 
Example #22
Source File: tabular_dataset.py    From chainer with MIT License
def transform_batch(self, keys, transform_batch):
        """Apply a transform to examples.

        Args:
            keys (tuple of strs): The keys of transformed examples.
            transform_batch (callable): A callable that takes examples
                and returns transformed examples. :attr:`mode` of
                transformed dataset is determined by the transformed
                examples.

        Returns:
            A transformed dataset.
        """
        return chainer.dataset.tabular._transform._TransformBatch(
            self, keys, transform_batch) 
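A sketch of a batch-wise transform, assuming Chainer v7's tabular API, where a tuple-mode dataset passes whole columns positionally to the callable:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((
    ('x', np.arange(6).astype(np.float32)),
    ('y', np.arange(6))))

def scale_batch(x, y):
    # Receives (and must return) whole columns; returning a dict makes the
    # transformed dataset dict-mode.
    return {'x': x / 5.0, 'y': y}

scaled = dataset.transform_batch(('x', 'y'), scale_batch)
print(scaled.fetch()['x'])  # the x column rescaled into [0, 1]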
Example #23
Source File: tabular_dataset.py    From chainer with MIT License
def transform(self, keys, transform):
        """Apply a transform to each example.

        Args:
            keys (tuple of strs): The keys of transformed examples.
            transform (callable): A callable that takes an example
                and returns transformed example. :attr:`mode` of
                transformed dataset is determined by the transformed
                examples.

        Returns:
            A transformed dataset.
        """
        return chainer.dataset.tabular._transform._Transform(
            self, keys, transform) 
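The per-example counterpart; again a sketch against Chainer v7's tabular API, with the transform receiving one row at a time:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((
    ('x', np.arange(6).astype(np.float32)),
    ('y', np.arange(6))))

# Returning a tuple keeps the transformed dataset in tuple mode.
squared = dataset.transform(('x', 'y'), lambda x, y: (x ** 2, y))
print(squared.get_example(2))  # (4.0, 2)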
Example #24
Source File: tabular_dataset.py    From chainer with MIT License
def concat(self, *datasets):
        """Stack datasets along rows.

        Args:
            datasets (iterable of :class:`TabularDataset`):
                Datasets to be concatenated.
                All datasets must have the same :attr:`keys`.

        Returns:
            A concatenated dataset.
        """
        return chainer.dataset.tabular._concat._Concat(self, *datasets) 
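A sketch of row-wise concatenation of two tabular datasets that share the same keys (Chainer v7 tabular API assumed):

import numpy as np
import chainer

a = chainer.dataset.tabular.from_data((('x', np.arange(3)), ('y', np.arange(3))))
b = chainer.dataset.tabular.from_data((('x', np.arange(3, 6)), ('y', np.arange(3, 6))))

combined = a.concat(b)
print(len(combined), combined.get_example(4))  # 6 rows; row 4 comes from b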
Example #25
Source File: tabular_dataset.py    From chainer with MIT License
def asdict(self):
        """Return a view with dict mode.

        Returns:
            A view whose :attr:`mode` is :class:`dict`.
        """
        return chainer.dataset.tabular._asmode._Asdict(self) 
Example #26
Source File: tabular_dataset.py    From chainer with MIT License
def astuple(self):
        """Return a view with tuple mode.

        Returns:
            A view whose :attr:`mode` is :class:`tuple`.
        """
        return chainer.dataset.tabular._asmode._Astuple(self) 
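asdict() and astuple() only change the view's mode; the underlying columns are shared. A tiny sketch (Chainer v7 tabular API assumed):

import numpy as np
import chainer

ds = chainer.dataset.tabular.from_data({'a': np.arange(3), 'b': np.arange(3)})
print(ds.asdict().mode is dict, ds.astuple().mode is tuple)  # True True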
Example #27
Source File: utils_pretrain.py    From models with MIT License
def get_words(self):
        # It returns a list of current words.
        return [self.dataset[(offset + self.iteration) % len(self.dataset)]
                for offset in self.offsets] 
Example #28
Source File: utils_pretrain.py    From models with MIT License
def epoch_detail(self):
        # Floating point version of epoch.
        return self.iteration * self.batch_size / len(self.dataset) 
Example #29
Source File: convert.py    From chainer with MIT License
def to_device(device, x):
    """Send an array to a given device.

    This method sends a given array to a given device. This method is used in
    :func:`~chainer.dataset.concat_examples`.
    You can also use this method in a custom converter method used in
    :class:`~chainer.training.Updater` and :class:`~chainer.training.Extension`
    such as :class:`~chainer.training.updaters.StandardUpdater` and
    :class:`~chainer.training.extensions.Evaluator`.

    See also :func:`chainer.dataset.concat_examples`.

    Args:
        device (None or int or device specifier): A device to which an array
            is sent. If it is a negative integer, an array is sent to CPU.
            If it is a positive integer, an array is sent to GPU with the
            given ID. If it is ``None``, an array is left in the original
            device. Also, any of device specifiers described at
            :class:`~chainer.backend.DeviceId` is accepted.
        x (:ref:`ndarray`): An array to send.

    Returns:
        Converted array.

    """
    device = _get_device(device)

    if device is None:
        return x
    return device.send(x) 
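A short sketch of the three device conventions described above; the GPU line is commented out because it needs CuPy and a CUDA device.

import numpy as np
from chainer.dataset import to_device

x = np.arange(6, dtype=np.float32)

x_cpu = to_device(-1, x)     # negative integer: keep (or move) the array on CPU
x_asis = to_device(None, x)  # None: leave the array on its current device
# x_gpu = to_device(0, x)    # non-negative integer: copy to GPU 0 (requires CuPy)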
Example #30
Source File: tabular_dataset.py    From chainer with MIT License
def with_converter(self, converter):
        """Override the behaviour of :meth:`convert`.

        This method overrides :meth:`convert`.

        Args:
            converter (callable): A new converter.

        Returns:
            A dataset with the new converter.
        """

        return chainer.dataset.tabular._with_converter._WithConverter(
            self, converter)
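A sketch of swapping in a custom converter (Chainer v7 tabular API assumed); the key names in the returned dict are made up:

import numpy as np
import chainer

dataset = chainer.dataset.tabular.from_data((('x', np.arange(4)), ('y', np.arange(4))))

dataset = dataset.with_converter(
    lambda x, y: {'input': np.asarray(x), 'label': np.asarray(y)})
print(sorted(dataset.convert(dataset.fetch())))  # ['input', 'label']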