Python mxnet.nd.empty() Examples

The following are 7 code examples of mxnet.nd.empty(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module mxnet.nd, or try the search function.
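
For orientation before the project examples, here is a minimal standalone sketch of the pattern they all share: nd.empty() allocates an uninitialized array, and an operator then fills it through its out= argument. (This snippet is ours, not from any of the projects below.)

import mxnet as mx
from mxnet import nd

# nd.empty allocates without initializing: contents are arbitrary until written.
buf = nd.empty((2, 3), dtype='float32', ctx=mx.cpu())

# Typical usage: pre-allocate a destination buffer, then let an
# operator write into it via the out= argument.
a = nd.ones((3,))
b = nd.zeros((3,))
nd.stack(a, b, out=buf)
print(buf.asnumpy())  # [[1. 1. 1.], [0. 0. 0.]]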
Example #1
Source File: dataloader.py, from gluon-cv, Apache License 2.0 (6 votes)
import numpy as np
from mxnet import context, nd


def default_mp_pad_batchify_fn(data):
    """Use shared memory for collating data into a batch; labels are padded to the same shape."""
    if isinstance(data[0], nd.NDArray):
        out = nd.empty((len(data),) + data[0].shape, dtype=data[0].dtype,
                       ctx=context.Context('cpu_shared', 0))
        return nd.stack(*data, out=out)
    elif isinstance(data[0], tuple):
        data = zip(*data)
        return [default_mp_pad_batchify_fn(i) for i in data]
    else:
        data = np.asarray(data)
        batch_size = len(data)
        pad = max([l.shape[0] for l in data] + [1])
        buf = np.full((batch_size, pad, data[0].shape[-1]), -1, dtype=data[0].dtype)
        for i, l in enumerate(data):
            buf[i][:l.shape[0], :] = l
        return nd.array(buf, dtype=data[0].dtype, ctx=context.Context('cpu_shared', 0))
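
A hypothetical call hitting the NDArray branch above (the sample shapes are made up for illustration): three equally-shaped tensors are stacked into one batch that lives in shared CPU memory, so worker processes can hand it to the main process without a copy.

from mxnet import nd

# Three equally-shaped CHW image tensors, stacked into a shared-memory batch.
samples = [nd.zeros((3, 4, 4)) for _ in range(3)]
batch = default_mp_pad_batchify_fn(samples)
print(batch.shape)    # (3, 3, 4, 4)
print(batch.context)  # cpu_shared(0)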
Example #2
Source File: dataloader.py, from panoptic-fpn-gluon, Apache License 2.0 (6 votes)

Identical to the default_mp_pad_batchify_fn shown in Example #1.

Example #3
Source File: dataloader.py, from cascade_rcnn_gluon, Apache License 2.0 (6 votes)

Identical to the default_mp_pad_batchify_fn shown in Example #1.
Example #4
Source File: parallelized_loader.py, from gluon-ts, Apache License 2.0 (5 votes)
def __next__(self) -> DataEntry:
    # If the buffer is empty, fill it first
    # (this should only be executed in the first round).
    if not self.shuffle_buffer:
        self.shuffle_buffer = list(
            itertools.islice(
                self.base_iterator, self.shuffle_buffer_length
            )
        )
    # If the buffer is still empty, all elements have been used:
    # signal the end of the iterator.
    if not self.shuffle_buffer:
        raise StopIteration

    # Pick an element at a random index to return, then refill that
    # slot from the sequential generator.
    idx = random.randint(0, len(self.shuffle_buffer) - 1)
    next_sample = self.shuffle_buffer[idx]

    # Replace the slot with the next element of the iterator if the
    # iterator has not finished; delete the slot otherwise.
    try:
        self.shuffle_buffer[idx] = next(self.base_iterator)
    except StopIteration:
        del self.shuffle_buffer[idx]

    return next_sample
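
The same shuffle-buffer technique, extracted into a self-contained generator so it can be run in isolation (the function name and the range(10) input are ours, for illustration):

import itertools
import random

def shuffle_iter(base_iterator, buffer_length):
    """Pseudo-shuffle an iterator through a fixed-size buffer, as in __next__ above."""
    buffer = list(itertools.islice(base_iterator, buffer_length))
    while buffer:
        # return a random buffered element, refill its slot from the iterator
        idx = random.randint(0, len(buffer) - 1)
        sample = buffer[idx]
        try:
            buffer[idx] = next(base_iterator)
        except StopIteration:
            del buffer[idx]
        yield sample

print(list(shuffle_iter(iter(range(10)), 4)))  # all ten values, locally shuffled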
Example #5
Source File: utils.py, from coach, Apache License 2.0 (5 votes)
def clip_grad(
        grads: Union[Generator[NDArray, NDArray, NDArray], List[NDArray], Tuple[NDArray]],
        clip_method: GradientClippingMethod,
        clip_val: float,
        inplace=True) -> List[NDArray]:
    """
    Clip gradient values, in place by default.

    :param grads: gradients to be clipped
    :param clip_method: clipping method
    :param clip_val: clipping value; interpreted differently depending on the clipping method
    :param inplace: modify grads if True, otherwise write into freshly created NDArrays
    :return: clipped gradients
    """
    grads = list(grads)  # materialize, in case a generator was passed (grads is iterated twice below)
    output = list(grads) if inplace else list(nd.empty(g.shape) for g in grads)
    if clip_method == GradientClippingMethod.ClipByGlobalNorm:
        norm_unclipped_grads = global_norm(grads)
        scale = clip_val / (norm_unclipped_grads.asscalar() + 1e-8)  # todo: use branching operators?
        if scale < 1.0:
            for g, o in zip(grads, output):
                nd.broadcast_mul(g, nd.array([scale]), out=o)
        elif not inplace:
            # no clipping needed, but the freshly created output buffers
            # still have to be populated with the unclipped gradients
            for g, o in zip(grads, output):
                g.copyto(o)
    elif clip_method == GradientClippingMethod.ClipByValue:
        for g, o in zip(grads, output):
            g.clip(-clip_val, clip_val, out=o)
    elif clip_method == GradientClippingMethod.ClipByNorm:
        for g, o in zip(grads, output):
            nd.broadcast_mul(g, nd.minimum(1.0, clip_val / (g.norm() + 1e-8)), out=o)
    else:
        raise KeyError('Unsupported gradient clipping method')
    return output
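
The arithmetic of the ClipByGlobalNorm branch, as a minimal sketch in plain mxnet.nd without coach's enums or its global_norm helper (the gradient values are chosen so the global norm is exactly 13):

from mxnet import nd

grads = [nd.array([3.0, 4.0]), nd.array([0.0, 12.0])]
clip_val = 5.0

global_norm_val = nd.sqrt(sum((g ** 2).sum() for g in grads))  # sqrt(9+16+0+144) = 13
scale = clip_val / (global_norm_val.asscalar() + 1e-8)         # 5/13 < 1, so clip
if scale < 1.0:
    grads = [nd.broadcast_mul(g, nd.array([scale])) for g in grads]
print([g.asnumpy() for g in grads])  # global norm is now ~5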
Example #6
Source File: parallelized_loader.py, from gluon-ts, Apache License 2.0 (4 votes)
def _worker_fn(
    batch_size: int,
    batchify_fn: Callable,
    dtype: DType,
    is_train: bool,
    cyclic: bool,
    cycle_num: int,
    shuffle_buffer_length: int,
):
    """Function for processing data in a worker process."""
    # initialize, or reset, the iterator at each cycle
    if (_WorkerData.iterator_latest_reset_cycle < cycle_num) and (
        _WorkerData.iterator_latest_reset_cycle == 0 or not cyclic
    ):
        _worker_reset_iterator(
            is_train, cyclic, cycle_num, shuffle_buffer_length
        )

    # retrieve the samples that will be batched
    batch_samples = list(
        itertools.islice(_WorkerData.dataset_iterator, batch_size)
    )
    # batch the samples, if there were any
    if batch_samples:
        success = True
        batch = batchify_fn(
            data=batch_samples, dtype=dtype, multi_processing=True
        )
    else:
        # The second time a worker cannot provide a batch, delay calling it
        # again. On first exhaustion it should not be delayed, since it needs
        # to indicate depletion. Don't make the penalty too high, since that
        # delays rescheduling of non-empty iterators.
        if _WorkerData.iterator_exhausted_indicator:
            time.sleep(0.05)
        else:
            _WorkerData.iterator_exhausted_indicator = True
        success = False
        batch = None

    buf = io.BytesIO()
    ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(
        (success, MPWorkerInfo.worker_id, batch)
    )
    return buf.getvalue()
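
The serialization round-trip at the end of _worker_fn, in isolation: the worker pickles a (success, worker_id, batch) triple into a bytes buffer, and Example #7 below unpacks the same triple with pickle.loads. (The payload values here are made up.)

import io
import pickle
from multiprocessing.reduction import ForkingPickler

buf = io.BytesIO()
# (success, worker_id, batch) is the triple the worker sends back.
ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump((True, 0, [1, 2, 3]))

success, worker_id, batch = pickle.loads(buf.getvalue())
print(success, worker_id, batch)  # True 0 [1, 2, 3]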
Example #7
Source File: parallelized_loader.py, from gluon-ts, Apache License 2.0 (4 votes)
def __next__(self) -> DataBatch:
    # Try to get a batch; it is possible that an iterator was exhausted,
    # in which case we don't get a new batch.
    success = False
    while not success:
        try:
            self._push_next()
            if self._rcvd_idx == self._sent_idx:
                assert (
                    not self._data_buffer
                ), "Data buffer should be empty at this moment"
                raise StopIteration
            assert (
                self._rcvd_idx < self._sent_idx
            ), "rcvd_idx must be smaller than sent_idx"
            assert (
                self._rcvd_idx in self._data_buffer
            ), "fatal error with _push_next, rcvd_idx missing"
            ret = self._data_buffer.pop(self._rcvd_idx)
            got = ret.get(self._timeout)
            self._rcvd_idx += 1

            # retrieve the batch from shared memory along with its metadata
            success, worker_id, batch = pickle.loads(got)

            # if the iterator is exhausted/empty
            if not success:
                self._exhausted_iterators.add(worker_id)
                if self._num_workers == len(self._exhausted_iterators):
                    # no more batches to be generated
                    return {}
                else:
                    self._push_next()
            else:
                # either pin to cpu memory (with ctx=context.cpu_pinned(self.pin_device_id)),
                # or return with the right context straight away
                return _as_in_context(batch, self._ctx)
        except multiprocessing.context.TimeoutError:
            print(
                f"Worker timed out after {self._timeout} seconds. This might be caused by "
                "\n - Slow transform. Please increase timeout to allow slower data loading in each worker. "
                "\n - Insufficient shared_memory if `timeout` is large enough. "
                "\n Please consider to reduce `num_workers` or increase shared_memory in system."
            )
            raise
        except Exception:
            print("An unexpected error occurred in the WorkerIterator.")
            self._worker_pool.terminate()
            raise
    return {}