Python tensorflow.python.ops.data_flow_ops.StagingArea() Examples
The following are 28 code examples of tensorflow.python.ops.data_flow_ops.StagingArea(), collected from the open-source projects cited above each example.
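Before the examples: StagingArea is a TF1-era, graph-mode buffer whose put() and get() ops let you prefetch tensors onto a device one step ahead of the computation that consumes them. The minimal sketch below shows the basic warm-up/put/get pattern; it assumes TF 1.x semantics (via tf.compat.v1) and uses made-up tensor names, so treat it as an illustration rather than code from any of the projects below.

# Minimal StagingArea sketch (assumes tf.compat.v1 graph mode; names are
# illustrative stand-ins, not taken from the examples on this page).
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import data_flow_ops

tf.disable_eager_execution()

x = tf.random_uniform([4, 8])  # stand-in for a real input pipeline
area = data_flow_ops.StagingArea([x.dtype], shapes=[x.shape])
put_op = area.put([x])         # stages the *next* batch
staged = area.get()            # retrieves the batch staged on the previous step
if isinstance(staged, tf.Tensor):  # with a single dtype, some TF versions
    staged = [staged]              # return a bare tensor instead of a list
y = tf.reduce_sum(staged[0])

with tf.Session() as sess:
    sess.run(put_op)  # warm-up: pre-fill one element before the first get()
    for _ in range(3):
        # Each step consumes one staged element and stages the next one.
        _, val = sess.run([put_op, y])
        print(val)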
Example #1
Source File: variable_mgr.py From tf-imagenet with Apache License 2.0 | 6 votes |
def assign_sub(self, delta, name=None):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op)
Example #2
Source File: input_source.py From ADL with MIT License | 6 votes |
def __init__(self, input, nr_stage=1, device=None):
    """
    Args:
        input (FeedfreeInput):
        nr_stage (int): number of elements to prefetch into each StagingArea,
            at the beginning. Since enqueue and dequeue are synchronized,
            prefetching 1 element should be sufficient.
        device (str or None): if not None, place the StagingArea on a
            specific device. e.g., '/cpu:0'. Otherwise, they are placed under
            where `get_inputs_tensors` gets called, which could be unspecified
            in case of simple trainers.
    """
    if not isinstance(input, FeedfreeInput):
        raise ValueError("StagingInput takes a FeedfreeInput! Got {}".format(input))
    if isinstance(input, StagingInput):
        raise ValueError("StagingInput cannot be nested!")

    self._input = input
    self._nr_stage = nr_stage
    self._areas = []
    self._stage_ops = []
    self._unstage_ops = []
    self._device = device
Example #3
Source File: input_source.py From petridishnn with MIT License | 6 votes |
def __init__(self, input, nr_stage=1, device=None):
    """
    Args:
        input (FeedfreeInput):
        nr_stage (int): number of elements to prefetch into each StagingArea,
            at the beginning. Since enqueue and dequeue are synchronized,
            prefetching 1 element should be sufficient.
        device (str or None): if not None, place the StagingArea on a
            specific device. e.g., '/cpu:0'. Otherwise, they are placed under
            where `get_inputs_tensors` gets called, which could be unspecified
            in case of simple trainers.
    """
    if not isinstance(input, FeedfreeInput):
        raise ValueError("StagingInput takes a FeedfreeInput! Got {}".format(input))
    if isinstance(input, StagingInput):
        raise ValueError("StagingInput cannot be nested!")

    self._input = input
    self._nr_stage = nr_stage
    self._areas = []
    self._stage_ops = []
    self._unstage_ops = []
    self._device = device
Example #4
Source File: variable_mgr_util.py From dlcookbook-dlbs with Apache License 2.0 | 6 votes |
def assign_sub(self, delta, name=None):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op)
Example #5
Source File: benchmark_cnn.py From tf-imagenet with Apache License 2.0 | 6 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(data_flow_ops.StagingArea(
            [images_splits[0].dtype, labels_splits[0].dtype],
            shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #6
Source File: trainer_cnn.py From tf-imagenet with Apache License 2.0 | 6 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(data_flow_ops.StagingArea(
            [images_splits[0].dtype, labels_splits[0].dtype],
            shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #7
Source File: benchmark_cnn.py From deeplearning-benchmark with Apache License 2.0 | 6 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(data_flow_ops.StagingArea(
            [images_splits[0].dtype, labels_splits[0].dtype],
            shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #8
Source File: variable_mgr.py From deeplearning-benchmark with Apache License 2.0 | 6 votes |
def assign_sub(self, delta, name=None):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op)
Example #9
Source File: input_source.py From tensorpack with Apache License 2.0 | 6 votes |
def __init__(self, input, nr_stage=1, device=None):
    """
    Args:
        input (FeedfreeInput):
        nr_stage (int): number of elements to prefetch into each StagingArea,
            at the beginning. Since enqueue and dequeue are synchronized,
            prefetching 1 element should be sufficient.
        device (str or None): if not None, place the StagingArea on a
            specific device. e.g., '/cpu:0'. Otherwise, they are placed under
            where `get_inputs_tensors` gets called, which could be unspecified
            in case of simple trainers.
    """
    if not isinstance(input, FeedfreeInput):
        raise ValueError("StagingInput takes a FeedfreeInput! Got {}".format(input))
    if isinstance(input, StagingInput):
        raise ValueError("StagingInput cannot be nested!")

    self._input = input
    self._nr_stage = nr_stage
    self._areas = []
    self._stage_ops = []
    self._unstage_ops = []
    self._device = device
Example #10
Source File: batch_allreduce.py From benchmarks with Apache License 2.0 | 6 votes |
def _defer_tensor(tensor):
  """Defers the retrieval of a tensor.

  The tensor is put into a StagingArea, and the return value is the
  retrieval of the tensor from the StagingArea. The effect is that the
  tensor returned from this function is the tensor that was put in the
  StagingArea for the previous Session.run() call.

  Args:
    tensor: The tensor to defer for one step.

  Returns:
    deferred_tensor: The tensor deferred for one step.
    put_op: An op to put `tensor` in the StagingArea. Must be run every
      step that `deferred_tensor` is run.
    warmup_op: A warmup op that should be called before the first step.
      Puts a zero tensor into the StagingArea.
  """
  tensor_stage = data_flow_ops.StagingArea([tensor.dtype], [tensor.shape])
  put_op = tensor_stage.put([tensor])
  warmup_op = tensor_stage.put([tf.zeros(tensor.shape, dtype=tensor.dtype)])

  # Fetch the next tensor to use.
  (tensor,) = tensor_stage.get()
  return tensor, put_op, warmup_op
Example #11
Source File: batch_allreduce.py From benchmarks with Apache License 2.0 | 6 votes |
def defer_single_device_tensors(device_tensors):
  """Defer tensors (gradients in this case) from a single device.

  Arguments:
    device_tensors: A list of gradient tensors from a single device to
      defer.

  Returns:
    deferred_tensors: A list of tensors deferred for one step.
    put_ops: A list of ops that put `tensors` in the StagingAreas. Must be
      run every step that `deferred_tensors` is run.
    warmup_ops: Warmup ops that should be called before the first step.
      Puts zero tensors into the StagingArea.
  """
  put_ops = []
  warmup_ops = []
  deferred_tensors = []
  for tensor in device_tensors:
    deferred_tensor, put_op, warmup_op = _defer_tensor(tensor)
    deferred_tensors.append(deferred_tensor)
    put_ops.append(put_op)
    warmup_ops.append(warmup_op)
  return deferred_tensors, put_ops, warmup_ops
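To show where the warmup_ops and put_ops from these two helpers would be run, here is a hypothetical driver loop. The tiny variable, loss, and optimizer are stand-ins invented for this sketch (the benchmarks repo wires this into its training loop differently), and note that on some TF versions StagingArea.get() on a single-dtype area returns a bare tensor, which would require adjusting _defer_tensor's unpacking.

# Hypothetical driver for the two helpers above (TF 1.x graph mode);
# the variable, loss, and optimizer are made-up stand-ins.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

w = tf.get_variable('w', initializer=1.0)
loss = tf.square(w - 3.0)
grads = tf.gradients(loss, [w])

deferred_grads, put_ops, warmup_ops = defer_single_device_tensors(grads)
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(
    zip(deferred_grads, [w]))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(warmup_ops)  # stage zero gradients so the first get() has data
    for _ in range(5):
        # put_ops must run on every step that consumes deferred_grads, so
        # each step applies the gradients computed on the previous step.
        sess.run([train_op] + put_ops)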
Example #12
Source File: benchmark_cnn.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(
            data_flow_ops.StagingArea(
                [images_splits[0].dtype, labels_splits[0].dtype],
                shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #13
Source File: input_source.py From tensorpack with Apache License 2.0 | 5 votes |
def _get_input_tensors(self):
    inputs = self._input.get_input_tensors()

    with self._device_ctx():
        with self.cached_name_scope():
            # Putting variables to stagingarea will cause trouble
            dtypes = []
            for idx in range(len(inputs)):
                dtype = inputs[idx].dtype
                if dtype.base_dtype != dtype:     # is reference type
                    inputs[idx] = tf.identity(inputs[idx])
                dtypes.append(dtype.base_dtype)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            # Setting capacity to 1 to potentially save some memory, because we
            # should expect the consumers to run slower than the producer.
            stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            self._stage_ops.append(stage.put(inputs))
            self._areas.append(stage)
            outputs = stage.get()
            if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                outputs = [outputs]
            for vin, vout in zip(inputs, outputs):
                vout.set_shape(vin.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
Example #14
Source File: variable_mgr_util.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #15
Source File: input_source.py From tensorpack with Apache License 2.0 | 5 votes |
def _prefill(self, sess):
    logger.info("Pre-filling StagingArea ...")
    for _ in range(self.nr_stage):
        self.stage_op.run(session=sess)
    logger.info("{} element{} put into StagingArea on each tower.".format(
        self.nr_stage, "s were" if self.nr_stage > 1 else " was"))
Example #16
Source File: nvcnn.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def stage(tensors):
    """Stages the given tensors in a StagingArea for asynchronous put/get.
    """
    stage_area = data_flow_ops.StagingArea(
        dtypes=[tensor.dtype for tensor in tensors],
        shapes=[tensor.get_shape() for tensor in tensors])
    put_op = stage_area.put(tensors)
    get_tensors = stage_area.get()
    get_tensors = [tf.reshape(gt, t.get_shape())
                   for (gt, t) in zip(get_tensors, tensors)]
    return put_op, get_tensors
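A hypothetical call site for the stage() helper above, showing the usual pattern of warming the area up once and then re-running the put op each step. The input tensors here are zero-filled stand-ins invented for this sketch, not from nvcnn.py.

# Hypothetical usage of stage() above (TF 1.x graph mode).
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

images = tf.zeros([32, 224, 224, 3])     # stand-in input batch
labels = tf.zeros([32], dtype=tf.int32)  # stand-in labels
preload_op, (gpu_images, gpu_labels) = stage([images, labels])

with tf.Session() as sess:
    sess.run(preload_op)  # warm-up: fill the staging area once
    for _ in range(10):
        # Consume the staged batch while simultaneously staging the next one.
        sess.run([preload_op, gpu_images, gpu_labels])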
Example #17
Source File: input_source.py From ADL with MIT License | 5 votes |
def _get_input_tensors(self):
    inputs = self._input.get_input_tensors()

    with self._device_ctx():
        with self.cached_name_scope():
            # Putting variables to stagingarea will cause trouble
            dtypes = []
            for idx in range(len(inputs)):
                dtype = inputs[idx].dtype
                if dtype.base_dtype != dtype:     # is reference type
                    inputs[idx] = tf.identity(inputs[idx])
                dtypes.append(dtype.base_dtype)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            # Setting capacity to 1 to potentially save some memory, because we
            # should expect the consumers to run slower than the producer.
            stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            self._stage_ops.append(stage.put(inputs))
            self._areas.append(stage)
            outputs = stage.get()
            if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                outputs = [outputs]
            for vin, vout in zip(inputs, outputs):
                vout.set_shape(vin.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
Example #18
Source File: input_source.py From ADL with MIT License | 5 votes |
def _prefill(self, sess):
    logger.info("Pre-filling StagingArea ...")
    for _ in range(self.nr_stage):
        self.stage_op.run(session=sess)
    logger.info("{} element{} put into StagingArea on each tower.".format(
        self.nr_stage, "s were" if self.nr_stage > 1 else " was"))
Example #19
Source File: train_imagenet_resnet_hvd.py From sagemaker-tensorflow-training-toolkit with Apache License 2.0 | 5 votes |
def stage(tensors):
    """Stages the given tensors in a StagingArea for asynchronous put/get.
    """
    stage_area = data_flow_ops.StagingArea(
        dtypes=[tensor.dtype for tensor in tensors],
        shapes=[tensor.get_shape() for tensor in tensors])
    put_op = stage_area.put(tensors)
    get_tensors = stage_area.get()
    tf.add_to_collection('STAGING_AREA_PUTS', put_op)
    return put_op, get_tensors
Example #20
Source File: input_source.py From petridishnn with MIT License | 5 votes |
def _get_input_tensors(self):
    inputs = self._input.get_input_tensors()

    with self._device_ctx():
        with self.cached_name_scope():
            # Putting variables to stagingarea will cause trouble
            dtypes = []
            for idx in range(len(inputs)):
                dtype = inputs[idx].dtype
                if dtype.base_dtype != dtype:     # is reference type
                    inputs[idx] = tf.identity(inputs[idx])
                dtypes.append(dtype.base_dtype)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            # Setting capacity to 1 to potentially save some memory, because we
            # should expect the consumers to run slower than the producer.
            stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            self._stage_ops.append(stage.put(inputs))
            self._areas.append(stage)
            outputs = stage.get()
            if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                outputs = [outputs]
            for vin, vout in zip(inputs, outputs):
                vout.set_shape(vin.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
Example #21
Source File: input_source.py From petridishnn with MIT License | 5 votes |
def _prefill(self, sess):
    logger.info("Pre-filling StagingArea ...")
    for k in range(self.nr_stage):
        self.stage_op.run(session=sess)
    logger.info("{} element{} put into StagingArea on each tower.".format(
        self.nr_stage, "s were" if self.nr_stage > 1 else " was"))
Example #22
Source File: variable_mgr.py From tf-imagenet with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #23
Source File: variable_mgr.py From deeplearning-benchmark with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #24
Source File: benchmark_cnn.py From parallax with Apache License 2.0 | 5 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    if self.use_synthetic_gpu_images:
        return (None, None)

    with tf.device('/cpu:0'):
        if self.params.eval:
            subset = 'validation'
        else:
            subset = 'train'
        image_producer_ops = []
        images_splits, labels_splits = self.image_preprocessor.minibatch(
            self.dataset, subset=subset,
            use_datasets=self.params.use_datasets,
            cache_data=self.params.cache_data,
            shift_ratio=shift_ratio)
        images_shape = images_splits[0].get_shape()
        labels_shape = labels_splits[0].get_shape()

    with tf.device('/gpu:0'):
        if self.params.eval:
            image_producer_stage = data_flow_ops.StagingArea(
                [images_splits[0].dtype, labels_splits[0].dtype],
                shapes=[images_shape, labels_shape],
                capacity=1)
        else:
            image_producer_stage = data_flow_ops.StagingArea(
                [images_splits[0].dtype, labels_splits[0].dtype],
                shapes=[images_shape, labels_shape],
                capacity=self.batch_group_size)
        put_op = image_producer_stage.put(
            [images_splits[0], labels_splits[0]])
        image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stage)
Example #25
Source File: variable_mgr_util.py From benchmarks with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #26
Source File: variable_mgr_util.py From benchmarks with Apache License 2.0 | 5 votes |
def assign_sub(self, delta, name=None, read_value=True):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.
      read_value: if True, will return something which evaluates to the new
        value of the variable; if False will return the assign op.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op, read_value=read_value)
Example #27
Source File: _multigpu_with_nccl.py From keras_experiments with The Unlicense | 4 votes |
def all_sync_params(tower_params, devices, usenccl=True):
    """Assigns the params from the first tower to all others"""
    if len(devices) == 1:
        return tf.no_op()
    sync_ops = []
    if have_nccl and usenccl:
        for param_on_devices in zip(*tower_params):
            # print('PARAM_ON_DEVICES: {}'.format(param_on_devices))  # DEBUG
            # Note: param_on_devices is [paramX_gpu0, paramX_gpu1, ...]
            param0 = param_on_devices[0]
            send_op, received_tensors = nccl.broadcast(param0, devices[1:])
            sync_ops.append(send_op)
            for device, param, received in zip(devices[1:],
                                               param_on_devices[1:],
                                               received_tensors):
                with tf.device(device):
                    sync_op = param.assign(received)
                    sync_ops.append(sync_op)
    else:
        params0 = tower_params[0]
        for device, params in zip(devices, tower_params):
            with tf.device(device):
                for param, param0 in zip(params, params0):
                    sync_op = param.assign(param0.read_value())
                    sync_ops.append(sync_op)

    return tf.group(*sync_ops)


# def stage(tensors):
#     """Stages the given tensors in a StagingArea for asynchronous put/get.
#     """
#     stage_area = data_flow_ops.StagingArea(
#         dtypes=[tensor.dtype for tensor in tensors],
#         shapes=[tensor.get_shape() for tensor in tensors])
#     put_op = stage_area.put(tensors)
#     get_tensors = stage_area.get()
#     if not isinstance(get_tensors, list):
#         get_tensors = [get_tensors]
#     # print('GET_TENSORS: {}'.format(get_tensors))  # DEBUG
#
#     get_tensors = [tf.reshape(gt, t.get_shape())
#                    for (gt, t) in zip(get_tensors, tensors)]
#     return put_op, get_tensors
Example #28
Source File: _multigpu_with_nccl.py From keras_experiments with The Unlicense | 4 votes |
def __init__(self, *args, **kwargs):
    # :param model_creator: Callable that returns a serial i.e. non-multi
    #     GPU Keras model i.e. a keras.models.Model model. REQUIRED.
    #     Suggestion, use partial from functools to setup model_creator.
    # try:
    #     model_creator = kwargs.pop('model_creator')
    # except KeyError:
    #     raise RuntimeError('Keyword argument "model_creator" required '
    #                        'for ModelMGPU.')
    super(ModelMGPU, self).__init__()

    try:
        smodel = kwargs.pop('serial_model')
    except KeyError:
        raise RuntimeError('Keyword argument "serial_model" required '
                           'for ModelMGPU.')

    # SET STATE: Instance of serial model for checkpointing
    self._smodel = smodel  # model_creator()

    try:
        gdev_list = kwargs.pop('gdev_list')
    except KeyError:
        raise RuntimeError('Keyword argument "gdev_list" required '
                           'for ModelMGPU.')
    self._gdev_list = gdev_list

    mname = kwargs.pop('name', self._smodel.name)
    kwargs['name'] = mname

    self._ps_device = kwargs.pop('ps_device', '/cpu:0')
    self._initsync = kwargs.pop('initsync', True)
    self._usenccl = kwargs.pop('usenccl', False)
    self._syncopt = kwargs.pop('syncopt', False)
    self._enqueue = kwargs.pop('enqueue', False)

    if self._enqueue:
        warnings.warn('Enqueue option to use StagingArea currently does '
                      'not work.', UserWarning)

    # NOTE: To use staging have to patch keras tensorflow_backend.Function.
    # Function implementation in keras_exp.multigpu._patch_tf_backend
    self._enqueue_ops = []

    self._tower_params = []  # For init/sync'ing of parameters.

    kwargs_ = self._init_make_dataparallel(gdev_list, **kwargs)
    super(ModelMGPU, self).__init__(*args, **kwargs_)