Python tensorflow.python.ops.data_flow_ops.StagingArea() Examples
The following are 28 code examples of tensorflow.python.ops.data_flow_ops.StagingArea(), collected from the open-source projects cited above each example.
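Before the examples: StagingArea is a TF1-era, graph-mode buffer whose put() and get() ops let you prefetch tensors onto a device one step ahead of the computation that consumes them. The minimal sketch below shows the basic warm-up/put/get pattern; it assumes TF 1.x semantics (via tf.compat.v1) and uses made-up tensor names, so treat it as an illustration rather than code from any of the projects below.

# Minimal StagingArea sketch (assumes tf.compat.v1 graph mode; names are
# illustrative stand-ins, not taken from the examples on this page).
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import data_flow_ops

tf.disable_eager_execution()

x = tf.random_uniform([4, 8])  # stand-in for a real input pipeline
area = data_flow_ops.StagingArea([x.dtype], shapes=[x.shape])
put_op = area.put([x])         # stages the *next* batch
staged = area.get()            # retrieves the batch staged on the previous step
if isinstance(staged, tf.Tensor):  # with a single dtype, some TF versions
    staged = [staged]              # return a bare tensor instead of a list
y = tf.reduce_sum(staged[0])

with tf.Session() as sess:
    sess.run(put_op)  # warm-up: pre-fill one element before the first get()
    for _ in range(3):
        # Each step consumes one staged element and stages the next one.
        _, val = sess.run([put_op, y])
        print(val)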
Example #1
Source File: variable_mgr.py From tf-imagenet with Apache License 2.0 | 6 votes |
def assign_sub(self, delta, name=None):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op)
Example #2
Source File: input_source.py From ADL with MIT License | 6 votes |
def __init__(self, input, nr_stage=1, device=None):
    """
    Args:
        input (FeedfreeInput):
        nr_stage (int): number of elements to prefetch into each StagingArea,
            at the beginning. Since enqueue and dequeue are synchronized,
            prefetching 1 element should be sufficient.
        device (str or None): if not None, place the StagingArea on a
            specific device. e.g., '/cpu:0'. Otherwise, they are placed under
            where `get_inputs_tensors` gets called, which could be unspecified
            in case of simple trainers.
    """
    if not isinstance(input, FeedfreeInput):
        raise ValueError("StagingInput takes a FeedfreeInput! Got {}".format(input))
    if isinstance(input, StagingInput):
        raise ValueError("StagingInput cannot be nested!")

    self._input = input
    self._nr_stage = nr_stage
    self._areas = []
    self._stage_ops = []
    self._unstage_ops = []
    self._device = device
Example #3
Source File: input_source.py From petridishnn with MIT License | 6 votes |
def __init__(self, input, nr_stage=1, device=None):
    """
    Args:
        input (FeedfreeInput):
        nr_stage (int): number of elements to prefetch into each StagingArea,
            at the beginning. Since enqueue and dequeue are synchronized,
            prefetching 1 element should be sufficient.
        device (str or None): if not None, place the StagingArea on a
            specific device. e.g., '/cpu:0'. Otherwise, they are placed under
            where `get_inputs_tensors` gets called, which could be unspecified
            in case of simple trainers.
    """
    if not isinstance(input, FeedfreeInput):
        raise ValueError("StagingInput takes a FeedfreeInput! Got {}".format(input))
    if isinstance(input, StagingInput):
        raise ValueError("StagingInput cannot be nested!")

    self._input = input
    self._nr_stage = nr_stage
    self._areas = []
    self._stage_ops = []
    self._unstage_ops = []
    self._device = device
Example #4
Source File: variable_mgr_util.py From dlcookbook-dlbs with Apache License 2.0 | 6 votes |
def assign_sub(self, delta, name=None):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op)
Example #5
Source File: benchmark_cnn.py From tf-imagenet with Apache License 2.0 | 6 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(data_flow_ops.StagingArea(
            [images_splits[0].dtype, labels_splits[0].dtype],
            shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #6
Source File: trainer_cnn.py From tf-imagenet with Apache License 2.0 | 6 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(data_flow_ops.StagingArea(
            [images_splits[0].dtype, labels_splits[0].dtype],
            shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #7
Source File: benchmark_cnn.py From deeplearning-benchmark with Apache License 2.0 | 6 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(data_flow_ops.StagingArea(
            [images_splits[0].dtype, labels_splits[0].dtype],
            shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #8
Source File: variable_mgr.py From deeplearning-benchmark with Apache License 2.0 | 6 votes |
def assign_sub(self, delta, name=None):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op)
Example #9
Source File: input_source.py From tensorpack with Apache License 2.0 | 6 votes |
def __init__(self, input, nr_stage=1, device=None):
    """
    Args:
        input (FeedfreeInput):
        nr_stage (int): number of elements to prefetch into each StagingArea,
            at the beginning. Since enqueue and dequeue are synchronized,
            prefetching 1 element should be sufficient.
        device (str or None): if not None, place the StagingArea on a
            specific device. e.g., '/cpu:0'. Otherwise, they are placed under
            where `get_inputs_tensors` gets called, which could be unspecified
            in case of simple trainers.
    """
    if not isinstance(input, FeedfreeInput):
        raise ValueError("StagingInput takes a FeedfreeInput! Got {}".format(input))
    if isinstance(input, StagingInput):
        raise ValueError("StagingInput cannot be nested!")

    self._input = input
    self._nr_stage = nr_stage
    self._areas = []
    self._stage_ops = []
    self._unstage_ops = []
    self._device = device
Example #10
Source File: batch_allreduce.py From benchmarks with Apache License 2.0 | 6 votes |
def _defer_tensor(tensor):
  """Defers the retrieval of a tensor.

  The tensor is put into a StagingArea, and the return value is the
  retrieval of the tensor from the StagingArea. The effect is that the
  tensor returned from this function is the tensor that was put in the
  StagingArea for the previous Session.run() call.

  Args:
    tensor: The tensor to defer for one step.

  Returns:
    deferred_tensor: The tensor deferred for one step.
    put_op: An op to put `tensor` in the StagingArea. Must be run every
      step that `deferred_tensor` is run.
    warmup_op: A warmup op that should be called before the first step.
      Puts a zero tensor into the StagingArea.
  """
  tensor_stage = data_flow_ops.StagingArea([tensor.dtype], [tensor.shape])
  put_op = tensor_stage.put([tensor])
  warmup_op = tensor_stage.put([tf.zeros(tensor.shape, dtype=tensor.dtype)])

  # Fetch the next tensor to use.
  (tensor,) = tensor_stage.get()
  return tensor, put_op, warmup_op
Example #11
Source File: batch_allreduce.py From benchmarks with Apache License 2.0 | 6 votes |
def defer_single_device_tensors(device_tensors):
  """Defer tensors (gradients in this case) from a single device.

  Arguments:
    device_tensors: A list of gradient tensors from a single device to
      defer.

  Returns:
    deferred_tensors: A list of tensors deferred for one step.
    put_ops: A list of ops that put `tensors` in the StagingAreas. Must be
      run every step that `deferred_tensors` is run.
    warmup_ops: Warmup ops that should be called before the first step.
      Puts zero tensors into the StagingArea.
  """
  put_ops = []
  warmup_ops = []
  deferred_tensors = []
  for tensor in device_tensors:
    deferred_tensor, put_op, warmup_op = _defer_tensor(tensor)
    deferred_tensors.append(deferred_tensor)
    put_ops.append(put_op)
    warmup_ops.append(warmup_op)
  return deferred_tensors, put_ops, warmup_ops
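To show where the warmup_ops and put_ops from these two helpers would be run, here is a hypothetical driver loop. The tiny variable, loss, and optimizer are stand-ins invented for this sketch (the benchmarks repo wires this into its training loop differently), and note that on some TF versions StagingArea.get() on a single-dtype area returns a bare tensor, which would require adjusting _defer_tensor's unpacking.

# Hypothetical driver for the two helpers above (TF 1.x graph mode);
# the variable, loss, and optimizer are made-up stand-ins.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

w = tf.get_variable('w', initializer=1.0)
loss = tf.square(w - 3.0)
grads = tf.gradients(loss, [w])

deferred_grads, put_ops, warmup_ops = defer_single_device_tensors(grads)
train_op = tf.train.GradientDescentOptimizer(0.1).apply_gradients(
    zip(deferred_grads, [w]))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(warmup_ops)  # stage zero gradients so the first get() has data
    for _ in range(5):
        # put_ops must run on every step that consumes deferred_grads, so
        # each step applies the gradients computed on the previous step.
        sess.run([train_op] + put_ops)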
Example #12
Source File: benchmark_cnn.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    with tf.device(self.cpu_device):
      if self.params.eval:
        subset = 'validation'
      else:
        subset = 'train'
      image_producer_ops = []
      image_producer_stages = []
      images_splits, labels_splits = self.image_preprocessor.minibatch(
          self.dataset, subset=subset, use_datasets=self.params.use_datasets,
          cache_data=self.params.cache_data, shift_ratio=shift_ratio)
      images_shape = images_splits[0].get_shape()
      labels_shape = labels_splits[0].get_shape()
      for device_num in range(len(self.devices)):
        image_producer_stages.append(
            data_flow_ops.StagingArea(
                [images_splits[0].dtype, labels_splits[0].dtype],
                shapes=[images_shape, labels_shape]))
        for group_index in xrange(self.batch_group_size):
          if not self.use_synthetic_gpu_images:
            batch_index = group_index + device_num * self.batch_group_size
            put_op = image_producer_stages[device_num].put(
                [images_splits[batch_index], labels_splits[batch_index]])
            image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stages)
Example #13
Source File: input_source.py From tensorpack with Apache License 2.0 | 5 votes |
def _get_input_tensors(self):
    inputs = self._input.get_input_tensors()

    with self._device_ctx():
        with self.cached_name_scope():
            # Putting variables to stagingarea will cause trouble
            dtypes = []
            for idx in range(len(inputs)):
                dtype = inputs[idx].dtype
                if dtype.base_dtype != dtype:     # is reference type
                    inputs[idx] = tf.identity(inputs[idx])
                dtypes.append(dtype.base_dtype)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            # Setting capacity to 1 to potentially save some memory, because we
            # should expect the consumers to run slower than the producer.
            stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            self._stage_ops.append(stage.put(inputs))
            self._areas.append(stage)
            outputs = stage.get()
            if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                outputs = [outputs]
            for vin, vout in zip(inputs, outputs):
                vout.set_shape(vin.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
Example #14
Source File: variable_mgr_util.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #15
Source File: input_source.py From tensorpack with Apache License 2.0 | 5 votes |
def _prefill(self, sess):
    logger.info("Pre-filling StagingArea ...")
    for _ in range(self.nr_stage):
        self.stage_op.run(session=sess)
    logger.info("{} element{} put into StagingArea on each tower.".format(
        self.nr_stage, "s were" if self.nr_stage > 1 else " was"))
Example #16
Source File: nvcnn.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def stage(tensors):
    """Stages the given tensors in a StagingArea for asynchronous put/get.
    """
    stage_area = data_flow_ops.StagingArea(
        dtypes=[tensor.dtype for tensor in tensors],
        shapes=[tensor.get_shape() for tensor in tensors])
    put_op = stage_area.put(tensors)
    get_tensors = stage_area.get()
    get_tensors = [tf.reshape(gt, t.get_shape())
                   for (gt, t) in zip(get_tensors, tensors)]
    return put_op, get_tensors
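A hypothetical call site for the stage() helper above, showing the usual pattern of warming the area up once and then re-running the put op each step. The input tensors here are zero-filled stand-ins invented for this sketch, not from nvcnn.py.

# Hypothetical usage of stage() above (TF 1.x graph mode).
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

images = tf.zeros([32, 224, 224, 3])     # stand-in input batch
labels = tf.zeros([32], dtype=tf.int32)  # stand-in labels
preload_op, (gpu_images, gpu_labels) = stage([images, labels])

with tf.Session() as sess:
    sess.run(preload_op)  # warm-up: fill the staging area once
    for _ in range(10):
        # Consume the staged batch while simultaneously staging the next one.
        sess.run([preload_op, gpu_images, gpu_labels])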
Example #17
Source File: input_source.py From ADL with MIT License | 5 votes |
def _get_input_tensors(self):
    inputs = self._input.get_input_tensors()

    with self._device_ctx():
        with self.cached_name_scope():
            # Putting variables to stagingarea will cause trouble
            dtypes = []
            for idx in range(len(inputs)):
                dtype = inputs[idx].dtype
                if dtype.base_dtype != dtype:     # is reference type
                    inputs[idx] = tf.identity(inputs[idx])
                dtypes.append(dtype.base_dtype)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            # Setting capacity to 1 to potentially save some memory, because we
            # should expect the consumers to run slower than the producer.
            stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            self._stage_ops.append(stage.put(inputs))
            self._areas.append(stage)
            outputs = stage.get()
            if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                outputs = [outputs]
            for vin, vout in zip(inputs, outputs):
                vout.set_shape(vin.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
Example #18
Source File: input_source.py From ADL with MIT License | 5 votes |
def _prefill(self, sess):
    logger.info("Pre-filling StagingArea ...")
    for _ in range(self.nr_stage):
        self.stage_op.run(session=sess)
    logger.info("{} element{} put into StagingArea on each tower.".format(
        self.nr_stage, "s were" if self.nr_stage > 1 else " was"))
Example #19
Source File: train_imagenet_resnet_hvd.py From sagemaker-tensorflow-training-toolkit with Apache License 2.0 | 5 votes |
def stage(tensors):
    """Stages the given tensors in a StagingArea for asynchronous put/get.
    """
    stage_area = data_flow_ops.StagingArea(
        dtypes=[tensor.dtype for tensor in tensors],
        shapes=[tensor.get_shape() for tensor in tensors])
    put_op = stage_area.put(tensors)
    get_tensors = stage_area.get()
    tf.add_to_collection('STAGING_AREA_PUTS', put_op)
    return put_op, get_tensors
Example #20
Source File: input_source.py From petridishnn with MIT License | 5 votes |
def _get_input_tensors(self):
    inputs = self._input.get_input_tensors()

    with self._device_ctx():
        with self.cached_name_scope():
            # Putting variables to stagingarea will cause trouble
            dtypes = []
            for idx in range(len(inputs)):
                dtype = inputs[idx].dtype
                if dtype.base_dtype != dtype:     # is reference type
                    inputs[idx] = tf.identity(inputs[idx])
                dtypes.append(dtype.base_dtype)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            # Setting capacity to 1 to potentially save some memory, because we
            # should expect the consumers to run slower than the producer.
            stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            self._stage_ops.append(stage.put(inputs))
            self._areas.append(stage)
            outputs = stage.get()
            if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                outputs = [outputs]
            for vin, vout in zip(inputs, outputs):
                vout.set_shape(vin.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
Example #21
Source File: input_source.py From petridishnn with MIT License | 5 votes |
def _prefill(self, sess):
    logger.info("Pre-filling StagingArea ...")
    for k in range(self.nr_stage):
        self.stage_op.run(session=sess)
    logger.info("{} element{} put into StagingArea on each tower.".format(
        self.nr_stage, "s were" if self.nr_stage > 1 else " was"))
Example #22
Source File: variable_mgr.py From tf-imagenet with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #23
Source File: variable_mgr.py From deeplearning-benchmark with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #24
Source File: benchmark_cnn.py From parallax with Apache License 2.0 | 5 votes |
def _build_image_processing(self, shift_ratio=0):
    """Build the image (pre)processing portion of the model graph."""
    if self.use_synthetic_gpu_images:
        return (None, None)

    with tf.device('/cpu:0'):
        if self.params.eval:
            subset = 'validation'
        else:
            subset = 'train'
        image_producer_ops = []
        images_splits, labels_splits = self.image_preprocessor.minibatch(
            self.dataset, subset=subset,
            use_datasets=self.params.use_datasets,
            cache_data=self.params.cache_data,
            shift_ratio=shift_ratio)
        images_shape = images_splits[0].get_shape()
        labels_shape = labels_splits[0].get_shape()

    with tf.device('/gpu:0'):
        if self.params.eval:
            image_producer_stage = data_flow_ops.StagingArea(
                [images_splits[0].dtype, labels_splits[0].dtype],
                shapes=[images_shape, labels_shape],
                capacity=1)
        else:
            image_producer_stage = data_flow_ops.StagingArea(
                [images_splits[0].dtype, labels_splits[0].dtype],
                shapes=[images_shape, labels_shape],
                capacity=self.batch_group_size)
        put_op = image_producer_stage.put(
            [images_splits[0], labels_splits[0]])
        image_producer_ops.append(put_op)
    return (image_producer_ops, image_producer_stage)
Example #25
Source File: variable_mgr_util.py From benchmarks with Apache License 2.0 | 5 votes |
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      put_op, get_op = staging_ops[name]
      return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
      with tf.device(self.cpu_device):
        # This helps copying the weights from the parameter to this server only
        # once.
        if name in self.variable_mgr.staged_vars_on_cpu:
          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
        else:
          cpu_var = tf.identity(real_var)
          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
        var_to_stage = cpu_var
    else:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)
    if trainable:
      # For trainable variables, they are managed separately through
      # apply_gradients.
      return get_op
    else:
      # For other shadow variables, the access is decoupled through a wrapper
      # class.
      return StagedModelVariable(real_var, get_op, self.variable_mgr)
Example #26
Source File: variable_mgr_util.py From benchmarks with Apache License 2.0 | 5 votes |
def assign_sub(self, delta, name=None, read_value=True):
    """Mimic the updates to the variable.

    Args:
      delta: is pushed into a staging buffer and will be pumped later.
      name: currently ignored; names of ops and the StagingArea are
        computed without using this pass name.
      read_value: if True, will return something which evaluates to the new
        value of the variable; if False will return the assign op.

    Returns:
      The actual updates. The colocation constraint will be reapplied.
    """
    # This parameter is ignored: the StagingArea only supports setting
    # the shared name, not the names of individual ops it uses.
    del name

    # colocate_with(None, True) clears the colocation constraints.
    # Push the delta into a staging buffer.
    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
      delta_staging_area = data_flow_ops.StagingArea(
          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
      delta_put_op = delta_staging_area.put([delta])
      self.variable_mgr.staging_delta_ops.append(delta_put_op)
      delta_get_op = delta_staging_area.get()[0]
    # Return the actual updates. The colocation constraint will be reapplied.
    return self.real_var.assign_sub(delta_get_op, read_value=read_value)
Example #27
Source File: _multigpu_with_nccl.py From keras_experiments with The Unlicense | 4 votes |
def all_sync_params(tower_params, devices, usenccl=True):
    """Assigns the params from the first tower to all others"""
    if len(devices) == 1:
        return tf.no_op()
    sync_ops = []
    if have_nccl and usenccl:
        for param_on_devices in zip(*tower_params):
            # print('PARAM_ON_DEVICES: {}'.format(param_on_devices))  # DEBUG
            # Note: param_on_devices is [paramX_gpu0, paramX_gpu1, ...]
            param0 = param_on_devices[0]
            send_op, received_tensors = nccl.broadcast(param0, devices[1:])
            sync_ops.append(send_op)
            for device, param, received in zip(devices[1:],
                                               param_on_devices[1:],
                                               received_tensors):
                with tf.device(device):
                    sync_op = param.assign(received)
                    sync_ops.append(sync_op)
    else:
        params0 = tower_params[0]
        for device, params in zip(devices, tower_params):
            with tf.device(device):
                for param, param0 in zip(params, params0):
                    sync_op = param.assign(param0.read_value())
                    sync_ops.append(sync_op)

    return tf.group(*sync_ops)


# def stage(tensors):
#     """Stages the given tensors in a StagingArea for asynchronous put/get.
#     """
#     stage_area = data_flow_ops.StagingArea(
#         dtypes=[tensor.dtype for tensor in tensors],
#         shapes=[tensor.get_shape() for tensor in tensors])
#     put_op = stage_area.put(tensors)
#     get_tensors = stage_area.get()
#     if not isinstance(get_tensors, list):
#         get_tensors = [get_tensors]
#     # print('GET_TENSORS: {}'.format(get_tensors))  # DEBUG
#
#     get_tensors = [tf.reshape(gt, t.get_shape())
#                    for (gt, t) in zip(get_tensors, tensors)]
#     return put_op, get_tensors
Example #28
Source File: _multigpu_with_nccl.py From keras_experiments with The Unlicense | 4 votes |
def __init__(self, *args, **kwargs):
    # :param model_creator: Callable that returns a serial i.e. non-multi
    #     GPU Keras model i.e. a keras.models.Model model. REQUIRED.
    #     Suggestion, use partial from functools to setup model_creator.
    # try:
    #     model_creator = kwargs.pop('model_creator')
    # except KeyError:
    #     raise RuntimeError('Keyword argument "model_creator" required '
    #                        'for ModelMGPU.')
    super(ModelMGPU, self).__init__()

    try:
        smodel = kwargs.pop('serial_model')
    except KeyError:
        raise RuntimeError('Keyword argument "serial_model" required '
                           'for ModelMGPU.')

    # SET STATE: Instance of serial model for checkpointing
    self._smodel = smodel  # model_creator()

    try:
        gdev_list = kwargs.pop('gdev_list')
    except KeyError:
        raise RuntimeError('Keyword argument "gdev_list" required '
                           'for ModelMGPU.')
    self._gdev_list = gdev_list

    mname = kwargs.pop('name', self._smodel.name)
    kwargs['name'] = mname

    self._ps_device = kwargs.pop('ps_device', '/cpu:0')
    self._initsync = kwargs.pop('initsync', True)
    self._usenccl = kwargs.pop('usenccl', False)
    self._syncopt = kwargs.pop('syncopt', False)
    self._enqueue = kwargs.pop('enqueue', False)

    if self._enqueue:
        warnings.warn('Enqueue option to use StagingArea currently does '
                      'not work.', UserWarning)

    # NOTE: To use staging have to patch keras tensorflow_backend.Function.
    # Function implementation in keras_exp.multigpu._patch_tf_backend
    self._enqueue_ops = []

    self._tower_params = []  # For init/sync'ing of parameters.

    kwargs_ = self._init_make_dataparallel(gdev_list, **kwargs)
    super(ModelMGPU, self).__init__(*args, **kwargs_)