Python horovod.tensorflow.size() Examples
The following are 30 code examples of horovod.tensorflow.size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module horovod.tensorflow, or try the search function.
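For context: horovod.tensorflow.size() returns the total number of Horovod worker processes in the job, and it is most commonly used to scale the learning rate or to shard the input data, as the examples below show. Here is a minimal sketch of typical usage (the optimizer choice and base learning rate are illustrative placeholders, not taken from any of the projects below):

import tensorflow as tf
import horovod.tensorflow as hvd

# Initialize Horovod before calling size() or rank().
hvd.init()

# A common convention: scale the base learning rate by the number of workers.
base_lr = 0.001
opt = tf.train.AdamOptimizer(base_lr * hvd.size())

# Wrap the optimizer so gradients are averaged across all workers.
opt = hvd.DistributedOptimizer(opt)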
Example #1
Source File: cape_ablate_horovod.py From cape-document-qa with Apache License 2.0 | 6 votes |
def get_training_params(train_config):
    return TrainParams(
        SerializableOptimizer(
            train_config.optimizer,
            dict(learning_rate=train_config.learning_rate * hvd.size())
        ),
        num_epochs=train_config.n_epochs,
        ema=train_config.ema,
        max_checkpoints_to_keep=train_config.max_checkpoints_to_keep,
        async_encoding=train_config.async_encoding,
        log_period=train_config.log_period,
        eval_period=train_config.eval_period,
        save_period=train_config.save_period,
        best_weights=("dev", "b8/question-text-f1"),
        eval_samples=dict(dev=None, train=6000),
        eval_at_zero=False
    )
Example #2
Source File: hvd_distributed_tf_data_utils.py From BERT with Apache License 2.0 | 6 votes |
def train_input_fn(input_file, _parse_fn, name_to_features, params, **kargs):
    if_shard = kargs.get("if_shard", "1")
    dataset = tf.data.TFRecordDataset(input_file, buffer_size=params.get("buffer_size", 100))
    print("==hvd size {}, rank {}==".format(hvd.size(), hvd.rank()))
    if if_shard == "1":
        dataset = dataset.shard(hvd.size(), hvd.rank())
    dataset = dataset.map(lambda x: _parse_fn(x, name_to_features))
    dataset = dataset.shuffle(
        buffer_size=params.get("buffer_size", 1024) + 3 * params.get("batch_size", 32),
        seed=np.random.randint(0, 1e10, 1)[0],
        reshuffle_each_iteration=True)
    dataset = dataset.batch(params.get("batch_size", 32))
    dataset = dataset.repeat(params.get("epoch", 100))
    iterator = dataset.make_one_shot_iterator()
    features = iterator.get_next()
    return features
Example #3
Source File: flow_training.py From flowpp with MIT License | 6 votes |
def setup_horovod():
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()

    from mpi4py import MPI
    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        # _local_list_orig = local_list
        local_list = list(map(float, local_list))
        # print('RANK {} AVERAGING {} -> {}'.format(hvd.rank(), _local_list_orig, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    return hvd, MPI, is_root, mpi_average
Example #4
Source File: hvd_distributed_optimizer.py From BERT with Apache License 2.0 | 6 votes |
def get_train_op(self, loss, tvars, init_lr, num_train_steps, **kargs):
    learning_rate = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
    learning_rate = self.warm_up(learning_rate, init_lr, **kargs)
    print("==optimizer hvd size=={}".format(hvd.size()))
    opt = self.optimizer_op(learning_rate * hvd.size(), **kargs)

    # add Uber Horovod distributed optimizer
    self.opt = hvd.DistributedOptimizer(opt)

    grads = self.grad_clip_fn(self.opt, loss, tvars, **kargs)
    # self.grad_summaries_merged = optimizer_utils.add_grad_summaries(
    #     zip(grads, tvars))

    train_op = self.opt.apply_gradients(
        zip(grads, tvars), global_step=self.global_step)
    new_global_step = self.global_step + 1
    train_op = tf.group(train_op, [self.global_step.assign(new_global_step)])
    return train_op
Example #5
Source File: horovod_patches.py From cape-document-qa with Apache License 2.0 | 6 votes |
def _train(model: Model,
           data: TrainingData,
           checkpoint: Union[str, None],
           parameter_checkpoint: Union[str, None],
           save_start: bool,
           train_params: trainer.TrainParams,
           evaluators: List[Evaluator],
           out: ModelDir,
           notes=None,
           dry_run=False,
           start_eval=False):
    print('Horovod size: ', hvd.size())
    print('Horovod rank: ', hvd.rank())
    print('Horovod local rank: ', hvd.local_rank())
    if train_params.async_encoding:
        _train_async(model, data, checkpoint, parameter_checkpoint, save_start,
                     train_params, evaluators, out, notes, dry_run, start_eval)
        return
    else:
        raise NotImplementedError('Syncronous training with Horovod not supported yet')
Example #6
Source File: resnet_main.py From DistributedDeepLearning with MIT License | 6 votes |
def _log_summary(total_images, batch_size, duration):
    logger = logging.getLogger(__name__)
    images_per_second = total_images / duration
    logger.info("Data length: {}".format(total_images))
    logger.info("Total duration: {:.3f}".format(duration))
    logger.info("Total images/sec: {:.3f}".format(images_per_second))
    logger.info(
        "Batch size: (Per GPU {}: Total {})".format(
            batch_size, hvd.size() * batch_size if defaults.DISTRIBUTED else batch_size
        )
    )
    logger.info(
        "Distributed: {}".format("True" if defaults.DISTRIBUTED else "False")
    )
    logger.info(
        "Num GPUs: {:.3f}".format(hvd.size() if defaults.DISTRIBUTED else 1)
    )
Example #7
Source File: __init__.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def get_gradients(self, loss, params):
    """
    Compute gradients of all trainable variables.
    See Optimizer.get_gradients() for more info.
    In DistributedOptimizer, get_gradients() is overridden to also
    allreduce the gradients before returning them.
    """
    gradients = super(self.__class__, self).get_gradients(loss, params)
    if hvd.size() > 1:
        averaged_gradients = []
        with tf.name_scope(self._name + "_Allreduce"):
            for grad in gradients:
                if grad is not None:
                    avg_grad = hvd.allreduce(grad,
                                             device_dense=self._device_dense,
                                             device_sparse=self._device_sparse)
                    averaged_gradients.append(avg_grad)
                else:
                    averaged_gradients.append(None)
        return averaged_gradients
    else:
        return gradients
Example #8
Source File: horovod.py From blueoil with Apache License 2.0 | 6 votes |
def setup():
    if not horovod_installed:
        return False

    global horovod_initialized
    if horovod_initialized:
        return hvd

    hvd.init()
    horovod_initialized = True

    horovod_num_worker = hvd.size()
    horovod_rank = hvd.rank()
    # verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    # make sure MPI is not re-initialized.
    import mpi4py.rc
    mpi4py.rc.initialize = False
    # import mpi4py
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # check size and rank are synchronized
    assert horovod_num_worker == comm.Get_size()
    assert horovod_rank == comm.Get_rank()
    return hvd
Example #9
Source File: horovod.py From blueoil with Apache License 2.0 | 6 votes |
def setup():
    if not horovod_installed:
        return False

    global horovod_initialized
    if horovod_initialized:
        return hvd

    hvd.init()
    horovod_initialized = True

    horovod_num_worker = hvd.size()
    horovod_rank = hvd.rank()
    # verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    # make sure MPI is not re-initialized.
    import mpi4py.rc
    mpi4py.rc.initialize = False
    # import mpi4py
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # check size and rank are synchronized
    assert horovod_num_worker == comm.Get_size()
    assert horovod_rank == comm.Get_rank()
    return hvd
Example #10
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def test_horovod_broadcast_error(self):
    """Test that the broadcast returns an error if any dimension besides
    the first is different among the tensors being broadcasted."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session(config=self.config) as session:
        tensor_size = [17] * 3
        tensor_size[1] = 10 * (rank + 1)
        tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(hvd.broadcast(tensor, 0))
Example #11
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def test_horovod_allgather_type_error(self):
    """Test that the allgather returns an error if the types being gathered
    differ among the processes"""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session(config=self.config) as session:
        tensor_size = [17] * 3
        dtype = tf.int32 if rank % 2 == 0 else tf.float32
        tensor = tf.ones(tensor_size, dtype=dtype) * rank
        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(hvd.allgather(tensor))
Example #12
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def test_horovod_allreduce_type_error(self):
    """Test that the allreduce raises an error if different ranks try to
    send tensors of different type."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session(config=self.config) as session:
        # Same rank, different dimension
        dims = [17] * 3
        tensor = tf.ones(dims,
                         dtype=tf.int32 if rank % 2 == 0 else tf.float32)
        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(hvd.allreduce(tensor))
Example #13
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def test_horovod_allreduce_cpu_gpu_error(self):
    """Test that the allreduce raises an error if different ranks try to
    perform reduction on CPU and GPU."""
    # Only do this test if there are GPUs available.
    if not tf.test.is_gpu_available(cuda_only=True):
        return

    hvd.init()
    local_rank = hvd.local_rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    device = "/gpu:%d" % local_rank if local_rank % 2 == 0 else "/cpu:0"
    with self.test_session(config=self.config) as session:
        with tf.device(device):
            # Same rank, different dimension
            dims = [17] * 3
            tensor = tf.ones(dims, dtype=tf.int32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allreduce(tensor))
Example #14
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 6 votes |
def test_horovod_allgather_error(self):
    """Test that the allgather returns an error if any dimension besides
    the first is different among the tensors being gathered."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session(config=self.config) as session:
        tensor_size = [17] * 3
        tensor_size[1] = 10 * (rank + 1)
        tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(hvd.allgather(tensor))
Example #15
Source File: train_model.py From DistributedDeepLearning with MIT License | 5 votes |
def _get_hooks(is_distributed=DISTRIBUTED):
    logger = logging.getLogger(__name__)
    if is_distributed:
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
        logger.info("Rank: {} Cluster Size {}".format(hvd.local_rank(), hvd.size()))
        return [bcast_hook]
    else:
        return []
Example #16
Source File: wrappers.py From atari-reset with MIT License | 5 votes |
def size(self):
    return self.low.shape
Example #17
Source File: wrappers.py From atari-reset with MIT License | 5 votes |
def proc_infos(self):
    epinfos = [info['episode'] for info in self.infos if 'episode' in info]
    if hvd.size() > 1:
        epinfos = flatten_lists(MPI.COMM_WORLD.allgather(epinfos))
    new_sp_wins = {}
    new_sp_counts = {}
    for epinfo in epinfos:
        sp = epinfo['starting_point']
        if sp in new_sp_counts:
            new_sp_counts[sp] += 1
            if epinfo['as_good_as_demo']:
                new_sp_wins[sp] += 1
        else:
            new_sp_counts[sp] = 1
            if epinfo['as_good_as_demo']:
                new_sp_wins[sp] = 1
            else:
                new_sp_wins[sp] = 0
    for sp, wins in new_sp_wins.items():
        self.starting_point_success[sp] = np.cast[np.float32](wins) / new_sp_counts[sp]

    # move starting point, ensuring at least 20% of workers are able to complete the demo
    csd = np.argwhere(np.cumsum(self.starting_point_success) / self.nrstartsteps >= 0.2)
    if len(csd) > 0:
        new_max_start = csd[0][0]
    else:
        new_max_start = np.minimum(self.max_starting_point + 100, self.max_max_starting_point)
    n_points_to_shift = self.max_starting_point - new_max_start
    self.decrement_starting_point(n_points_to_shift)
    self.infos = []
Example #18
Source File: vgg_tfrecord_horovod.py From keras_experiments with The Unlicense | 5 votes |
def _parser(desc):
    parser = ap.ArgumentParser(description=dedent(desc),
                               formatter_class=CustomFormatter)

    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to run training for.\n'
                             '(Default: %(default)s)\n')

    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='S|Batch size. Default: %(default)s')

    parser.add_argument(
        '--ngpus_per_model', type=int, default=1, choices=(1, 2),
        help='S|GPUs for Model parallelism. Max set to 2 for now. '
             'Default: %(default)s')

    parser.add_argument(
        '--imgs_per_epoch', type=int, default=0,
        help='S|Number of images to run during epoch. Use for timing.\n'
             'Default uses all the images for an epoch.')

    imagenet_datadir = '/datasets/imagenet/train-val-tfrecord-480-subset'
    parser.add_argument(
        '--datadir', default=imagenet_datadir,
        help='S|Data directory with Imagenet TFrecord dataset. Assumes\n'
             'TFrecord subsets prefixed with train-* and validation-* are in the\n'
             'directory. Default: %(default)s')

    parser.add_argument(
        '--distort_color', action='store_true', default=False,
        help='S|Distort color during training on imagenet to "enrich" the\n'
             'dataset. Default no distortion. Set this flag to enable distortion.')

    args = parser.parse_args()

    return args
Example #19
Source File: resnet50_tfrecord_horovod.py From keras_experiments with The Unlicense | 5 votes |
def _parser(desc):
    parser = ap.ArgumentParser(description=dedent(desc),
                               formatter_class=CustomFormatter)

    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to run training for.\n'
                             '(Default: %(default)s)\n')

    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='S|Batch size. Default: %(default)s')

    parser.add_argument(
        '--imgs_per_epoch', type=int, default=0,
        help='S|Number of images to run during epoch. Use for timing.\n'
             'Default uses all the images for an epoch.')

    imagenet_datadir = '/datasets/imagenet/train-val-tfrecord-480-subset'
    parser.add_argument(
        '--datadir', default=imagenet_datadir,
        help='S|Data directory with Imagenet TFrecord dataset. Assumes\n'
             'TFrecord subsets prefixed with train-* and validation-* are in the\n'
             'directory. Default: %(default)s')

    parser.add_argument(
        '--distort_color', action='store_true', default=False,
        help='S|Distort color during training on imagenet to "enrich" the\n'
             'dataset. Default no distortion. Set this flag to enable distortion.')

    args = parser.parse_args()

    return args
Example #20
Source File: optimizers.py From OpenSeq2Seq with Apache License 2.0 | 5 votes |
def reduce_gradients(grads_and_vars, on_horovod, model=None):
    if on_horovod:
        from horovod.tensorflow import allreduce, size

        if size() > 1:
            averaged_grads_and_vars = []
            with tf.name_scope("all_reduce"):
                for grad, var in grads_and_vars:
                    if grad is not None:
                        if isinstance(grad, tf.IndexedSlices):
                            if model._decoder.params.get('shared_embed', False):
                                from tensorflow.python.training.optimizer import _deduplicate_indexed_slices
                                summed_values, unique_indices = _deduplicate_indexed_slices(
                                    values=grad.values, indices=grad.indices)
                                gradient_no_duplicate_indices = tf.IndexedSlices(
                                    indices=unique_indices,
                                    values=summed_values,
                                    dense_shape=grad.dense_shape)
                                grad = tf.convert_to_tensor(gradient_no_duplicate_indices)
                        avg_grad = allreduce(grad)
                        averaged_grads_and_vars.append((avg_grad, var))
                    else:
                        averaged_grads_and_vars.append((None, var))
            return averaged_grads_and_vars
        else:
            return grads_and_vars
    else:
        raise NotImplementedError("Reduce in tower-mode is not implemented.")
Example #21
Source File: resnet_main.py From DistributedDeepLearning with MIT License | 5 votes |
def _get_hooks(batch_size, is_distributed=defaults.DISTRIBUTED):
    logger = logging.getLogger(__name__)
    if is_distributed:
        exps_hook = ExamplesPerSecondHook(batch_size * hvd.size())
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
        logger.info("Rank: {} Cluster Size {}".format(hvd.rank(), hvd.size()))
        return [bcast_hook, exps_hook]
    else:
        exps_hook = ExamplesPerSecondHook(batch_size)
        return [exps_hook]
Example #22
Source File: optim.py From pix2pix-flow with MIT License | 5 votes |
def adam2_old(self, params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    t = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
    updates.append(t.assign_add(1))

    for p, g1, g2 in zip(params, grads1, grads2):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g1
            updates.append(v.assign(v_t))
        else:
            v_t = g1
        mg_t = mom2 * mg + (1. - mom2) * g2
        delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
        p_t = p - lr_t * delta_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    return tf.group(*updates)
Example #23
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def test_horovod_broadcast_rank_error(self):
    """Test that the broadcast returns an error if different ranks
    specify different root rank."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session(config=self.config) as session:
        tensor = tf.ones([17] * 3, dtype=tf.float32)
        with self.assertRaises(tf.errors.FailedPreconditionError):
            session.run(hvd.broadcast(tensor, rank))
Example #24
Source File: train.py From pix2pix-flow with MIT License | 5 votes |
def get_its(hps):
    # These run for a fixed amount of time. As anchored batch is smaller, we've actually seen fewer examples
    train_its = int(np.ceil(hps.n_train / (hps.n_batch_train * hvd.size())))
    test_its = int(np.ceil(hps.n_test / (hps.n_batch_train * hvd.size())))
    train_epoch = train_its * hps.n_batch_train * hvd.size()

    # Do a full validation run
    if hvd.rank() == 0:
        print(hps.n_test, hps.local_batch_test, hvd.size())
    assert hps.n_test % (hps.local_batch_test * hvd.size()) == 0
    full_test_its = hps.n_test // (hps.local_batch_test * hvd.size())

    if hvd.rank() == 0:
        print("Train epoch size: " + str(train_epoch))
    return train_its, test_its, full_test_its
Example #25
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def test_horovod_broadcast(self):
    """Test that the broadcast correctly broadcasts 1D, 2D, 3D tensors."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session(config=self.config) as session:
        dtypes = [tf.uint8, tf.int8, tf.uint16, tf.int16,
                  tf.int32, tf.int64, tf.float16, tf.float32,
                  tf.float64, tf.bool]
        dims = [1, 2, 3]
        root_ranks = list(range(size))
        for dtype, dim, root_rank in itertools.product(dtypes, dims, root_ranks):
            tensor = tf.ones([17] * dim) * rank
            root_tensor = tf.ones([17] * dim) * root_rank
            if dtype == tf.bool:
                tensor = tensor % 2
                root_tensor = root_tensor % 2
            tensor = tf.cast(tensor, dtype=dtype)
            root_tensor = tf.cast(root_tensor, dtype=dtype)
            broadcasted_tensor = hvd.broadcast(tensor, root_rank)
            self.assertTrue(
                session.run(tf.reduce_all(tf.equal(
                    tf.cast(root_tensor, tf.int32),
                    tf.cast(broadcasted_tensor, tf.int32)))),
                "hvd.broadcast produces incorrect broadcasted tensor")
Example #26
Source File: test_tensorflow.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def test_horovod_allgather_grad(self):
    """Test the correctness of the allgather gradient."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    with self.test_session(config=self.config) as session:
        # As of TensorFlow v1.9, gradients are not supported on
        # integer tensors
        dtypes = [tf.float32, tf.float64]
        dims = [1, 2, 3]
        for dtype, dim in itertools.product(dtypes, dims):
            tensor_sizes = [3, 2, 7, 4, 6, 8, 10] * 5
            tensor_sizes = tensor_sizes[:size]

            tensor = tf.ones([tensor_sizes[rank]] + [17] * (dim - 1)) * rank
            if dtype == tf.bool:
                tensor = tensor % 2
            tensor = tf.cast(tensor, dtype=dtype)
            gathered = hvd.allgather(tensor)

            grad_list = []
            for r, tensor_size in enumerate(tensor_sizes):
                g = tf.ones([tensor_size] + [17] * (dim - 1)) * r
                grad_list.append(g)
            grad_ys = tf.concat(grad_list, axis=0)

            grad = tf.gradients(gathered, tensor, grad_ys)[0]
            grad_out = session.run(grad)

            expected = np.ones(
                [tensor_sizes[rank]] + [17] * (dim - 1)
            ) * rank * size
            err = np.linalg.norm(expected - grad_out)
            self.assertLess(err, 0.00000001,
                            "gradient %s differs from expected %s, "
                            "error: %s" % (grad_out, expected, str(err)))
Example #27
Source File: tfops.py From pix2pix-flow with MIT License | 5 votes |
def allreduce_sum(x):
    if hvd.size() == 1:
        return x
    return hvd.mpi_ops._allreduce(x)
Example #28
Source File: tfops.py From pix2pix-flow with MIT License | 5 votes |
def add_edge_padding(x, filter_size):
    assert filter_size[0] % 2 == 1
    if filter_size[0] == 1 and filter_size[1] == 1:
        return x
    a = (filter_size[0] - 1) // 2  # vertical padding size
    b = (filter_size[1] - 1) // 2  # horizontal padding size
    if True:
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        name = "_".join([str(dim) for dim in [a, b, *int_shape(x)[1:3]]])
        pads = tf.get_collection(name)
        if not pads:
            if hvd.rank() == 0:
                print("Creating pad", name)
            pad = np.zeros([1] + int_shape(x)[1:3] + [1], dtype='float32')
            pad[:, :a, :, 0] = 1.
            pad[:, -a:, :, 0] = 1.
            pad[:, :, :b, 0] = 1.
            pad[:, :, -b:, 0] = 1.
            pad = tf.convert_to_tensor(pad)
            tf.add_to_collection(name, pad)
        else:
            pad = pads[0]
        pad = tf.tile(pad, [tf.shape(x)[0], 1, 1, 1])
        x = tf.concat([x, pad], axis=3)
    else:
        pad = tf.pad(tf.zeros_like(x[:, :, :, :1]) - 1,
                     [[0, 0], [a, a], [b, b], [0, 0]]) + 1
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        x = tf.concat([x, pad], axis=3)
    return x
Example #29
Source File: optim.py From glow with MIT License | 5 votes |
def adam2_old(params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    t = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
    updates.append(t.assign_add(1))

    for p, g1, g2 in zip(params, grads1, grads2):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g1
            updates.append(v.assign(v_t))
        else:
            v_t = g1
        mg_t = mom2 * mg + (1. - mom2) * g2
        delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
        p_t = p - lr_t * delta_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    return tf.group(*updates)
Example #30
Source File: graph_transform.py From parallax with Apache License 2.0 | 5 votes |
def graph_transform_mpi(single_gpu_meta_graph_def, config, op_library_path=None):
    if op_library_path is not None:
        tf.load_op_library(op_library_path)

    with tf.Graph().as_default() as replica:
        tf.train.import_meta_graph(single_gpu_meta_graph_def)

        tensor_or_op_name_to_replica_names = {}
        for op in replica.get_operations():
            tensor_or_op_name_to_replica_names[op.name] = [op.name]
            for output in op.outputs:
                tensor_or_op_name_to_replica_names[output.name] = [output.name]

        # Initialize horovod
        hvd.init()

        num_workers = hvd.size()
        worker_id = hvd.rank()
        update_shard_values_for_worker(num_workers, worker_id)

        op_to_control_consumer_ops = get_all_control_consumers(replica)
        trainable_variable_ops = [var.op for var in tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)]

        for gradients_info in tf.get_collection(tf.GraphKeys.GRADIENTS_INFO):
            target_tensor = gradients_info._target
            if target_tensor.op not in trainable_variable_ops:
                parallax_log.debug(
                    "Gradient for non-trainable variable %s is created, ignore"
                    % target_tensor.op.name)
                continue
            _add_aggregation_ops(gradients_info, op_to_control_consumer_ops, config)
        _add_broadcast_ops()

    return tf.train.export_meta_graph(graph=replica), \
        tensor_or_op_name_to_replica_names