Python horovod.tensorflow.size() Examples

The following are 29 code examples of horovod.tensorflow.size(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module horovod.tensorflow, or try the search function.
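horovod.tensorflow.size() returns the total number of Horovod worker processes in the current job, and it is most often used to shard a dataset across workers and to scale the learning rate by the worker count. Before the project examples, here is a minimal sketch of that pattern, assuming TensorFlow 1.x and Horovod are installed; the base learning rate and optimizer choice are illustrative placeholders, not taken from any project below.

import tensorflow as tf
import horovod.tensorflow as hvd

# Initialize Horovod; size()/rank() are only valid after init().
hvd.init()

# Scale the learning rate by the number of workers, as several of the
# examples below do (the base learning rate here is illustrative).
base_lr = 0.001
opt = tf.train.AdamOptimizer(base_lr * hvd.size())

# Wrap the optimizer so gradients are averaged across all workers.
opt = hvd.DistributedOptimizer(opt)

print("workers: {}, rank: {}, local rank: {}".format(
    hvd.size(), hvd.rank(), hvd.local_rank()))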
Example #1
Source File: cape_ablate_horovod.py    From cape-document-qa with Apache License 2.0
def get_training_params(train_config):
    return TrainParams(
        SerializableOptimizer(
            train_config.optimizer,
            dict(learning_rate=train_config.learning_rate * hvd.size())
        ),
        num_epochs=train_config.n_epochs,
        ema=train_config.ema,
        max_checkpoints_to_keep=train_config.max_checkpoints_to_keep,
        async_encoding=train_config.async_encoding,
        log_period=train_config.log_period,
        eval_period=train_config.eval_period,
        save_period=train_config.save_period,
        best_weights=("dev", "b8/question-text-f1"),
        eval_samples=dict(dev=None, train=6000),
        eval_at_zero=False
    ) 
Example #2
Source File: hvd_distributed_tf_data_utils.py    From BERT with Apache License 2.0
def train_input_fn(input_file, _parse_fn, name_to_features,
		params, **kargs):
	if_shard = kargs.get("if_shard", "1")
	dataset = tf.data.TFRecordDataset(input_file, buffer_size=params.get("buffer_size", 100))
	print("==hvd size {}, rank {}==".format(hvd.size(), hvd.rank()))
	if if_shard == "1":
		dataset = dataset.shard(hvd.size(), hvd.rank())
	dataset = dataset.map(lambda x:_parse_fn(x, name_to_features))
	dataset = dataset.shuffle(
							buffer_size=params.get("buffer_size", 1024)+3*params.get("batch_size", 32),
							seed=np.random.randint(0,1e10,1)[0],
							reshuffle_each_iteration=True)
	dataset = dataset.batch(params.get("batch_size", 32))
	dataset = dataset.repeat(params.get("epoch", 100))
	iterator = dataset.make_one_shot_iterator()
	features = iterator.get_next()
	return features 
Example #3
Source File: flow_training.py    From flowpp with MIT License
def setup_horovod():
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()

    from mpi4py import MPI

    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        # _local_list_orig = local_list
        local_list = list(map(float, local_list))
        # print('RANK {} AVERAGING {} -> {}'.format(hvd.rank(), _local_list_orig, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    return hvd, MPI, is_root, mpi_average 
Example #4
Source File: hvd_distributed_optimizer.py    From BERT with Apache License 2.0
def get_train_op(self, loss, tvars, init_lr, 
							num_train_steps, **kargs):
		learning_rate = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
		learning_rate = self.warm_up(learning_rate, init_lr, **kargs)
		print("==optimizer hvd size=={}".format(hvd.size()))
		opt = self.optimizer_op(learning_rate*hvd.size(), **kargs)

		# add Uber Horovod distributed optimizer
		self.opt = hvd.DistributedOptimizer(opt)
		grads = self.grad_clip_fn(self.opt, loss, tvars, **kargs)

		# self.grad_summaries_merged = optimizer_utils.add_grad_summaries(
		# 						zip(grads, tvars))

		train_op = self.opt.apply_gradients(
					zip(grads, tvars), global_step=self.global_step)
		new_global_step = self.global_step + 1
		train_op = tf.group(train_op, [self.global_step.assign(new_global_step)])
		return train_op 
Example #5
Source File: horovod_patches.py    From cape-document-qa with Apache License 2.0
def _train(model: Model,
           data: TrainingData,
           checkpoint: Union[str, None],
           parameter_checkpoint: Union[str, None],
           save_start: bool,
           train_params: trainer.TrainParams,
           evaluators: List[Evaluator],
           out: ModelDir,
           notes=None,
           dry_run=False,
           start_eval=False):
    print('Horovod size: ', hvd.size())
    print('Horovod rank: ', hvd.rank())
    print('Horovod local rank: ', hvd.local_rank())

    if train_params.async_encoding:
        _train_async(model, data, checkpoint, parameter_checkpoint, save_start, train_params,
                 evaluators, out, notes, dry_run, start_eval)
        return
    else:
        raise NotImplementedError('Synchronous training with Horovod not supported yet')
Example #6
Source File: resnet_main.py    From DistributedDeepLearning with MIT License
def _log_summary(total_images, batch_size, duration):
    logger = logging.getLogger(__name__)
    images_per_second = total_images / duration
    logger.info("Data length:      {}".format(total_images))
    logger.info("Total duration:   {:.3f}".format(duration))
    logger.info("Total images/sec: {:.3f}".format(images_per_second))
    logger.info(
        "Batch size:       (Per GPU {}: Total {})".format(
            batch_size, hvd.size() * batch_size if defaults.DISTRIBUTED else batch_size
        )
    )
    logger.info(
        "Distributed:      {}".format("True" if defaults.DISTRIBUTED else "False")
    )
    logger.info(
        "Num GPUs:         {:.3f}".format(hvd.size() if defaults.DISTRIBUTED else 1)
    ) 
Example #7
Source File: __init__.py    From training_results_v0.6 with Apache License 2.0
def get_gradients(self, loss, params):
        """
        Compute gradients of all trainable variables.

        See Optimizer.get_gradients() for more info.

        In DistributedOptimizer, get_gradients() is overridden to also
        allreduce the gradients before returning them.
        """
        gradients = super(self.__class__, self).get_gradients(loss, params)
        if hvd.size() > 1:
            averaged_gradients = []
            with tf.name_scope(self._name + "_Allreduce"):
                for grad in gradients:
                    if grad is not None:
                        avg_grad = hvd.allreduce(grad, device_dense=self._device_dense,
                                                 device_sparse=self._device_sparse)
                        averaged_gradients.append(avg_grad)
                    else:
                        averaged_gradients.append(None)
                return averaged_gradients
        else:
            return gradients 
Example #8
Source File: horovod.py    From blueoil with Apache License 2.0
def setup():
    if not horovod_installed:
        return False

    global horovod_initialized
    if horovod_initialized:
        return hvd

    hvd.init()
    horovod_initialized = True

    horovod_num_worker = hvd.size()
    horovod_rank = hvd.rank()
    # verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    # make sure MPI is not re-initialized.
    import mpi4py.rc
    mpi4py.rc.initialize = False
    # import mpi4py
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # check size and rank are synchronized
    assert horovod_num_worker == comm.Get_size()
    assert horovod_rank == comm.Get_rank()
    return hvd 
Example #9
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_broadcast_error(self):
        """Test that the broadcast returns an error if any dimension besides
        the first is different among the tensors being broadcasted."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.broadcast(tensor, 0)) 
Example #10
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allgather_type_error(self):
        """Test that the allgather returns an error if the types being gathered
        differ among the processes"""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones(tensor_size, dtype=dtype) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allgather(tensor)) 
Example #11
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allreduce_type_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different type."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            # Same rank, different dimension
            dims = [17] * 3
            tensor = tf.ones(dims,
                             dtype=tf.int32 if rank % 2 == 0 else tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allreduce(tensor)) 
Example #12
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allreduce_cpu_gpu_error(self):
        """Test that the allreduce raises an error if different ranks try to
        perform reduction on CPU and GPU."""
        # Only do this test if there are GPUs available.
        if not tf.test.is_gpu_available(cuda_only=True):
            return

        hvd.init()
        local_rank = hvd.local_rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        device = "/gpu:%d" % local_rank if local_rank % 2 == 0 else "/cpu:0"
        with self.test_session(config=self.config) as session:
            with tf.device(device):
                # Same rank, different dimension
                dims = [17] * 3
                tensor = tf.ones(dims, dtype=tf.int32)
                with self.assertRaises(tf.errors.FailedPreconditionError):
                    session.run(hvd.allreduce(tensor)) 
Example #13
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allgather_error(self):
        """Test that the allgather returns an error if any dimension besides
        the first is different among the tensors being gathered."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allgather(tensor)) 
Example #14
Source File: train_model.py    From DistributedDeepLearning with MIT License
def _get_hooks(is_distributed=DISTRIBUTED):
    logger = logging.getLogger(__name__)
    if is_distributed:
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
        logger.info("Rank: {} Cluster Size {}".format(hvd.local_rank(), hvd.size()))
        return [bcast_hook]
    else:
        return [] 
Example #15
Source File: wrappers.py    From atari-reset with MIT License
def size(self):
        return self.low.shape 
Example #16
Source File: wrappers.py    From atari-reset with MIT License
def proc_infos(self):
        epinfos = [info['episode'] for info in self.infos if 'episode' in info]

        if hvd.size()>1:
            epinfos = flatten_lists(MPI.COMM_WORLD.allgather(epinfos))

        new_sp_wins = {}
        new_sp_counts = {}
        for epinfo in epinfos:
            sp = epinfo['starting_point']
            if sp in new_sp_counts:
                new_sp_counts[sp] += 1
                if epinfo['as_good_as_demo']:
                    new_sp_wins[sp] += 1
            else:
                new_sp_counts[sp] = 1
                if epinfo['as_good_as_demo']:
                    new_sp_wins[sp] = 1
                else:
                    new_sp_wins[sp] = 0

        for sp,wins in new_sp_wins.items():
            self.starting_point_success[sp] = np.cast[np.float32](wins)/new_sp_counts[sp]

        # move starting point, ensuring at least 20% of workers are able to complete the demo
        csd = np.argwhere(np.cumsum(self.starting_point_success) / self.nrstartsteps >= 0.2)
        if len(csd) > 0:
            new_max_start = csd[0][0]
        else:
            new_max_start = np.minimum(self.max_starting_point + 100, self.max_max_starting_point)
        n_points_to_shift = self.max_starting_point - new_max_start
        self.decrement_starting_point(n_points_to_shift)
        self.infos = [] 
Example #17
Source File: vgg_tfrecord_horovod.py    From keras_experiments with The Unlicense
def _parser(desc):
    parser = ap.ArgumentParser(description=dedent(desc),
                               formatter_class=CustomFormatter)

    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to run training for.\n'
                        '(Default: %(default)s)\n')

    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='S|Batch size. Default: %(default)s')

    parser.add_argument(
        '--ngpus_per_model', type=int, default=1, choices=(1, 2),
        help='S|GPUs for Model parallelism. Max set to 2 for now. '
        'Default: %(default)s')

    parser.add_argument(
        '--imgs_per_epoch', type=int, default=0,
        help='S|Number of images to run during epoch. Use for timing.\n'
        'Default uses all the images for an epoch.')

    imagenet_datadir = '/datasets/imagenet/train-val-tfrecord-480-subset'
    parser.add_argument(
        '--datadir', default=imagenet_datadir,
        help='S|Data directory with Imagenet TFrecord dataset. Assumes\n'
        'TFrecord subsets prefixed with train-* and validation-* are in the\n'
        'directory. Default: %(default)s')

    parser.add_argument(
        '--distort_color', action='store_true', default=False,
        help='S|Distort color during training on imagenet to "enrich" the\n'
        'dataset. Default no distortion. Set this flag to enable distortion.')

    args = parser.parse_args()

    return args 
Example #18
Source File: resnet50_tfrecord_horovod.py    From keras_experiments with The Unlicense
def _parser(desc):
    parser = ap.ArgumentParser(description=dedent(desc),
                               formatter_class=CustomFormatter)

    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to run training for.\n'
                        '(Default: %(default)s)\n')

    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='S|Batch size. Default: %(default)s')

    parser.add_argument(
        '--imgs_per_epoch', type=int, default=0,
        help='S|Number of images to run during epoch. Use for timing.\n'
        'Default uses all the images for an epoch.')

    imagenet_datadir = '/datasets/imagenet/train-val-tfrecord-480-subset'
    parser.add_argument(
        '--datadir', default=imagenet_datadir,
        help='S|Data directory with Imagenet TFrecord dataset. Assumes\n'
        'TFrecord subsets prefixed with train-* and validation-* are in the\n'
        'directory. Default: %(default)s')

    parser.add_argument(
        '--distort_color', action='store_true', default=False,
        help='S|Distort color during training on imagenet to "enrich" the\n'
        'dataset. Default no distortion. Set this flag to enable distortion.')

    args = parser.parse_args()

    return args 
Example #19
Source File: optimizers.py    From OpenSeq2Seq with Apache License 2.0
def reduce_gradients(grads_and_vars, on_horovod, model=None):
  if on_horovod:
    from horovod.tensorflow import allreduce, size

    if size() > 1:
      averaged_grads_and_vars = []
      with tf.name_scope("all_reduce"):
        for grad, var in grads_and_vars:
          if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
              if model._decoder.params.get('shared_embed', False):
                from tensorflow.python.training.optimizer import _deduplicate_indexed_slices
                summed_values, unique_indices = _deduplicate_indexed_slices(
                    values=grad.values, indices=grad.indices)
                gradient_no_duplicate_indices = tf.IndexedSlices(
                    indices=unique_indices,
                    values=summed_values,
                    dense_shape=grad.dense_shape)
                grad = tf.convert_to_tensor(gradient_no_duplicate_indices)
            avg_grad = allreduce(grad)
            averaged_grads_and_vars.append((avg_grad, var))
          else:
            averaged_grads_and_vars.append((None, var))
      return averaged_grads_and_vars
    else:
      return grads_and_vars
  else:
    raise NotImplementedError("Reduce in tower-mode is not implemented.") 
Example #20
Source File: resnet_main.py    From DistributedDeepLearning with MIT License
def _get_hooks(batch_size, is_distributed=defaults.DISTRIBUTED):
    logger = logging.getLogger(__name__)

    if is_distributed:
        exps_hook = ExamplesPerSecondHook(batch_size * hvd.size())
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
        logger.info("Rank: {} Cluster Size {}".format(hvd.rank(), hvd.size()))
        return [bcast_hook, exps_hook]
    else:
        exps_hook = ExamplesPerSecondHook(batch_size)
        return [exps_hook] 
Example #21
Source File: optim.py    From pix2pix-flow with MIT License
def adam2_old(self, params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
        updates = []
        if type(cost_or_grads) is not list:
            gs = tf.gradients(cost_or_grads, params)
        else:
            gs = cost_or_grads

        # all-reduce
        grads1 = [Z.allreduce_mean(g) for g in gs]
        grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
        mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

        t = tf.Variable(1., 'adam_t')
        lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
        updates.append(t.assign_add(1))

        for p, g1, g2 in zip(params, grads1, grads2):
            mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
            if mom1 > 0:
                v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
                v_t = mom1 * v + (1. - mom1) * g1
                updates.append(v.assign(v_t))
            else:
                v_t = g1
            mg_t = mom2 * mg + (1. - mom2) * g2
            delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
            p_t = p - lr_t * delta_t
            updates.append(mg.assign(mg_t))
            updates.append(p.assign(p_t))
        return tf.group(*updates) 
Example #22
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_broadcast_rank_error(self):
        """Test that the broadcast returns an error if different ranks
        specify different root rank."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor = tf.ones([17] * 3, dtype=tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.broadcast(tensor, rank)) 
Example #23
Source File: train.py    From pix2pix-flow with MIT License
def get_its(hps):
    # These run for a fixed amount of time. As anchored batch is smaller, we've actually seen fewer examples
    train_its = int(np.ceil(hps.n_train / (hps.n_batch_train * hvd.size())))
    test_its = int(np.ceil(hps.n_test / (hps.n_batch_train * hvd.size())))
    train_epoch = train_its * hps.n_batch_train * hvd.size()

    # Do a full validation run
    if hvd.rank() == 0:
        print(hps.n_test, hps.local_batch_test, hvd.size())
    assert hps.n_test % (hps.local_batch_test * hvd.size()) == 0
    full_test_its = hps.n_test // (hps.local_batch_test * hvd.size())

    if hvd.rank() == 0:
        print("Train epoch size: " + str(train_epoch))
    return train_its, test_its, full_test_its 
Example #24
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_broadcast(self):
        """Test that the broadcast correctly broadcasts 1D, 2D, 3D tensors."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            dtypes = [tf.uint8, tf.int8, tf.uint16, tf.int16,
                      tf.int32, tf.int64, tf.float16, tf.float32,
                      tf.float64, tf.bool]
            dims = [1, 2, 3]
            root_ranks = list(range(size))
            for dtype, dim, root_rank in itertools.product(dtypes, dims, root_ranks):
                tensor = tf.ones([17] * dim) * rank
                root_tensor = tf.ones([17] * dim) * root_rank
                if dtype == tf.bool:
                    tensor = tensor % 2
                    root_tensor = root_tensor % 2
                tensor = tf.cast(tensor, dtype=dtype)
                root_tensor = tf.cast(root_tensor, dtype=dtype)
                broadcasted_tensor = hvd.broadcast(tensor, root_rank)
                self.assertTrue(
                    session.run(tf.reduce_all(tf.equal(
                        tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
                    "hvd.broadcast produces incorrect broadcasted tensor") 
Example #25
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allgather_grad(self):
        """Test the correctness of the allgather gradient."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        with self.test_session(config=self.config) as session:
            # As of TensorFlow v1.9, gradients are not supported on
            # integer tensors
            dtypes = [tf.float32, tf.float64]
            dims = [1, 2, 3]
            for dtype, dim in itertools.product(dtypes, dims):
                tensor_sizes = [3, 2, 7, 4, 6, 8, 10] * 5
                tensor_sizes = tensor_sizes[:size]

                tensor = tf.ones([tensor_sizes[rank]] + [17] * (dim - 1)) * rank
                if dtype == tf.bool:
                    tensor = tensor % 2
                tensor = tf.cast(tensor, dtype=dtype)
                gathered = hvd.allgather(tensor)

                grad_list = []
                for r, tensor_size in enumerate(tensor_sizes):
                    g = tf.ones([tensor_size] + [17] * (dim - 1)) * r
                    grad_list.append(g)
                grad_ys = tf.concat(grad_list, axis=0)

                grad = tf.gradients(gathered, tensor, grad_ys)[0]
                grad_out = session.run(grad)

                expected = np.ones(
                    [tensor_sizes[rank]] + [17] * (dim - 1)
                ) * rank * size
                err = np.linalg.norm(expected - grad_out)
                self.assertLess(err, 0.00000001,
                                "gradient %s differs from expected %s, "
                                "error: %s" %
                                (grad_out, expected, str(err))) 
Example #26
Source File: tfops.py    From pix2pix-flow with MIT License
def allreduce_sum(x):
    if hvd.size() == 1:
        return x
    return hvd.mpi_ops._allreduce(x) 
Example #27
Source File: tfops.py    From pix2pix-flow with MIT License
def add_edge_padding(x, filter_size):
    assert filter_size[0] % 2 == 1
    if filter_size[0] == 1 and filter_size[1] == 1:
        return x
    a = (filter_size[0] - 1) // 2  # vertical padding size
    b = (filter_size[1] - 1) // 2  # horizontal padding size
    if True:
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        name = "_".join([str(dim) for dim in [a, b, *int_shape(x)[1:3]]])
        pads = tf.get_collection(name)
        if not pads:
            if hvd.rank() == 0:
                print("Creating pad", name)
            pad = np.zeros([1] + int_shape(x)[1:3] + [1], dtype='float32')
            pad[:, :a, :, 0] = 1.
            pad[:, -a:, :, 0] = 1.
            pad[:, :, :b, 0] = 1.
            pad[:, :, -b:, 0] = 1.
            pad = tf.convert_to_tensor(pad)
            tf.add_to_collection(name, pad)
        else:
            pad = pads[0]
        pad = tf.tile(pad, [tf.shape(x)[0], 1, 1, 1])
        x = tf.concat([x, pad], axis=3)
    else:
        pad = tf.pad(tf.zeros_like(x[:, :, :, :1]) - 1,
                     [[0, 0], [a, a], [b, b], [0, 0]]) + 1
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        x = tf.concat([x, pad], axis=3)
    return x 
Example #28
Source File: optim.py    From glow with MIT License
def adam2_old(params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    t = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
    updates.append(t.assign_add(1))

    for p, g1, g2 in zip(params, grads1, grads2):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g1
            updates.append(v.assign(v_t))
        else:
            v_t = g1
        mg_t = mom2 * mg + (1. - mom2) * g2
        delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
        p_t = p - lr_t * delta_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    return tf.group(*updates) 
Example #29
Source File: graph_transform.py    From parallax with Apache License 2.0
def graph_transform_mpi(single_gpu_meta_graph_def, config,
                        op_library_path=None):
    if op_library_path is not None:
        tf.load_op_library(op_library_path)

    with tf.Graph().as_default() as replica:
        tf.train.import_meta_graph(single_gpu_meta_graph_def)

        tensor_or_op_name_to_replica_names = {}
        for op in replica.get_operations():
            tensor_or_op_name_to_replica_names[op.name] = [op.name]
            for output in op.outputs:
                tensor_or_op_name_to_replica_names[output.name] = [output.name]

        # Initialize horovod
        hvd.init()

        num_workers = hvd.size()
        worker_id = hvd.rank()
        update_shard_values_for_worker(num_workers, worker_id)

        op_to_control_consumer_ops = get_all_control_consumers(replica)
        trainable_variable_ops = [var.op for var in tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)]

        for gradients_info in tf.get_collection(tf.GraphKeys.GRADIENTS_INFO):
            target_tensor = gradients_info._target
            if target_tensor.op not in trainable_variable_ops:
                parallax_log.debug(
                    "Gradient for non-trainable variable %s is created, ignore"
                    % target_tensor.op.name)
                continue

            _add_aggregation_ops(gradients_info, op_to_control_consumer_ops, config)
        _add_broadcast_ops()

    return tf.train.export_meta_graph(graph=replica), \
           tensor_or_op_name_to_replica_names