Python horovod.tensorflow.size() Examples

The following are 29 code examples of horovod.tensorflow.size(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module horovod.tensorflow, or try the search function.
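horovod.tensorflow.size() returns the total number of Horovod worker processes in the current job, and it is most often used to shard a dataset across workers and to scale the learning rate by the worker count. Before the project examples, here is a minimal sketch of that pattern, assuming TensorFlow 1.x and Horovod are installed; the base learning rate and optimizer choice are illustrative placeholders, not taken from any project below.

import tensorflow as tf
import horovod.tensorflow as hvd

# Initialize Horovod; size()/rank() are only valid after init().
hvd.init()

# Scale the learning rate by the number of workers, as several of the
# examples below do (the base learning rate here is illustrative).
base_lr = 0.001
opt = tf.train.AdamOptimizer(base_lr * hvd.size())

# Wrap the optimizer so gradients are averaged across all workers.
opt = hvd.DistributedOptimizer(opt)

print("workers: {}, rank: {}, local rank: {}".format(
    hvd.size(), hvd.rank(), hvd.local_rank()))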
Example #1
Source File: cape_ablate_horovod.py    From cape-document-qa with Apache License 2.0
def get_training_params(train_config):
    return TrainParams(
        SerializableOptimizer(
            train_config.optimizer,
            dict(learning_rate=train_config.learning_rate * hvd.size())
        ),
        num_epochs=train_config.n_epochs,
        ema=train_config.ema,
        max_checkpoints_to_keep=train_config.max_checkpoints_to_keep,
        async_encoding=train_config.async_encoding,
        log_period=train_config.log_period,
        eval_period=train_config.eval_period,
        save_period=train_config.save_period,
        best_weights=("dev", "b8/question-text-f1"),
        eval_samples=dict(dev=None, train=6000),
        eval_at_zero=False
    ) 
Example #2
Source File: hvd_distributed_tf_data_utils.py    From BERT with Apache License 2.0
def train_input_fn(input_file, _parse_fn, name_to_features,
		params, **kargs):
	if_shard = kargs.get("if_shard", "1")
	dataset = tf.data.TFRecordDataset(input_file, buffer_size=params.get("buffer_size", 100))
	print("==hvd size {}, rank {}==".format(hvd.size(), hvd.rank()))
	if if_shard == "1":
		dataset = dataset.shard(hvd.size(), hvd.rank())
	dataset = dataset.map(lambda x:_parse_fn(x, name_to_features))
	dataset = dataset.shuffle(
							buffer_size=params.get("buffer_size", 1024)+3*params.get("batch_size", 32),
							seed=np.random.randint(0,1e10,1)[0],
							reshuffle_each_iteration=True)
	dataset = dataset.batch(params.get("batch_size", 32))
	dataset = dataset.repeat(params.get("epoch", 100))
	iterator = dataset.make_one_shot_iterator()
	features = iterator.get_next()
	return features 
Example #3
Source File: flow_training.py    From flowpp with MIT License
def setup_horovod():
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()

    from mpi4py import MPI

    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        # _local_list_orig = local_list
        local_list = list(map(float, local_list))
        # print('RANK {} AVERAGING {} -> {}'.format(hvd.rank(), _local_list_orig, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    return hvd, MPI, is_root, mpi_average 
Example #4
Source File: hvd_distributed_optimizer.py    From BERT with Apache License 2.0
def get_train_op(self, loss, tvars, init_lr, 
							num_train_steps, **kargs):
		learning_rate = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
		learning_rate = self.warm_up(learning_rate, init_lr, **kargs)
		print("==optimizer hvd size=={}".format(hvd.size()))
		opt = self.optimizer_op(learning_rate*hvd.size(), **kargs)

		# add Uber Horovod distributed optimizer
		self.opt = hvd.DistributedOptimizer(opt)
		grads = self.grad_clip_fn(self.opt, loss, tvars, **kargs)

		# self.grad_summaries_merged = optimizer_utils.add_grad_summaries(
		# 						zip(grads, tvars))

		train_op = self.opt.apply_gradients(
					zip(grads, tvars), global_step=self.global_step)
		new_global_step = self.global_step + 1
		train_op = tf.group(train_op, [self.global_step.assign(new_global_step)])
		return train_op 
Example #5
Source File: horovod_patches.py    From cape-document-qa with Apache License 2.0
def _train(model: Model,
           data: TrainingData,
           checkpoint: Union[str, None],
           parameter_checkpoint: Union[str, None],
           save_start: bool,
           train_params: trainer.TrainParams,
           evaluators: List[Evaluator],
           out: ModelDir,
           notes=None,
           dry_run=False,
           start_eval=False):
    print('Horovod size: ', hvd.size())
    print('Horovod rank: ', hvd.rank())
    print('Horovod local rank: ', hvd.local_rank())

    if train_params.async_encoding:
        _train_async(model, data, checkpoint, parameter_checkpoint, save_start, train_params,
                 evaluators, out, notes, dry_run, start_eval)
        return
    else:
        raise NotImplementedError('Synchronous training with Horovod not supported yet')
Example #6
Source File: resnet_main.py    From DistributedDeepLearning with MIT License
def _log_summary(total_images, batch_size, duration):
    logger = logging.getLogger(__name__)
    images_per_second = total_images / duration
    logger.info("Data length:      {}".format(total_images))
    logger.info("Total duration:   {:.3f}".format(duration))
    logger.info("Total images/sec: {:.3f}".format(images_per_second))
    logger.info(
        "Batch size:       (Per GPU {}: Total {})".format(
            batch_size, hvd.size() * batch_size if defaults.DISTRIBUTED else batch_size
        )
    )
    logger.info(
        "Distributed:      {}".format("True" if defaults.DISTRIBUTED else "False")
    )
    logger.info(
        "Num GPUs:         {:.3f}".format(hvd.size() if defaults.DISTRIBUTED else 1)
    ) 
Example #7
Source File: __init__.py    From training_results_v0.6 with Apache License 2.0
def get_gradients(self, loss, params):
        """
        Compute gradients of all trainable variables.

        See Optimizer.get_gradients() for more info.

        In DistributedOptimizer, get_gradients() is overridden to also
        allreduce the gradients before returning them.
        """
        gradients = super(self.__class__, self).get_gradients(loss, params)
        if hvd.size() > 1:
            averaged_gradients = []
            with tf.name_scope(self._name + "_Allreduce"):
                for grad in gradients:
                    if grad is not None:
                        avg_grad = hvd.allreduce(grad, device_dense=self._device_dense,
                                                 device_sparse=self._device_sparse)
                        averaged_gradients.append(avg_grad)
                    else:
                        averaged_gradients.append(None)
                return averaged_gradients
        else:
            return gradients 
Example #8
Source File: horovod.py    From blueoil with Apache License 2.0
def setup():
    if not horovod_installed:
        return False

    global horovod_initialized
    if horovod_initialized:
        return hvd

    hvd.init()
    horovod_initialized = True

    horovod_num_worker = hvd.size()
    horovod_rank = hvd.rank()
    # verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    # make sure MPI is not re-initialized.
    import mpi4py.rc
    mpi4py.rc.initialize = False
    # import mpi4py
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # check size and rank are synchronized
    assert horovod_num_worker == comm.Get_size()
    assert horovod_rank == comm.Get_rank()
    return hvd 
Example #9
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_broadcast_error(self):
        """Test that the broadcast returns an error if any dimension besides
        the first is different among the tensors being broadcasted."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.broadcast(tensor, 0)) 
Example #10
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allgather_type_error(self):
        """Test that the allgather returns an error if the types being gathered
        differ among the processes"""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones(tensor_size, dtype=dtype) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allgather(tensor)) 
Example #11
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allreduce_type_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different type."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            # Same rank, different dimension
            dims = [17] * 3
            tensor = tf.ones(dims,
                             dtype=tf.int32 if rank % 2 == 0 else tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allreduce(tensor)) 
Example #12
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allreduce_cpu_gpu_error(self):
        """Test that the allreduce raises an error if different ranks try to
        perform reduction on CPU and GPU."""
        # Only do this test if there are GPUs available.
        if not tf.test.is_gpu_available(cuda_only=True):
            return

        hvd.init()
        local_rank = hvd.local_rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        device = "/gpu:%d" % local_rank if local_rank % 2 == 0 else "/cpu:0"
        with self.test_session(config=self.config) as session:
            with tf.device(device):
                # Same rank, different dimension
                dims = [17] * 3
                tensor = tf.ones(dims, dtype=tf.int32)
                with self.assertRaises(tf.errors.FailedPreconditionError):
                    session.run(hvd.allreduce(tensor)) 
Example #13
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allgather_error(self):
        """Test that the allgather returns an error if any dimension besides
        the first is different among the tensors being gathered."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allgather(tensor)) 
Example #14
Source File: train_model.py    From DistributedDeepLearning with MIT License
def _get_hooks(is_distributed=DISTRIBUTED):
    logger = logging.getLogger(__name__)
    if is_distributed:
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
        logger.info("Rank: {} Cluster Size {}".format(hvd.local_rank(), hvd.size()))
        return [bcast_hook]
    else:
        return [] 
Example #15
Source File: wrappers.py    From atari-reset with MIT License
def size(self):
        return self.low.shape 
Example #16
Source File: wrappers.py    From atari-reset with MIT License
def proc_infos(self):
        epinfos = [info['episode'] for info in self.infos if 'episode' in info]

        if hvd.size()>1:
            epinfos = flatten_lists(MPI.COMM_WORLD.allgather(epinfos))

        new_sp_wins = {}
        new_sp_counts = {}
        for epinfo in epinfos:
            sp = epinfo['starting_point']
            if sp in new_sp_counts:
                new_sp_counts[sp] += 1
                if epinfo['as_good_as_demo']:
                    new_sp_wins[sp] += 1
            else:
                new_sp_counts[sp] = 1
                if epinfo['as_good_as_demo']:
                    new_sp_wins[sp] = 1
                else:
                    new_sp_wins[sp] = 0

        for sp,wins in new_sp_wins.items():
            self.starting_point_success[sp] = np.cast[np.float32](wins)/new_sp_counts[sp]

        # move starting point, ensuring at least 20% of workers are able to complete the demo
        csd = np.argwhere(np.cumsum(self.starting_point_success) / self.nrstartsteps >= 0.2)
        if len(csd) > 0:
            new_max_start = csd[0][0]
        else:
            new_max_start = np.minimum(self.max_starting_point + 100, self.max_max_starting_point)
        n_points_to_shift = self.max_starting_point - new_max_start
        self.decrement_starting_point(n_points_to_shift)
        self.infos = [] 
Example #17
Source File: vgg_tfrecord_horovod.py    From keras_experiments with The Unlicense
def _parser(desc):
    parser = ap.ArgumentParser(description=dedent(desc),
                               formatter_class=CustomFormatter)

    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to run training for.\n'
                        '(Default: %(default)s)\n')

    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='S|Batch size. Default: %(default)s')

    parser.add_argument(
        '--ngpus_per_model', type=int, default=1, choices=(1, 2),
        help='S|GPUs for Model parallelism. Max set to 2 for now. '
        'Default: %(default)s')

    parser.add_argument(
        '--imgs_per_epoch', type=int, default=0,
        help='S|Number of images to run during epoch. Use for timing.\n'
        'Default uses all the images for an epoch.')

    imagenet_datadir = '/datasets/imagenet/train-val-tfrecord-480-subset'
    parser.add_argument(
        '--datadir', default=imagenet_datadir,
        help='S|Data directory with Imagenet TFrecord dataset. Assumes\n'
        'TFrecord subsets prefixed with train-* and validation-* are in the\n'
        'directory. Default: %(default)s')

    parser.add_argument(
        '--distort_color', action='store_true', default=False,
        help='S|Distort color during training on imagenet to "enrich" the\n'
        'dataset. Default no distortion. Set this flag to enable distortion.')

    args = parser.parse_args()

    return args 
Example #18
Source File: resnet50_tfrecord_horovod.py    From keras_experiments with The Unlicense
def _parser(desc):
    parser = ap.ArgumentParser(description=dedent(desc),
                               formatter_class=CustomFormatter)

    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs to run training for.\n'
                        '(Default: %(default)s)\n')

    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='S|Batch size. Default: %(default)s')

    parser.add_argument(
        '--imgs_per_epoch', type=int, default=0,
        help='S|Number of images to run during epoch. Use for timing.\n'
        'Default uses all the images for an epoch.')

    imagenet_datadir = '/datasets/imagenet/train-val-tfrecord-480-subset'
    parser.add_argument(
        '--datadir', default=imagenet_datadir,
        help='S|Data directory with Imagenet TFrecord dataset. Assumes\n'
        'TFrecord subsets prefixed with train-* and validation-* are in the\n'
        'directory. Default: %(default)s')

    parser.add_argument(
        '--distort_color', action='store_true', default=False,
        help='S|Distort color during training on imagenet to "enrich" the\n'
        'dataset. Default no distortion. Set this flag to enable distortion.')

    args = parser.parse_args()

    return args 
Example #19
Source File: optimizers.py    From OpenSeq2Seq with Apache License 2.0
def reduce_gradients(grads_and_vars, on_horovod, model=None):
  if on_horovod:
    from horovod.tensorflow import allreduce, size

    if size() > 1:
      averaged_grads_and_vars = []
      with tf.name_scope("all_reduce"):
        for grad, var in grads_and_vars:
          if grad is not None:
            if isinstance(grad, tf.IndexedSlices):
              if model._decoder.params.get('shared_embed', False):
                from tensorflow.python.training.optimizer import _deduplicate_indexed_slices
                summed_values, unique_indices = _deduplicate_indexed_slices(
                    values=grad.values, indices=grad.indices)
                gradient_no_duplicate_indices = tf.IndexedSlices(
                    indices=unique_indices,
                    values=summed_values,
                    dense_shape=grad.dense_shape)
                grad = tf.convert_to_tensor(gradient_no_duplicate_indices)
            avg_grad = allreduce(grad)
            averaged_grads_and_vars.append((avg_grad, var))
          else:
            averaged_grads_and_vars.append((None, var))
      return averaged_grads_and_vars
    else:
      return grads_and_vars
  else:
    raise NotImplementedError("Reduce in tower-mode is not implemented.") 
Example #20
Source File: resnet_main.py    From DistributedDeepLearning with MIT License
def _get_hooks(batch_size, is_distributed=defaults.DISTRIBUTED):
    logger = logging.getLogger(__name__)

    if is_distributed:
        exps_hook = ExamplesPerSecondHook(batch_size * hvd.size())
        bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
        logger.info("Rank: {} Cluster Size {}".format(hvd.rank(), hvd.size()))
        return [bcast_hook, exps_hook]
    else:
        exps_hook = ExamplesPerSecondHook(batch_size)
        return [exps_hook] 
Example #21
Source File: optim.py    From pix2pix-flow with MIT License
def adam2_old(self, params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
        updates = []
        if type(cost_or_grads) is not list:
            gs = tf.gradients(cost_or_grads, params)
        else:
            gs = cost_or_grads

        # all-reduce
        grads1 = [Z.allreduce_mean(g) for g in gs]
        grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
        mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

        t = tf.Variable(1., 'adam_t')
        lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
        updates.append(t.assign_add(1))

        for p, g1, g2 in zip(params, grads1, grads2):
            mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
            if mom1 > 0:
                v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
                v_t = mom1 * v + (1. - mom1) * g1
                updates.append(v.assign(v_t))
            else:
                v_t = g1
            mg_t = mom2 * mg + (1. - mom2) * g2
            delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
            p_t = p - lr_t * delta_t
            updates.append(mg.assign(mg_t))
            updates.append(p.assign(p_t))
        return tf.group(*updates) 
Example #22
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_broadcast_rank_error(self):
        """Test that the broadcast returns an error if different ranks
        specify different root rank."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor = tf.ones([17] * 3, dtype=tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.broadcast(tensor, rank)) 
Example #23
Source File: train.py    From pix2pix-flow with MIT License
def get_its(hps):
    # These run for a fixed amount of time. As anchored batch is smaller, we've actually seen fewer examples
    train_its = int(np.ceil(hps.n_train / (hps.n_batch_train * hvd.size())))
    test_its = int(np.ceil(hps.n_test / (hps.n_batch_train * hvd.size())))
    train_epoch = train_its * hps.n_batch_train * hvd.size()

    # Do a full validation run
    if hvd.rank() == 0:
        print(hps.n_test, hps.local_batch_test, hvd.size())
    assert hps.n_test % (hps.local_batch_test * hvd.size()) == 0
    full_test_its = hps.n_test // (hps.local_batch_test * hvd.size())

    if hvd.rank() == 0:
        print("Train epoch size: " + str(train_epoch))
    return train_its, test_its, full_test_its 
Example #24
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_broadcast(self):
        """Test that the broadcast correctly broadcasts 1D, 2D, 3D tensors."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            dtypes = [tf.uint8, tf.int8, tf.uint16, tf.int16,
                      tf.int32, tf.int64, tf.float16, tf.float32,
                      tf.float64, tf.bool]
            dims = [1, 2, 3]
            root_ranks = list(range(size))
            for dtype, dim, root_rank in itertools.product(dtypes, dims, root_ranks):
                tensor = tf.ones([17] * dim) * rank
                root_tensor = tf.ones([17] * dim) * root_rank
                if dtype == tf.bool:
                    tensor = tensor % 2
                    root_tensor = root_tensor % 2
                tensor = tf.cast(tensor, dtype=dtype)
                root_tensor = tf.cast(root_tensor, dtype=dtype)
                broadcasted_tensor = hvd.broadcast(tensor, root_rank)
                self.assertTrue(
                    session.run(tf.reduce_all(tf.equal(
                        tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
                    "hvd.broadcast produces incorrect broadcasted tensor") 
Example #25
Source File: test_tensorflow.py    From training_results_v0.6 with Apache License 2.0
def test_horovod_allgather_grad(self):
        """Test the correctness of the allgather gradient."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        with self.test_session(config=self.config) as session:
            # As of TensorFlow v1.9, gradients are not supported on
            # integer tensors
            dtypes = [tf.float32, tf.float64]
            dims = [1, 2, 3]
            for dtype, dim in itertools.product(dtypes, dims):
                tensor_sizes = [3, 2, 7, 4, 6, 8, 10] * 5
                tensor_sizes = tensor_sizes[:size]

                tensor = tf.ones([tensor_sizes[rank]] + [17] * (dim - 1)) * rank
                if dtype == tf.bool:
                    tensor = tensor % 2
                tensor = tf.cast(tensor, dtype=dtype)
                gathered = hvd.allgather(tensor)

                grad_list = []
                for r, tensor_size in enumerate(tensor_sizes):
                    g = tf.ones([tensor_size] + [17] * (dim - 1)) * r
                    grad_list.append(g)
                grad_ys = tf.concat(grad_list, axis=0)

                grad = tf.gradients(gathered, tensor, grad_ys)[0]
                grad_out = session.run(grad)

                expected = np.ones(
                    [tensor_sizes[rank]] + [17] * (dim - 1)
                ) * rank * size
                err = np.linalg.norm(expected - grad_out)
                self.assertLess(err, 0.00000001,
                                "gradient %s differs from expected %s, "
                                "error: %s" %
                                (grad_out, expected, str(err))) 
Example #26
Source File: tfops.py    From pix2pix-flow with MIT License
def allreduce_sum(x):
    if hvd.size() == 1:
        return x
    return hvd.mpi_ops._allreduce(x) 
Example #27
Source File: tfops.py    From pix2pix-flow with MIT License
def add_edge_padding(x, filter_size):
    assert filter_size[0] % 2 == 1
    if filter_size[0] == 1 and filter_size[1] == 1:
        return x
    a = (filter_size[0] - 1) // 2  # vertical padding size
    b = (filter_size[1] - 1) // 2  # horizontal padding size
    if True:
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        name = "_".join([str(dim) for dim in [a, b, *int_shape(x)[1:3]]])
        pads = tf.get_collection(name)
        if not pads:
            if hvd.rank() == 0:
                print("Creating pad", name)
            pad = np.zeros([1] + int_shape(x)[1:3] + [1], dtype='float32')
            pad[:, :a, :, 0] = 1.
            pad[:, -a:, :, 0] = 1.
            pad[:, :, :b, 0] = 1.
            pad[:, :, -b:, 0] = 1.
            pad = tf.convert_to_tensor(pad)
            tf.add_to_collection(name, pad)
        else:
            pad = pads[0]
        pad = tf.tile(pad, [tf.shape(x)[0], 1, 1, 1])
        x = tf.concat([x, pad], axis=3)
    else:
        pad = tf.pad(tf.zeros_like(x[:, :, :, :1]) - 1,
                     [[0, 0], [a, a], [b, b], [0, 0]]) + 1
        x = tf.pad(x, [[0, 0], [a, a], [b, b], [0, 0]])
        x = tf.concat([x, pad], axis=3)
    return x 
Example #28
Source File: optim.py    From glow with MIT License
def adam2_old(params, cost_or_grads, lr=3e-4, mom1=0.9, mom2=0.999, epsilon=1e-8):
    updates = []
    if type(cost_or_grads) is not list:
        gs = tf.gradients(cost_or_grads, params)
    else:
        gs = cost_or_grads

    # all-reduce
    grads1 = [Z.allreduce_mean(g) for g in gs]
    grads2 = [Z.allreduce_mean(tf.square(g)) for g in gs]
    mom2 = tf.maximum(0., 1. - (hvd.size() * (1 - mom2)))

    t = tf.Variable(1., 'adam_t')
    lr_t = lr * tf.sqrt((1. - tf.pow(mom2, t))) / (1. - tf.pow(mom1, t))
    updates.append(t.assign_add(1))

    for p, g1, g2 in zip(params, grads1, grads2):
        mg = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_mg')
        if mom1 > 0:
            v = tf.Variable(tf.zeros(p.get_shape()), p.name + '_adam_v')
            v_t = mom1 * v + (1. - mom1) * g1
            updates.append(v.assign(v_t))
        else:
            v_t = g1
        mg_t = mom2 * mg + (1. - mom2) * g2
        delta_t = v_t / (tf.sqrt(mg_t) + epsilon)
        p_t = p - lr_t * delta_t
        updates.append(mg.assign(mg_t))
        updates.append(p.assign(p_t))
    return tf.group(*updates) 
Example #29
Source File: graph_transform.py    From parallax with Apache License 2.0
def graph_transform_mpi(single_gpu_meta_graph_def, config,
                        op_library_path=None):
    if op_library_path is not None:
        tf.load_op_library(op_library_path)

    with tf.Graph().as_default() as replica:
        tf.train.import_meta_graph(single_gpu_meta_graph_def)

        tensor_or_op_name_to_replica_names = {}
        for op in replica.get_operations():
            tensor_or_op_name_to_replica_names[op.name] = [op.name]
            for output in op.outputs:
                tensor_or_op_name_to_replica_names[output.name] = [output.name]

        # Initialize horovod
        hvd.init()

        num_workers = hvd.size()
        worker_id = hvd.rank()
        update_shard_values_for_worker(num_workers, worker_id)

        op_to_control_consumer_ops = get_all_control_consumers(replica)
        trainable_variable_ops = [var.op for var in tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES)]

        for gradients_info in tf.get_collection(tf.GraphKeys.GRADIENTS_INFO):
            target_tensor = gradients_info._target
            if target_tensor.op not in trainable_variable_ops:
                parallax_log.debug(
                    "Gradient for non-trainable variable %s is created, ignore"
                    % target_tensor.op.name)
                continue

            _add_aggregation_ops(gradients_info, op_to_control_consumer_ops, config)
        _add_broadcast_ops()

    return tf.train.export_meta_graph(graph=replica), \
           tensor_or_op_name_to_replica_names