Python tensorflow.python.training.basic_session_run_hooks.NanTensorHook() Examples

The following are 2 code examples of tensorflow.python.training.basic_session_run_hooks.NanTensorHook(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.python.training.basic_session_run_hooks , or try the search function

Example #1

Source File: estimator.py From auto-alt-text-lambda-api with MIT License

4 votes

def _train_model(self, input_fn, hooks):
    all_hooks = []
    self._graph = ops.Graph()
    with self._graph.as_default() as g, g.device(self._device_fn):
      random_seed.set_random_seed(self._config.tf_random_seed)
      global_step = contrib_framework.create_global_step(g)
      features, labels = input_fn()
      self._check_inputs(features, labels)
      model_fn_ops = self._call_legacy_get_train_ops(features, labels)
      ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss)
      all_hooks.extend([
          basic_session_run_hooks.NanTensorHook(model_fn_ops.loss),
          basic_session_run_hooks.LoggingTensorHook(
              {
                  'loss': model_fn_ops.loss,
                  'step': global_step
              },
              every_n_iter=100)
      ])
      all_hooks.extend(hooks)

      scaffold = model_fn_ops.training_scaffold or monitored_session.Scaffold()
      if not (scaffold.saver or ops.get_collection(ops.GraphKeys.SAVERS)):
        ops.add_to_collection(
            ops.GraphKeys.SAVERS,
            saver.Saver(
                sharded=True,
                max_to_keep=self._config.keep_checkpoint_max,
                defer_build=True))

      chief_hooks = []
      if (self._config.save_checkpoints_secs or
          self._config.save_checkpoints_steps):
        saver_hook_exists = any([
            isinstance(h, basic_session_run_hooks.CheckpointSaverHook)
            for h in (all_hooks + model_fn_ops.training_hooks + chief_hooks +
                      model_fn_ops.training_chief_hooks)
        ])
        if not saver_hook_exists:
          chief_hooks = [
              basic_session_run_hooks.CheckpointSaverHook(
                  self._model_dir,
                  save_secs=self._config.save_checkpoints_secs,
                  save_steps=self._config.save_checkpoints_steps,
                  scaffold=scaffold)
          ]
      with monitored_session.MonitoredTrainingSession(
          master=self._config.master,
          is_chief=self._config.is_chief,
          checkpoint_dir=self._model_dir,
          scaffold=scaffold,
          hooks=all_hooks + model_fn_ops.training_hooks,
          chief_only_hooks=chief_hooks + model_fn_ops.training_chief_hooks,
          save_checkpoint_secs=0,  # Saving is handled by a hook.
          save_summaries_steps=self._config.save_summary_steps,
          config=self.config.tf_config) as mon_sess:
        loss = None
        while not mon_sess.should_stop():
          _, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss])
      summary_io.SummaryWriterCache.clear()
      return loss

Example #2

Source File: estimator.py From keras-lambda with MIT License

4 votes

def _train_model(self, input_fn, hooks):
    all_hooks = []
    self._graph = ops.Graph()
    with self._graph.as_default() as g, g.device(self._device_fn):
      random_seed.set_random_seed(self._config.tf_random_seed)
      global_step = contrib_framework.create_global_step(g)
      features, labels = input_fn()
      self._check_inputs(features, labels)
      model_fn_ops = self._call_legacy_get_train_ops(features, labels)
      ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss)
      all_hooks.extend([
          basic_session_run_hooks.NanTensorHook(model_fn_ops.loss),
          basic_session_run_hooks.LoggingTensorHook(
              {
                  'loss': model_fn_ops.loss,
                  'step': global_step
              },
              every_n_iter=100)
      ])
      all_hooks.extend(hooks)

      scaffold = model_fn_ops.training_scaffold or monitored_session.Scaffold()
      if not (scaffold.saver or ops.get_collection(ops.GraphKeys.SAVERS)):
        ops.add_to_collection(
            ops.GraphKeys.SAVERS,
            saver.Saver(
                sharded=True,
                max_to_keep=self._config.keep_checkpoint_max,
                defer_build=True))

      chief_hooks = []
      if (self._config.save_checkpoints_secs or
          self._config.save_checkpoints_steps):
        saver_hook_exists = any([
            isinstance(h, basic_session_run_hooks.CheckpointSaverHook)
            for h in (all_hooks + model_fn_ops.training_hooks + chief_hooks +
                      model_fn_ops.training_chief_hooks)
        ])
        if not saver_hook_exists:
          chief_hooks = [
              basic_session_run_hooks.CheckpointSaverHook(
                  self._model_dir,
                  save_secs=self._config.save_checkpoints_secs,
                  save_steps=self._config.save_checkpoints_steps,
                  scaffold=scaffold)
          ]
      with monitored_session.MonitoredTrainingSession(
          master=self._config.master,
          is_chief=self._config.is_chief,
          checkpoint_dir=self._model_dir,
          scaffold=scaffold,
          hooks=all_hooks + model_fn_ops.training_hooks,
          chief_only_hooks=chief_hooks + model_fn_ops.training_chief_hooks,
          save_checkpoint_secs=0,  # Saving is handled by a hook.
          save_summaries_steps=self._config.save_summary_steps,
          config=self.config.tf_config) as mon_sess:
        loss = None
        while not mon_sess.should_stop():
          _, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss])
      summary_io.SummaryWriterCache.clear()
      return loss