Python tensorflow.python.training.basic_session_run_hooks.NanTensorHook() Examples
The following are 2
code examples of tensorflow.python.training.basic_session_run_hooks.NanTensorHook().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
tensorflow.python.training.basic_session_run_hooks
, or try the search function
.
Example #1
Source File: estimator.py From auto-alt-text-lambda-api with MIT License | 4 votes |
def _train_model(self, input_fn, hooks): all_hooks = [] self._graph = ops.Graph() with self._graph.as_default() as g, g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) global_step = contrib_framework.create_global_step(g) features, labels = input_fn() self._check_inputs(features, labels) model_fn_ops = self._call_legacy_get_train_ops(features, labels) ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss) all_hooks.extend([ basic_session_run_hooks.NanTensorHook(model_fn_ops.loss), basic_session_run_hooks.LoggingTensorHook( { 'loss': model_fn_ops.loss, 'step': global_step }, every_n_iter=100) ]) all_hooks.extend(hooks) scaffold = model_fn_ops.training_scaffold or monitored_session.Scaffold() if not (scaffold.saver or ops.get_collection(ops.GraphKeys.SAVERS)): ops.add_to_collection( ops.GraphKeys.SAVERS, saver.Saver( sharded=True, max_to_keep=self._config.keep_checkpoint_max, defer_build=True)) chief_hooks = [] if (self._config.save_checkpoints_secs or self._config.save_checkpoints_steps): saver_hook_exists = any([ isinstance(h, basic_session_run_hooks.CheckpointSaverHook) for h in (all_hooks + model_fn_ops.training_hooks + chief_hooks + model_fn_ops.training_chief_hooks) ]) if not saver_hook_exists: chief_hooks = [ basic_session_run_hooks.CheckpointSaverHook( self._model_dir, save_secs=self._config.save_checkpoints_secs, save_steps=self._config.save_checkpoints_steps, scaffold=scaffold) ] with monitored_session.MonitoredTrainingSession( master=self._config.master, is_chief=self._config.is_chief, checkpoint_dir=self._model_dir, scaffold=scaffold, hooks=all_hooks + model_fn_ops.training_hooks, chief_only_hooks=chief_hooks + model_fn_ops.training_chief_hooks, save_checkpoint_secs=0, # Saving is handled by a hook. save_summaries_steps=self._config.save_summary_steps, config=self.config.tf_config) as mon_sess: loss = None while not mon_sess.should_stop(): _, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss]) summary_io.SummaryWriterCache.clear() return loss
Example #2
Source File: estimator.py From keras-lambda with MIT License | 4 votes |
def _train_model(self, input_fn, hooks): all_hooks = [] self._graph = ops.Graph() with self._graph.as_default() as g, g.device(self._device_fn): random_seed.set_random_seed(self._config.tf_random_seed) global_step = contrib_framework.create_global_step(g) features, labels = input_fn() self._check_inputs(features, labels) model_fn_ops = self._call_legacy_get_train_ops(features, labels) ops.add_to_collection(ops.GraphKeys.LOSSES, model_fn_ops.loss) all_hooks.extend([ basic_session_run_hooks.NanTensorHook(model_fn_ops.loss), basic_session_run_hooks.LoggingTensorHook( { 'loss': model_fn_ops.loss, 'step': global_step }, every_n_iter=100) ]) all_hooks.extend(hooks) scaffold = model_fn_ops.training_scaffold or monitored_session.Scaffold() if not (scaffold.saver or ops.get_collection(ops.GraphKeys.SAVERS)): ops.add_to_collection( ops.GraphKeys.SAVERS, saver.Saver( sharded=True, max_to_keep=self._config.keep_checkpoint_max, defer_build=True)) chief_hooks = [] if (self._config.save_checkpoints_secs or self._config.save_checkpoints_steps): saver_hook_exists = any([ isinstance(h, basic_session_run_hooks.CheckpointSaverHook) for h in (all_hooks + model_fn_ops.training_hooks + chief_hooks + model_fn_ops.training_chief_hooks) ]) if not saver_hook_exists: chief_hooks = [ basic_session_run_hooks.CheckpointSaverHook( self._model_dir, save_secs=self._config.save_checkpoints_secs, save_steps=self._config.save_checkpoints_steps, scaffold=scaffold) ] with monitored_session.MonitoredTrainingSession( master=self._config.master, is_chief=self._config.is_chief, checkpoint_dir=self._model_dir, scaffold=scaffold, hooks=all_hooks + model_fn_ops.training_hooks, chief_only_hooks=chief_hooks + model_fn_ops.training_chief_hooks, save_checkpoint_secs=0, # Saving is handled by a hook. save_summaries_steps=self._config.save_summary_steps, config=self.config.tf_config) as mon_sess: loss = None while not mon_sess.should_stop(): _, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss]) summary_io.SummaryWriterCache.clear() return loss