Python tensorflow.add_check_numerics_ops() Examples
The following are 12 code examples of tensorflow.add_check_numerics_ops(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow, or try the search function.
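tf.add_check_numerics_ops() is part of the TensorFlow 1.x graph API: it walks the current default graph, attaches a tf.check_numerics assertion to every half-, single-, and double-precision float tensor, and returns a single grouped op. Running that op raises tf.errors.InvalidArgumentError as soon as any checked tensor contains a NaN or Inf, with the offending op's name in the message. Below is a minimal sketch of the typical pattern; the names x, w, loss, and train_op are illustrative placeholders, not taken from any example on this page.

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None], name="x")
w = tf.Variable(1.0)
loss = tf.reduce_mean(tf.log(w * x))  # NaN/Inf whenever w * x <= 0
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

# Create the checks after the model is built: only tensors that
# already exist in the default graph are covered.
check_op = tf.add_check_numerics_ops()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fine for positive inputs; for w * x <= 0 this raises
    # InvalidArgumentError naming the op that produced the bad value.
    sess.run([train_op, check_op], feed_dict={x: [0.5, 2.0]})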
Example #1
Source File: model.py From ShuffleNet with Apache License 2.0 | 6 votes

def __init_output(self):
    with tf.variable_scope('output'):
        # Losses
        self.regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        self.cross_entropy_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.y, name='loss'))
        self.loss = self.regularization_loss + self.cross_entropy_loss

        # Optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.args.learning_rate)
            self.train_op = self.optimizer.minimize(self.loss)
            # This is for debugging NaNs. Check TensorFlow documentation.
            self.check_op = tf.add_check_numerics_ops()

        # Output and Metrics
        self.y_out_softmax = tf.nn.softmax(self.logits)
        self.y_out_argmax = tf.argmax(self.y_out_softmax, axis=-1, output_type=tf.int32)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.y, self.y_out_argmax), tf.float32))

    with tf.name_scope('train-summary-per-iteration'):
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('acc', self.accuracy)
        self.summaries_merged = tf.summary.merge_all()
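The check op is created alongside the train op here; in the training loop the two are presumably run in a single call, so a NaN or Inf in any float tensor aborts the step immediately, e.g. (sess and feed are assumed names, not from this source):

_, _ = sess.run([self.train_op, self.check_op], feed_dict=feed)

Building self.check_op after the losses, optimizer, and metrics matters: tf.add_check_numerics_ops() only covers tensors that already exist in the graph when it is called.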
Example #2
Source File: train.py From g-tensorflow-models with Apache License 2.0 | 6 votes

def add_check_numerics_ops():
    check_op = []
    for op in tf.get_default_graph().get_operations():
        bad = ["logits/Log", "sample/Reshape", "log_prob/mul",
               "log_prob/SparseSoftmaxCrossEntropyWithLogits/Reshape",
               "entropy/Reshape", "entropy/LogSoftmax", "Categorical", "Mean"]
        if all([x not in op.name for x in bad]):
            for output in op.outputs:
                if output.dtype in [tf.float16, tf.float32, tf.float64]:
                    if op._get_control_flow_context() is not None:  # pylint: disable=protected-access
                        raise ValueError("`tf.add_check_numerics_ops() is not compatible "
                                         "with TensorFlow control flow operations such as "
                                         "`tf.cond()` or `tf.while_loop()`.")
                    message = op.name + ":" + str(output.value_index)
                    with tf.control_dependencies(check_op):
                        check_op = [tf.check_numerics(output, message=message)]
    return tf.group(*check_op)
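Examples #2 through #4 are the same helper copied verbatim across three repositories. It shadows the library function with a near-copy of its implementation (including the control-dependency chaining, which makes the checks run one after another, and the ValueError for ops inside tf.cond()/tf.while_loop() contexts), with one addition: any op whose name contains an entry of the bad list is skipped. That matters for ops such as the log-probabilities of a Categorical distribution, whose outputs can legitimately be -inf without indicating a bug.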
Example #3
Source File: train.py From models with Apache License 2.0 | 6 votes

def add_check_numerics_ops():
    check_op = []
    for op in tf.get_default_graph().get_operations():
        bad = ["logits/Log", "sample/Reshape", "log_prob/mul",
               "log_prob/SparseSoftmaxCrossEntropyWithLogits/Reshape",
               "entropy/Reshape", "entropy/LogSoftmax", "Categorical", "Mean"]
        if all([x not in op.name for x in bad]):
            for output in op.outputs:
                if output.dtype in [tf.float16, tf.float32, tf.float64]:
                    if op._get_control_flow_context() is not None:  # pylint: disable=protected-access
                        raise ValueError("`tf.add_check_numerics_ops() is not compatible "
                                         "with TensorFlow control flow operations such as "
                                         "`tf.cond()` or `tf.while_loop()`.")
                    message = op.name + ":" + str(output.value_index)
                    with tf.control_dependencies(check_op):
                        check_op = [tf.check_numerics(output, message=message)]
    return tf.group(*check_op)
Example #4
Source File: train.py From multilabel-image-classification-tensorflow with MIT License | 6 votes

def add_check_numerics_ops():
    check_op = []
    for op in tf.get_default_graph().get_operations():
        bad = ["logits/Log", "sample/Reshape", "log_prob/mul",
               "log_prob/SparseSoftmaxCrossEntropyWithLogits/Reshape",
               "entropy/Reshape", "entropy/LogSoftmax", "Categorical", "Mean"]
        if all([x not in op.name for x in bad]):
            for output in op.outputs:
                if output.dtype in [tf.float16, tf.float32, tf.float64]:
                    if op._get_control_flow_context() is not None:  # pylint: disable=protected-access
                        raise ValueError("`tf.add_check_numerics_ops() is not compatible "
                                         "with TensorFlow control flow operations such as "
                                         "`tf.cond()` or `tf.while_loop()`.")
                    message = op.name + ":" + str(output.value_index)
                    with tf.control_dependencies(check_op):
                        check_op = [tf.check_numerics(output, message=message)]
    return tf.group(*check_op)
Example #5
Source File: callbacks.py From keras-fcn with MIT License | 5 votes

def set_model(self, model):
    self.model = model
    self.sess = K.get_session()
    self.check_num = tf.add_check_numerics_ops()
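set_model() is the only hook shown: Keras calls it once when the callback is attached, at which point the graph is complete and the check op can be built. One plausible way to trigger the checks (on_batch_end is a standard Keras callback hook, but this body is an assumption, not part of the source):

def on_batch_end(self, batch, logs=None):
    # Raises tf.errors.InvalidArgumentError on the first NaN/Inf;
    # if the graph has placeholders, a feed_dict would be needed here.
    self.sess.run(self.check_num)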
Example #6
Source File: actor.py From phillip with GNU General Public License v3.0 | 5 votes

def __init__(self, **kwargs):
    super(Actor, self).__init__(**kwargs)

    with self.graph.as_default(), tf.device(self.device):
        if self.predict:
            self._init_model(**kwargs)
        self._init_policy(**kwargs)

        # build computation graph
        self.input = ct.inputCType(ssbm.SimpleStateAction, [self.config.memory+1], "input")
        self.input['delayed_action'] = tf.placeholder(tf.int64, [self.config.delay], "delayed_action")
        self.input['hidden'] = util.deepMap(
            lambda size: tf.placeholder(tf.float32, [size], name="input/hidden"),
            self.core.hidden_size)

        batch_input = util.deepMap(lambda t: tf.expand_dims(t, 0), self.input)

        states = self.embedGame(batch_input['state'])
        prev_actions = self.embedAction(batch_input['prev_action'])
        combined = tf.concat(axis=-1, values=[states, prev_actions])
        history = tf.unstack(combined, axis=1)
        inputs = tf.concat(axis=-1, values=history)
        core_output, hidden_state = self.core(inputs, batch_input['hidden'])
        actions = self.embedAction(batch_input['delayed_action'])

        if self.predict:
            predict_actions = actions[:, :self.model.predict_steps]
            delayed_actions = actions[:, self.model.predict_steps:]
            core_output = self.model.predict(history, core_output, hidden_state,
                                             predict_actions, batch_input['state'])
        else:
            delayed_actions = actions

        batch_policy = self.policy.getPolicy(core_output, delayed_actions), hidden_state
        self.run_policy = util.deepMap(lambda t: tf.squeeze(t, [0]), batch_policy)

        self.check_op = tf.no_op() if self.dynamic else tf.add_check_numerics_ops()

    self._finalize_setup()
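Note the guard on the last graph-building line: when self.dynamic is set, the check op is replaced by a tf.no_op(). Presumably the dynamic variant builds tf.while_loop-style control flow, which tf.add_check_numerics_ops() rejects with the ValueError shown in Examples #2 through #4.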
Example #7
Source File: numerics_test.py From deep_image_model with Apache License 2.0 | 5 votes

def testInf(self):
    with self.test_session(graph=tf.Graph()):
        t1 = tf.constant(1.0)
        t2 = tf.constant(0.0)
        a = tf.div(t1, t2)
        check = tf.add_check_numerics_ops()
        a = control_flow_ops.with_dependencies([check], a)
        with self.assertRaisesOpError("Inf"):
            a.eval()
Example #8
Source File: numerics_test.py From deep_image_model with Apache License 2.0 | 5 votes

def testNaN(self):
    with self.test_session(graph=tf.Graph()):
        t1 = tf.constant(0.0)
        t2 = tf.constant(0.0)
        a = tf.div(t1, t2)
        check = tf.add_check_numerics_ops()
        a = control_flow_ops.with_dependencies([check], a)
        with self.assertRaisesOpError("NaN"):
            a.eval()
Example #9
Source File: numerics_test.py From deep_image_model with Apache License 2.0 | 5 votes

def testBoth(self):
    with self.test_session(graph=tf.Graph()):
        t1 = tf.constant([1.0, 0.0])
        t2 = tf.constant([0.0, 0.0])
        a = tf.div(t1, t2)
        check = tf.add_check_numerics_ops()
        a = control_flow_ops.with_dependencies([check], a)
        with self.assertRaisesOpError("Inf and NaN"):
            a.eval()
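All three tests follow one pattern: build the failing division first, call tf.add_check_numerics_ops() afterwards so the check covers it, then attach the returned op via control_flow_ops.with_dependencies() (imported from tensorflow.python.ops in these test files). Evaluating a then runs the checks first, and the assertRaisesOpError message ("Inf", "NaN", or "Inf and NaN") confirms which kind of bad value was reported.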
Example #10
Source File: joint_model.py From elbow with BSD 3-Clause "New" or "Revised" License | 4 votes

def train(self, adam_rate=0.1, stopping_rule=None, steps=None,
          avg_decay=None, debug=False, print_s=1):
    elbo, elp, entropy = self.construct_elbo(return_all=True)

    if stopping_rule is None:
        if steps is not None:
            stopping_rule = StepCountStopper(step_count=steps)
        elif avg_decay is not None:
            stopping_rule = MovingAverageStopper(decay=avg_decay)
        else:
            stopping_rule = MovingAverageStopper()

    try:
        train_step = tf.train.AdamOptimizer(adam_rate).minimize(-elbo)
    except ValueError as e:
        print(e)
        return

    if debug:
        debug_ops = tf.add_check_numerics_ops()

    session = self.get_session(do_init=False)
    init = tf.global_variables_initializer()
    session.run(init)

    elbo_val = None
    running_elbo = 0
    i = 0
    t = -np.inf
    stopping_rule.reset()
    while not stopping_rule.observe(elbo_val):
        if debug:
            session.run(debug_ops)
        fd = self.feed_dict()
        session.run(train_step, feed_dict=fd)
        elbo_val, elp_val, entropy_val = session.run((elbo, elp, entropy), feed_dict=fd)
        if print_s is not None and (time.time() - t) > print_s:
            print("step %d elp %.2f entropy %.2f elbo %.2f" % (i, elp_val, entropy_val, elbo_val))
            t = time.time()
        i += 1
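Two details worth noting in this debug path: debug_ops is evaluated in its own session.run() call before the training step, so the forward pass runs twice per iteration when debug=True; fusing the two into one session.run([train_step, debug_ops], feed_dict=fd) would avoid the extra pass. Also, the checks are built after the Adam minimize op, so the optimizer's gradient tensors are covered as well.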
Example #11
Source File: model_template.py From ReSAN with Apache License 2.0 | 4 votes

def update_tensor_add_ema_and_opt(self):
    self.logits, (self.s1_act, self.s1_logpa), (self.s2_act, self.s2_logpa), \
        (self.s1_percentage, self.s2_percentage) = self.build_network()

    self.loss_sl, self.loss_rl = self.build_loss()
    self.accuracy = self.build_accuracy()

    # ------------ema-------------
    if True:
        self.var_ema = tf.train.ExponentialMovingAverage(cfg.var_decay)
        self.build_var_ema()

    if cfg.mode == 'train':
        self.ema = tf.train.ExponentialMovingAverage(cfg.decay)
        self.build_ema()
    self.summary = tf.summary.merge_all()

    # ---------- optimization ---------
    if cfg.optimizer.lower() == 'adadelta':
        assert cfg.learning_rate > 0.1 and cfg.learning_rate < 1.
        self.opt_sl = tf.train.AdadeltaOptimizer(cfg.learning_rate)
        self.opt_rl = tf.train.AdadeltaOptimizer(cfg.learning_rate)
    elif cfg.optimizer.lower() == 'adam':
        assert cfg.learning_rate < 0.1
        self.opt_sl = tf.train.AdamOptimizer(cfg.learning_rate)
        self.opt_rl = tf.train.AdamOptimizer(cfg.learning_rate)
    elif cfg.optimizer.lower() == 'rmsprop':
        assert cfg.learning_rate < 0.1
        self.opt_sl = tf.train.RMSPropOptimizer(cfg.learning_rate)
        self.opt_rl = tf.train.RMSPropOptimizer(cfg.learning_rate)
    else:
        raise AttributeError('no optimizer named as \'%s\'' % cfg.optimizer)

    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)

    # trainable param num:
    # print params num
    all_params_num = 0
    for elem in trainable_vars:  # elem.name
        var_name = elem.name.split(':')[0]
        if var_name.endswith('emb_mat'):
            continue
        params_num = 1
        for l in elem.get_shape().as_list():
            params_num *= l
        all_params_num += params_num
    _logger.add('Trainable Parameters Number: %d' % all_params_num)

    sl_vars = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
               if not var.op.name.startswith(self.scope + '/hard_network')]
    self.train_op_sl = self.opt_sl.minimize(
        self.loss_sl, self.global_step, var_list=sl_vars)

    rl_vars = [var for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
               if var.op.name.startswith(self.scope + '/hard_network')]
    if len(rl_vars) > 0:
        self.train_op_rl = self.opt_rl.minimize(
            self.loss_rl, var_list=rl_vars)
    else:
        self.train_op_rl = None
    # self.check_op = tf.add_check_numerics_ops()
Example #12
Source File: Trainer.py From PReMVOS with MIT License | 4 votes

def __init__(self, config, train_network, test_network, global_step, session):
    self.profile = config.bool("profile", False)
    self.add_grad_checks = config.bool("add_grad_checks", False)
    self.add_numerical_checks = config.bool("add_numerical_checks", False)
    self.measures = config.unicode_list("measures", [])
    self.opt_str = config.str("optimizer", "adam").lower()
    self.train_network = train_network
    self.test_network = test_network
    self.session = session
    self.global_step = global_step
    self.validation_step_number = 0
    self.gradient_clipping = config.float("gradient_clipping", -1.0)
    self.optimizer_exclude_prefix = config.str("optimizer_exclude_prefix", "")
    self.learning_rates = config.int_key_dict("learning_rates")
    self.recursive_training = config.bool(Constants.RECURSIVE_TRAINING, False)
    assert 1 in self.learning_rates, "no initial learning rate specified"
    self.curr_learning_rate = self.learning_rates[1]
    self.lr_var = tf.placeholder(config.dtype, shape=[], name="learning_rate")
    self.loss_scale_var = tf.placeholder_with_default(1.0, shape=[], name="loss_scale")
    self.opt, self.reset_opt_op = self.create_optimizer(config)
    grad_norm = None
    if train_network is not None:
        if train_network.use_partialflow:
            self.prepare_partialflow()
            self.step_op = tf.no_op("step")
        else:
            self.step_op, grad_norm = self.create_step_op()
        if len(self.train_network.update_ops) == 0:
            self.update_ops = []
        else:
            self.update_ops = self.train_network.update_ops
        if self.add_numerical_checks:
            self.update_ops.append(tf.add_check_numerics_ops())
        self.train_targets = self.train_network.raw_labels
        self.train_inputs = self.train_network.inputs
        self.train_network_ys = self.train_network.y_softmax
        if self.train_network_ys is not None and self.train_targets is not None:
            self.train_network_ys = self._adjust_results_to_targets(self.train_network_ys, self.train_targets)
    else:
        self.step_op = None
        self.update_ops = None
    self.summary_writer, self.summary_op, self.summary_op_test = self.init_summaries(config, grad_norm)
    if test_network is not None:
        self.test_targets = self.test_network.raw_labels
        self.test_inputs = self.test_network.inputs
        self.test_network_ys = self.test_network.y_softmax
        if self.test_network_ys is not None and self.test_targets is not None:
            self.test_network_ys = self._adjust_results_to_targets(self.test_network_ys, self.test_targets)
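tf.add_check_numerics_ops() is a graph-mode TF 1.x API; in TensorFlow 2.x it survives only as tf.compat.v1.add_check_numerics_ops() and does not work with eager execution or tf.function control flow. The closest replacement in recent 2.x releases is tf.debugging.enable_check_numerics(), which instruments ops globally; a minimal sketch:

import tensorflow as tf

# TF 2.x: after this call, the first op that produces a NaN or Inf
# raises InvalidArgumentError with the producing op's name.
tf.debugging.enable_check_numerics()

x = tf.constant([1.0, 0.0])
y = tf.math.log(x)  # log(0) = -inf, so this line raises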