Python tensorflow.keras.backend.constant() Examples
The following are 23 code examples of tensorflow.keras.backend.constant(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tensorflow.keras.backend, or try the search function.
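Before the project examples, here is a minimal standalone sketch (mine, not taken from any project below) of what K.constant does: it wraps a Python scalar or NumPy array in a constant backend tensor that other tensorflow.keras.backend ops can consume.

import numpy as np
from tensorflow.keras import backend as K

# Wrap a NumPy array in a constant tensor; dtype defaults to K.floatx().
x = K.constant(np.arange(6).reshape(2, 3))

# The result behaves like any other backend tensor.
print(K.eval(K.sum(x, axis=1)))  # [ 3. 12.]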
Example #1
Source File: gin_conv.py From spektral with MIT License | 6 votes |
def build(self, input_shape):
    assert len(input_shape) >= 2
    layer_kwargs = dict(
        kernel_initializer=self.kernel_initializer,
        bias_initializer=self.bias_initializer,
        kernel_regularizer=self.kernel_regularizer,
        bias_regularizer=self.bias_regularizer,
        kernel_constraint=self.kernel_constraint,
        bias_constraint=self.bias_constraint
    )
    self.mlp = Sequential([
        Dense(channels, self.mlp_activation, **layer_kwargs)
        for channels in self.mlp_hidden
    ] + [Dense(self.channels, self.activation, use_bias=self.use_bias, **layer_kwargs)])
    if self.epsilon is None:
        self.eps = self.add_weight(shape=(1,), initializer='zeros', name='eps')
    else:
        # If epsilon is given, keep it constant
        self.eps = K.constant(self.epsilon)
    self.built = True
Example #2
Source File: qactivation_test.py From qkeras with Apache License 2.0 | 6 votes |
def test_stochastic_ternary(bound, alpha, temperature, expected_values, expected_scale):
    np.random.seed(42)
    K.set_learning_phase(1)

    n = 1000

    x = np.random.uniform(-bound, bound, size=(n, 10))
    x = np.sort(x, axis=1)

    s = stochastic_ternary(alpha=alpha, temperature=temperature)

    y = K.eval(s(K.constant(x)))
    scale = K.eval(s.scale).astype(np.float32)[0]

    ty = np.zeros_like(s)
    for i in range(n):
        ty = ty + (y[i] / scale)

    result = (ty / n).astype(np.float32)

    assert_allclose(result, expected_values, atol=0.1)
    assert_allclose(scale, expected_scale, rtol=0.1)
Example #3
Source File: qalpha_test.py From qkeras with Apache License 2.0 | 6 votes |
def test_ternary_auto():
    """Test ternary auto scale quantizer."""
    np.random.seed(42)
    N = 1000000
    m_list = [1.0, 0.1, 0.01, 0.001]

    for m in m_list:
        x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
        x = K.constant(x)
        quantizer = ternary(alpha="auto")
        q = K.eval(quantizer(x))

        d = m / 3.0
        result = np.mean(get_weight_scale(quantizer, q))
        expected = (m + d) / 2.0
        assert_allclose(result, expected, rtol=0.02)
Example #4
Source File: qalpha_test.py From qkeras with Apache License 2.0 | 6 votes |
def test_binary_auto():
    """Test binary auto scale quantizer."""
    np.random.seed(42)
    N = 1000000
    m_list = [1.0, 0.1, 0.01, 0.001]

    for m in m_list:
        x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
        x = K.constant(x)
        quantizer = binary(alpha="auto")
        q = K.eval(quantizer(x))

        result = get_weight_scale(quantizer, q)
        expected = m / 2.0
        logging.info("expect %s", expected)
        logging.info("result %s", result)
        assert_allclose(result, expected, rtol=0.02)
Example #5
Source File: qalpha_test.py From qkeras with Apache License 2.0 | 6 votes |
def test_binary_auto_po2():
    """Test binary auto_po2 scale quantizer."""
    np.random.seed(42)
    N = 1000000
    m_list = [1.0, 0.1, 0.01, 0.001]

    for m in m_list:
        x = np.random.uniform(-m, m, (N, 10)).astype(K.floatx())
        x = K.constant(x)
        quantizer_ref = binary(alpha="auto")
        quantizer = binary(alpha="auto_po2")
        q_ref = K.eval(quantizer_ref(x))
        q = K.eval(quantizer(x))

        ref = get_weight_scale(quantizer_ref, q_ref)
        expected = np.power(2.0, np.round(np.log2(ref)))
        result = get_weight_scale(quantizer, q)
        assert_allclose(result, expected, rtol=0.0001)
Example #6
Source File: postprocess.py From keras-YOLOv3-model-set with MIT License | 5 votes |
def yolo3_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
Example #7
Source File: loss.py From keras-YOLOv3-model-set with MIT License | 5 votes |
def _smooth_labels(y_true, label_smoothing):
    label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
Example #8
Source File: loss.py From keras-YOLOv3-model-set with MIT License | 5 votes |
def _smooth_labels(y_true, label_smoothing):
    label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
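Examples #7 and #8 show the same helper (it appears twice on this page). As a quick sanity check of what it computes, here is a hedged sketch of mine, not repository code: with label_smoothing = 0.1, a one-hot row [0., 1.] becomes [0.05, 0.95].

import numpy as np
from tensorflow.keras import backend as K

def _smooth_labels(y_true, label_smoothing):
    label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

# Each hard 0/1 target is pulled toward 0.5 by half the smoothing amount.
y_true = K.constant(np.array([[0., 1.], [1., 0.]]))
print(K.eval(_smooth_labels(y_true, 0.1)))
# [[0.05 0.95]
#  [0.95 0.05]]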
Example #9
Source File: test_layer.py From TensorNetwork with Apache License 2.0 | 5 votes |
def test_output_shape(dummy_data, make_model):
    # Disable the redefined-outer-name violation in this function
    # pylint: disable=redefined-outer-name
    data, _ = dummy_data
    data = K.constant(data)
    input_shape = data.shape

    model = make_model
    actual_output_shape = model(data).shape
    expected_output_shape = model.compute_output_shape(input_shape)

    np.testing.assert_equal(expected_output_shape, actual_output_shape)
Example #10
Source File: test_conv_layer.py From TensorNetwork with Apache License 2.0 | 5 votes |
def test_output_shape(dummy_data, make_model):
    # pylint: disable=redefined-outer-name
    data, _ = dummy_data
    data = K.constant(data)
    model = make_model
    l = model.get_layer(LAYER_NAME)

    actual_output_shape = l(data).shape
    expected_output_shape = l.compute_output_shape(data.shape)

    np.testing.assert_equal(expected_output_shape, actual_output_shape)
Example #11
Source File: qactivation_test.py From qkeras with Apache License 2.0 | 5 votes |
def test_stochastic_binary():
    np.random.seed(42)
    K.set_learning_phase(1)

    x = np.random.uniform(-0.01, 0.01, size=10)
    x = np.sort(x)

    s = stochastic_binary(alpha="auto_po2")
    ty = np.zeros_like(s)
    ts = 0.0

    n = 1000

    for _ in range(n):
        y = K.eval(s(K.constant(x)))
        scale = K.eval(s.scale)[0]
        ts = ts + scale
        ty = ty + (y / scale)

    result = (ty / n).astype(np.float32)
    scale = np.array([ts / n])

    expected = np.array(
        [-1., -1., -1., -0.852, 0.782, 0.768, 0.97, 0.978, 1.0, 1.0]
    ).astype(np.float32)
    expected_scale = np.array([0.003906])

    assert_allclose(result, expected, atol=0.1)
    assert_allclose(scale, expected_scale, rtol=0.1)
Example #12
Source File: postprocess.py From keras-YOLOv3-model-set with MIT License | 5 votes |
def yolo2_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[..., ::-1], K.dtype(feats))
    # box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
Example #13
Source File: qtools_util.py From qkeras with Apache License 2.0 | 5 votes |
def get_weights(layer):
    weights = layer.get_weights()
    out = copy.deepcopy(weights)
    for j, weight in enumerate(weights):
        if hasattr(layer, "get_quantizers") and layer.get_quantizers()[j]:
            out[j] = K.eval(
                layer.get_quantizers()[j](K.constant(weight)))
    return out
Example #14
Source File: backend.py From bert4keras with Apache License 2.0 | 5 votes |
def piecewise_linear(t, schedule):
    """Piecewise linear function.

    `schedule` is a dict of the form {1000: 1, 2000: 0.1}, meaning that
    for t in [0, 1000] the output increases linearly from 0 to 1, for
    t in [1000, 2000] it decreases linearly from 1 to 0.1, and for
    t > 2000 it stays constant at 0.1.
    """
    schedule = sorted(schedule.items())
    if schedule[0][0] != 0:
        schedule = [(0, 0.0)] + schedule

    x = K.constant(schedule[0][1], dtype=K.floatx())
    t = K.cast(t, K.floatx())
    for i in range(len(schedule)):
        t_begin = schedule[i][0]
        x_begin = x
        if i != len(schedule) - 1:
            dx = schedule[i + 1][1] - schedule[i][1]
            dt = schedule[i + 1][0] - schedule[i][0]
            slope = 1.0 * dx / dt
            x = schedule[i][1] + slope * (t - t_begin)
        else:
            x = K.constant(schedule[i][1], dtype=K.floatx())
        x = K.switch(t >= t_begin, x, x_begin)

    return x
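To make the schedule semantics concrete, here is a small usage sketch (mine, not from bert4keras), assuming piecewise_linear above is in scope and running eagerly:

from tensorflow.keras import backend as K

schedule = {1000: 1, 2000: 0.1}
for step in [0, 500, 1000, 1500, 2000, 3000]:
    # Ramp 0 -> 1 over [0, 1000], 1 -> 0.1 over [1000, 2000], then hold 0.1.
    value = K.eval(piecewise_linear(K.constant(step), schedule))
    print(step, round(float(value), 4))
# 0 0.0, 500 0.5, 1000 1.0, 1500 0.55, 2000 0.1, 3000 0.1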
Example #15
Source File: agnn_conv.py From spektral with MIT License | 5 votes |
def build(self, input_shape):
    assert len(input_shape) >= 2
    if self.trainable:
        self.beta = self.add_weight(shape=(1,), initializer='ones', name='beta')
    else:
        self.beta = K.constant(1.)
    self.built = True
Example #16
Source File: base.py From spektral with MIT License | 5 votes |
def call(self, inputs):
    F = K.int_shape(inputs)[-1]
    minkowski_prod_mat = np.eye(F)
    minkowski_prod_mat[-1, -1] = -1.
    minkowski_prod_mat = K.constant(minkowski_prod_mat)
    output = K.dot(inputs, minkowski_prod_mat)
    output = K.dot(output, K.transpose(inputs))
    output = K.clip(output, -10e9, -1.)

    if self.activation is not None:
        output = self.activation(output)

    return output
Example #17
Source File: optimizers_225tf.py From keras-adamw with MIT License | 5 votes |
def __init__(self, learning_rate=0.01, momentum=0.0, nesterov=False,
             model=None, zero_penalties=True, batch_size=32,
             total_iterations=0, total_iterations_wd=None,
             use_cosine_annealing=False, lr_multipliers=None,
             weight_decays=None, init_verbose=True,
             eta_min=0, eta_max=1, t_cur=0, name="SGDW", **kwargs):
    if total_iterations > 1:
        weight_decays = _init_weight_decays(model, zero_penalties,
                                            weight_decays)
    eta_t = kwargs.pop('eta_t', 1.)

    super(SGDW, self).__init__(name, **kwargs)
    self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
    self._set_hyper("decay", self._initial_decay)

    self._momentum = False
    if isinstance(momentum, ops.Tensor) or callable(momentum) or momentum > 0:
        self._momentum = True
    if isinstance(momentum, (int, float)) and (momentum < 0 or momentum > 1):
        raise ValueError("`momentum` must be between [0, 1].")
    self._set_hyper("momentum", momentum)
    self.nesterov = nesterov

    self.eta_min = K.constant(eta_min, name='eta_min')
    self.eta_max = K.constant(eta_max, name='eta_max')
    self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
    self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')
    self.batch_size = batch_size
    self.total_iterations = total_iterations
    self.total_iterations_wd = total_iterations_wd or total_iterations
    self.lr_multipliers = lr_multipliers
    self.weight_decays = weight_decays or {}
    self.init_verbose = init_verbose
    self.use_cosine_annealing = use_cosine_annealing

    _check_args(self, total_iterations, use_cosine_annealing, weight_decays)
    self._init_lr = kwargs.get('lr', learning_rate)  # to print lr_mult setup
    self._updates_processed = 0  # to track num calls to '_resource_apply_...'
    self._init_notified = False
Example #18
Source File: optimizers_225tf.py From keras-adamw with MIT License | 5 votes |
def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
             epsilon=None, decay=0., amsgrad=False,
             model=None, zero_penalties=True, batch_size=32,
             total_iterations=0, total_iterations_wd=None,
             use_cosine_annealing=False, lr_multipliers=None,
             weight_decays=None, init_verbose=True,
             eta_min=0, eta_max=1, t_cur=0, name="AdamW", **kwargs):
    if total_iterations > 1:
        weight_decays = _init_weight_decays(model, zero_penalties,
                                            weight_decays)
    eta_t = kwargs.pop('eta_t', 1.)

    super(AdamW, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)

    self.eta_min = K.constant(eta_min, name='eta_min')
    self.eta_max = K.constant(eta_max, name='eta_max')
    self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
    self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')
    self.batch_size = batch_size
    self.total_iterations = total_iterations
    self.total_iterations_wd = total_iterations_wd or total_iterations
    self.lr_multipliers = lr_multipliers
    self.weight_decays = weight_decays or {}
    self.init_verbose = init_verbose
    self.use_cosine_annealing = use_cosine_annealing
    self.epsilon = epsilon or backend_config.epsilon()
    self.amsgrad = amsgrad

    _check_args(self, total_iterations, use_cosine_annealing, weight_decays)
    self._init_lr = kwargs.get('lr', learning_rate)  # to print lr_mult setup
    self._updates_processed = 0  # to track num calls to '_resource_apply_...'
    self._init_notified = False
Example #19
Source File: postprocess.py From keras-YOLOv3-model-set with MIT License | 4 votes |
def batched_yolo2_postprocess(args,
                              anchors,
                              num_classes,
                              max_boxes=100,
                              confidence=0.1,
                              iou_threshold=0.4):
    """Postprocess for YOLOv2 model on given input and return filtered boxes."""
    yolo_outputs = args[0]
    image_shape = args[1]

    input_shape = K.shape(yolo_outputs)[1:3] * 32
    batch_size = K.shape(image_shape)[0]  # batch size, tensor
    boxes, box_scores = batched_yolo2_boxes_and_scores(
        yolo_outputs, anchors, num_classes, input_shape, image_shape)

    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    def single_image_nms(b, batch_boxes, batch_scores, batch_classes):
        boxes_ = []
        scores_ = []
        classes_ = []
        for c in range(num_classes):
            # TODO: use keras backend instead of tf.
            class_boxes = tf.boolean_mask(boxes[b], mask[b, :, c])
            class_box_scores = tf.boolean_mask(box_scores[b, :, c], mask[b, :, c])
            nms_index = tf.image.non_max_suppression(
                class_boxes, class_box_scores, max_boxes_tensor,
                iou_threshold=iou_threshold)
            class_boxes = K.gather(class_boxes, nms_index)
            class_box_scores = K.gather(class_box_scores, nms_index)
            classes = K.ones_like(class_box_scores, 'int32') * c
            boxes_.append(class_boxes)
            scores_.append(class_box_scores)
            classes_.append(classes)

        boxes_ = K.concatenate(boxes_, axis=0)
        scores_ = K.concatenate(scores_, axis=0)
        classes_ = K.concatenate(classes_, axis=0)

        batch_boxes = batch_boxes.write(b, boxes_)
        batch_scores = batch_scores.write(b, scores_)
        batch_classes = batch_classes.write(b, classes_)

        return b + 1, batch_boxes, batch_scores, batch_classes

    batch_boxes = tf.TensorArray(K.dtype(boxes), size=1, dynamic_size=True)
    batch_scores = tf.TensorArray(K.dtype(box_scores), size=1, dynamic_size=True)
    batch_classes = tf.TensorArray(dtype=tf.int32, size=1, dynamic_size=True)

    _, batch_boxes, batch_scores, batch_classes = tf.while_loop(
        lambda b, *args: b < batch_size,
        single_image_nms,
        [0, batch_boxes, batch_scores, batch_classes])

    batch_boxes = batch_boxes.stack()
    batch_scores = batch_scores.stack()
    batch_classes = batch_classes.stack()

    return batch_boxes, batch_scores, batch_classes
Example #20
Source File: postprocess.py From keras-YOLOv3-model-set with MIT License | 4 votes |
def yolo3_postprocess(args,
                      anchors,
                      num_classes,
                      max_boxes=100,
                      confidence=0.1,
                      iou_threshold=0.4):
    """Postprocess for YOLOv3 model on given input and return filtered boxes."""
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]

    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [0, 1, 2]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # print("yolo_outputs", yolo_outputs)
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo3_boxes_and_scores(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
Example #21
Source File: deeplabcut.py From DeepPoseKit with Apache License 2.0 | 4 votes |
def _preprocess_symbolic_input(x, data_format, mode, **kwargs):
    """Preprocesses a tensor encoding a batch of images.

    # Arguments
        x: Input tensor, 3D or 4D.
        data_format: Data format of the image tensor.
        mode: One of "caffe", "tf" or "torch".
            - caffe: will convert the images from RGB to BGR,
                then will zero-center each color channel with
                respect to the ImageNet dataset, without scaling.
            - tf: will scale pixels between -1 and 1, sample-wise.
            - torch: will scale pixels between 0 and 1 and then
                will normalize each channel with respect to the
                ImageNet dataset.

    # Returns
        Preprocessed tensor.
    """
    if mode == "tf":
        x /= 127.5
        x -= 1.0
        return x

    if mode == "torch":
        x /= 255.0
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        if data_format == "channels_first":
            # 'RGB'->'BGR'
            if backend.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            # 'RGB'->'BGR'
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    mean_tensor = backend.constant(-np.array(mean))

    # Zero-center by mean pixel
    if backend.dtype(x) != backend.dtype(mean_tensor):
        x = backend.bias_add(
            x, backend.cast(mean_tensor, backend.dtype(x)),
            data_format=data_format)
    else:
        x = backend.bias_add(x, mean_tensor, data_format)

    if std is not None:
        x /= std
    return x
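A hedged usage sketch (mine, not part of DeepPoseKit): in "tf" mode the function simply maps pixel values from [0, 255] to [-1, 1], which is easy to verify on a dummy tensor, assuming _preprocess_symbolic_input above is in scope.

import numpy as np
from tensorflow.keras import backend

# A tiny 1x1 "image" with three channels holding 0, 127.5, and 255.
x = backend.constant(np.array([[[0.0, 127.5, 255.0]]]))
y = _preprocess_symbolic_input(x, data_format="channels_last", mode="tf")
print(backend.eval(y))  # [[[-1. 0. 1.]]]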
Example #22
Source File: utils.py From qkeras with Apache License 2.0 | 4 votes |
def quantized_model_debug(model, X_test, plot=False):
    """Debugs and plots model weights and activations."""
    outputs = []
    output_names = []

    for layer in model.layers:
        if layer.__class__.__name__ in REGISTERED_LAYERS:
            output_names.append(layer.name)
            outputs.append(layer.output)

    model_debug = Model(inputs=model.inputs, outputs=outputs)
    y_pred = model_debug.predict(X_test)

    print("{:30} {: 8.4f} {: 8.4f}".format(
        "input", np.min(X_test), np.max(X_test)))

    for n, p in zip(output_names, y_pred):
        layer = model.get_layer(n)
        if layer.__class__.__name__ == "QActivation":
            alpha = get_weight_scale(layer.activation, p)
        else:
            alpha = 1.0
        print(
            "{:30} {: 8.4f} {: 8.4f}".format(n, np.min(p / alpha),
                                             np.max(p / alpha)),
            end="")
        if alpha != 1.0:
            print(" a[{: 8.4f} {:8.4f}]".format(np.min(alpha), np.max(alpha)))
        if plot and layer.__class__.__name__ in [
            "QConv2D", "QDense", "QActivation"
        ]:
            plt.hist(p.flatten(), bins=25)
            plt.title(layer.name + "(output)")
            plt.show()
        alpha = None
        for i, weights in enumerate(layer.get_weights()):
            if hasattr(layer, "get_quantizers") and layer.get_quantizers()[i]:
                weights = K.eval(layer.get_quantizers()[i](K.constant(weights)))
                if i == 0 and layer.__class__.__name__ in [
                    "QConv1D", "QConv2D", "QDense"
                ]:
                    alpha = get_weight_scale(layer.get_quantizers()[i], weights)
                    # if alpha is 0, let's remove all weights.
                    alpha_mask = (alpha == 0.0)
                    weights = np.where(alpha_mask, weights * alpha, weights / alpha)
            if plot:
                plt.hist(weights.flatten(), bins=25)
                plt.title(layer.name + "(weights)")
                plt.show()
            print(" ({: 8.4f} {: 8.4f})".format(np.min(weights), np.max(weights)),
                  end="")
            if alpha is not None and isinstance(alpha, np.ndarray):
                print(" a({: 10.6f} {: 10.6f})".format(
                    np.min(alpha), np.max(alpha)), end="")
        print("")
Example #23
Source File: optimizers_225tf.py From keras-adamw with MIT License | 4 votes |
def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
             epsilon=1e-7, model=None, zero_penalties=True,
             batch_size=32, total_iterations=0, total_iterations_wd=None,
             use_cosine_annealing=False, lr_multipliers=None,
             weight_decays=None, init_verbose=True,
             eta_min=0, eta_max=1, t_cur=0, name="NadamW", **kwargs):
    if total_iterations > 1:
        weight_decays = _init_weight_decays(model, zero_penalties,
                                            weight_decays)

    # Backwards compatibility with keras NAdam optimizer.
    kwargs['decay'] = kwargs.pop('schedule_decay', 0.004)
    eta_t = kwargs.pop('eta_t', 1.)
    learning_rate = kwargs.get('lr', learning_rate)
    if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule):
        raise ValueError('The Nadam optimizer does not support '
                         'tf.keras.optimizers.LearningRateSchedules as the '
                         'learning rate.')

    super(NadamW, self).__init__(name, **kwargs)
    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
    self._set_hyper('decay', self._initial_decay)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)
    self.epsilon = epsilon or backend_config.epsilon()
    self._m_cache = None

    self.eta_min = K.constant(eta_min, name='eta_min')
    self.eta_max = K.constant(eta_max, name='eta_max')
    self.eta_t = K.variable(eta_t, dtype='float32', name='eta_t')
    self.t_cur = K.variable(t_cur, dtype='int64', name='t_cur')
    self.batch_size = batch_size
    self.total_iterations = total_iterations
    self.total_iterations_wd = total_iterations_wd or total_iterations
    self.lr_multipliers = lr_multipliers
    self.weight_decays = weight_decays or {}
    self.init_verbose = init_verbose
    self.use_cosine_annealing = use_cosine_annealing

    _check_args(self, total_iterations, use_cosine_annealing, weight_decays)
    self._init_lr = kwargs.get('lr', learning_rate)  # to print lr_mult setup
    self._updates_processed = 0  # to track num calls to '_resource_apply_...'
    self._init_notified = False