Python keras.backend.stop_gradient() Examples
The following are 25 code examples of keras.backend.stop_gradient(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.backend, or try the search function.
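Note: K.stop_gradient(x) returns a tensor that equals x in the forward pass but is treated as a constant during differentiation, so no gradient flows back through it. As a quick, hedged orientation (assuming a TF 1.x-style graph-mode Keras backend, as in the examples below, where K.placeholder and K.gradients are available):

import keras.backend as K

x = K.placeholder(shape=(None, 3))
y = K.sum(K.square(x))                           # d y / d x = 2 * x
y_blocked = K.sum(K.square(K.stop_gradient(x)))  # x treated as a constant

print(K.gradients(y, [x]))          # [a gradient tensor of shape (None, 3)]
print(K.gradients(y_blocked, [x]))  # [None] -- no gradient path back to x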
Example #1
Source File: fgs.py From blackbox-attacks with MIT License | 6 votes |
def symbolic_fgs(x, grad, eps=0.3, clipping=True):
    """
    FGSM attack.
    """
    # signed gradient
    normed_grad = K.sign(grad)

    # Multiply by constant epsilon
    scaled_grad = eps * normed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = K.stop_gradient(x + scaled_grad)

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)
    return adv_x
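For context, symbolic_fgs expects the symbolic gradient of the model's loss with respect to its input. The sketch below shows one way it could be wired up; the tiny stand-in model and batch are illustrative assumptions, not taken from the blackbox-attacks repository:

import numpy as np
import keras
import keras.backend as K

# A tiny stand-in classifier (illustrative only).
model = keras.models.Sequential([
    keras.layers.Dense(10, activation='softmax', input_shape=(784,))
])

x = model.input
y_true = K.placeholder(shape=(None, 10))
loss = K.sum(K.categorical_crossentropy(y_true, model.output))
grad = K.gradients(loss, [x])[0]

adv_x = symbolic_fgs(x, grad, eps=0.3)        # symbolic adversarial batch
make_adv = K.function([x, y_true], [adv_x])
X_adv = make_adv([np.random.rand(5, 784),
                  keras.utils.to_categorical(np.arange(5), 10)])[0]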
Example #2
Source File: fgs.py From ensemble-adv-training with MIT License | 6 votes |
def symbolic_fgs(x, grad, eps=0.3, clipping=True):
    """
    FGSM attack.
    """
    # signed gradient
    normed_grad = K.sign(grad)

    # Multiply by constant epsilon
    scaled_grad = eps * normed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = K.stop_gradient(x + scaled_grad)

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)
    return adv_x
Example #3
Source File: model.py From 2018DSB with MIT License | 6 votes |
def inst_weight(output_y, output_x, output_dr, output_dl, config=None):
    dy = output_y[:,2:,2:] - output_y[:,:-2,2:] + \
         2*(output_y[:,2:,1:-1] - output_y[:,:-2,1:-1]) + \
         output_y[:,2:,:-2] - output_y[:,:-2,:-2]
    dx = output_x[:,2:,2:] - output_x[:,2:,:-2] + \
         2*(output_x[:,1:-1,2:] - output_x[:,1:-1,:-2]) + \
         output_x[:,:-2,2:] - output_x[:,:-2,:-2]
    ddr = (output_dr[:,2:,2:] - output_dr[:,:-2,:-2] + \
           output_dr[:,1:-1,2:] - output_dr[:,:-2,1:-1] + \
           output_dr[:,2:,1:-1] - output_dr[:,1:-1,:-2]) * K.constant(2)
    ddl = (output_dl[:,2:,:-2] - output_dl[:,:-2,2:] + \
           output_dl[:,2:,1:-1] - output_dl[:,1:-1,2:] + \
           output_dl[:,1:-1,:-2] - output_dl[:,:-2,1:-1]) * K.constant(2)
    dpred = K.concatenate([dy, dx, ddr, ddl], axis=-1)
    dpred = K.spatial_2d_padding(dpred)
    weight_fg = K.cast(K.all(dpred > K.constant(config.GRADIENT_THRES),
                             axis=3, keepdims=True), K.floatx())

    weight = K.clip(K.sqrt(weight_fg * K.prod(dpred, axis=3, keepdims=True)),
                    config.WEIGHT_AREA/config.CLIP_AREA_HIGH,
                    config.WEIGHT_AREA/config.CLIP_AREA_LOW)
    weight += (1 - weight_fg) * config.WEIGHT_AREA/config.BG_AREA
    weight = K.conv2d(weight, K.constant(config.GAUSSIAN_KERNEL),
                      padding='same')
    return K.stop_gradient(weight)
Example #4
Source File: learn_labelembedding.py From semantic-embeddings with MIT License | 6 votes |
def labelembed_model(base_model, num_classes, **kwargs):
    input_ = base_model.input
    embedding = base_model.output

    out = keras.layers.Activation('relu')(embedding)
    out = keras.layers.BatchNormalization(name = 'embedding_bn')(out)
    out1 = keras.layers.Dense(num_classes, name = 'prob')(out)
    out2 = keras.layers.Dense(num_classes, name = 'out2')(
        keras.layers.Lambda(lambda x: K.stop_gradient(x))(out))

    cls_input_ = keras.layers.Input((1,), name = 'labels')
    cls_embedding_layer = keras.layers.Embedding(num_classes, num_classes,
                                                 embeddings_initializer = 'identity',
                                                 name = 'labelembeddings')
    cls_embedding = keras.layers.Flatten()(cls_embedding_layer(cls_input_))

    loss = keras.layers.Lambda(
        lambda x: labelembed_loss(x[0], x[1], x[2], K.flatten(x[3]),
                                  num_classes = num_classes, **kwargs)[:,None],
        name = 'labelembed_loss')([out1, out2, cls_embedding, cls_input_])

    return keras.models.Model([input_, cls_input_], [embedding, out1, loss])
Example #5
Source File: learn_labelembedding.py From semantic-embeddings with MIT License | 6 votes |
def labelembed_loss(out1, out2, tar, targets, tau = 2., alpha = 0.9, beta = 0.5, num_classes = 100):
    out2_prob = K.softmax(out2)
    tau2_prob = K.stop_gradient(K.softmax(out2 / tau))
    soft_tar = K.stop_gradient(K.softmax(tar))

    L_o1_y = K.sparse_categorical_crossentropy(output = K.softmax(out1), target = targets)

    pred = K.argmax(out2, axis = -1)
    mask = K.stop_gradient(K.cast(K.equal(pred, K.cast(targets, 'int64')), K.floatx()))
    L_o1_emb = -cross_entropy(out1, soft_tar)  # pylint: disable=invalid-unary-operand-type

    L_o2_y = K.sparse_categorical_crossentropy(output = out2_prob, target = targets)
    L_emb_o2 = -cross_entropy(tar, tau2_prob) * mask * \
               (K.cast(K.shape(mask)[0], K.floatx()) / (K.sum(mask) + 1e-8))  # pylint: disable=invalid-unary-operand-type

    L_re = K.relu(K.sum(out2_prob * K.one_hot(K.cast(targets, 'int64'), num_classes), axis = -1) - alpha)

    return beta * L_o1_y + (1 - beta) * L_o1_emb + L_o2_y + L_emb_o2 + L_re
Example #6
Source File: cartpole_a3c.py From reinforcement-learning with MIT License | 6 votes |
def actor_optimizer(self):
    action = K.placeholder(shape=(None, self.action_size))
    advantages = K.placeholder(shape=(None, ))

    policy = self.actor.output

    good_prob = K.sum(action * policy, axis=1)
    eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
    loss = -K.sum(eligibility)

    entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)

    actor_loss = loss + 0.01*entropy

    optimizer = Adam(lr=self.actor_lr)
    updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
    train = K.function([self.actor.input, action, advantages], [], updates=updates)
    return train

# make loss function for Value approximation
Example #7
Source File: fgs.py From blackbox-attacks with MIT License | 6 votes |
def symbolic_fg(x, grad, eps=0.3, clipping=True):
    """
    FG attack
    """
    # Unit vector in direction of gradient
    reduc_ind = list(xrange(1, len(x.get_shape())))
    normed_grad = grad / tf.sqrt(tf.reduce_sum(tf.square(grad),
                                               reduction_indices=reduc_ind,
                                               keep_dims=True))

    # Multiply by constant epsilon
    scaled_grad = eps * normed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = K.stop_gradient(x + scaled_grad)

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)
    return adv_x
Example #8
Source File: model.py From models with MIT License | 5 votes |
def profile_contrib(p):
    return kl.Lambda(lambda p:
                     K.mean(K.sum(K.stop_gradient(tf.nn.softmax(p, dim=-2)) * p, axis=-2), axis=-1)
                     )(p)
Example #9
Source File: rbm.py From keras_bn_library with MIT License | 5 votes |
def mcmc_chain(self, x, nb_gibbs_steps):
    xi = x
    for i in range(nb_gibbs_steps):
        xi, xi_pre, xi_sigm = self.gibbs_xhx(xi)
    x_rec, x_rec_pre, x_rec_sigm = xi, xi_pre, xi_sigm

    x_rec = K.stop_gradient(x_rec)

    return x_rec, x_rec_pre, x_rec_sigm
Example #10
Source File: binary_layers.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def call(self, inputs):
    binary_kernel = binarize(self.kernel, H=self.H)

    inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier
    inputs_bnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs)) \
                          * inverse_kernel_lr_multiplier

    outputs_bnn_gradient = K.conv2d(
        inputs_bnn_gradient,
        binary_kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    outputs = (outputs_bnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient)) \
              * self.kernel_lr_multiplier

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
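A note on the pattern above: (inputs - (1. - 1./m) * K.stop_gradient(inputs)) * m leaves the forward value of inputs unchanged while multiplying the gradient that flows back through it by m; applying the reciprocal factor before the convolution and the multiplier itself after it rescales only the gradient reaching the kernel. A minimal hedged illustration of the value/gradient split (the names here are illustrative):

import keras.backend as K

m = 4.0                                   # stand-in for a kernel_lr_multiplier
x = K.placeholder(shape=(None, 1))
x_rescaled = (x - (1. - 1. / m) * K.stop_gradient(x)) * m

diff = x_rescaled - x                            # identically zero in the forward pass
grad = K.gradients(K.sum(x_rescaled), [x])[0]    # constant tensor equal to m, not 1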
Example #11
Source File: binary_ops.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims))
Example #12
Source File: quantized_layers.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def call(self, inputs):
    quantized_kernel = quantize(self.kernel, nb=self.nb)

    inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier
    inputs_qnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs)) \
                          * inverse_kernel_lr_multiplier

    outputs_qnn_gradient = K.conv2d(
        inputs_qnn_gradient,
        quantized_kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    outputs = (outputs_qnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient)) \
              * self.kernel_lr_multiplier

    #outputs = outputs*K.mean(K.abs(self.kernel))

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs
Example #13
Source File: quantized_ops.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims))
Example #14
Source File: quantized_ops.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def clip_through(x, min, max):
    '''Element-wise clipping with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    clipped = K.clip(x, min, max)
    return x + K.stop_gradient(clipped - x)
Example #15
Source File: quantized_ops.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def clip_through(x, min_val, max_val):
    '''Element-wise clipping with gradient propagation
    Analogue to round_through
    '''
    clipped = K.clip(x, min_val, max_val)
    clipped_through = x + K.stop_gradient(clipped - x)
    return clipped_through
Example #16
Source File: quantized_ops.py From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License | 5 votes |
def round_through(x):
    '''Element-wise rounding to the closest integer with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    rounded = K.round(x)
    rounded_through = x + K.stop_gradient(rounded - x)
    return rounded_through
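The x + K.stop_gradient(rounded - x) construction above is the straight-through estimator: the forward value equals K.round(x), while the gradient is that of the identity, so training signals are not killed by the zero-almost-everywhere derivative of rounding. A small hedged check, reusing the round_through just shown (again assuming a graph-mode backend):

import numpy as np
import keras.backend as K

x = K.placeholder(shape=(None,))
y = round_through(x)                     # forward: rounded values
dy_dx = K.gradients(K.sum(y), [x])[0]    # backward: gradient of the identity

f = K.function([x], [y, dy_dx])
vals, grads = f([np.array([0.2, 0.7, 1.4], dtype='float32')])
# vals  -> [0., 1., 1.]
# grads -> [1., 1., 1.]   (K.round alone would give zero gradient almost everywhere)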
Example #17
Source File: ternary_ops.py From nn_playground with MIT License | 5 votes |
def ternarize_dot(x, W):
    '''For RNN (maybe Dense or Conv too).
    Refer to 'Recurrent Neural Networks with Limited Numerical Precision' Section 3.1
    '''
    Wt = _ternarize(W)
    return K.dot(x, W) + K.stop_gradient(K.dot(x, Wt - W))
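This is the same straight-through idea applied at the matrix-multiply level: since K.dot(x, W) + K.dot(x, Wt - W) = K.dot(x, Wt), the forward value uses the ternarized weights, but only the first term carries gradients, so updates are computed as if the full-precision W had been used. A hedged numeric check (with a stand-in _ternarize, since the real helper lives in the nn_playground source):

import numpy as np
import keras.backend as K

def _ternarize(W):
    # stand-in helper: map weights to {-1, 0, +1}
    return K.round(K.clip(W, -1., 1.))

x = K.placeholder(shape=(None, 2))
W = K.variable(np.array([[0.3, -0.8], [1.6, 0.1]], dtype='float32'))

out = ternarize_dot(x, W)
dW = K.gradients(K.sum(out), [W])[0]     # gradient w.r.t. the full-precision W

f = K.function([x], [out, dW])
o, g = f([np.ones((1, 2), dtype='float32')])
# o -> x dotted with the ternarized W; g -> same gradient as a plain K.dot(x, W)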
Example #18
Source File: binary_ops.py From nn_playground with MIT License | 5 votes |
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims))
Example #19
Source File: binary_ops.py From nn_playground with MIT License | 5 votes |
def round_through(x):
    '''Element-wise rounding to the closest integer with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    rounded = K.round(x)
    return x + K.stop_gradient(rounded - x)
Example #20
Source File: binary_ops.py From nn_playground with MIT License | 5 votes |
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims))
Example #21
Source File: binary_ops.py From nn_playground with MIT License | 5 votes |
def round_through(x):
    '''Element-wise rounding to the closest integer with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    rounded = K.round(x)
    return x + K.stop_gradient(rounded - x)
Example #22
Source File: capslayers.py From deepcaps with MIT License | 4 votes |
def call(self, inputs):
    if self.r_num == 1:
        outputs = K.dot(K.reshape(inputs, (-1, self.ch_i * self.n_i)),
                        K.reshape(self.w, (self.ch_i * self.n_i,
                                           self.ch_j * self.n_j)))
        outputs = squeeze(K.reshape(outputs, (-1, self.ch_j, self.n_j)))
    else:
        wr = K.reshape(self.w, (self.ch_i, self.n_i, self.ch_j * self.n_j))

        u = tf.transpose(tf.matmul(tf.transpose(inputs, [1, 0, 2]), wr), [1, 0, 2])
        u = K.reshape(u, (-1, self.ch_i, self.ch_j, self.n_j))

        def rt(ub):
            ub = K.reshape(ub, (-1, self.ch_i, self.ch_j, self.n_j))
            ub_wo_g = K.stop_gradient(ub)
            b = 0.0
            for r in range(self.r_num):
                if r > 0:
                    c = K.expand_dims(K.softmax(b * self.b_alphas[r])) * self.ch_j  # distribution of weighs of capsules in I across capsules in J
                    c = K.stop_gradient(c)
                else:
                    c = 1.0

                if r == self.r_num - 1:
                    cub = c * ub
                else:
                    cub = c * ub_wo_g
                s = K.sum(cub, axis=-3)  # vectors of capsules in J
                v = squeeze(s)           # squeezed vectors of capsules in J
                if r == self.r_num - 1:
                    break

                v = K.stop_gradient(v)

                a = tf.einsum('bjk,bijk->bij', v, ub)  # a = v dot u
                # a = K.matmul(K.reshape(v, (-1, 1, J, 1, n_j)),
                #              K.reshape(u, (-1, I, J, n_j, 1))).reshape((-1, I, J))

                b = b + a  # increase those b[i,j] where v[j] dot b[i,j] is larger
            return v

        u = K.reshape(u, (-1, self.ch_i * self.ch_j * self.n_j))

        global useGPU

        if useGPU:
            outputs = rt(u)
        else:
            outputs = tf.map_fn(rt, u,
                                parallel_iterations=100, back_prop=True,
                                infer_shape=False)

        outputs = K.reshape(outputs, (-1, self.ch_j, self.n_j))

    return outputs
Example #23
Source File: xnor_layers.py From nn_playground with MIT License | 4 votes |
def call(self, inputs):
    _, kernel_b = xnorize(self.kernel, self.H)
    _, inputs_b = xnorize(inputs)
    outputs = K.conv2d(inputs_b, kernel_b, strides=self.strides,
                       padding=self.padding,
                       data_format=self.data_format,
                       dilation_rate=self.dilation_rate)

    # calculate Wa and xa

    # kernel_a
    mask = K.reshape(self.kernel, (-1, self.filters))  # self.nb_row * self.nb_col * channels, filters
    kernel_a = K.stop_gradient(K.mean(K.abs(mask), axis=0))  # filters

    # inputs_a
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    mask = K.mean(K.abs(inputs), axis=channel_axis, keepdims=True)
    ones = K.ones(self.kernel_size + (1, 1))
    inputs_a = K.conv2d(mask, ones, strides=self.strides,
                        padding=self.padding,
                        data_format=self.data_format,
                        dilation_rate=self.dilation_rate)  # nb_sample, 1, new_nb_row, new_nb_col
    if self.data_format == 'channels_first':
        outputs = outputs * K.stop_gradient(inputs_a) * \
                  K.expand_dims(K.expand_dims(K.expand_dims(kernel_a, 0), -1), -1)
    else:
        outputs = outputs * K.stop_gradient(inputs_a) * \
                  K.expand_dims(K.expand_dims(K.expand_dims(kernel_a, 0), 0), 0)

    if self.use_bias:
        outputs = K.bias_add(
            outputs,
            self.bias,
            data_format=self.data_format)
    if self.activation is not None:
        return self.activation(outputs)
    return outputs

# Aliases
Example #24
Source File: adabound.py From keras-adabound with MIT License | 4 votes |
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1

    # Applies bounds on actual learning rate
    step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))

    final_lr = self.final_lr * lr / self.base_lr
    lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
    upper_bound = final_lr * (1. + 1. / (self.gamma * t))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsbound:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # apply weight decay
        if self.weight_decay != 0.:
            g += self.weight_decay * K.stop_gradient(p)

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        if self.amsbound:
            vhat_t = K.maximum(vhat, v_t)
            denom = (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            denom = (K.sqrt(v_t) + self.epsilon)

        # Compute the bounds
        step_size_p = step_size * K.ones_like(denom)
        step_size_p_bound = step_size_p / denom
        bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                 lower_bound), upper_bound)

        p_t = p - bounded_lr_t

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
Example #25
Source File: query_based_attack.py From blackbox-attacks with MIT License | 4 votes |
def white_box_fgsm(prediction, target_model, x, logits, y, X_test, X_test_ini, targets, targets_cat, eps, dim):
    time1 = time.time()
    # Get gradient from model
    if args.loss_type == 'xent':
        grad = gen_grad(x, logits, y)
    elif args.loss_type == 'cw':
        real = tf.reduce_sum(y*logits, 1)
        other = tf.reduce_max((1-y)*logits - (y*10000), 1)
        if '_un' in args.method:
            loss = tf.maximum(0.0, real-other+args.conf)
        else:
            loss = tf.maximum(0.0, other-real+args.conf)
        grad = K.gradients(loss, [x])[0]

    # normalized gradient
    if args.norm == 'linf':
        normed_grad = K.sign(grad)
    elif args.norm == 'l2':
        normed_grad = K.l2_normalize(grad, axis=(1, 2, 3))

    # Multiply by constant epsilon
    scaled_grad = (eps - args.alpha) * normed_grad

    # Add perturbation to original example to obtain adversarial example
    if args.loss_type == 'xent':
        if '_un' in args.method:
            adv_x_t = K.stop_gradient(x + scaled_grad)
        else:
            adv_x_t = K.stop_gradient(x - scaled_grad)
    elif args.loss_type == 'cw':
        adv_x_t = K.stop_gradient(x - scaled_grad)

    adv_x_t = K.clip(adv_x_t, CLIP_MIN, CLIP_MAX)

    X_test_ini_slice = X_test_ini[:BATCH_SIZE*BATCH_EVAL_NUM]
    targets_cat_mod = targets_cat[:BATCH_SIZE*BATCH_EVAL_NUM]
    targets_mod = targets[:BATCH_SIZE*BATCH_EVAL_NUM]

    X_adv_t = np.zeros_like(X_test_ini_slice)
    for i in range(BATCH_EVAL_NUM):
        X_test_slice = X_test[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        targets_cat_slice = targets_cat[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        X_adv_t[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)] = K.get_session().run(
            [adv_x_t], feed_dict={x: X_test_slice, y: targets_cat_slice})[0]

    adv_pred_np = K.get_session().run([prediction], feed_dict={x: X_adv_t})[0]

    # _, _, white_box_error = tf_test_error_rate(target_model, x, X_adv_t, targets_cat_mod)
    white_box_error = 100.0 * np.sum(np.argmax(adv_pred_np, 1) != targets_mod) / adv_pred_np.shape[0]
    if '_un' not in args.method:
        white_box_error = 100.0 - white_box_error

    wb_norm = np.mean(np.linalg.norm((X_adv_t-X_test_ini_slice).reshape(BATCH_SIZE*BATCH_EVAL_NUM, dim), axis=1))
    print('Average white-box l2 perturbation: {}'.format(wb_norm))

    time2 = time.time()
    print('Total time: {}, Average time: {}'.format(time2-time1,
                                                    (time2 - time1)/(BATCH_SIZE*BATCH_EVAL_NUM)))

    wb_write_out(eps, white_box_error, wb_norm)

    return