Python keras.backend.stop_gradient() Examples

The following are 25 code examples of keras.backend.stop_gradient(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the keras.backend module.
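As a quick primer, K.stop_gradient returns a tensor that has the same value as its input in the forward pass but is treated as a constant during backpropagation, so no gradient flows through it. The minimal sketch below is an illustration only (it assumes the standalone Keras package with a TensorFlow 1.x graph backend; the placeholder shape is arbitrary) and shows both the basic behaviour and the straight-through pattern that several of the examples on this page build on.

import keras.backend as K

x = K.placeholder(shape=(None, 3))

# No gradient path survives through stop_gradient, so the first call returns [None];
# the second, unblocked version yields an ordinary gradient tensor.
blocked = K.sum(K.stop_gradient(K.square(x)))
unblocked = K.sum(K.square(x))
print(K.gradients(blocked, [x]))    # [None]
print(K.gradients(unblocked, [x]))  # [<tf.Tensor ...>]

# Straight-through pattern (see round_through / clip_through below):
# the forward value is K.round(x), while the gradient is that of the identity.
rounded_through = x + K.stop_gradient(K.round(x) - x)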
Example #1
Source File: fgs.py    From blackbox-attacks with MIT License
def symbolic_fgs(x, grad, eps=0.3, clipping=True):
    """
    FGSM attack.
    """

    # signed gradient
    normed_grad = K.sign(grad)

    # Multiply by constant epsilon
    scaled_grad = eps * normed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = K.stop_gradient(x + scaled_grad)

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)
    return adv_x 
Example #2
Source File: fgs.py    From ensemble-adv-training with MIT License
def symbolic_fgs(x, grad, eps=0.3, clipping=True):
    """
    FGSM attack.
    """

    # signed gradient
    normed_grad = K.sign(grad)

    # Multiply by constant epsilon
    scaled_grad = eps * normed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = K.stop_gradient(x + scaled_grad)

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)
    return adv_x 
Example #3
Source File: model.py    From 2018DSB with MIT License
def inst_weight(output_y, output_x, output_dr, output_dl, config=None):
    dy = output_y[:,2:,2:] - output_y[:,:-2,2:] + \
         2*(output_y[:,2:,1:-1] - output_y[:,:-2,1:-1]) + \
         output_y[:,2:,:-2] - output_y[:,:-2,:-2]
    dx = output_x[:,2:,2:] - output_x[:,2:,:-2] + \
         2*(output_x[:,1:-1,2:] - output_x[:,1:-1,:-2]) + \
         output_x[:,:-2,2:] - output_x[:,:-2,:-2]
    ddr = (output_dr[:,2:,2:] - output_dr[:,:-2,:-2] +
           output_dr[:,1:-1,2:] - output_dr[:,:-2,1:-1] +
           output_dr[:,2:,1:-1] - output_dr[:,1:-1,:-2]) * K.constant(2)
    ddl = (output_dl[:,2:,:-2] - output_dl[:,:-2,2:] +
           output_dl[:,2:,1:-1] - output_dl[:,1:-1,2:] +
           output_dl[:,1:-1,:-2] - output_dl[:,:-2,1:-1]) * K.constant(2)
    dpred = K.concatenate([dy, dx, ddr, ddl], axis=-1)
    dpred = K.spatial_2d_padding(dpred)
    weight_fg = K.cast(K.all(dpred>K.constant(config.GRADIENT_THRES), axis=3, 
                          keepdims=True), K.floatx())
    
    weight = K.clip(K.sqrt(weight_fg*K.prod(dpred, axis=3, keepdims=True)), 
                    config.WEIGHT_AREA/config.CLIP_AREA_HIGH, 
                    config.WEIGHT_AREA/config.CLIP_AREA_LOW)
    weight += (1 - weight_fg) * config.WEIGHT_AREA / config.BG_AREA
    weight = K.conv2d(weight, K.constant(config.GAUSSIAN_KERNEL),
                      padding='same')
    return K.stop_gradient(weight) 
Example #4
Source File: learn_labelembedding.py    From semantic-embeddings with MIT License
def labelembed_model(base_model, num_classes, **kwargs):
    
    input_ = base_model.input
    embedding = base_model.output
    
    out = keras.layers.Activation('relu')(embedding)
    out = keras.layers.BatchNormalization(name = 'embedding_bn')(out)
    out1 = keras.layers.Dense(num_classes, name = 'prob')(out)
    out2 = keras.layers.Dense(num_classes, name = 'out2')(keras.layers.Lambda(lambda x: K.stop_gradient(x))(out))
    
    cls_input_ = keras.layers.Input((1,), name = 'labels')
    cls_embedding_layer = keras.layers.Embedding(num_classes, num_classes, embeddings_initializer = 'identity', name = 'labelembeddings')
    cls_embedding = keras.layers.Flatten()(cls_embedding_layer(cls_input_))
    
    loss = keras.layers.Lambda(lambda x: labelembed_loss(x[0], x[1], x[2], K.flatten(x[3]), num_classes = num_classes, **kwargs)[:,None], name = 'labelembed_loss')([out1, out2, cls_embedding, cls_input_])
    
    return keras.models.Model([input_, cls_input_], [embedding, out1, loss]) 
Example #5
Source File: learn_labelembedding.py    From semantic-embeddings with MIT License
def labelembed_loss(out1, out2, tar, targets, tau = 2., alpha = 0.9, beta = 0.5, num_classes = 100):
    
    out2_prob = K.softmax(out2)
    tau2_prob = K.stop_gradient(K.softmax(out2 / tau))
    soft_tar = K.stop_gradient(K.softmax(tar))
    
    L_o1_y = K.sparse_categorical_crossentropy(output = K.softmax(out1), target = targets)
    
    pred = K.argmax(out2, axis = -1)
    mask = K.stop_gradient(K.cast(K.equal(pred, K.cast(targets, 'int64')), K.floatx()))
    L_o1_emb = -cross_entropy(out1, soft_tar)  # pylint: disable=invalid-unary-operand-type
    
    L_o2_y = K.sparse_categorical_crossentropy(output = out2_prob, target = targets)
    L_emb_o2 = -cross_entropy(tar, tau2_prob) * mask * (K.cast(K.shape(mask)[0], K.floatx())/(K.sum(mask)+1e-8))  # pylint: disable=invalid-unary-operand-type
    L_re = K.relu(K.sum(out2_prob * K.one_hot(K.cast(targets, 'int64'), num_classes), axis = -1) - alpha)
    
    return beta * L_o1_y + (1-beta) * L_o1_emb + L_o2_y + L_emb_o2 + L_re 
Example #6
Source File: cartpole_a3c.py    From reinforcement-learning with MIT License
def actor_optimizer(self):
        action = K.placeholder(shape=(None, self.action_size))
        advantages = K.placeholder(shape=(None, ))

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)

        actor_loss = loss + 0.01*entropy

        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        train = K.function([self.actor.input, action, advantages], [], updates=updates)
        return train

Example #7
Source File: fgs.py    From blackbox-attacks with MIT License
def symbolic_fg(x, grad, eps=0.3, clipping=True):
    """
    FG attack
    """
    # Unit vector in direction of gradient
    reduc_ind = list(xrange(1, len(x.get_shape())))
    normed_grad = grad / tf.sqrt(tf.reduce_sum(tf.square(grad),
                                                   reduction_indices=reduc_ind,
                                                   keep_dims=True))
    # Multiply by constant epsilon
    scaled_grad = eps * normed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = K.stop_gradient(x + scaled_grad)

    if clipping:
        adv_x = K.clip(adv_x, 0, 1)

    return adv_x 
Example #8
Source File: model.py    From models with MIT License
def profile_contrib(p):
    return kl.Lambda(lambda p:
                     K.mean(K.sum(K.stop_gradient(tf.nn.softmax(p, dim=-2)) * p, axis=-2), axis=-1)
                     )(p) 
Example #9
Source File: rbm.py    From keras_bn_library with MIT License
def mcmc_chain(self, x, nb_gibbs_steps):
		xi = x
		for i in range(nb_gibbs_steps):
			xi, xi_pre, xi_sigm = self.gibbs_xhx(xi)
		x_rec, x_rec_pre, x_rec_sigm = xi, xi_pre, xi_sigm

		x_rec = K.stop_gradient(x_rec)

		return x_rec, x_rec_pre, x_rec_sigm 
Example #10
Source File: binary_layers.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def call(self, inputs):
        binary_kernel = binarize(self.kernel, H=self.H)

        inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier
        inputs_bnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\
                  * inverse_kernel_lr_multiplier

        outputs_bnn_gradient = K.conv2d(
            inputs_bnn_gradient,
            binary_kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)

        outputs = (outputs_bnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient))\
                  * self.kernel_lr_multiplier


        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs 
Example #11
Source File: binary_ops.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) 
Example #12
Source File: quantized_layers.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def call(self, inputs):
        quantized_kernel = quantize(self.kernel, nb=self.nb)

        inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier
        inputs_qnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\
                  * inverse_kernel_lr_multiplier

        outputs_qnn_gradient = K.conv2d(
            inputs_qnn_gradient,
            quantized_kernel,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilation_rate=self.dilation_rate)

        outputs = (outputs_qnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient))\
                  * self.kernel_lr_multiplier


        #outputs = outputs*K.mean(K.abs(self.kernel))

        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)

        return outputs 
Example #13
Source File: quantized_ops.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) 
Example #14
Source File: quantized_ops.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def clip_through(x, min, max):
    '''Element-wise clipping to the range [min, max] with full gradient propagation
    (straight-through estimator). A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    clipped = K.clip(x,min,max)
    return x + K.stop_gradient(clipped - x) 
Example #15
Source File: quantized_ops.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def clip_through(x, min_val, max_val):
    '''Element-wise clipping with gradient propagation
    Analogue to round_through
    '''
    clipped = K.clip(x, min_val, max_val)
    clipped_through= x + K.stop_gradient(clipped-x)
    return clipped_through 
Example #16
Source File: quantized_ops.py    From QuantizedNeuralNetworks-Keras-Tensorflow with BSD 3-Clause "New" or "Revised" License
def round_through(x):
    '''Element-wise rounding to the closest integer with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    rounded = K.round(x)
    rounded_through = x + K.stop_gradient(rounded - x)
    return rounded_through 
Example #17
Source File: ternary_ops.py    From nn_playground with MIT License
def ternarize_dot(x, W):
    '''For RNN (maybe Dense or Conv too). 
    Refer to 'Recurrent Neural Networks with Limited Numerical Precision' Section 3.1
    '''
    Wt = _ternarize(W)
    return K.dot(x, W) + K.stop_gradient(K.dot(x, Wt - W)) 
Example #18
Source File: binary_ops.py    From nn_playground with MIT License
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) 
Example #19
Source File: binary_ops.py    From nn_playground with MIT License
def round_through(x):
    '''Element-wise rounding to the closest integer with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    rounded = K.round(x)
    return x + K.stop_gradient(rounded - x) 
Example #20
Source File: binary_ops.py    From nn_playground with MIT License
def _mean_abs(x, axis=None, keepdims=False):
    return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) 
Example #21
Source File: binary_ops.py    From nn_playground with MIT License
def round_through(x):
    '''Element-wise rounding to the closest integer with full gradient propagation.
    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
    '''
    rounded = K.round(x)
    return x + K.stop_gradient(rounded - x) 
Example #22
Source File: capslayers.py    From deepcaps with MIT License
def call(self, inputs):
        if self.r_num == 1:
            outputs = K.dot(K.reshape(inputs, (-1, self.ch_i * self.n_i)),
                            K.reshape(self.w, (self.ch_i * self.n_i,
                                               self.ch_j * self.n_j)))
            outputs = squeeze(K.reshape(outputs, (-1, self.ch_j, self.n_j)))
        else:
            wr = K.reshape(self.w, (self.ch_i, self.n_i, self.ch_j * self.n_j))

            u = tf.transpose(tf.matmul(tf.transpose(inputs, [1, 0, 2]), wr), [1, 0, 2])

            u = K.reshape(u, (-1, self.ch_i, self.ch_j, self.n_j))

            def rt(ub):
                ub = K.reshape(ub, (-1, self.ch_i, self.ch_j, self.n_j))
                ub_wo_g = K.stop_gradient(ub)
                b = 0.0
                for r in range(self.r_num):
                    if r > 0:
                        c = K.expand_dims(K.softmax(b * self.b_alphas[r])) * self.ch_j  # distribution of weights of capsules in I across capsules in J
                        c = K.stop_gradient(c)
                    else:
                        c = 1.0

                    if r == self.r_num - 1:
                        cub = c * ub
                    else:
                        cub = c * ub_wo_g
                    s = K.sum(cub, axis=-3)  # vectors of capsules in J
                    v = squeeze(s)  # squeezed vectors of capsules in J
                    if r == self.r_num - 1:
                        break

                    v = K.stop_gradient(v)

                    a = tf.einsum('bjk,bijk->bij', v, ub)  # a = v dot u
                    # a = K.matmul(K.reshape(v, (-1, 1, J, 1, n_j)),
                    #             K.reshape(u, (-1, I, J, n_j, 1))).reshape((-1, I, J))

                    b = b + a  # increase those b[i,j] where v[j] dot b[i,j] is larger
                return v

            u = K.reshape(u, (-1, self.ch_i * self.ch_j * self.n_j))

            global useGPU

            if useGPU:
                outputs = rt(u)
            else:
                outputs = tf.map_fn(rt, u,
                                    parallel_iterations=100, back_prop=True,
                                    infer_shape=False)

            outputs = K.reshape(outputs, (-1, self.ch_j, self.n_j))

        return outputs 
Example #23
Source File: xnor_layers.py    From nn_playground with MIT License
def call(self, inputs):
        _, kernel_b = xnorize(self.kernel, self.H)
        _, inputs_b = xnorize(inputs)
        outputs = K.conv2d(inputs_b, kernel_b, strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)

        # calculate Wa and xa
        
        # kernel_a
        mask = K.reshape(self.kernel, (-1, self.filters)) # self.nb_row * self.nb_col * channels, filters 
        kernel_a = K.stop_gradient(K.mean(K.abs(mask), axis=0)) # filters
        
        # inputs_a
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1 
        mask = K.mean(K.abs(inputs), axis=channel_axis, keepdims=True) 
        ones = K.ones(self.kernel_size + (1, 1))
        inputs_a = K.conv2d(mask, ones, strides=self.strides,
                      padding=self.padding,
                      data_format=self.data_format,
                      dilation_rate=self.dilation_rate) # nb_sample, 1, new_nb_row, new_nb_col
        if self.data_format == 'channels_first':
            outputs = outputs * K.stop_gradient(inputs_a) * K.expand_dims(K.expand_dims(K.expand_dims(kernel_a, 0), -1), -1)
        else:
            outputs = outputs * K.stop_gradient(inputs_a) * K.expand_dims(K.expand_dims(K.expand_dims(kernel_a, 0), 0), 0)
                                
        if self.use_bias:
            outputs = K.bias_add(
                outputs,
                self.bias,
                data_format=self.data_format)

        if self.activation is not None:
            return self.activation(outputs)
        return outputs


Example #24
Source File: adabound.py    From keras-adabound with MIT License
def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(K.maximum(step_size_p_bound,
                                                     lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates 
Example #25
Source File: query_based_attack.py    From blackbox-attacks with MIT License
def white_box_fgsm(prediction, target_model, x, logits, y, X_test, X_test_ini, targets, targets_cat, eps, dim):
    time1 = time.time()
    # Get gradient from model
    if args.loss_type == 'xent':
        grad = gen_grad(x, logits, y)
    elif args.loss_type == 'cw':
        real = tf.reduce_sum(y*logits, 1)
        other = tf.reduce_max((1-y)*logits - (y*10000), 1)
        if '_un' in args.method:
            loss = tf.maximum(0.0,real-other+args.conf)
        else:
            loss = tf.maximum(0.0,other-real+args.conf)
        grad = K.gradients(loss, [x])[0]

    # normalized gradient
    if args.norm == 'linf':
        normed_grad = K.sign(grad)
    elif args.norm == 'l2':
        normed_grad = K.l2_normalize(grad, axis = (1,2,3))

    # Multiply by constant epsilon
    scaled_grad = (eps - args.alpha) * normed_grad

    # Add perturbation to original example to obtain adversarial example
    if args.loss_type == 'xent':
        if '_un' in args.method:
            adv_x_t = K.stop_gradient(x + scaled_grad)
        else:
            adv_x_t = K.stop_gradient(x - scaled_grad)
    elif args.loss_type == 'cw':
        adv_x_t = K.stop_gradient(x - scaled_grad)

    adv_x_t = K.clip(adv_x_t, CLIP_MIN, CLIP_MAX)

    X_test_ini_slice = X_test_ini[:BATCH_SIZE*BATCH_EVAL_NUM]
    targets_cat_mod = targets_cat[:BATCH_SIZE*BATCH_EVAL_NUM]
    targets_mod = targets[:BATCH_SIZE*BATCH_EVAL_NUM]

    X_adv_t = np.zeros_like(X_test_ini_slice)

    for i in range(BATCH_EVAL_NUM):
        X_test_slice = X_test[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        targets_cat_slice = targets_cat[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)]
        X_adv_t[i*(BATCH_SIZE):(i+1)*(BATCH_SIZE)] = K.get_session().run([adv_x_t], feed_dict={x: X_test_slice, y: targets_cat_slice})[0]

    adv_pred_np = K.get_session().run([prediction], feed_dict={x: X_adv_t})[0]

    # _, _, white_box_error = tf_test_error_rate(target_model, x, X_adv_t, targets_cat_mod)
    white_box_error = 100.0 * np.sum(np.argmax(adv_pred_np,1) != targets_mod) / adv_pred_np.shape[0]

    if '_un' not in args.method:
        white_box_error = 100.0 - white_box_error

    wb_norm = np.mean(np.linalg.norm((X_adv_t-X_test_ini_slice).reshape(BATCH_SIZE*BATCH_EVAL_NUM, dim), axis=1))
    print('Average white-box l2 perturbation: {}'.format(wb_norm))
    time2 = time.time()
    print('Total time: {}, Average time: {}'.format(time2-time1, (time2 - time1)/(BATCH_SIZE*BATCH_EVAL_NUM)))

    wb_write_out(eps, white_box_error, wb_norm)

    return