Python theano.tensor.mul() Examples

The following are 30 code examples of theano.tensor.mul(), taken from open-source projects. The source file and project for each example are listed above it. You may also want to check out all available functions and classes of the module theano.tensor.
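Before the project examples, here is a minimal, self-contained sketch (assuming only a standard Theano and NumPy installation) showing that T.mul is elementwise multiplication and follows Theano's usual broadcasting rules; the variable names are illustrative and not taken from any project below.

import numpy as np
import theano
import theano.tensor as T

a = T.matrix('a')   # shape (rows, cols)
b = T.row('b')      # shape (1, cols), broadcastable along the first axis

# T.mul multiplies elementwise; the row vector is broadcast across the rows of the matrix
f = theano.function([a, b], T.mul(a, b))

print(f(np.ones((2, 3), dtype=theano.config.floatX),
        np.array([[1., 2., 3.]], dtype=theano.config.floatX)))
# [[1. 2. 3.]
#  [1. 2. 3.]]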
Example #1
Source File: output_layer.py    From recnet with MIT License
def sequence_iteration(self, output, mask,use_dropout=0,dropout_value=0.5):

        dot_product = T.dot(output , self.t_w_out)

        net_o = T.add( dot_product , self.t_b_out )

        ex_net = T.exp(net_o)
        sum_net = T.sum(ex_net, axis=2, keepdims=True)
        softmax_o = ex_net / sum_net


        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, softmax_o)   + T.mul( (1. - mask) , 1e-6 )

        return output #result


######                     Linear Layer
######################################## 
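The layer above relies on a common masking idiom: broadcast a 0/1 mask over the class axis and use T.mul to suppress padded timesteps while keeping a tiny constant in their place. A stripped-down sketch of just that step, with hypothetical toy tensors rather than the layer's real members:

import theano
import theano.tensor as T

probs = T.tensor3('probs')        # (time, batch, classes) softmax outputs
mask = T.tensor3('mask')          # (time, batch, 1), 1 for real steps, 0 for padding

mask_b = T.addbroadcast(mask, 2)  # let the singleton class axis broadcast
masked = T.mul(mask_b, probs) + T.mul(1. - mask_b, 1e-6)

f = theano.function([probs, mask], masked)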
Example #2
Source File: Model4DistancePrediction.py    From RaptorX-Contact with GNU General Public License v3.0
def errors4one(self, z, out, weight=None, distLabelType='12C'):
	distBins = config.distCutoffs[distLabelType]
	label8 = DistanceUtils.LabelsOfOneDistance(config.ContactDefinition, distBins)
	label15 = DistanceUtils.LabelsOfOneDistance(config.InteractionLimit, distBins)

	z3C = T.cast( T.ge(z, label8), 'int32') + T.cast( T.ge(z, label15), 'int32')
	o3C = T.cast( T.ge(out, label8), 'int32') + T.cast( T.ge(out, label15), 'int32')

	if weight is not None:
            err = T.sum( T.mul(weight, T.neq(o3C, z3C) ) )*1./T.sum(weight)
	else:
            err = T.mean( T.neq(o3C , z3C) ) 

	## err is a scalar; convert it to a tensor with ndim=1
	return T.stack([err] )

    ## this function returns a vector of errors; its size equals the sum of ValueDims over all the responses
Example #3
Source File: rbm_adv.py    From SteinGAN with MIT License
def rbf_kernel(X):

    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e +  X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.)) 
    
    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy 
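A minimal way to exercise the kernel above (assuming rbf_kernel is in scope and Theano/NumPy are installed) is to compile it on a symbolic matrix and evaluate it on a small random sample; the shapes in the comments are only for orientation:

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
kxy, dxkxy = rbf_kernel(X)              # symbolic kernel matrix and its gradient term
f = theano.function([X], [kxy, dxkxy])

samples = np.random.randn(5, 3).astype(theano.config.floatX)
K, dK = f(samples)                      # K: (5, 5) kernel matrix, dK: (5, 3)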
Example #4
Source File: utils.py    From RaptorX-Contact with GNU General Public License v3.0
def ConvByPattern(x, patterns, mask=None):
    W = np.transpose(patterns, (3, 0, 1, 2))
    out2 = T.nnet.conv2d(x.dimshuffle(0, 3, 1, 2), W, filter_shape=W.shape, border_mode='half')
    if mask is not None:
        ## mask has shape (batchSize, #rows_to_be_masked, nCols)

        ## a subtensor of out2 along the horiz direction
        out2_sub_horiz = out2[:, :, :mask.shape[1], :]
        mask_horiz = mask.dimshuffle(0, 'x', 1, 2)
        out3 = T.set_subtensor(out2_sub_horiz, T.mul(out2_sub_horiz, mask_horiz) )

        ## a subtensor of out3 along the vertical direction
        out3_sub_vertical = out3[:, :, :, :mask.shape[1] ]
        mask_vertical = mask.dimshuffle(0, 'x', 2, 1)
        y = T.set_subtensor(out3_sub_vertical, T.mul(out3_sub_vertical, mask_vertical) )
    else:
        y = out2

    y = y.dimshuffle(0, 2, 3, 1)

    return y/np.prod(patterns.shape[1:3]) 
Example #5
Source File: recurrent_layer.py    From recnet with MIT License
def t_forward_step(self,mask, rzup_in_sig, h_pre, u_rz, u_up, t_n_out): #u_r, u_z,



        signal_act = self.activation
        gate_act = self.sigmoid()

        preact = T.dot( h_pre, u_rz)


        r = gate_act( T.add( rzup_in_sig[:, 0:t_n_out] , preact[:, 0:t_n_out] )) #T.dot( h_pre, u_r) ) )
        z = gate_act( T.add( rzup_in_sig[:, t_n_out:2 * t_n_out] , preact[:, t_n_out:2 * t_n_out] )) #T.dot(h_pre, u_z) ))

        h_update = signal_act( T.add( rzup_in_sig[:, 2*t_n_out:3*t_n_out] , T.dot( T.mul( h_pre, r), u_up) ))

        h_new = T.add( (1.-z) * h_update , z * h_pre )

        mask = T.addbroadcast(mask, 1)
        out_sig =  T.add( mask * h_new   , (1. - mask) * h_pre )

        return out_sig 
Example #6
Source File: NN4LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def NLL(self, y, sampleWeight=None):
        ###Return the mean of the negative log-likelihood of the prediction of this model under a given target distribution.

        if sampleWeight is not None:
            return -T.sum(T.mul(sampleWeight, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] ) )/T.sum(sampleWeight)
        else:
            return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) 
Example #7
Source File: output_layer.py    From recnet with MIT License
def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        linear_o = T.add(dot_product, self.t_b_out)


        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

        return output  # result


### TEST FUNCTIONS  # TODO: make a new file with test functions
Example #8
Source File: ln_reccurent_layer.py    From recnet with MIT License
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ig_c, w_fg_c, w_og_c, w_ifco, b_ifco, ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

        inner_act = self.activation  # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid

        # Input Gate
        ig_t1 = gate_act(T.add(preact[:, 0:t_n_out], T.mul(pre_cell_sig, w_ig_c)))
        # Forget Gate
        fg_t1 = gate_act(T.add(preact[:, 1 * t_n_out:2 * t_n_out], T.mul(pre_cell_sig, w_fg_c),))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act( T.add(preact[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

        # Output Gate
        og_t1 = gate_act(
            T.add(preact[:, 3 * t_n_out:4 * t_n_out], T.mul(cs_t1_ln, w_og_c)))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1] 
Example #9
Source File: ln_reccurent_layer.py    From recnet with MIT License
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)



        inner_act = self.activation # T.nnet.hard_sigmoid #T.tanh # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid #T.nnet.sigmoid

        # Input Gate
        ig_t1 = gate_act(preact[:, 0:t_n_out])
        # Forget Gate
        fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)
        # Output Gate
        og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1] 
Example #10
Source File: helpers.py    From deep-prior with GNU General Public License v3.0
def SlopeLinInv(slope):
    """
    Truncated linear unit
    :param slope: slope of negative quadrant
    :return: x if x > 0 else x*slope
    """
    import theano.tensor as T

    def inner(x):
        return T.switch(T.gt(x, 0), x, T.mul(x, slope))
    return inner 
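A short usage sketch (assuming Theano is installed): the closure returned above behaves like a leaky-ReLU-style activation whose negative side is scaled by the given slope.

import numpy as np
import theano
import theano.tensor as T

act = SlopeLinInv(0.1)          # the factory defined above
x = T.vector('x')
f = theano.function([x], act(x))

print(f(np.array([-2., 0., 3.], dtype=theano.config.floatX)))
# [-0.2  0.   3. ]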
Example #11
Source File: ln_reccurent_layer.py    From recnet with MIT License
def t_forward_step(self,mask, rzup_in_sig, h_pre,b_rzup, u_rz, u_up,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3, t_n_out):



        signal_act = self.activation
        gate_act = self.sigmoid()

        rzup_in_sig_ln = self.ln(rzup_in_sig, ln_b1, ln_s1)

        rzup_b_in_sig_ln = T.add(rzup_in_sig_ln, b_rzup)

        preact = T.dot( h_pre, u_rz)

        preact_ln = self.ln(preact, ln_b2, ln_s2)

        r = gate_act( T.add( rzup_b_in_sig_ln[:, 0:t_n_out] , preact_ln[:, 0:t_n_out] ))
        z = gate_act( T.add( rzup_b_in_sig_ln[:, t_n_out:2 * t_n_out] , preact_ln[:, t_n_out:2 * t_n_out] ))

        preactx = T.dot(h_pre , u_up)
        preactx_ln = self.ln(preactx, ln_b3, ln_s3)
        h_pre_r_ln = T.mul( preactx_ln, r)

        h_update = signal_act( T.add( rzup_b_in_sig_ln[:, 2*t_n_out:3*t_n_out] , h_pre_r_ln ))

        h_new = T.add( (1.-z) * h_update , z * h_pre )

        mask = T.addbroadcast(mask, 1)
        out_sig =  T.add( mask * h_new   , (1. - mask) * h_pre )

        return out_sig 
Example #12
Source File: test_sigm.py    From D-VAE with MIT License
def test_local_sigm_times_exp(self):
        """
        Test the `local_sigm_times_exp` optimization.
        exp(x) * sigm(-x) -> sigm(x)
        exp(-x) * sigm(x) -> sigm(-x)
        """
        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [sigmoid])

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [tensor.neg, sigmoid])

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [tensor.neg, sigmoid, tensor.neg])

        f = theano.function(
                [x, y],
                (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                 tensor.exp(x * y) * tensor.exp(y)),
                mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
                  tensor.mul]) 
Example #13
Source File: test_sigm.py    From attention-lvcsr with MIT License
def test_local_sigm_times_exp(self):
        """
        Test the `local_sigm_times_exp` optimization.
        exp(x) * sigm(-x) -> sigm(x)
        exp(-x) * sigm(x) -> sigm(-x)
        """
        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        match(f, [sigmoid])

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid])

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid, tensor.neg])

        f = theano.function(
                [x, y],
                (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                 tensor.exp(x * y) * tensor.exp(y)),
                mode=m)
        match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
                  tensor.mul]) 
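Both tests above check which ops survive graph optimization by walking the compiled function's graph. The same inspection can be done by hand; in this sketch (assuming the default Theano mode, which includes local_sigm_times_exp) the exp(x) * sigm(-x) product should typically collapse to a single sigmoid Elemwise:

import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], T.exp(x) * T.nnet.sigmoid(-x))

# list the ops left in the optimized graph; ideally just one sigmoid Elemwise
print([node.op for node in f.maker.fgraph.toposort()])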
Example #14
Source File: recurrent_layer.py    From recnet with MIT License
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                       t_n_out):

        ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

        inner_act = self.activation
        gate_act = self.sigmoid()

        # Input Gate
        ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out], cur_w_in_sig[:, 0:t_n_out]))
        # Forget Gate
        fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                               cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
            T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        # Output Gate
        og_t1 = gate_act(
            T.add(ifco[:, 3 * t_n_out:4 * t_n_out], cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1] 
Example #15
Source File: NN4LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def errors(self, y, sampleWeight=None):
	assert (y.ndim == 2)
	err = T.neq(self.y_pred, y)
	if sampleWeight is None:
		return T.mean(err, axis=0)

	assert (sampleWeight.ndim == 2)
	return T.sum( T.mul(err, sampleWeight), axis=0)/T.sum(sampleWeight)

    ## this function returns a scalar 
Example #16
Source File: NN4Normal.py    From RaptorX-Contact with GNU General Public License v3.0
def errors(self, y, sampleWeight=None):
		assert (y.ndim == 2)
		err_sqr = T.sqr( y - self.y_pred )
		if sampleWeight is None:
			return T.sqrt(T.mean(err_sqr, axis=0 ) )

		assert (sampleWeight.ndim == 2)
		if self.n_variables == 1:
			weight = sampleWeight
		else:
			weight = T.concatenate( [ sampleWeight, sampleWeight], axis=1 )
		return T.sqrt( T.sum(T.mul( err_sqr, weight ), axis=0)/ T.sum(sampleWeight) )

	## y has shape (batchSize, n_variables), sampleWeight shall have shape (batchSize, 1) instead of (batchSize,) 
Example #17
Source File: utils.py    From RaptorX-Contact with GNU General Public License v3.0
def ExpandBy4dPattern(x, patterns):
    ##patterns has shape (numPatterns, nPatternRows, nPatternCols, numLabels)
    ##each element is between 0 and 1 and the sum of the vector patterns[i, j, k, :] is equal to 1
    pshape = patterns.shape

    ## y1 has shape (batchSize, nRows * pshape[1], nCols * pshape[2], pshape[0])
    y1 = MyRepeat(x, (pshape[1], pshape[2]), axes=[1, 2])
    expandedPatterns = T.tile(patterns, (1, x.shape[1], x.shape[2], 1) ).dimshuffle('x', 1, 2, 0, 3)

    ylist = []
    for i in xrange(pshape[3]):
        y2 = T.mul( y1, expandedPatterns[:, :, :, :, i] )
        y3 = T.sum( y2, axis=3, keepdims=True)
        ylist.append(y3)
    return T.concatenate( ylist, axis=3) 
Example #18
Source File: LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def errorsBreakdown(self, y):
	
	##truth shall be cast to at least int32
	def breakDown3C(pred=None, truth=None):
	    labelcount = T.bincount(truth, minlength=3)
            err = T.neq(pred, truth)
            truth_with_wrong_pred = truth[err.nonzero()]
	    errcount = T.bincount(truth_with_wrong_pred, minlength=3)

	    ## use 0.0001 to avoid division by 0
            return T.mul(errcount, 1./(labelcount + 0.0001) )

	if self.n_out == 3:
	    truth = T.cast(y, 'int32')
	    return breakDown3C(self.y_pred, truth)

	if self.n_out == 12:
	    ## convert the 12-label system to the 3-label system
	    ## 0, 1, 2, 3 to 0; 4,5,6,7,8,9,10 to 1; and 11 to 2
	    y1 = T.zeros_like(y)
	    y2 = T.gt(y, 3)
	    y3 = T.gt(y, 10)
	    truth = T.cast(y1 + y2 + y3, 'int32')

	    pred1 = T.zeros_like(self.y_pred)
	    pred2 = T.gt(self.y_pred, 3)
	    pred3 = T.gt(self.y_pred, 10)
	    pred = T.cast( pred1 + pred2 + pred3, 'int32')

	    return breakDown3C(pred, truth)
            
	else:
	    print 'this function only works when n_out is either 3 or 12'
	    sys.exit(-1)

    ## calculate the confusion matrix of the prediction 
Example #19
Source File: LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def errors(self, y, sampleWeight=None):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch ; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
	    if sampleWeight is not None:
		return T.sum( T.mul(sampleWeight, T.neq(self.y_pred, y) ) ) * 1./T.sum(sampleWeight)
	    else:
                return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()

    ## T.bincount is a weird function. Its return value has the same type as the dtype of the elements in the array to be counted.
    ##calculate the classification errors for each of the three categories 
Example #20
Source File: LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def negative_log_likelihood(self, y, sampleWeight=None):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.

        if sampleWeight is not None:
            return -T.sum(T.mul(sampleWeight, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] ) )/T.sum(sampleWeight)
	else:
            return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

        # end-snippet-2 
Example #21
Source File: loss_function.py    From recnet with MIT License
def output_error(self, input_sequence,   true_output, mask):

        outputs = T.pow(true_output - input_sequence, 2)
        outputs = T.sum(outputs, axis=2) / outputs.shape[2]
        outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask)
        return T.sum(outputs) / T.sum(mask)



######    2-class weighted cross entropy
######################################## 
Example #22
Source File: loss_function.py    From recnet with MIT License
def output_error(self, input_sequence,   true_output, mask):

        outputs = self._w_crossentropy(input_sequence, true_output)

        #outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask) #todo correct mask implementation? influence on result?

        return T.sum(outputs) / T.sum(mask)



######            Standard cross entropy
######################################## 
Example #23
Source File: loss_function.py    From recnet with MIT License
def output_error(self, input_sequence,   true_output, mask):

        outputs = T.nnet.categorical_crossentropy(input_sequence, true_output)

        outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask)

        return T.sum(outputs) / T.sum(mask) 
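The three recnet loss functions above share one masked-mean idiom: multiply the per-step loss by the mask with T.mul, then divide by the number of unmasked steps. Distilled to a standalone sketch with hypothetical 2-D toy tensors (the real losses operate on 3-D sequences):

import theano
import theano.tensor as T

step_loss = T.matrix('step_loss')   # (time, batch) loss per timestep
mask = T.matrix('mask')             # (time, batch), 1 for real steps, 0 for padding

masked_mean = T.sum(T.mul(step_loss, mask)) / T.sum(mask)
loss_fn = theano.function([step_loss, mask], masked_mean)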
Example #24
Source File: test_blas.py    From attention-lvcsr with MIT License
def test_gemm_canonicalize():
    X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
        'a'), T.scalar('b')
    R, S, U, c, d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar(
        'c'), T.scalar('d')
    u = T.row('u')
    v = T.vector('v')
    w = T.col('w')

    can = []
    _gemm_canonicalize(X + Y + Z, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, Z)]

    can = []
    _gemm_canonicalize(X + Y + u, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, u)], can

    can = []
    _gemm_canonicalize(X + Y + v, 1.0, can, 0)
    # [(1.0, X), (1.0, Y), (1.0, InplaceDimShuffle{x,0}(v))]
    assert can[:2] == [(1.0, X), (1.0, Y)]
    assert isinstance(can[2], tuple)
    assert len(can[2]) == 2
    assert can[2][0] == 1.0
    assert can[2][1].owner
    assert isinstance(can[2][1].owner.op, T.DimShuffle)
    assert can[2][1].owner.inputs == [v]

    can = []
    _gemm_canonicalize(X + Y + w, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, w)], can

    can = []
    _gemm_canonicalize(a * X + Y - b * Z * c, 1.0, can, 0)
    assert can[0] == (a, X)
    assert can[1] == (1.0, Y)
    assert can[2][0].owner.op == T.mul
    assert can[2][0].owner.inputs[0].owner.op == T.neg
    assert can[2][0].owner.inputs[0].owner.inputs[0] == c
    assert can[2][0].owner.inputs[1] == b

    can = []
    _gemm_canonicalize((-d) * X - (a * X + Y - b * Z * c), 1.0, can, 0)
    # print can
    assert can[0][0].owner.op == T.neg
    assert can[0][0].owner.inputs[0] == d
    assert can[0][1] == X
    assert can[1][0].owner.op == T.neg
    assert can[1][0].owner.inputs[0] == a
    assert can[2] == (-1.0, Y)
    assert can[3][0].owner.op == T.mul
    assert can[3][0].owner.inputs == [c, b] 
Example #25
Source File: main.py    From MemN2N with MIT License
def __init__(self, incomings, vocab, embedding_size, A, A_T, C, C_T, nonlinearity=lasagne.nonlinearities.softmax, **kwargs):
        super(MemoryNetworkLayer, self).__init__(incomings, **kwargs)
        if len(incomings) != 3:
            raise NotImplementedError

        batch_size, max_seqlen, max_sentlen = self.input_shapes[0]

        l_context_in = lasagne.layers.InputLayer(shape=(batch_size, max_seqlen, max_sentlen))
        l_B_embedding = lasagne.layers.InputLayer(shape=(batch_size, embedding_size))
        l_context_pe_in = lasagne.layers.InputLayer(shape=(batch_size, max_seqlen, max_sentlen, embedding_size))

        l_context_in = lasagne.layers.ReshapeLayer(l_context_in, shape=(batch_size * max_seqlen * max_sentlen, ))
        l_A_embedding = lasagne.layers.EmbeddingLayer(l_context_in, len(vocab)+1, embedding_size, W=A)
        self.A = l_A_embedding.W
        l_A_embedding = lasagne.layers.ReshapeLayer(l_A_embedding, shape=(batch_size, max_seqlen, max_sentlen, embedding_size))
        l_A_embedding = lasagne.layers.ElemwiseMergeLayer((l_A_embedding, l_context_pe_in), merge_function=T.mul)
        l_A_embedding = SumLayer(l_A_embedding, axis=2)
        l_A_embedding = TemporalEncodingLayer(l_A_embedding, T=A_T)
        self.A_T = l_A_embedding.T

        l_C_embedding = lasagne.layers.EmbeddingLayer(l_context_in, len(vocab)+1, embedding_size, W=C)
        self.C = l_C_embedding.W
        l_C_embedding = lasagne.layers.ReshapeLayer(l_C_embedding, shape=(batch_size, max_seqlen, max_sentlen, embedding_size))
        l_C_embedding = lasagne.layers.ElemwiseMergeLayer((l_C_embedding, l_context_pe_in), merge_function=T.mul)
        l_C_embedding = SumLayer(l_C_embedding, axis=2)
        l_C_embedding = TemporalEncodingLayer(l_C_embedding, T=C_T)
        self.C_T = l_C_embedding.T

        l_prob = InnerProductLayer((l_A_embedding, l_B_embedding), nonlinearity=nonlinearity)
        l_weighted_output = BatchedDotLayer((l_prob, l_C_embedding))

        l_sum = lasagne.layers.ElemwiseSumLayer((l_weighted_output, l_B_embedding))

        self.l_context_in = l_context_in
        self.l_B_embedding = l_B_embedding
        self.l_context_pe_in = l_context_pe_in
        self.network = l_sum

        params = lasagne.layers.helper.get_all_params(self.network, trainable=True)
        values = lasagne.layers.helper.get_all_param_values(self.network, trainable=True)
        for p, v in zip(params, values):
            self.add_param(p, v.shape, name=p.name)

        zero_vec_tensor = T.vector()
        self.zero_vec = np.zeros(embedding_size, dtype=theano.config.floatX)
        self.set_zero = theano.function([zero_vec_tensor], updates=[(x, T.set_subtensor(x[0, :], zero_vec_tensor)) for x in [self.A, self.C]]) 
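Several of the remaining examples pass T.mul as the merge function of a Lasagne ElemwiseMergeLayer to take an elementwise product of two layer outputs. A minimal sketch of that pattern (assuming Lasagne is installed; the layer names are illustrative):

import theano.tensor as T
import lasagne.layers as L

l_a = L.InputLayer(shape=(None, 8))
l_b = L.InputLayer(shape=(None, 8))

# elementwise product of the two inputs, computed symbolically with T.mul
l_prod = L.ElemwiseMergeLayer([l_a, l_b], merge_function=T.mul)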
Example #26
Source File: ind_model.py    From planetoid with MIT License
def build(self):
        """build the model. This method should be called after self.add_data.
        """
        x_sym = sparse.csr_matrix('x', dtype = 'float32')
        self.x_sym = x_sym
        y_sym = T.imatrix('y')
        gx_sym = sparse.csr_matrix('gx', dtype = 'float32')
        gy_sym = T.ivector('gy')
        gz_sym = T.vector('gz')

        l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym)
        l_gx_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = gx_sym)
        l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym)

        l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
        W = l_x_2.W
        l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        if self.use_feature:
            l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis = 1)
            l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        else:
            l_x = l_x_2

        l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W = W)
        if self.neg_samp > 0:
            l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size = self.num_ver, output_size = self.embedding_size)
            l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gz_sym)).sum()
        else:
            l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity = lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()
        
        self.l = [l_x, l_gx]

        py_sym = lasagne.layers.get_output(l_x)
        loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_1)
            loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_x_2)
            loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

        params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] if self.use_feature else [l_x.W, l_x.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_x)
        updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate)
        self.train_fn = theano.function([x_sym, y_sym], loss, updates = updates)

        g_params = lasagne.layers.get_all_params(l_gx)
        g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate)
        self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates = g_updates, on_unused_input = 'ignore')

        self.test_fn = theano.function([x_sym], py_sym) 
Example #27
Source File: trans_model.py    From planetoid with MIT License
def build(self):
        """build the model. This method should be called after self.add_data.
        """
        x_sym = sparse.csr_matrix('x', dtype = 'float32')
        y_sym = T.imatrix('y')
        g_sym = T.imatrix('g')
        gy_sym = T.vector('gy')
        ind_sym = T.ivector('ind')

        l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym)
        l_g_in = lasagne.layers.InputLayer(shape = (None, 2), input_var = g_sym)
        l_ind_in = lasagne.layers.InputLayer(shape = (None, ), input_var = ind_sym)
        l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym)

        num_ver = max(self.graph.keys()) + 1
        l_emb_in = lasagne.layers.SliceLayer(l_g_in, indices = 0, axis = 1)
        l_emb_in = lasagne.layers.EmbeddingLayer(l_emb_in, input_size = num_ver, output_size = self.embedding_size)
        l_emb_out = lasagne.layers.SliceLayer(l_g_in, indices = 1, axis = 1)
        if self.neg_samp > 0:
            l_emb_out = lasagne.layers.EmbeddingLayer(l_emb_out, input_size = num_ver, output_size = self.embedding_size)

        l_emd_f = lasagne.layers.EmbeddingLayer(l_ind_in, input_size = num_ver, output_size = self.embedding_size, W = l_emb_in.W)
        l_x_hid = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        
        if self.use_feature:
            l_emd_f = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
            l_y = lasagne.layers.ConcatLayer([l_x_hid, l_emd_f], axis = 1)
            l_y = layers.DenseLayer(l_y, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        else:
            l_y = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)

        py_sym = lasagne.layers.get_output(l_y)
        loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_hid)
            loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_emd_f)
            loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

        if self.neg_samp == 0:
            l_gy = layers.DenseLayer(l_emb_in, num_ver, nonlinearity = lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gy)
            g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, lasagne.layers.get_output(l_emb_out)).sum()
        else:
            l_gy = lasagne.layers.ElemwiseMergeLayer([l_emb_in, l_emb_out], T.mul)
            pgy_sym = lasagne.layers.get_output(l_gy)
            g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gy_sym)).sum()

        params = [l_emd_f.W, l_emd_f.b, l_x_hid.W, l_x_hid.b, l_y.W, l_y.b] if self.use_feature else [l_y.W, l_y.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_y)
        updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate)

        self.train_fn = theano.function([x_sym, y_sym, ind_sym], loss, updates = updates, on_unused_input = 'ignore')
        self.test_fn = theano.function([x_sym, ind_sym], py_sym, on_unused_input = 'ignore')
        self.l = [l_gy, l_y]

        g_params = lasagne.layers.get_all_params(l_gy, trainable = True)
        g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate)
        self.g_fn = theano.function([g_sym, gy_sym], g_loss, updates = g_updates, on_unused_input = 'ignore') 
Example #28
Source File: test_blas.py    From attention-lvcsr with MIT License
def test_local_dot22_to_dot22scalar():
    """
    This test that the bug in gh-1507 is really fixed
    """
    A = T.dmatrix()
    mode = theano.compile.mode.get_default_mode()
    opt = theano.tensor.opt.in2out(
        theano.tensor.blas.local_dot22_to_dot22scalar)
    mode = mode.__class__(optimizer=opt)

    x = T.dscalar()
    y = T.dscalar()
    z = T.dscalar()
    # make sure not to include a dimshuffle, as we don't optimize those cases
    m = T.dmatrix()
    r = T.drow()
    for idx, node in enumerate([
        # Old working cases
        T.mul(_dot22(A, A), x),
        T.mul(_dot22(A, A), x, y),
        T.mul(_dot22(A, A), x, r),
        T.mul(_dot22(A, A), m, x),
        T.mul(_dot22(A, A), x, m),
        T.mul(_dot22(A, A), x, (m * y)),
        T.mul(_dot22(A, A), (m * y), x),
        T.mul(_dot22(A, A), x, (r * y)),
        T.mul(_dot22(A, A), (r * y), x),
        T.mul(_dot22(A, A), (x * y), (m * x)),
        T.mul(_dot22(A, A), (r * y), (y * x)),

        # Case that was raising an assert that is fixed in gh-1507
        T.mul(_dot22(A, A), (m * y), m),
        T.mul(_dot22(A, A), m, (m * y)),
        T.mul(_dot22(A, A), (r * y), (m * x)),

        # assert fixed in gh-1507 and opt case added in gh-1515
        T.mul(_dot22(A, A), (m * y * z), m),
        T.mul(_dot22(A, A), m, (m * y * z)),

        # Opt case added in gh-1515
        T.mul(_dot22(A, A), T.mul(m, y, z), m),
        T.mul(_dot22(A, A), m, T.mul(m, y, z)),

        # Case that opt later in gh-1515
        T.mul(_dot22(A, A), (r * m), (m * x)),
    ]):
        node2 = theano.tensor.blas.local_dot22_to_dot22scalar.transform(
            node.owner)
        assert node2
        f = theano.function([x, y, z, m, r, A], node,
                            mode=mode, on_unused_input='ignore')
        f(.1, .2, .3, [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10]]) 
Example #29
Source File: net_theano.py    From visual_dynamics with MIT License
def build_action_cond_encoder_net(input_shapes, **kwargs):
    x_shape, u_shape = input_shapes

    X_var = T.tensor4('X')
    U_var = T.matrix('U')
    X_diff_var = T.tensor4('X_diff')
    X_next_var = X_var + X_diff_var

    l_x0 = L.InputLayer(shape=(None,) + x_shape, input_var=X_var, name='x')
    l_u = L.InputLayer(shape=(None,) + u_shape, input_var=U_var, name='u')

    l_x1 = L.Conv2DLayer(l_x0, 64, filter_size=6, stride=2, pad=0,
                         nonlinearity=nl.rectify,
                         name='x1')
    l_x2 = L.Conv2DLayer(l_x1, 64, filter_size=6, stride=2, pad=2,
                         nonlinearity=nl.rectify,
                         name='x2')
    l_x3 = L.Conv2DLayer(l_x2, 64, filter_size=6, stride=2, pad=2,
                         nonlinearity=nl.rectify,
                         name='x3')
    l_x3_shape = lasagne.layers.get_output_shape(l_x3)

    l_y4 = L.DenseLayer(l_x3, 1024, nonlinearity=nl.rectify, name='y')
    l_y4d = L.DenseLayer(l_y4, 2048, W=init.Uniform(1.0), nonlinearity=None)
    l_ud = L.DenseLayer(l_u, 2048, W=init.Uniform(0.1), nonlinearity=None)

    l_y4d_diff_pred = L.ElemwiseMergeLayer([l_y4d, l_ud], T.mul)
    l_y4_diff_pred = L.DenseLayer(l_y4d_diff_pred, 1024, W=init.Uniform(1.0), nonlinearity=None, name='y_diff_pred')

    l_y4_next_pred = L.ElemwiseMergeLayer([l_y4, l_y4_diff_pred], T.add, name='y_next_pred')

    l_y3_next_pred = L.DenseLayer(l_y4_next_pred, np.prod(l_x3_shape[1:]), nonlinearity=nl.rectify)
    l_x3_next_pred = L.ReshapeLayer(l_y3_next_pred, ([0],) + l_x3_shape[1:],
                                   name='x3_next_pred')

    l_x2_next_pred = LT.Deconv2DLayer(l_x3_next_pred, 64, filter_size=6, stride=2, pad=2,
                                   nonlinearity=nl.rectify,
                                   name='x2_next_pred')
    l_x1_next_pred = LT.Deconv2DLayer(l_x2_next_pred, 64, filter_size=6, stride=2, pad=2,
                                   nonlinearity=nl.rectify,
                                   name='x1_next_pred')
    l_x0_next_pred = LT.Deconv2DLayer(l_x1_next_pred, 3, filter_size=6, stride=2, pad=0,
                                   nonlinearity=None,
                                   name='x0_next_pred')

    loss_fn = lambda X, X_pred: ((X - X_pred) ** 2).mean(axis=0).sum() / 2.
    loss = loss_fn(X_next_var, lasagne.layers.get_output(l_x0_next_pred))

    net_name = 'ActionCondEncoderNet'
    input_vars = OrderedDict([(var.name, var) for var in [X_var, U_var, X_diff_var]])
    pred_layers = OrderedDict([('x0_next_pred', l_x0_next_pred)])
    return net_name, input_vars, pred_layers, loss 
Example #30
Source File: NN4Normal.py    From RaptorX-Contact with GNU General Public License v3.0
def NLL(self, y, useMeanOnly=False, sampleWeight=None):

		assert (y.ndim == 2)

                pi = numpy.pi

		if self.n_variables == 1:
			e = T.sqr( y -self.mean )/2.
			nll = numpy.log(2*pi)/2.
			
			if useMeanOnly or (self.sigma_sqr is None):
				nll = nll + e
			else:
				e = e / self.sigma_sqr
				nll = nll + e + T.log(self.sigma_sqr)/2.

		else:
			err = y - self.mean
			err_sqr = T.sqr( err )

			if useMeanOnly or (self.sigma_sqr is None):
				sig_sqr = T.ones_like(err_sqr)  # unit variances, same shape as err_sqr
			else:
				sig_sqr = self.sigma_sqr

			nll = T.sum(T.log(sig_sqr) + numpy.log(2*pi), axis=1, keepdims=True)/2.

			e = T.sum( err_sqr/sig_sqr, axis=1, keepdims=True )

			sig = T.sqrt( sig_sqr )
			f = T.prod( err/sig, axis=1, keepdims=True )

			if useMeanOnly or (self.corr is None):
				rho = T.zeros_like(e)
			else:
				rho = self.corr
				
			g = e - T.mul(rho, f) * 2.

			rho_sqr = T.sqr(rho)
			h = g / (2 * ( 1 - rho_sqr ) )

			nll = nll + h + T.log(1 - rho_sqr)/2. 


		if sampleWeight is None:
			return T.mean(nll)
		return T.sum(T.mul(nll, sampleWeight) )/T.sum(sampleWeight)


	## y has shape (batchSize, n_variables), sampleWeight shall have shape (batchSize, 1) instead of (batchSize,)
	## this function returns a vector