Python theano.tensor.mul() Examples

The following are 30 code examples of theano.tensor.mul(), taken from open-source projects. The source file and project for each example are listed above it. You may also want to check out all available functions and classes of the module theano.tensor.
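Before the project examples, here is a minimal, self-contained sketch (assuming only a standard Theano and NumPy installation) showing that T.mul is elementwise multiplication and follows Theano's usual broadcasting rules; the variable names are illustrative and not taken from any project below.

import numpy as np
import theano
import theano.tensor as T

a = T.matrix('a')   # shape (rows, cols)
b = T.row('b')      # shape (1, cols), broadcastable along the first axis

# T.mul multiplies elementwise; the row vector is broadcast across the rows of the matrix
f = theano.function([a, b], T.mul(a, b))

print(f(np.ones((2, 3), dtype=theano.config.floatX),
        np.array([[1., 2., 3.]], dtype=theano.config.floatX)))
# [[1. 2. 3.]
#  [1. 2. 3.]]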
Example #1
Source File: output_layer.py    From recnet with MIT License
def sequence_iteration(self, output, mask,use_dropout=0,dropout_value=0.5):

        dot_product = T.dot(output , self.t_w_out)

        net_o = T.add( dot_product , self.t_b_out )

        ex_net = T.exp(net_o)
        sum_net = T.sum(ex_net, axis=2, keepdims=True)
        softmax_o = ex_net / sum_net


        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, softmax_o)   + T.mul( (1. - mask) , 1e-6 )

        return output #result


######                     Linear Layer
######################################## 
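The layer above relies on a common masking idiom: broadcast a 0/1 mask over the class axis and use T.mul to suppress padded timesteps while keeping a tiny constant in their place. A stripped-down sketch of just that step, with hypothetical toy tensors rather than the layer's real members:

import theano
import theano.tensor as T

probs = T.tensor3('probs')        # (time, batch, classes) softmax outputs
mask = T.tensor3('mask')          # (time, batch, 1), 1 for real steps, 0 for padding

mask_b = T.addbroadcast(mask, 2)  # let the singleton class axis broadcast
masked = T.mul(mask_b, probs) + T.mul(1. - mask_b, 1e-6)

f = theano.function([probs, mask], masked)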
Example #2
Source File: Model4DistancePrediction.py    From RaptorX-Contact with GNU General Public License v3.0
def errors4one(self, z, out, weight=None, distLabelType='12C'):
	distBins = config.distCutoffs[distLabelType]
	label8 = DistanceUtils.LabelsOfOneDistance(config.ContactDefinition, distBins)
	label15 = DistanceUtils.LabelsOfOneDistance(config.InteractionLimit, distBins)

	z3C = T.cast( T.ge(z, label8), 'int32') + T.cast( T.ge(z, label15), 'int32')
	o3C = T.cast( T.ge(out, label8), 'int32') + T.cast( T.ge(out, label15), 'int32')

	if weight is not None:
            err = T.sum( T.mul(weight, T.neq(o3C, z3C) ) )*1./T.sum(weight)
	else:
            err = T.mean( T.neq(o3C , z3C) ) 

	## err is a scalar; convert it to a tensor with ndim=1
	return T.stack([err] )

    ## this function returns a vector of errors; its size equals the sum of ValueDims over all the responses
Example #3
Source File: rbm_adv.py    From SteinGAN with MIT License
def rbf_kernel(X):

    XY = T.dot(X, X.T)
    x2 = T.sum(X**2, axis=1).dimshuffle(0, 'x')
    X2e = T.repeat(x2, X.shape[0], axis=1)
    H = X2e +  X2e.T - 2. * XY

    V = H.flatten()
    # median distance
    h = T.switch(T.eq((V.shape[0] % 2), 0),
        # if even vector
        T.mean(T.sort(V)[ ((V.shape[0] // 2) - 1) : ((V.shape[0] // 2) + 1) ]),
        # if odd vector
        T.sort(V)[V.shape[0] // 2])

    h = T.sqrt(.5 * h / T.log(H.shape[0].astype('float32') + 1.)) 
    
    # compute the rbf kernel
    kxy = T.exp(-H / (h ** 2) / 2.0)

    dxkxy = -T.dot(kxy, X)
    sumkxy = T.sum(kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = T.add(dxkxy, T.mul(X, sumkxy)) / (h ** 2)

    return kxy, dxkxy 
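A minimal way to exercise the kernel above (assuming rbf_kernel is in scope and Theano/NumPy are installed) is to compile it on a symbolic matrix and evaluate it on a small random sample; the shapes in the comments are only for orientation:

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
kxy, dxkxy = rbf_kernel(X)              # symbolic kernel matrix and its gradient term
f = theano.function([X], [kxy, dxkxy])

samples = np.random.randn(5, 3).astype(theano.config.floatX)
K, dK = f(samples)                      # K: (5, 5) kernel matrix, dK: (5, 3)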
Example #4
Source File: utils.py    From RaptorX-Contact with GNU General Public License v3.0
def ConvByPattern(x, patterns, mask=None):
    W = np.transpose(patterns, (3, 0, 1, 2))
    out2 = T.nnet.conv2d(x.dimshuffle(0, 3, 1, 2), W, filter_shape=W.shape, border_mode='half')
    if mask is not None:
        ## mask has shape (batchSize, #rows_to_be_masked, nCols)

        ## a subtensor of out2 along the horiz direction
        out2_sub_horiz = out2[:, :, :mask.shape[1], :]
        mask_horiz = mask.dimshuffle(0, 'x', 1, 2)
        out3 = T.set_subtensor(out2_sub_horiz, T.mul(out2_sub_horiz, mask_horiz) )

        ## a subtensor of out3 along the vertical direction
        out3_sub_vertical = out3[:, :, :, :mask.shape[1] ]
        mask_vertical = mask.dimshuffle(0, 'x', 2, 1)
        y = T.set_subtensor(out3_sub_vertical, T.mul(out3_sub_vertical, mask_vertical) )
    else:
        y = out2

    y = y.dimshuffle(0, 2, 3, 1)

    return y/np.prod(patterns.shape[1:3]) 
Example #5
Source File: recurrent_layer.py    From recnet with MIT License
def t_forward_step(self,mask, rzup_in_sig, h_pre, u_rz, u_up, t_n_out): #u_r, u_z,



        signal_act = self.activation
        gate_act = self.sigmoid()

        preact = T.dot( h_pre, u_rz)


        r = gate_act( T.add( rzup_in_sig[:, 0:t_n_out] , preact[:, 0:t_n_out] )) #T.dot( h_pre, u_r) ) )
        z = gate_act( T.add( rzup_in_sig[:, t_n_out:2 * t_n_out] , preact[:, t_n_out:2 * t_n_out] )) #T.dot(h_pre, u_z) ))

        h_update = signal_act( T.add( rzup_in_sig[:, 2*t_n_out:3*t_n_out] , T.dot( T.mul( h_pre, r), u_up) ))

        h_new = T.add( (1.-z) * h_update , z * h_pre )

        mask = T.addbroadcast(mask, 1)
        out_sig =  T.add( mask * h_new   , (1. - mask) * h_pre )

        return out_sig 
Example #6
Source File: NN4LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def NLL(self, y, sampleWeight=None):
        ###Return the mean of the negative log-likelihood of the prediction of this model under a given target distribution.

        if sampleWeight is not None:
            return -T.sum(T.mul(sampleWeight, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] ) )/T.sum(sampleWeight)
        else:
            return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) 
Example #7
Source File: output_layer.py    From recnet with MIT License
def sequence_iteration(self, output, mask, use_dropout=0, dropout_value=0.5):

        dot_product = T.dot(output, self.t_w_out)

        linear_o = T.add(dot_product, self.t_b_out)


        mask = T.addbroadcast(mask, 2)  # TODO: necessary?
        output = T.mul(mask, linear_o) + T.mul((1. - mask), 1e-6)

        return output  # result


### TEST FUNCTIONS  # TODO: make a new file with test functions
Example #8
Source File: ln_reccurent_layer.py    From recnet with MIT License
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ig_c, w_fg_c, w_og_c, w_ifco, b_ifco, ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)

        inner_act = self.activation  # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid

        # Input Gate
        ig_t1 = gate_act(T.add(preact[:, 0:t_n_out], T.mul(pre_cell_sig, w_ig_c)))
        # Forget Gate
        fg_t1 = gate_act(T.add(preact[:, 1 * t_n_out:2 * t_n_out], T.mul(pre_cell_sig, w_fg_c),))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act( T.add(preact[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)

        # Output Gate
        og_t1 = gate_act(
            T.add(preact[:, 3 * t_n_out:4 * t_n_out], T.mul(cs_t1_ln, w_og_c)))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1] 
Example #9
Source File: ln_reccurent_layer.py    From recnet with MIT License
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3,
                       t_n_out):

        cur_w_in_sig_ln = self.ln(cur_w_in_sig, ln_b1, ln_s1)

        pre_w_out_sig = T.dot(pre_out_sig, w_ifco)
        pre_w_out_sig_ln = self.ln(pre_w_out_sig, ln_b2, ln_s2)

        preact = T.add(cur_w_in_sig_ln, pre_w_out_sig_ln, b_ifco)



        inner_act = self.activation # T.nnet.hard_sigmoid #T.tanh # T.nnet.hard_sigmoid T.tanh
        gate_act = self.sigmoid()  # T.nnet.hard_sigmoid #T.nnet.sigmoid

        # Input Gate
        ig_t1 = gate_act(preact[:, 0:t_n_out])
        # Forget Gate
        fg_t1 = gate_act(preact[:, 1 * t_n_out:2 * t_n_out])
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(preact[:, 2 * t_n_out:3 * t_n_out])))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig

        cs_t1_ln = self.ln(cs_t1, ln_b3, ln_s3)
        # Output Gate
        og_t1 = gate_act(preact[:, 3 * t_n_out:4 * t_n_out])
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1_ln))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1] 
Example #10
Source File: helpers.py    From deep-prior with GNU General Public License v3.0
def SlopeLinInv(slope):
    """
    Truncated linear unit
    :param slope: slope of negative quadrant
    :return: x if x > 0 else x*slope
    """
    import theano.tensor as T

    def inner(x):
        return T.switch(T.gt(x, 0), x, T.mul(x, slope))
    return inner 
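A short usage sketch (assuming Theano is installed): the closure returned above behaves like a leaky-ReLU-style activation whose negative side is scaled by the given slope.

import numpy as np
import theano
import theano.tensor as T

act = SlopeLinInv(0.1)          # the factory defined above
x = T.vector('x')
f = theano.function([x], act(x))

print(f(np.array([-2., 0., 3.], dtype=theano.config.floatX)))
# [-0.2  0.   3. ]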
Example #11
Source File: ln_reccurent_layer.py    From recnet with MIT License
def t_forward_step(self,mask, rzup_in_sig, h_pre,b_rzup, u_rz, u_up,ln_b1,ln_s1, ln_b2,ln_s2,ln_b3,ln_s3, t_n_out):



        signal_act = self.activation
        gate_act = self.sigmoid()

        rzup_in_sig_ln = self.ln(rzup_in_sig, ln_b1, ln_s1)

        rzup_b_in_sig_ln = T.add(rzup_in_sig_ln, b_rzup)

        preact = T.dot( h_pre, u_rz)

        preact_ln = self.ln(preact, ln_b2, ln_s2)

        r = gate_act( T.add( rzup_b_in_sig_ln[:, 0:t_n_out] , preact_ln[:, 0:t_n_out] ))
        z = gate_act( T.add( rzup_b_in_sig_ln[:, t_n_out:2 * t_n_out] , preact_ln[:, t_n_out:2 * t_n_out] ))

        preactx = T.dot(h_pre , u_up)
        preactx_ln = self.ln(preactx, ln_b3, ln_s3)
        h_pre_r_ln = T.mul( preactx_ln, r)

        h_update = signal_act( T.add( rzup_b_in_sig_ln[:, 2*t_n_out:3*t_n_out] , h_pre_r_ln ))

        h_new = T.add( (1.-z) * h_update , z * h_pre )

        mask = T.addbroadcast(mask, 1)
        out_sig =  T.add( mask * h_new   , (1. - mask) * h_pre )

        return out_sig 
Example #12
Source File: test_sigm.py    From D-VAE with MIT License
def test_local_sigm_times_exp(self):
        """
        Test the `local_sigm_times_exp` optimization.
        exp(x) * sigm(-x) -> sigm(x)
        exp(-x) * sigm(x) -> sigm(-x)
        """
        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [sigmoid])

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [tensor.neg, sigmoid])

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [tensor.neg, sigmoid, tensor.neg])

        f = theano.function(
                [x, y],
                (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                 tensor.exp(x * y) * tensor.exp(y)),
                mode=m)
        assert hasattr(f.maker.fgraph.outputs[0].tag, 'trace')
        match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
                  tensor.mul]) 
Example #13
Source File: test_sigm.py    From attention-lvcsr with MIT License
def test_local_sigm_times_exp(self):
        """
        Test the `local_sigm_times_exp` optimization.
        exp(x) * sigm(-x) -> sigm(x)
        exp(-x) * sigm(x) -> sigm(-x)
        """
        def match(func, ops):
            # print [node.op.scalar_op for node in func.maker.fgraph.toposort()]
            assert [node.op for node in func.maker.fgraph.toposort()] == ops
        m = self.get_mode(excluding=['local_elemwise_fusion', 'inplace'])
        x, y = tensor.vectors('x', 'y')

        f = theano.function([x], sigmoid(-x) * tensor.exp(x), mode=m)
        match(f, [sigmoid])

        f = theano.function([x], sigmoid(x) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid])

        f = theano.function([x], -(-(-(sigmoid(x)))) * tensor.exp(-x), mode=m)
        match(f, [tensor.neg, sigmoid, tensor.neg])

        f = theano.function(
                [x, y],
                (sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
                 tensor.exp(x * y) * tensor.exp(y)),
                mode=m)
        match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
                  tensor.mul]) 
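Both tests above check which ops survive graph optimization by walking the compiled function's graph. The same inspection can be done by hand; in this sketch (assuming the default Theano mode, which includes local_sigm_times_exp) the exp(x) * sigm(-x) product should typically collapse to a single sigmoid Elemwise:

import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], T.exp(x) * T.nnet.sigmoid(-x))

# list the ops left in the optimized graph; ideally just one sigmoid Elemwise
print([node.op for node in f.maker.fgraph.toposort()])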
Example #14
Source File: recurrent_layer.py    From recnet with MIT License
def t_forward_step(self, mask, cur_w_in_sig, pre_out_sig, pre_cell_sig, w_ifco, b_ifco,
                       t_n_out):

        ifco = T.add(T.dot(pre_out_sig, w_ifco), b_ifco)

        inner_act = self.activation
        gate_act = self.sigmoid()

        # Input Gate
        ig_t1 = gate_act(T.add(ifco[:, 0:t_n_out], cur_w_in_sig[:, 0:t_n_out]))
        # Forget Gate
        fg_t1 = gate_act(T.add(ifco[:, 1 * t_n_out:2 * t_n_out],
                               cur_w_in_sig[:, 1 * t_n_out:2 * t_n_out]))
        # Cell State
        cs_t1 = T.add(T.mul(fg_t1, pre_cell_sig), T.mul(ig_t1, inner_act(
            T.add(ifco[:, 2 * t_n_out:3 * t_n_out], cur_w_in_sig[:, 2 * t_n_out:3 * t_n_out]))))

        mask = T.addbroadcast(mask, 1)
        cs_t1 = mask * cs_t1 + (1. - mask) * pre_cell_sig
        # functionality: cs_t1 =   T.switch(mask , cs_t1, pre_cell_sig)

        # Output Gate
        og_t1 = gate_act(
            T.add(ifco[:, 3 * t_n_out:4 * t_n_out], cur_w_in_sig[:, 3 * t_n_out:4 * t_n_out]))
        # Output LSTM
        out_sig = T.mul(og_t1, inner_act(cs_t1))

        out_sig = mask * out_sig + (1. - mask) * pre_out_sig

        return [out_sig, cs_t1] 
Example #15
Source File: NN4LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def errors(self, y, sampleWeight=None):
	assert (y.ndim == 2)
	err = T.neq(self.y_pred, y)
	if sampleWeight is None:
		return T.mean(err, axis=0)

	assert (sampleWeight.ndim == 2)
	return T.sum( T.mul(err, sampleWeight), axis=0)/T.sum(sampleWeight)

    ## this function returns a scalar 
Example #16
Source File: NN4Normal.py    From RaptorX-Contact with GNU General Public License v3.0
def errors(self, y, sampleWeight=None):
		assert (y.ndim == 2)
		err_sqr = T.sqr( y - self.y_pred )
		if sampleWeight is None:
			return T.sqrt(T.mean(err_sqr, axis=0 ) )

		assert (sampleWeight.ndim == 2)
		if self.n_variables == 1:
			weight = sampleWeight
		else:
			weight = T.concatenate( [ sampleWeight, sampleWeight], axis=1 )
		return T.sqrt( T.sum(T.mul( err_sqr, weight ), axis=0)/ T.sum(sampleWeight) )

	## y has shape (batchSize, n_variables), sampleWeight shall have shape (batchSize, 1) instead of (batchSize,) 
Example #17
Source File: utils.py    From RaptorX-Contact with GNU General Public License v3.0
def ExpandBy4dPattern(x, patterns):
    ##patterns has shape (numPatterns, nPatternRows, nPatternCols, numLabels)
    ##each element is between 0 and 1 and the sum of the vector patterns[i, j, k, :] is equal to 1
    pshape = patterns.shape

    ## y1 has shape (batchSize, nRows * pshape[1], nCols * pshape[2], pshape[0])
    y1 = MyRepeat(x, (pshape[1], pshape[2]), axes=[1, 2])
    expandedPatterns = T.tile(patterns, (1, x.shape[1], x.shape[2], 1) ).dimshuffle('x', 1, 2, 0, 3)

    ylist = []
    for i in xrange(pshape[3]):
        y2 = T.mul( y1, expandedPatterns[:, :, :, :, i] )
        y3 = T.sum( y2, axis=3, keepdims=True)
        ylist.append(y3)
    return T.concatenate( ylist, axis=3) 
Example #18
Source File: LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def errorsBreakdown(self, y):
	
	##truth shall be cast to at least int32
	def breakDown3C(pred=None, truth=None):
	    labelcount = T.bincount(truth, minlength=3)
            err = T.neq(pred, truth)
            truth_with_wrong_pred = truth[err.nonzero()]
	    errcount = T.bincount(truth_with_wrong_pred, minlength=3)

	    ## use 0.0001 to avoid division by 0
            return T.mul(errcount, 1./(labelcount + 0.0001) )

	if self.n_out == 3:
	    truth = T.cast(y, 'int32')
	    return breakDown3C(self.y_pred, truth)

	if self.n_out == 12:
	    ## convert the 12-label system to the 3-label system
	    ## 0, 1, 2, 3 to 0; 4,5,6,7,8,9,10 to 1; and 11 to 2
	    y1 = T.zeros_like(y)
	    y2 = T.gt(y, 3)
	    y3 = T.gt(y, 10)
	    truth = T.cast(y1 + y2 + y3, 'int32')

	    pred1 = T.zeros_like(self.y_pred)
	    pred2 = T.gt(self.y_pred, 3)
	    pred3 = T.gt(self.y_pred, 10)
	    pred = T.cast( pred1 + pred2 + pred3, 'int32')

	    return breakDown3C(pred, truth)
            
	else:
	    print 'this function only works when n_out is either 3 or 12'
	    sys.exit(-1)

    ## calculate the confusion matrix of the prediction 
Example #19
Source File: LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def errors(self, y, sampleWeight=None):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch ; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """

        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
	    if sampleWeight is not None:
		return T.sum( T.mul(sampleWeight, T.neq(self.y_pred, y) ) ) * 1./T.sum(sampleWeight)
	    else:
                return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()

    ## T.bincount is a weird function. Its return value has the same type as the dtype of the elements in the array to be counted.
    ##calculate the classification errors for each of the three categories 
Example #20
Source File: LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def negative_log_likelihood(self, y, sampleWeight=None):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.

        if sampleWeight is not None:
            return -T.sum(T.mul(sampleWeight, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] ) )/T.sum(sampleWeight)
	else:
            return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

        # end-snippet-2 
Example #21
Source File: loss_function.py    From recnet with MIT License
def output_error(self, input_sequence,   true_output, mask):

        outputs = T.pow(true_output - input_sequence, 2)
        outputs = T.sum(outputs, axis=2) / outputs.shape[2]
        outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask)
        return T.sum(outputs) / T.sum(mask)



######    2-class weighted cross entropy
######################################## 
Example #22
Source File: loss_function.py    From recnet with MIT License
def output_error(self, input_sequence,   true_output, mask):

        outputs = self._w_crossentropy(input_sequence, true_output)

        #outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask) #todo correct mask implementation? influence on result?

        return T.sum(outputs) / T.sum(mask)



######            Standard cross entropy
######################################## 
Example #23
Source File: loss_function.py    From recnet with MIT License
def output_error(self, input_sequence,   true_output, mask):

        outputs = T.nnet.categorical_crossentropy(input_sequence, true_output)

        outputs = T.mul(outputs.dimshuffle(0,1,'x'), mask)

        return T.sum(outputs) / T.sum(mask) 
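The three recnet loss functions above share one masked-mean idiom: multiply the per-step loss by the mask with T.mul, then divide by the number of unmasked steps. Distilled to a standalone sketch with hypothetical 2-D toy tensors (the real losses operate on 3-D sequences):

import theano
import theano.tensor as T

step_loss = T.matrix('step_loss')   # (time, batch) loss per timestep
mask = T.matrix('mask')             # (time, batch), 1 for real steps, 0 for padding

masked_mean = T.sum(T.mul(step_loss, mask)) / T.sum(mask)
loss_fn = theano.function([step_loss, mask], masked_mean)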
Example #24
Source File: test_blas.py    From attention-lvcsr with MIT License
def test_gemm_canonicalize():
    X, Y, Z, a, b = T.matrix('X'), T.matrix('Y'), T.matrix('Z'), T.scalar(
        'a'), T.scalar('b')
    R, S, U, c, d = T.matrix('R'), T.matrix('S'), T.matrix('U'), T.scalar(
        'c'), T.scalar('d')
    u = T.row('u')
    v = T.vector('v')
    w = T.col('w')

    can = []
    _gemm_canonicalize(X + Y + Z, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, Z)]

    can = []
    _gemm_canonicalize(X + Y + u, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, u)], can

    can = []
    _gemm_canonicalize(X + Y + v, 1.0, can, 0)
    # [(1.0, X), (1.0, Y), (1.0, InplaceDimShuffle{x,0}(v))]
    assert can[:2] == [(1.0, X), (1.0, Y)]
    assert isinstance(can[2], tuple)
    assert len(can[2]) == 2
    assert can[2][0] == 1.0
    assert can[2][1].owner
    assert isinstance(can[2][1].owner.op, T.DimShuffle)
    assert can[2][1].owner.inputs == [v]

    can = []
    _gemm_canonicalize(X + Y + w, 1.0, can, 0)
    assert can == [(1.0, X), (1.0, Y), (1.0, w)], can

    can = []
    _gemm_canonicalize(a * X + Y - b * Z * c, 1.0, can, 0)
    assert can[0] == (a, X)
    assert can[1] == (1.0, Y)
    assert can[2][0].owner.op == T.mul
    assert can[2][0].owner.inputs[0].owner.op == T.neg
    assert can[2][0].owner.inputs[0].owner.inputs[0] == c
    assert can[2][0].owner.inputs[1] == b

    can = []
    _gemm_canonicalize((-d) * X - (a * X + Y - b * Z * c), 1.0, can, 0)
    # print can
    assert can[0][0].owner.op == T.neg
    assert can[0][0].owner.inputs[0] == d
    assert can[0][1] == X
    assert can[1][0].owner.op == T.neg
    assert can[1][0].owner.inputs[0] == a
    assert can[2] == (-1.0, Y)
    assert can[3][0].owner.op == T.mul
    assert can[3][0].owner.inputs == [c, b] 
Example #25
Source File: main.py    From MemN2N with MIT License
def __init__(self, incomings, vocab, embedding_size, A, A_T, C, C_T, nonlinearity=lasagne.nonlinearities.softmax, **kwargs):
        super(MemoryNetworkLayer, self).__init__(incomings, **kwargs)
        if len(incomings) != 3:
            raise NotImplementedError

        batch_size, max_seqlen, max_sentlen = self.input_shapes[0]

        l_context_in = lasagne.layers.InputLayer(shape=(batch_size, max_seqlen, max_sentlen))
        l_B_embedding = lasagne.layers.InputLayer(shape=(batch_size, embedding_size))
        l_context_pe_in = lasagne.layers.InputLayer(shape=(batch_size, max_seqlen, max_sentlen, embedding_size))

        l_context_in = lasagne.layers.ReshapeLayer(l_context_in, shape=(batch_size * max_seqlen * max_sentlen, ))
        l_A_embedding = lasagne.layers.EmbeddingLayer(l_context_in, len(vocab)+1, embedding_size, W=A)
        self.A = l_A_embedding.W
        l_A_embedding = lasagne.layers.ReshapeLayer(l_A_embedding, shape=(batch_size, max_seqlen, max_sentlen, embedding_size))
        l_A_embedding = lasagne.layers.ElemwiseMergeLayer((l_A_embedding, l_context_pe_in), merge_function=T.mul)
        l_A_embedding = SumLayer(l_A_embedding, axis=2)
        l_A_embedding = TemporalEncodingLayer(l_A_embedding, T=A_T)
        self.A_T = l_A_embedding.T

        l_C_embedding = lasagne.layers.EmbeddingLayer(l_context_in, len(vocab)+1, embedding_size, W=C)
        self.C = l_C_embedding.W
        l_C_embedding = lasagne.layers.ReshapeLayer(l_C_embedding, shape=(batch_size, max_seqlen, max_sentlen, embedding_size))
        l_C_embedding = lasagne.layers.ElemwiseMergeLayer((l_C_embedding, l_context_pe_in), merge_function=T.mul)
        l_C_embedding = SumLayer(l_C_embedding, axis=2)
        l_C_embedding = TemporalEncodingLayer(l_C_embedding, T=C_T)
        self.C_T = l_C_embedding.T

        l_prob = InnerProductLayer((l_A_embedding, l_B_embedding), nonlinearity=nonlinearity)
        l_weighted_output = BatchedDotLayer((l_prob, l_C_embedding))

        l_sum = lasagne.layers.ElemwiseSumLayer((l_weighted_output, l_B_embedding))

        self.l_context_in = l_context_in
        self.l_B_embedding = l_B_embedding
        self.l_context_pe_in = l_context_pe_in
        self.network = l_sum

        params = lasagne.layers.helper.get_all_params(self.network, trainable=True)
        values = lasagne.layers.helper.get_all_param_values(self.network, trainable=True)
        for p, v in zip(params, values):
            self.add_param(p, v.shape, name=p.name)

        zero_vec_tensor = T.vector()
        self.zero_vec = np.zeros(embedding_size, dtype=theano.config.floatX)
        self.set_zero = theano.function([zero_vec_tensor], updates=[(x, T.set_subtensor(x[0, :], zero_vec_tensor)) for x in [self.A, self.C]]) 
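Several of the remaining examples pass T.mul as the merge function of a Lasagne ElemwiseMergeLayer to take an elementwise product of two layer outputs. A minimal sketch of that pattern (assuming Lasagne is installed; the layer names are illustrative):

import theano.tensor as T
import lasagne.layers as L

l_a = L.InputLayer(shape=(None, 8))
l_b = L.InputLayer(shape=(None, 8))

# elementwise product of the two inputs, computed symbolically with T.mul
l_prod = L.ElemwiseMergeLayer([l_a, l_b], merge_function=T.mul)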
Example #26
Source File: ind_model.py    From planetoid with MIT License
def build(self):
        """build the model. This method should be called after self.add_data.
        """
        x_sym = sparse.csr_matrix('x', dtype = 'float32')
        self.x_sym = x_sym
        y_sym = T.imatrix('y')
        gx_sym = sparse.csr_matrix('gx', dtype = 'float32')
        gy_sym = T.ivector('gy')
        gz_sym = T.vector('gz')

        l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym)
        l_gx_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = gx_sym)
        l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym)

        l_x_1 = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        l_x_2 = layers.SparseLayer(l_x_in, self.embedding_size)
        W = l_x_2.W
        l_x_2 = layers.DenseLayer(l_x_2, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        if self.use_feature:
            l_x = lasagne.layers.ConcatLayer([l_x_1, l_x_2], axis = 1)
            l_x = layers.DenseLayer(l_x, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        else:
            l_x = l_x_2

        l_gx = layers.SparseLayer(l_gx_in, self.embedding_size, W = W)
        if self.neg_samp > 0:
            l_gy = lasagne.layers.EmbeddingLayer(l_gy_in, input_size = self.num_ver, output_size = self.embedding_size)
            l_gx = lasagne.layers.ElemwiseMergeLayer([l_gx, l_gy], T.mul)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gz_sym)).sum()
        else:
            l_gx = lasagne.layers.DenseLayer(l_gx, self.num_ver, nonlinearity = lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gx)
            g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, gy_sym).sum()
        
        self.l = [l_x, l_gx]

        py_sym = lasagne.layers.get_output(l_x)
        loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_1)
            loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_x_2)
            loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

        params = [l_x_1.W, l_x_1.b, l_x_2.W, l_x_2.b, l_x.W, l_x.b] if self.use_feature else [l_x.W, l_x.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_x)
        updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate)
        self.train_fn = theano.function([x_sym, y_sym], loss, updates = updates)

        g_params = lasagne.layers.get_all_params(l_gx)
        g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate)
        self.g_fn = theano.function([gx_sym, gy_sym, gz_sym], g_loss, updates = g_updates, on_unused_input = 'ignore')

        self.test_fn = theano.function([x_sym], py_sym) 
Example #27
Source File: trans_model.py    From planetoid with MIT License
def build(self):
        """build the model. This method should be called after self.add_data.
        """
        x_sym = sparse.csr_matrix('x', dtype = 'float32')
        y_sym = T.imatrix('y')
        g_sym = T.imatrix('g')
        gy_sym = T.vector('gy')
        ind_sym = T.ivector('ind')

        l_x_in = lasagne.layers.InputLayer(shape = (None, self.x.shape[1]), input_var = x_sym)
        l_g_in = lasagne.layers.InputLayer(shape = (None, 2), input_var = g_sym)
        l_ind_in = lasagne.layers.InputLayer(shape = (None, ), input_var = ind_sym)
        l_gy_in = lasagne.layers.InputLayer(shape = (None, ), input_var = gy_sym)

        num_ver = max(self.graph.keys()) + 1
        l_emb_in = lasagne.layers.SliceLayer(l_g_in, indices = 0, axis = 1)
        l_emb_in = lasagne.layers.EmbeddingLayer(l_emb_in, input_size = num_ver, output_size = self.embedding_size)
        l_emb_out = lasagne.layers.SliceLayer(l_g_in, indices = 1, axis = 1)
        if self.neg_samp > 0:
            l_emb_out = lasagne.layers.EmbeddingLayer(l_emb_out, input_size = num_ver, output_size = self.embedding_size)

        l_emd_f = lasagne.layers.EmbeddingLayer(l_ind_in, input_size = num_ver, output_size = self.embedding_size, W = l_emb_in.W)
        l_x_hid = layers.SparseLayer(l_x_in, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        
        if self.use_feature:
            l_emd_f = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
            l_y = lasagne.layers.ConcatLayer([l_x_hid, l_emd_f], axis = 1)
            l_y = layers.DenseLayer(l_y, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)
        else:
            l_y = layers.DenseLayer(l_emd_f, self.y.shape[1], nonlinearity = lasagne.nonlinearities.softmax)

        py_sym = lasagne.layers.get_output(l_y)
        loss = lasagne.objectives.categorical_crossentropy(py_sym, y_sym).mean()
        if self.layer_loss and self.use_feature:
            hid_sym = lasagne.layers.get_output(l_x_hid)
            loss += lasagne.objectives.categorical_crossentropy(hid_sym, y_sym).mean()
            emd_sym = lasagne.layers.get_output(l_emd_f)
            loss += lasagne.objectives.categorical_crossentropy(emd_sym, y_sym).mean()

        if self.neg_samp == 0:
            l_gy = layers.DenseLayer(l_emb_in, num_ver, nonlinearity = lasagne.nonlinearities.softmax)
            pgy_sym = lasagne.layers.get_output(l_gy)
            g_loss = lasagne.objectives.categorical_crossentropy(pgy_sym, lasagne.layers.get_output(l_emb_out)).sum()
        else:
            l_gy = lasagne.layers.ElemwiseMergeLayer([l_emb_in, l_emb_out], T.mul)
            pgy_sym = lasagne.layers.get_output(l_gy)
            g_loss = - T.log(T.nnet.sigmoid(T.sum(pgy_sym, axis = 1) * gy_sym)).sum()

        params = [l_emd_f.W, l_emd_f.b, l_x_hid.W, l_x_hid.b, l_y.W, l_y.b] if self.use_feature else [l_y.W, l_y.b]
        if self.update_emb:
            params = lasagne.layers.get_all_params(l_y)
        updates = lasagne.updates.sgd(loss, params, learning_rate = self.learning_rate)

        self.train_fn = theano.function([x_sym, y_sym, ind_sym], loss, updates = updates, on_unused_input = 'ignore')
        self.test_fn = theano.function([x_sym, ind_sym], py_sym, on_unused_input = 'ignore')
        self.l = [l_gy, l_y]

        g_params = lasagne.layers.get_all_params(l_gy, trainable = True)
        g_updates = lasagne.updates.sgd(g_loss, g_params, learning_rate = self.g_learning_rate)
        self.g_fn = theano.function([g_sym, gy_sym], g_loss, updates = g_updates, on_unused_input = 'ignore') 
Example #28
Source File: test_blas.py    From attention-lvcsr with MIT License
def test_local_dot22_to_dot22scalar():
    """
    This test that the bug in gh-1507 is really fixed
    """
    A = T.dmatrix()
    mode = theano.compile.mode.get_default_mode()
    opt = theano.tensor.opt.in2out(
        theano.tensor.blas.local_dot22_to_dot22scalar)
    mode = mode.__class__(optimizer=opt)

    x = T.dscalar()
    y = T.dscalar()
    z = T.dscalar()
    # make sure not to include a dimshuffle, as we don't optimize those cases
    m = T.dmatrix()
    r = T.drow()
    for idx, node in enumerate([
        # Old working cases
        T.mul(_dot22(A, A), x),
        T.mul(_dot22(A, A), x, y),
        T.mul(_dot22(A, A), x, r),
        T.mul(_dot22(A, A), m, x),
        T.mul(_dot22(A, A), x, m),
        T.mul(_dot22(A, A), x, (m * y)),
        T.mul(_dot22(A, A), (m * y), x),
        T.mul(_dot22(A, A), x, (r * y)),
        T.mul(_dot22(A, A), (r * y), x),
        T.mul(_dot22(A, A), (x * y), (m * x)),
        T.mul(_dot22(A, A), (r * y), (y * x)),

        # Case that was raising an assert that is fixed in gh-1507
        T.mul(_dot22(A, A), (m * y), m),
        T.mul(_dot22(A, A), m, (m * y)),
        T.mul(_dot22(A, A), (r * y), (m * x)),

        # assert fixed in gh-1507 and opt case added in gh-1515
        T.mul(_dot22(A, A), (m * y * z), m),
        T.mul(_dot22(A, A), m, (m * y * z)),

        # Opt case added in gh-1515
        T.mul(_dot22(A, A), T.mul(m, y, z), m),
        T.mul(_dot22(A, A), m, T.mul(m, y, z)),

        # Case that opt later in gh-1515
        T.mul(_dot22(A, A), (r * m), (m * x)),
    ]):
        node2 = theano.tensor.blas.local_dot22_to_dot22scalar.transform(
            node.owner)
        assert node2
        f = theano.function([x, y, z, m, r, A], node,
                            mode=mode, on_unused_input='ignore')
        f(.1, .2, .3, [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10]]) 
Example #29
Source File: net_theano.py    From visual_dynamics with MIT License
def build_action_cond_encoder_net(input_shapes, **kwargs):
    x_shape, u_shape = input_shapes

    X_var = T.tensor4('X')
    U_var = T.matrix('U')
    X_diff_var = T.tensor4('X_diff')
    X_next_var = X_var + X_diff_var

    l_x0 = L.InputLayer(shape=(None,) + x_shape, input_var=X_var, name='x')
    l_u = L.InputLayer(shape=(None,) + u_shape, input_var=U_var, name='u')

    l_x1 = L.Conv2DLayer(l_x0, 64, filter_size=6, stride=2, pad=0,
                         nonlinearity=nl.rectify,
                         name='x1')
    l_x2 = L.Conv2DLayer(l_x1, 64, filter_size=6, stride=2, pad=2,
                         nonlinearity=nl.rectify,
                         name='x2')
    l_x3 = L.Conv2DLayer(l_x2, 64, filter_size=6, stride=2, pad=2,
                         nonlinearity=nl.rectify,
                         name='x3')
    l_x3_shape = lasagne.layers.get_output_shape(l_x3)

    l_y4 = L.DenseLayer(l_x3, 1024, nonlinearity=nl.rectify, name='y')
    l_y4d = L.DenseLayer(l_y4, 2048, W=init.Uniform(1.0), nonlinearity=None)
    l_ud = L.DenseLayer(l_u, 2048, W=init.Uniform(0.1), nonlinearity=None)

    l_y4d_diff_pred = L.ElemwiseMergeLayer([l_y4d, l_ud], T.mul)
    l_y4_diff_pred = L.DenseLayer(l_y4d_diff_pred, 1024, W=init.Uniform(1.0), nonlinearity=None, name='y_diff_pred')

    l_y4_next_pred = L.ElemwiseMergeLayer([l_y4, l_y4_diff_pred], T.add, name='y_next_pred')

    l_y3_next_pred = L.DenseLayer(l_y4_next_pred, np.prod(l_x3_shape[1:]), nonlinearity=nl.rectify)
    l_x3_next_pred = L.ReshapeLayer(l_y3_next_pred, ([0],) + l_x3_shape[1:],
                                   name='x3_next_pred')

    l_x2_next_pred = LT.Deconv2DLayer(l_x3_next_pred, 64, filter_size=6, stride=2, pad=2,
                                   nonlinearity=nl.rectify,
                                   name='x2_next_pred')
    l_x1_next_pred = LT.Deconv2DLayer(l_x2_next_pred, 64, filter_size=6, stride=2, pad=2,
                                   nonlinearity=nl.rectify,
                                   name='x1_next_pred')
    l_x0_next_pred = LT.Deconv2DLayer(l_x1_next_pred, 3, filter_size=6, stride=2, pad=0,
                                   nonlinearity=None,
                                   name='x0_next_pred')

    loss_fn = lambda X, X_pred: ((X - X_pred) ** 2).mean(axis=0).sum() / 2.
    loss = loss_fn(X_next_var, lasagne.layers.get_output(l_x0_next_pred))

    net_name = 'ActionCondEncoderNet'
    input_vars = OrderedDict([(var.name, var) for var in [X_var, U_var, X_diff_var]])
    pred_layers = OrderedDict([('x0_next_pred', l_x0_next_pred)])
    return net_name, input_vars, pred_layers, loss 
Example #30
Source File: NN4Normal.py    From RaptorX-Contact with GNU General Public License v3.0
def NLL(self, y, useMeanOnly=False, sampleWeight=None):

		assert (y.ndim == 2)

                pi = numpy.pi

		if self.n_variables == 1:
			e = T.sqr( y -self.mean )/2.
			nll = numpy.log(2*pi)/2.
			
			if useMeanOnly or (self.sigma_sqr is None):
				nll = nll + e
			else:
				e = e / self.sigma_sqr
				nll = nll + e + T.log(self.sigma_sqr)/2.

		else:
			err = y - self.mean
			err_sqr = T.sqr( err )

			if useMeanOnly or (self.sigma_sqr is None):
				sig_sqr = T.ones_like(err_sqr)  # unit variances, same shape as err_sqr
			else:
				sig_sqr = self.sigma_sqr

			nll = T.sum(T.log(sig_sqr) + numpy.log(2*pi), axis=1, keepdims=True)/2.

			e = T.sum( err_sqr/sig_sqr, axis=1, keepdims=True )

			sig = T.sqrt( sig_sqr )
			f = T.prod( err/sig, axis=1, keepdims=True )

			if useMeanOnly or (self.corr is None):
				rho = T.zeros_like(e)
			else:
				rho = self.corr
				
			g = e - T.mul(rho, f) * 2.

			rho_sqr = T.sqr(rho)
			h = g / (2 * ( 1 - rho_sqr ) )

			nll = nll + h + T.log(1 - rho_sqr)/2. 


		if sampleWeight is None:
			return T.mean(nll)
		return T.sum(T.mul(nll, sampleWeight) )/T.sum(sampleWeight)


	## y has shape (batchSize, n_variables), sampleWeight shall have shape (batchSize, 1) instead of (batchSize,)
	## this function returns a vector