Python theano.tensor.arange() Examples

The following are 30 code examples of theano.tensor.arange(). They are drawn from open-source projects; the Source File line above each example names the original project and file. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
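To fix ideas before the project examples, here is a minimal sketch of T.arange itself: like numpy.arange, it builds a 1-D integer vector, but symbolically, so it can depend on other symbolic quantities and be evaluated through a compiled function (names here are illustrative):

import theano
import theano.tensor as T

n = T.iscalar('n')             # symbolic length
idx = T.arange(n)              # symbolic vector [0, 1, ..., n-1]
f = theano.function([n], idx)
print(f(5))                    # -> [0 1 2 3 4]

Most of the examples below use such a vector as a row index for advanced indexing.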
Example #1
Source File: NN_PerceptronLayer.py    From Deep_MRI_brain_extraction with MIT License
def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
                -\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|-1}
                \log P(Y=y^{(i)} | x^{(i)}, W, b)

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
      
        return -T.mean(T.log(self.class_probabilities)[T.arange(y.shape[0]), y]) 
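The indexing expression [T.arange(y.shape[0]), y] is the heart of this snippet: paired integer vectors select one entry per row, namely each example's log-probability of its true class. A NumPy sketch of the same selection, with made-up values:

import numpy as np

LP = np.log(np.array([[0.7, 0.2, 0.1],
                      [0.1, 0.8, 0.1],
                      [0.3, 0.3, 0.4]]))   # per-class probabilities, 3 examples
y = np.array([0, 1, 2])                    # true class per example
picked = LP[np.arange(y.shape[0]), y]      # [LP[0,0], LP[1,1], LP[2,2]]
nll = -picked.mean()                       # mean negative log-likelihood

Theano's advanced indexing follows NumPy, so the symbolic version behaves identically.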
Example #2
Source File: mlp_test.py    From D-VAE with MIT License
def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
                -\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|-1}
                \log P(Y=y^{(i)} | x^{(i)}, W, b)

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # y.shape[0] is (symbolically) the number of rows in y, i.e., number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1]
        # T.log(self.p_y_given_x) is a matrix of Log-Probabilities (call it LP) with one row per example and one column per class
        # LP[T.arange(y.shape[0]),y] is a vector v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ..., LP[n-1,y[n-1]]]
        # and T.mean(LP[T.arange(y.shape[0]),y]) is the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) 
Example #3
Source File: NN_ConvLayer_3D.py    From Deep_MRI_brain_extraction with MIT License
def max_pool_along_channel_axis(sym_input, pool_factor):
    """ for 3D conv."""
    s = None
    for i in xrange(pool_factor):
        t = sym_input[:,:,i::pool_factor]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)
    return s
#    Ns, Ts, C, Hs, Ws = 1, 70, 1, 70, 70  -> 70^3
#    Nf, Tf, C, Hf, Wf = 32, 5 , 1, 5 , 5  -> 32 filters of shape 5^3
#    signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
#    filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
#
# in 3D
#        input:  (1, 70,  3, 70, 70)
#       filters: (32, 5 , 3,  5 , 5)
#    --> output: (1, 66, 32, 66, 66) 
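As a sanity check on the slicing logic, a NumPy analogue of the loop (shapes are illustrative): with pool_factor=2 it keeps the elementwise maximum over each pair of adjacent channels, halving axis 2.

import numpy as np

x = np.random.randn(1, 70, 4, 66, 66).astype('float32')   # (N, T, C, H, W)
pool_factor = 2
s = None
for i in range(pool_factor):
    t = x[:, :, i::pool_factor]          # every pool_factor-th channel slice
    s = t if s is None else np.maximum(s, t)
assert s.shape == (1, 70, 2, 66, 66)     # channel axis reduced by pool_factor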
Example #4
Source File: agent.py    From StockRecommendSystem with MIT License
def __init__(self, seq_len, n_feature):
        import theano.tensor as T
        self.Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
        self.buildNetwork()
        self.output = lasagne.layers.get_output(self.network)
        self.params = lasagne.layers.get_all_params(self.network, trainable=True)
        self.output_fn = theano.function([self.Input.input_var], self.output)

        fx = T.fvector().astype("float64")
        choices = T.ivector()
        px = self.output[T.arange(self.output.shape[0]), choices]
        log_px = T.log(px)
        cost = -fx.dot(log_px)
        updates = lasagne.updates.adagrad(cost, self.params, 0.0008)
        Input = lasagne.layers.InputLayer(shape=(None, seq_len, n_feature))
        self.train_fn = theano.function([self.Input.input_var, choices, fx], [cost, px, log_px], updates=updates) 
Example #5
Source File: toolbox.py    From Theano-Lights with MIT License
def depool(X, factor=2):
    """
    Luke perforated upsample: http://www.brml.org/uploads/tx_sibibtex/281.pdf
    """
    output_shape = [
        X.shape[1],
        X.shape[2]*factor,
        X.shape[3]*factor
    ]
    stride = X.shape[2]
    offset = X.shape[3]
    in_dim = stride * offset
    out_dim = in_dim * factor * factor

    upsamp_matrix = T.zeros((in_dim, out_dim))
    rows = T.arange(in_dim)
    cols = rows*factor + (rows/stride * factor * offset)
    upsamp_matrix = T.set_subtensor(upsamp_matrix[rows, cols], 1.)

    flat = T.reshape(X, (X.shape[0], output_shape[0], X.shape[2] * X.shape[3]))

    up_flat = T.dot(flat, upsamp_matrix)
    upsamp = T.reshape(up_flat, (X.shape[0], output_shape[0], output_shape[1], output_shape[2]))

    return upsamp 
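To see where the perforated upsample puts each input pixel, here is a small concrete trace of the index arithmetic (assuming a square map and Python 2 integer division for rows/stride, as in the source): for a 2x2 map and factor=2, input pixel (i, j) lands at output pixel (i*factor, j*factor), with zeros elsewhere.

import numpy as np

factor, stride, offset = 2, 2, 2                 # H = W = 2
in_dim = stride * offset                         # 4
rows = np.arange(in_dim)
cols = rows * factor + (rows // stride) * factor * offset   # [0, 2, 8, 10]
up = np.zeros((in_dim, in_dim * factor * factor))
up[rows, cols] = 1.
print(np.arange(1., 5.).dot(up).reshape(4, 4))
# [[1. 0. 2. 0.]
#  [0. 0. 0. 0.]
#  [3. 0. 4. 0.]
#  [0. 0. 0. 0.]]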
Example #6
Source File: agent.py    From StockRecommendSystem with MIT License
def update_critic(self, random_sample):
        #random_sample = np.random.choice(np.arange(len(self.rewards)-1), self.batch_size)

        states_batch = np.zeros((self.batch_size, self.lookback_size, self.n_feature), dtype = "float32")
        states_next_batch = np.zeros((self.batch_size, self.lookback_size, self.n_feature),dtype = "float32")

        #print random_sample

        for i in range(self.batch_size):
            random_id = random_sample[i]
            states_batch[i,:,:] =np.array(self.states[random_id:random_id+self.lookback_size]).astype("float32")
            states_next_batch[i,:,:] =np.array(self.states[random_id + 1:(random_id+self.lookback_size +1)]).astype("float32")

        reward_batch = np.array([self.rewards[i] for i in random_sample]).astype("float32")
        #using target model to predict
        target_value = self.target_model.predict(states_next_batch).flatten()*self.gamma + reward_batch

        self.critic_model.train(states_batch, target_value.reshape(self.batch_size,1)) 
Example #7
Source File: NN_ConvLayer_2D.py    From Deep_MRI_brain_extraction with MIT License
def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain [0,1,2,... n-1]
        # T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class. LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        #print "at least, y must be provided in a flattened view (a list of class values)!"

        return -T.mean(T.log(self.class_probabilities)[T.arange(y.shape[0]),y]) #shape of class_probabilities is e.g. (14*14,2) for 2 classes and 14**2 labels 
Example #8
Source File: test_basic.py    From D-VAE with MIT License
def test_infer_shape(self):
        mat = (numpy.arange(12) + 1).reshape((4, 3))
        mat[0, 1] = mat[1, 0] = mat[2, 2] = 0

        x_csc = theano.sparse.csc_matrix(dtype=theano.config.floatX)
        mat_csc = sp.csc_matrix(mat, dtype=theano.config.floatX)
        self._compile_and_check([x_csc],
                                [Remove0()(x_csc)],
                                [mat_csc],
                                self.op_class)

        x_csr = theano.sparse.csr_matrix(dtype=theano.config.floatX)
        mat_csr = sp.csr_matrix(mat, dtype=theano.config.floatX)
        self._compile_and_check([x_csr],
                                [Remove0()(x_csr)],
                                [mat_csr],
                                self.op_class) 
Example #9
Source File: test_basic.py    From D-VAE with MIT License
def test_int32_dtype(self):
        # Reported on the theano-user mailing-list:
        # https://groups.google.com/d/msg/theano-users/MT9ui8LtTsY/rwatwEF9zWAJ
        size = 9
        intX = 'int32'

        C = tensor.matrix('C', dtype=intX)
        I = tensor.matrix('I', dtype=intX)

        fI = I.flatten()
        data = tensor.ones_like(fI)
        indptr = tensor.arange(data.shape[0] + 1, dtype='int32')

        m1 = sparse.CSR(data, fI, indptr, (8, size))
        m2 = sparse.dot(m1, C)
        y = m2.reshape(shape=(2, 4, 9), ndim=3)

        f = theano.function(inputs=[I, C], outputs=y)
        i = numpy.asarray([[4, 3, 7, 7], [2, 8, 4, 5]], dtype=intX)
        a = numpy.asarray(numpy.random.randint(0, 100, (size, size)),
                          dtype=intX)
        f(i, a) 
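The indptr = tensor.arange(data.shape[0] + 1, ...) line relies on a CSR invariant: when every row contains exactly one nonzero, the row-pointer array is simply [0, 1, ..., nnz]. The same construction in SciPy, with the example's values:

import numpy as np
import scipy.sparse as sp

indices = np.array([4, 3, 7, 7, 2, 8, 4, 5], dtype='int32')   # flattened I
data = np.ones_like(indices)
indptr = np.arange(data.shape[0] + 1, dtype='int32')          # one nonzero per row
m = sp.csr_matrix((data, indices, indptr), shape=(8, 9))
# row i of m.toarray() is one-hot at column indices[i]

So m1 in the test is effectively a stack of one-hot rows, and sparse.dot(m1, C) gathers rows of C.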
Example #10
Source File: metrics.py    From ntm-one-shot with MIT License
def accuracy_instance(predictions, targets, n=[1, 2, 3, 4, 5, 10], \
        nb_classes=5, nb_samples_per_class=10, batch_size=1):
    accuracy_0 = theano.shared(np.zeros((batch_size, nb_samples_per_class), \
        dtype=theano.config.floatX))
    indices_0 = theano.shared(np.zeros((batch_size, nb_classes), \
        dtype=np.int32))
    batch_range = T.arange(batch_size)
    def step_(p, t, acc, idx):
        acc = T.inc_subtensor(acc[batch_range, idx[batch_range, t]], T.eq(p, t))
        idx = T.inc_subtensor(idx[batch_range, t], 1)
        return (acc, idx)
    (raw_accuracy, _), _ = theano.foldl(step_, sequences=[predictions.dimshuffle(1, 0), \
        targets.dimshuffle(1, 0)], outputs_info=[accuracy_0, indices_0])
    accuracy = T.mean(raw_accuracy / nb_classes, axis=0)

    return accuracy 
Example #11
Source File: ctc_cost.py    From CTC-Connectionist-Temporal-Classification with Apache License 2.0
def log_cost(cls, y, y_hat, y_mask, y_hat_mask, blank_symbol):
        y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype='int32')
        y_mask_len = tensor.sum(y_mask, axis=0, dtype='int32')
        log_probabs = cls.log_path_probabs(y, y_hat,
                                           y_mask, y_hat_mask,
                                           blank_symbol)
        batch_size = log_probabs.shape[1]
        labels_probab = cls.log_add(
            log_probabs[y_hat_mask_len - 1,
                        tensor.arange(batch_size),
                        y_mask_len - 1],
            log_probabs[y_hat_mask_len - 1,
                        tensor.arange(batch_size),
                        y_mask_len - 2])
        avg_cost = tensor.mean(-labels_probab)
        return avg_cost 
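The two tensor.arange(batch_size) terms implement a per-column gather: sequences in the batch have different lengths, so for each batch element b the code reads the forward log-probability at its own last valid frame y_hat_mask_len[b]-1, at the last two positions of its blank-augmented label sequence (a CTC path may end on the final label or on the trailing blank). A hedged NumPy sketch of the gather itself:

import numpy as np

T_, B, L = 6, 3, 5
log_probs = np.random.randn(T_, B, L)    # (frame, batch, label position)
t_len = np.array([6, 4, 5])              # valid frames per sequence
l_len = np.array([5, 3, 5])              # blank-augmented label lengths
b = np.arange(B)
end_label = log_probs[t_len - 1, b, l_len - 1]   # one scalar per batch element
end_blank = log_probs[t_len - 1, b, l_len - 2]
# end_label[i] == log_probs[t_len[i]-1, i, l_len[i]-1]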
Example #12
Source File: toolbox.py    From Theano-Lights with MIT License
def one_hot(x, n):
	if type(x) == list:
		x = np.array(x)
	x = x.flatten()
	o_h = np.zeros((len(x),n))
	o_h[np.arange(len(x)),x] = 1
	return o_h 
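Usage, for reference:

one_hot([0, 2, 1], 3)
# array([[ 1.,  0.,  0.],
#        [ 0.,  0.,  1.],
#        [ 0.,  1.,  0.]])

The np.arange(len(x)) row indices pair with the class values in x, so each row gets a single 1.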
Example #13
Source File: cnn_classifier.py    From sentence_classification with MIT License
def build_model(tparams,options):
    
    trng = RandomStreams(SEED)
    
    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))
    
    # input sentence: n_samples * n_steps 
    x = tensor.matrix('x', dtype='int32')
    # label: (n_samples,)
    y = tensor.vector('y',dtype='int32')
    
    layer0_input = tparams['Wemb'][tensor.cast(x.flatten(),dtype='int32')].reshape((x.shape[0],1,x.shape[1],tparams['Wemb'].shape[1])) 
    layer0_input = dropout(layer0_input, trng, use_noise)
 
    layer1_inputs = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(tparams, layer0_input,filter_shape=filter_shape, pool_size=pool_size,prefix=_p('cnn_encoder',i))                          
        layer1_input = conv_layer
        layer1_inputs.append(layer1_input)
    layer1_input = tensor.concatenate(layer1_inputs,1)
    layer1_input = dropout(layer1_input, trng, use_noise) 
    
    # this is the label prediction you made 
    pred = tensor.nnet.softmax(tensor.dot(layer1_input, tparams['Wy']) + tparams['by'])
    
    f_pred_prob = theano.function([x], pred, name='f_pred_prob')
    f_pred = theano.function([x], pred.argmax(axis=1), name='f_pred')

    # get the expression of how we calculate the cost function
    # i.e. cross-entropy loss
    index = tensor.arange(x.shape[0])
    cost = -tensor.log(pred[index, y] + 1e-6).mean()                          

    return use_noise, x, y, f_pred_prob, f_pred, cost 
Example #14
Source File: nn.py    From weightnorm with MIT License
def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim==2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - T.sum(p_true*output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - output_before_softmax[T.arange(p_true.shape[0]),p_true]) 
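Subtracting the per-row max keeps the exponentials from overflowing without changing the result, and the returned quantity is the mean cross-entropy, since log softmax(x)_y = x_y - log sum_j exp(x_j). A NumPy check of the identity on random values (illustrative only):

import numpy as np

x = np.random.randn(5, 10)                        # logits
y = np.random.randint(0, 10, size=5)              # integer targets
x = x - x.max(axis=1, keepdims=True)              # stabilization step
loss = np.mean(np.log(np.exp(x).sum(axis=1)) - x[np.arange(5), y])
p = np.exp(x) / np.exp(x).sum(axis=1, keepdims=True)
assert np.allclose(loss, -np.log(p[np.arange(5), y]).mean())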
Example #15
Source File: toolbox.py    From Theano-Lights with MIT License
def shuffledata(*data):
    idxs = np.random.permutation(np.arange(len(data[0])))
    if len(data) == 1:
        return [data[0][idx] for idx in idxs]
    else:
        return [np.matrix([d[idx] for idx in idxs]) for d in data] 
Example #16
Source File: toolbox.py    From Theano-Lights with MIT License
def __init__(self, img_height, img_width, N):
        self.n_att_params = 5
        self.img_height = img_height
        self.img_width = img_width
        self.N = N
        self.a = T.arange(self.img_width)
        self.b = T.arange(self.img_height)
        self.rngN = T.arange(self.N) - self.N/2 - 0.5
        self.numtol = 1e-4
        self.delta_factor = (max(self.img_width, self.img_height)-1) / (self.N-1)
        self.center_x_factor = (self.img_width+1.) /2.
        self.center_y_factor = (self.img_height+1.) /2. 
Example #17
Source File: toolbox.py    From Theano-Lights with MIT License
def theano_one_hot(idxs, n):
    z = T.zeros((idxs.shape[0], n))
    one_hot = T.set_subtensor(z[T.arange(idxs.shape[0]), idxs], 1)
    return one_hot 
Example #18
Source File: theano_backend.py    From GraphicDesignPatternByPython with MIT License
def arange(start, stop=None, step=1, dtype='int32'):
    """Creates a 1-D tensor containing a sequence of integers.

    The function arguments use the same convention as
    Theano's arange: if only one argument is provided,
    it is in fact the "stop" argument.

    The default type of the returned tensor is 'int32' to
    match TensorFlow's default.
    """
    return T.arange(start, stop=stop, step=step, dtype=dtype) 
Example #19
Source File: theano_backend.py    From GraphicDesignPatternByPython with MIT License
def in_top_k(predictions, targets, k):
    """Returns whether the `targets` are in the top `k` `predictions`.

    # Arguments
        predictions: A tensor of shape `(batch_size, classes)` and type `float32`.
        targets: A 1D tensor of length `batch_size` and type `int32` or `int64`.
        k: An `int`, number of top elements to consider.

    # Returns
        A 1D tensor of length `batch_size` and type `bool`.
        `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k`
        values of `predictions[i]`.
    """
    # handle k < 1 and k >= predictions.shape[1] cases to match TF behavior
    if k < 1:
        # dtype='bool' is only available since Theano 0.9.0
        try:
            return T.zeros_like(targets, dtype='bool')
        except TypeError:
            return T.zeros_like(targets, dtype='int8')

    if k >= int_shape(predictions)[1]:
        try:
            return T.ones_like(targets, dtype='bool')
        except TypeError:
            return T.ones_like(targets, dtype='int8')

    predictions_k = T.sort(predictions)[:, -k]
    targets_values = predictions[T.arange(targets.shape[0]), targets]
    return T.ge(targets_values, predictions_k)


# CONVOLUTIONS 
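The key line is predictions_k = T.sort(predictions)[:, -k], each row's k-th largest score; a target is in the top k exactly when its own score reaches that threshold (ties count as hits, via T.ge). A NumPy rendering with toy values:

import numpy as np

predictions = np.array([[0.1, 0.5, 0.4],
                        [0.7, 0.2, 0.1]])
targets = np.array([2, 1])
k = 2
kth_largest = np.sort(predictions)[:, -k]     # per-row threshold: [0.4, 0.2]
hits = predictions[np.arange(len(targets)), targets] >= kth_largest
# hits == [True, True]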
Example #20
Source File: ctc_cost.py    From CTC-Connectionist-Temporal-Classification with Apache License 2.0
def cost(cls, y, y_hat, y_mask, y_hat_mask, blank_symbol):
        y_hat_mask_len = tensor.sum(y_hat_mask, axis=0, dtype='int32')
        y_mask_len = tensor.sum(y_mask, axis=0, dtype='int32')
        probabilities, sth = cls.path_probabs(y, y_hat,
                                              y_mask, y_hat_mask,
                                              blank_symbol)
        batch_size = probabilities.shape[1]
        labels_probab = (probabilities[y_hat_mask_len - 1,
                                       tensor.arange(batch_size),
                                       y_mask_len - 1] +
                         probabilities[y_hat_mask_len - 1,
                                       tensor.arange(batch_size),
                                       y_mask_len - 2])
        avg_cost = tensor.mean(-tensor.log(labels_probab))
        return avg_cost, sth 
Example #21
Source File: ctc_cost.py    From CTC-Connectionist-Temporal-Classification with Apache License 2.0
def class_batch_to_labeling_batch(y, y_hat, y_hat_mask=None):
        y_hat = y_hat * y_hat_mask.dimshuffle(0, 'x', 1)
        batch_size = y_hat.shape[2]
        res = y_hat[:, y.astype('int32'), tensor.arange(batch_size)]
        return res 
Example #22
Source File: NN_PerceptronLayer.py    From Deep_MRI_brain_extraction with MIT License
def negative_log_likelihood_modulated_margin(self, y, modulation=1, margin=0.7, penalty_multiplier = 0):
        print "negative_log_likelihood_modulated_margin:: Penalty down to ",100.*penalty_multiplier,"% if prediction is close to the target! Threshold is",margin
        penalty_multiplier = np.float32(penalty_multiplier)
        margin = np.float32(margin)
        selected = self.class_probabilities[T.arange(y.shape[0]),y]
        r = modulation*T.log(selected)
        return -T.mean(r*(selected<margin) + (0 if penalty_multiplier==0 else penalty_multiplier*r*(selected>=margin))  ) 
Example #23
Source File: network3.py    From WannaPark with GNU General Public License v3.0
def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y]) 
Example #24
Source File: attention.py    From text2image with MIT License
def get_filterbank_matrices(self, g_y, g_x, delta, sigma):

        tol = 1e-04
        mu_x = g_x.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*(T.arange(self.N)-self.N/2-0.5) # dimension (batch_size, N)
        mu_y = g_y.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*(T.arange(self.N)-self.N/2-0.5)

        a = T.arange(self.A)
        b = T.arange(self.B)

        f_x = T.exp( -(a-mu_x.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 ) # dimension (batch_size, N, A)
        f_y = T.exp( -(b-mu_y.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 )

        f_x = f_x / (f_x.sum(axis=2).dimshuffle(0, 1, 'x') + tol) # dimension (batch_size, N, A)
        f_y = f_y / (f_y.sum(axis=2).dimshuffle(0, 1, 'x') + tol)
        return f_y, f_x 
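Here T.arange(self.N) lays out the N filter centers of a DRAW-style attention grid: centers sit delta apart around the grid center (g_x, g_y), and each row of f_x is a normalized Gaussian over the A image columns. A NumPy sketch for a single batch element, with illustrative numbers:

import numpy as np

N, A = 3, 8                                    # grid size, image width
g_x, delta, sigma = 3.5, 2.0, 1.0
mu_x = g_x + delta * (np.arange(N) - N / 2.0 - 0.5)   # centers: [-0.5, 1.5, 3.5]
a = np.arange(A)
f_x = np.exp(-(a - mu_x[:, None]) ** 2 / (2.0 * sigma ** 2))
f_x = f_x / (f_x.sum(axis=1, keepdims=True) + 1e-4)   # rows sum to ~1
# f_x[i] is a Gaussian window over image columns, centered at mu_x[i]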
Example #25
Source File: alignDraw.py    From text2image with MIT License
def validate(self):

        self._build_validate_function()
        sys.stdout.flush()

        all_outputs = np.array([0.0,0.0,0.0])
        for i in xrange(0,self.val_shape[0],self.batch_size):
            i_vector = np.int32(np.arange(i,i+self.batch_size))
            [kl, logpxz, log_likelihood] = self._validate_function(i_vector, i_vector, self.runSteps)
            all_outputs[0] = all_outputs[0] + kl * i_vector.shape[0]
            all_outputs[1] = all_outputs[1] + logpxz * i_vector.shape[0]
            all_outputs[2] = all_outputs[2] + log_likelihood * i_vector.shape[0]

        all_outputs = all_outputs / self.val_shape[0]
        return all_outputs 
Example #26
Source File: attention.py    From text2image with MIT License
def get_mean_filters_cpu(self, g_y, g_x, delta, sigma):
        mu_x = g_x + delta * (np.arange(self.N) - self.N/2 - 0.5)
        mu_y = g_y + delta * (np.arange(self.N) - self.N/2 - 0.5)

        return mu_y, mu_x 
Example #27
Source File: attention.py    From text2image with MIT License
def get_filterbank_matrices(self, g_y, g_x, delta, sigma):

        tol = 1e-04
        mu_x = g_x.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*(T.arange(self.N)-self.N/2-0.5) # dimension (batch_size, N)
        mu_y = g_y.dimshuffle([0, 'x']) + delta.dimshuffle([0, 'x'])*(T.arange(self.N)-self.N/2-0.5)

        a = T.arange(self.A)
        b = T.arange(self.B)

        f_x = T.exp( -(a-mu_x.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 ) # dimension (batch_size, N, A)
        f_y = T.exp( -(b-mu_y.dimshuffle([0,1,'x']))**2 / 2. / sigma.dimshuffle([0,'x','x'])**2 )

        f_x = f_x / (f_x.sum(axis=2).dimshuffle(0, 1, 'x') + tol) # dimension (batch_size, N, A)
        f_y = f_y / (f_y.sum(axis=2).dimshuffle(0, 1, 'x') + tol)
        return f_y, f_x 
Example #28
Source File: NN4LogReg.py    From RaptorX-Contact with GNU General Public License v3.0
def NLL(self, y, sampleWeight=None):
        ###Return the mean of the negative log-likelihood of the prediction of this model under a given target distribution.

        if sampleWeight is not None:
            return -T.sum(T.mul(sampleWeight, T.log(self.p_y_given_x)[T.arange(y.shape[0]), y] ) )/T.sum(sampleWeight)
        else:
            return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) 
Example #29
Source File: nn.py    From Att-ChemdNER with Apache License 2.0
def link(self, input,words):
#{{{
        """
        Propagate the input through the network and return the last hidden
        vector. The whole sequence is also accessible via self.h, but
        where self.h of shape (sequence_length, batch_size, output_dim)
        """

        # If we use batches, we have to permute the first and second dimension.
        if self.with_batch:
            assert 0,"AttentionLSTM not implement with_batch";
        else:
            self.input = input
            initial_states = [self.h_0, self.c_0] 
        
        step_function=self.step;  

        [e,h,c], _ = theano.scan(
            fn=step_function,
            sequences=[words,T.arange(words.shape[0])],
            outputs_info=[T.zeros((input.shape[0],),
                                  dtype=theano.config.floatX)]+initial_states,
            non_sequences=[self.input],
        )
        self.h = h
        self.output = h[-1]
        self.e=e;
        self.c=c;
        return self.output
#}}}
 
#}}} 
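Passing T.arange(words.shape[0]) as a second sequence is a common Theano idiom: it hands the step function the index of the current timestep alongside the element itself. A minimal, self-contained sketch of the pattern (the step function here is hypothetical):

import theano
import theano.tensor as T

xs = T.vector('xs')

def step(x_t, t, acc):
    # t is the current timestep index, supplied by the arange sequence
    return acc + x_t * t

out, _ = theano.scan(step,
                     sequences=[xs, T.arange(xs.shape[0])],
                     outputs_info=[T.zeros_like(xs[0])])
f = theano.function([xs], out[-1])
print(f([1., 2., 3.]))   # 0*1 + 1*2 + 2*3 = 8.0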
Example #30
Source File: nn.py    From GELUs with MIT License
def softmax_loss(p_true, output_before_softmax):
    output_before_softmax -= T.max(output_before_softmax, axis=1, keepdims=True)
    if p_true.ndim==2:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - T.sum(p_true*output_before_softmax, axis=1))
    else:
        return T.mean(T.log(T.sum(T.exp(output_before_softmax),axis=1)) - output_before_softmax[T.arange(p_true.shape[0]),p_true])