Python lasagne.objectives.categorical_crossentropy() Examples

The following are 20 code examples of lasagne.objectives.categorical_crossentropy(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module lasagne.objectives , or try the search function .
Example #1
Source File: resnet152_outdated.py    From Theano-MPI with Educational Community License v2.0 6 votes vote down vote up
def build_model(self):
        
        import theano.tensor as T
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.lr = T.scalar('lr')
        
        net = build_model_resnet152(input_shape=(None, 3, 224, 224))
        
        self.output_layer = net['prob']
        
        from lasagne.layers import get_output
        self.output = lasagne.layers.get_output(self.output_layer, self.x, deterministic=False)
        self.cost = lasagne.objectives.categorical_crossentropy(self.output, self.y).mean()
        from lasagne.objectives import categorical_accuracy
        self.error = 1-categorical_accuracy(self.output, self.y, top_k=1).mean()
        self.error_top_5 = 1-categorical_accuracy(self.output, self.y, top_k=5).mean() 
Example #2
Source File: resnet50.py    From Theano-MPI with Educational Community License v2.0 6 votes vote down vote up
def compile_val(self):

        if self.verbose: print('compiling validation function...')
        
        import theano
        
        from lasagne.layers import get_output
        
        output_val = lasagne.layers.get_output(self.output_layer, self.x, deterministic=True)
        
        from lasagne.objectives import categorical_accuracy, categorical_crossentropy
        
        cost = categorical_crossentropy(output_val, self.y).mean()
        error = 1-categorical_accuracy(output_val, self.y, top_k=1).mean()
        error_top_5 = 1-categorical_accuracy(output_val, self.y, top_k=5).mean()
        
        self.val_fn=  theano.function([self.subb_ind], [cost,error,error_top_5], updates=[], 
                                          givens=[(self.x, self.shared_x_slice),
                                                  (self.y, self.shared_y_slice)]
                                                                ) 
Example #3
Source File: resnet50.py    From Theano-MPI with Educational Community License v2.0 6 votes vote down vote up
def build_model(self):
        
        import theano.tensor as T
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.lr = T.scalar('lr')
        
        net = build_model_resnet50(input_shape=(None, 3, 224, 224))
        
        if self.verbose: print('Total number of layers:', len(lasagne.layers.get_all_layers(net['prob'])))
        
        self.output_layer = net['prob']
        
        from lasagne.layers import get_output
        self.output = lasagne.layers.get_output(self.output_layer, self.x, deterministic=False)
        self.cost = lasagne.objectives.categorical_crossentropy(self.output, self.y).mean()
        from lasagne.objectives import categorical_accuracy
        self.error = 1-categorical_accuracy(self.output, self.y, top_k=1).mean()
        self.error_top_5 = 1-categorical_accuracy(self.output, self.y, top_k=5).mean() 
Example #4
Source File: vgg16.py    From Theano-MPI with Educational Community License v2.0 6 votes vote down vote up
def compile_val(self):

        if self.verbose: print('compiling validation function...')
        
        import theano
        
        from lasagne.layers import get_output
        
        output_val = lasagne.layers.get_output(self.output_layer, self.x, deterministic=True)
        
        from lasagne.objectives import categorical_accuracy, categorical_crossentropy
        
        cost = categorical_crossentropy(output_val, self.y).mean()
        error = 1-categorical_accuracy(output_val, self.y, top_k=1).mean()
        error_top_5 = 1-categorical_accuracy(output_val, self.y, top_k=5).mean()
        
        self.val_fn=  theano.function([self.subb_ind], [cost,error,error_top_5], updates=[], 
                                          givens=[(self.x, self.shared_x_slice),
                                                  (self.y, self.shared_y_slice)]
                                                                ) 
Example #5
Source File: vgg16.py    From Theano-MPI with Educational Community License v2.0 6 votes vote down vote up
def build_model(self):
        
        import theano.tensor as T
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.lr = T.scalar('lr')
        
        net = build_model_vgg16(input_shape=(None, 3, 224, 224), verbose=self.verbose)
        self.output_layer = net['prob']
        
        from lasagne.layers import get_output
        self.output = lasagne.layers.get_output(self.output_layer, self.x, deterministic=False)
        self.cost = lasagne.objectives.categorical_crossentropy(self.output, self.y).mean()
        from lasagne.objectives import categorical_accuracy
        self.error = 1-categorical_accuracy(self.output, self.y, top_k=1).mean()
        self.error_top_5 = 1-categorical_accuracy(self.output, self.y, top_k=5).mean() 
Example #6
Source File: bagging_nn_rmsprop.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = rmsprop(loss_train, all_params, self.lr, self.rho)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #7
Source File: birdCLEF_train.py    From BirdCLEF2017 with MIT License 5 votes vote down vote up
def calc_loss(prediction, targets):

    #categorical crossentropy is the best choice for a multi-class softmax output
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    
    return loss 
Example #8
Source File: models.py    From acnn with GNU General Public License v3.0 5 votes vote down vote up
def categorical_crossentropy_stable(predictions, targets):
    epsilon = 1e-08
    predictions = T.clip(predictions, epsilon, 1.0 - epsilon)
    return T.nnet.categorical_crossentropy(predictions, targets) 
Example #9
Source File: nn_adagrad.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.epsilon)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #10
Source File: bagging_nn_nesterov.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = lasagne.updates.nesterov_momentum(loss_train, all_params, self.lr, self.momentum)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #11
Source File: nn_rmsprop_features.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = rmsprop(loss_train, all_params, self.lr, self.rho)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #12
Source File: nn_adagrad_pca.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.epsilon)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #13
Source File: nn_adagrad_log.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.rho)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #14
Source File: nn_adagrad_pca.py    From kaggle_otto with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def create_iter_functions(self, dataset, output_layer, X_tensor_type=T.matrix):
        batch_index = T.iscalar('batch_index')
        X_batch = X_tensor_type('x')
        y_batch = T.ivector('y')

        batch_slice = slice(batch_index * self.batch_size, (batch_index + 1) * self.batch_size)

        objective = Objective(output_layer, loss_function=categorical_crossentropy)

        loss_train = objective.get_loss(X_batch, target=y_batch)
        loss_eval = objective.get_loss(X_batch, target=y_batch, deterministic=True)

        pred = T.argmax(output_layer.get_output(X_batch, deterministic=True), axis=1)
        proba = output_layer.get_output(X_batch, deterministic=True)
        accuracy = T.mean(T.eq(pred, y_batch), dtype=theano.config.floatX)

        all_params = get_all_params(output_layer)
        updates = adagrad(loss_train, all_params, self.lr, self.epsilon)

        iter_train = theano.function(
            [batch_index], loss_train,
            updates=updates,
            givens={
                X_batch: dataset['X_train'][batch_slice],
                y_batch: dataset['y_train'][batch_slice],
            },
            on_unused_input='ignore',
        )

        iter_valid = None
        if self.use_valid:
            iter_valid = theano.function(
                [batch_index], [loss_eval, accuracy, proba],
                givens={
                    X_batch: dataset['X_valid'][batch_slice],
                    y_batch: dataset['y_valid'][batch_slice],
                },
            )

        return dict(train=iter_train, valid=iter_valid) 
Example #15
Source File: models.py    From drmad with MIT License 5 votes vote down vote up
def __init__(self, x, y, args):
        self.params_theta = []
        self.params_lambda = []
        self.params_weight = []
        if args.dataset == 'mnist':
            input_size = (None, 1, 28, 28)
        elif args.dataset == 'cifar10':
            input_size = (None, 3, 32, 32)
        else:
            raise AssertionError
        layers = [ll.InputLayer(input_size)]
        self.penalty = theano.shared(np.array(0.))

        #conv1
        layers.append(Conv2DLayerWithReg(args, layers[-1], 20, 5))
        self.add_params_to_self(args, layers[-1])
        layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
        #conv1
        layers.append(Conv2DLayerWithReg(args, layers[-1], 50, 5))
        self.add_params_to_self(args, layers[-1])
        layers.append(ll.MaxPool2DLayer(layers[-1], pool_size=2, stride=2))
        #fc1
        layers.append(DenseLayerWithReg(args, layers[-1], num_units=500))
        self.add_params_to_self(args, layers[-1])
        #softmax
        layers.append(DenseLayerWithReg(args, layers[-1], num_units=10, nonlinearity=nonlinearities.softmax))
        self.add_params_to_self(args, layers[-1])

        self.layers = layers
        self.y = ll.get_output(layers[-1], x, deterministic=False)
        self.prediction = T.argmax(self.y, axis=1)
        # self.penalty = penalty if penalty != 0. else T.constant(0.)
        print(self.params_lambda)
        # time.sleep(20)
        # cost function
        self.loss = T.mean(categorical_crossentropy(self.y, y))
        self.lossWithPenalty = T.add(self.loss, self.penalty)
        print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty) 
Example #16
Source File: lasagne_net.py    From BirdCLEF-Baseline with MIT License 5 votes vote down vote up
def calc_loss(prediction, targets):

    # Categorical crossentropy is the best choice for a multi-class softmax output
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    
    return loss 
Example #17
Source File: AED_train.py    From AcousticEventDetection with MIT License 5 votes vote down vote up
def calc_loss(prediction, targets):

    #categorical crossentropy is the best choice for a multi-class softmax output
    loss = T.mean(objectives.categorical_crossentropy(prediction, targets))
    
    return loss 
Example #18
Source File: adda_network.py    From adda_mnist64 with MIT License 4 votes vote down vote up
def __init__(self):

        # Define inputs
        input_var = T.ftensor4('input_var')  # input images (batchx3x64x64)
        labels_classifier_var = T.ivector('labels_classifier_var')  # labels for images
        labels_domain_var = T.ivector('labels_domain_var')  # labels for domain (1 for source, 0 for target)
        learning_rate = T.fscalar('learning_rate')

        # Define classifier networks
        network_classifier = self.network_classifier(input_var)
        network_discriminator = self.network_discriminator(network_classifier['classifier/pool1'])

        # Define outputs
        prediction_classifier = get_output(network_classifier['classifier/output'])  # prob image classification
        prediction_discriminator = get_output(network_discriminator['discriminator/output'])  # prob image domain (should be 1 for source)

        # Define losses (objectives)
        loss_classifier_only = T.mean(categorical_crossentropy(prediction_classifier, labels_classifier_var) * labels_domain_var)
        loss_discriminator = T.mean(categorical_crossentropy(prediction_discriminator, labels_domain_var))
        loss_classifier = loss_classifier_only - loss_discriminator

        # Define performance
        perf_classifier_only = categorical_accuracy(prediction_classifier, labels_classifier_var).mean()
        perf_discriminator = categorical_accuracy(prediction_discriminator, labels_domain_var).mean()

        # Define params
        params_classifier = lasagne.layers.get_all_params(network_classifier['classifier/output'], trainable=True)
        params_discriminator = lasagne.layers.get_all_params(network_discriminator['discriminator/output'], trainable=True)
        params_discriminator = [param for param in params_discriminator if 'discriminator' in param.name]

        # Define updates
        updates_classifier = lasagne.updates.adam(loss_classifier, params_classifier, learning_rate=learning_rate)
        updates_classifier_only = lasagne.updates.adam(loss_classifier_only, params_classifier, learning_rate=learning_rate)
        updates_discriminator = lasagne.updates.adam(loss_discriminator, params_discriminator, learning_rate=learning_rate)

        # Define training functions
        self.train_fn_classifier = theano.function(
            [input_var, labels_classifier_var, labels_domain_var, learning_rate],
            [loss_classifier, loss_classifier_only, prediction_classifier],
            updates=updates_classifier)
        self.train_fn_classifier_only = theano.function(
            [input_var, labels_classifier_var, labels_domain_var, learning_rate],
            [loss_classifier, loss_classifier_only, prediction_classifier],
            updates=updates_classifier_only)
        self.train_fn_discriminator = theano.function(
            [input_var, labels_domain_var, learning_rate],
            [loss_discriminator, prediction_discriminator],
            updates=updates_discriminator)

        # Define validation functions
        self.valid_fn_classifier = theano.function(
            [input_var, labels_classifier_var],
            [perf_classifier_only, prediction_classifier])

        self.valid_fn_discriminator = theano.function(
            [input_var, labels_domain_var],
            [perf_discriminator, prediction_discriminator]) 
Example #19
Source File: updates.py    From Deep-SVDD with MIT License 4 votes vote down vote up
def compile_update_softmax(nnet, inputs, targets):
    """
    create a softmax loss for network given in argument
    """

    floatX = Cfg.floatX
    C = Cfg.C

    final_layer = nnet.all_layers[-1]
    trainable_params = lasagne.layers.get_all_params(final_layer, trainable=True)

    # Regularization
    if Cfg.weight_decay:
        l2_penalty = (floatX(0.5) / C) * get_l2_penalty(nnet)
    else:
        l2_penalty = T.cast(0, dtype='floatX')

    # Backpropagation
    prediction = lasagne.layers.get_output(final_layer, inputs=inputs, deterministic=False)

    if Cfg.ad_experiment:
        train_loss = T.mean(l_objectives.binary_crossentropy(prediction.flatten(), targets), dtype='floatX')
        train_acc = T.mean(l_objectives.binary_accuracy(prediction.flatten(), targets), dtype='floatX')
    else:
        train_loss = T.mean(l_objectives.categorical_crossentropy(prediction, targets), dtype='floatX')
        train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets), dtype='floatX')

    train_obj = T.cast(train_loss + l2_penalty, dtype='floatX')
    updates = get_updates(nnet, train_obj, trainable_params, solver=nnet.solver)
    nnet.backprop = theano.function([inputs, targets], [train_obj, train_acc], updates=updates)

    # Forwardpropagation
    test_prediction = lasagne.layers.get_output(final_layer, inputs=inputs, deterministic=True)

    if Cfg.ad_experiment:
        test_loss = T.mean(l_objectives.binary_crossentropy(test_prediction.flatten(), targets), dtype='floatX')
        test_acc = T.mean(l_objectives.binary_accuracy(test_prediction.flatten(), targets), dtype='floatX')
    else:
        test_loss = T.mean(l_objectives.categorical_crossentropy(test_prediction, targets), dtype='floatX')
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), targets), dtype='floatX')

    test_obj = T.cast(test_loss + l2_penalty, dtype='floatX')
    nnet.forward = theano.function([inputs, targets], [test_obj, test_acc, test_prediction, l2_penalty, test_loss]) 
Example #20
Source File: models.py    From drmad with MIT License 4 votes vote down vote up
def __init__(self, x, y, args):
        self.params_theta = []
        self.params_lambda = []
        self.params_weight = []
        if args.dataset == 'mnist':
            input_size = (None, 28*28)
        elif args.dataset == 'cifar10':
            input_size = (None, 3, 32*32)
        else:
            raise AssertionError
        layers = [ll.InputLayer(input_size)]
        penalty = theano.shared(np.array(0.))
        for (k, num) in enumerate(args.MLPlayer):
            # the last layer should use softmax
            if k == len(args.MLPlayer) - 1:
                # layers.append(ll.DenseLayer(layers[-1], num, nonlinearity=nonlinearities.softmax))
                layers.append(DenseLayerWithReg(args, layers[-1], num_units=num,
                                                nonlinearity=nonlinearities.softmax))
            else:
                # layers.append(ll.DenseLayer(layers[-1], num))
                layers.append(DenseLayerWithReg(args, layers[-1], num_units=num))
            if layers[-1].W is not None:
                self.params_theta += [layers[-1].W, layers[-1].b]
                self.params_weight += [layers[-1].W]

                # define new regularization term for a layer
                if args.regL2 is True:
                    tempL2 = layers[-1].L2 * T.sqr(layers[-1].W)
                    penalty += T.sum(tempL2)
                    self.params_lambda += [layers[-1].L2]
                if args.regL1 is True:
                    tempL1 = layers[-1].L1 * layers[-1].W
                    penalty += T.sum(tempL1)
                    self.params_lambda += [layers[-1].L1]

        self.layers = layers
        self.y = ll.get_output(layers[-1], x, deterministic=False)
        self.prediction = T.argmax(self.y, axis=1)
        self.penalty = penalty
        # self.penalty = penalty if penalty != 0. else T.constant(0.)
        print(self.params_lambda)
        # time.sleep(20)
        # cost function
        self.loss = T.mean(categorical_crossentropy(self.y, y))
        self.lossWithPenalty = T.add(self.loss, self.penalty)
        print "loss and losswithpenalty", type(self.loss), type(self.lossWithPenalty)
        # self.classError = T.mean(T.cast(T.neq(self.prediction, y), 'float32'))