Python Examples of caffe2.python.brew.softmax

Source File: classification_no_db_example.py From peters-stuff with GNU General Public License v3.0

6 votes

def get_data(batchsize):
    """Generate a random batch of constant-valued toy images with labels.

    Every sample is a (1, 30, 30) array that is either all zeros (label 0)
    or all ones (label 1); the class is picked with one
    ``np.random.randint(0, 2)`` draw per sample.

    :param batchsize: Number of samples to generate.
    :return: Tuple ``(data, label)``: the stacked samples cast to float32 and
             the matching class ids cast to int32.
    """
    # Indexed by the drawn class id: 0 -> all-zeros image, 1 -> all-ones image.
    fillers = (np.zeros, np.ones)
    samples, classes = [], []
    for _ in range(batchsize):
        class_id = np.random.randint(0, 2)
        samples.append(fillers[class_id]((1, 30, 30)))
        classes.append(class_id)
    images = np.array(samples).astype('float32')
    targets = np.array(classes).astype('int32')
    return images, targets

# create actual network structure (from input to output (here softmax))

Source File: mnist.py From batch-shipyard with MIT License

6 votes

def AddLeNetModel(model, data):
    """Build the standard LeNet network, from the input blob to the softmax.

    Each convolution is given ``dim_in`` (number of input channels) and
    ``dim_out`` (number of output channels). A kernel of size 5 shrinks each
    side of the image by 4, and a max-pool whose kernel and stride are both 2
    halves each side; the per-layer comments trace this for a 28 x 28 input.

    Returns the blob produced by the final ``brew.softmax`` call.
    """
    conv1_channels = 20
    conv2_channels = 50
    hidden_units = 500

    # 28 x 28 -> 24 x 24 (5x5 convolution)
    features = brew.conv(
        model, data, 'conv1', dim_in=1, dim_out=conv1_channels, kernel=5)
    # 24 x 24 -> 12 x 12 (2x2 max-pool, stride 2)
    features = brew.max_pool(model, features, 'pool1', kernel=2, stride=2)
    # 12 x 12 -> 8 x 8 (5x5 convolution)
    features = brew.conv(
        model, features, 'conv2',
        dim_in=conv1_channels, dim_out=conv2_channels, kernel=5)
    # 8 x 8 -> 4 x 4 (2x2 max-pool, stride 2)
    features = brew.max_pool(model, features, 'pool2', kernel=2, stride=2)

    # The dense layer sees conv2's channel count times the 4 x 4 image.
    hidden = brew.fc(
        model, features, 'fc3',
        dim_in=conv2_channels * 4 * 4, dim_out=hidden_units)
    # ReLU is applied in place: input and output are the same blob.
    hidden = brew.relu(model, hidden, hidden)
    logits = brew.fc(model, hidden, 'pred', dim_in=hidden_units, dim_out=10)
    return brew.softmax(model, logits, 'softmax')

Source File: mnist.py From batch-shipyard with MIT License

6 votes

def AddTrainingOperators(model, softmax, label):
    """Attach loss, accuracy, gradient and plain-SGD update ops to ``model``.

    ``softmax`` and ``label`` are the prediction and ground-truth blobs used
    for the cross-entropy loss and the accuracy op.
    """
    # Loss: label cross entropy, then averaged into the "loss" blob.
    cross_entropy = model.LabelCrossEntropy([softmax, label], 'xent')
    avg_loss = model.AveragedLoss(cross_entropy, "loss")

    # Accuracy is tracked alongside the loss.
    AddAccuracy(model, softmax, label)

    # Gradient operators are derived from the averaged loss.
    model.AddGradientOperators([avg_loss])

    # Iteration counter feeding a "step" learning-rate schedule. base_lr is
    # negative, so adding param_grad * LR below moves against the gradient.
    iteration = brew.iter(model, "iter")
    learning_rate = model.LearningRate(
        iteration, "LR", base_lr=-0.1, policy="step", stepsize=1, gamma=0.999)

    # Constant 1.0 used as the weight of the parameter itself in the update.
    # It only has to be created once, hence it lives in param_init_net.
    unit = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

    # In-place update of every parameter: param = param * 1 + param_grad * LR.
    # ModelHelper keeps the parameter -> gradient mapping in param_to_grad.
    for weights in model.params:
        gradient = model.param_to_grad[weights]
        model.WeightedSum([weights, unit, gradient, learning_rate], weights)

Source File: demo_caffe2.py From tensorboardX with MIT License

6 votes

def AddTrainingOperators(model, softmax, label):
    """Add the training ops (loss, accuracy, gradients, SGD) to ``model``.

    ``softmax`` is the prediction blob and ``label`` the ground-truth blob;
    both feed the cross-entropy loss and the accuracy op.
    """
    # Expected loss: averaged label cross entropy.
    xent_blob = model.LabelCrossEntropy([softmax, label], 'xent')
    loss_blob = model.AveragedLoss(xent_blob, "loss")
    # Keep an accuracy op next to the loss.
    AddAccuracy(model, softmax, label)
    # Back-propagate from the averaged loss.
    model.AddGradientOperators([loss_blob])

    # Simple stochastic gradient descent driven by an iteration counter and
    # a "step" learning-rate schedule (base_lr is negative, so the weighted
    # sum below subtracts a multiple of the gradient).
    step_counter = brew.iter(model, "iter")
    lr_blob = model.LearningRate(
        step_counter, "LR",
        base_lr=-0.1, policy="step", stepsize=1, gamma=0.999)
    # The constant multiplier for the parameter term is created once, in
    # param_init_net.
    one_blob = model.param_init_net.ConstantFill(
        [], "ONE", shape=[1], value=1.0)

    # Per-parameter update: param = param + param_grad * LR, written back
    # into the parameter blob. Gradients come from model.param_to_grad.
    for parameter in model.params:
        model.WeightedSum(
            [parameter, one_blob, model.param_to_grad[parameter], lr_blob],
            parameter)

Source File: model.py From dlcookbook-dlbs with Apache License 2.0

6 votes

def add_head_nodes(self, model, v, dim_in, fc_name, loss_scale=1.0):
        """Append the dense classifier and its softmax / loss head.

        :param model_helper.ModelHelper model: Current model to use.
        :param obj v: Input blobs.
        :param int dim_in: Number of input features.
        :param str fc_name: Name of a fully connected operator.
        :param float loss_scale: Scale applied to the loss (for the
                                 multi-GPU case).
        :return: One-element list: the softmax node when ``self.phase`` is
                 ``'inference'``, otherwise the scaled loss node.
        """
        logits = brew.fc(model, v, fc_name, dim_in=dim_in, dim_out=self.num_classes)

        # Half-precision logits are converted to float32 before the softmax.
        if self.dtype == 'float16':
            print("[INFO] Converting logits from float16 to float32 for softmax layer")
            logits = model.net.HalfToFloat(logits, logits + '_fp32')

        # Inference only needs the class probabilities.
        if self.phase == 'inference':
            return [brew.softmax(model, logits, 'softmax')]

        # Otherwise build softmax + loss against the 'softmax_label' blob and
        # scale the loss; the scaled blob is named after the net.
        _, raw_loss = model.SoftmaxWithLoss([logits, 'softmax_label'], ['softmax', 'loss'])
        net_name = model.net.Proto().name
        scaled_loss = model.Scale(raw_loss, net_name + "_loss", scale=loss_scale)
        return [scaled_loss]

Source File: classification_no_db_example.py From peters-stuff with GNU General Public License v3.0

5 votes

def create_model(m, device_opts):
    """Add a LeNet-style two-class network to ``m`` and return its softmax.

    All operators are created inside ``core.DeviceScope(device_opts)``. The
    network reads the blob named 'data', and the softmax blob is registered
    through ``m.net.AddExternalOutput``.
    """
    hidden_units = 500
    with core.DeviceScope(device_opts):
        # Two conv + max-pool stages.
        stage = brew.conv(m, 'data', 'conv1', dim_in=1, dim_out=20, kernel=5)
        stage = brew.max_pool(m, stage, 'pool1', kernel=2, stride=2)
        stage = brew.conv(m, stage, 'conv2', dim_in=20, dim_out=50, kernel=5)
        stage = brew.max_pool(m, stage, 'pool2', kernel=2, stride=2)

        # Dense classifier; the ReLU overwrites the fc3 blob in place.
        # NOTE(review): 50 * 4 * 4 presumably corresponds to a 30 x 30 input
        # (30 -> 26 -> 13 -> 9 -> 4) -- confirm against the fed data.
        dense = brew.fc(
            m, stage, 'fc3', dim_in=50 * 4 * 4, dim_out=hidden_units)
        dense = brew.relu(m, dense, dense)
        logits = brew.fc(m, dense, 'pred', dim_in=hidden_units, dim_out=2)

        probabilities = brew.softmax(m, logits, 'softmax')
        m.net.AddExternalOutput(probabilities)
        return probabilities

# add loss and optimizer

Source File: classification_no_db_example.py From peters-stuff with GNU General Public License v3.0

5 votes

def add_training_operators(softmax, model, device_opts):
    """Add loss, accuracy, gradient and SGD optimizer ops to ``model``.

    Everything is created inside ``core.DeviceScope(device_opts)``; the
    ground truth is read from the blob named "label".
    """
    with core.DeviceScope(device_opts):
        # Averaged label cross entropy as the training loss.
        cross_entropy = model.LabelCrossEntropy([softmax, "label"], 'xent')
        avg_loss = model.AveragedLoss(cross_entropy, "loss")
        brew.accuracy(model, [softmax, "label"], "accuracy")

        model.AddGradientOperators([avg_loss])

        # SGD with a "step" learning-rate policy. The original author left
        # two alternatives as notes: passing momentum=0.9 here, or using
        # optimizer.build_adam(model, base_learning_rate=0.001) instead.
        optimizer.build_sgd(
            model,
            base_learning_rate=0.01,
            policy="step",
            stepsize=1,
            gamma=0.999,
        )

Source File: mnist.py From batch-shipyard with MIT License

5 votes

def AddAccuracy(model, softmax, label):
    """Add an accuracy op over ``softmax`` and ``label``; return its blob."""
    return brew.accuracy(model, [softmax, label], "accuracy")

Source File: demo_caffe2.py From tensorboardX with MIT License

5 votes

def AddLeNetModel(model, data):
    """Build a LeNet-style network, from the input blob to the softmax.

    Each convolution is given ``dim_in`` (number of input channels) and
    ``dim_out`` (number of output channels). A kernel of size 5 shrinks each
    side of the image by 4, and a max-pool whose kernel and stride are both 2
    halves each side; the per-layer comments trace this for a 28 x 28 input.

    Returns the blob produced by the final ``brew.softmax`` call.
    """
    conv2_channels = 100
    hidden_units = 500

    # 28 x 28 -> 24 x 24 (5x5 convolution)
    stage = brew.conv(model, data, 'conv1', dim_in=1, dim_out=20, kernel=5)
    # 24 x 24 -> 12 x 12 (2x2 max-pool, stride 2)
    stage = brew.max_pool(model, stage, 'pool1', kernel=2, stride=2)
    # 12 x 12 -> 8 x 8 (5x5 convolution)
    stage = brew.conv(
        model, stage, 'conv2', dim_in=20, dim_out=conv2_channels, kernel=5)
    # 8 x 8 -> 4 x 4 (2x2 max-pool, stride 2)
    stage = brew.max_pool(model, stage, 'pool2', kernel=2, stride=2)

    # 100 * 4 * 4: conv2's output channels multiplied by the image size.
    dense = brew.fc(
        model, stage, 'fc3',
        dim_in=conv2_channels * 4 * 4, dim_out=hidden_units)
    # ReLU writes back into the fc3 blob.
    activated = brew.relu(model, dense, dense)
    logits = brew.fc(
        model, activated, 'pred', dim_in=hidden_units, dim_out=10)
    return brew.softmax(model, logits, 'softmax')

Source File: demo_caffe2.py From tensorboardX with MIT License

5 votes

def AddAccuracy(model, softmax, label):
    """Attach an "accuracy" op comparing ``softmax`` with ``label``.

    Returns the blob created by ``brew.accuracy``.
    """
    accuracy_blob = brew.accuracy(model, [softmax, label], "accuracy")
    return accuracy_blob

Source File: test_caffe2.py From tensorboardX with MIT License

5 votes

def test_simple_model(self):
        """Build a small conv net and compare its graph def with the expected proto.

        The model is assembled under two name scopes ("conv1" and
        "classifier"), switched to MKL, given gradient operators, converted
        with ``tb.model_to_graph_def`` and checked through ``compare_proto``.
        """
        model = model_helper.ModelHelper(name="mnist")
        # The fed blobs do not match what the layers declare (3 input channels
        # vs. dim_in=1, a (1, 1000) label). No net is run in this test, only
        # built and converted.
        # NOTE(review): presumably that is why the mismatch is harmless --
        # confirm model_to_graph_def never executes the net.
        workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
        # Builtin int replaces the np.int alias, which was deprecated in
        # NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
        workspace.FeedBlob("label", np.random.randn(1, 1000).astype(int))

        with core.NameScope("conv1"):
            conv1 = brew.conv(model, "data", 'conv1', dim_in=1, dim_out=20, kernel=5)
            # Image size: 24 x 24 -> 12 x 12
            pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
            # Image size: 12 x 12 -> 8 x 8
            conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=100, kernel=5)
            # Image size: 8 x 8 -> 4 x 4
            pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        with core.NameScope("classifier"):
            # 100 * 4 * 4 stands for dim_out from previous layer multiplied by the image size
            fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
            relu = brew.relu(model, fc3, fc3)
            pred = brew.fc(model, relu, 'pred', 500, 10)
            softmax = brew.softmax(model, pred, 'softmax')
            xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
            # compute the expected loss
            loss = model.AveragedLoss(xent, "loss")
        # Switch both the main net and the init net to MKL before the
        # gradient operators are added.
        model.net.RunAllOnMKL()
        model.param_init_net.RunAllOnMKL()
        model.AddGradientOperators([loss], skip=1)
        blob_name_tracker = {}
        graph = tb.model_to_graph_def(
            model,
            blob_name_tracker=blob_name_tracker,
            shapes={},
            show_simplified=False,
        )

        compare_proto(graph, self)

Source File: CIFAR10_Part2.py From tutorials with Apache License 2.0

4 votes

def Add_Original_CIFAR10_Model(model, data, num_classes, image_height, image_width, image_channels):
    """Build the three-stage CIFAR-10 conv net and return its softmax blob.

    The spatial size is tracked with ``update_dims`` after every conv and
    pooling layer so that the first fully connected layer can be sized as
    ``64 * height * width``.
    """
    # Stage 1: convolution -> max pooling -> ReLU
    conv1 = brew.conv(model, data, 'conv1', dim_in=image_channels, dim_out=32, kernel=5, stride=1, pad=2)
    height, width = update_dims(height=image_height, width=image_width, kernel=5, stride=1, pad=2)
    pool1 = brew.max_pool(model, conv1, 'pool1', kernel=3, stride=2)
    height, width = update_dims(height=height, width=width, kernel=3, stride=2, pad=0)
    relu1 = brew.relu(model, pool1, 'relu1')

    # Stage 2: convolution -> ReLU -> average pooling
    conv2 = brew.conv(model, relu1, 'conv2', dim_in=32, dim_out=32, kernel=5, stride=1, pad=2)
    height, width = update_dims(height=height, width=width, kernel=5, stride=1, pad=2)
    relu2 = brew.relu(model, conv2, 'relu2')
    pool2 = brew.average_pool(model, relu2, 'pool2', kernel=3, stride=2)
    height, width = update_dims(height=height, width=width, kernel=3, stride=2, pad=0)

    # Stage 3: convolution -> ReLU -> average pooling
    conv3 = brew.conv(model, pool2, 'conv3', dim_in=32, dim_out=64, kernel=5, stride=1, pad=2)
    height, width = update_dims(height=height, width=width, kernel=5, stride=1, pad=2)
    relu3 = brew.relu(model, conv3, 'relu3')
    pool3 = brew.average_pool(model, relu3, 'pool3', kernel=3, stride=2)
    height, width = update_dims(height=height, width=width, kernel=3, stride=2, pad=0)

    # Classifier: two fully connected layers, sized from the tracked dims.
    flat_features = 64*height*width
    fc1 = brew.fc(model, pool3, 'fc1', dim_in=flat_features, dim_out=64)
    fc2 = brew.fc(model, fc1, 'fc2', dim_in=64, dim_out=num_classes)

    # Softmax over the class scores
    return brew.softmax(model, fc2, 'softmax')


# ## Test Saved Model From Part 1
# 
# ### Construct Model for Testing
# 
# The first thing we need is a model helper object that we can attach the lmdb reader to.

# In[4]:


# Create a ModelHelper object with init_params=False

Source File: classification_no_db_example.py From peters-stuff with GNU General Public License v3.0

4 votes

def train(INIT_NET, PREDICT_NET, epochs, batch_size, device_opts) :
    """Train the toy classifier, evaluate it on two inputs and save it.

    :param INIT_NET: Forwarded to ``save_net`` together with ``PREDICT_NET``
                     (presumably output locations -- confirm in ``save_net``).
    :param PREDICT_NET: See ``INIT_NET``.
    :param epochs: Number of outer iterations; each one feeds a fresh random
                   batch and runs the training net 10 times on it.
    :param batch_size: Number of samples per generated batch.
    :param device_opts: Device option used for feeding blobs and for
                        building the nets.

    Progress is reported with ``print``. Each call passes one pre-formatted
    string, so the output is the same on Python 2 and Python 3.
    """
    # Feed an initial batch before the nets are built and created.
    data, label = get_data(batch_size)
    workspace.FeedBlob("data", data, device_option=device_opts)
    workspace.FeedBlob("label", label, device_option=device_opts)

    train_model= model_helper.ModelHelper(name="train_net")
    softmax = create_model(train_model, device_opts=device_opts)
    add_training_operators(softmax, train_model, device_opts=device_opts)
    with core.DeviceScope(device_opts):
        # NOTE(review): the original author questioned whether this has any
        # effect ("any effect???"). It is added after the optimizer ops, so
        # it may indeed change nothing -- confirm against the Caffe2
        # optimizer documentation before relying on it.
        brew.add_weight_decay(train_model, 0.001)

    workspace.RunNetOnce(train_model.param_init_net)
    workspace.CreateNet(train_model.net)

    print('\ntraining for %s epochs' % (epochs,))

    for j in range(0, epochs):
        data, label = get_data(batch_size)

        workspace.FeedBlob("data", data, device_option=device_opts)
        workspace.FeedBlob("label", label, device_option=device_opts)

        workspace.RunNet(train_model.net, 10)   # run for 10 times
        print(str(j) + ': ' + str(workspace.FetchBlob("loss")) + ' - ' + str(workspace.FetchBlob("accuracy")))

    print('training done')

    print('\nrunning test model')

    # The test net is built with init_params=False; its init net is still
    # run before the net is (re)created with overwrite=True.
    test_model= model_helper.ModelHelper(name="test_net", init_params=False)
    create_model(test_model, device_opts=device_opts)
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net, overwrite=True)

    # Evaluate on one all-zeros and one all-ones image, in that order.
    for title, fill in (("zeros", np.zeros), ("ones", np.ones)):
        data = fill((1,1,30,30)).astype('float32')
        workspace.FeedBlob("data", data, device_option=device_opts)
        workspace.RunNet(test_model.net, 1)
        # Nothing runs between the two uses, so one fetch is enough.
        probabilities = workspace.FetchBlob("softmax")
        print("\nInput: " + title)
        print("Output: %s" % (probabilities,))
        print("Output class: %s" % (np.argmax(probabilities),))

    print('\nsaving test model')

    save_net(INIT_NET, PREDICT_NET, test_model)

Python caffe2.python.brew.softmax() Examples