Python blocks.bricks.MLP Examples

The following are 12 code examples of blocks.bricks.MLP(), collected from open-source projects. Each example notes its source file and project so you can look it up in context. You may also want to check out the other functions and classes available in the blocks.bricks module.
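Before diving in, here is a minimal sketch of the typical life cycle of an MLP brick, distilled from the usage patterns in the examples below (the layer sizes and variable names are illustrative, not taken from any one project): construct the brick with matching activations and dims, initialize its parameters, then apply it to a symbolic input.

from blocks.bricks import MLP, Tanh, Softmax
from blocks.initialization import Constant, IsotropicGaussian
from theano import tensor

# Construct: two activation bricks, so `dims` needs three entries
# (input size, hidden size, output size).
mlp = MLP(activations=[Tanh(), Softmax()], dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()  # allocate parameters and apply the initialization schemes

x = tensor.matrix('features')
probs = mlp.apply(x)  # symbolic forward pass through all layers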
Example #1
Source File: test_graph.py    From attention-lvcsr with MIT License
def test_collect():
    x = tensor.matrix()
    mlp = MLP(activations=[Logistic(), Logistic()], dims=[784, 100, 784],
              use_bias=False)
    cost = SquaredError().apply(x, mlp.apply(x))
    cg = ComputationGraph(cost)
    var_filter = VariableFilter(roles=[PARAMETER])
    W1, W2 = var_filter(cg.variables)
    for i, W in enumerate([W1, W2]):
        W.set_value(numpy.ones_like(W.get_value()) * (i + 1))
    new_cg = collect_parameters(cg, cg.shared_variables)
    collected_parameters, = new_cg.shared_variables
    assert numpy.all(collected_parameters.get_value()[:784 * 100] == 1.)
    assert numpy.all(collected_parameters.get_value()[784 * 100:] == 2.)
    assert collected_parameters.ndim == 1
    W1, W2 = VariableFilter(roles=[COLLECTED])(new_cg.variables)
    assert W1.eval().shape == (784, 100)
    assert numpy.all(W1.eval() == 1.)
    assert W2.eval().shape == (100, 784)
    assert numpy.all(W2.eval() == 2.) 
Example #2
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def __init__(self, n_layers_conv, n_layers_dense_lower, n_layers_dense_upper,
             n_hidden_conv, n_hidden_dense_lower, n_hidden_dense_lower_output,
             n_hidden_dense_upper, spatial_width, n_colors, n_scales,
             n_temporal_basis):
        """
        The multilayer perceptron, that provides temporal weighting coefficients for mu and sigma
        images. This consists of a lower segment with a convolutional MLP, and optionally with a
        dense MLP in parallel. The upper segment then consists of a per-pixel dense MLP
        (convolutional MLP with 1x1 kernel).
        """
        super(MLP_conv_dense, self).__init__()

        self.n_colors = n_colors
        self.spatial_width = spatial_width
        self.n_hidden_dense_lower = n_hidden_dense_lower
        self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
        self.n_hidden_conv = n_hidden_conv

        ## the lower layers
        self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv, spatial_width, n_colors, n_scales)
        self.children = [self.mlp_conv]
        if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
            n_input = n_colors*spatial_width**2
            n_output = n_hidden_dense_lower_output*spatial_width**2
            # dense_nonlinearity is an activation brick defined at module
            # level in the original file; the dense tower has
            # n_layers_dense_lower layers.
            self.mlp_dense_lower = MLP(
                [dense_nonlinearity] * n_layers_dense_lower,
                [n_input] + [n_hidden_dense_lower] * (n_layers_dense_lower - 1)
                + [n_output],
                name='MLP dense lower', weights_init=Orthogonal(),
                biases_init=Constant(0))
            self.children.append(self.mlp_dense_lower)
        else:
            n_hidden_dense_lower_output = 0

        ## the upper layers (applied to each pixel independently)
        n_output = n_colors*n_temporal_basis*2 # "*2" for both mu and sigma
        self.mlp_dense_upper = MLP([dense_nonlinearity] * (n_layers_dense_upper-1) + [Identity()],
            [n_hidden_conv+n_hidden_dense_lower_output] +
            [n_hidden_dense_upper] * (n_layers_dense_upper-1) + [n_output],
            name='MLP dense upper', weights_init=Orthogonal(), biases_init=Constant(0))
        self.children.append(self.mlp_dense_upper) 
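Both MLP constructions above lean on the same invariant: blocks pairs each entry of activations with one Linear brick, so dims must contain exactly one more entry than activations. A self-contained sketch of that correspondence, with made-up layer sizes:

from blocks.bricks import Identity, MLP, Rectifier
from blocks.initialization import Constant, Orthogonal

# Two activation bricks -> dims lists three sizes: input, hidden, output.
mlp = MLP(activations=[Rectifier(), Identity()], dims=[32, 64, 16],
          weights_init=Orthogonal(), biases_init=Constant(0))
mlp.initialize()
assert len(mlp.linear_transformations) == 2  # one Linear per activation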
Example #3
Source File: test_utils.py    From attention-lvcsr with MIT License
def setUp(self):
        self.mlp = MLP([Sequence([Identity(name='id1').apply,
                                  Tanh(name='tanh1').apply],
                                 name='sequence1'),
                        Sequence([Logistic(name='logistic1').apply,
                                  Identity(name='id2').apply,
                                  Tanh(name='tanh2').apply],
                                 name='sequence2'),
                        Logistic(name='logistic2'),
                        Sequence([Sequence([Logistic(name='logistic3').apply],
                                           name='sequence4').apply],
                                 name='sequence3')],
                       [10, 5, 9, 5, 9]) 
Example #4
Source File: test_utils.py    From attention-lvcsr with MIT License
def test_find_zeroth_level(self):
        found = find_bricks([self.mlp], lambda x: isinstance(x, MLP))
        assert len(found) == 1
        assert found[0] == self.mlp 
Example #5
Source File: test_utils.py    From attention-lvcsr with MIT License
def test_find_zeroth_level_repeated(self):
        found = find_bricks([self.mlp, self.mlp], lambda x: isinstance(x, MLP))
        assert len(found) == 1
        assert found[0] == self.mlp 
Example #6
Source File: test_graph.py    From attention-lvcsr with MIT License
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10),
                                                  dtype=theano.config.floatX)))
    assert len(snapshot) == 14 
Example #7
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #8
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped
    # (foo is a function defined at module level in the original test file).
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters']) 
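The test above exercises several corner cases at once; stripped to its core, the dump/load round trip it relies on looks like the following sketch (same blocks.serialization calls, with a hypothetical file name):

from blocks.bricks import MLP
from blocks.initialization import Constant
from blocks.serialization import dump, load, load_parameters

mlp = MLP(activations=[None, None], dims=[10, 10, 10],
          weights_init=Constant(1.), use_bias=False)
mlp.initialize()

with open('model.tar', 'wb') as dst:
    # Pickle the brick and store the named parameters alongside it.
    dump(mlp, dst, parameters=[mlp.children[0].W, mlp.children[1].W])
with open('model.tar', 'rb') as src:
    params = load_parameters(src)  # {'/mlp/linear_0.W': ndarray, ...}
with open('model.tar', 'rb') as src:
    mlp_restored = load(src)       # unpickle the brick itself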
Example #9
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_pkl', '_parameters',
                                               'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)
    
    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))

    # Check the error when adding an object with parameters to a dump
    # that was saved without parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W])) 
Example #10
Source File: test_model.py    From attention-lvcsr with MIT License
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)

    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper) 
Example #11
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run() 
Example #12
Source File: pacgan_task.py    From PacGAN with MIT License
def create_model_brick(self):
        decoder = MLP(
            dims=[self._config["num_zdim"], self._config["gen_hidden_size"], self._config["gen_hidden_size"], self._config["gen_hidden_size"], self._config["gen_hidden_size"], self._config["num_xdim"]],
            activations=[Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h1'),
                         Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h2'),
                         Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h3'),
                         Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h4'),
                         Identity(name='decoder_out')],
            use_bias=False,
            name='decoder')

        discriminator = Sequence(
            application_methods=[
                LinearMaxout(
                    input_dim=self._config["num_xdim"] * self._config["num_packing"],
                    output_dim=self._config["disc_hidden_size"],
                    num_pieces=self._config["disc_maxout_pieces"],
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_h1').apply,
                LinearMaxout(
                    input_dim=self._config["disc_hidden_size"],
                    output_dim=self._config["disc_hidden_size"],
                    num_pieces=self._config["disc_maxout_pieces"],
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_h2').apply,
                LinearMaxout(
                    input_dim=self._config["disc_hidden_size"],
                    output_dim=self._config["disc_hidden_size"],
                    num_pieces=self._config["disc_maxout_pieces"],
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_h3').apply,
                Linear(
                    input_dim=self._config["disc_hidden_size"],
                    output_dim=1,
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_out').apply],
            name='discriminator')

        gan = PacGAN(decoder=decoder, discriminator=discriminator,
                     weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                     biases_init=self._config["biases_init"], name='gan')
        gan.push_allocation_config()
        # The last decoder layer gets a bias even though the MLP itself was
        # built with use_bias=False.
        decoder.linear_transformations[-1].use_bias = True
        gan.initialize()

        n_disc = numpy.sum([numpy.prod(v.shape.eval()) for v in
                            Selector(gan.discriminator).get_parameters().values()])
        print("Number of parameters in discriminator: {}".format(n_disc))
        n_dec = numpy.sum([numpy.prod(v.shape.eval()) for v in
                           Selector(gan.decoder).get_parameters().values()])
        print("Number of parameters in decoder: {}".format(n_dec))

        return gan