Python blocks.initialization.Constant() Examples

The following are 30 code examples of blocks.initialization.Constant(), drawn from open-source projects; the source file and project are noted above each example. You may also want to check out all available functions and classes of the blocks.initialization module.
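Before the examples, a minimal standalone sketch of the API (assuming only blocks, numpy, and theano are installed): Constant(value).generate(rng, shape) returns an array of the given shape filled with value, cast to theano.config.floatX. The rng argument is unused by Constant, so None is accepted (see Example #7).

from blocks.initialization import Constant

# Fill a (3, 4) array with the constant 2; the rng argument is ignored.
weights = Constant(2.).generate(None, (3, 4))
print(weights.shape)  # (3, 4)
print(weights[0, 0])  # 2.0

# The value broadcasts against the requested shape (see Example #7).
rows = Constant([1, 2, 3]).generate(None, (5, 3))
print(rows[4])  # [1. 2. 3.]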
Example #1
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_mlp_apply():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh().apply, None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
    assert mlp.rng == mlp.linear_transformations[0].rng 
Example #2
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_mlp():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh(), None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
    assert mlp.rng == mlp.linear_transformations[0].rng 
Example #3
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_sequence_variable_outputs():
    x = tensor.matrix()

    linear_1 = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                      biases_init=Constant(1))

    fork = Fork(input_dim=8, output_names=['linear_2_1', 'linear_2_2'],
                output_dims=[4, 5], prototype=Linear(),
                weights_init=Constant(3), biases_init=Constant(4))
    sequence = Sequence([linear_1.apply, fork.apply])
    sequence.initialize()
    y_1, y_2 = sequence.apply(x)
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y_1.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8))).dot(
            3 * numpy.ones((8, 4))) + 4 * numpy.ones((4, 4)))
    assert_allclose(
        y_2.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8))).dot(
            3 * numpy.ones((8, 5))) + 4 * numpy.ones((4, 5))) 
Example #4
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_apply_batch_normalization_nested():
    x = tensor.matrix()
    eps = 1e-8
    batch_dims = (3, 9)
    bn = BatchNormalization(input_dim=5, epsilon=eps)
    mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5],
              weights_init=Constant(0.4), biases_init=Constant(1))
    mlp.initialize()
    y = mlp.apply(x)
    cg = apply_batch_normalization(ComputationGraph([y]))
    y_bn = cg.outputs[0]
    rng = numpy.random.RandomState((2016, 1, 18))
    x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX)
    y_ = y_bn.eval({x: x_})
    W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s)
                            .get_value(borrow=True)), ['W', 'b'])
    z_ = numpy.dot(x_, W_) + b_
    y_expected = numpy.tanh((z_ - z_.mean(axis=0)) /
                            numpy.sqrt(z_.var(axis=0) + eps))
    assert_allclose(y_, y_expected, rtol=1e-3) 
Example #5
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_linear():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8)))

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    use_bias=False)
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(y.eval({x: x_val}), x_val.dot(2 * numpy.ones((16, 8)))) 
Example #6
Source File: test_initialization.py    From attention-lvcsr with MIT License
def test_sparse():
    rng = numpy.random.RandomState(1)

    def check_sparse(rng, num_init, weights_init, sparse_init, shape, total):
        weights = Sparse(num_init=num_init, weights_init=weights_init,
                         sparse_init=sparse_init).generate(rng, shape)
        assert weights.shape == shape
        assert weights.dtype == theano.config.floatX
        if sparse_init is None:
            if isinstance(num_init, numbers.Integral):
                assert (numpy.count_nonzero(weights) <=
                        weights.size - num_init * weights.shape[0])
            else:
                assert (numpy.count_nonzero(weights) <=
                        weights.size - num_init * weights.shape[1])
        if total is not None:
            assert numpy.sum(weights) == total

    yield check_sparse, rng, 5, Constant(1.), None, (10, 10), None
    yield check_sparse, rng, 0.5, Constant(1.), None, (10, 10), None
    yield check_sparse, rng, 0.5, Constant(1.), Constant(1.), (10, 10), None
    yield check_sparse, rng, 3, Constant(1.), None, (10, 10), 30
    yield check_sparse, rng, 3, Constant(0.), Constant(1.), (10, 10), 70
    yield check_sparse, rng, 0.3, Constant(1.), None, (10, 10), 30
    yield check_sparse, rng, 0.3, Constant(0.), Constant(1.), (10, 10), 70 
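A standalone sketch of what the checks above exercise (my reading of the test, not part of it): with an integer num_init, Sparse keeps num_init entries per row drawn from weights_init and fills the rest with zeros (or with sparse_init, if given), so three ones per row of a (10, 10) matrix sum to 30.

import numpy
from blocks.initialization import Constant, Sparse

rng = numpy.random.RandomState(1)
w = Sparse(num_init=3, weights_init=Constant(1.)).generate(rng, (10, 10))
assert w.shape == (10, 10)
assert w.sum() == 30  # three nonzero ones in each of the ten rows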
Example #7
Source File: test_initialization.py    From attention-lvcsr with MIT License
def test_constant():
    def check_constant(const, shape, ground_truth):
        # rng unused, so pass None.
        init = Constant(const).generate(None, ground_truth.shape)
        assert ground_truth.dtype == theano.config.floatX
        assert ground_truth.shape == init.shape
        assert_equal(ground_truth, init)

    # Test scalar init.
    yield (check_constant, 5, (5, 5),
           5 * numpy.ones((5, 5), dtype=theano.config.floatX))
    # Test broadcasting.
    yield (check_constant, [1, 2, 3], (7, 3),
           numpy.array([[1, 2, 3]] * 7, dtype=theano.config.floatX))
    yield (check_constant, numpy.array([[1], [2], [3]]), (3, 2),
           numpy.array([[1, 1], [2, 2], [3, 3]], dtype=theano.config.floatX)) 
Example #8
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_compute_weights_with_zero_mask():
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    energies = tensor.as_tensor_variable(
        numpy.random.rand(attended_length, batch_size))
    mask = tensor.as_tensor_variable(
        numpy.zeros((attended_length, batch_size)))
    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights)) 
Example #9
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[4, 5], output_dims=[3, 2],
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[3, 2], output_dims=[5, 4],
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()
    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(
        new_x.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot(
            2 * numpy.ones((3, 5))) + numpy.ones((4, 5)))
    assert_allclose(
        new_y.eval({y: y_val}),
        (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot(
            2 * numpy.ones((2, 4))) + numpy.ones((4, 4))) 
Example #10
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional():
    x = tensor.tensor4('x')
    num_channels = 4
    num_filters = 3
    batch_size = 5
    filter_size = (3, 3)
    conv = Convolutional(filter_size, num_filters, num_channels,
                         image_size=(17, 13), weights_init=Constant(1.),
                         biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels, 17, 13),
                       dtype=theano.config.floatX)
    assert_allclose(func(x_val),
                    numpy.prod(filter_size) * num_channels *
                    numpy.ones((batch_size, num_filters, 15, 11)) + 5)
    conv.image_size = (17, 13)
    conv.batch_size = 2  # This should have no effect on get_dim
    assert conv.get_dim('output') == (num_filters, 15, 11) 
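A quick arithmetic check of the expected value in the assertion above (a sketch, not part of the test): with all-ones weights and input, each output pixel sums a 3x3 window over 4 channels and adds the bias of 5.

filter_size = (3, 3)
num_channels = 4
bias = 5.
expected_pixel = filter_size[0] * filter_size[1] * num_channels + bias
assert expected_pixel == 41.0  # numpy.prod(filter_size) * num_channels + 5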
Example #11
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_transpose():
    x = tensor.tensor4('x')
    num_channels = 4
    num_filters = 3
    image_size = (8, 6)
    original_image_size = (17, 13)
    batch_size = 5
    filter_size = (3, 3)
    step = (2, 2)
    conv = ConvolutionalTranspose(
        original_image_size, filter_size, num_filters, num_channels, step=step,
        image_size=image_size, weights_init=Constant(1.),
        biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels) + image_size,
                       dtype=theano.config.floatX)
    expected_value = num_channels * numpy.ones(
        (batch_size, num_filters) + original_image_size)
    expected_value[:, :, 2:-2:2, :] += num_channels
    expected_value[:, :, :, 2:-2:2] += num_channels
    expected_value[:, :, 2:-2:2, 2:-2:2] += num_channels
    assert_allclose(func(x_val), expected_value + 5) 
Example #12
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_no_input_size():
    # suppose x is the output of some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 3)
    num_filters = 2
    num_channels = 5
    c = Convolutional(filter_size, num_filters, num_channels, tied_biases=True,
                      weights_init=Constant(1.), biases_init=Constant(1.))
    c.initialize()
    out = c.apply(x)
    assert c.get_dim('output') == (2, None, None)
    assert out.ndim == 4

    c = Convolutional(filter_size, num_filters, num_channels,
                      tied_biases=False, weights_init=Constant(1.),
                      biases_init=Constant(1.))
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         c.initialize) 
Example #13
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_linear_nan_allocation():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.parameters[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.parameters[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2) 
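The test above verifies that parameters are allocated as NaN placeholders until initialize() is called. A sketch of the follow-up step (an assumption consistent with Example #5, not part of this test):

linear.initialize()
assert (linear.parameters[0].get_value() == 2).all()  # weights_init=Constant(2)
assert (linear.parameters[1].get_value() == 1).all()  # biases_init=Constant(1)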
Example #14
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def __init__(self, num_channels, num_filters, spatial_width, num_scales, filter_size, downsample_method='meanout', name=""):
        """
        A brick implementing a single layer in a multi-scale convolutional network.
        """
        super(MultiScaleConvolution, self).__init__()

        self.num_scales = num_scales
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.spatial_width = spatial_width
        self.downsample_method = downsample_method
        self.children = []

        print "adding MultiScaleConvolution layer"

        # for scale in range(self.num_scales-1, -1, -1):
        for scale in range(self.num_scales):
            print "scale %d"%scale
            # conv_nonlinearity comes from the enclosing module (not shown here)
            conv_layer = ConvolutionalActivation(activation=conv_nonlinearity.apply,
                filter_size=(filter_size, filter_size), num_filters=num_filters,
                num_channels=num_channels,
                image_size=(spatial_width // 2**scale, spatial_width // 2**scale),
                # assume images are spatially smooth -- in which case output magnitude scales with
                # # filter pixels rather than square root of # filter pixels, so initialize
                # accordingly.
                weights_init=IsotropicGaussian(std=np.sqrt(1./(num_filters))/filter_size**2),
                biases_init=Constant(0), border_mode='full', name=name+"scale%d"%scale)
            self.children.append(conv_layer) 
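A numeric illustration of the initialization scale used above (the num_filters and filter_size values are hypothetical):

import numpy as np

num_filters, filter_size = 16, 3
std = np.sqrt(1. / num_filters) / filter_size ** 2
print(std)  # 0.25 / 9 ≈ 0.0278, i.e. 1 / (num_filter_pixels * sqrt(num_filters))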
Example #15
Source File: test_graph.py    From attention-lvcsr with MIT License
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10),
                                                  dtype=theano.config.floatX)))
    assert len(snapshot) == 14 
Example #16
Source File: bricks.py    From image-captioning-for-mortals with BSD 3-Clause "New" or "Revised" License
def test_encoder():
        image_vects = tensor.matrix('image_vects')
        word_vects = tensor.tensor3('word_vects')
        batch_size = 2
        image_feature_dim = 64
        seq_len = 4
        embedding_dim = 300


        s = Encoder(
                  image_feature_dim=image_feature_dim
                , embedding_dim=embedding_dim
                , biases_init=Constant(0.)
                , weights_init=IsotropicGaussian(0.02)
                )
        s.initialize()
        iem, sem = s.apply(image_vects, word_vects)

        image_vects_tv = np.zeros((batch_size, image_feature_dim), dtype='float32')
        word_vects_tv = np.zeros((batch_size, seq_len, embedding_dim), dtype='float32')

        # expecting sentence embedding to be [batch_size, embedding_dim]
        f = theano.function([image_vects, word_vects], [iem, sem])
        i_emb, s_emb = f(image_vects_tv, word_vects_tv)

        print("""
            batch_size: %d
            image_feature_dim: %d
            sequence length: %d
            embedding dim: %d \n"""
            % (
                batch_size
              , image_feature_dim
              , seq_len
              , embedding_dim)
        )

        print "input image vectors: ", (batch_size, image_feature_dim)
        print "input word vectors: ", (batch_size, seq_len, embedding_dim)
        print "image embedding: ", i_emb.shape
        print "sentence embedding: ", s_emb.shape 
Example #17
Source File: test_machine_translation.py    From blocks-examples with MIT License
def test_sampling():

    # Create Theano variables
    sampling_input = theano.tensor.lmatrix('input')

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16,
        theano_seed=1234)
    sampling_representation = encoder.apply(
        sampling_input, theano.tensor.ones(sampling_input.shape))
    generateds = decoder.generate(sampling_input, sampling_representation)
    model = Model(generateds[1])

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated samples
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4 
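The initialization pattern above in miniature (a sketch with hypothetical bricks and dimensions): set weights_init and biases_init on a parent brick, call push_initialization_config() to propagate them to the children, then override individual children before initialize().

from blocks.bricks import MLP, Tanh
from blocks.initialization import Constant, IsotropicGaussian, Orthogonal

mlp = MLP([Tanh(), Tanh()], [4, 4, 4])
mlp.weights_init = IsotropicGaussian(0.01)
mlp.biases_init = Constant(0)
mlp.push_initialization_config()  # propagate settings to the Linear children
mlp.linear_transformations[0].weights_init = Orthogonal()  # override one child
mlp.initialize()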
Example #18
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #19
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_linear_maxout():
    x = tensor.matrix()

    linear_maxout = LinearMaxout(input_dim=16, output_dim=8, num_pieces=3,
                                 weights_init=Constant(2),
                                 biases_init=Constant(1))
    y = linear_maxout.apply(x)
    linear_maxout.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 24))) +
            numpy.ones((4, 24))).reshape(4, 8, 3).max(2)) 
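The arithmetic behind the assertion above, spelled out (a sketch): LinearMaxout applies a linear layer with output_dim * num_pieces = 24 units, where each unit is x.dot(w) + b = 16 * 2 + 1 = 33 for all-ones input; the max over the 3 identical pieces is again 33.

input_dim, num_pieces = 16, 3
unit_value = input_dim * 2 + 1  # all-ones input, Constant(2) weights, Constant(1) bias
assert unit_value == 33
# reshape (4, 24) -> (4, 8, 3), then max over the last axis: still 33 everywhere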
Example #20
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    act = Rectifier()

    conv = Convolutional((3, 3), 5, weights_init=Constant(1.),
                         biases_init=Constant(5.))
    pooling = MaxPooling(pooling_size=(pooling_size, pooling_size))
    conv2 = Convolutional((2, 2), 4, weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, act, pooling, conv2, act], num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 2)) *
             (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val) 
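Breaking down y_val above (a sketch of the arithmetic): the first convolution gives 3*3*4 + 5 = 41 per pixel, the Rectifier and MaxPooling leave the constant 41 unchanged, and the bias-free 2x2 convolution over 5 channels gives 41 * 4 * 5 = 820.

first_conv = 3 * 3 * 4 + 5  # all-ones 3x3 filter over 4 channels, plus bias 5
second_conv = first_conv * 2 * 2 * 5  # 2x2 filter over 5 channels, no bias
assert second_conv == (9 * 4 + 5) * 4 * 5 == 820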
Example #21
Source File: test_interfaces.py    From attention-lvcsr with MIT License
def test_linearlike_subclass_initialize_works_overridden_w():
    class NotQuiteLinear(Linear):
        @property
        def W(self):
            W = super(NotQuiteLinear, self).W
            return W / tensor.sqrt((W ** 2).sum(axis=0))

    brick = NotQuiteLinear(5, 10, weights_init=IsotropicGaussian(0.02),
                           biases_init=Constant(1))
    brick.initialize()
    assert not numpy.isnan(brick.parameters[0].get_value()).any()
    numpy.testing.assert_allclose((brick.W ** 2).sum(axis=0).eval(), 1,
                                  rtol=1e-6) 
Example #22
Source File: test_wrappers.py    From attention-lvcsr with MIT License
def test_with_extra_dims_is_serializable():
    brick = LinearWithExtraDims(
        input_dim=3, output_dim=4,
        weights_init=Constant(1), biases_init=Constant(0))
    brick.initialize()
    cPickle.loads(cPickle.dumps(brick)) 
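cPickle is the Python 2 name (or the six.moves alias); on Python 3 the equivalent round-trip check uses the standard pickle module (a sketch reusing brick from above):

import pickle
pickle.loads(pickle.dumps(brick))  # raises if the brick is not picklable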
Example #23
Source File: test_wrappers.py    From attention-lvcsr with MIT License
def test_with_extra_dims_ndim_gt_2():
    X = tensor.tensor4('X')
    brick = LinearWithExtraDims(
        input_dim=3, output_dim=4,
        weights_init=Constant(1), biases_init=Constant(0))
    brick.initialize()
    f = theano.function([X], brick.apply(X, extra_ndim=2))
    assert_allclose(
        f(numpy.ones(shape=(2, 2, 2, 3), dtype=theano.config.floatX)),
        3 * numpy.ones(shape=(2, 2, 2, 4), dtype=theano.config.floatX)) 
Example #24
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalization_inside_convolutional_sequence():
    """Test that BN bricks work in ConvolutionalSequences."""
    conv_seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         BatchNormalization(broadcastable=(False, True, True)),
         AveragePooling(pooling_size=(2, 2)),
         BatchNormalization(broadcastable=(False, False, False)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq_no_bn = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         AveragePooling(pooling_size=(2, 2)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq.initialize()
    conv_seq_no_bn.initialize()
    rng = numpy.random.RandomState((2015, 12, 17))
    input_ = random_unif(rng, (2, 9, 10, 8))

    x = theano.tensor.tensor4()
    ybn = conv_seq.apply(x)
    y = conv_seq_no_bn.apply(x)
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}))

    std = conv_seq.children[-2].population_stdev
    std.set_value(3 * std.get_value(borrow=True))
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}) / 3.) 
Example #25
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def __init__(self, n_layers_conv, n_layers_dense_lower, n_layers_dense_upper,
        n_hidden_conv, n_hidden_dense_lower, n_hidden_dense_lower_output, n_hidden_dense_upper,
        spatial_width, n_colors, n_scales, n_temporal_basis):
        """
        The multilayer perceptron, that provides temporal weighting coefficients for mu and sigma
        images. This consists of a lower segment with a convolutional MLP, and optionally with a
        dense MLP in parallel. The upper segment then consists of a per-pixel dense MLP
        (convolutional MLP with 1x1 kernel).
        """
        super(MLP_conv_dense, self).__init__()

        self.n_colors = n_colors
        self.spatial_width = spatial_width
        self.n_hidden_dense_lower = n_hidden_dense_lower
        self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
        self.n_hidden_conv = n_hidden_conv

        ## the lower layers
        self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv, spatial_width, n_colors, n_scales)
        self.children = [self.mlp_conv]
        if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
            n_input = n_colors*spatial_width**2
            n_output = n_hidden_dense_lower_output*spatial_width**2
            # dense_nonlinearity comes from the enclosing module (not shown here)
            self.mlp_dense_lower = MLP([dense_nonlinearity] * n_layers_dense_lower,
                [n_input] + [n_hidden_dense_lower] * (n_layers_dense_lower-1) + [n_output],
                name='MLP dense lower', weights_init=Orthogonal(), biases_init=Constant(0))
            self.children.append(self.mlp_dense_lower)
        else:
            n_hidden_dense_lower_output = 0

        ## the upper layers (applied to each pixel independently)
        n_output = n_colors*n_temporal_basis*2 # "*2" for both mu and sigma
        self.mlp_dense_upper = MLP([dense_nonlinearity] * (n_layers_dense_upper-1) + [Identity()],
            [n_hidden_conv+n_hidden_dense_lower_output] +
            [n_hidden_dense_upper] * (n_layers_dense_upper-1) + [n_output],
            name='MLP dense upper', weights_init=Orthogonal(), biases_init=Constant(0))
        self.children.append(self.mlp_dense_upper) 
Example #26
Source File: model.py    From blocks-char-rnn with MIT License
def initialize(to_init):
    for bricks in to_init:
        bricks.weights_init = initialization.Uniform(width=0.08)
        bricks.biases_init = initialization.Constant(0)
        bricks.initialize() 
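Hypothetical usage of the helper above (the particular bricks are assumptions; any Initializable bricks would do):

from blocks.bricks import Linear
from blocks.bricks.recurrent import LSTM

linear = Linear(input_dim=8, output_dim=4)
lstm = LSTM(dim=4)
initialize([linear, lstm])  # both get Uniform(width=0.08) weights, zero biases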
Example #27
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_sequence_content_attention():
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    seq_values = numpy.zeros((seq_len, batch_size, attended_dim),
                             dtype=theano.config.floatX)
    states_values = numpy.zeros((batch_size, state_dim),
                                dtype=theano.config.floatX)
    mask_values = numpy.zeros((seq_len, batch_size),
                              dtype=theano.config.floatX)
    # randomly generate a sensible mask
    for batch_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), batch_idx] = 1
    glimpses_values, weight_values = theano.function(
        [sequences, states, mask], [glimpses, weights])(
            seq_values, states_values, mask_values)
    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    assert numpy.all(weight_values.sum(axis=1) == 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0)) 
Example #28
Source File: test_saveload.py    From attention-lvcsr with MIT License
def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])]
    )
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar') 
Example #29
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_pkl', '_parameters',
                                               'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)
    
    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W])) 
Example #30
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])