Python blocks.initialization.Constant() Examples

The following are 30 code examples of blocks.initialization.Constant(), drawn from open-source projects; the source file and project are noted above each example. You may also want to check out all available functions and classes of the blocks.initialization module.
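Before the examples, a minimal standalone sketch of the API (assuming only blocks, numpy, and theano are installed): Constant(value).generate(rng, shape) returns an array of the given shape filled with value, cast to theano.config.floatX. The rng argument is unused by Constant, so None is accepted (see Example #7).

from blocks.initialization import Constant

# Fill a (3, 4) array with the constant 2; the rng argument is ignored.
weights = Constant(2.).generate(None, (3, 4))
print(weights.shape)  # (3, 4)
print(weights[0, 0])  # 2.0

# The value broadcasts against the requested shape (see Example #7).
rows = Constant([1, 2, 3]).generate(None, (5, 3))
print(rows[4])  # [1. 2. 3.]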
Example #1
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_mlp_apply():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh().apply, None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
    assert mlp.rng == mlp.linear_transformations[0].rng 
Example #2
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_mlp():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh(), None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)
    assert mlp.rng == mlp.linear_transformations[0].rng 
Example #3
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_sequence_variable_outputs():
    x = tensor.matrix()

    linear_1 = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                      biases_init=Constant(1))

    fork = Fork(input_dim=8, output_names=['linear_2_1', 'linear_2_2'],
                output_dims=[4, 5], prototype=Linear(),
                weights_init=Constant(3), biases_init=Constant(4))
    sequence = Sequence([linear_1.apply, fork.apply])
    sequence.initialize()
    y_1, y_2 = sequence.apply(x)
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y_1.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8))).dot(
            3 * numpy.ones((8, 4))) + 4 * numpy.ones((4, 4)))
    assert_allclose(
        y_2.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8))).dot(
            3 * numpy.ones((8, 5))) + 4 * numpy.ones((4, 5))) 
Example #4
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_apply_batch_normalization_nested():
    x = tensor.matrix()
    eps = 1e-8
    batch_dims = (3, 9)
    bn = BatchNormalization(input_dim=5, epsilon=eps)
    mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5],
              weights_init=Constant(0.4), biases_init=Constant(1))
    mlp.initialize()
    y = mlp.apply(x)
    cg = apply_batch_normalization(ComputationGraph([y]))
    y_bn = cg.outputs[0]
    rng = numpy.random.RandomState((2016, 1, 18))
    x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX)
    y_ = y_bn.eval({x: x_})
    W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s)
                            .get_value(borrow=True)), ['W', 'b'])
    z_ = numpy.dot(x_, W_) + b_
    y_expected = numpy.tanh((z_ - z_.mean(axis=0)) /
                            numpy.sqrt(z_.var(axis=0) + eps))
    assert_allclose(y_, y_expected, rtol=1e-3) 
Example #5
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_linear():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8)))

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    use_bias=False)
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(y.eval({x: x_val}), x_val.dot(2 * numpy.ones((16, 8)))) 
Example #6
Source File: test_initialization.py    From attention-lvcsr with MIT License
def test_sparse():
    rng = numpy.random.RandomState(1)

    def check_sparse(rng, num_init, weights_init, sparse_init, shape, total):
        weights = Sparse(num_init=num_init, weights_init=weights_init,
                         sparse_init=sparse_init).generate(rng, shape)
        assert weights.shape == shape
        assert weights.dtype == theano.config.floatX
        if sparse_init is None:
            if isinstance(num_init, numbers.Integral):
                assert (numpy.count_nonzero(weights) <=
                        weights.size - num_init * weights.shape[0])
            else:
                assert (numpy.count_nonzero(weights) <=
                        weights.size - num_init * weights.shape[1])
        if total is not None:
            assert numpy.sum(weights) == total

    yield check_sparse, rng, 5, Constant(1.), None, (10, 10), None
    yield check_sparse, rng, 0.5, Constant(1.), None, (10, 10), None
    yield check_sparse, rng, 0.5, Constant(1.), Constant(1.), (10, 10), None
    yield check_sparse, rng, 3, Constant(1.), None, (10, 10), 30
    yield check_sparse, rng, 3, Constant(0.), Constant(1.), (10, 10), 70
    yield check_sparse, rng, 0.3, Constant(1.), None, (10, 10), 30
    yield check_sparse, rng, 0.3, Constant(0.), Constant(1.), (10, 10), 70 
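A standalone sketch of what the checks above exercise (my reading of the test, not part of it): with an integer num_init, Sparse keeps num_init entries per row drawn from weights_init and fills the rest with zeros (or with sparse_init, if given), so three ones per row of a (10, 10) matrix sum to 30.

import numpy
from blocks.initialization import Constant, Sparse

rng = numpy.random.RandomState(1)
w = Sparse(num_init=3, weights_init=Constant(1.)).generate(rng, (10, 10))
assert w.shape == (10, 10)
assert w.sum() == 30  # three nonzero ones in each of the ten rows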
Example #7
Source File: test_initialization.py    From attention-lvcsr with MIT License
def test_constant():
    def check_constant(const, shape, ground_truth):
        # rng unused, so pass None.
        init = Constant(const).generate(None, ground_truth.shape)
        assert ground_truth.dtype == theano.config.floatX
        assert ground_truth.shape == init.shape
        assert_equal(ground_truth, init)

    # Test scalar init.
    yield (check_constant, 5, (5, 5),
           5 * numpy.ones((5, 5), dtype=theano.config.floatX))
    # Test broadcasting.
    yield (check_constant, [1, 2, 3], (7, 3),
           numpy.array([[1, 2, 3]] * 7, dtype=theano.config.floatX))
    yield (check_constant, numpy.array([[1], [2], [3]]), (3, 2),
           numpy.array([[1, 1], [2, 2], [3, 3]], dtype=theano.config.floatX)) 
Example #8
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_compute_weights_with_zero_mask():
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    energies = tensor.as_tensor_variable(
        numpy.random.rand(attended_length, batch_size))
    mask = tensor.as_tensor_variable(
        numpy.zeros((attended_length, batch_size)))
    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights)) 
Example #9
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[4, 5], output_dims=[3, 2],
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[3, 2], output_dims=[5, 4],
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()
    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(
        new_x.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot(
            2 * numpy.ones((3, 5))) + numpy.ones((4, 5)))
    assert_allclose(
        new_y.eval({y: y_val}),
        (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot(
            2 * numpy.ones((2, 4))) + numpy.ones((4, 4))) 
Example #10
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional():
    x = tensor.tensor4('x')
    num_channels = 4
    num_filters = 3
    batch_size = 5
    filter_size = (3, 3)
    conv = Convolutional(filter_size, num_filters, num_channels,
                         image_size=(17, 13), weights_init=Constant(1.),
                         biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels, 17, 13),
                       dtype=theano.config.floatX)
    assert_allclose(func(x_val),
                    numpy.prod(filter_size) * num_channels *
                    numpy.ones((batch_size, num_filters, 15, 11)) + 5)
    conv.image_size = (17, 13)
    conv.batch_size = 2  # This should have no effect on get_dim
    assert conv.get_dim('output') == (num_filters, 15, 11) 
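A quick arithmetic check of the expected value in the assertion above (a sketch, not part of the test): with all-ones weights and input, each output pixel sums a 3x3 window over 4 channels and adds the bias of 5.

filter_size = (3, 3)
num_channels = 4
bias = 5.
expected_pixel = filter_size[0] * filter_size[1] * num_channels + bias
assert expected_pixel == 41.0  # numpy.prod(filter_size) * num_channels + 5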
Example #11
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_transpose():
    x = tensor.tensor4('x')
    num_channels = 4
    num_filters = 3
    image_size = (8, 6)
    original_image_size = (17, 13)
    batch_size = 5
    filter_size = (3, 3)
    step = (2, 2)
    conv = ConvolutionalTranspose(
        original_image_size, filter_size, num_filters, num_channels, step=step,
        image_size=image_size, weights_init=Constant(1.),
        biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels) + image_size,
                       dtype=theano.config.floatX)
    expected_value = num_channels * numpy.ones(
        (batch_size, num_filters) + original_image_size)
    expected_value[:, :, 2:-2:2, :] += num_channels
    expected_value[:, :, :, 2:-2:2] += num_channels
    expected_value[:, :, 2:-2:2, 2:-2:2] += num_channels
    assert_allclose(func(x_val), expected_value + 5) 
Example #12
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_no_input_size():
    # suppose x is the output of some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 3)
    num_filters = 2
    num_channels = 5
    c = Convolutional(filter_size, num_filters, num_channels, tied_biases=True,
                      weights_init=Constant(1.), biases_init=Constant(1.))
    c.initialize()
    out = c.apply(x)
    assert c.get_dim('output') == (2, None, None)
    assert out.ndim == 4

    c = Convolutional(filter_size, num_filters, num_channels,
                      tied_biases=False, weights_init=Constant(1.),
                      biases_init=Constant(1.))
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         c.initialize) 
Example #13
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_linear_nan_allocation():
    x = tensor.matrix()

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.parameters[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.parameters[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2) 
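The test above verifies that parameters are allocated as NaN placeholders until initialize() is called. A sketch of the follow-up step (an assumption consistent with Example #5, not part of this test):

linear.initialize()
assert (linear.parameters[0].get_value() == 2).all()  # weights_init=Constant(2)
assert (linear.parameters[1].get_value() == 1).all()  # biases_init=Constant(1)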
Example #14
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def __init__(self, num_channels, num_filters, spatial_width, num_scales, filter_size, downsample_method='meanout', name=""):
        """
        A brick implementing a single layer in a multi-scale convolutional network.
        """
        super(MultiScaleConvolution, self).__init__()

        self.num_scales = num_scales
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.spatial_width = spatial_width
        self.downsample_method = downsample_method
        self.children = []

        print "adding MultiScaleConvolution layer"

        # for scale in range(self.num_scales-1, -1, -1):
        for scale in range(self.num_scales):
            print "scale %d"%scale
            # conv_nonlinearity comes from the enclosing module (not shown here)
            conv_layer = ConvolutionalActivation(activation=conv_nonlinearity.apply,
                filter_size=(filter_size, filter_size), num_filters=num_filters,
                num_channels=num_channels,
                image_size=(spatial_width // 2**scale, spatial_width // 2**scale),
                # assume images are spatially smooth -- in which case output magnitude scales with
                # # filter pixels rather than square root of # filter pixels, so initialize
                # accordingly.
                weights_init=IsotropicGaussian(std=np.sqrt(1./(num_filters))/filter_size**2),
                biases_init=Constant(0), border_mode='full', name=name+"scale%d"%scale)
            self.children.append(conv_layer) 
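A numeric illustration of the initialization scale used above (the num_filters and filter_size values are hypothetical):

import numpy as np

num_filters, filter_size = 16, 3
std = np.sqrt(1. / num_filters) / filter_size ** 2
print(std)  # 0.25 / 9 ≈ 0.0278, i.e. 1 / (num_filter_pixels * sqrt(num_filters))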
Example #15
Source File: test_graph.py    From attention-lvcsr with MIT License
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10),
                                                  dtype=theano.config.floatX)))
    assert len(snapshot) == 14 
Example #16
Source File: bricks.py    From image-captioning-for-mortals with BSD 3-Clause "New" or "Revised" License
def test_encoder():
        image_vects = tensor.matrix('image_vects')
        word_vects = tensor.tensor3('word_vects')
        batch_size = 2
        image_feature_dim = 64
        seq_len = 4
        embedding_dim = 300


        s = Encoder(
                  image_feature_dim=image_feature_dim
                , embedding_dim=embedding_dim
                , biases_init=Constant(0.)
                , weights_init=IsotropicGaussian(0.02)
                )
        s.initialize()
        iem, sem = s.apply(image_vects, word_vects)

        image_vects_tv = np.zeros((batch_size, image_feature_dim), dtype='float32')
        word_vects_tv = np.zeros((batch_size, seq_len, embedding_dim), dtype='float32')

        # expecting sentence embedding to be [batch_size, embedding_dim]
        f = theano.function([image_vects, word_vects], [iem, sem])
        i_emb, s_emb = f(image_vects_tv, word_vects_tv)

        print("""
            batch_size: %d
            image_feature_dim: %d
            sequence length: %d
            embedding dim: %d \n"""
            % (
                batch_size
              , image_feature_dim
              , seq_len
              , embedding_dim)
        )

        print "input image vectors: ", (batch_size, image_feature_dim)
        print "input word vectors: ", (batch_size, seq_len, embedding_dim)
        print "image embedding: ", i_emb.shape
        print "sentence embedding: ", s_emb.shape 
Example #17
Source File: test_machine_translation.py    From blocks-examples with MIT License
def test_sampling():

    # Create Theano variables
    sampling_input = theano.tensor.lmatrix('input')

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16,
        theano_seed=1234)
    sampling_representation = encoder.apply(
        sampling_input, theano.tensor.ones(sampling_input.shape))
    generateds = decoder.generate(sampling_input, sampling_representation)
    model = Model(generateds[1])

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated samples
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4 
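The initialization pattern above in miniature (a sketch with hypothetical bricks and dimensions): set weights_init and biases_init on a parent brick, call push_initialization_config() to propagate them to the children, then override individual children before initialize().

from blocks.bricks import MLP, Tanh
from blocks.initialization import Constant, IsotropicGaussian, Orthogonal

mlp = MLP([Tanh(), Tanh()], [4, 4, 4])
mlp.weights_init = IsotropicGaussian(0.01)
mlp.biases_init = Constant(0)
mlp.push_initialization_config()  # propagate settings to the Linear children
mlp.linear_transformations[0].weights_init = Orthogonal()  # override one child
mlp.initialize()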
Example #18
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #19
Source File: test_bricks.py    From attention-lvcsr with MIT License
def test_linear_maxout():
    x = tensor.matrix()

    linear_maxout = LinearMaxout(input_dim=16, output_dim=8, num_pieces=3,
                                 weights_init=Constant(2),
                                 biases_init=Constant(1))
    y = linear_maxout.apply(x)
    linear_maxout.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 24))) +
            numpy.ones((4, 24))).reshape(4, 8, 3).max(2)) 
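The arithmetic behind the assertion above, spelled out (a sketch): LinearMaxout applies a linear layer with output_dim * num_pieces = 24 units, where each unit is x.dot(w) + b = 16 * 2 + 1 = 33 for all-ones input; the max over the 3 identical pieces is again 33.

input_dim, num_pieces = 16, 3
unit_value = input_dim * 2 + 1  # all-ones input, Constant(2) weights, Constant(1) bias
assert unit_value == 33
# reshape (4, 24) -> (4, 8, 3), then max over the last axis: still 33 everywhere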
Example #20
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    act = Rectifier()

    conv = Convolutional((3, 3), 5, weights_init=Constant(1.),
                         biases_init=Constant(5.))
    pooling = MaxPooling(pooling_size=(pooling_size, pooling_size))
    conv2 = Convolutional((2, 2), 4, weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, act, pooling, conv2, act], num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 2)) *
             (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val) 
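Breaking down y_val above (a sketch of the arithmetic): the first convolution gives 3*3*4 + 5 = 41 per pixel, the Rectifier and MaxPooling leave the constant 41 unchanged, and the bias-free 2x2 convolution over 5 channels gives 41 * 4 * 5 = 820.

first_conv = 3 * 3 * 4 + 5  # all-ones 3x3 filter over 4 channels, plus bias 5
second_conv = first_conv * 2 * 2 * 5  # 2x2 filter over 5 channels, no bias
assert second_conv == (9 * 4 + 5) * 4 * 5 == 820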
Example #21
Source File: test_interfaces.py    From attention-lvcsr with MIT License
def test_linearlike_subclass_initialize_works_overridden_w():
    class NotQuiteLinear(Linear):
        @property
        def W(self):
            W = super(NotQuiteLinear, self).W
            return W / tensor.sqrt((W ** 2).sum(axis=0))

    brick = NotQuiteLinear(5, 10, weights_init=IsotropicGaussian(0.02),
                           biases_init=Constant(1))
    brick.initialize()
    assert not numpy.isnan(brick.parameters[0].get_value()).any()
    numpy.testing.assert_allclose((brick.W ** 2).sum(axis=0).eval(), 1,
                                  rtol=1e-6) 
Example #22
Source File: test_wrappers.py    From attention-lvcsr with MIT License
def test_with_extra_dims_is_serializable():
    brick = LinearWithExtraDims(
        input_dim=3, output_dim=4,
        weights_init=Constant(1), biases_init=Constant(0))
    brick.initialize()
    cPickle.loads(cPickle.dumps(brick)) 
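cPickle is the Python 2 name (or the six.moves alias); on Python 3 the equivalent round-trip check uses the standard pickle module (a sketch reusing brick from above):

import pickle
pickle.loads(pickle.dumps(brick))  # raises if the brick is not picklable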
Example #23
Source File: test_wrappers.py    From attention-lvcsr with MIT License
def test_with_extra_dims_ndim_gt_2():
    X = tensor.tensor4('X')
    brick = LinearWithExtraDims(
        input_dim=3, output_dim=4,
        weights_init=Constant(1), biases_init=Constant(0))
    brick.initialize()
    f = theano.function([X], brick.apply(X, extra_ndim=2))
    assert_allclose(
        f(numpy.ones(shape=(2, 2, 2, 3), dtype=theano.config.floatX)),
        3 * numpy.ones(shape=(2, 2, 2, 4), dtype=theano.config.floatX)) 
Example #24
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalization_inside_convolutional_sequence():
    """Test that BN bricks work in ConvolutionalSequences."""
    conv_seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         BatchNormalization(broadcastable=(False, True, True)),
         AveragePooling(pooling_size=(2, 2)),
         BatchNormalization(broadcastable=(False, False, False)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq_no_bn = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         AveragePooling(pooling_size=(2, 2)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq.initialize()
    conv_seq_no_bn.initialize()
    rng = numpy.random.RandomState((2015, 12, 17))
    input_ = random_unif(rng, (2, 9, 10, 8))

    x = theano.tensor.tensor4()
    ybn = conv_seq.apply(x)
    y = conv_seq_no_bn.apply(x)
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}))

    std = conv_seq.children[-2].population_stdev
    std.set_value(3 * std.get_value(borrow=True))
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}) / 3.) 
Example #25
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def __init__(self, n_layers_conv, n_layers_dense_lower, n_layers_dense_upper,
        n_hidden_conv, n_hidden_dense_lower, n_hidden_dense_lower_output, n_hidden_dense_upper,
        spatial_width, n_colors, n_scales, n_temporal_basis):
        """
        The multilayer perceptron, that provides temporal weighting coefficients for mu and sigma
        images. This consists of a lower segment with a convolutional MLP, and optionally with a
        dense MLP in parallel. The upper segment then consists of a per-pixel dense MLP
        (convolutional MLP with 1x1 kernel).
        """
        super(MLP_conv_dense, self).__init__()

        self.n_colors = n_colors
        self.spatial_width = spatial_width
        self.n_hidden_dense_lower = n_hidden_dense_lower
        self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
        self.n_hidden_conv = n_hidden_conv

        ## the lower layers
        self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv, spatial_width, n_colors, n_scales)
        self.children = [self.mlp_conv]
        if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
            n_input = n_colors*spatial_width**2
            n_output = n_hidden_dense_lower_output*spatial_width**2
            # dense_nonlinearity comes from the enclosing module (not shown here)
            self.mlp_dense_lower = MLP([dense_nonlinearity] * n_layers_dense_lower,
                [n_input] + [n_hidden_dense_lower] * (n_layers_dense_lower-1) + [n_output],
                name='MLP dense lower', weights_init=Orthogonal(), biases_init=Constant(0))
            self.children.append(self.mlp_dense_lower)
        else:
            n_hidden_dense_lower_output = 0

        ## the upper layers (applied to each pixel independently)
        n_output = n_colors*n_temporal_basis*2 # "*2" for both mu and sigma
        self.mlp_dense_upper = MLP([dense_nonlinearity] * (n_layers_dense_upper-1) + [Identity()],
            [n_hidden_conv+n_hidden_dense_lower_output] +
            [n_hidden_dense_upper] * (n_layers_dense_upper-1) + [n_output],
            name='MLP dense upper', weights_init=Orthogonal(), biases_init=Constant(0))
        self.children.append(self.mlp_dense_upper) 
Example #26
Source File: model.py    From blocks-char-rnn with MIT License
def initialize(to_init):
    for bricks in to_init:
        bricks.weights_init = initialization.Uniform(width=0.08)
        bricks.biases_init = initialization.Constant(0)
        bricks.initialize() 
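Hypothetical usage of the helper above (the particular bricks are assumptions; any Initializable bricks would do):

from blocks.bricks import Linear
from blocks.bricks.recurrent import LSTM

linear = Linear(input_dim=8, output_dim=4)
lstm = LSTM(dim=4)
initialize([linear, lstm])  # both get Uniform(width=0.08) weights, zero biases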
Example #27
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_sequence_content_attention():
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    seq_values = numpy.zeros((seq_len, batch_size, attended_dim),
                             dtype=theano.config.floatX)
    states_values = numpy.zeros((batch_size, state_dim),
                                dtype=theano.config.floatX)
    mask_values = numpy.zeros((seq_len, batch_size),
                              dtype=theano.config.floatX)
    # randomly generate a sensible mask
    for batch_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), batch_idx] = 1
    glimpses_values, weight_values = theano.function(
        [sequences, states, mask], [glimpses, weights])(
            seq_values, states_values, mask_values)
    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    assert numpy.all(weight_values.sum(axis=1) == 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0)) 
Example #28
Source File: test_saveload.py    From attention-lvcsr with MIT License
def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])]
    )
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar') 
Example #29
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_pkl', '_parameters',
                                               'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)
    
    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W])) 
Example #30
Source File: test_serialization.py    From attention-lvcsr with MIT License
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])