Python blocks.initialization.IsotropicGaussian() Examples

The following are 13 code examples of blocks.initialization.IsotropicGaussian(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module blocks.initialization, or try the search function.
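IsotropicGaussian is an initialization scheme from blocks.initialization: it fills a parameter array with values drawn independently from a normal distribution, taking the standard deviation first and the mean second (see Example #5 below), and its generate(rng, shape) method returns a numpy array in theano.config.floatX. A minimal standalone sketch, with illustrative names not taken from any project below:

import numpy
from blocks.initialization import IsotropicGaussian

rng = numpy.random.RandomState(1)
# Draw a 100x200 weight matrix from N(mean=0, std=0.01) -- the scale
# several of the examples below pass as weights_init.
weights = IsotropicGaussian(0.01).generate(rng, (100, 200))
print(weights.shape, weights.dtype)  # (100, 200) in theano.config.floatX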
Example #1
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_compute_weights_with_zero_mask():
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    energies = tensor.as_tensor_variable(
        numpy.random.rand(attended_length, batch_size))
    mask = tensor.as_tensor_variable(
        numpy.zeros((attended_length, batch_size)))
    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights)) 
Example #2
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_stable_attention_weights():
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    # Random high energies with mu=800, sigma=50
    energies_val = (
        50. * numpy.random.randn(attended_length, batch_size) + 800
        ).astype(theano.config.floatX)
    energies = tensor.as_tensor_variable(energies_val)
    mask = tensor.as_tensor_variable(
        numpy.ones((attended_length, batch_size)))
    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights)) 
Example #3
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def __init__(self, num_channels, num_filters, spatial_width, num_scales, filter_size, downsample_method='meanout', name=""):
        """
        A brick implementing a single layer in a multi-scale convolutional network.
        """
        super(MultiScaleConvolution, self).__init__()

        self.num_scales = num_scales
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.spatial_width = spatial_width
        self.downsample_method = downsample_method
        self.children = []

        print "adding MultiScaleConvolution layer"

        # for scale in range(self.num_scales-1, -1, -1):
        for scale in range(self.num_scales):
            print "scale %d"%scale
            conv_layer = ConvolutionalActivation(activation=conv_nonlinearity.apply,
                filter_size=(filter_size,filter_size), num_filters=num_filters,
                num_channels=num_channels, image_size=(spatial_width/2**scale, spatial_width/2**scale),
                # assume images are spatially smooth -- in which case output magnitude scales with
                # # filter pixels rather than square root of # filter pixels, so initialize
                # accordingly.
                weights_init=IsotropicGaussian(std=np.sqrt(1./(num_filters))/filter_size**2),
                biases_init=Constant(0), border_mode='full', name=name+"scale%d"%scale)
            self.children.append(conv_layer) 
Example #4
Source File: test_interfaces.py    From attention-lvcsr with MIT License
def test_linearlike_subclass_initialize_works_overridden_w():
    class NotQuiteLinear(Linear):
        @property
        def W(self):
            W = super(NotQuiteLinear, self).W
            return W / tensor.sqrt((W ** 2).sum(axis=0))

    brick = NotQuiteLinear(5, 10, weights_init=IsotropicGaussian(0.02),
                           biases_init=Constant(1))
    brick.initialize()
    assert not numpy.isnan(brick.parameters[0].get_value()).any()
    numpy.testing.assert_allclose((brick.W ** 2).sum(axis=0).eval(), 1,
                                  rtol=1e-6) 
Example #5
Source File: test_initialization.py    From attention-lvcsr with MIT License
def test_gaussian():
    rng = numpy.random.RandomState(1)

    def check_gaussian(rng, mean, std, shape):
        weights = IsotropicGaussian(std, mean).generate(rng, shape)
        assert weights.shape == shape
        assert weights.dtype == theano.config.floatX
        assert_allclose(weights.mean(), mean, atol=1e-2)
        assert_allclose(weights.std(), std, atol=1e-2)
    yield check_gaussian, rng, 0, 1, (500, 600)
    yield check_gaussian, rng, 5, 3, (600, 500) 
Example #6
Source File: bricks.py    From image-captioning-for-mortals with BSD 3-Clause "New" or "Revised" License
def test_encoder():
        image_vects = tensor.matrix('image_vects')
        word_vects = tensor.tensor3('word_vects')
        batch_size = 2
        image_feature_dim = 64
        seq_len = 4
        embedding_dim = 300


        s = Encoder(
                  image_feature_dim=image_feature_dim
                , embedding_dim=embedding_dim
                , biases_init=Constant(0.)
                , weights_init=IsotropicGaussian(0.02)
                )
        s.initialize()
        iem, sem = s.apply(image_vects, word_vects)

        image_vects_tv = np.zeros((batch_size, image_feature_dim), dtype='float32')
        word_vects_tv = np.zeros((batch_size, seq_len, embedding_dim), dtype='float32')

        # expecting sentence embedding to be [batch_size, embedding_dim]
        f = theano.function([image_vects, word_vects], [iem, sem])
        i_emb, s_emb = f(image_vects_tv, word_vects_tv)

        print("""
            batch_size: %d
            image_feature_dim: %d
            sequence length: %d
            embedding dim: %d \n"""
            % (
                batch_size
              , image_feature_dim
              , seq_len
              , embedding_dim)
        )

        print "input image vectors: ", (batch_size, image_feature_dim)
        print "input word vectors: ", (batch_size, seq_len, embedding_dim)
        print "image embedding: ", i_emb.shape
        print "sentence embedding: ", s_emb.shape 
Example #7
Source File: test_machine_translation.py    From blocks-examples with MIT License
def test_sampling():

    # Create Theano variables
    sampling_input = theano.tensor.lmatrix('input')

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16,
        theano_seed=1234)
    sampling_representation = encoder.apply(
        sampling_input, theano.tensor.ones(sampling_input.shape))
    generateds = decoder.generate(sampling_input, sampling_representation)
    model = Model(generateds[1])

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
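    # push_initialization_config propagates weights_init/biases_init down to
    # the child bricks; the Orthogonal overrides below are set afterwards, so
    # they take precedence for the recurrent weights when initialize() runs.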
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated outputs
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4 
Example #8
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #9
Source File: test_attention.py    From attention-lvcsr with MIT License
def test_sequence_content_attention():
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    seq_values = numpy.zeros((seq_len, batch_size, attended_dim),
                             dtype=theano.config.floatX)
    states_values = numpy.zeros((batch_size, state_dim),
                                dtype=theano.config.floatX)
    mask_values = numpy.zeros((seq_len, batch_size),
                              dtype=theano.config.floatX)
    # randomly generate a sensible mask
    for seq_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), seq_idx] = 1
    glimpses_values, weight_values = theano.function(
        [sequences, states, mask], [glimpses, weights])(
            seq_values, states_values, mask_values)
    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    assert numpy.all(weight_values.sum(axis=1) == 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0)) 
Example #10
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run() 
Example #11
Source File: test_machine_translation.py    From blocks-examples with MIT License
def test_search_model():

    # Create Theano variables
    floatX = theano.config.floatX
    source_sentence = theano.tensor.lmatrix('source')
    source_sentence_mask = theano.tensor.matrix('source_mask', dtype=floatX)
    target_sentence = theano.tensor.lmatrix('target')
    target_sentence_mask = theano.tensor.matrix('target_mask', dtype=floatX)

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    # Compile a function for the cost
    f_cost = theano.function(
        inputs=[source_sentence, source_sentence_mask,
                target_sentence, target_sentence_mask],
        outputs=cost)

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(22, 4))
    y = numpy.random.randint(0, 12, size=(22, 6))
    x_mask = numpy.ones_like(x).astype(floatX)
    y_mask = numpy.ones_like(y).astype(floatX)

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    cost_ = f_cost(x, x_mask, y, y_mask)
    assert_allclose(cost_, 14.90944) 
Example #12
Source File: train_celeba_classifier.py    From discgen with MIT License
def create_model_bricks():
    convnet = ConvolutionalSequence(
        layers=[
            Convolutional(
                filter_size=(4, 4),
                num_filters=32,
                name='conv1'),
            SpatialBatchNormalization(name='batch_norm1'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=32,
                name='conv2'),
            SpatialBatchNormalization(name='batch_norm2'),
            Rectifier(),
            Convolutional(
                filter_size=(4, 4),
                num_filters=64,
                name='conv3'),
            SpatialBatchNormalization(name='batch_norm3'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=64,
                name='conv4'),
            SpatialBatchNormalization(name='batch_norm4'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                num_filters=128,
                name='conv5'),
            SpatialBatchNormalization(name='batch_norm5'),
            Rectifier(),
            Convolutional(
                filter_size=(3, 3),
                step=(2, 2),
                num_filters=128,
                name='conv6'),
            SpatialBatchNormalization(name='batch_norm6'),
            Rectifier(),
        ],
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Logistic()],
        dims=[numpy.prod(convnet.get_dim('output')), 1000, 40],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='mlp')
    mlp.initialize()

    return convnet, mlp 
Example #13
Source File: pacgan_task.py    From PacGAN with MIT License
def create_model_brick(self):
        decoder = MLP(
            dims=[self._config["num_zdim"], self._config["gen_hidden_size"], self._config["gen_hidden_size"], self._config["gen_hidden_size"], self._config["gen_hidden_size"], self._config["num_xdim"]],
            activations=[Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h1'),
                         Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h2'),
                         Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h3'),
                         Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                                   self._config["gen_activation"]().apply],
                                  name='decoder_h4'),
                         Identity(name='decoder_out')],
            use_bias=False,
            name='decoder')

        discriminator = Sequence(
            application_methods=[
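                # PacGAN's packing: the discriminator sees num_packing samples
                # concatenated along the feature axis, hence the
                # num_xdim * num_packing input dimension below.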
                LinearMaxout(
                    input_dim=self._config["num_xdim"] * self._config["num_packing"],
                    output_dim=self._config["disc_hidden_size"],
                    num_pieces=self._config["disc_maxout_pieces"],
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_h1').apply,
                LinearMaxout(
                    input_dim=self._config["disc_hidden_size"],
                    output_dim=self._config["disc_hidden_size"],
                    num_pieces=self._config["disc_maxout_pieces"],
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_h2').apply,
                LinearMaxout(
                    input_dim=self._config["disc_hidden_size"],
                    output_dim=self._config["disc_hidden_size"],
                    num_pieces=self._config["disc_maxout_pieces"],
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_h3').apply,
                Linear(
                    input_dim=self._config["disc_hidden_size"],
                    output_dim=1,
                    weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                    biases_init=self._config["biases_init"],
                    name='discriminator_out').apply],
            name='discriminator')

        gan = PacGAN(decoder=decoder, discriminator=discriminator, weights_init=IsotropicGaussian(self._config["weights_init_std"]), biases_init=self._config["biases_init"], name='gan')
        gan.push_allocation_config()
        decoder.linear_transformations[-1].use_bias = True
        gan.initialize()
            
        print("Number of parameters in discriminator: {}".format(numpy.sum([numpy.prod(v.shape.eval()) for v in Selector(gan.discriminator).get_parameters().values()])))
        print("Number of parameters in decoder: {}".format(numpy.sum([numpy.prod(v.shape.eval()) for v in Selector(gan.decoder).get_parameters().values()])))
        
        return gan 