Python blocks.initialization.IsotropicGaussian() Examples
The following are 14 code examples of blocks.initialization.IsotropicGaussian().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module blocks.initialization, or try the search function.
Example #1
Source File: test_attention.py From attention-lvcsr with MIT License | 6 votes |
def test_compute_weights_with_zero_mask():
    """Check that ``compute_weights`` returns finite values for an all-zero mask."""
    state_dim, attended_dim, match_dim = 2, 3, 4
    attended_length, batch_size = 5, 6
    shape = (attended_length, batch_size)

    attention = SequenceContentAttention(
        state_names=["states"],
        state_dims=[state_dim],
        attended_dim=attended_dim,
        match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    # Random energies paired with a mask that is zero everywhere.
    energies = tensor.as_tensor_variable(numpy.random.rand(*shape))
    mask = tensor.as_tensor_variable(numpy.zeros(shape))

    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights))
Example #2
Source File: test_attention.py From attention-lvcsr with MIT License | 6 votes |
def test_stable_attention_weights():
    """Check that ``compute_weights`` stays finite for very large energies."""
    state_dim, attended_dim, match_dim = 2, 3, 4
    attended_length, batch_size = 5, 6

    attention = SequenceContentAttention(
        state_names=["states"],
        state_dims=[state_dim],
        attended_dim=attended_dim,
        match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    # Random high energies with mu=800, sigma=50
    noise = numpy.random.randn(attended_length, batch_size)
    energies_val = (50. * noise + 800).astype(theano.config.floatX)
    energies = tensor.as_tensor_variable(energies_val)

    # Every position is unmasked.
    mask = tensor.as_tensor_variable(
        numpy.ones((attended_length, batch_size)))

    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights))
Example #3
Source File: regression.py From Diffusion-Probabilistic-Models with MIT License | 5 votes |
def __init__(self, num_channels, num_filters, spatial_width, num_scales,
             filter_size, downsample_method='meanout', name=""):
    """
    A brick implementing a single layer in a multi-scale convolutional network.

    One ``ConvolutionalActivation`` child is created per scale; the child for
    scale ``s`` is given an image size of ``spatial_width // 2**s`` per side.

    Parameters
    ----------
    num_channels : number of input channels passed to every child layer.
    num_filters : number of filters in every child layer.
    spatial_width : side length of the (square) image at scale 0.
        Assumed to be an integer -- TODO confirm against callers.
    num_scales : number of scales, i.e. number of child layers created.
    filter_size : side length of the (square) convolution filter.
    downsample_method : stored on the instance; not used in this method.
    name : prefix for the child layer names (``name + "scale<N>"``).
    """
    super(MultiScaleConvolution, self).__init__()

    self.num_scales = num_scales
    self.filter_size = filter_size
    self.num_filters = num_filters
    self.spatial_width = spatial_width
    self.downsample_method = downsample_method
    self.children = []

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("adding MultiScaleConvolution layer")

    for scale in range(self.num_scales):
        print("scale %d" % scale)
        # Floor division keeps the image size integral under Python 3's
        # true division; for integer widths it equals Python 2's ``/``.
        scaled_width = spatial_width // 2**scale
        conv_layer = ConvolutionalActivation(
            activation=conv_nonlinearity.apply,
            filter_size=(filter_size, filter_size),
            num_filters=num_filters,
            num_channels=num_channels,
            image_size=(scaled_width, scaled_width),
            # assume images are spatially smooth -- in which case output
            # magnitude scales with # filter pixels rather than square root
            # of # filter pixels, so initialize accordingly.
            weights_init=IsotropicGaussian(
                std=np.sqrt(1./(num_filters))/filter_size**2),
            biases_init=Constant(0),
            border_mode='full',
            name=name+"scale%d" % scale)
        self.children.append(conv_layer)
Example #4
Source File: test_interfaces.py From attention-lvcsr with MIT License | 5 votes |
def test_linearlike_subclass_initialize_works_overridden_w():
    """Initialise a ``Linear`` subclass whose ``W`` property is overridden.

    The subclass exposes column-normalised weights; the test checks that the
    stored parameter contains no NaNs and that the exposed ``W`` has
    unit-norm columns.
    """
    class NotQuiteLinear(Linear):
        @property
        def W(self):
            raw = super(NotQuiteLinear, self).W
            column_norms = tensor.sqrt((raw ** 2).sum(axis=0))
            return raw / column_norms

    brick = NotQuiteLinear(5, 10,
                           weights_init=IsotropicGaussian(0.02),
                           biases_init=Constant(1))
    brick.initialize()

    stored_weights = brick.parameters[0].get_value()
    assert not numpy.isnan(stored_weights).any()

    squared_column_norms = (brick.W ** 2).sum(axis=0).eval()
    numpy.testing.assert_allclose(squared_column_norms, 1, rtol=1e-6)
Example #5
Source File: test_initialization.py From attention-lvcsr with MIT License | 5 votes |
def test_gaussian():
    """Generator-style test: yield one ``IsotropicGaussian`` check per case."""
    rng = numpy.random.RandomState(1)

    def check_gaussian(rng, mean, std, shape):
        # Note the constructor takes (std, mean) in that order.
        weights = IsotropicGaussian(std, mean).generate(rng, shape)
        assert weights.shape == shape
        assert weights.dtype == theano.config.floatX
        assert_allclose(weights.mean(), mean, atol=1e-2)
        assert_allclose(weights.std(), std, atol=1e-2)

    # (mean, std, shape) for each generated check, in the original order.
    cases = (
        (0, 1, (500, 600)),
        (5, 3, (600, 500)),
    )
    for mean, std, shape in cases:
        yield check_gaussian, rng, mean, std, shape
Example #6
Source File: bricks.py From image-captioning-for-mortals with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_encoder():
    """Smoke-test ``Encoder``: build it, run it on zero inputs, print shapes.

    No assertions are made on the outputs; the test only checks that the
    brick initialises, compiles and evaluates, and reports the shapes seen.
    """
    image_vects = tensor.matrix('image_vects')
    word_vects = tensor.tensor3('word_vects')
    batch_size = 2
    image_feature_dim = 64
    seq_len = 4
    embedding_dim = 300

    s = Encoder(
        image_feature_dim=image_feature_dim,
        embedding_dim=embedding_dim,
        biases_init=Constant(0.),
        weights_init=IsotropicGaussian(0.02))
    s.initialize()
    iem, sem = s.apply(image_vects, word_vects)

    image_vects_tv = np.zeros((batch_size, image_feature_dim),
                              dtype='float32')
    word_vects_tv = np.zeros((batch_size, seq_len, embedding_dim),
                             dtype='float32')

    # expecting sentence embedding to be [batch_size, embedding_dim]
    f = theano.function([image_vects, word_vects], [iem, sem])
    i_emb, s_emb = f(image_vects_tv, word_vects_tv)

    print(""" batch_size: %d image_feature_dim: %d sequence length: %d embedding dim: %d \n""" % (
        batch_size,
        image_feature_dim,
        seq_len,
        embedding_dim))

    # The Python 2 statement ``print label, value`` wrote the label, one
    # space, then the value. A single pre-formatted argument reproduces that
    # text and is valid syntax on both Python 2 and Python 3.
    report = (
        ("input image vectors: ", (batch_size, image_feature_dim)),
        ("input word vectors: ", (batch_size, seq_len, embedding_dim)),
        ("image embedding: ", i_emb.shape),
        ("sentence embedding: ", s_emb.shape),
    )
    for label, value in report:
        print("%s %s" % (label, value))
Example #7
Source File: test_machine_translation.py From blocks-examples with MIT License | 5 votes |
def test_sampling():
    """Build a small encoder/decoder pair and check the sampled output size."""
    # Symbolic input
    sampling_input = theano.tensor.lmatrix('input')

    # Model bricks
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8,
        representation_dim=16, theano_seed=1234)

    input_mask = theano.tensor.ones(sampling_input.shape)
    sampling_representation = encoder.apply(sampling_input, input_mask)
    generateds = decoder.generate(sampling_input, sampling_representation)
    model = Model(generateds[1])

    # Initialization: both bricks share one weight and one bias initializer.
    shared_weights_init = IsotropicGaussian(0.01)
    encoder.weights_init = shared_weights_init
    decoder.weights_init = shared_weights_init
    shared_biases_init = Constant(0)
    encoder.biases_init = shared_biases_init
    decoder.biases_init = shared_biases_init
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    # Set after the push so these two are not overwritten by it.
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compiled sampling function
    sampling_fn = model.get_theano_function()

    # Concrete input
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Run and verify
    generated_step = sampling_fn(x)
    flattened = generated_step[0].flatten()
    assert len(flattened) == 4
Example #8
Source File: __init__.py From blocks-examples with MIT License | 5 votes |
def main(save_to, num_batches):
    """Train a small MLP for ``num_batches`` batches and return the main loop.

    Checkpoints are written to ``save_to``.
    """
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()

    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    # Both vectors are lifted to column matrices before use.
    prediction = mlp.apply(x[:, None])
    cost = SquaredError().apply(y[:, None], prediction)
    cost.name = "cost"

    algorithm = GradientDescent(
        cost=cost,
        parameters=ComputationGraph(cost).parameters,
        step_rule=Scale(learning_rate=0.001))
    train_stream = get_data_stream(range(100))
    model = Model(cost)
    extensions = [
        Timing(),
        FinishAfter(after_n_batches=num_batches),
        DataStreamMonitoring(
            [cost], get_data_stream(range(100, 200)), prefix="test"),
        TrainingDataMonitoring([cost], after_epoch=True),
        Checkpoint(save_to),
        Printing(),
    ]

    main_loop = MainLoop(
        algorithm,
        train_stream,
        model=model,
        extensions=extensions)
    main_loop.run()
    return main_loop
Example #9
Source File: test_attention.py From attention-lvcsr with MIT License | 4 votes |
def test_sequence_content_attention():
    """Check output dimensions and basic weight properties of ``take_glimpses``.

    Verifies that glimpses and weights are 2-D with the expected shapes, that
    weights lie in [0, 1] and sum to one per batch item, and that weights are
    zero exactly where the mask is zero.
    """
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"],
        state_dims=[state_dim],
        attended_dim=attended_dim,
        match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    seq_values = numpy.zeros((seq_len, batch_size, attended_dim),
                             dtype=theano.config.floatX)
    states_values = numpy.zeros((batch_size, state_dim),
                                dtype=theano.config.floatX)
    mask_values = numpy.zeros((seq_len, batch_size),
                              dtype=theano.config.floatX)
    # randomly generate a sensible mask: each batch column gets a non-empty
    # prefix of ones
    for batch_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), batch_idx] = 1

    glimpses_values, weight_values = theano.function(
        [sequences, states, mask], [glimpses, weights])(
            seq_values, states_values, mask_values)
    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    # Floating-point sums of normalised weights need not be bit-exactly 1,
    # so compare with a tolerance instead of ``==``.
    assert numpy.allclose(weight_values.sum(axis=1), 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0))
Example #10
Source File: __init__.py From blocks-examples with MIT License | 4 votes |
def main(save_to, num_epochs):
    """Train an MLP on MNIST for ``num_epochs`` epochs, checkpointing to ``save_to``."""
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    flat_targets = y.flatten()
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(flat_targets, probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    # Add a squared-weight penalty for each of the two weight matrices.
    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    for weight in (W1, W2):
        cost = cost + .00005 * (weight ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    def flattened_stream(dataset, batch_size):
        # Sequential pass over ``dataset`` with the 'features' source flattened.
        scheme = SequentialScheme(dataset.num_examples, batch_size)
        stream = DataStream.default_stream(dataset, iteration_scheme=scheme)
        return Flatten(stream, which_sources=('features',))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))

    test_monitor = DataStreamMonitoring(
        [cost, error_rate],
        flattened_stream(mnist_test, 500),
        prefix="test")
    train_monitor = TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True)
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  test_monitor,
                  train_monitor,
                  Checkpoint(save_to),
                  Printing()]

    # Live plotting is only added when the optional extras are available.
    if BLOCKS_EXTRAS_AVAILABLE:
        plot_channels = [
            ['test_final_cost',
             'test_misclassificationrate_apply_error_rate'],
            ['train_total_gradient_norm']]
        extensions.append(Plot('MNIST example', channels=plot_channels))

    main_loop = MainLoop(
        algorithm,
        flattened_stream(mnist_train, 50),
        model=Model(cost),
        extensions=extensions)
    main_loop.run()
Example #11
Source File: test_machine_translation.py From blocks-examples with MIT License | 4 votes |
def test_search_model():
    """Compute the translation cost on fixed random data and compare it to a reference value."""
    floatX = theano.config.floatX

    # Symbolic inputs
    source_sentence = theano.tensor.lmatrix('source')
    source_sentence_mask = theano.tensor.matrix('source_mask', dtype=floatX)
    target_sentence = theano.tensor.lmatrix('target')
    target_sentence_mask = theano.tensor.matrix('target_mask', dtype=floatX)

    # Model bricks
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16)
    representation = encoder.apply(source_sentence, source_sentence_mask)
    cost = decoder.cost(
        representation, source_sentence_mask,
        target_sentence, target_sentence_mask)

    # Compiled cost function
    cost_inputs = [source_sentence, source_sentence_mask,
                   target_sentence, target_sentence_mask]
    f_cost = theano.function(inputs=cost_inputs, outputs=cost)

    # Concrete data: seeded random token ids with all-ones masks
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(22, 4))
    y = numpy.random.randint(0, 12, size=(22, 6))
    x_mask = numpy.ones_like(x).astype(floatX)
    y_mask = numpy.ones_like(y).astype(floatX)

    # Initialization: both bricks share one weight and one bias initializer.
    shared_weights_init = IsotropicGaussian(0.01)
    encoder.weights_init = shared_weights_init
    decoder.weights_init = shared_weights_init
    shared_biases_init = Constant(0)
    encoder.biases_init = shared_biases_init
    decoder.biases_init = shared_biases_init
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    # Set after the push so these two are not overwritten by it.
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    cost_ = f_cost(x, x_mask, y, y_mask)
    assert_allclose(cost_, 14.90944)
Example #12
Source File: train_celeba_classifier.py From discgen with MIT License | 4 votes |
def create_model_bricks():
    """Create and initialise the convolutional feature extractor and the MLP.

    Returns
    -------
    (convnet, mlp) : the initialised ``ConvolutionalSequence`` and
        ``BatchNormalizedMLP`` bricks.
    """
    # (filter_size, step, num_filters) per convolution; ``None`` means the
    # ``step`` argument is not passed to ``Convolutional`` at all.
    conv_specs = [
        ((4, 4), None, 32),
        ((3, 3), (2, 2), 32),
        ((4, 4), None, 64),
        ((3, 3), (2, 2), 64),
        ((3, 3), None, 128),
        ((3, 3), (2, 2), 128),
    ]

    layers = []
    for index, (filter_size, step, num_filters) in enumerate(conv_specs, 1):
        conv_kwargs = dict(filter_size=filter_size,
                           num_filters=num_filters,
                           name='conv%d' % index)
        if step is not None:
            conv_kwargs['step'] = step
        # Each convolution is followed by batch normalisation and a rectifier.
        layers.append(Convolutional(**conv_kwargs))
        layers.append(SpatialBatchNormalization(name='batch_norm%d' % index))
        layers.append(Rectifier())

    convnet = ConvolutionalSequence(
        layers=layers,
        num_channels=3,
        image_size=(64, 64),
        use_bias=False,
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='convnet')
    convnet.initialize()

    # The MLP input width is the flattened size of the convnet output.
    flattened_dim = numpy.prod(convnet.get_dim('output'))
    mlp = BatchNormalizedMLP(
        activations=[Rectifier(), Logistic()],
        dims=[flattened_dim, 1000, 40],
        weights_init=IsotropicGaussian(0.033),
        biases_init=Constant(0),
        name='mlp')
    mlp.initialize()

    return convnet, mlp
Example #13
Source File: pacgan_task.py From PacGAN with MIT License | 4 votes |
def create_model_brick(self):
    """Build, initialise and return the ``PacGAN`` brick.

    All sizes, the generator activation class and the initialisers are read
    from ``self._config``. The parameter counts of the discriminator and the
    decoder are printed before the brick is returned.
    """
    cfg = self._config
    gen_hidden = cfg["gen_hidden_size"]
    disc_hidden = cfg["disc_hidden_size"]

    def new_weights_init():
        # One separate initializer instance per brick.
        return IsotropicGaussian(cfg["weights_init_std"])

    def count_parameters(brick):
        # Total number of scalar parameters under ``brick``.
        return numpy.sum([numpy.prod(v.shape.eval())
                          for v in Selector(brick).get_parameters().values()])

    # Decoder: four batch-normalised hidden layers and an identity output.
    hidden_activations = [
        Sequence([BatchNormalization(gen_hidden).apply,
                  cfg["gen_activation"]().apply],
                 name='decoder_h%d' % index)
        for index in range(1, 5)]
    decoder = MLP(
        dims=[cfg["num_zdim"]] + [gen_hidden] * 4 + [cfg["num_xdim"]],
        activations=hidden_activations + [Identity(name='decoder_out')],
        use_bias=False,
        name='decoder')

    # Discriminator: three maxout layers and a single-unit linear output.
    # The first layer's input width is num_xdim * num_packing.
    maxout_input_dims = [cfg["num_xdim"] * cfg["num_packing"],
                         disc_hidden,
                         disc_hidden]
    application_methods = [
        LinearMaxout(
            input_dim=input_dim,
            output_dim=disc_hidden,
            num_pieces=cfg["disc_maxout_pieces"],
            weights_init=new_weights_init(),
            biases_init=cfg["biases_init"],
            name='discriminator_h%d' % index).apply
        for index, input_dim in enumerate(maxout_input_dims, 1)]
    application_methods.append(
        Linear(
            input_dim=disc_hidden,
            output_dim=1,
            weights_init=new_weights_init(),
            biases_init=cfg["biases_init"],
            name='discriminator_out').apply)
    discriminator = Sequence(
        application_methods=application_methods,
        name='discriminator')

    gan = PacGAN(decoder=decoder,
                 discriminator=discriminator,
                 weights_init=new_weights_init(),
                 biases_init=cfg["biases_init"],
                 name='gan')
    gan.push_allocation_config()
    # Turn the bias back on for the decoder's last linear layer only.
    decoder.linear_transformations[-1].use_bias = True
    gan.initialize()

    print("Number of parameters in discriminator: {}".format(
        count_parameters(gan.discriminator)))
    print("Number of parameters in decoder: {}".format(
        count_parameters(gan.decoder)))

    return gan
Example #14
Source File: pacgan_task.py From PacGAN with MIT License | 4 votes |
def create_model_brick(self):
    """Build, initialise and return the ``PacGAN`` brick.

    All sizes, the generator activation class and the initialisers are read
    from ``self._config``. The parameter counts of the discriminator and the
    decoder are printed before the brick is returned.
    """
    cfg = self._config
    gen_hidden = cfg["gen_hidden_size"]
    disc_hidden = cfg["disc_hidden_size"]

    def new_weights_init():
        # One separate initializer instance per brick.
        return IsotropicGaussian(cfg["weights_init_std"])

    def count_parameters(brick):
        # Total number of scalar parameters under ``brick``.
        return numpy.sum([numpy.prod(v.shape.eval())
                          for v in Selector(brick).get_parameters().values()])

    # Decoder: four batch-normalised hidden layers and an identity output.
    hidden_activations = [
        Sequence([BatchNormalization(gen_hidden).apply,
                  cfg["gen_activation"]().apply],
                 name='decoder_h%d' % index)
        for index in range(1, 5)]
    decoder = MLP(
        dims=[cfg["num_zdim"]] + [gen_hidden] * 4 + [cfg["num_xdim"]],
        activations=hidden_activations + [Identity(name='decoder_out')],
        use_bias=False,
        name='decoder')

    # Discriminator: three maxout layers and a single-unit linear output.
    # The first layer's input width is num_xdim * num_packing.
    maxout_input_dims = [cfg["num_xdim"] * cfg["num_packing"],
                         disc_hidden,
                         disc_hidden]
    application_methods = [
        LinearMaxout(
            input_dim=input_dim,
            output_dim=disc_hidden,
            num_pieces=cfg["disc_maxout_pieces"],
            weights_init=new_weights_init(),
            biases_init=cfg["biases_init"],
            name='discriminator_h%d' % index).apply
        for index, input_dim in enumerate(maxout_input_dims, 1)]
    application_methods.append(
        Linear(
            input_dim=disc_hidden,
            output_dim=1,
            weights_init=new_weights_init(),
            biases_init=cfg["biases_init"],
            name='discriminator_out').apply)
    discriminator = Sequence(
        application_methods=application_methods,
        name='discriminator')

    gan = PacGAN(decoder=decoder,
                 discriminator=discriminator,
                 weights_init=new_weights_init(),
                 biases_init=cfg["biases_init"],
                 name='gan')
    gan.push_allocation_config()
    # Turn the bias back on for the decoder's last linear layer only.
    decoder.linear_transformations[-1].use_bias = True
    gan.initialize()

    print("Number of parameters in discriminator: {}".format(
        count_parameters(gan.discriminator)))
    print("Number of parameters in decoder: {}".format(
        count_parameters(gan.decoder)))

    return gan