Python blocks.initialization.Constant() Examples
The following are 30 code examples of blocks.initialization.Constant(), drawn from open-source projects; the source file and originating project are noted above each example. You may also want to check out the other available functions and classes of the blocks.initialization module.
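Before the examples, a quick orientation: Constant(c) is an initialization scheme that fills a parameter with the constant c, broadcasting array-valued constants to the target shape. The following minimal sketch shows the two usual entry points, based only on the API the examples themselves exercise (Example #7 calls generate() directly; most others pass Constant as a brick's weights_init or biases_init); the concrete dimensions are illustrative:

from blocks.bricks import Linear
from blocks.initialization import Constant

# Direct use: generate() broadcasts the constant to the requested shape
# in theano.config.floatX. Constant ignores the rng argument, so None
# is fine (Example #7 below does exactly this).
weights = Constant(0.5).generate(None, (3, 4))
assert (weights == 0.5).all()

# Typical use: hand the scheme to a brick; the brick applies it to its
# parameters when initialize() is called.
linear = Linear(input_dim=3, output_dim=4,
                weights_init=Constant(0.5), biases_init=Constant(0))
linear.initialize()  # W is now all 0.5, b all 0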
Example #1
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_mlp_apply():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh().apply, None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)

    assert mlp.rng == mlp.linear_transformations[0].rng
Example #2
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_mlp():
    x = tensor.matrix()
    x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
    mlp = MLP(activations=[Tanh(), None], dims=[16, 8, 4],
              weights_init=Constant(1), biases_init=Constant(1))
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(
        numpy.tanh(x_val.dot(numpy.ones((16, 8))) + numpy.ones((2, 8))).dot(
            numpy.ones((8, 4))) + numpy.ones((2, 4)),
        y.eval({x: x_val}), rtol=1e-06)

    mlp = MLP(activations=[None], weights_init=Constant(1), use_bias=False)
    mlp.dims = [16, 8]
    y = mlp.apply(x)
    mlp.initialize()
    assert_allclose(x_val.dot(numpy.ones((16, 8))),
                    y.eval({x: x_val}), rtol=1e-06)

    assert mlp.rng == mlp.linear_transformations[0].rng
Example #3
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_sequence_variable_outputs():
    x = tensor.matrix()
    linear_1 = Linear(input_dim=16, output_dim=8,
                      weights_init=Constant(2), biases_init=Constant(1))
    fork = Fork(input_dim=8, output_names=['linear_2_1', 'linear_2_2'],
                output_dims=[4, 5], prototype=Linear(),
                weights_init=Constant(3), biases_init=Constant(4))
    sequence = Sequence([linear_1.apply, fork.apply])
    sequence.initialize()
    y_1, y_2 = sequence.apply(x)

    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y_1.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8))).dot(
            3 * numpy.ones((8, 4))) + 4 * numpy.ones((4, 4)))
    assert_allclose(
        y_2.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8))).dot(
            3 * numpy.ones((8, 5))) + 4 * numpy.ones((4, 5)))
Example #4
Source File: test_bn.py From attention-lvcsr with MIT License
def test_apply_batch_normalization_nested():
    x = tensor.matrix()
    eps = 1e-8
    batch_dims = (3, 9)
    bn = BatchNormalization(input_dim=5, epsilon=eps)
    mlp = MLP([Sequence([bn.apply, Tanh().apply])], [9, 5],
              weights_init=Constant(0.4), biases_init=Constant(1))
    mlp.initialize()
    y = mlp.apply(x)
    cg = apply_batch_normalization(ComputationGraph([y]))
    y_bn = cg.outputs[0]
    rng = numpy.random.RandomState((2016, 1, 18))
    x_ = rng.uniform(size=batch_dims).astype(theano.config.floatX)
    y_ = y_bn.eval({x: x_})
    W_, b_ = map(lambda s: (getattr(mlp.linear_transformations[0], s)
                            .get_value(borrow=True)), ['W', 'b'])
    z_ = numpy.dot(x_, W_) + b_
    y_expected = numpy.tanh((z_ - z_.mean(axis=0)) /
                            numpy.sqrt(z_.var(axis=0) + eps))
    assert_allclose(y_, y_expected, rtol=1e-3)
Example #5
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_linear():
    x = tensor.matrix()
    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        x_val.dot(2 * numpy.ones((16, 8))) + numpy.ones((4, 8)))

    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    use_bias=False)
    y = linear.apply(x)
    linear.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(y.eval({x: x_val}),
                    x_val.dot(2 * numpy.ones((16, 8))))
Example #6
Source File: test_initialization.py From attention-lvcsr with MIT License
def test_sparse():
    rng = numpy.random.RandomState(1)

    def check_sparse(rng, num_init, weights_init, sparse_init, shape,
                     total):
        weights = Sparse(num_init=num_init, weights_init=weights_init,
                         sparse_init=sparse_init).generate(rng, shape)
        assert weights.shape == shape
        assert weights.dtype == theano.config.floatX
        if sparse_init is None:
            if isinstance(num_init, numbers.Integral):
                assert (numpy.count_nonzero(weights) <=
                        weights.size - num_init * weights.shape[0])
            else:
                assert (numpy.count_nonzero(weights) <=
                        weights.size - num_init * weights.shape[1])
        if total is not None:
            assert numpy.sum(weights) == total

    yield check_sparse, rng, 5, Constant(1.), None, (10, 10), None
    yield check_sparse, rng, 0.5, Constant(1.), None, (10, 10), None
    yield check_sparse, rng, 0.5, Constant(1.), Constant(1.), (10, 10), None
    yield check_sparse, rng, 3, Constant(1.), None, (10, 10), 30
    yield check_sparse, rng, 3, Constant(0.), Constant(1.), (10, 10), 70
    yield check_sparse, rng, 0.3, Constant(1.), None, (10, 10), 30
    yield check_sparse, rng, 0.3, Constant(0.), Constant(1.), (10, 10), 70
Example #7
Source File: test_initialization.py From attention-lvcsr with MIT License
def test_constant():
    def check_constant(const, shape, ground_truth):
        # rng unused, so pass None.
        init = Constant(const).generate(None, ground_truth.shape)
        assert ground_truth.dtype == theano.config.floatX
        assert ground_truth.shape == init.shape
        assert_equal(ground_truth, init)

    # Test scalar init.
    yield (check_constant, 5, (5, 5),
           5 * numpy.ones((5, 5), dtype=theano.config.floatX))
    # Test broadcasting.
    yield (check_constant, [1, 2, 3], (7, 3),
           numpy.array([[1, 2, 3]] * 7, dtype=theano.config.floatX))
    yield (check_constant, numpy.array([[1], [2], [3]]), (3, 2),
           numpy.array([[1, 1], [2, 2], [3, 3]],
                       dtype=theano.config.floatX))
Example #8
Source File: test_attention.py From attention-lvcsr with MIT License
def test_compute_weights_with_zero_mask():
    state_dim = 2
    attended_dim = 3
    match_dim = 4
    attended_length = 5
    batch_size = 6

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    energies = tensor.as_tensor_variable(
        numpy.random.rand(attended_length, batch_size))
    mask = tensor.as_tensor_variable(
        numpy.zeros((attended_length, batch_size)))
    weights = attention.compute_weights(energies, mask).eval()
    assert numpy.all(numpy.isfinite(weights))
Example #9
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_sequence_variable_inputs():
    x, y = tensor.matrix(), tensor.matrix()

    parallel_1 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[4, 5], output_dims=[3, 2],
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    parallel_2 = Parallel(input_names=['input_1', 'input_2'],
                          input_dims=[3, 2], output_dims=[5, 4],
                          prototype=Linear(), weights_init=Constant(2),
                          biases_init=Constant(1))
    sequence = Sequence([parallel_1.apply, parallel_2.apply])
    sequence.initialize()
    new_x, new_y = sequence.apply(x, y)
    x_val = numpy.ones((4, 4), dtype=theano.config.floatX)
    y_val = numpy.ones((4, 5), dtype=theano.config.floatX)
    assert_allclose(
        new_x.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((4, 3))) + numpy.ones((4, 3))).dot(
            2 * numpy.ones((3, 5))) + numpy.ones((4, 5)))
    assert_allclose(
        new_y.eval({y: y_val}),
        (y_val.dot(2 * numpy.ones((5, 2))) + numpy.ones((4, 2))).dot(
            2 * numpy.ones((2, 4))) + numpy.ones((4, 4)))
Example #10
Source File: test_conv.py From attention-lvcsr with MIT License
def test_convolutional():
    x = tensor.tensor4('x')
    num_channels = 4
    num_filters = 3
    batch_size = 5
    filter_size = (3, 3)
    conv = Convolutional(filter_size, num_filters, num_channels,
                         image_size=(17, 13), weights_init=Constant(1.),
                         biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels, 17, 13),
                       dtype=theano.config.floatX)
    assert_allclose(func(x_val),
                    numpy.prod(filter_size) * num_channels *
                    numpy.ones((batch_size, num_filters, 15, 11)) + 5)
    conv.image_size = (17, 13)
    conv.batch_size = 2  # This should have effect on get_dim
    assert conv.get_dim('output') == (num_filters, 15, 11)
Example #11
Source File: test_conv.py From attention-lvcsr with MIT License
def test_convolutional_transpose():
    x = tensor.tensor4('x')
    num_channels = 4
    num_filters = 3
    image_size = (8, 6)
    original_image_size = (17, 13)
    batch_size = 5
    filter_size = (3, 3)
    step = (2, 2)
    conv = ConvolutionalTranspose(
        original_image_size, filter_size, num_filters, num_channels,
        step=step, image_size=image_size, weights_init=Constant(1.),
        biases_init=Constant(5.))
    conv.initialize()
    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels) + image_size,
                       dtype=theano.config.floatX)
    expected_value = num_channels * numpy.ones(
        (batch_size, num_filters) + original_image_size)
    expected_value[:, :, 2:-2:2, :] += num_channels
    expected_value[:, :, :, 2:-2:2] += num_channels
    expected_value[:, :, 2:-2:2, 2:-2:2] += num_channels
    assert_allclose(func(x_val), expected_value + 5)
Example #12
Source File: test_conv.py From attention-lvcsr with MIT License
def test_no_input_size():
    # suppose x is outputted by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 3)
    num_filters = 2
    num_channels = 5
    c = Convolutional(filter_size, num_filters, num_channels,
                      tied_biases=True, weights_init=Constant(1.),
                      biases_init=Constant(1.))
    c.initialize()
    out = c.apply(x)
    assert c.get_dim('output') == (2, None, None)
    assert out.ndim == 4

    c = Convolutional(filter_size, num_filters, num_channels,
                      tied_biases=False, weights_init=Constant(1.),
                      biases_init=Constant(1.))
    assert_raises_regexp(ValueError, 'Cannot infer bias size \S+',
                         c.initialize)
Example #13
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_linear_nan_allocation():
    x = tensor.matrix()
    linear = Linear(input_dim=16, output_dim=8, weights_init=Constant(2),
                    biases_init=Constant(1))
    linear.apply(x)
    w1 = numpy.nan * numpy.zeros((16, 8))
    w2 = linear.parameters[0].get_value()
    b1 = numpy.nan * numpy.zeros(8)
    b2 = linear.parameters[1].get_value()
    numpy.testing.assert_equal(w1, w2)
    numpy.testing.assert_equal(b1, b2)
Example #14
Source File: regression.py From Diffusion-Probabilistic-Models with MIT License
def __init__(self, num_channels, num_filters, spatial_width, num_scales,
             filter_size, downsample_method='meanout', name=""):
    """
    A brick implementing a single layer in a multi-scale convolutional
    network.
    """
    super(MultiScaleConvolution, self).__init__()

    self.num_scales = num_scales
    self.filter_size = filter_size
    self.num_filters = num_filters
    self.spatial_width = spatial_width
    self.downsample_method = downsample_method
    self.children = []

    print "adding MultiScaleConvolution layer"
    # for scale in range(self.num_scales-1, -1, -1):
    for scale in range(self.num_scales):
        print "scale %d"%scale
        conv_layer = ConvolutionalActivation(
            activation=conv_nonlinearity.apply,
            filter_size=(filter_size, filter_size),
            num_filters=num_filters,
            num_channels=num_channels,
            image_size=(spatial_width/2**scale, spatial_width/2**scale),
            # assume images are spatially smooth -- in which case output
            # magnitude scales with # filter pixels rather than square
            # root of # filter pixels, so initialize accordingly.
            weights_init=IsotropicGaussian(
                std=np.sqrt(1./(num_filters))/filter_size**2),
            biases_init=Constant(0),
            border_mode='full',
            name=name+"scale%d"%scale)
        self.children.append(conv_layer)
Example #15
Source File: test_graph.py From attention-lvcsr with MIT License
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(
        dict(x=numpy.zeros((1, 10), dtype=theano.config.floatX)))
    assert len(snapshot) == 14
Example #16
Source File: bricks.py From image-captioning-for-mortals with BSD 3-Clause "New" or "Revised" License
def test_encoder():
    image_vects = tensor.matrix('image_vects')
    word_vects = tensor.tensor3('word_vects')
    batch_size = 2
    image_feature_dim = 64
    seq_len = 4
    embedding_dim = 300

    s = Encoder(
          image_feature_dim=image_feature_dim
        , embedding_dim=embedding_dim
        , biases_init=Constant(0.)
        , weights_init=IsotropicGaussian(0.02)
        )
    s.initialize()

    iem, sem = s.apply(image_vects, word_vects)

    image_vects_tv = np.zeros((batch_size, image_feature_dim),
                              dtype='float32')
    word_vects_tv = np.zeros((batch_size, seq_len, embedding_dim),
                             dtype='float32')

    # expecting sentence embedding to be [batch_size, embedding_dim]
    f = theano.function([image_vects, word_vects], [iem, sem])
    i_emb, s_emb = f(image_vects_tv, word_vects_tv)

    print("""
        batch_size: %d
        image_feature_dim: %d
        sequence length: %d
        embedding dim: %d
        \n""" % (
            batch_size
          , image_feature_dim
          , seq_len
          , embedding_dim)
    )
    print "input image vectors: ", (batch_size, image_feature_dim)
    print "input word vectors: ", (batch_size, seq_len, embedding_dim)
    print "image embedding: ", i_emb.shape
    print "sentence embedding: ", s_emb.shape
Example #17
Source File: test_machine_translation.py From blocks-examples with MIT License
def test_sampling():
    # Create Theano variables
    sampling_input = theano.tensor.lmatrix('input')

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8,
        representation_dim=16, theano_seed=1234)
    sampling_representation = encoder.apply(
        sampling_input, theano.tensor.ones(sampling_input.shape))
    generateds = decoder.generate(sampling_input, sampling_representation)
    model = Model(generateds[1])

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4
Example #18
Source File: __init__.py From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"
    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #19
Source File: test_bricks.py From attention-lvcsr with MIT License
def test_linear_maxout():
    x = tensor.matrix()
    linear_maxout = LinearMaxout(input_dim=16, output_dim=8,
                                 num_pieces=3, weights_init=Constant(2),
                                 biases_init=Constant(1))
    y = linear_maxout.apply(x)
    linear_maxout.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 24))) +
         numpy.ones((4, 24))).reshape(4, 8, 3).max(2))
Example #20
Source File: test_conv.py From attention-lvcsr with MIT License
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    act = Rectifier()

    conv = Convolutional((3, 3), 5, weights_init=Constant(1.),
                         biases_init=Constant(5.))
    pooling = MaxPooling(pooling_size=(pooling_size, pooling_size))
    conv2 = Convolutional((2, 2), 4, weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, act, pooling, conv2, act],
                                num_channels, image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13),
                       dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 2)) *
             (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val)
Example #21
Source File: test_interfaces.py From attention-lvcsr with MIT License
def test_linearlike_subclass_initialize_works_overridden_w():
    class NotQuiteLinear(Linear):
        @property
        def W(self):
            W = super(NotQuiteLinear, self).W
            return W / tensor.sqrt((W ** 2).sum(axis=0))

    brick = NotQuiteLinear(5, 10, weights_init=IsotropicGaussian(0.02),
                           biases_init=Constant(1))
    brick.initialize()
    assert not numpy.isnan(brick.parameters[0].get_value()).any()
    numpy.testing.assert_allclose((brick.W ** 2).sum(axis=0).eval(), 1,
                                  rtol=1e-6)
Example #22
Source File: test_wrappers.py From attention-lvcsr with MIT License
def test_with_extra_dims_is_serializable():
    brick = LinearWithExtraDims(
        input_dim=3, output_dim=4,
        weights_init=Constant(1), biases_init=Constant(0))
    brick.initialize()
    cPickle.loads(cPickle.dumps(brick))
Example #23
Source File: test_wrappers.py From attention-lvcsr with MIT License
def test_with_extra_dims_ndim_gt_2():
    X = tensor.tensor4('X')
    brick = LinearWithExtraDims(
        input_dim=3, output_dim=4,
        weights_init=Constant(1), biases_init=Constant(0))
    brick.initialize()
    f = theano.function([X], brick.apply(X, extra_ndim=2))
    assert_allclose(
        f(numpy.ones(shape=(2, 2, 2, 3), dtype=theano.config.floatX)),
        3 * numpy.ones(shape=(2, 2, 2, 4), dtype=theano.config.floatX))
Example #24
Source File: test_bn.py From attention-lvcsr with MIT License
def test_batch_normalization_inside_convolutional_sequence():
    """Test that BN bricks work in ConvolutionalSequences."""
    conv_seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         BatchNormalization(broadcastable=(False, True, True)),
         AveragePooling(pooling_size=(2, 2)),
         BatchNormalization(broadcastable=(False, False, False)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq_no_bn = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         AveragePooling(pooling_size=(2, 2)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq.initialize()
    conv_seq_no_bn.initialize()
    rng = numpy.random.RandomState((2015, 12, 17))
    input_ = random_unif(rng, (2, 9, 10, 8))

    x = theano.tensor.tensor4()
    ybn = conv_seq.apply(x)
    y = conv_seq_no_bn.apply(x)
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}))

    std = conv_seq.children[-2].population_stdev
    std.set_value(3 * std.get_value(borrow=True))
    yield (assert_equal, ybn.eval({x: input_}),
           y.eval({x: input_}) / 3.)
Example #25
Source File: regression.py From Diffusion-Probabilistic-Models with MIT License
def __init__(self, n_layers_conv, n_layers_dense_lower,
             n_layers_dense_upper, n_hidden_conv, n_hidden_dense_lower,
             n_hidden_dense_lower_output, n_hidden_dense_upper,
             spatial_width, n_colors, n_scales, n_temporal_basis):
    """
    The multilayer perceptron, that provides temporal weighting
    coefficients for mu and sigma images. This consists of a lower
    segment with a convolutional MLP, and optionally with a dense MLP
    in parallel. The upper segment then consists of a per-pixel dense
    MLP (convolutional MLP with 1x1 kernel).
    """
    super(MLP_conv_dense, self).__init__()

    self.n_colors = n_colors
    self.spatial_width = spatial_width
    self.n_hidden_dense_lower = n_hidden_dense_lower
    self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
    self.n_hidden_conv = n_hidden_conv

    ## the lower layers
    self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv,
                                          spatial_width, n_colors,
                                          n_scales)
    self.children = [self.mlp_conv]
    if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
        n_input = n_colors*spatial_width**2
        n_output = n_hidden_dense_lower_output*spatial_width**2
        self.mlp_dense_lower = MLP(
            [dense_nonlinearity] * n_layers_conv,
            [n_input] + [n_hidden_dense_lower] * (n_layers_conv-1) +
            [n_output],
            name='MLP dense lower', weights_init=Orthogonal(),
            biases_init=Constant(0))
        self.children.append(self.mlp_dense_lower)
    else:
        n_hidden_dense_lower_output = 0

    ## the upper layers (applied to each pixel independently)
    n_output = n_colors*n_temporal_basis*2  # "*2" for both mu and sigma
    self.mlp_dense_upper = MLP(
        [dense_nonlinearity] * (n_layers_dense_upper-1) + [Identity()],
        [n_hidden_conv+n_hidden_dense_lower_output] +
        [n_hidden_dense_upper] * (n_layers_dense_upper-1) + [n_output],
        name='MLP dense upper', weights_init=Orthogonal(),
        biases_init=Constant(0))
    self.children.append(self.mlp_dense_upper)
Example #26
Source File: model.py From blocks-char-rnn with MIT License
def initialize(to_init):
    for bricks in to_init:
        bricks.weights_init = initialization.Uniform(width=0.08)
        bricks.biases_init = initialization.Constant(0)
        bricks.initialize()
Example #27
Source File: test_attention.py From attention-lvcsr with MIT License
def test_sequence_content_attention():
    # Disclaimer: only check dimensions, not values
    rng = numpy.random.RandomState([2014, 12, 2])

    seq_len = 5
    batch_size = 6
    state_dim = 2
    attended_dim = 3
    match_dim = 4

    attention = SequenceContentAttention(
        state_names=["states"], state_dims=[state_dim],
        attended_dim=attended_dim, match_dim=match_dim,
        weights_init=IsotropicGaussian(0.5),
        biases_init=Constant(0))
    attention.initialize()

    sequences = tensor.tensor3('sequences')
    states = tensor.matrix('states')
    mask = tensor.matrix('mask')
    glimpses, weights = attention.take_glimpses(
        sequences, attended_mask=mask, states=states)
    assert glimpses.ndim == 2
    assert weights.ndim == 2

    seq_values = numpy.zeros((seq_len, batch_size, attended_dim),
                             dtype=theano.config.floatX)
    states_values = numpy.zeros((batch_size, state_dim),
                                dtype=theano.config.floatX)
    mask_values = numpy.zeros((seq_len, batch_size),
                              dtype=theano.config.floatX)
    # randomly generate a sensible mask
    for sed_idx in range(batch_size):
        mask_values[:rng.randint(1, seq_len), sed_idx] = 1
    glimpses_values, weight_values = theano.function(
        [sequences, states, mask], [glimpses, weights])(
            seq_values, states_values, mask_values)
    assert glimpses_values.shape == (batch_size, attended_dim)
    assert weight_values.shape == (batch_size, seq_len)
    assert numpy.all(weight_values >= 0)
    assert numpy.all(weight_values <= 1)
    assert numpy.all(weight_values.sum(axis=1) == 1)
    assert numpy.all((weight_values.T == 0) == (mask_values == 0))
Example #28
Source File: test_saveload.py From attention-lvcsr with MIT License
def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])])
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar')
Example #29
Source File: test_serialization.py From attention-lvcsr with MIT License
def test_add_to_dump():
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_pkl', '_parameters',
                                               'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)

    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump,
                      *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))
Example #30
Source File: test_serialization.py From attention-lvcsr with MIT License
def test_serialization():
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are
    # dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])