Python blocks.bricks.Tanh() Examples

The following are 22 code examples of blocks.bricks.Tanh(), extracted from open source projects. The originating project and source file are noted above each example. You may also want to check out all available functions and classes of the module blocks.bricks.
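Before the project-specific examples, here is a minimal sketch (not taken from any of the projects below) of how Tanh is typically used: applied directly to a Theano variable, or passed as the activation brick of a container such as MLP or a recurrent transition. The dimensions and variable names are illustrative only.

from theano import tensor
from blocks.bricks import MLP, Tanh
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.initialization import Constant, IsotropicGaussian

x = tensor.matrix('x')
y = Tanh().apply(x)  # element-wise tanh of a Theano variable

# Tanh() instances as the hidden-layer activations of an MLP
mlp = MLP(activations=[Tanh(), Tanh()], dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
h = mlp.apply(x)

# Tanh() as the state activation of a recurrent transition
rnn = SimpleRecurrent(dim=100, activation=Tanh(),
                      weights_init=IsotropicGaussian(0.01))
rnn.initialize()
states = rnn.apply(tensor.tensor3('inputs'))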
Example #1
Source File: recurrent.py    From attention-lvcsr with MIT License
def __init__(self, dim, activation=None, gate_activation=None,
                 **kwargs):
        super(GatedRecurrent, self).__init__(**kwargs)
        self.dim = dim

        self.recurrent_weights_init = None
        self.initial_states_init = None

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        self.children = [activation, gate_activation] 
Example #2
Source File: test_search.py    From attention-lvcsr with MIT License
def __init__(self, dimension, alphabet_size, **kwargs):
        super(SimpleGenerator, self).__init__(**kwargs)
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.generator = generator
        self.children = [lookup, generator] 
Example #3
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_super_in_recurrent_overrider():
    # A regression test for issue #475
    class SimpleRecurrentWithContext(SimpleRecurrent):
        @application(contexts=['context'])
        def apply(self, context, *args, **kwargs):
            kwargs['inputs'] += context
            return super(SimpleRecurrentWithContext, self).apply(*args,
                                                                 **kwargs)

        @apply.delegate
        def apply_delegate(self):
            return super(SimpleRecurrentWithContext, self).apply

    brick = SimpleRecurrentWithContext(100, Tanh())
    inputs = tensor.tensor3('inputs')
    context = tensor.matrix('context').dimshuffle('x', 0, 1)
    brick.apply(context, inputs=inputs) 
Example #4
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_saved_inner_graph():
    """Make sure that the original inner graph is saved."""
    x = tensor.tensor3()
    recurrent = SimpleRecurrent(dim=3, activation=Tanh())
    y = recurrent.apply(x)

    application_call = get_application_call(y)
    assert application_call.inner_inputs
    assert application_call.inner_outputs

    cg = ComputationGraph(application_call.inner_outputs)
    # Check that the inner scan graph is annotated
    # with `recurrent.apply`
    assert len(VariableFilter(applications=[recurrent.apply])(cg)) == 3
    # Check that the inner graph is equivalent to the one
    # produced by a stand-alone call to `recurrent.apply`
    assert is_same_graph(application_call.inner_outputs[0],
                         recurrent.apply(*application_call.inner_inputs,
                                         iterate=False)) 
Example #5
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        self.bidir.children[0].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.bidir.children[1].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=theano.config.floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=theano.config.floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0 
Example #6
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def setUp(self):
        self.gated = GatedRecurrent(
            dim=3, activation=Tanh(),
            gate_activation=Tanh(), weights_init=Constant(2))
        self.gated.initialize()
        self.reset_only = GatedRecurrent(
            dim=3, activation=Tanh(),
            gate_activation=Tanh(),
            weights_init=IsotropicGaussian(), seed=1)
        self.reset_only.initialize() 
Example #7
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #8
Source File: __init__.py    From blocks-examples with MIT License
def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator] 
Example #9
Source File: attentive_reader.py    From DeepMind-Teaching-Machines-to-Read-and-Comprehend with MIT License
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list 
Example #10
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_sequence_activation_get_dim():
    seq = ConvolutionalSequence([Tanh()], num_channels=9, image_size=(4, 6))
    seq.allocate()
    assert seq.get_dim('output') == (9, 4, 6)

    seq = ConvolutionalSequence([Convolutional(filter_size=(7, 7),
                                               num_filters=5,
                                               border_mode=(1, 1)),
                                 Tanh()], num_channels=8, image_size=(8, 11))
    seq.allocate()
    assert seq.get_dim('output') == (5, 4, 7) 
Example #11
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_sequence_with_convolutions_raw_activation():
    seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         Rectifier(),
         Convolutional(filter_size=(5, 5), num_filters=3, step=(2, 2)),
         Tanh()],
        num_channels=2,
        image_size=(21, 39))
    seq.allocate()
    x = theano.tensor.tensor4()
    out = seq.apply(x).eval({x: numpy.ones((10, 2, 21, 39),
                                           dtype=theano.config.floatX)})
    assert out.shape == (10, 3, 8, 17) 
Example #12
Source File: model.py    From blocks-char-rnn with MIT License
def rnn_layer(dim, h, n):
    linear = Linear(input_dim=dim, output_dim=dim, name='linear' + str(n))
    rnn = SimpleRecurrent(dim=dim, activation=Tanh(), name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h)) 
Example #13
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def setUp(self):
        self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize() 
Example #14
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_mean_only_propagated():
    """Test that setting mean_only on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                             mean_only=False)
    assert not mlp.mean_only
    assert not any(act.children[0].mean_only for act in mlp.activations)
    mlp.mean_only = True
    assert mlp.mean_only
    assert all(act.children[0].mean_only for act in mlp.activations) 
Example #15
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_conserve_memory_propagated():
    """Test that setting conserve_memory on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                             conserve_memory=False)
    assert not mlp.conserve_memory
    assert not any(act.children[0].conserve_memory for act in mlp.activations)
    mlp.conserve_memory = True
    assert mlp.conserve_memory
    assert all(act.children[0].conserve_memory for act in mlp.activations) 
Example #16
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_allocation():
    """Test that BatchNormalizedMLP performs allocation correctly."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    mlp.allocate()
    assert mlp.activations[0].children[0].input_dim == 7
    assert mlp.activations[1].children[0].input_dim == 9
    assert not any(l.use_bias for l in mlp.linear_transformations) 
Example #17
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_construction():
    """Test that BatchNormalizedMLP performs construction correctly."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    assert all(isinstance(a, Sequence) for a in mlp.activations)
    assert all(isinstance(a.children[0], BatchNormalization)
               for a in mlp.activations)
    assert all(isinstance(a.children[1], Tanh)
               for a in mlp.activations) 
Example #18
Source File: recurrent.py    From attention-lvcsr with MIT License
def __init__(self, dim, activation=None, **kwargs):
        super(LSTM, self).__init__(**kwargs)
        self.dim = dim

        if not activation:
            activation = Tanh()
        self.children = [activation] 
Example #19
Source File: bricks.py    From Associative_LSTM with MIT License
def __init__(self, dim, bias, activation=None,
                 gate_activation=None, **kwargs):
        self.dim = dim
        self.bias = bias

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = ([self.activation, self.gate_activation] +
                    kwargs.get('children', []))
        super(LSTM, self).__init__(children=children, **kwargs) 
Example #20
Source File: bricks.py    From Associative_LSTM with MIT License
def __init__(self, dim, num_copies, use_W_xu, activation=None,
                 gate_activation=None, **kwargs):
        self.dim = dim
        self.num_copies = num_copies
        self.use_W_xu = use_W_xu

        # shape: C x F/2; integer division keeps the indices integral
        permutations = []
        indices = numpy.arange(self.dim // 2)
        for i in range(self.num_copies):
            numpy.random.shuffle(indices)
            permutations.append(numpy.concatenate(
                [indices,
                 [ind + self.dim // 2 for ind in indices]]))
        # C x F (numpy)
        self.permutations = numpy.vstack(permutations)

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = ([self.activation, self.gate_activation] +
                    kwargs.get('children', []))
        super(AssociativeLSTM, self).__init__(children=children, **kwargs) 
Example #21
Source File: test_model.py    From attention-lvcsr with MIT License
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)

    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper) 
Example #22
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()