Python blocks.bricks.Tanh() Examples

The following are 22 code examples of blocks.bricks.Tanh(), extracted from open source projects. The originating project and source file are noted above each example. You may also want to check out all available functions and classes of the module blocks.bricks.
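Before the project-specific examples, here is a minimal sketch (not taken from any of the projects below) of how Tanh is typically used: applied directly to a Theano variable, or passed as the activation brick of a container such as MLP or a recurrent transition. The dimensions and variable names are illustrative only.

from theano import tensor
from blocks.bricks import MLP, Tanh
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.initialization import Constant, IsotropicGaussian

x = tensor.matrix('x')
y = Tanh().apply(x)  # element-wise tanh of a Theano variable

# Tanh() instances as the hidden-layer activations of an MLP
mlp = MLP(activations=[Tanh(), Tanh()], dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
mlp.initialize()
h = mlp.apply(x)

# Tanh() as the state activation of a recurrent transition
rnn = SimpleRecurrent(dim=100, activation=Tanh(),
                      weights_init=IsotropicGaussian(0.01))
rnn.initialize()
states = rnn.apply(tensor.tensor3('inputs'))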
Example #1
Source File: recurrent.py    From attention-lvcsr with MIT License
def __init__(self, dim, activation=None, gate_activation=None,
                 **kwargs):
        super(GatedRecurrent, self).__init__(**kwargs)
        self.dim = dim

        self.recurrent_weights_init = None
        self.initial_states_init = None

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        self.children = [activation, gate_activation] 
Example #2
Source File: test_search.py    From attention-lvcsr with MIT License
def __init__(self, dimension, alphabet_size, **kwargs):
        super(SimpleGenerator, self).__init__(**kwargs)
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.generator = generator
        self.children = [lookup, generator] 
Example #3
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_super_in_recurrent_overrider():
    # A regression test for issue #475
    class SimpleRecurrentWithContext(SimpleRecurrent):
        @application(contexts=['context'])
        def apply(self, context, *args, **kwargs):
            kwargs['inputs'] += context
            return super(SimpleRecurrentWithContext, self).apply(*args,
                                                                 **kwargs)

        @apply.delegate
        def apply_delegate(self):
            return super(SimpleRecurrentWithContext, self).apply

    brick = SimpleRecurrentWithContext(100, Tanh())
    inputs = tensor.tensor3('inputs')
    context = tensor.matrix('context').dimshuffle('x', 0, 1)
    brick.apply(context, inputs=inputs) 
Example #4
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_saved_inner_graph():
    """Make sure that the original inner graph is saved."""
    x = tensor.tensor3()
    recurrent = SimpleRecurrent(dim=3, activation=Tanh())
    y = recurrent.apply(x)

    application_call = get_application_call(y)
    assert application_call.inner_inputs
    assert application_call.inner_outputs

    cg = ComputationGraph(application_call.inner_outputs)
    # Check that the inner scan graph is annotated
    # with `recurrent.apply`
    assert len(VariableFilter(applications=[recurrent.apply])(cg)) == 3
    # Check that the inner graph is equivalent to the one
    # produced by a stand-alone call to `recurrent.apply`
    assert is_same_graph(application_call.inner_outputs[0],
                         recurrent.apply(*application_call.inner_inputs,
                                         iterate=False)) 
Example #5
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def setUp(self):
        self.bidir = Bidirectional(weights_init=Orthogonal(),
                                   prototype=SimpleRecurrent(
                                       dim=3, activation=Tanh()))
        self.simple = SimpleRecurrent(dim=3, weights_init=Orthogonal(),
                                      activation=Tanh(), seed=1)
        self.bidir.allocate()
        self.simple.initialize()
        self.bidir.children[0].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.bidir.children[1].parameters[0].set_value(
            self.simple.parameters[0].get_value())
        self.x_val = 0.1 * numpy.asarray(
            list(itertools.permutations(range(4))),
            dtype=theano.config.floatX)
        self.x_val = (numpy.ones((24, 4, 3), dtype=theano.config.floatX) *
                      self.x_val[..., None])
        self.mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        self.mask_val[12:24, 3] = 0 
Example #6
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def setUp(self):
        self.gated = GatedRecurrent(
            dim=3, activation=Tanh(),
            gate_activation=Tanh(), weights_init=Constant(2))
        self.gated.initialize()
        self.reset_only = GatedRecurrent(
            dim=3, activation=Tanh(),
            gate_activation=Tanh(),
            weights_init=IsotropicGaussian(), seed=1)
        self.reset_only.initialize() 
Example #7
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #8
Source File: __init__.py    From blocks-examples with MIT License
def __init__(self, dimension, alphabet_size, **kwargs):
        super(WordReverser, self).__init__(**kwargs)
        encoder = Bidirectional(
            SimpleRecurrent(dim=dimension, activation=Tanh()))
        fork = Fork([name for name in encoder.prototype.apply.sequences
                    if name != 'mask'])
        fork.input_dim = dimension
        fork.output_dims = [encoder.prototype.get_dim(name) for name in fork.input_names]
        lookup = LookupTable(alphabet_size, dimension)
        transition = SimpleRecurrent(
            activation=Tanh(),
            dim=dimension, name="transition")
        attention = SequenceContentAttention(
            state_names=transition.apply.states,
            attended_dim=2 * dimension, match_dim=dimension, name="attention")
        readout = Readout(
            readout_dim=alphabet_size,
            source_names=[transition.apply.states[0],
                          attention.take_glimpses.outputs[0]],
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(alphabet_size, dimension),
            name="readout")
        generator = SequenceGenerator(
            readout=readout, transition=transition, attention=attention,
            name="generator")

        self.lookup = lookup
        self.fork = fork
        self.encoder = encoder
        self.generator = generator
        self.children = [lookup, fork, encoder, generator] 
Example #9
Source File: attentive_reader.py    From DeepMind-Teaching-Machines-to-Read-and-Comprehend with MIT License
def make_bidir_lstm_stack(seq, seq_dim, mask, sizes, skip=True, name=''):
    bricks = []

    curr_dim = [seq_dim]
    curr_hidden = [seq]

    hidden_list = []
    for k, dim in enumerate(sizes):
        fwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_fwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        fwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_fwd_lstm_%d'%(name,k))

        bwd_lstm_ins = [Linear(input_dim=d, output_dim=4*dim, name='%s_bwd_lstm_in_%d_%d'%(name,k,l)) for l, d in enumerate(curr_dim)]
        bwd_lstm = LSTM(dim=dim, activation=Tanh(), name='%s_bwd_lstm_%d'%(name,k))

        bricks = bricks + [fwd_lstm, bwd_lstm] + fwd_lstm_ins + bwd_lstm_ins

        fwd_tmp = sum(x.apply(v) for x, v in zip(fwd_lstm_ins, curr_hidden))
        bwd_tmp = sum(x.apply(v) for x, v in zip(bwd_lstm_ins, curr_hidden))
        fwd_hidden, _ = fwd_lstm.apply(fwd_tmp, mask=mask)
        bwd_hidden, _ = bwd_lstm.apply(bwd_tmp[::-1], mask=mask[::-1])
        hidden_list = hidden_list + [fwd_hidden, bwd_hidden]
        if skip:
            curr_hidden = [seq, fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [seq_dim, dim, dim]
        else:
            curr_hidden = [fwd_hidden, bwd_hidden[::-1]]
            curr_dim = [dim, dim]

    return bricks, hidden_list 
Example #10
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_sequence_activation_get_dim():
    seq = ConvolutionalSequence([Tanh()], num_channels=9, image_size=(4, 6))
    seq.allocate()
    assert seq.get_dim('output') == (9, 4, 6)

    seq = ConvolutionalSequence([Convolutional(filter_size=(7, 7),
                                               num_filters=5,
                                               border_mode=(1, 1)),
                                 Tanh()], num_channels=8, image_size=(8, 11))
    seq.allocate()
    assert seq.get_dim('output') == (5, 4, 7) 
Example #11
Source File: test_conv.py    From attention-lvcsr with MIT License
def test_convolutional_sequence_with_convolutions_raw_activation():
    seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         Rectifier(),
         Convolutional(filter_size=(5, 5), num_filters=3, step=(2, 2)),
         Tanh()],
        num_channels=2,
        image_size=(21, 39))
    seq.allocate()
    x = theano.tensor.tensor4()
    out = seq.apply(x).eval({x: numpy.ones((10, 2, 21, 39),
                                           dtype=theano.config.floatX)})
    assert out.shape == (10, 3, 8, 17) 
Example #12
Source File: model.py    From blocks-char-rnn with MIT License
def rnn_layer(dim, h, n):
    linear = Linear(input_dim=dim, output_dim=dim, name='linear' + str(n))
    rnn = SimpleRecurrent(dim=dim, activation=Tanh(), name='rnn' + str(n))
    initialize([linear, rnn])
    return rnn.apply(linear.apply(h)) 
Example #13
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def setUp(self):
        self.simple = SimpleRecurrent(dim=3, weights_init=Constant(2),
                                      activation=Tanh())
        self.simple.initialize() 
Example #14
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_mean_only_propagated():
    """Test that setting mean_only on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                             mean_only=False)
    assert not mlp.mean_only
    assert not any(act.children[0].mean_only for act in mlp.activations)
    mlp.mean_only = True
    assert mlp.mean_only
    assert all(act.children[0].mean_only for act in mlp.activations) 
Example #15
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_conserve_memory_propagated():
    """Test that setting conserve_memory on a BatchNormalizedMLP works."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9],
                             conserve_memory=False)
    assert not mlp.conserve_memory
    assert not any(act.children[0].conserve_memory for act in mlp.activations)
    mlp.conserve_memory = True
    assert mlp.conserve_memory
    assert all(act.children[0].conserve_memory for act in mlp.activations) 
Example #16
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_allocation():
    """Test that BatchNormalizedMLP performs allocation correctly."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    mlp.allocate()
    assert mlp.activations[0].children[0].input_dim == 7
    assert mlp.activations[1].children[0].input_dim == 9
    assert not any(l.use_bias for l in mlp.linear_transformations) 
Example #17
Source File: test_bn.py    From attention-lvcsr with MIT License
def test_batch_normalized_mlp_construction():
    """Test that BatchNormalizedMLP performs construction correctly."""
    mlp = BatchNormalizedMLP([Tanh(), Tanh()], [5, 7, 9])
    assert all(isinstance(a, Sequence) for a in mlp.activations)
    assert all(isinstance(a.children[0], BatchNormalization)
               for a in mlp.activations)
    assert all(isinstance(a.children[1], Tanh)
               for a in mlp.activations) 
Example #18
Source File: recurrent.py    From attention-lvcsr with MIT License
def __init__(self, dim, activation=None, **kwargs):
        super(LSTM, self).__init__(**kwargs)
        self.dim = dim

        if not activation:
            activation = Tanh()
        self.children = [activation] 
Example #19
Source File: bricks.py    From Associative_LSTM with MIT License
def __init__(self, dim, bias, activation=None,
                 gate_activation=None, **kwargs):
        self.dim = dim
        self.bias = bias

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = ([self.activation, self.gate_activation] +
                    kwargs.get('children', []))
        super(LSTM, self).__init__(children=children, **kwargs) 
Example #20
Source File: bricks.py    From Associative_LSTM with MIT License
def __init__(self, dim, num_copies, use_W_xu, activation=None,
                 gate_activation=None, **kwargs):
        self.dim = dim
        self.num_copies = num_copies
        self.use_W_xu = use_W_xu

        # shape: C x F/2; integer division keeps the indices integral
        permutations = []
        indices = numpy.arange(self.dim // 2)
        for i in range(self.num_copies):
            numpy.random.shuffle(indices)
            permutations.append(numpy.concatenate(
                [indices,
                 [ind + self.dim // 2 for ind in indices]]))
        # C x F (numpy)
        self.permutations = numpy.vstack(permutations)

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = ([self.activation, self.gate_activation] +
                    kwargs.get('children', []))
        super(AssociativeLSTM, self).__init__(children=children, **kwargs) 
Example #21
Source File: test_model.py    From attention-lvcsr with MIT License
def test_model():
    x = tensor.matrix('x')
    mlp1 = MLP([Tanh(), Tanh()], [10, 20, 30], name="mlp1")
    mlp2 = MLP([Tanh()], [30, 40], name="mlp2")
    h1 = mlp1.apply(x)
    h2 = mlp2.apply(h1)

    model = Model(h2)
    assert model.get_top_bricks() == [mlp1, mlp2]
    # The order of parameters returned is deterministic but
    # not sensible.
    assert list(model.get_parameter_dict().items()) == [
        ('/mlp2/linear_0.b', mlp2.linear_transformations[0].b),
        ('/mlp1/linear_1.b', mlp1.linear_transformations[1].b),
        ('/mlp1/linear_0.b', mlp1.linear_transformations[0].b),
        ('/mlp1/linear_0.W', mlp1.linear_transformations[0].W),
        ('/mlp1/linear_1.W', mlp1.linear_transformations[1].W),
        ('/mlp2/linear_0.W', mlp2.linear_transformations[0].W)]

    # Test getting and setting parameter values
    mlp3 = MLP([Tanh()], [10, 10])
    mlp3.allocate()
    model3 = Model(mlp3.apply(x))
    parameter_values = {
        '/mlp/linear_0.W': 2 * numpy.ones((10, 10),
                                          dtype=theano.config.floatX),
        '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=theano.config.floatX)}
    model3.set_parameter_values(parameter_values)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[0].get_value() == 2)
    assert numpy.all(
        mlp3.linear_transformations[0].parameters[1].get_value() == 3)
    got_parameter_values = model3.get_parameter_values()
    assert len(got_parameter_values) == len(parameter_values)
    for name, value in parameter_values.items():
        assert_allclose(value, got_parameter_values[name])

    # Test exception is raised if parameter shapes don't match
    def helper():
        parameter_values = {
            '/mlp/linear_0.W': 2 * numpy.ones((11, 11),
                                              dtype=theano.config.floatX),
            '/mlp/linear_0.b': 3 * numpy.ones(11, dtype=theano.config.floatX)}
        model3.set_parameter_values(parameter_values)
    assert_raises(ValueError, helper)

    # Test name conflict handling
    mlp4 = MLP([Tanh()], [10, 10])

    def helper():
        Model(mlp4.apply(mlp3.apply(x)))
    assert_raises(ValueError, helper) 
Example #22
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()