Python theano.tensor.unbroadcast() Examples

The following are 30 code examples of theano.tensor.unbroadcast(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
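Before the examples, a minimal sketch of what the call does: T.unbroadcast() returns a view of its input with the broadcastable flag cleared on the listed axes. Theano marks any axis of constant size 1 as broadcastable, and ops such as theano.scan require the types of initial states and step outputs to match exactly, which is why the flag often has to be cleared by hand.

import theano.tensor as T

s = T.scalar()
m = s.dimshuffle('x', 'x')   # shape (1, 1); broadcastable == (True, True)
print(m.broadcastable)       # (True, True)

m = T.unbroadcast(m, 0, 1)   # same data, flags cleared on axes 0 and 1
print(m.broadcastable)       # (False, False)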
Example #1
Source File: recurrent.py    From Mozi with MIT License
def get_backward_output(self, state_below):
        X = state_below.dimshuffle((1,0,2))

        xi = T.dot(X, self.Wb_i) + self.bb_i
        xf = T.dot(X, self.Wb_f) + self.bb_f
        xc = T.dot(X, self.Wb_c) + self.bb_c
        xo = T.dot(X, self.Wb_o) + self.bb_o

        [outputs, memories], updates = theano.scan(
            self._forward_step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.Ub_i, self.Ub_f, self.Ub_o, self.Ub_c],
            go_backwards=True,
            truncate_gradient=self.truncate_gradient
        )
        return outputs.dimshuffle((1,0,2)) 
Example #2
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)  # shape: (nb_samples, time (padded with zeros), input_dim)
        # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))
        x = T.dot(X, self.W) + self.b

        # scan = theano symbolic loop.
        # See: http://deeplearning.net/software/theano/library/scan.html
        # Iterate over the first dimension of the x array (=time).
        outputs, updates = theano.scan(
            self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=[x, dict(input=padded_mask, taps=[-1])],  # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=self.U,  # static inputs to _step
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
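A note on the recurring T.unbroadcast(..., 1) around the scan initial state: T.alloc (which alloc_zeros_matrix wraps) marks any axis with a constant size of 1 as broadcastable, so when output_dim happens to be 1 the initial state's type would not match the matrix returned by the step function, and theano.scan would raise a type error. Below is a minimal sketch of the same pattern; X, output_dim and step are illustrative stand-ins, not part of the project above.

import theano
import theano.tensor as T

X = T.tensor3()     # (time, batch, input_dim), as after the dimshuffle above
output_dim = 1      # the corner case: a constant size-1 axis

# Without the unbroadcast, init would be broadcastable on axis 1 while the
# step output below is not, and scan would reject the mismatched types.
init = T.unbroadcast(T.alloc(0., X.shape[1], output_dim), 1)

def step(x_t, h_tm1):
    return T.tanh(x_t[:, :output_dim] + h_tm1)

outputs, updates = theano.scan(step, sequences=X, outputs_info=init)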
Example #3
Source File: recurrent.py    From Mozi with MIT License
def get_forward_output(self, state_below):
        X = state_below.dimshuffle((1,0,2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._forward_step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient
        )
        return outputs.dimshuffle((1,0,2)) 
Example #4
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #5
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._step,
            sequences=[xi, xf, xo, xc, padded_mask],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #6
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.tanh(T.dot(X, self.Pmat)) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient)
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #7
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #8
Source File: recurrent.py    From Mozi with MIT License
def _train_fprop(self, state_below):
        X = state_below.dimshuffle((1, 0, 2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #9
Source File: encdec.py    From NMT-Coverage with BSD 3-Clause "New" or "Revised" License
def build_sampler(self, n_samples, n_steps, T, c):
        states = [TT.zeros(shape=(n_samples,), dtype='int64'),
                TT.zeros(shape=(n_samples,), dtype='float32')]

        init_c = c[0, -self.state['dim']:]
        states += [ReplicateLayer(n_samples)(init(init_c).out).out for init in self.initializers]
        # added by Zhaopeng Tu, 2015-10-30
        # init_coverage
        if self.state['maintain_coverage']:
            # in sampling, init_c is two-dimensional (source_length * c_dim); the same holds for init_coverage
            # modified by Zhaopeng Tu, 2015-12-18, big mistake here!!!
            # coverage should always be 3D: the first two dimensions are consistent with the alignment probs,
            # while the last one is the coverage dim
            if self.state['use_linguistic_coverage'] and self.state['coverage_accumulated_operation'] == 'subtractive':
                init_coverage = TT.unbroadcast(TT.ones((c.shape[0], n_samples, self.state['coverage_dim']), dtype='float32'), 2)
            else:
                init_coverage = TT.unbroadcast(TT.zeros((c.shape[0], n_samples, self.state['coverage_dim']), dtype='float32'), 2)
            states.append(init_coverage)

        if not self.state['search']:
            c = PadLayer(n_steps)(c).out

        # Pad with final states
        non_sequences = [c, T]
        if self.state['maintain_coverage'] and self.state['use_linguistic_coverage'] and self.state['use_fertility_model']:
            fertility = self.state['max_fertility'] * self.fertility_inputer(c).out
            non_sequences.append(fertility)

        outputs, updates = theano.scan(self.sampling_step,
                outputs_info=states,
                non_sequences=non_sequences,
                sequences=[TT.arange(n_steps, dtype="int64")],
                n_steps=n_steps,
                name="{}_sampler_scan".format(self.prefix))
        if self.state['maintain_coverage']:
            if self.state['use_fertility_model'] and self.state['use_linguistic_coverage']:
                return (outputs[0], outputs[1], outputs[-1], fertility), updates
            else:
                return (outputs[0], outputs[1], outputs[-1]), updates
        else:
            return (outputs[0], outputs[1]), updates 
Example #10
Source File: dbm_metrics.py    From TextDetector with GNU General Public License v3.0
def _e_step(psamples, W_list, b_list, n_steps=100, eps=1e-5):
    """
    Performs 'n_steps' of mean-field inference (used to compute positive phase
    statistics)

    Parameters
    ----------
    psamples : array-like object of theano shared variables
        State of each layer of the DBM (during the inference process).
        psamples[0] points to the input
    n_steps :  integer
        Number of iterations of mean-field to perform
    """
    depth = len(psamples)

    new_psamples = [T.unbroadcast(T.shape_padleft(psample))
                    for psample in psamples]

    # now alternate mean-field inference for even/odd layers
    def mf_iteration(*psamples):
        new_psamples = [p for p in psamples]
        for i in xrange(1, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)
        for i in xrange(2, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)

        score = 0.
        for i in xrange(1, depth):
            score = T.maximum(T.mean(abs(new_psamples[i] - psamples[i])),
                              score)

        return new_psamples, theano.scan_module.until(score < eps)

    new_psamples, updates = scan(
        mf_iteration,
        states=new_psamples,
        n_steps=n_steps
    )

    return [x[0] for x in new_psamples] 
Example #11
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        return [hid_init, hid_init] 
Example #12
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        return [hid_init, hid_init] 
Example #13
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        cell_init = T.dot(ones_vector, self.cell_init)
        cell_init = T.unbroadcast(cell_init, 0)
        return [hid_init, cell_init] 
Example #14
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        return [hid_init, hid_init] 
Example #15
Source File: theano_internal.py    From spinn with MIT License
def zeros_nobroadcast(shape, dtype=theano.config.floatX):
    zeros = T.zeros(shape, dtype=dtype)
    zeros = T.unbroadcast(zeros, *range(len(shape)))
    return zeros 
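A quick usage note on the helper above: because every axis is unbroadcast, the result is safe as a scan initial state even when some dimension is a constant 1. The shape below is chosen purely for illustration.

h0 = zeros_nobroadcast((8, 1))
print(h0.broadcastable)   # (False, False); plain T.zeros((8, 1)) would give (False, True)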
Example #16
Source File: encdec.py    From NMT-Coverage with BSD 3-Clause "New" or "Revised" License
def build_sampler(self, n_samples, n_steps, T, c):
        states = [TT.zeros(shape=(n_samples,), dtype='int64'),
                TT.zeros(shape=(n_samples,), dtype='float32')]
        init_c = c[0, -self.state['dim']:]
        states += [ReplicateLayer(n_samples)(init(init_c).out).out for init in self.initializers]
        # added by Zhaopeng Tu, 2015-10-30
        # init_coverage
        if self.state['maintain_coverage']:
            # in sampling, init_c is two-dimensional (source_length * c_dim); the same holds for init_coverage
            if self.state['use_accumulated_coverage'] and self.state['coverage_accumulated_operation'] == 'subtractive':
                init_coverage = TT.unbroadcast(TT.ones((c.shape[0], self.state['coverage_dim']), dtype='float32'), 1)
            else:
                init_coverage = TT.unbroadcast(TT.zeros((c.shape[0], self.state['coverage_dim']), dtype='float32'), 1)
            states.append(init_coverage)

        if not self.state['search']:
            c = PadLayer(n_steps)(c).out

        # Pad with final states
        non_sequences = [c, T]

        outputs, updates = theano.scan(self.sampling_step,
                outputs_info=states,
                non_sequences=non_sequences,
                sequences=[TT.arange(n_steps, dtype="int64")],
                n_steps=n_steps,
                name="{}_sampler_scan".format(self.prefix))
        if self.state['maintain_coverage']:
            return (outputs[0], outputs[1], outputs[-1]), updates
        else:
            return (outputs[0], outputs[1]), updates 
Example #17
Source File: bricks.py    From PacGAN with MIT License
def get_predictions(self, x, x_tilde, application_call):
        data_sample_preds = self.discriminator.apply(
            tensor.unbroadcast(tensor.concatenate([x, x_tilde], axis=0),
                               *range(x.ndim)))
        data_preds = data_sample_preds[:x.shape[0]]
        sample_preds = data_sample_preds[x.shape[0]:]

        application_call.add_auxiliary_variable(
            tensor.nnet.sigmoid(data_preds).mean(), name='data_accuracy')
        application_call.add_auxiliary_variable(
            (1 - tensor.nnet.sigmoid(sample_preds)).mean(),
            name='sample_accuracy')

        return data_preds, sample_preds 
Example #18
Source File: graph_state.py    From gated-graph-transformer-network with MIT License
def create_empty(cls, batch_size, num_node_ids, node_state_size, num_edge_types):
        """
        Create an empty graph state with the specified sizes. Note that this
        will contain one zero-strength element to prevent nasty GPU errors
        from a dimension with 0 in it.

            batch_size: Number of batches
            num_node_ids: An integer giving size of node id
            node_state_size: An integer giving size of node state
            num_edge_types: An integer giving number of edge types
        """
        return cls( T.unbroadcast(T.zeros([batch_size, 1]), 1),
                    T.unbroadcast(T.zeros([batch_size, 1, num_node_ids]), 1),
                    T.unbroadcast(T.zeros([batch_size, 1, node_state_size]), 1),
                    T.unbroadcast(T.zeros([batch_size, 1, 1, num_edge_types]), 1, 2)) 
Example #19
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=self.depth)
        X = X.dimshuffle((1, 0, 2))

        x = T.dot(X, self.W) + self.b

        if self.depth == 1:
            initial = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        else:
            initial = T.unbroadcast(T.unbroadcast(alloc_zeros_matrix(self.depth, X.shape[1], self.output_dim), 0), 2)

        outputs, updates = theano.scan(
            self._step,
            sequences=[x, dict(
                input=padded_mask,
                taps=[(-i) for i in range(self.depth)]
            )],
            outputs_info=[dict(
                initial=initial,
                taps=[(-i-1) for i in range(self.depth)]
            )],
            non_sequences=self.Us,
            truncate_gradient=self.truncate_gradient
        )

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #20
Source File: test_var.py    From D-VAE with MIT License
def test_broadcast(self):
        # Test that we can rebroadcast
        data = numpy.random.rand(10, 10).astype('float32')
        output_var = f32sc(name="output", value=data)

        up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func()

        up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                     output_var.type.broadcastable)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func() 
Example #21
Source File: test_opt.py    From D-VAE with MIT License
def test_rebroadcast():
    d = numpy.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)

    f(d)

    topo = f.maker.fgraph.toposort()
    rebrs = [node for node in topo if isinstance(node.op, tensor.Rebroadcast)]
    assert len(rebrs) == 1
    rebr = rebrs[0]

    assert isinstance(rebr.inputs[0].type, GpuArrayType)
    assert isinstance(rebr.outputs[0].type, GpuArrayType) 
Example #22
Source File: test_rop.py    From D-VAE with MIT License
def test_rebroadcast(self):
        # I need the sum, because the setup expects the output to be a
        # vector
        self.check_rop_lop(tensor.unbroadcast(
            self.x[:4].dimshuffle('x', 0), 0).sum(axis=1),
            (1,)) 
Example #23
Source File: test_rop.py    From attention-lvcsr with MIT License
def test_rebroadcast(self):
        # I need the sum, because the setup expects the output to be a
        # vector
        self.check_rop_lop(tensor.unbroadcast(
            self.x[:4].dimshuffle('x', 0), 0).sum(axis=1),
            (1,)) 
Example #24
Source File: test_var.py    From attention-lvcsr with MIT License
def test_broadcast(self):
        # Test that we can rebroadcast
        data = numpy.random.rand(10, 10).astype('float32')
        output_var = f32sc(name="output", value=data)

        up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func()

        up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                     output_var.type.broadcastable)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func() 
Example #25
Source File: test_opt.py    From attention-lvcsr with MIT License
def test_rebroadcast():
    d = numpy.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)

    f(d)

    topo = f.maker.fgraph.toposort()
    rebrs = [node for node in topo if isinstance(node.op, tensor.Rebroadcast)]
    assert len(rebrs) == 1
    rebr = rebrs[0]

    assert isinstance(rebr.inputs[0].type, GpuArrayType)
    assert isinstance(rebr.outputs[0].type, GpuArrayType) 
Example #26
Source File: layers.py    From ntm-lasagne with MIT License
def get_output_for(self, input, get_details=False, **kwargs):

        input = input.dimshuffle(1, 0, 2)

        def step(x_t, M_tm1, h_tm1, state_tm1, ww_tm1, wr_tm1, *params):
            # Update the memory (using w_tm1 of the writing heads & M_tm1)
            M_t = self.write_heads.write(h_tm1, ww_tm1, M_tm1)

            # Get the read vector (using w_tm1 of the reading heads & M_t)
            r_t = self.read_heads.read(wr_tm1, M_t)

            # Apply the controller (using x_t, r_t & the requirements for the controller)
            h_t, state_t = self.controller.step(x_t, r_t, h_tm1, state_tm1)

            # Update the weights (using h_t, M_t & w_tm1)
            ww_t = self.write_heads.get_weights(h_t, ww_tm1, M_t)
            wr_t = self.read_heads.get_weights(h_t, wr_tm1, M_t)

            return [M_t, h_t, state_t, ww_t, wr_t]

        memory_init = T.tile(self.memory.memory_init, (input.shape[1], 1, 1))
        memory_init = T.unbroadcast(memory_init, 0)

        write_weights_init = T.tile(self.write_heads.weights_init, (input.shape[1], 1, 1))
        write_weights_init = T.unbroadcast(write_weights_init, 0)
        read_weights_init = T.tile(self.read_heads.weights_init, (input.shape[1], 1, 1))
        read_weights_init = T.unbroadcast(read_weights_init, 0)

        non_seqs = self.controller.get_params() + self.memory.get_params() + \
            self.write_heads.get_params() + self.read_heads.get_params()

        hids, _ = theano.scan(
            fn=step,
            sequences=input,
            outputs_info=[memory_init] + self.controller.outputs_info(input.shape[1]) + \
                         [write_weights_init, read_weights_init],
            non_sequences=non_seqs,
            strict=True)

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        if get_details:
            hid_out = [
                hids[0].dimshuffle(1, 0, 2, 3),
                hids[1].dimshuffle(1, 0, 2),
                hids[2].dimshuffle(1, 0, 2),
                hids[3].dimshuffle(1, 0, 2, 3),
                hids[4].dimshuffle(1, 0, 2, 3)]
        else:
            if self.only_return_final:
                hid_out = hids[1][-1]
            else:
                hid_out = hids[1].dimshuffle(1, 0, 2)

        return hid_out 
Example #27
Source File: prednet.py    From prednet with MIT License
def get_initial_state(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
        non_channel_axis = -1 if self.data_format == 'channels_first' else -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'e']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
            states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
            nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:
            for l in range(nlayers_to_pass[u]):
                ds_factor = 2 ** l
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'e':
                    stack_size = 2 * self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = stack_size * nb_row * nb_col  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state, reducer) # (samples, output_size)
                if self.data_format == 'channels_first':
                    output_shp = (-1, stack_size, nb_row, nb_col)
                else:
                    output_shp = (-1, nb_row, nb_col, stack_size)
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if K._BACKEND == 'theano':
            from theano import tensor as T
            # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
            # In our case, this is a problem when training on grayscale images, and the below line fixes it.
            initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states]

        if self.extrap_start_time is not None:
            initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')]  # the last state will correspond to the current timestep
        return initial_states 
Example #28
Source File: crf.py    From keras-contrib with MIT License
def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(
                input_energy, mask, self.left_boundary, self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward pass to find the best path; `initial_best_idx` can be anything,
        # as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        # matrix instead of vector is required by tf `K.rnn`
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
        if K.backend() == 'theano':
            from theano import tensor as T
            initial_best_idx = [T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                from theano import tensor as T
                return params[T.arange(n), indices]
            elif K.backend() == 'tensorflow':
                import tensorflow as tf
                indices = K.transpose(K.stack([tf.range(n), indices]))
                return tf.gather_nd(params, indices)
            else:
                raise NotImplementedError

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                from theano import tensor as T
                next_best_idx = T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                                 input_length=K.int_shape(X)[1], unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units) 
Example #29
Source File: rnn.py    From TextDetector with GNU General Public License v3.0
def fprop(self, state_below, return_all=False):

        if isinstance(state_below, tuple):
            state_below, mask = state_below
        else:
            mask = None

        z0 = tensor.alloc(np.cast[config.floatX](0), state_below.shape[1],
                          self.dim)

        z0 = tensor.unbroadcast(z0, 0)
        if self.dim == 1:
            z0 = tensor.unbroadcast(z0, 1)

        W, U, b = self._params
        if self.weight_noise:
            W = self.add_noise(W)
            U = self.add_noise(U)

        state_below = tensor.dot(state_below, W) + b

        if mask is not None:
            (z, updates) = scan(fn=self.fprop_step_mask,
                                sequences=[state_below, mask],
                                outputs_info=[z0],
                                non_sequences=[U])
        else:
            (z, updates) = scan(fn=self.fprop_step,
                                sequences=[state_below],
                                outputs_info=[z0],
                                non_sequences=[U])

        self._scan_updates.update(updates)

        if return_all:
            return z

        if self.indices is not None:
            if len(self.indices) > 1:
                return [z[i, :, :self.dim] for i in self.indices]
            else:
                return z[self.indices[0], :, :self.dim]
        else:
            if mask is not None:
                return (z[:, :, :self.dim], mask)
            else:
                return z[:, :, :self.dim] 
Example #30
Source File: rnn.py    From TextDetector with GNU General Public License v3.0
def fprop(self, state_below, return_all=False):

        if isinstance(state_below, tuple):
            state_below, mask = state_below
        else:
            mask = None

        z0 = tensor.alloc(np.cast[config.floatX](0), state_below.shape[1],
                          self.dim * 2)

        z0 = tensor.unbroadcast(z0, 0)
        if self.dim == 1:
            z0 = tensor.unbroadcast(z0, 1)

        W, U, b = self._params
        if self.weight_noise:
            W = self.add_noise(W)
            U = self.add_noise(U)

        state_below = tensor.dot(state_below, W) + b

        if mask is not None:
            (z, updates) = scan(fn=self.fprop_step_mask,
                                sequences=[state_below, mask],
                                outputs_info=[z0],
                                non_sequences=[U])
        else:
            (z, updates) = scan(fn=self.fprop_step,
                                sequences=[state_below],
                                outputs_info=[z0],
                                non_sequences=[U])

        self._scan_updates.update(updates)

        if return_all:
            return z

        if self.indices is not None:
            if len(self.indices) > 1:
                return [z[i, :, :self.dim] for i in self.indices]
            else:
                return z[self.indices[0], :, :self.dim]
        else:
            if mask is not None:
                return (z[:, :, :self.dim], mask)
            else:
                return z[:, :, :self.dim]