Python theano.tensor.unbroadcast() Examples

The following are 30 code examples of theano.tensor.unbroadcast(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
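Before the examples, a minimal sketch of what the call does: T.unbroadcast() returns a view of its input with the broadcastable flag cleared on the listed axes. Theano marks any axis of constant size 1 as broadcastable, and ops such as theano.scan require the types of initial states and step outputs to match exactly, which is why the flag often has to be cleared by hand.

import theano.tensor as T

s = T.scalar()
m = s.dimshuffle('x', 'x')   # shape (1, 1); broadcastable == (True, True)
print(m.broadcastable)       # (True, True)

m = T.unbroadcast(m, 0, 1)   # same data, flags cleared on axes 0 and 1
print(m.broadcastable)       # (False, False)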
Example #1
Source File: recurrent.py    From Mozi with MIT License
def get_backward_output(self, state_below):
        X = state_below.dimshuffle((1,0,2))

        xi = T.dot(X, self.Wb_i) + self.bb_i
        xf = T.dot(X, self.Wb_f) + self.bb_f
        xc = T.dot(X, self.Wb_c) + self.bb_c
        xo = T.dot(X, self.Wb_o) + self.bb_o

        [outputs, memories], updates = theano.scan(
            self._forward_step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.Ub_i, self.Ub_f, self.Ub_o, self.Ub_c],
            go_backwards=True,
            truncate_gradient=self.truncate_gradient
        )
        return outputs.dimshuffle((1,0,2)) 
Example #2
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)  # shape: (nb_samples, time (padded with zeros), input_dim)
        # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))
        x = T.dot(X, self.W) + self.b

        # scan = theano symbolic loop.
        # See: http://deeplearning.net/software/theano/library/scan.html
        # Iterate over the first dimension of the x array (=time).
        outputs, updates = theano.scan(
            self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
            sequences=[x, dict(input=padded_mask, taps=[-1])],  # tensors to iterate over, inputs to _step
            # initialization of the output. Input to _step with default tap=-1.
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=self.U,  # static inputs to _step
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
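A note on the recurring T.unbroadcast(..., 1) around the scan initial state: T.alloc (which alloc_zeros_matrix wraps) marks any axis with a constant size of 1 as broadcastable, so when output_dim happens to be 1 the initial state's type would not match the matrix returned by the step function, and theano.scan would raise a type error. Below is a minimal sketch of the same pattern; X, output_dim and step are illustrative stand-ins, not part of the project above.

import theano
import theano.tensor as T

X = T.tensor3()     # (time, batch, input_dim), as after the dimshuffle above
output_dim = 1      # the corner case: a constant size-1 axis

# Without the unbroadcast, init would be broadcastable on axis 1 while the
# step output below is not, and scan would reject the mismatched types.
init = T.unbroadcast(T.alloc(0., X.shape[1], output_dim), 1)

def step(x_t, h_tm1):
    return T.tanh(x_t[:, :output_dim] + h_tm1)

outputs, updates = theano.scan(step, sequences=X, outputs_info=init)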
Example #3
Source File: recurrent.py    From Mozi with MIT License
def get_forward_output(self, state_below):
        X = state_below.dimshuffle((1,0,2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._forward_step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient
        )
        return outputs.dimshuffle((1,0,2)) 
Example #4
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #5
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._step,
            sequences=[xi, xf, xo, xc, padded_mask],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #6
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.tanh(T.dot(X, self.Pmat)) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient)
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #7
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
        X = X.dimshuffle((1, 0, 2))

        x_z = T.dot(X, self.W_z) + self.b_z
        x_r = T.dot(X, self.W_r) + self.b_r
        x_h = T.dot(X, self.W_h) + self.b_h
        outputs, updates = theano.scan(
            self._step,
            sequences=[x_z, x_r, x_h, padded_mask],
            outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            non_sequences=[self.U_z, self.U_r, self.U_h],
            truncate_gradient=self.truncate_gradient
        )
        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #8
Source File: recurrent.py    From Mozi with MIT License
def _train_fprop(self, state_below):
        X = state_below.dimshuffle((1, 0, 2))

        xi = T.dot(X, self.W_i) + self.b_i
        xf = T.dot(X, self.W_f) + self.b_f
        xc = T.dot(X, self.W_c) + self.b_c
        xo = T.dot(X, self.W_o) + self.b_o

        [outputs, memories], updates = theano.scan(
            self._step,
            sequences=[xi, xf, xo, xc],
            outputs_info=[
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
                T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
            ],
            non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
            truncate_gradient=self.truncate_gradient)

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #9
Source File: encdec.py    From NMT-Coverage with BSD 3-Clause "New" or "Revised" License
def build_sampler(self, n_samples, n_steps, T, c):
        states = [TT.zeros(shape=(n_samples,), dtype='int64'),
                TT.zeros(shape=(n_samples,), dtype='float32')]

        init_c = c[0, -self.state['dim']:]
        states += [ReplicateLayer(n_samples)(init(init_c).out).out for init in self.initializers]
        # added by Zhaopeng Tu, 2015-10-30
        # init_coverage
        if self.state['maintain_coverage']:
            # in sampling, init_c is two-dimensional (source_length * c_dim); the same holds for init_coverage
            # modified by Zhaopeng Tu, 2015-12-18, big mistake here!!!
            # coverage should always be 3D: the first two dimensions are consistent with the alignment probs,
            # while the last one is the coverage dim
            if self.state['use_linguistic_coverage'] and self.state['coverage_accumulated_operation'] == 'subtractive':
                init_coverage = TT.unbroadcast(TT.ones((c.shape[0], n_samples, self.state['coverage_dim']), dtype='float32'), 2)
            else:
                init_coverage = TT.unbroadcast(TT.zeros((c.shape[0], n_samples, self.state['coverage_dim']), dtype='float32'), 2)
            states.append(init_coverage)

        if not self.state['search']:
            c = PadLayer(n_steps)(c).out

        # Pad with final states
        non_sequences = [c, T]
        if self.state['maintain_coverage'] and self.state['use_linguistic_coverage'] and self.state['use_fertility_model']:
            fertility = self.state['max_fertility'] * self.fertility_inputer(c).out
            non_sequences.append(fertility)

        outputs, updates = theano.scan(self.sampling_step,
                outputs_info=states,
                non_sequences=non_sequences,
                sequences=[TT.arange(n_steps, dtype="int64")],
                n_steps=n_steps,
                name="{}_sampler_scan".format(self.prefix))
        if self.state['maintain_coverage']:
            if self.state['use_fertility_model'] and self.state['use_linguistic_coverage']:
                return (outputs[0], outputs[1], outputs[-1], fertility), updates
            else:
                return (outputs[0], outputs[1], outputs[-1]), updates
        else:
            return (outputs[0], outputs[1]), updates 
Example #10
Source File: dbm_metrics.py    From TextDetector with GNU General Public License v3.0
def _e_step(psamples, W_list, b_list, n_steps=100, eps=1e-5):
    """
    Performs 'n_steps' of mean-field inference (used to compute positive phase
    statistics)

    Parameters
    ----------
    psamples : array-like object of theano shared variables
        State of each layer of the DBM (during the inference process).
        psamples[0] points to the input
    n_steps :  integer
        Number of iterations of mean-field to perform
    """
    depth = len(psamples)

    new_psamples = [T.unbroadcast(T.shape_padleft(psample))
                    for psample in psamples]

    # now alternate mean-field inference for even/odd layers
    def mf_iteration(*psamples):
        new_psamples = [p for p in psamples]
        for i in xrange(1, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)
        for i in xrange(2, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)

        score = 0.
        for i in xrange(1, depth):
            score = T.maximum(T.mean(abs(new_psamples[i] - psamples[i])),
                              score)

        return new_psamples, theano.scan_module.until(score < eps)

    new_psamples, updates = scan(
        mf_iteration,
        states=new_psamples,
        n_steps=n_steps
    )

    return [x[0] for x in new_psamples] 
Example #11
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        return [hid_init, hid_init] 
Example #12
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        return [hid_init, hid_init] 
Example #13
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        cell_init = T.dot(ones_vector, self.cell_init)
        cell_init = T.unbroadcast(cell_init, 0)
        return [hid_init, cell_init] 
Example #14
Source File: controllers.py    From ntm-lasagne with MIT License
def outputs_info(self, batch_size):
        ones_vector = T.ones((batch_size, 1))
        hid_init = T.dot(ones_vector, self.hid_init)
        hid_init = T.unbroadcast(hid_init, 0)
        return [hid_init, hid_init] 
Example #15
Source File: theano_internal.py    From spinn with MIT License
def zeros_nobroadcast(shape, dtype=theano.config.floatX):
    zeros = T.zeros(shape, dtype=dtype)
    zeros = T.unbroadcast(zeros, *range(len(shape)))
    return zeros 
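A quick usage note on the helper above: because every axis is unbroadcast, the result is safe as a scan initial state even when some dimension is a constant 1. The shape below is chosen purely for illustration.

h0 = zeros_nobroadcast((8, 1))
print(h0.broadcastable)   # (False, False); plain T.zeros((8, 1)) would give (False, True)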
Example #16
Source File: encdec.py    From NMT-Coverage with BSD 3-Clause "New" or "Revised" License
def build_sampler(self, n_samples, n_steps, T, c):
        states = [TT.zeros(shape=(n_samples,), dtype='int64'),
                TT.zeros(shape=(n_samples,), dtype='float32')]
        init_c = c[0, -self.state['dim']:]
        states += [ReplicateLayer(n_samples)(init(init_c).out).out for init in self.initializers]
        # added by Zhaopeng Tu, 2015-10-30
        # init_coverage
        if self.state['maintain_coverage']:
            # in sampling, init_c is two-dimensional (source_length * c_dim); the same holds for init_coverage
            if self.state['use_accumulated_coverage'] and self.state['coverage_accumulated_operation'] == 'subtractive':
                init_coverage = TT.unbroadcast(TT.ones((c.shape[0], self.state['coverage_dim']), dtype='float32'), 1)
            else:
                init_coverage = TT.unbroadcast(TT.zeros((c.shape[0], self.state['coverage_dim']), dtype='float32'), 1)
            states.append(init_coverage)

        if not self.state['search']:
            c = PadLayer(n_steps)(c).out

        # Pad with final states
        non_sequences = [c, T]

        outputs, updates = theano.scan(self.sampling_step,
                outputs_info=states,
                non_sequences=non_sequences,
                sequences=[TT.arange(n_steps, dtype="int64")],
                n_steps=n_steps,
                name="{}_sampler_scan".format(self.prefix))
        if self.state['maintain_coverage']:
            return (outputs[0], outputs[1], outputs[-1]), updates
        else:
            return (outputs[0], outputs[1]), updates 
Example #17
Source File: bricks.py    From PacGAN with MIT License
def get_predictions(self, x, x_tilde, application_call):
        data_sample_preds = self.discriminator.apply(
            tensor.unbroadcast(tensor.concatenate([x, x_tilde], axis=0),
                               *range(x.ndim)))
        data_preds = data_sample_preds[:x.shape[0]]
        sample_preds = data_sample_preds[x.shape[0]:]

        application_call.add_auxiliary_variable(
            tensor.nnet.sigmoid(data_preds).mean(), name='data_accuracy')
        application_call.add_auxiliary_variable(
            (1 - tensor.nnet.sigmoid(sample_preds)).mean(),
            name='sample_accuracy')

        return data_preds, sample_preds 
Example #18
Source File: graph_state.py    From gated-graph-transformer-network with MIT License
def create_empty(cls, batch_size, num_node_ids, node_state_size, num_edge_types):
        """
        Create an empty graph state with the specified sizes. Note that this
        will contain one zero-strength element to prevent nasty GPU errors
        from a dimension with 0 in it.

            batch_size: Number of batches
            num_node_ids: An integer giving size of node id
            node_state_size: An integer giving size of node state
            num_edge_types: An integer giving number of edge types
        """
        return cls( T.unbroadcast(T.zeros([batch_size, 1]), 1),
                    T.unbroadcast(T.zeros([batch_size, 1, num_node_ids]), 1),
                    T.unbroadcast(T.zeros([batch_size, 1, node_state_size]), 1),
                    T.unbroadcast(T.zeros([batch_size, 1, 1, num_edge_types]), 1, 2)) 
Example #19
Source File: recurrent.py    From CAPTCHA-breaking with MIT License
def get_output(self, train=False):
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=self.depth)
        X = X.dimshuffle((1, 0, 2))

        x = T.dot(X, self.W) + self.b

        if self.depth == 1:
            initial = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        else:
            initial = T.unbroadcast(T.unbroadcast(alloc_zeros_matrix(self.depth, X.shape[1], self.output_dim), 0), 2)

        outputs, updates = theano.scan(
            self._step,
            sequences=[x, dict(
                input=padded_mask,
                taps=[(-i) for i in range(self.depth)]
            )],
            outputs_info=[dict(
                initial=initial,
                taps=[(-i-1) for i in range(self.depth)]
            )],
            non_sequences=self.Us,
            truncate_gradient=self.truncate_gradient
        )

        if self.return_sequences:
            return outputs.dimshuffle((1, 0, 2))
        return outputs[-1] 
Example #20
Source File: test_var.py    From D-VAE with MIT License
def test_broadcast(self):
        # Test that we can rebroadcast
        data = numpy.random.rand(10, 10).astype('float32')
        output_var = f32sc(name="output", value=data)

        up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func()

        up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                     output_var.type.broadcastable)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func() 
Example #21
Source File: test_opt.py    From D-VAE with MIT License
def test_rebroadcast():
    d = numpy.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)

    f(d)

    topo = f.maker.fgraph.toposort()
    rebrs = [node for node in topo if isinstance(node.op, tensor.Rebroadcast)]
    assert len(rebrs) == 1
    rebr = rebrs[0]

    assert isinstance(rebr.inputs[0].type, GpuArrayType)
    assert isinstance(rebr.outputs[0].type, GpuArrayType) 
Example #22
Source File: test_rop.py    From D-VAE with MIT License
def test_rebroadcast(self):
        # I need the sum, because the setup expects the output to be a
        # vector
        self.check_rop_lop(tensor.unbroadcast(
            self.x[:4].dimshuffle('x', 0), 0).sum(axis=1),
            (1,)) 
Example #23
Source File: test_rop.py    From attention-lvcsr with MIT License
def test_rebroadcast(self):
        # I need the sum, because the setup expects the output to be a
        # vector
        self.check_rop_lop(tensor.unbroadcast(
            self.x[:4].dimshuffle('x', 0), 0).sum(axis=1),
            (1,)) 
Example #24
Source File: test_var.py    From attention-lvcsr with MIT License
def test_broadcast(self):
        # Test that we can rebroadcast
        data = numpy.random.rand(10, 10).astype('float32')
        output_var = f32sc(name="output", value=data)

        up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func()

        up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                     output_var.type.broadcastable)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func() 
Example #25
Source File: test_opt.py    From attention-lvcsr with MIT License
def test_rebroadcast():
    d = numpy.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)

    f(d)

    topo = f.maker.fgraph.toposort()
    rebrs = [node for node in topo if isinstance(node.op, tensor.Rebroadcast)]
    assert len(rebrs) == 1
    rebr = rebrs[0]

    assert isinstance(rebr.inputs[0].type, GpuArrayType)
    assert isinstance(rebr.outputs[0].type, GpuArrayType) 
Example #26
Source File: layers.py    From ntm-lasagne with MIT License
def get_output_for(self, input, get_details=False, **kwargs):

        input = input.dimshuffle(1, 0, 2)

        def step(x_t, M_tm1, h_tm1, state_tm1, ww_tm1, wr_tm1, *params):
            # Update the memory (using w_tm1 of the writing heads & M_tm1)
            M_t = self.write_heads.write(h_tm1, ww_tm1, M_tm1)

            # Get the read vector (using w_tm1 of the reading heads & M_t)
            r_t = self.read_heads.read(wr_tm1, M_t)

            # Apply the controller (using x_t, r_t & the requirements for the controller)
            h_t, state_t = self.controller.step(x_t, r_t, h_tm1, state_tm1)

            # Update the weights (using h_t, M_t & w_tm1)
            ww_t = self.write_heads.get_weights(h_t, ww_tm1, M_t)
            wr_t = self.read_heads.get_weights(h_t, wr_tm1, M_t)

            return [M_t, h_t, state_t, ww_t, wr_t]

        memory_init = T.tile(self.memory.memory_init, (input.shape[1], 1, 1))
        memory_init = T.unbroadcast(memory_init, 0)

        write_weights_init = T.tile(self.write_heads.weights_init, (input.shape[1], 1, 1))
        write_weights_init = T.unbroadcast(write_weights_init, 0)
        read_weights_init = T.tile(self.read_heads.weights_init, (input.shape[1], 1, 1))
        read_weights_init = T.unbroadcast(read_weights_init, 0)

        non_seqs = self.controller.get_params() + self.memory.get_params() + \
            self.write_heads.get_params() + self.read_heads.get_params()

        hids, _ = theano.scan(
            fn=step,
            sequences=input,
            outputs_info=[memory_init] + self.controller.outputs_info(input.shape[1]) + \
                         [write_weights_init, read_weights_init],
            non_sequences=non_seqs,
            strict=True)

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        if get_details:
            hid_out = [
                hids[0].dimshuffle(1, 0, 2, 3),
                hids[1].dimshuffle(1, 0, 2),
                hids[2].dimshuffle(1, 0, 2),
                hids[3].dimshuffle(1, 0, 2, 3),
                hids[4].dimshuffle(1, 0, 2, 3)]
        else:
            if self.only_return_final:
                hid_out = hids[1][-1]
            else:
                hid_out = hids[1].dimshuffle(1, 0, 2)

        return hid_out 
Example #27
Source File: prednet.py    From prednet with MIT License
def get_initial_state(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
        non_channel_axis = -1 if self.data_format == 'channels_first' else -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'e']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
            states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
            nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:
            for l in range(nlayers_to_pass[u]):
                ds_factor = 2 ** l
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'e':
                    stack_size = 2 * self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = stack_size * nb_row * nb_col  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis], output_size)) # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state, reducer) # (samples, output_size)
                if self.data_format == 'channels_first':
                    output_shp = (-1, stack_size, nb_row, nb_col)
                else:
                    output_shp = (-1, nb_row, nb_col, stack_size)
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if K._BACKEND == 'theano':
            from theano import tensor as T
            # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
            # In our case, this is a problem when training on grayscale images, and the below line fixes it.
            initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states]

        if self.extrap_start_time is not None:
            initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')]  # the last state will correspond to the current timestep
        return initial_states 
Example #28
Source File: crf.py    From keras-contrib with MIT License
def viterbi_decoding(self, X, mask=None):
        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
        if self.use_boundary:
            input_energy = self.add_boundary_energy(
                input_energy, mask, self.left_boundary, self.right_boundary)

        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
        argmin_tables = K.cast(argmin_tables, 'int32')

        # backward pass to find the best path; `initial_best_idx` can be anything,
        # as all elements in the last argmin_table are the same
        argmin_tables = K.reverse(argmin_tables, 1)
        # matrix instead of vector is required by tf `K.rnn`
        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
        if K.backend() == 'theano':
            from theano import tensor as T
            initial_best_idx = [T.unbroadcast(initial_best_idx[0], 1)]

        def gather_each_row(params, indices):
            n = K.shape(indices)[0]
            if K.backend() == 'theano':
                from theano import tensor as T
                return params[T.arange(n), indices]
            elif K.backend() == 'tensorflow':
                import tensorflow as tf
                indices = K.transpose(K.stack([tf.range(n), indices]))
                return tf.gather_nd(params, indices)
            else:
                raise NotImplementedError

        def find_path(argmin_table, best_idx):
            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
            next_best_idx = K.expand_dims(next_best_idx)
            if K.backend() == 'theano':
                from theano import tensor as T
                next_best_idx = T.unbroadcast(next_best_idx, 1)
            return next_best_idx, [next_best_idx]

        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                                 input_length=K.int_shape(X)[1], unroll=self.unroll)
        best_paths = K.reverse(best_paths, 1)
        best_paths = K.squeeze(best_paths, 2)

        return K.one_hot(best_paths, self.units) 
Example #29
Source File: rnn.py    From TextDetector with GNU General Public License v3.0
def fprop(self, state_below, return_all=False):

        if isinstance(state_below, tuple):
            state_below, mask = state_below
        else:
            mask = None

        z0 = tensor.alloc(np.cast[config.floatX](0), state_below.shape[1],
                          self.dim)

        z0 = tensor.unbroadcast(z0, 0)
        if self.dim == 1:
            z0 = tensor.unbroadcast(z0, 1)

        W, U, b = self._params
        if self.weight_noise:
            W = self.add_noise(W)
            U = self.add_noise(U)

        state_below = tensor.dot(state_below, W) + b

        if mask is not None:
            (z, updates) = scan(fn=self.fprop_step_mask,
                                sequences=[state_below, mask],
                                outputs_info=[z0],
                                non_sequences=[U])
        else:
            (z, updates) = scan(fn=self.fprop_step,
                                sequences=[state_below],
                                outputs_info=[z0],
                                non_sequences=[U])

        self._scan_updates.update(updates)

        if return_all:
            return z

        if self.indices is not None:
            if len(self.indices) > 1:
                return [z[i, :, :self.dim] for i in self.indices]
            else:
                return z[self.indices[0], :, :self.dim]
        else:
            if mask is not None:
                return (z[:, :, :self.dim], mask)
            else:
                return z[:, :, :self.dim] 
Example #30
Source File: rnn.py    From TextDetector with GNU General Public License v3.0
def fprop(self, state_below, return_all=False):

        if isinstance(state_below, tuple):
            state_below, mask = state_below
        else:
            mask = None

        z0 = tensor.alloc(np.cast[config.floatX](0), state_below.shape[1],
                          self.dim * 2)

        z0 = tensor.unbroadcast(z0, 0)
        if self.dim == 1:
            z0 = tensor.unbroadcast(z0, 1)

        W, U, b = self._params
        if self.weight_noise:
            W = self.add_noise(W)
            U = self.add_noise(U)

        state_below = tensor.dot(state_below, W) + b

        if mask is not None:
            (z, updates) = scan(fn=self.fprop_step_mask,
                                sequences=[state_below, mask],
                                outputs_info=[z0],
                                non_sequences=[U])
        else:
            (z, updates) = scan(fn=self.fprop_step,
                                sequences=[state_below],
                                outputs_info=[z0],
                                non_sequences=[U])

        self._scan_updates.update(updates)

        if return_all:
            return z

        if self.indices is not None:
            if len(self.indices) > 1:
                return [z[i, :, :self.dim] for i in self.indices]
            else:
                return z[self.indices[0], :, :self.dim]
        else:
            if mask is not None:
                return (z[:, :, :self.dim], mask)
            else:
                return z[:, :, :self.dim]