Python theano.tensor.unbroadcast() Examples
The following are 30 code examples of theano.tensor.unbroadcast(), collected from open-source projects.
You may also want to check out all available functions and classes of the theano.tensor module.
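In Theano, every tensor variable carries a broadcastable pattern as part of its type, and theano.tensor.unbroadcast(x, *axes) returns a view of x with the given axes marked as non-broadcastable. This matters most with theano.scan: a freshly allocated initial state whose shape contains a constant 1 is typed as broadcastable along that axis, while the per-step output usually is not, and scan rejects the type mismatch. Nearly every example below uses unbroadcast to fix up an initial state for exactly this reason. A minimal sketch (plain Theano, illustrative shapes):

import theano
import theano.tensor as T

# A constant 1 in the shape makes Theano type that axis as broadcastable.
init = T.alloc(0.0, 1, 5)
print(init.broadcastable)   # (True, False)

# unbroadcast marks the listed axes as non-broadcastable, so the variable
# can serve as a scan initial state whose per-step output is not
# broadcastable along those axes.
init = T.unbroadcast(init, 0)
print(init.broadcastable)   # (False, False)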
Example #1
Source File: recurrent.py From Mozi with MIT License

def get_backward_output(self, state_below):
    X = state_below.dimshuffle((1,0,2))
    xi = T.dot(X, self.Wb_i) + self.bb_i
    xf = T.dot(X, self.Wb_f) + self.bb_f
    xc = T.dot(X, self.Wb_c) + self.bb_c
    xo = T.dot(X, self.Wb_o) + self.bb_o

    [outputs, memories], updates = theano.scan(
        self._forward_step,
        sequences=[xi, xf, xo, xc],
        outputs_info=[
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        ],
        non_sequences=[self.Ub_i, self.Ub_f, self.Ub_o, self.Ub_c],
        go_backwards=True,
        truncate_gradient=self.truncate_gradient
    )
    return outputs.dimshuffle((1,0,2))
Example #2
Source File: recurrent.py From CAPTCHA-breaking with MIT License

def get_output(self, train=False):
    X = self.get_input(train)  # shape: (nb_samples, time (padded with zeros), input_dim)
    # new shape: (time, nb_samples, input_dim) -> because theano.scan iterates over main dimension
    padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
    X = X.dimshuffle((1, 0, 2))
    x = T.dot(X, self.W) + self.b

    # scan = theano symbolic loop.
    # See: http://deeplearning.net/software/theano/library/scan.html
    # Iterate over the first dimension of the x array (=time).
    outputs, updates = theano.scan(
        self._step,  # this will be called with arguments (sequences[i], outputs[i-1], non_sequences[i])
        sequences=[x, dict(input=padded_mask, taps=[-1])],  # tensors to iterate over, inputs to _step
        # initialization of the output. Input to _step with default tap=-1.
        outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
        non_sequences=self.U,  # static inputs to _step
        truncate_gradient=self.truncate_gradient)

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #3
Source File: recurrent.py From Mozi with MIT License

def get_forward_output(self, state_below):
    X = state_below.dimshuffle((1,0,2))
    xi = T.dot(X, self.W_i) + self.b_i
    xf = T.dot(X, self.W_f) + self.b_f
    xc = T.dot(X, self.W_c) + self.b_c
    xo = T.dot(X, self.W_o) + self.b_o

    [outputs, memories], updates = theano.scan(
        self._forward_step,
        sequences=[xi, xf, xo, xc],
        outputs_info=[
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        ],
        non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
        truncate_gradient=self.truncate_gradient
    )
    return outputs.dimshuffle((1,0,2))
Example #4
Source File: recurrent.py From CAPTCHA-breaking with MIT License

def get_output(self, train=False):
    X = self.get_input(train)
    padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
    X = X.dimshuffle((1, 0, 2))

    x_z = T.dot(X, self.W_z) + self.b_z
    x_r = T.dot(X, self.W_r) + self.b_r
    x_h = T.dot(X, self.W_h) + self.b_h
    outputs, updates = theano.scan(
        self._step,
        sequences=[x_z, x_r, x_h, padded_mask],
        outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
        non_sequences=[self.U_z, self.U_r, self.U_h],
        truncate_gradient=self.truncate_gradient)

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #5
Source File: recurrent.py From CAPTCHA-breaking with MIT License

def get_output(self, train=False):
    X = self.get_input(train)
    padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
    X = X.dimshuffle((1, 0, 2))

    xi = T.dot(X, self.W_i) + self.b_i
    xf = T.dot(X, self.W_f) + self.b_f
    xc = T.dot(X, self.W_c) + self.b_c
    xo = T.dot(X, self.W_o) + self.b_o

    [outputs, memories], updates = theano.scan(
        self._step,
        sequences=[xi, xf, xo, xc, padded_mask],
        outputs_info=[
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        ],
        non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
        truncate_gradient=self.truncate_gradient)

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #6
Source File: recurrent.py From CAPTCHA-breaking with MIT License

def get_output(self, train=False):
    X = self.get_input(train)
    padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
    X = X.dimshuffle((1, 0, 2))

    x_z = T.dot(X, self.W_z) + self.b_z
    x_r = T.dot(X, self.W_r) + self.b_r
    x_h = T.tanh(T.dot(X, self.Pmat)) + self.b_h
    outputs, updates = theano.scan(
        self._step,
        sequences=[x_z, x_r, x_h, padded_mask],
        outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
        non_sequences=[self.U_r, self.U_h],
        truncate_gradient=self.truncate_gradient)

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #7
Source File: recurrent.py From CAPTCHA-breaking with MIT License

def get_output(self, train=False):
    X = self.get_input(train)
    padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)
    X = X.dimshuffle((1, 0, 2))

    x_z = T.dot(X, self.W_z) + self.b_z
    x_r = T.dot(X, self.W_r) + self.b_r
    x_h = T.dot(X, self.W_h) + self.b_h
    outputs, updates = theano.scan(
        self._step,
        sequences=[x_z, x_r, x_h, padded_mask],
        outputs_info=T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
        non_sequences=[self.U_z, self.U_r, self.U_h],
        truncate_gradient=self.truncate_gradient
    )

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #8
Source File: recurrent.py From Mozi with MIT License

def _train_fprop(self, state_below):
    X = state_below.dimshuffle((1, 0, 2))
    xi = T.dot(X, self.W_i) + self.b_i
    xf = T.dot(X, self.W_f) + self.b_f
    xc = T.dot(X, self.W_c) + self.b_c
    xo = T.dot(X, self.W_o) + self.b_o

    [outputs, memories], updates = theano.scan(
        self._step,
        sequences=[xi, xf, xo, xc],
        outputs_info=[
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1),
            T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        ],
        non_sequences=[self.U_i, self.U_f, self.U_o, self.U_c],
        truncate_gradient=self.truncate_gradient)

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #9
Source File: encdec.py From NMT-Coverage with BSD 3-Clause "New" or "Revised" License

def build_sampler(self, n_samples, n_steps, T, c):
    states = [TT.zeros(shape=(n_samples,), dtype='int64'),
              TT.zeros(shape=(n_samples,), dtype='float32')]
    init_c = c[0, -self.state['dim']:]
    states += [ReplicateLayer(n_samples)(init(init_c).out).out
               for init in self.initializers]

    # added by Zhaopeng Tu, 2015-10-30
    # init_coverage
    if self.state['maintain_coverage']:
        # in sampling, init_c is two-dimension (source_length*c_dim), same for init_coverage
        # modified by Zhaopeng Tu, 2015-12-18, big mistake here!!!
        # coverage should be always 3D, the first two dimensions are consistent with alignment probs
        # while the last one is the coverage dim
        if self.state['use_linguistic_coverage'] and self.state['coverage_accumulated_operation'] == 'subtractive':
            init_coverage = TT.unbroadcast(TT.ones((c.shape[0], n_samples, self.state['coverage_dim']), dtype='float32'), 2)
        else:
            init_coverage = TT.unbroadcast(TT.zeros((c.shape[0], n_samples, self.state['coverage_dim']), dtype='float32'), 2)
        states.append(init_coverage)

    if not self.state['search']:
        c = PadLayer(n_steps)(c).out  # Pad with final states

    non_sequences = [c, T]

    if self.state['maintain_coverage'] and self.state['use_linguistic_coverage'] and self.state['use_fertility_model']:
        fertility = self.state['max_fertility'] * self.fertility_inputer(c).out
        non_sequences.append(fertility)

    outputs, updates = theano.scan(self.sampling_step,
                                   outputs_info=states,
                                   non_sequences=non_sequences,
                                   sequences=[TT.arange(n_steps, dtype="int64")],
                                   n_steps=n_steps,
                                   name="{}_sampler_scan".format(self.prefix))

    if self.state['maintain_coverage']:
        if self.state['use_fertility_model'] and self.state['use_linguistic_coverage']:
            return (outputs[0], outputs[1], outputs[-1], fertility), updates
        else:
            return (outputs[0], outputs[1], outputs[-1]), updates
    else:
        return (outputs[0], outputs[1]), updates
Example #10
Source File: dbm_metrics.py From TextDetector with GNU General Public License v3.0

def _e_step(psamples, W_list, b_list, n_steps=100, eps=1e-5):
    """
    Performs 'n_steps' of mean-field inference (used to compute positive
    phase statistics).

    Parameters
    ----------
    psamples : array-like object of theano shared variables
        State of each layer of the DBM (during the inference process).
        psamples[0] points to the input.
    n_steps : integer
        Number of iterations of mean-field to perform.
    """
    depth = len(psamples)

    new_psamples = [T.unbroadcast(T.shape_padleft(psample))
                    for psample in psamples]

    # now alternate mean-field inference for even/odd layers
    def mf_iteration(*psamples):
        new_psamples = [p for p in psamples]
        for i in xrange(1, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)
        for i in xrange(2, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)

        score = 0.
        for i in xrange(1, depth):
            score = T.maximum(T.mean(abs(new_psamples[i] - psamples[i])), score)

        return new_psamples, theano.scan_module.until(score < eps)

    new_psamples, updates = scan(
        mf_iteration,
        states=new_psamples,
        n_steps=n_steps
    )

    return [x[0] for x in new_psamples]
Example #11
Source File: controllers.py From ntm-lasagne with MIT License

def outputs_info(self, batch_size):
    ones_vector = T.ones((batch_size, 1))
    hid_init = T.dot(ones_vector, self.hid_init)
    hid_init = T.unbroadcast(hid_init, 0)
    return [hid_init, hid_init]
Example #12
Source File: controllers.py From ntm-lasagne with MIT License

def outputs_info(self, batch_size):
    ones_vector = T.ones((batch_size, 1))
    hid_init = T.dot(ones_vector, self.hid_init)
    hid_init = T.unbroadcast(hid_init, 0)
    return [hid_init, hid_init]
Example #13
Source File: controllers.py From ntm-lasagne with MIT License

def outputs_info(self, batch_size):
    ones_vector = T.ones((batch_size, 1))
    hid_init = T.dot(ones_vector, self.hid_init)
    hid_init = T.unbroadcast(hid_init, 0)
    cell_init = T.dot(ones_vector, self.cell_init)
    cell_init = T.unbroadcast(cell_init, 0)
    return [hid_init, cell_init]
Example #14
Source File: controllers.py From ntm-lasagne with MIT License

def outputs_info(self, batch_size):
    ones_vector = T.ones((batch_size, 1))
    hid_init = T.dot(ones_vector, self.hid_init)
    hid_init = T.unbroadcast(hid_init, 0)
    return [hid_init, hid_init]
Example #15
Source File: theano_internal.py From spinn with MIT License

def zeros_nobroadcast(shape, dtype=theano.config.floatX):
    zeros = T.zeros(shape, dtype=dtype)
    zeros = T.unbroadcast(zeros, *range(len(shape)))
    return zeros
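As a quick usage sketch of this helper (re-declared here so the snippet stands alone; the shape is illustrative): a literal 1 in the shape would normally produce a broadcastable axis, which the helper clears along with every other axis.

import theano
import theano.tensor as T

def zeros_nobroadcast(shape, dtype=theano.config.floatX):
    zeros = T.zeros(shape, dtype=dtype)
    zeros = T.unbroadcast(zeros, *range(len(shape)))
    return zeros

# Plain T.zeros((1, 10)) would be typed broadcastable along axis 0;
# zeros_nobroadcast strips the flag from both axes.
h0 = zeros_nobroadcast((1, 10))
print(h0.broadcastable)   # (False, False)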
Example #16
Source File: encdec.py From NMT-Coverage with BSD 3-Clause "New" or "Revised" License

def build_sampler(self, n_samples, n_steps, T, c):
    states = [TT.zeros(shape=(n_samples,), dtype='int64'),
              TT.zeros(shape=(n_samples,), dtype='float32')]
    init_c = c[0, -self.state['dim']:]
    states += [ReplicateLayer(n_samples)(init(init_c).out).out
               for init in self.initializers]

    # added by Zhaopeng Tu, 2015-10-30
    # init_coverage
    if self.state['maintain_coverage']:
        # in sampling, init_c is two-dimension (source_length*c_dim), same for init_coverage
        if self.state['use_accumulated_coverage'] and self.state['coverage_accumulated_operation'] == 'subtractive':
            init_coverage = TT.unbroadcast(TT.ones((c.shape[0], self.state['coverage_dim']), dtype='float32'), 1)
        else:
            init_coverage = TT.unbroadcast(TT.zeros((c.shape[0], self.state['coverage_dim']), dtype='float32'), 1)
        states.append(init_coverage)

    if not self.state['search']:
        c = PadLayer(n_steps)(c).out  # Pad with final states

    non_sequences = [c, T]

    outputs, updates = theano.scan(self.sampling_step,
                                   outputs_info=states,
                                   non_sequences=non_sequences,
                                   sequences=[TT.arange(n_steps, dtype="int64")],
                                   n_steps=n_steps,
                                   name="{}_sampler_scan".format(self.prefix))

    if self.state['maintain_coverage']:
        return (outputs[0], outputs[1], outputs[-1]), updates
    else:
        return (outputs[0], outputs[1]), updates
Example #17
Source File: bricks.py From PacGAN with MIT License

def get_predictions(self, x, x_tilde, application_call):
    data_sample_preds = self.discriminator.apply(
        tensor.unbroadcast(tensor.concatenate([x, x_tilde], axis=0),
                           *range(x.ndim)))
    data_preds = data_sample_preds[:x.shape[0]]
    sample_preds = data_sample_preds[x.shape[0]:]

    application_call.add_auxiliary_variable(
        tensor.nnet.sigmoid(data_preds).mean(), name='data_accuracy')
    application_call.add_auxiliary_variable(
        (1 - tensor.nnet.sigmoid(sample_preds)).mean(), name='sample_accuracy')

    return data_preds, sample_preds
Example #18
Source File: graph_state.py From gated-graph-transformer-network with MIT License

def create_empty(cls, batch_size, num_node_ids, node_state_size, num_edge_types):
    """
    Create an empty graph state with the specified sizes. Note that this
    will contain one zero-strength element to prevent nasty GPU errors
    from a dimension with 0 in it.

        batch_size: Number of batches
        num_node_ids: An integer giving size of node id
        node_state_size: An integer giving size of node state
        num_edge_types: An integer giving number of edge types
    """
    return cls(
        T.unbroadcast(T.zeros([batch_size, 1]), 1),
        T.unbroadcast(T.zeros([batch_size, 1, num_node_ids]), 1),
        T.unbroadcast(T.zeros([batch_size, 1, node_state_size]), 1),
        T.unbroadcast(T.zeros([batch_size, 1, 1, num_edge_types]), 1, 2))
Example #19
Source File: recurrent.py From CAPTCHA-breaking with MIT License

def get_output(self, train=False):
    X = self.get_input(train)
    padded_mask = self.get_padded_shuffled_mask(train, X, pad=self.depth)
    X = X.dimshuffle((1, 0, 2))
    x = T.dot(X, self.W) + self.b

    if self.depth == 1:
        initial = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
    else:
        initial = T.unbroadcast(T.unbroadcast(
            alloc_zeros_matrix(self.depth, X.shape[1], self.output_dim), 0), 2)

    outputs, updates = theano.scan(
        self._step,
        sequences=[x, dict(
            input=padded_mask,
            taps=[(-i) for i in range(self.depth)]
        )],
        outputs_info=[dict(
            initial=initial,
            taps=[(-i-1) for i in range(self.depth)]
        )],
        non_sequences=self.Us,
        truncate_gradient=self.truncate_gradient
    )

    if self.return_sequences:
        return outputs.dimshuffle((1, 0, 2))
    return outputs[-1]
Example #20
Source File: test_var.py From D-VAE with MIT License

def test_broadcast(self):
    # Test that we can rebroadcast
    data = numpy.random.rand(10, 10).astype('float32')
    output_var = f32sc(name="output", value=data)

    up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()

    up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                 output_var.type.broadcastable)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()
Example #21
Source File: test_opt.py From D-VAE with MIT License

def test_rebroadcast():
    d = numpy.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)
    f(d)

    topo = f.maker.fgraph.toposort()
    rebrs = [node for node in topo
             if isinstance(node.op, tensor.Rebroadcast)]
    assert len(rebrs) == 1
    rebr = rebrs[0]

    assert isinstance(rebr.inputs[0].type, GpuArrayType)
    assert isinstance(rebr.outputs[0].type, GpuArrayType)
Example #22
Source File: test_rop.py From D-VAE with MIT License

def test_rebroadcast(self):
    # I need the sum, because the setup expects the output to be a
    # vector
    self.check_rop_lop(
        tensor.unbroadcast(self.x[:4].dimshuffle('x', 0), 0).sum(axis=1),
        (1,))
Example #23
Source File: test_rop.py From attention-lvcsr with MIT License

def test_rebroadcast(self):
    # I need the sum, because the setup expects the output to be a
    # vector
    self.check_rop_lop(
        tensor.unbroadcast(self.x[:4].dimshuffle('x', 0), 0).sum(axis=1),
        (1,))
Example #24
Source File: test_var.py From attention-lvcsr with MIT License

def test_broadcast(self):
    # Test that we can rebroadcast
    data = numpy.random.rand(10, 10).astype('float32')
    output_var = f32sc(name="output", value=data)

    up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()

    up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                 output_var.type.broadcastable)
    output_func = theano.function(inputs=[], outputs=[],
                                  updates=[(output_var, up)])
    output_func()
Example #25
Source File: test_opt.py From attention-lvcsr with MIT License

def test_rebroadcast():
    d = numpy.random.rand(10, 10).astype('float32')
    v = theano.tensor.fmatrix()
    up = tensor.unbroadcast(v.sum().dimshuffle('x', 'x'), 0, 1)
    f = theano.function([v], [up], mode=mode_with_gpu)
    f(d)

    topo = f.maker.fgraph.toposort()
    rebrs = [node for node in topo
             if isinstance(node.op, tensor.Rebroadcast)]
    assert len(rebrs) == 1
    rebr = rebrs[0]

    assert isinstance(rebr.inputs[0].type, GpuArrayType)
    assert isinstance(rebr.outputs[0].type, GpuArrayType)
Example #26
Source File: layers.py From ntm-lasagne with MIT License

def get_output_for(self, input, get_details=False, **kwargs):
    input = input.dimshuffle(1, 0, 2)

    def step(x_t, M_tm1, h_tm1, state_tm1, ww_tm1, wr_tm1, *params):
        # Update the memory (using w_tm1 of the writing heads & M_tm1)
        M_t = self.write_heads.write(h_tm1, ww_tm1, M_tm1)

        # Get the read vector (using w_tm1 of the reading heads & M_t)
        r_t = self.read_heads.read(wr_tm1, M_t)

        # Apply the controller (using x_t, r_t & the requirements for the controller)
        h_t, state_t = self.controller.step(x_t, r_t, h_tm1, state_tm1)

        # Update the weights (using h_t, M_t & w_tm1)
        ww_t = self.write_heads.get_weights(h_t, ww_tm1, M_t)
        wr_t = self.read_heads.get_weights(h_t, wr_tm1, M_t)

        return [M_t, h_t, state_t, ww_t, wr_t]

    memory_init = T.tile(self.memory.memory_init, (input.shape[1], 1, 1))
    memory_init = T.unbroadcast(memory_init, 0)

    write_weights_init = T.tile(self.write_heads.weights_init, (input.shape[1], 1, 1))
    write_weights_init = T.unbroadcast(write_weights_init, 0)

    read_weights_init = T.tile(self.read_heads.weights_init, (input.shape[1], 1, 1))
    read_weights_init = T.unbroadcast(read_weights_init, 0)

    non_seqs = self.controller.get_params() + self.memory.get_params() + \
        self.write_heads.get_params() + self.read_heads.get_params()

    hids, _ = theano.scan(
        fn=step,
        sequences=input,
        outputs_info=[memory_init] + self.controller.outputs_info(input.shape[1]) +
                     [write_weights_init, read_weights_init],
        non_sequences=non_seqs,
        strict=True)

    # dimshuffle back to (n_batch, n_time_steps, n_features)
    if get_details:
        hid_out = [
            hids[0].dimshuffle(1, 0, 2, 3),
            hids[1].dimshuffle(1, 0, 2),
            hids[2].dimshuffle(1, 0, 2),
            hids[3].dimshuffle(1, 0, 2, 3),
            hids[4].dimshuffle(1, 0, 2, 3)]
    else:
        if self.only_return_final:
            hid_out = hids[1][-1]
        else:
            hid_out = hids[1].dimshuffle(1, 0, 2)

    return hid_out
Example #27
Source File: prednet.py From prednet with MIT License

def get_initial_state(self, x):
    input_shape = self.input_spec[0].shape
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]

    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    non_channel_axis = -1 if self.data_format == 'channels_first' else -2
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

    initial_states = []
    states_to_pass = ['r', 'c', 'e']
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    if self.extrap_start_time is not None:
        states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
        nlayers_to_pass['ahat'] = 1
    for u in states_to_pass:
        for l in range(nlayers_to_pass[u]):
            ds_factor = 2 ** l
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            if u in ['r', 'c']:
                stack_size = self.R_stack_sizes[l]
            elif u == 'e':
                stack_size = 2 * self.stack_sizes[l]
            elif u == 'ahat':
                stack_size = self.stack_sizes[l]
            output_size = stack_size * nb_row * nb_col  # flattened size
            reducer = K.zeros((input_shape[self.channel_axis], output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            if self.data_format == 'channels_first':
                output_shp = (-1, stack_size, nb_row, nb_col)
            else:
                output_shp = (-1, nb_row, nb_col, stack_size)
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]

    if K._BACKEND == 'theano':
        from theano import tensor as T
        # There is a known issue in the Theano scan op when dealing with
        # inputs whose shape is 1 along a dimension. In our case, this is
        # a problem when training on grayscale images, and the below line
        # fixes it.
        initial_states = [T.unbroadcast(init_state, 0, 1)
                          for init_state in initial_states]

    if self.extrap_start_time is not None:
        # the last state will correspond to the current timestep
        initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')]
    return initial_states
Example #28
Source File: crf.py From keras-contrib with MIT License

def viterbi_decoding(self, X, mask=None):
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(
            input_energy, mask, self.left_boundary, self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        from theano import tensor as T
        initial_best_idx = [T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            from theano import tensor as T
            return params[T.arange(n), indices]
        elif K.backend() == 'tensorflow':
            import tensorflow as tf
            indices = K.transpose(K.stack([tf.range(n), indices]))
            return tf.gather_nd(params, indices)
        else:
            raise NotImplementedError

    def find_path(argmin_table, best_idx):
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            from theano import tensor as T
            next_best_idx = T.unbroadcast(next_best_idx, 1)
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1],
                             unroll=self.unroll)
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)
Example #29
Source File: rnn.py From TextDetector with GNU General Public License v3.0

def fprop(self, state_below, return_all=False):
    if isinstance(state_below, tuple):
        state_below, mask = state_below
    else:
        mask = None

    z0 = tensor.alloc(np.cast[config.floatX](0),
                      state_below.shape[1], self.dim)
    z0 = tensor.unbroadcast(z0, 0)
    if self.dim == 1:
        z0 = tensor.unbroadcast(z0, 1)

    W, U, b = self._params
    if self.weight_noise:
        W = self.add_noise(W)
        U = self.add_noise(U)

    state_below = tensor.dot(state_below, W) + b

    if mask is not None:
        (z, updates) = scan(fn=self.fprop_step_mask,
                            sequences=[state_below, mask],
                            outputs_info=[z0],
                            non_sequences=[U])
    else:
        (z, updates) = scan(fn=self.fprop_step,
                            sequences=[state_below],
                            outputs_info=[z0],
                            non_sequences=[U])
    self._scan_updates.update(updates)

    if return_all:
        return z

    if self.indices is not None:
        if len(self.indices) > 1:
            return [z[i, :, :self.dim] for i in self.indices]
        else:
            return z[self.indices[0], :, :self.dim]
    else:
        if mask is not None:
            return (z[:, :, :self.dim], mask)
        else:
            return z[:, :, :self.dim]
Example #30
Source File: rnn.py From TextDetector with GNU General Public License v3.0

def fprop(self, state_below, return_all=False):
    if isinstance(state_below, tuple):
        state_below, mask = state_below
    else:
        mask = None

    z0 = tensor.alloc(np.cast[config.floatX](0),
                      state_below.shape[1], self.dim * 2)
    z0 = tensor.unbroadcast(z0, 0)
    if self.dim == 1:
        z0 = tensor.unbroadcast(z0, 1)

    W, U, b = self._params
    if self.weight_noise:
        W = self.add_noise(W)
        U = self.add_noise(U)

    state_below = tensor.dot(state_below, W) + b

    if mask is not None:
        (z, updates) = scan(fn=self.fprop_step_mask,
                            sequences=[state_below, mask],
                            outputs_info=[z0],
                            non_sequences=[U])
    else:
        (z, updates) = scan(fn=self.fprop_step,
                            sequences=[state_below],
                            outputs_info=[z0],
                            non_sequences=[U])
    self._scan_updates.update(updates)

    if return_all:
        return z

    if self.indices is not None:
        if len(self.indices) > 1:
            return [z[i, :, :self.dim] for i in self.indices]
        else:
            return z[self.indices[0], :, :self.dim]
    else:
        if mask is not None:
            return (z[:, :, :self.dim], mask)
        else:
            return z[:, :, :self.dim]