Python chainer.cuda.get_device_from_array() Examples
The following are 29 code examples of chainer.cuda.get_device_from_array().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module chainer.cuda, or try the search function.
Example #1
Source File: lstm_decoder.py From DSTC6-End-to-End-Conversation-Modeling with MIT License | 6 votes |
def update(self, s, i):
    """Update decoder state.

    Args:
        s (any): Current (hidden, cell) states. If ``None`` is specified
            zero-vector is used.
        i (int): input label.

    Returns:
        tuple: ``(hy, cy, dy)`` exactly as returned by ``self.lstm``.
    """
    if s is None:
        # There is no state array to inspect. The previous code
        # dereferenced ``s[0]`` unconditionally and raised TypeError here,
        # although ``None`` is a documented input. Use the link's own array
        # module when it exposes one (chainer links provide ``xp``),
        # otherwise fall back to NumPy.
        xp = getattr(self, 'xp', np)
        hidden, cell = None, None
    else:
        # Create the label array on the same backend as the given state.
        on_gpu = cuda.get_device_from_array(s[0].data).id >= 0
        xp = cuda.cupy if on_gpu else np
        hidden, cell = s[0], s[1]

    v = chainer.Variable(xp.array([i], dtype=np.int32))
    x = self.embed(v)
    hy, cy, dy = self.lstm(hidden, cell, [x])
    return hy, cy, dy
Example #2
Source File: lstm_decoder.py From DSTC6-End-to-End-Conversation-Modeling with MIT License | 6 votes |
def update(self, s, i):
    """Update decoder state.

    Args:
        s (any): Current (hidden, cell) states. If ``None`` is specified
            zero-vector is used.
        i (int): input label.

    Returns:
        tuple: ``(hy, cy, dy)`` exactly as returned by ``self.lstm``.
    """
    if s is None:
        # There is no state array to inspect. The previous code
        # dereferenced ``s[0]`` unconditionally and raised TypeError here,
        # although ``None`` is a documented input. Use the link's own array
        # module when it exposes one (chainer links provide ``xp``),
        # otherwise fall back to NumPy.
        xp = getattr(self, 'xp', np)
        hidden, cell = None, None
    else:
        # Create the label array on the same backend as the given state.
        on_gpu = cuda.get_device_from_array(s[0].data).id >= 0
        xp = cuda.cupy if on_gpu else np
        hidden, cell = s[0], s[1]

    v = chainer.Variable(xp.array([i], dtype=np.int32))
    x = self.embed(v)
    hy, cy, dy = self.lstm(hidden, cell, [x])
    return hy, cy, dy
Example #3
Source File: lstm_decoder.py From DSTC6-End-to-End-Conversation-Modeling with MIT License | 6 votes |
def update(self, s, i):
    """Update decoder state.

    Args:
        s (any): Current (hidden, cell) states. If ``None`` is specified
            zero-vector is used.
        i (int): input label.

    Returns:
        tuple: ``(hy, cy, dy)`` exactly as returned by ``self.lstm``.
    """
    if s is None:
        # There is no state array to inspect. The previous code
        # dereferenced ``s[0]`` unconditionally and raised TypeError here,
        # although ``None`` is a documented input. Use the link's own array
        # module when it exposes one (chainer links provide ``xp``),
        # otherwise fall back to NumPy.
        xp = getattr(self, 'xp', np)
        hidden, cell = None, None
    else:
        # Create the label array on the same backend as the given state.
        on_gpu = cuda.get_device_from_array(s[0].data).id >= 0
        xp = cuda.cupy if on_gpu else np
        hidden, cell = s[0], s[1]

    v = chainer.Variable(xp.array([i], dtype=np.int32))
    x = self.embed(v)
    hy, cy, dy = self.lstm(hidden, cell, [x])
    return hy, cy, dy
Example #4
Source File: lstm_decoder.py From DSTC6-End-to-End-Conversation-Modeling with MIT License | 6 votes |
def update(self, s, i):
    """Update decoder state.

    Args:
        s (any): Current (hidden, cell) states. If ``None`` is specified
            zero-vector is used.
        i (int): input label.

    Returns:
        tuple: ``(hy, cy, dy)`` exactly as returned by ``self.lstm``.
    """
    if s is None:
        # There is no state array to inspect. The previous code
        # dereferenced ``s[0]`` unconditionally and raised TypeError here,
        # although ``None`` is a documented input. Use the link's own array
        # module when it exposes one (chainer links provide ``xp``),
        # otherwise fall back to NumPy.
        xp = getattr(self, 'xp', np)
        hidden, cell = None, None
    else:
        # Create the label array on the same backend as the given state.
        on_gpu = cuda.get_device_from_array(s[0].data).id >= 0
        xp = cuda.cupy if on_gpu else np
        hidden, cell = s[0], s[1]

    v = chainer.Variable(xp.array([i], dtype=np.int32))
    x = self.embed(v)
    hy, cy, dy = self.lstm(hidden, cell, [x])
    return hy, cy, dy
Example #5
Source File: textrec_metrics.py From see with GNU General Public License v3.0 | 6 votes |
def calc_loss(self, x, t):
    """Return the recognition loss plus the mean direction loss of the grids.

    Args:
        x: Triple unpacked as ``(batch_predictions, _, grids)``; the middle
            entry is ignored and only ``grids[-1]`` is used.
        t: Labels; ``t.shape[0]`` is taken as the batch size.

    Returns:
        ``calc_actual_loss(...)`` plus the average of ``calc_direction_loss``
        over the slices of the regrouped grids.
    """
    predictions, _, all_grids = x
    self.xp = cuda.get_array_module(predictions, t)

    recognition_loss = self.calc_actual_loss(predictions, None, t)

    # Regroup the last grids entry into (-1, batch_size, ...).
    num_samples = t.shape[0]
    last_grids = all_grids[-1]
    trailing_dims = last_grids.shape[1:]
    regrouped = F.reshape(last_grids, (-1, num_samples) + trailing_dims)

    direction_losses = []
    for single_grid in F.separate(regrouped, axis=0):
        # Mirrors the original lookup: prefer ``.data``, with the first
        # element's data evaluated as the (eager) default.
        device_source = getattr(single_grid, 'data', single_grid[0].data)
        with cuda.get_device_from_array(device_source):
            direction_losses.append(self.calc_direction_loss(single_grid))

    mean_direction_loss = sum(direction_losses) / len(direction_losses)
    return recognition_loss + mean_direction_loss
Example #6
Source File: training.py From espnet with Apache License 2.0 | 6 votes |
def sum_sqnorm(arr):
    """Sum the squared L2 norms of the given arrays.

    Args:
        arr: Iterable of arrays. ``None`` entries contribute nothing.

    Returns:
        float: Total of ``x.ravel().dot(x.ravel())`` over all non-``None``
        entries. Partial sums are kept per device id and converted to
        ``float`` before being added together.
    """
    per_device = collections.defaultdict(float)
    for item in arr:
        with cuda.get_device_from_array(item) as device:
            if item is None:
                continue
            flat = item.ravel()
            per_device[int(device)] += flat.dot(flat)
    return sum(float(partial) for partial in six.itervalues(per_device))
Example #7
Source File: training.py From espnet with Apache License 2.0 | 6 votes |
def sum_sqnorm(arr):
    """Sum the squared L2 norms of the given arrays.

    Args:
        arr: Iterable of arrays. ``None`` entries contribute nothing.

    Returns:
        float: Total of ``x.ravel().dot(x.ravel())`` over all non-``None``
        entries. Partial sums are kept per device id and converted to
        ``float`` before being added together.
    """
    per_device = collections.defaultdict(float)
    for item in arr:
        with cuda.get_device_from_array(item) as device:
            if item is None:
                continue
            flat = item.ravel()
            per_device[int(device)] += flat.dot(flat)
    return sum(float(partial) for partial in six.itervalues(per_device))
Example #8
Source File: svhn_bbox_plotter.py From see with GNU General Public License v3.0 | 6 votes |
def decode_predictions(self, predictions):
    """Decode a sequence of predictions into space-separated words.

    Each slice along the stacked leading axis is turned into one word via
    softmax + argmax, ``loss_metrics.strip_prediction`` and
    ``loss_metrics.label_to_char``; the blank character is removed.

    Returns:
        str: The decoded words joined by single spaces, or ``''`` as soon as
        a stripped word consists of the single label ``0``.
    """
    # Stack the individual predictions on a new leading axis, then walk
    # through the slices one by one.
    stacked = F.concat(
        [F.expand_dims(p, axis=0) for p in predictions], axis=0)

    decoded_words = []
    with cuda.get_device_from_array(stacked.data):
        for step_prediction in F.separate(stacked, axis=0):
            squeezed = F.squeeze(step_prediction, axis=0)
            probabilities = F.softmax(squeezed, axis=1)
            best_labels = self.xp.argmax(probabilities.data, axis=1)
            stripped = self.loss_metrics.strip_prediction(
                best_labels[self.xp.newaxis, ...])[0]

            if len(stripped) == 1 and stripped[0] == 0:
                return ''

            text = "".join(map(self.loss_metrics.label_to_char, stripped))
            blank_char = chr(
                self.loss_metrics.char_map[str(self.loss_metrics.blank_symbol)])
            decoded_words.append(text.replace(blank_char, ''))

    return " ".join(decoded_words)
Example #9
Source File: nonbias_weight_decay.py From chainerrl with MIT License | 5 votes |
def __call__(self, rule, param):
    """Add ``self.rate * param.array`` to the gradient, skipping biases.

    Parameters named ``'b'`` and parameters whose array or gradient is
    ``None`` are left untouched. The gradient is modified in place.
    """
    if param.name == 'b':
        return

    weights = param.array
    gradient = param.grad
    if weights is None or gradient is None:
        return

    with cuda.get_device_from_array(weights) as device:
        if int(device) != -1:
            # Non-dummy device: use an elementwise CUDA kernel.
            decay_kernel = cuda.elementwise(
                'T p, T decay', 'T g', 'g += decay * p', 'weight_decay')
            decay_kernel(weights, self.rate, gradient)
        else:
            # Device id -1: plain in-place array arithmetic.
            gradient += self.rate * weights
Example #10
Source File: test_ggnn_readout.py From chainer-chemistry with MIT License | 5 votes |
def data():
    """Build deterministic random inputs (seed 0) for the readout tests.

    Returns:
        tuple: ``(atom_data, atom_data0, y_grad)`` where ``atom_data`` is a
        float32 array of shape ``(batch_size, atom_size, in_channels)``,
        ``atom_data0`` is the data of a ``functions.copy`` of it, and
        ``y_grad`` is a float32 array of shape ``(batch_size, out_dim)``.
    """
    numpy.random.seed(0)

    input_shape = (batch_size, atom_size, in_channels)
    atom_data = numpy.random.uniform(
        0, high=MAX_ATOMIC_NUM, size=input_shape).astype('f')

    device_id = cuda.get_device_from_array(atom_data.data).id
    atom_data0 = functions.copy(atom_data, device_id).data

    grad_shape = (batch_size, out_dim)
    y_grad = numpy.random.uniform(-1, 1, grad_shape).astype('f')

    return atom_data, atom_data0, y_grad
Example #11
Source File: gnn_film.py From chainer-chemistry with MIT License | 5 votes |
def __call__(self, atom_array, adj, is_real_node=None):
    """Forward propagation.

    Args:
        atom_array (numpy.ndarray): minibatch of molecules represented with
            atom IDs (representing C, O, S, ...);
            `atom_array[mol_index, atom_index]` is the atomic number of the
            `atom_index`-th atom of the `mol_index`-th molecule.
        adj (numpy.ndarray): minibatch of adjacency matrices with edge-type
            information.
        is_real_node (numpy.ndarray): 2-dim array (minibatch, num_nodes).
            1 for real node, 0 for virtual node.
            If `None`, all nodes are considered real.

    Returns:
        ~chainer.Variable: minibatch of fingerprint
    """
    # NOTE: the state reset (`self.reset_state()`) is commented out in this
    # version and therefore not performed.

    # int32 input is treated as atom IDs and embedded; anything else is
    # used as the node features directly.
    is_id_input = atom_array.dtype == self.xp.int32
    h = self.embed(atom_array) if is_id_input else atom_array

    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)

    readouts = []
    for step in range(self.n_update_layers):
        layer_index = 0 if self.weight_tying else step
        h = self.update_layers[layer_index](h, adj)
        if self.concat_hidden:
            readouts.append(self.readout_layers[step](h, h0, is_real_node))

    if self.concat_hidden:
        return functions.concat(readouts, axis=1)
    return self.readout_layers[0](h, h0, is_real_node)
Example #12
Source File: extend.py From chainer-chemistry with MIT License | 5 votes |
def extend_arrays_to_shape(arrays, out_shape, value=0):
    """Copy ``arrays`` into a new array of ``out_shape`` padded with ``value``.

    The i-th input is written to ``result[i, :d0, :d1, ...]`` where
    ``(d0, d1, ...)`` is its own shape. The array module, device and dtype
    are taken from ``arrays[0]``.

    Ref: `_concat_arrays_with_padding` method in chainer convert.py
    https://github.com/chainer/chainer/blob/master/chainer/dataset/convert.py
    """
    first = arrays[0]
    xp = cuda.get_array_module(first)
    with cuda.get_device_from_array(first):
        padded = xp.full(out_shape, value, dtype=first.dtype)
        for index, source in enumerate(arrays):
            region = (index,) + tuple(slice(extent) for extent in source.shape)
            padded[region] = source
    return padded
Example #13
Source File: base_calculator.py From chainer-chemistry with MIT License | 5 votes |
def __init__(self, model, target_extractor=None, output_extractor=None,
             device=None, logger=None):
    """Store the model, extractors, device id and logger.

    When ``device`` is ``None`` the device id is derived from the model
    parameters via ``cuda.get_device_from_array``. When ``logger`` is falsy
    a module-level logger is used.
    """
    self.model = model  # type: chainer.Chain

    if device is None:
        device = cuda.get_device_from_array(*model.params()).id
    self._device = device

    self.target_extractor = target_extractor
    self.output_extractor = output_extractor
    self.logger = logger if logger else getLogger(__name__)
Example #14
Source File: voxelnet_concat.py From voxelnet_chainer with MIT License | 5 votes |
def _concat_arrays(arrays, padding):
    """Concatenate ``arrays`` on the device of their first element.

    If the first element is neither a ``numpy.ndarray`` nor a
    ``cuda.ndarray``, the whole input is first converted with
    ``numpy.asarray``. ``padding`` is accepted but not used here.
    """
    array_types = (numpy.ndarray, cuda.ndarray)
    if not isinstance(arrays[0], array_types):
        arrays = numpy.asarray(arrays)

    head = arrays[0]
    xp = cuda.get_array_module(head)
    with cuda.get_device_from_array(head):
        return xp.concatenate(arrays)
Example #15
Source File: textrec_bbox_plotter.py From see with GNU General Public License v3.0 | 5 votes |
def decode_predictions(self, predictions):
    """Decode the first entry of ``predictions`` into a single word.

    The entry is squeezed on axis 1, classified with softmax + argmax,
    stripped via ``loss_metrics.strip_prediction`` and mapped to characters
    with ``loss_metrics.label_to_char``.

    Returns:
        str: The decoded characters joined without a separator.
    """
    first_prediction = predictions[0]

    with cuda.get_device_from_array(first_prediction.data):
        squeezed = F.squeeze(first_prediction, axis=1)
        probabilities = F.softmax(squeezed, axis=1).data
        best_labels = self.xp.argmax(probabilities, axis=1)

        # strip_prediction receives the labels with an added leading axis;
        # only its first result is used.
        batched = best_labels[self.xp.newaxis, ...]
        stripped = self.loss_metrics.strip_prediction(batched)[0]

        characters = map(self.loss_metrics.label_to_char, stripped)
        return "".join(characters)
Example #16
Source File: text_rec_bbox_plotter.py From see with GNU General Public License v3.0 | 5 votes |
def decode_predictions(self, predictions):
    """Decode a sequence of predictions into a single word.

    The predictions are stacked on a new leading axis, squeezed on axis 1,
    classified with softmax + argmax, stripped via
    ``loss_metrics.strip_prediction`` and mapped to characters with
    ``loss_metrics.label_to_char``.

    Returns:
        str: The decoded characters joined without a separator.
    """
    # Stack all individual predictions along a new first axis.
    expanded = [F.expand_dims(single, axis=0) for single in predictions]
    stacked = F.concat(expanded, axis=0)

    with cuda.get_device_from_array(stacked.data):
        squeezed = F.squeeze(stacked, axis=1)
        probabilities = F.softmax(squeezed, axis=1).data
        best_labels = self.xp.argmax(probabilities, axis=1)

        # strip_prediction receives the labels with an added leading axis;
        # only its first result is used.
        batched = best_labels[self.xp.newaxis, ...]
        stripped = self.loss_metrics.strip_prediction(batched)[0]

        characters = map(self.loss_metrics.label_to_char, stripped)
        return "".join(characters)
Example #17
Source File: fsns.py From see with GNU General Public License v3.0 | 5 votes |
def localization_net(self, images):
    """Predict one (2, 3) transformation per timestep for ``images``.

    Resets the recurrent state of ``self.lstm`` and ``self.transform_2``,
    extracts features, stores them in ``self.localization_vis_anchor`` and
    then runs ``self.num_timesteps`` LSTM steps on the pooled features.

    Returns:
        The per-timestep outputs of ``rotation_dropout`` concatenated on
        axis 0.
    """
    self.lstm.reset_state()
    self.transform_2.reset_state()

    features = self.data_bn(images)
    features = F.relu(self.bn0(self.conv0(features)))
    features = F.max_pooling_2d(features, 3, stride=2, pad=1)

    residual_blocks = (
        self.rs1_1, self.rs1_2,
        self.rs2_1, self.rs2_2,
        self.rs3_1, self.rs3_2,
    )
    for residual_block in residual_blocks:
        features = residual_block(features)
    # NOTE: the rs4_1 / rs4_2 blocks are disabled (commented out upstream).

    self.localization_vis_anchor = features

    pooled = F.average_pooling_2d(features, 5, stride=1)

    localizations = []
    with cuda.get_device_from_array(pooled.data):
        for _ in range(self.num_timesteps):
            lstm_out = F.relu(self.lstm(pooled))
            affine = F.reshape(self.transform_2(lstm_out), (-1, 2, 3))
            localizations.append(
                rotation_dropout(affine, ratio=self.dropout_ratio))

    return F.concat(localizations, axis=0)
Example #18
Source File: fsns.py From see with GNU General Public License v3.0 | 5 votes |
def __call__(self, images):
    """Predict one (2, 3) transformation per timestep for ``images``.

    Resets the recurrent state of ``self.lstm`` and ``self.transform_2``,
    extracts features, stores them in ``self.vis_anchor`` and then runs
    ``self.num_timesteps`` LSTM steps on the pooled features.

    Returns:
        The per-timestep outputs of ``rotation_dropout`` concatenated on
        axis 0.
    """
    self.lstm.reset_state()
    self.transform_2.reset_state()

    activated = F.relu(self.bn0(self.conv0(images)))
    features = F.average_pooling_2d(activated, 2, stride=2)
    features = F.max_pooling_2d(self.rs1(features), 2, stride=2)
    features = F.max_pooling_2d(self.rs2(features), 2, stride=2)
    features = self.rs3(features)
    # NOTE: the rs4 block is disabled (commented out upstream).

    self.vis_anchor = features

    pooled = F.average_pooling_2d(features, 5, stride=2)

    localizations = []
    with cuda.get_device_from_array(pooled.data):
        # NOTE: an attention step (`self.attend`) seeded with a zero
        # lstm_prediction was disabled upstream; the pooled features are
        # fed to the LSTM directly at every timestep.
        for _ in range(self.num_timesteps):
            lstm_out = F.relu(self.lstm(pooled))
            affine = F.reshape(self.transform_2(lstm_out), (-1, 2, 3))
            localizations.append(
                rotation_dropout(affine, ratio=self.dropout_ratio))

    return F.concat(localizations, axis=0)
Example #19
Source File: evaluator.py From see with GNU General Public License v3.0 | 5 votes |
def calc_accuracy(self, predictions, labels): batch_predictions = predictions # concat all individual predictions and slice for each time step batch_predictions = F.concat([F.expand_dims(p, axis=2) for p in batch_predictions], axis=2) t = F.reshape(labels, (1, self.args.timesteps, -1)) accuracies = [] with cuda.get_device_from_array(batch_predictions.data): for prediction, label in zip(F.separate(batch_predictions, axis=0), F.separate(t, axis=2)): classification = F.softmax(prediction, axis=2) classification = classification.data classification = self.xp.argmax(classification, axis=2) # classification = self.xp.transpose(classification, (1, 0)) words = self.strip_prediction(classification) labels = self.strip_prediction(label.data) for word, label in zip(words, labels): word = "".join(map(self.label_to_char, word)) label = "".join(map(self.label_to_char, label)) if word == label: self.num_correct_words += 1 self.num_words += 1 return word, label
Example #20
Source File: lstm_per_step_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_loss(self, x, t):
    """Average the recognition loss and the per-timestep grid losses.

    Args:
        x: Triple unpacked as ``(batch_predictions, _, grids)``.
        t: Labels; ``t.shape[0]`` is the batch size and the labels are
            separated along axis 1 to pair with the predictions.

    Returns:
        The mean over: the average softmax cross entropy of all predictions,
        and one summed grid loss (area, aspect ratio, direction, height) per
        slice of the reshaped grids.
    """
    batch_predictions, _, grids = x
    self.xp = cuda.get_array_module(batch_predictions[0], t)

    batch_size = t.shape[0]

    # Regroup grids as (timesteps, batch, grids-per-step, ...).
    original_shape = grids.shape
    grids_per_step = 4 if self.uses_original_data else 1
    grids = F.reshape(
        grids,
        (self.num_timesteps, batch_size, grids_per_step) + original_shape[1:])

    recognition_losses = [
        F.softmax_cross_entropy(prediction, label)
        for prediction, label in zip(batch_predictions, F.separate(t, axis=1))
    ]
    losses = [sum(recognition_losses) / len(recognition_losses)]

    # NOTE: a pairwise IoU overlap loss between grids (calc_iou_loss) was
    # disabled upstream and is not part of the result.

    for grid in F.separate(grids, axis=0):
        with cuda.get_device_from_array(grid.data):
            grid_losses = []
            for sub_grid in F.separate(grid, axis=1):
                width, height = self.get_bbox_side_lengths(sub_grid)
                grid_losses.extend([
                    self.area_loss_factor * self.calc_area_loss(width, height),
                    self.aspect_ratio_loss_factor * self.calc_aspect_ratio_loss(width, height),
                    self.calc_direction_loss(sub_grid),
                    self.calc_height_loss(height),
                ])
            losses.append(sum(grid_losses))

    return sum(losses) / len(losses)
Example #21
Source File: loss_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_accuracy(self, x, t):
    """Return the mean per-timestep word accuracy.

    Args:
        x: Triple whose first entry holds the per-timestep predictions; the
            other two entries are ignored.
        t: Labels, reshaped to ``(batch_size, num_timesteps, -1)``.

    Returns:
        The average over timesteps of (exactly matching words / number of
        label words). The value is also passed to
        ``self.scale_area_loss_factor`` before being returned.
    """
    batch_predictions, _, _ = x
    self.xp = cuda.get_array_module(batch_predictions[0], t)

    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    accuracies = []
    for step_predictions, step_labels in zip(batch_predictions, F.separate(t, axis=1)):
        if isinstance(step_predictions, list):
            step_predictions = F.concat(
                [F.expand_dims(p, axis=0) for p in step_predictions], axis=0)

        with cuda.get_device_from_array(step_predictions.data):
            probabilities = F.softmax(step_predictions, axis=2).data
            best_labels = self.xp.argmax(probabilities, axis=2)
            best_labels = self.xp.transpose(best_labels, (1, 0))

            predicted_words = self.strip_prediction(best_labels)
            target_words = self.strip_prediction(step_labels.data)

            # Count the word pairs whose decoded strings match exactly.
            matches = sum(
                "".join(map(self.label_to_char, predicted))
                == "".join(map(self.label_to_char, target))
                for predicted, target in zip(predicted_words, target_words)
            )
            accuracies.append(matches / len(target_words))

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
Example #22
Source File: textrec_metrics.py From see with GNU General Public License v3.0 | 5 votes |
def calc_accuracy(self, x, t):
    """Return the word accuracy of the stacked predictions.

    Args:
        x: Triple whose first entry holds the individual predictions; the
            other two entries are ignored.
        t: Labels, passed to ``self.strip_prediction`` as given.

    Returns:
        Exactly matching words divided by the number of label words. The
        value is also passed to ``self.scale_area_loss_factor`` before being
        returned.
    """
    raw_predictions, _, _ = x
    batch_predictions = F.concat(
        [F.expand_dims(single, axis=0) for single in raw_predictions], axis=0)
    self.xp = cuda.get_array_module(batch_predictions[0], t)

    accuracies = []
    with cuda.get_device_from_array(batch_predictions.data):
        probabilities = F.softmax(batch_predictions, axis=2).data
        best_labels = self.xp.argmax(probabilities, axis=2)
        best_labels = self.xp.transpose(best_labels, (1, 0))

        predicted_words = self.strip_prediction(best_labels)
        target_words = self.strip_prediction(t)

        # Count the word pairs whose decoded strings match exactly.
        matches = sum(
            "".join(map(self.label_to_char, predicted))
            == "".join(map(self.label_to_char, target))
            for predicted, target in zip(predicted_words, target_words)
        )
        accuracies.append(matches / len(target_words))

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
Example #23
Source File: models_test.py From knmt with GNU General Public License v3.0 | 5 votes |
def naive_call(self, fb_concat, targets, mask):
    """Compute the mean cross-entropy loss over ``targets``, step by step.

    Only minibatches of size 1 are supported (asserted). The first step
    uses the broadcast ``self.bos_embeding`` as the previous-word embedding;
    every later step embeds the previous target instead.

    Returns:
        tuple: ``(loss, attn_list)`` where ``loss`` is the summed per-step
        softmax cross entropy divided by the number of steps and
        ``attn_list`` holds the attention returned at every step.
    """
    compute_ctxt = self.attn_module.naive_call(fb_concat, mask)

    mb_size = targets[0].data.shape[0]
    assert mb_size == 1

    states = self.gru.get_initial_states(mb_size)

    with cuda.get_device_from_array(self.gru.initial_state.data):
        prev_embedding = F.broadcast_to(self.bos_embeding, (1, self.Eo))

    total_loss = None
    last_word = None
    attentions = []
    num_predictions = 0

    for target in targets:
        if last_word is not None:
            # From the second step on, embed the previous target; the first
            # step keeps the initial BOS embedding.
            prev_embedding = self.emb(last_word)

        context, attention = compute_ctxt(states[-1])
        gru_input = F.concat((prev_embedding, context))

        next_states = self.gru(states, gru_input)

        output_input = F.concat((gru_input, next_states[-1]))
        logits = self.lin_o(self.maxo(output_input))

        step_loss = F.softmax_cross_entropy(logits, target)
        if total_loss is None:
            total_loss = step_loss
        else:
            total_loss = total_loss + step_loss
        num_predictions += 1

        last_word = target
        states = next_states
        attentions.append(attention)

    return total_loss / num_predictions, attentions
Example #24
Source File: attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_ctxt_demux(self, fb_concat, mask):
    """Build an attention closure over a single encoded source sentence.

    ``fb_concat`` must have shape ``(1, nb_elems, self.Hi)`` and ``mask``
    must be empty (both asserted). The encoder-side part of the attention
    score is computed once here and reused by every call of the returned
    function.

    Returns:
        callable: ``compute_ctxt(previous_state, prev_word_embedding=None)``
        returning ``(ci, attn)``.
    """
    mb_size, nb_elems, Hi = fb_concat.data.shape
    assert Hi == self.Hi
    assert mb_size == 1
    assert len(mask) == 0

    # Flatten to (mb_size * nb_elems, Hi), apply al_lin_h, and restore to
    # (mb_size, nb_elems, Ha).
    precomputed_al_factor = F.reshape(self.al_lin_h(
        F.reshape(fb_concat, (mb_size * nb_elems, self.Hi))), (mb_size, nb_elems, self.Ha))

    # concatenated_mask = F.concat([F.reshape(mask_elem, (mb_size, 1)) for mask_elem in mask], 1)

    def compute_ctxt(previous_state, prev_word_embedding=None):
        """Return ``(ci, attn)`` for the given decoder state.

        ``attn`` is the softmax over the ``nb_elems`` scores per state row;
        ``ci`` is ``attn`` multiplied with the source encoding, reshaped to
        ``(current_mb_size, self.Hi)``.
        """
        current_mb_size = previous_state.data.shape[0]

        # Share the single precomputed source factor across all state rows.
        al_factor = F.broadcast_to(precomputed_al_factor, (current_mb_size, nb_elems, self.Ha))
        # used_fb_concat = F.broadcast_to(fb_concat, (current_mb_size, nb_elems, Hi))
        # used_concatenated_mask = F.broadcast_to(concatenated_mask, (current_mb_size, nb_elems))

        state_al_factor = self.al_lin_s(previous_state)

        #As suggested by Isao Goto
        if prev_word_embedding is not None:
            state_al_factor = state_al_factor + self.al_lin_y(prev_word_embedding)

        # Repeat the state factor over the nb_elems axis so it can be added
        # to al_factor.
        state_al_factor_bc = F.broadcast_to(F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)), (current_mb_size, nb_elems, self.Ha))
        a_coeffs = F.reshape(self.al_lin_o(F.reshape(F.tanh(state_al_factor_bc + al_factor), (current_mb_size * nb_elems, self.Ha))), (current_mb_size, nb_elems))

        # with cuda.get_device_from_array(used_concatenated_mask.data):
        #     a_coeffs = a_coeffs - 10000 * (1-used_concatenated_mask.data)

        attn = F.softmax(a_coeffs)

        # ci = F.reshape(F.batch_matmul(attn, used_fb_concat, transa = True), (current_mb_size, self.Hi))

        # mb_size is 1, so fb_concat can be viewed as a plain
        # (nb_elems, Hi) matrix for the matmul.
        ci = F.reshape(F.matmul(attn, F.reshape(fb_concat, (nb_elems, Hi))), (current_mb_size, self.Hi))

        return ci, attn

    return compute_ctxt
Example #25
Source File: rmsprop_async.py From chainerrl with MIT License | 5 votes |
def init_state(self, param):
    """Create the ``'ms'`` state entry as zeros shaped like ``param.array``.

    The zeros are allocated with the array module and on the device of the
    parameter array.
    """
    array = param.array
    xp = cuda.get_array_module(array)
    with cuda.get_device_from_array(array):
        self.state['ms'] = xp.zeros_like(array)
Example #26
Source File: ic_stn.py From see with GNU General Public License v3.0 | 4 votes |
def __call__(self, images):
    """Predict transformation parameters per timestep, optionally refined.

    Resets ``self.lstm``, extracts features (stored in ``self.vis_anchor``)
    and, for each of ``self.num_timesteps`` steps, predicts (2, 3)
    parameters. When ``self.do_parameter_refinement`` is set, each step
    additionally records ``self.num_refinement_steps`` refined versions.

    Returns:
        list: One entry per refinement level (the initial prediction first),
        each the concatenation on axis 0 of that level over all timesteps.
    """
    self.lstm.reset_state()

    h = self.bn0(self.conv0(images))
    h = F.average_pooling_2d(F.relu(h), 2, stride=2)

    h = self.rs1(h)
    h = F.max_pooling_2d(h, 2, stride=2)

    h = self.rs2(h)
    h = F.max_pooling_2d(h, 2, stride=2)

    h = self.rs3(h)
    # h = self.rs4(h)
    # keep the features before the final pooling for later inspection
    self.vis_anchor = h

    h = F.average_pooling_2d(h, 5)

    localizations = []

    with cuda.get_device_from_array(h.data):
        for _ in range(self.num_timesteps):
            # collects the initial prediction followed by every refinement
            timestep_localizations = []
            in_feature = h
            lstm_prediction = F.relu(self.lstm(in_feature))
            transformed = self.transform_2(lstm_prediction)
            transformed = F.reshape(transformed, (-1, 2, 3))
            transformation_params = rotation_dropout(transformed, ratio=self.dropout_ratio)
            timestep_localizations.append(transformation_params)

            # self.transform_2.disable_update()

            if self.do_parameter_refinement:
                transformation_params = self.to_homogeneous_coordinates(transformation_params)
                # refine the transformation parameters
                for _ in range(self.num_refinement_steps):
                    transformation_deltas = self.do_transformation_param_refinement_step(images, transformation_params)
                    transformation_deltas = self.to_homogeneous_coordinates(transformation_deltas)

                    # compose the current parameters with the predicted delta
                    transformation_params = F.batch_matmul(transformation_params, transformation_deltas)
                    # transformation_params = F.batch_matmul(transformation_deltas, transformation_params)
                    # drop the last row again before storing
                    timestep_localizations.append(transformation_params[:, :-1, :])

            localizations.append(timestep_localizations)

    # zip(*...) regroups from per-timestep lists to per-refinement-level tuples
    return [F.concat(loc, axis=0) for loc in zip(*localizations)]
Example #27
Source File: fsns.py From see with GNU General Public License v3.0 | 4 votes |
def __call__(self, images):
    """Predict one (2, 3) transformation per timestep via 3x3 composition.

    Resets ``self.lstm`` and ``self.transform_2``, extracts features
    (stored in ``self.vis_anchor``) and, for each of ``self.num_timesteps``
    steps, builds two 3x3 matrices, multiplies them and keeps the top two
    rows of the product.

    Returns:
        The per-timestep outputs of ``rotation_dropout`` concatenated on
        axis 0.
    """
    self.lstm.reset_state()
    self.transform_2.reset_state()

    h = self.bn0(self.conv0(images))
    h = F.average_pooling_2d(F.relu(h), 2, stride=2)

    h = self.rs1(h)
    h = F.max_pooling_2d(h, 2, stride=2)

    h = self.rs2(h)
    h = F.max_pooling_2d(h, 2, stride=2)

    h = self.rs3(h)
    # keep the features before the final pooling for later inspection
    self.vis_anchor = h

    h = F.average_pooling_2d(h, 5, stride=2)

    localizations = []

    with cuda.get_device_from_array(h.data):
        # extra row [0, 0, 1] appended to each (2, 3) matrix to make it 3x3
        homogenuous_addon = self.xp.zeros((len(h), 1, 3), dtype=h.data.dtype)
        homogenuous_addon[:, 0, 2] = 1

        for _ in range(self.num_timesteps):
            lstm_prediction = F.relu(self.lstm(h))

            # NOTE(review): the translation part is also produced by
            # `self.rotation_transform` -- confirm this is intended and not
            # meant to be a separate translation layer.
            translation_transform = F.reshape(self.rotation_transform(lstm_prediction), (-1, 2, 3))
            translation_transform = disable_shearing(translation_transform)
            translation_transform = F.concat((translation_transform, homogenuous_addon), axis=1)

            rotation_transform = F.reshape(self.rotation_transform(lstm_prediction), (-1, 2, 3))
            rotation_transform = disable_translation(rotation_transform)
            rotation_transform = F.concat((rotation_transform, homogenuous_addon), axis=1)

            # first rotate, then translate
            transform = F.batch_matmul(rotation_transform, translation_transform)
            # homogenuous_multiplier = F.get_item(transform, (..., 2, 2))
            #
            # # bring matrices from homogenous coordinates to normal coordinates
            transform = transform[:, :2, :]
            # transform = transform / homogenuous_multiplier
            localizations.append(rotation_dropout(transform, ratio=self.dropout_factor))

    return F.concat(localizations, axis=0)
Example #28
Source File: loss_metrics.py From see with GNU General Public License v3.0 | 4 votes |
def calc_loss(self, x, t):
    """Return the mean of the weighted per-timestep losses.

    Args:
        x: Triple unpacked as ``(batch_predictions, _, grids)``.
        t: Labels; reshaped to ``(batch_size, num_timesteps, -1)``.

    Returns:
        The average over timesteps of: ``calc_actual_loss`` plus the area,
        aspect-ratio, direction and height losses of every sub grid, scaled
        by the position-dependent entry of ``loss_weights``.
    """
    batch_predictions, _, grids = x
    self.xp = cuda.get_array_module(batch_predictions[0], t)

    # reshape labels
    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    # reshape grids
    grid_shape = grids.shape
    if self.uses_original_data:
        grids = F.reshape(grids, (self.num_timesteps, batch_size, 4,) + grid_shape[1:])
    else:
        grids = F.reshape(grids, (self.num_timesteps, batch_size, 1,) + grid_shape[1:])

    losses = []

    # with cuda.get_device_from_array(grids.data):
    #     grid_list = F.separate(F.reshape(grids, (self.num_timesteps, -1,) + grids.shape[3:]), axis=0)
    #     overlap_losses = []
    #     for grid_1, grid_2 in itertools.combinations(grid_list, 2):
    #         overlap_losses.append(self.calc_iou_loss(grid_1, grid_2))
    #     losses.append(sum(overlap_losses) / max(len(overlap_losses), 1))

    # NOTE(review): only four weights are defined, so `loss_weights[i - 1]`
    # below raises IndexError once more than four predictions are zipped --
    # confirm num_timesteps never exceeds 4.
    loss_weights = [1, 1.25, 2, 1.25]
    for i, (predictions, grid, labels) in enumerate(zip(batch_predictions, F.separate(grids, axis=0), F.separate(t, axis=1)), start=1):
        # the getattr default `predictions[0].data` is evaluated eagerly,
        # even when `predictions` has a `data` attribute
        with cuda.get_device_from_array(getattr(predictions, 'data', predictions[0].data)):
            # adapt ctc weight depending on current prediction position and labels
            # if all labels are blank, we want this weight to be full weight!
            overall_loss_weight = loss_weights[i - 1]
            loss = self.calc_actual_loss(predictions, grid, labels)
            # label_lengths = self.get_label_lengths(labels)

            for sub_grid in F.separate(grid, axis=1):
                width, height = self.get_bbox_side_lengths(sub_grid)
                loss += self.area_loss_factor * self.calc_area_loss(width, height)
                loss += self.aspect_ratio_loss_factor * self.calc_aspect_ratio_loss(width, height)
                loss += self.calc_direction_loss(sub_grid)
                loss += self.calc_height_loss(height)
            # the weight scales the recognition loss and all grid losses together
            loss *= overall_loss_weight
            losses.append(loss)

    return sum(losses) / len(losses)
Example #29
Source File: gwm_graph_conv_model.py From chainer-chemistry with MIT License | 4 votes |
def __call__(self, atom_array, adj, super_node=None, is_real_node=None):
    """Forward propagation through the update layers and readout.

    Args:
        atom_array: Node input. int32 input is embedded with ``self.embed``;
            any other dtype is used as the node features directly.
        adj: Adjacency input; rescaled with ``rescale_adj`` when
            ``self.scale_adj`` is set.
        super_node: Input to ``self.embed_super``; only read when
            ``self.with_gwm`` is set.
        is_real_node: Forwarded to the readout layers and to
            ``preprocess_addtional_kwargs``.

    Returns:
        The readout ``g``: the per-step readouts concatenated on axis 1
        (``concat_hidden``), their sum (``sum_hidden``), or the readout of
        the final ``h`` by ``readout_layers[0]``. With ``with_gwm`` and
        without ``concat_hidden``, the super-node state is concatenated and
        passed through ``linear_for_concat_super`` + ReLU.
    """
    self.reset_state()

    if atom_array.dtype == self.xp.int32:
        h = self.embed(atom_array)
    else:
        # TODO(nakago): GraphLinear or GraphMLP can be used.
        h = atom_array

    # copy of the initial features, handed to every readout layer as h0
    h0 = functions.copy(h, cuda.get_device_from_array(h.data).id)
    if self.with_gwm:
        # h_s is only defined (and only used below) when with_gwm is set
        h_s = self.embed_super(super_node)

    additional_kwargs = self.preprocess_addtional_kwargs(
        atom_array, adj, super_node=super_node, is_real_node=is_real_node)

    if self.scale_adj:
        adj = rescale_adj(adj)

    g_list = []
    for step in range(self.n_update_layers):
        # with weight tying every step reuses layer 0
        update_layer_index = 0 if self.weight_tying else step
        h_new = self.update_layers[update_layer_index](
            h=h, adj=adj, **additional_kwargs)

        if self.with_gwm:
            h_new, h_s = self.gwm(h, h_new, h_s, update_layer_index)
        h = h_new

        if self.use_batchnorm:
            h = self.bnorms[update_layer_index](h)

        if self.dropout_ratio > 0.:
            h = functions.dropout(h, ratio=self.dropout_ratio)

        # the activation is applied only on the first n_activation steps
        if self.activation is not None and step < self.n_activation:
            h = self.activation(h)

        if self.concat_hidden or self.sum_hidden:
            g = self.readout_layers[step](
                h=h, h0=h0, is_real_node=is_real_node, **additional_kwargs)
            g_list.append(g)

    if self.concat_hidden:
        return functions.concat(g_list, axis=1)
    else:
        if self.sum_hidden:
            g = functions.sum(functions.stack(g_list), axis=0)
        else:
            g = self.readout_layers[0](
                h=h, h0=h0, is_real_node=is_real_node)
        if self.with_gwm:
            g = functions.concat((g, h_s), axis=1)
            g = functions.relu(self.linear_for_concat_super(g))
        return g