Python torch.nn.functional.softmax() Examples

The following are 30 code examples of torch.nn.functional.softmax(), drawn from open-source projects; the source file and project are listed above each example. You may also want to check out all available functions/classes of the module torch.nn.functional, or try the search function.
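Before the project examples, a minimal self-contained sketch of the function itself (not taken from any of the projects below): F.softmax(input, dim) exponentiates and normalizes along the given dim, so slices along that dimension sum to 1. Recent PyTorch versions warn when dim is omitted, which is why most examples below pass it explicitly.

import torch
import torch.nn.functional as F

logits = torch.randn(2, 3)         # (batch, classes)
probs = F.softmax(logits, dim=1)   # normalize over the class dimension
print(probs.sum(dim=1))            # tensor([1., 1.]) -- each row sums to 1

# log_softmax computes the log of the same quantity in a numerically
# stable way; it is typically paired with nn.NLLLoss for classification.
log_probs = F.log_softmax(logits, dim=1)
print(torch.allclose(log_probs, probs.log()))  # True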
Example #1
Source File: util.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def test_tf2torch(tf_model,torch_model,input_shape, num_rand_inp=10, precision=10**-2):
    """
    Checks consistency of torch and tf models before generating attacks
    :param tf_model: copied tf model
    :param torch_model: torch model to be transferred to tf
    :param input_shape: Format Channels X Height X Width
    :param num_rand_inp: number of random inputs to test consistency on
    :return: raises error if the outputs are not consistent
    """
    torch_model.eval()
    rand_x = torch.rand(num_rand_inp,input_shape[0],input_shape[1],input_shape[2])
    tf_op = tf_model.predict(rand_x.numpy())
    torch_op = F.softmax(torch_model(Variable(rand_x)), dim=1).data.numpy()  # pass dim explicitly; the implicit default is deprecated
    assert tf_op.shape == torch_op.shape, "Mismatch of dimensions of the outputs from tf and torch models"
    assert np.linalg.norm(torch_op-tf_op)/np.linalg.norm(torch_op)<=num_rand_inp*precision, "Outputs of the torch and tensorflow models " \
                                                            "do not agree"
Example #2
Source File: model.py    From VSE-C with MIT License
def forward(self, encoding, lengths):
        lengths = Variable(torch.LongTensor(lengths))
        if torch.cuda.is_available():
            lengths = lengths.cuda()
        if self.method == 'mean':
            encoding_pad = nn.utils.rnn.pack_padded_sequence(encoding, lengths.data.tolist(), batch_first=True)
            encoding = nn.utils.rnn.pad_packed_sequence(encoding_pad, batch_first=True, padding_value=0)[0]
            lengths = lengths.float().view(-1, 1)
            return encoding.sum(1) / lengths, None
        elif self.method == 'max':
            return encoding.max(1)  # [bsz, in_dim], [bsz, in_dim] (position)
        elif self.method == 'attn':
            size = encoding.size()  # [bsz, len, in_dim]
            x_flat = encoding.contiguous().view(-1, size[2])  # [bsz*len, in_dim]
            hbar = self.tanh(self.ws1(x_flat))  # [bsz*len, attn_hid]
            alphas = self.ws2(hbar).view(size[0], size[1])  # [bsz, len]
            alphas = nn.utils.rnn.pack_padded_sequence(alphas, lengths.data.tolist(), batch_first=True)
            alphas = nn.utils.rnn.pad_packed_sequence(alphas, batch_first=True, padding_value=-1e8)[0]
            alphas = functional.softmax(alphas, dim=1)  # [bsz, len]
            alphas = alphas.view(size[0], 1, size[1])  # [bsz, 1, len]
            return torch.bmm(alphas, encoding).squeeze(1), alphas  # [bsz, in_dim], [bsz, len]
        elif self.method == 'last':
            return torch.cat([encoding[i][lengths[i] - 1] for i in range(encoding.size(0))], dim=0), None 
Example #3
Source File: set2set.py    From LanczosNetwork with MIT License
def forward(self, input_set):
    """
      Args:
        input_set: shape N X D

      Returns:
        output_vec: shape 1 X 2D
    """
    num_element = input_set.shape[0]
    element_dim = input_set.shape[1]
    assert element_dim == self.element_dim
    hidden = torch.zeros(1, 2 * self.element_dim).to(input_set.device)
    memory = torch.zeros(1, self.element_dim).to(input_set.device)

    for tt in range(self.num_step_encoder):
      hidden, memory = self.LSTM(hidden, memory)
      energy = torch.tanh(torch.mm(hidden, self.W_1) + input_set).mm(self.W_2)
      att_weight = F.softmax(energy, dim=0)
      read = (input_set * att_weight).sum(dim=0, keepdim=True)
      hidden = torch.cat([hidden, read], dim=1)

    return hidden 
Example #4
Source File: text_cnn.py    From TaskBot with GNU General Public License v3.0
def forward(self, x):
        # x: (batch, sentence_length)
        x = self.embed(x)
        # x: (batch, sentence_length, embed_dim)
        # TODO init embed matrix with pre-trained
        x = x.unsqueeze(1)
        # x: (batch, 1, sentence_length, embed_dim)
        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num)
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num)
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num)
        x = self.dropout(x)
        logit = F.log_softmax(self.fc1(x), dim=1)
        # logit = F.softmax(self.fc1(x), dim=1)
        # logit = self.fc1(x)
        return logit 
Example #5
Source File: tutorial.py    From TaskBot with GNU General Public License v3.0
def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights 
Example #6
Source File: logistic_mixture.py    From L3C-PyTorch with GNU General Public License v3.0
def _visualize_params(logits_pis, means, log_scales, channel):
    """
    :param logits_pis:  NCKHW
    :param means: NCKHW
    :param log_scales: NCKHW
    :param channel: int
    :return:
    """
    assert logits_pis.shape == means.shape == log_scales.shape
    logits_pis = logits_pis[0, channel, ...].detach()
    means = means[0, channel, ...].detach()
    log_scales = log_scales[0, channel, ...].detach()

    pis = torch.softmax(logits_pis, dim=0)  # Kdim==0 -> KHW

    mixtures = ft.lconcat(
            zip(_iter_Kdim_normalized(pis, normalize=False),
                _iter_Kdim_normalized(means),
                _iter_Kdim_normalized(log_scales)))
    grid = vis.grid.prep_for_grid(mixtures)
    img = torchvision.utils.make_grid(grid, nrow=3)
    return img 
Example #7
Source File: GST.py    From GST-Tacotron with MIT License
def forward(self, query, key):
        querys = self.W_query(query)  # [N, T_q, num_units]
        keys = self.W_key(key)  # [N, T_k, num_units]
        values = self.W_value(key)

        split_size = self.num_units // self.num_heads
        querys = torch.stack(torch.split(querys, split_size, dim=2), dim=0)  # [h, N, T_q, num_units/h]
        keys = torch.stack(torch.split(keys, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]
        values = torch.stack(torch.split(values, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]

        # score = softmax(QK^T / (d_k ** 0.5))
        scores = torch.matmul(querys, keys.transpose(2, 3))  # [h, N, T_q, T_k]
        scores = scores / (self.key_dim ** 0.5)
        scores = F.softmax(scores, dim=3)

        # out = score * V
        out = torch.matmul(scores, values)  # [h, N, T_q, num_units/h]
        out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0)  # [N, T_q, num_units]

        return out 
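The head-splitting trick used above can be seen in isolation. This is a minimal illustrative sketch (the shapes are assumptions, not taken from GST-Tacotron): torch.split cuts the unit dimension into num_heads chunks, and torch.stack prepends the head axis.

import torch

N, T, num_units, num_heads = 2, 5, 8, 4
x = torch.randn(N, T, num_units)
split_size = num_units // num_heads
heads = torch.stack(torch.split(x, split_size, dim=2), dim=0)
print(heads.shape)  # torch.Size([4, 2, 5, 2]) -> [h, N, T, num_units/h]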
Example #8
Source File: attention.py    From TaskBot with GNU General Public License v3.0
def forward(self, decoder_hidden, encoder_outputs):
        """

        Args:
            decoder_hidden: <torch.FloatTensor>, shape(B,H)
                    previous hidden state of the last layer in decoder
            encoder_outputs: <torch.FloatTensor>, shape(T,B,H)
                    encoder outputs

        Returns:
            normalized attention weights: <torch.FloatTensor>, shape(B,T)
        """
        max_len = encoder_outputs.size(0)
        H = decoder_hidden.repeat(max_len, 1, 1).transpose(0, 1)  # (B,T,H)
        encoder_outputs = encoder_outputs.transpose(0, 1)  # (B,T,H)
        attn_energies = self.score(H, encoder_outputs)  # (B,T)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)  # (B,1,T); dim passed explicitly, the implicit default is deprecated
Example #9
Source File: model.py    From interpret-text with MIT License
def get_z_scores(self, df_test):
        """Get softmaxed rationale importances.

        :param df_test: dataframe containing test data labels, tokens, masks,
            and counts
        :type df_test: pd.DataFrame
        :return:
            z_scores: softmaxed rationale scores with dimension
                (batch_size, length)
        :rtype: torch.FloatTensor
        """
        batch_dict = generate_data(df_test, self.use_cuda)
        x_tokens = batch_dict["x"]
        mask = batch_dict["m"]
        z_scores, _, _ = self.generator(x_tokens, mask)
        z_scores = F.softmax(z_scores, dim=-1)

        return z_scores 
Example #10
Source File: losses.py    From ACAN with MIT License
def get_entropy(self, pred, label):
        n, c, h, w = pred.size()
        label = label.unsqueeze(3).long()
        pred = F.softmax(pred, 1).permute(0, 2, 3, 1)
        one_hot_label = ((torch.arange(c)).cuda() == label).float()

        if self.eps == 0:
            prior = 0
        else:
            if self.priorType == 'gaussian':
                tensor = (torch.arange(c).cuda() - label).float()
                prior = NormalDist(tensor, c / 10)
            elif self.priorType == 'uniform':
                prior = 1 / (c-1)

        smoothed_label = (1 - self.eps) * one_hot_label + self.eps * prior * (1 - one_hot_label)
        entropy = smoothed_label * safe_log(pred) + (1 - smoothed_label) * safe_log(1 - pred)
        return -entropy 
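For intuition, here is a minimal sketch of the uniform-prior smoothing branch above (illustrative values, not from ACAN): the true class keeps probability 1 - eps and the remaining eps mass is spread evenly over the other c - 1 classes, so the smoothed label still sums to 1.

import torch

c, eps = 5, 0.1
label = torch.tensor([2])                                   # true class index
one_hot = (torch.arange(c) == label.unsqueeze(1)).float()   # (1, c)
prior = 1 / (c - 1)                                         # uniform prior over the other classes
smoothed = (1 - eps) * one_hot + eps * prior * (1 - one_hot)
print(smoothed)        # tensor([[0.0250, 0.0250, 0.9000, 0.0250, 0.0250]])
print(smoothed.sum())  # tensor(1.)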
Example #11
Source File: utils.py    From integrated-gradient-pytorch with MIT License
def calculate_outputs_and_gradients(inputs, model, target_label_idx, cuda=False):
    # do the pre-processing
    predict_idx = None
    gradients = []
    for input in inputs:
        input = pre_processing(input, cuda)
        output = model(input)
        output = F.softmax(output, dim=1)
        if target_label_idx is None:
            target_label_idx = torch.argmax(output, 1).item()
        index = np.ones((output.size()[0], 1)) * target_label_idx
        index = torch.tensor(index, dtype=torch.int64)
        if cuda:
            index = index.cuda()
        output = output.gather(1, index)
        # clear grad
        model.zero_grad()
        output.backward()
        gradient = input.grad.detach().cpu().numpy()[0]
        gradients.append(gradient)
    gradients = np.array(gradients)
    return gradients, target_label_idx 
Example #12
Source File: set2set.py    From LanczosNetwork with MIT License
def forward(self, input_set):
    """
      Args:
        input_set: shape N X D

      Returns:
        output_set: shape N (one argmax index per input element)
    """
    num_element = input_set.shape[0]
    element_dim = input_set.shape[1]
    assert element_dim == self.element_dim
    hidden = torch.zeros(1, 2 * self.element_dim).to(input_set.device)
    memory = torch.zeros(1, self.element_dim).to(input_set.device)

    # encoding
    for tt in range(self.num_step_encoder):
      hidden, memory = self.LSTM_encoder(hidden, memory)
      energy = torch.tanh(torch.mm(hidden, self.W_1) + input_set).mm(self.W_2)
      att_weight = F.softmax(energy, dim=0)
      read = (input_set * att_weight).sum(dim=0, keepdim=True)
      hidden = torch.cat([hidden, read], dim=1)

    # decoding
    memory = torch.zeros_like(memory)
    output_set = []
    for tt in range(num_element):
      hidden, memory = self.LSTM_decoder(hidden, memory)
      energy = torch.tanh(torch.mm(hidden, self.W_3) + input_set).mm(self.W_4)
      att_weight = F.softmax(energy, dim=0)
      read = (input_set * att_weight).sum(dim=0, keepdim=True)
      hidden = torch.cat([hidden, read], dim=1)
      energy = torch.tanh(torch.mm(read, self.W_5) + torch.mm(
          input_set, self.W_6)).mm(self.W_7)
      output_set += [torch.argmax(energy)]

    return torch.stack(output_set) 
Example #13
Source File: decoder_deep.py    From ConvLab with MIT License
def logits2words(self, output, decoded_words, dataset, sample_size):
		'''
		* Decode words from logits output at a time step AND put decoded words in final results *
		* take argmax if sample size == 1
		'''
		batch_size = output.size(0)
		if sample_size == 1: # take argmax directly w/o sampling
			topv, topi = F.softmax(output, dim=1).data.topk(1) # both (batch_size, 1)

		else: # sample over word distribution
			topv, topi = [], []
			word_dis = F.softmax(output, dim=1) # (batch_size, output_size)

			# sample from part of the output distribution for word variations
			n_candidate = 3
			word_dis_sort, idx_of_idx = torch.sort(word_dis, dim=1, descending=True)
			word_dis_sort = word_dis_sort[:, :n_candidate]
			idx_of_idx = idx_of_idx[:, :n_candidate]
			sample_idx = torch.multinomial(word_dis_sort, 1) # (batch_size, 1)
			for b in range(batch_size):
				i = int(sample_idx[b])
				idx = int(idx_of_idx[b][i])
				prob = float(word_dis[b][idx])
				topi.append(idx)
				topv.append(prob)
				
			topv = torch.FloatTensor(topv).view(batch_size, 1)
			topi = torch.LongTensor(topi).view(batch_size, 1)
			
		decoded_words_t = np.zeros((batch_size, self.output_size))
		for b in range(batch_size):
			idx = topi[b][0]
			word = dataset.index2word[idx.item()]
			decoded_words[b] += (word + ' ')
			decoded_words_t[b][idx] = 1
		decoded_words_t = Variable(torch.from_numpy(decoded_words_t.astype(np.float32)))

		if self.USE_CUDA:
			decoded_words_t = decoded_words_t.cuda()

		return decoded_words_t 
Example #14
Source File: unsup_net.py    From SEDST with MIT License
def forward(self, z_enc_out, pz_proba, u_enc_out, m_t_input, last_hidden, flag=False):
        """
        decode the response: P(m|u,z)
        :param pz_proba: [Tz,B,V], output of the prior decoder
        :param z_enc_out: [Tz,B,H]
        :param u_enc_out: [T,B,H]
        :param m_t_input: [1,B]
        :param last_hidden:
        :return: proba: [1,B,V]
        """
        batch_size = z_enc_out.size(1)
        m_embed = self.emb(m_t_input)
        z_context = F.dropout(self.attn_z(last_hidden, z_enc_out), self.dropout_rate)
        u_context = F.dropout(self.attn_u(last_hidden, u_enc_out), self.dropout_rate)
        # d_control = self.w4(z_context) + torch.mul(F.sigmoid(self.gate_z(z_context)), self.w5(u_context))
        gru_out, last_hidden = self.gru(torch.cat([z_context, u_context, m_embed], dim=2),
                                        last_hidden)
        gru_out = self.ln1(gru_out)

        gen_score = self.proj(gru_out).squeeze(0)

        z_copy_score = torch.tanh(  # F.tanh is deprecated; torch.tanh is the current API
            self.proj_copy1(torch.cat([z_enc_out, gru_out.repeat(z_enc_out.size(0), 1, 1)], 2)))  # [T,B,H]
        z_copy_score = self.v1(z_copy_score).squeeze(2).transpose(0, 1)  # [B,T]

        scores = F.softmax(torch.cat([gen_score, z_copy_score], dim=1), dim=1)
        gen_score, z_copy_score = scores[:, :gen_score.size(1)], scores[:, gen_score.size(1):]
        z_copy_score = mask_prob(z_copy_score, pz_proba.transpose(0, 1), aux=cfg.aux_device)
        proba = gen_score + self.copy_weight * z_copy_score  # [B,V]
        return proba, last_hidden 
Example #15
Source File: loss.py    From pytorch-segmentation-toolbox with MIT License
def forward(self, predict, target, weight=None):
        """
            Args:
                predict:(n, c, h, w)
                target:(n, h, w)
                weight (Tensor, optional): a manual rescaling weight given to each class.
                                           If given, has to be a Tensor of size "nclasses"
        """
        assert not target.requires_grad

        input_prob = F.softmax(predict, 1)
        target = self.generate_new_target(input_prob, target)
        return self.criterion(predict, target) 
Example #16
Source File: rnn_net.py    From SEDST with MIT License
def forward(self, hidden, encoder_outputs, normalize=True):
        encoder_outputs = encoder_outputs.transpose(0, 1)  # [B,T,H]
        attn_energies = self.score(hidden, encoder_outputs)
        normalized_energy = F.softmax(attn_energies, dim=2)  # [B,1,T]
        context = torch.bmm(normalized_energy, encoder_outputs)  # [B,1,H]
        return context.transpose(0, 1)  # [1,B,H] 
Example #17
Source File: transformer.py    From Doc2EDAG with MIT License
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention'"""
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn 
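A minimal usage sketch of the attention function above; the batch/head/length sizes are illustrative assumptions, not from Doc2EDAG. Each row of p_attn is a softmax distribution over key positions, so it sums to 1.

import torch

batch, heads, seq_len, d_k = 2, 4, 10, 16
query = torch.randn(batch, heads, seq_len, d_k)
key = torch.randn(batch, heads, seq_len, d_k)
value = torch.randn(batch, heads, seq_len, d_k)
mask = torch.ones(batch, 1, seq_len, seq_len)  # 1 = attend, 0 = masked out

out, p_attn = attention(query, key, value, mask=mask)
print(out.shape)           # torch.Size([2, 4, 10, 16])
print(p_attn.sum(dim=-1))  # all ones: each query attends with total weight 1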
Example #18
Source File: mrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def forward(self, x):
        """
        :param x: input feature maps (b, in_channels, y, x, (z))
        :return: rpn_class_logits (b, n_anchors, 2)
        :return: rpn_probs (b, n_anchors, 2)
        :return: rpn_bbox (b, n_anchors, 2 * dim)
        """

        # Shared convolutional base of the RPN.
        x = self.conv_shared(x)

        # Anchor Score. (batch, anchors per location * 2, y, x, (z)).
        rpn_class_logits = self.conv_class(x)
        # Reshape to (batch, 2, anchors)
        axes = (0, 2, 3, 1) if self.dim == 2 else (0, 2, 3, 4, 1)
        rpn_class_logits = rpn_class_logits.permute(*axes)
        rpn_class_logits = rpn_class_logits.contiguous()
        rpn_class_logits = rpn_class_logits.view(x.size()[0], -1, 2)

        # Softmax on last dimension (fg vs. bg).
        rpn_probs = F.softmax(rpn_class_logits, dim=2)

        # Bounding box refinement. (batch, anchors_per_location * (y, x, (z), log(h), log(w), (log(d)), y, x, (z))
        rpn_bbox = self.conv_bbox(x)

        # Reshape to (batch, 2*dim, anchors)
        rpn_bbox = rpn_bbox.permute(*axes)
        rpn_bbox = rpn_bbox.contiguous()
        rpn_bbox = rpn_bbox.view(x.size()[0], -1, self.dim * 2)

        return [rpn_class_logits, rpn_probs, rpn_bbox] 
Example #19
Source File: logistic_mixture.py    From L3C-PyTorch with GNU General Public License v3.0
def cdf_step_non_shared(self, l, targets, c_cur, C, x_c=None) -> CDFOut:
        assert c_cur < C

        # NKHW         NKHW     NKHW
        logit_probs_c, means_c, log_scales_c, K = self._extract_non_shared_c(c_cur, C, l, x_c)

        logit_probs_c_softmax = F.softmax(logit_probs_c, dim=1)  # NKHW, pi_k
        return CDFOut(logit_probs_c_softmax, means_c, log_scales_c, K, targets.to(l.device)) 
Example #20
Source File: multiscale_blueprint.py    From L3C-PyTorch with GNU General Public License v3.0
def get_p_y(y):
    """
    :param y: NLCHW float, logits
    :return: L dimensional vector p
    """
    Ldim = 1
    L = y.shape[Ldim]
    y = y.detach()
    p = F.softmax(y, dim=Ldim)
    p = p.transpose(Ldim, -1)
    p = p.contiguous().view(-1, L)  # nL
    p = torch.mean(p, dim=0)  # L
    return pe.tensor_to_np(p) 
Example #21
Source File: context_query_attention.py    From TVQAplus with MIT License
def forward(self, C, Q, c_mask, q_mask):
        """
        match the dims of '*'; singleton dims are allowed (they broadcast)
        :param C: (N, *, Lc, D)
        :param Q: (N, *, Lq, D)
        :param c_mask: (N, *, Lc)
        :param q_mask: (N, *, Lq)
        :return: (N, Lc, D) and (N, Lq, D)
        """

        S = self.similarity(C, Q, c_mask, q_mask)  # (N, *, Lc, Lq)
        S_ = F.softmax(S, dim=-1)  # (N, *, Lc, Lq)
        A = torch.matmul(S_, Q)  # (N, *, Lc, D)
        return A 
Example #22
Source File: region_proposal_network.py    From easy-faster-rcnn.pytorch with MIT License
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
        batch_size = anchor_bboxes.shape[0]

        proposal_bboxes = BBox.apply_transformer(anchor_bboxes, transformers)
        proposal_bboxes = BBox.clip(proposal_bboxes, left=0, top=0, right=image_width, bottom=image_height)
        proposal_probs = F.softmax(objectnesses[:, :, 1], dim=-1)

        _, sorted_indices = torch.sort(proposal_probs, dim=-1, descending=True)
        nms_proposal_bboxes_batch = []

        for batch_index in range(batch_size):
            sorted_bboxes = proposal_bboxes[batch_index][sorted_indices[batch_index]][:self._pre_nms_top_n]
            sorted_probs = proposal_probs[batch_index][sorted_indices[batch_index]][:self._pre_nms_top_n]
            threshold = 0.7
            kept_indices = nms(sorted_bboxes, sorted_probs, threshold)
            nms_bboxes = sorted_bboxes[kept_indices][:self._post_nms_top_n]
            nms_proposal_bboxes_batch.append(nms_bboxes)

        max_nms_proposal_bboxes_length = max([len(it) for it in nms_proposal_bboxes_batch])
        padded_proposal_bboxes = []

        for nms_proposal_bboxes in nms_proposal_bboxes_batch:
            padded_proposal_bboxes.append(
                torch.cat([
                    nms_proposal_bboxes,
                    torch.zeros(max_nms_proposal_bboxes_length - len(nms_proposal_bboxes), 4).to(nms_proposal_bboxes)
                ])
            )

        padded_proposal_bboxes = torch.stack(padded_proposal_bboxes, dim=0)
        return padded_proposal_bboxes 
Example #23
Source File: model.py    From easy-faster-rcnn.pytorch with MIT License
def generate_detections(self, proposal_bboxes: Tensor, proposal_classes: Tensor, proposal_transformers: Tensor, image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
            batch_size = proposal_bboxes.shape[0]

            proposal_transformers = proposal_transformers.view(batch_size, -1, self.num_classes, 4)
            transformer_normalize_std = self._transformer_normalize_std.to(device=proposal_transformers.device)
            transformer_normalize_mean = self._transformer_normalize_mean.to(device=proposal_transformers.device)
            proposal_transformers = proposal_transformers * transformer_normalize_std + transformer_normalize_mean

            proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, self.num_classes, 1)
            detection_bboxes = BBox.apply_transformer(proposal_bboxes, proposal_transformers)
            detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0, right=image_width, bottom=image_height)
            detection_probs = F.softmax(proposal_classes, dim=-1)

            all_detection_bboxes = []
            all_detection_classes = []
            all_detection_probs = []
            all_detection_batch_indices = []

            for batch_index in range(batch_size):
                for c in range(1, self.num_classes):
                    class_bboxes = detection_bboxes[batch_index, :, c, :]
                    class_probs = detection_probs[batch_index, :, c]
                    threshold = 0.3
                    kept_indices = nms(class_bboxes, class_probs, threshold)
                    class_bboxes = class_bboxes[kept_indices]
                    class_probs = class_probs[kept_indices]

                    all_detection_bboxes.append(class_bboxes)
                    all_detection_classes.append(torch.full((len(kept_indices),), c, dtype=torch.int))
                    all_detection_probs.append(class_probs)
                    all_detection_batch_indices.append(torch.full((len(kept_indices),), batch_index, dtype=torch.long))

            all_detection_bboxes = torch.cat(all_detection_bboxes, dim=0)
            all_detection_classes = torch.cat(all_detection_classes, dim=0)
            all_detection_probs = torch.cat(all_detection_probs, dim=0)
            all_detection_batch_indices = torch.cat(all_detection_batch_indices, dim=0)
            return all_detection_bboxes, all_detection_classes, all_detection_probs, all_detection_batch_indices 
Example #24
Source File: basic.py    From PSMNet with MIT License
def forward(self, left, right):

        refimg_fea     = self.feature_extraction(left)
        targetimg_fea  = self.feature_extraction(right)
 
        # matching: build the cost volume (integer division keeps the sizes integral in Python 3)
        cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4,  refimg_fea.size()[2],  refimg_fea.size()[3]).zero_(), volatile= not self.training).cuda()

        for i in range(self.maxdisp//4):
            if i > 0 :
             cost[:, :refimg_fea.size()[1], i, :,i:]   = refimg_fea[:,:,:,i:]
             cost[:, refimg_fea.size()[1]:, i, :,i:] = targetimg_fea[:,:,:,:-i]
            else:
             cost[:, :refimg_fea.size()[1], i, :,:]   = refimg_fea
             cost[:, refimg_fea.size()[1]:, i, :,:]   = targetimg_fea
        cost = cost.contiguous()

        cost0 = self.dres0(cost)
        cost0 = self.dres1(cost0) + cost0
        cost0 = self.dres2(cost0) + cost0 
        cost0 = self.dres3(cost0) + cost0 
        cost0 = self.dres4(cost0) + cost0

        cost = self.classify(cost0)
        cost = F.interpolate(cost, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear')  # F.upsample is deprecated in favor of F.interpolate
        cost = torch.squeeze(cost,1)
        pred = F.softmax(cost, dim=1)  # softmax over the disparity dimension
        pred = disparityregression(self.maxdisp)(pred)

        return pred 
Example #25
Source File: ufrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def compute_rpn_class_loss(rpn_match, rpn_class_logits, shem_poolsize):
    """
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :param rpn_class_logits: (n_anchors, 2). logits from RPN classifier.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample
    (stochastic-hard-example-mining).
    :return: loss: torch tensor
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """

    # filter out neutral anchors.
    pos_indices = torch.nonzero(rpn_match == 1)
    neg_indices = torch.nonzero(rpn_match == -1)

    # loss for positive samples
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        roi_logits_pos = rpn_class_logits[pos_indices]
        pos_loss = F.cross_entropy(roi_logits_pos, torch.LongTensor([1] * pos_indices.shape[0]).cuda())
    else:
        pos_loss = torch.FloatTensor([0]).cuda()

    # loss for negative samples: draw hard negative examples (SHEM)
    # that match the number of positive samples, but at least 1.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = rpn_class_logits[neg_indices]
        negative_count = np.max((1, pos_indices.cpu().data.numpy().size))
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], torch.LongTensor([0] * neg_ix.shape[0]).cuda())
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.FloatTensor([0]).cuda()
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix 
Example #26
Source File: ufrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def forward(self, x):
        """
        :param x: input feature maps (b, in_channels, y, x, (z))
        :return: rpn_class_logits (b, n_anchors, 2)
        :return: rpn_probs (b, n_anchors, 2)
        :return: rpn_bbox (b, n_anchors, 2 * dim)
        """

        # Shared convolutional base of the RPN.
        x = self.conv_shared(x)

        # Anchor Score. (batch, anchors per location * 2, y, x, (z)).
        rpn_class_logits = self.conv_class(x)
        # Reshape to (batch, 2, anchors)
        axes = (0, 2, 3, 1) if self.dim == 2 else (0, 2, 3, 4, 1)
        rpn_class_logits = rpn_class_logits.permute(*axes)
        rpn_class_logits = rpn_class_logits.contiguous()
        rpn_class_logits = rpn_class_logits.view(x.size()[0], -1, 2)

        # Softmax on last dimension (fg vs. bg).
        rpn_probs = F.softmax(rpn_class_logits, dim=2)

        # Bounding box refinement. (batch, anchors_per_location * (y, x, (z), log(h), log(w), (log(d)), y, x, (z))
        rpn_bbox = self.conv_bbox(x)

        # Reshape to (batch, 2*dim, anchors)
        rpn_bbox = rpn_bbox.permute(*axes)
        rpn_bbox = rpn_bbox.contiguous()
        rpn_bbox = rpn_bbox.view(x.size()[0], -1, self.dim * 2)

        return [rpn_class_logits, rpn_probs, rpn_bbox] 
Example #27
Source File: detection_unet.py    From medicaldetectiontoolkit with Apache License 2.0
def test_forward(self, batch, **kwargs):
        """
        test method. wrapper around forward pass of network without usage of any ground truth information.
        prepares input data for processing and stores outputs in a dictionary.
        :param batch: dictionary containing 'data'
        :param kwargs:
        :return: results_dict: dictionary with keys:
               'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                       [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
               'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes]
        """
        img = batch['data']
        var_img = torch.FloatTensor(img).cuda()
        seg_logits, box_coords, max_scores = self.forward(var_img)

        results_dict = {}
        results_dict['boxes'] = [[] for _ in range(img.shape[0])]
        for cix in range(len(self.cf.class_dict.keys())):
            for bix in range(img.shape[0]):
                for rix in range(len(max_scores[cix][bix])):
                    if max_scores[cix][bix][rix] > self.cf.detection_min_confidence:
                        results_dict['boxes'][bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]),
                                                           'box_score': max_scores[cix][bix][rix],
                                                           'box_pred_class_id': cix + 1,  # add 0 for background.
                                                           'box_type': 'det'})

        results_dict['seg_preds'] = np.argmax(F.softmax(seg_logits, 1).cpu().data.numpy(), 1)[:, np.newaxis].astype('uint8')
        return results_dict 
Example #28
Source File: detection_unet.py    From medicaldetectiontoolkit with Apache License 2.0
def forward(self, x):
        """
        forward pass of network.
        :param x: input image. shape (b, c, y, x, (z))
        :return: seg_logits: shape (b, n_classes, y, x, (z))
        :return: out_box_coords: list over n_classes. elements are arrays(b, n_rois, (y1, x1, y2, x2, (z1), (z2)))
        :return: out_max_scores: list over n_classes. elements are arrays(b, n_rois)
        """

        out_features = self.fpn(x)[0]
        seg_logits = self.conv_final(out_features)
        out_box_coords, out_max_scores = [], []
        smax = F.softmax(seg_logits, dim=1).detach().cpu().data.numpy()

        for cl in range(1, len(self.cf.class_dict.keys()) + 1):
            max_scores = [[] for _ in range(x.shape[0])]
            hard_mask = np.copy(smax).argmax(1)
            hard_mask[hard_mask != cl] = 0
            hard_mask[hard_mask == cl] = 1
            # perform connected component analysis on argmaxed predictions,
            # draw boxes around components and return coordinates.
            box_coords, rois = get_coords(hard_mask, self.cf.n_roi_candidates, self.cf.dim)

            # for each object, choose the highest softmax score (in the respective class)
            # of all pixels in the component as object score.
            for bix, broi in enumerate(rois):
                for nix, nroi in enumerate(broi):
                    component_score = np.max(smax[bix, cl][nroi > 0]) if self.cf.aggregation_operation == 'max' \
                        else np.median(smax[bix, cl][nroi > 0])
                    max_scores[bix].append(component_score)
            out_box_coords.append(box_coords)
            out_max_scores.append(max_scores)
        return seg_logits, out_box_coords, out_max_scores 
Example #29
Source File: retina_net.py    From medicaldetectiontoolkit with Apache License 2.0
def compute_class_loss(anchor_matches, class_pred_logits, shem_poolsize=20):
    """
    :param anchor_matches: (n_anchors). [-1, 0, class_id] for negative, neutral, and positive matched anchors.
    :param class_pred_logits: (n_anchors, n_classes). logits from classifier sub-network.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample (online-hard-example-mining).
    :return: loss: torch tensor.
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    pos_indices = torch.nonzero(anchor_matches > 0)
    neg_indices = torch.nonzero(anchor_matches == -1)

    # get positive samples and calculate loss.
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        roi_logits_pos = class_pred_logits[pos_indices]
        targets_pos = anchor_matches[pos_indices]
        pos_loss = F.cross_entropy(roi_logits_pos, targets_pos.long())
    else:
        pos_loss = torch.FloatTensor([0]).cuda()

    # get negative samples, such that the amount matches the number of positive samples, but at least 1.
    # get high scoring negatives by applying online-hard-example-mining.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = class_pred_logits[neg_indices]
        negative_count = np.max((1, pos_indices.size()[0]))
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], torch.LongTensor([0] * neg_ix.shape[0]).cuda())
        # return the indices of negative samples, which contributed to the loss (for monitoring plots).
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.FloatTensor([0]).cuda()
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix 
Example #30
Source File: mrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def compute_rpn_class_loss(rpn_match, rpn_class_logits, shem_poolsize):
    """
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :param rpn_class_logits: (n_anchors, 2). logits from RPN classifier.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample
    (stochastic-hard-example-mining).
    :return: loss: torch tensor
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """

    # filter out neutral anchors.
    pos_indices = torch.nonzero(rpn_match == 1)
    neg_indices = torch.nonzero(rpn_match == -1)

    # loss for positive samples
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        roi_logits_pos = rpn_class_logits[pos_indices]
        pos_loss = F.cross_entropy(roi_logits_pos, torch.LongTensor([1] * pos_indices.shape[0]).cuda())
    else:
        pos_loss = torch.FloatTensor([0]).cuda()

    # loss for negative samples: draw hard negative examples (SHEM)
    # that match the number of positive samples, but at least 1.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = rpn_class_logits[neg_indices]
        negative_count = np.max((1, pos_indices.cpu().data.numpy().size))
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], torch.LongTensor([0] * neg_ix.shape[0]).cuda())
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.FloatTensor([0]).cuda()
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix