Python torch.nn.functional.softmax() Examples

The following are 30 code examples of torch.nn.functional.softmax(), drawn from open-source projects; the source file and project are listed above each example. You may also want to check out all available functions/classes of the module torch.nn.functional, or try the search function.
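Before the project examples, a minimal self-contained sketch of the function itself (not taken from any of the projects below): F.softmax(input, dim) exponentiates and normalizes along the given dim, so slices along that dimension sum to 1. Recent PyTorch versions warn when dim is omitted, which is why most examples below pass it explicitly.

import torch
import torch.nn.functional as F

logits = torch.randn(2, 3)         # (batch, classes)
probs = F.softmax(logits, dim=1)   # normalize over the class dimension
print(probs.sum(dim=1))            # tensor([1., 1.]) -- each row sums to 1

# log_softmax computes the log of the same quantity in a numerically
# stable way; it is typically paired with nn.NLLLoss for classification.
log_probs = F.log_softmax(logits, dim=1)
print(torch.allclose(log_probs, probs.log()))  # True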
Example #1
Source File: util.py    From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License
def test_tf2torch(tf_model,torch_model,input_shape, num_rand_inp=10, precision=10**-2):
    """
    Checks consistency of torch and tf models before generating attacks
    :param tf_model: copied tf model
    :param torch_model: torch model to be transferred to tf
    :param input_shape: Format Channels X Height X Width
    :param num_rand_inp: number of random inputs to test consistency on
    :return: raises error if the outputs are not consistent
    """
    torch_model.eval()
    rand_x = torch.rand(num_rand_inp,input_shape[0],input_shape[1],input_shape[2])
    tf_op = tf_model.predict(rand_x.numpy())
    torch_op = F.softmax(torch_model(Variable(rand_x)), dim=1).data.numpy()  # pass dim explicitly; the implicit default is deprecated
    assert tf_op.shape == torch_op.shape, "Mismatch of dimensions of the outputs from tf and torch models"
    assert np.linalg.norm(torch_op-tf_op)/np.linalg.norm(torch_op)<=num_rand_inp*precision, "Outputs of the torch and tensorflow models " \
                                                            "do not agree"
Example #2
Source File: model.py    From VSE-C with MIT License
def forward(self, encoding, lengths):
        lengths = Variable(torch.LongTensor(lengths))
        if torch.cuda.is_available():
            lengths = lengths.cuda()
        if self.method == 'mean':
            encoding_pad = nn.utils.rnn.pack_padded_sequence(encoding, lengths.data.tolist(), batch_first=True)
            encoding = nn.utils.rnn.pad_packed_sequence(encoding_pad, batch_first=True, padding_value=0)[0]
            lengths = lengths.float().view(-1, 1)
            return encoding.sum(1) / lengths, None
        elif self.method == 'max':
            return encoding.max(1)  # [bsz, in_dim], [bsz, in_dim] (position)
        elif self.method == 'attn':
            size = encoding.size()  # [bsz, len, in_dim]
            x_flat = encoding.contiguous().view(-1, size[2])  # [bsz*len, in_dim]
            hbar = self.tanh(self.ws1(x_flat))  # [bsz*len, attn_hid]
            alphas = self.ws2(hbar).view(size[0], size[1])  # [bsz, len]
            alphas = nn.utils.rnn.pack_padded_sequence(alphas, lengths.data.tolist(), batch_first=True)
            alphas = nn.utils.rnn.pad_packed_sequence(alphas, batch_first=True, padding_value=-1e8)[0]
            alphas = functional.softmax(alphas, dim=1)  # [bsz, len]
            alphas = alphas.view(size[0], 1, size[1])  # [bsz, 1, len]
            return torch.bmm(alphas, encoding).squeeze(1), alphas  # [bsz, in_dim], [bsz, len]
        elif self.method == 'last':
            return torch.cat([encoding[i][lengths[i] - 1] for i in range(encoding.size(0))], dim=0), None 
Example #3
Source File: set2set.py    From LanczosNetwork with MIT License
def forward(self, input_set):
    """
      Args:
        input_set: shape N X D

      Returns:
        output_vec: shape 1 X 2D
    """
    num_element = input_set.shape[0]
    element_dim = input_set.shape[1]
    assert element_dim == self.element_dim
    hidden = torch.zeros(1, 2 * self.element_dim).to(input_set.device)
    memory = torch.zeros(1, self.element_dim).to(input_set.device)

    for tt in range(self.num_step_encoder):
      hidden, memory = self.LSTM(hidden, memory)
      energy = torch.tanh(torch.mm(hidden, self.W_1) + input_set).mm(self.W_2)
      att_weight = F.softmax(energy, dim=0)
      read = (input_set * att_weight).sum(dim=0, keepdim=True)
      hidden = torch.cat([hidden, read], dim=1)

    return hidden 
Example #4
Source File: text_cnn.py    From TaskBot with GNU General Public License v3.0
def forward(self, x):
        # x: (batch, sentence_length)
        x = self.embed(x)
        # x: (batch, sentence_length, embed_dim)
        # TODO init embed matrix with pre-trained
        x = x.unsqueeze(1)
        # x: (batch, 1, sentence_length, embed_dim)
        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num)
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num)
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num)
        x = self.dropout(x)
        logit = F.log_softmax(self.fc1(x), dim=1)
        # logit = F.softmax(self.fc1(x), dim=1)
        # logit = self.fc1(x)
        return logit 
Example #5
Source File: tutorial.py    From TaskBot with GNU General Public License v3.0
def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights 
Example #6
Source File: logistic_mixture.py    From L3C-PyTorch with GNU General Public License v3.0
def _visualize_params(logits_pis, means, log_scales, channel):
    """
    :param logits_pis:  NCKHW
    :param means: NCKHW
    :param log_scales: NCKHW
    :param channel: int
    :return:
    """
    assert logits_pis.shape == means.shape == log_scales.shape
    logits_pis = logits_pis[0, channel, ...].detach()
    means = means[0, channel, ...].detach()
    log_scales = log_scales[0, channel, ...].detach()

    pis = torch.softmax(logits_pis, dim=0)  # Kdim==0 -> KHW

    mixtures = ft.lconcat(
            zip(_iter_Kdim_normalized(pis, normalize=False),
                _iter_Kdim_normalized(means),
                _iter_Kdim_normalized(log_scales)))
    grid = vis.grid.prep_for_grid(mixtures)
    img = torchvision.utils.make_grid(grid, nrow=3)
    return img 
Example #7
Source File: GST.py    From GST-Tacotron with MIT License
def forward(self, query, key):
        querys = self.W_query(query)  # [N, T_q, num_units]
        keys = self.W_key(key)  # [N, T_k, num_units]
        values = self.W_value(key)

        split_size = self.num_units // self.num_heads
        querys = torch.stack(torch.split(querys, split_size, dim=2), dim=0)  # [h, N, T_q, num_units/h]
        keys = torch.stack(torch.split(keys, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]
        values = torch.stack(torch.split(values, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]

        # score = softmax(QK^T / (d_k ** 0.5))
        scores = torch.matmul(querys, keys.transpose(2, 3))  # [h, N, T_q, T_k]
        scores = scores / (self.key_dim ** 0.5)
        scores = F.softmax(scores, dim=3)

        # out = score * V
        out = torch.matmul(scores, values)  # [h, N, T_q, num_units/h]
        out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0)  # [N, T_q, num_units]

        return out 
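The head-splitting trick used above can be seen in isolation. This is a minimal illustrative sketch (the shapes are assumptions, not taken from GST-Tacotron): torch.split cuts the unit dimension into num_heads chunks, and torch.stack prepends the head axis.

import torch

N, T, num_units, num_heads = 2, 5, 8, 4
x = torch.randn(N, T, num_units)
split_size = num_units // num_heads
heads = torch.stack(torch.split(x, split_size, dim=2), dim=0)
print(heads.shape)  # torch.Size([4, 2, 5, 2]) -> [h, N, T, num_units/h]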
Example #8
Source File: attention.py    From TaskBot with GNU General Public License v3.0
def forward(self, decoder_hidden, encoder_outputs):
        """

        Args:
            decoder_hidden: <torch.FloatTensor>, shape(B,H)
                    previous hidden state of the last layer in decoder
            encoder_outputs: <torch.FloatTensor>, shape(T,B,H)
                    encoder outputs

        Returns:
            normalized attention weights: <torch.FloatTensor>, shape(B,T)
        """
        max_len = encoder_outputs.size(0)
        H = decoder_hidden.repeat(max_len, 1, 1).transpose(0, 1)  # (B,T,H)
        encoder_outputs = encoder_outputs.transpose(0, 1)  # (B,T,H)
        attn_energies = self.score(H, encoder_outputs)  # (B,T)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)  # (B,1,T); dim passed explicitly, the implicit default is deprecated
Example #9
Source File: model.py    From interpret-text with MIT License
def get_z_scores(self, df_test):
        """Get softmaxed rationale importances.

        :param df_test: dataframe containing test data labels, tokens, masks,
            and counts
        :type df_test: pd.DataFrame
        :return:
            z_scores: softmaxed rationale scores with dimension
                (batch_size, length)
        :rtype: torch.FloatTensor
        """
        batch_dict = generate_data(df_test, self.use_cuda)
        x_tokens = batch_dict["x"]
        mask = batch_dict["m"]
        z_scores, _, _ = self.generator(x_tokens, mask)
        z_scores = F.softmax(z_scores, dim=-1)

        return z_scores 
Example #10
Source File: losses.py    From ACAN with MIT License
def get_entropy(self, pred, label):
        n, c, h, w = pred.size()
        label = label.unsqueeze(3).long()
        pred = F.softmax(pred, 1).permute(0, 2, 3, 1)
        one_hot_label = ((torch.arange(c)).cuda() == label).float()

        if self.eps == 0:
            prior = 0
        else:
            if self.priorType == 'gaussian':
                tensor = (torch.arange(c).cuda() - label).float()
                prior = NormalDist(tensor, c / 10)
            elif self.priorType == 'uniform':
                prior = 1 / (c-1)

        smoothed_label = (1 - self.eps) * one_hot_label + self.eps * prior * (1 - one_hot_label)
        entropy = smoothed_label * safe_log(pred) + (1 - smoothed_label) * safe_log(1 - pred)
        return -entropy 
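For intuition, here is a minimal sketch of the uniform-prior smoothing branch above (illustrative values, not from ACAN): the true class keeps probability 1 - eps and the remaining eps mass is spread evenly over the other c - 1 classes, so the smoothed label still sums to 1.

import torch

c, eps = 5, 0.1
label = torch.tensor([2])                                   # true class index
one_hot = (torch.arange(c) == label.unsqueeze(1)).float()   # (1, c)
prior = 1 / (c - 1)                                         # uniform prior over the other classes
smoothed = (1 - eps) * one_hot + eps * prior * (1 - one_hot)
print(smoothed)        # tensor([[0.0250, 0.0250, 0.9000, 0.0250, 0.0250]])
print(smoothed.sum())  # tensor(1.)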
Example #11
Source File: utils.py    From integrated-gradient-pytorch with MIT License
def calculate_outputs_and_gradients(inputs, model, target_label_idx, cuda=False):
    # do the pre-processing
    predict_idx = None
    gradients = []
    for input in inputs:
        input = pre_processing(input, cuda)
        output = model(input)
        output = F.softmax(output, dim=1)
        if target_label_idx is None:
            target_label_idx = torch.argmax(output, 1).item()
        index = np.ones((output.size()[0], 1)) * target_label_idx
        index = torch.tensor(index, dtype=torch.int64)
        if cuda:
            index = index.cuda()
        output = output.gather(1, index)
        # clear grad
        model.zero_grad()
        output.backward()
        gradient = input.grad.detach().cpu().numpy()[0]
        gradients.append(gradient)
    gradients = np.array(gradients)
    return gradients, target_label_idx 
Example #12
Source File: set2set.py    From LanczosNetwork with MIT License
def forward(self, input_set):
    """
      Args:
        input_set: shape N X D

      Returns:
        output_set: shape N (one argmax index per input element)
    """
    num_element = input_set.shape[0]
    element_dim = input_set.shape[1]
    assert element_dim == self.element_dim
    hidden = torch.zeros(1, 2 * self.element_dim).to(input_set.device)
    memory = torch.zeros(1, self.element_dim).to(input_set.device)

    # encoding
    for tt in range(self.num_step_encoder):
      hidden, memory = self.LSTM_encoder(hidden, memory)
      energy = torch.tanh(torch.mm(hidden, self.W_1) + input_set).mm(self.W_2)
      att_weight = F.softmax(energy, dim=0)
      read = (input_set * att_weight).sum(dim=0, keepdim=True)
      hidden = torch.cat([hidden, read], dim=1)

    # decoding
    memory = torch.zeros_like(memory)
    output_set = []
    for tt in range(num_element):
      hidden, memory = self.LSTM_decoder(hidden, memory)
      energy = torch.tanh(torch.mm(hidden, self.W_3) + input_set).mm(self.W_4)
      att_weight = F.softmax(energy, dim=0)
      read = (input_set * att_weight).sum(dim=0, keepdim=True)
      hidden = torch.cat([hidden, read], dim=1)
      energy = torch.tanh(torch.mm(read, self.W_5) + torch.mm(
          input_set, self.W_6)).mm(self.W_7)
      output_set += [torch.argmax(energy)]

    return torch.stack(output_set) 
Example #13
Source File: decoder_deep.py    From ConvLab with MIT License
def logits2words(self, output, decoded_words, dataset, sample_size):
		'''
		* Decode words from logits output at a time step AND put decoded words in final results *
		* take argmax if sample size == 1
		'''
		batch_size = output.size(0)
		if sample_size == 1: # take argmax directly w/o sampling
			topv, topi = F.softmax(output, dim=1).data.topk(1) # both (batch_size, 1)

		else: # sample over word distribution
			topv, topi = [], []
			word_dis = F.softmax(output, dim=1) # (batch_size, output_size)

			# sample from part of the output distribution for word variations
			n_candidate = 3
			word_dis_sort, idx_of_idx = torch.sort(word_dis, dim=1, descending=True)
			word_dis_sort = word_dis_sort[:, :n_candidate]
			idx_of_idx = idx_of_idx[:, :n_candidate]
			sample_idx = torch.multinomial(word_dis_sort, 1) # (batch_size, 1)
			for b in range(batch_size):
				i = int(sample_idx[b])
				idx = int(idx_of_idx[b][i])
				prob = float(word_dis[b][idx])
				topi.append(idx)
				topv.append(prob)
				
			topv = torch.FloatTensor(topv).view(batch_size, 1)
			topi = torch.LongTensor(topi).view(batch_size, 1)
			
		decoded_words_t = np.zeros((batch_size, self.output_size))
		for b in range(batch_size):
			idx = topi[b][0]
			word = dataset.index2word[idx.item()]
			decoded_words[b] += (word + ' ')
			decoded_words_t[b][idx] = 1
		decoded_words_t = Variable(torch.from_numpy(decoded_words_t.astype(np.float32)))

		if self.USE_CUDA:
			decoded_words_t = decoded_words_t.cuda()

		return decoded_words_t 
Example #14
Source File: unsup_net.py    From SEDST with MIT License
def forward(self, z_enc_out, pz_proba, u_enc_out, m_t_input, last_hidden, flag=False):
        """
        decode the response: P(m|u,z)
        :param pz_proba: [Tz,B,V], output of the prior decoder
        :param z_enc_out: [Tz,B,H]
        :param u_enc_out: [T,B,H]
        :param m_t_input: [1,B]
        :param last_hidden:
        :return: proba: [1,B,V]
        """
        batch_size = z_enc_out.size(1)
        m_embed = self.emb(m_t_input)
        z_context = F.dropout(self.attn_z(last_hidden, z_enc_out), self.dropout_rate)
        u_context = F.dropout(self.attn_u(last_hidden, u_enc_out), self.dropout_rate)
        # d_control = self.w4(z_context) + torch.mul(F.sigmoid(self.gate_z(z_context)), self.w5(u_context))
        gru_out, last_hidden = self.gru(torch.cat([z_context, u_context, m_embed], dim=2),
                                        last_hidden)
        gru_out = self.ln1(gru_out)

        gen_score = self.proj(gru_out).squeeze(0)

        z_copy_score = torch.tanh(  # F.tanh is deprecated; torch.tanh is the current API
            self.proj_copy1(torch.cat([z_enc_out, gru_out.repeat(z_enc_out.size(0), 1, 1)], 2)))  # [T,B,H]
        z_copy_score = self.v1(z_copy_score).squeeze(2).transpose(0, 1)  # [B,T]

        scores = F.softmax(torch.cat([gen_score, z_copy_score], dim=1), dim=1)
        gen_score, z_copy_score = scores[:, :gen_score.size(1)], scores[:, gen_score.size(1):]
        z_copy_score = mask_prob(z_copy_score, pz_proba.transpose(0, 1), aux=cfg.aux_device)
        proba = gen_score + self.copy_weight * z_copy_score  # [B,V]
        return proba, last_hidden 
Example #15
Source File: loss.py    From pytorch-segmentation-toolbox with MIT License
def forward(self, predict, target, weight=None):
        """
            Args:
                predict:(n, c, h, w)
                target:(n, h, w)
                weight (Tensor, optional): a manual rescaling weight given to each class.
                                           If given, has to be a Tensor of size "nclasses"
        """
        assert not target.requires_grad

        input_prob = F.softmax(predict, 1)
        target = self.generate_new_target(input_prob, target)
        return self.criterion(predict, target) 
Example #16
Source File: rnn_net.py    From SEDST with MIT License
def forward(self, hidden, encoder_outputs, normalize=True):
        encoder_outputs = encoder_outputs.transpose(0, 1)  # [B,T,H]
        attn_energies = self.score(hidden, encoder_outputs)
        normalized_energy = F.softmax(attn_energies, dim=2)  # [B,1,T]
        context = torch.bmm(normalized_energy, encoder_outputs)  # [B,1,H]
        return context.transpose(0, 1)  # [1,B,H] 
Example #17
Source File: transformer.py    From Doc2EDAG with MIT License
def attention(query, key, value, mask=None, dropout=None):
    """Compute 'Scaled Dot Product Attention'"""
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn 
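A minimal usage sketch of the attention function above; the batch/head/length sizes are illustrative assumptions, not from Doc2EDAG. Each row of p_attn is a softmax distribution over key positions, so it sums to 1.

import torch

batch, heads, seq_len, d_k = 2, 4, 10, 16
query = torch.randn(batch, heads, seq_len, d_k)
key = torch.randn(batch, heads, seq_len, d_k)
value = torch.randn(batch, heads, seq_len, d_k)
mask = torch.ones(batch, 1, seq_len, seq_len)  # 1 = attend, 0 = masked out

out, p_attn = attention(query, key, value, mask=mask)
print(out.shape)           # torch.Size([2, 4, 10, 16])
print(p_attn.sum(dim=-1))  # all ones: each query attends with total weight 1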
Example #18
Source File: mrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def forward(self, x):
        """
        :param x: input feature maps (b, in_channels, y, x, (z))
        :return: rpn_class_logits (b, n_anchors, 2)
        :return: rpn_probs (b, n_anchors, 2)
        :return: rpn_bbox (b, n_anchors, 2 * dim)
        """

        # Shared convolutional base of the RPN.
        x = self.conv_shared(x)

        # Anchor Score. (batch, anchors per location * 2, y, x, (z)).
        rpn_class_logits = self.conv_class(x)
        # Reshape to (batch, 2, anchors)
        axes = (0, 2, 3, 1) if self.dim == 2 else (0, 2, 3, 4, 1)
        rpn_class_logits = rpn_class_logits.permute(*axes)
        rpn_class_logits = rpn_class_logits.contiguous()
        rpn_class_logits = rpn_class_logits.view(x.size()[0], -1, 2)

        # Softmax on last dimension (fg vs. bg).
        rpn_probs = F.softmax(rpn_class_logits, dim=2)

        # Bounding box refinement. (batch, anchors_per_location * (y, x, (z), log(h), log(w), (log(d)), y, x, (z))
        rpn_bbox = self.conv_bbox(x)

        # Reshape to (batch, 2*dim, anchors)
        rpn_bbox = rpn_bbox.permute(*axes)
        rpn_bbox = rpn_bbox.contiguous()
        rpn_bbox = rpn_bbox.view(x.size()[0], -1, self.dim * 2)

        return [rpn_class_logits, rpn_probs, rpn_bbox] 
Example #19
Source File: logistic_mixture.py    From L3C-PyTorch with GNU General Public License v3.0
def cdf_step_non_shared(self, l, targets, c_cur, C, x_c=None) -> CDFOut:
        assert c_cur < C

        # NKHW         NKHW     NKHW
        logit_probs_c, means_c, log_scales_c, K = self._extract_non_shared_c(c_cur, C, l, x_c)

        logit_probs_c_softmax = F.softmax(logit_probs_c, dim=1)  # NKHW, pi_k
        return CDFOut(logit_probs_c_softmax, means_c, log_scales_c, K, targets.to(l.device)) 
Example #20
Source File: multiscale_blueprint.py    From L3C-PyTorch with GNU General Public License v3.0
def get_p_y(y):
    """
    :param y: NLCHW float, logits
    :return: L dimensional vector p
    """
    Ldim = 1
    L = y.shape[Ldim]
    y = y.detach()
    p = F.softmax(y, dim=Ldim)
    p = p.transpose(Ldim, -1)
    p = p.contiguous().view(-1, L)  # nL
    p = torch.mean(p, dim=0)  # L
    return pe.tensor_to_np(p) 
Example #21
Source File: context_query_attention.py    From TVQAplus with MIT License
def forward(self, C, Q, c_mask, q_mask):
        """
        match the dims of '*'; singleton dims are allowed (they broadcast)
        :param C: (N, *, Lc, D)
        :param Q: (N, *, Lq, D)
        :param c_mask: (N, *, Lc)
        :param q_mask: (N, *, Lq)
        :return: (N, Lc, D) and (N, Lq, D)
        """

        S = self.similarity(C, Q, c_mask, q_mask)  # (N, *, Lc, Lq)
        S_ = F.softmax(S, dim=-1)  # (N, *, Lc, Lq)
        A = torch.matmul(S_, Q)  # (N, *, Lc, D)
        return A 
Example #22
Source File: region_proposal_network.py    From easy-faster-rcnn.pytorch with MIT License
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
        batch_size = anchor_bboxes.shape[0]

        proposal_bboxes = BBox.apply_transformer(anchor_bboxes, transformers)
        proposal_bboxes = BBox.clip(proposal_bboxes, left=0, top=0, right=image_width, bottom=image_height)
        proposal_probs = F.softmax(objectnesses[:, :, 1], dim=-1)

        _, sorted_indices = torch.sort(proposal_probs, dim=-1, descending=True)
        nms_proposal_bboxes_batch = []

        for batch_index in range(batch_size):
            sorted_bboxes = proposal_bboxes[batch_index][sorted_indices[batch_index]][:self._pre_nms_top_n]
            sorted_probs = proposal_probs[batch_index][sorted_indices[batch_index]][:self._pre_nms_top_n]
            threshold = 0.7
            kept_indices = nms(sorted_bboxes, sorted_probs, threshold)
            nms_bboxes = sorted_bboxes[kept_indices][:self._post_nms_top_n]
            nms_proposal_bboxes_batch.append(nms_bboxes)

        max_nms_proposal_bboxes_length = max([len(it) for it in nms_proposal_bboxes_batch])
        padded_proposal_bboxes = []

        for nms_proposal_bboxes in nms_proposal_bboxes_batch:
            padded_proposal_bboxes.append(
                torch.cat([
                    nms_proposal_bboxes,
                    torch.zeros(max_nms_proposal_bboxes_length - len(nms_proposal_bboxes), 4).to(nms_proposal_bboxes)
                ])
            )

        padded_proposal_bboxes = torch.stack(padded_proposal_bboxes, dim=0)
        return padded_proposal_bboxes 
Example #23
Source File: model.py    From easy-faster-rcnn.pytorch with MIT License
def generate_detections(self, proposal_bboxes: Tensor, proposal_classes: Tensor, proposal_transformers: Tensor, image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
            batch_size = proposal_bboxes.shape[0]

            proposal_transformers = proposal_transformers.view(batch_size, -1, self.num_classes, 4)
            transformer_normalize_std = self._transformer_normalize_std.to(device=proposal_transformers.device)
            transformer_normalize_mean = self._transformer_normalize_mean.to(device=proposal_transformers.device)
            proposal_transformers = proposal_transformers * transformer_normalize_std + transformer_normalize_mean

            proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, self.num_classes, 1)
            detection_bboxes = BBox.apply_transformer(proposal_bboxes, proposal_transformers)
            detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0, right=image_width, bottom=image_height)
            detection_probs = F.softmax(proposal_classes, dim=-1)

            all_detection_bboxes = []
            all_detection_classes = []
            all_detection_probs = []
            all_detection_batch_indices = []

            for batch_index in range(batch_size):
                for c in range(1, self.num_classes):
                    class_bboxes = detection_bboxes[batch_index, :, c, :]
                    class_probs = detection_probs[batch_index, :, c]
                    threshold = 0.3
                    kept_indices = nms(class_bboxes, class_probs, threshold)
                    class_bboxes = class_bboxes[kept_indices]
                    class_probs = class_probs[kept_indices]

                    all_detection_bboxes.append(class_bboxes)
                    all_detection_classes.append(torch.full((len(kept_indices),), c, dtype=torch.int))
                    all_detection_probs.append(class_probs)
                    all_detection_batch_indices.append(torch.full((len(kept_indices),), batch_index, dtype=torch.long))

            all_detection_bboxes = torch.cat(all_detection_bboxes, dim=0)
            all_detection_classes = torch.cat(all_detection_classes, dim=0)
            all_detection_probs = torch.cat(all_detection_probs, dim=0)
            all_detection_batch_indices = torch.cat(all_detection_batch_indices, dim=0)
            return all_detection_bboxes, all_detection_classes, all_detection_probs, all_detection_batch_indices 
Example #24
Source File: basic.py    From PSMNet with MIT License
def forward(self, left, right):

        refimg_fea     = self.feature_extraction(left)
        targetimg_fea  = self.feature_extraction(right)
 
        # matching: build the cost volume (integer division keeps the sizes integral in Python 3)
        cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4,  refimg_fea.size()[2],  refimg_fea.size()[3]).zero_(), volatile= not self.training).cuda()

        for i in range(self.maxdisp//4):
            if i > 0 :
             cost[:, :refimg_fea.size()[1], i, :,i:]   = refimg_fea[:,:,:,i:]
             cost[:, refimg_fea.size()[1]:, i, :,i:] = targetimg_fea[:,:,:,:-i]
            else:
             cost[:, :refimg_fea.size()[1], i, :,:]   = refimg_fea
             cost[:, refimg_fea.size()[1]:, i, :,:]   = targetimg_fea
        cost = cost.contiguous()

        cost0 = self.dres0(cost)
        cost0 = self.dres1(cost0) + cost0
        cost0 = self.dres2(cost0) + cost0 
        cost0 = self.dres3(cost0) + cost0 
        cost0 = self.dres4(cost0) + cost0

        cost = self.classify(cost0)
        cost = F.interpolate(cost, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear')  # F.upsample is deprecated in favor of F.interpolate
        cost = torch.squeeze(cost,1)
        pred = F.softmax(cost, dim=1)  # softmax over the disparity dimension
        pred = disparityregression(self.maxdisp)(pred)

        return pred 
Example #25
Source File: ufrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def compute_rpn_class_loss(rpn_match, rpn_class_logits, shem_poolsize):
    """
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :param rpn_class_logits: (n_anchors, 2). logits from RPN classifier.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample
    (stochastic-hard-example-mining).
    :return: loss: torch tensor
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """

    # filter out neutral anchors.
    pos_indices = torch.nonzero(rpn_match == 1)
    neg_indices = torch.nonzero(rpn_match == -1)

    # loss for positive samples
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        roi_logits_pos = rpn_class_logits[pos_indices]
        pos_loss = F.cross_entropy(roi_logits_pos, torch.LongTensor([1] * pos_indices.shape[0]).cuda())
    else:
        pos_loss = torch.FloatTensor([0]).cuda()

    # loss for negative samples: draw hard negative examples (SHEM)
    # that match the number of positive samples, but at least 1.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = rpn_class_logits[neg_indices]
        negative_count = np.max((1, pos_indices.cpu().data.numpy().size))
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], torch.LongTensor([0] * neg_ix.shape[0]).cuda())
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.FloatTensor([0]).cuda()
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix 
Example #26
Source File: ufrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def forward(self, x):
        """
        :param x: input feature maps (b, in_channels, y, x, (z))
        :return: rpn_class_logits (b, n_anchors, 2)
        :return: rpn_probs (b, n_anchors, 2)
        :return: rpn_bbox (b, n_anchors, 2 * dim)
        """

        # Shared convolutional base of the RPN.
        x = self.conv_shared(x)

        # Anchor Score. (batch, anchors per location * 2, y, x, (z)).
        rpn_class_logits = self.conv_class(x)
        # Reshape to (batch, 2, anchors)
        axes = (0, 2, 3, 1) if self.dim == 2 else (0, 2, 3, 4, 1)
        rpn_class_logits = rpn_class_logits.permute(*axes)
        rpn_class_logits = rpn_class_logits.contiguous()
        rpn_class_logits = rpn_class_logits.view(x.size()[0], -1, 2)

        # Softmax on last dimension (fg vs. bg).
        rpn_probs = F.softmax(rpn_class_logits, dim=2)

        # Bounding box refinement. (batch, anchors_per_location * (y, x, (z), log(h), log(w), (log(d)), y, x, (z))
        rpn_bbox = self.conv_bbox(x)

        # Reshape to (batch, 2*dim, anchors)
        rpn_bbox = rpn_bbox.permute(*axes)
        rpn_bbox = rpn_bbox.contiguous()
        rpn_bbox = rpn_bbox.view(x.size()[0], -1, self.dim * 2)

        return [rpn_class_logits, rpn_probs, rpn_bbox] 
Example #27
Source File: detection_unet.py    From medicaldetectiontoolkit with Apache License 2.0
def test_forward(self, batch, **kwargs):
        """
        test method. wrapper around forward pass of network without usage of any ground truth information.
        prepares input data for processing and stores outputs in a dictionary.
        :param batch: dictionary containing 'data'
        :param kwargs:
        :return: results_dict: dictionary with keys:
               'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                       [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
               'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes]
        """
        img = batch['data']
        var_img = torch.FloatTensor(img).cuda()
        seg_logits, box_coords, max_scores = self.forward(var_img)

        results_dict = {}
        results_dict['boxes'] = [[] for _ in range(img.shape[0])]
        for cix in range(len(self.cf.class_dict.keys())):
            for bix in range(img.shape[0]):
                for rix in range(len(max_scores[cix][bix])):
                    if max_scores[cix][bix][rix] > self.cf.detection_min_confidence:
                        results_dict['boxes'][bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]),
                                                           'box_score': max_scores[cix][bix][rix],
                                                           'box_pred_class_id': cix + 1,  # add 0 for background.
                                                           'box_type': 'det'})

        results_dict['seg_preds'] = np.argmax(F.softmax(seg_logits, 1).cpu().data.numpy(), 1)[:, np.newaxis].astype('uint8')
        return results_dict 
Example #28
Source File: detection_unet.py    From medicaldetectiontoolkit with Apache License 2.0
def forward(self, x):
        """
        forward pass of network.
        :param x: input image. shape (b, c, y, x, (z))
        :return: seg_logits: shape (b, n_classes, y, x, (z))
        :return: out_box_coords: list over n_classes. elements are arrays(b, n_rois, (y1, x1, y2, x2, (z1), (z2)))
        :return: out_max_scores: list over n_classes. elements are arrays(b, n_rois)
        """

        out_features = self.fpn(x)[0]
        seg_logits = self.conv_final(out_features)
        out_box_coords, out_max_scores = [], []
        smax = F.softmax(seg_logits, dim=1).detach().cpu().data.numpy()

        for cl in range(1, len(self.cf.class_dict.keys()) + 1):
            max_scores = [[] for _ in range(x.shape[0])]
            hard_mask = np.copy(smax).argmax(1)
            hard_mask[hard_mask != cl] = 0
            hard_mask[hard_mask == cl] = 1
            # perform connected component analysis on argmaxed predictions,
            # draw boxes around components and return coordinates.
            box_coords, rois = get_coords(hard_mask, self.cf.n_roi_candidates, self.cf.dim)

            # for each object, choose the highest softmax score (in the respective class)
            # of all pixels in the component as object score.
            for bix, broi in enumerate(rois):
                for nix, nroi in enumerate(broi):
                    component_score = np.max(smax[bix, cl][nroi > 0]) if self.cf.aggregation_operation == 'max' \
                        else np.median(smax[bix, cl][nroi > 0])
                    max_scores[bix].append(component_score)
            out_box_coords.append(box_coords)
            out_max_scores.append(max_scores)
        return seg_logits, out_box_coords, out_max_scores 
Example #29
Source File: retina_net.py    From medicaldetectiontoolkit with Apache License 2.0
def compute_class_loss(anchor_matches, class_pred_logits, shem_poolsize=20):
    """
    :param anchor_matches: (n_anchors). [-1, 0, class_id] for negative, neutral, and positive matched anchors.
    :param class_pred_logits: (n_anchors, n_classes). logits from classifier sub-network.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample (online-hard-example-mining).
    :return: loss: torch tensor.
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    pos_indices = torch.nonzero(anchor_matches > 0)
    neg_indices = torch.nonzero(anchor_matches == -1)

    # get positive samples and calculate loss.
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        roi_logits_pos = class_pred_logits[pos_indices]
        targets_pos = anchor_matches[pos_indices]
        pos_loss = F.cross_entropy(roi_logits_pos, targets_pos.long())
    else:
        pos_loss = torch.FloatTensor([0]).cuda()

    # get negative samples, such that the amount matches the number of positive samples, but at least 1.
    # get high scoring negatives by applying online-hard-example-mining.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = class_pred_logits[neg_indices]
        negative_count = np.max((1, pos_indices.size()[0]))
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], torch.LongTensor([0] * neg_ix.shape[0]).cuda())
        # return the indices of negative samples, which contributed to the loss (for monitoring plots).
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.FloatTensor([0]).cuda()
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix 
Example #30
Source File: mrcnn.py    From medicaldetectiontoolkit with Apache License 2.0
def compute_rpn_class_loss(rpn_match, rpn_class_logits, shem_poolsize):
    """
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :param rpn_class_logits: (n_anchors, 2). logits from RPN classifier.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample
    (stochastic-hard-example-mining).
    :return: loss: torch tensor
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """

    # filter out neutral anchors.
    pos_indices = torch.nonzero(rpn_match == 1)
    neg_indices = torch.nonzero(rpn_match == -1)

    # loss for positive samples
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        roi_logits_pos = rpn_class_logits[pos_indices]
        pos_loss = F.cross_entropy(roi_logits_pos, torch.LongTensor([1] * pos_indices.shape[0]).cuda())
    else:
        pos_loss = torch.FloatTensor([0]).cuda()

    # loss for negative samples: draw hard negative examples (SHEM)
    # that match the number of positive samples, but at least 1.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = rpn_class_logits[neg_indices]
        negative_count = np.max((1, pos_indices.cpu().data.numpy().size))
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], torch.LongTensor([0] * neg_ix.shape[0]).cuda())
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.FloatTensor([0]).cuda()
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix