Python chainer.functions.transpose() Examples
The following are 30 code examples of chainer.functions.transpose().
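Before the examples, a minimal sketch of the function itself: chainer.functions.transpose(x, axes) returns a variable with the axes of x permuted. The (0, 2, 1) pattern below is the permutation most of the examples rely on (sizes are arbitrary):

import numpy as np
import chainer.functions as F

# (B, L, N) -> (B, N, L): swap the last two axes, keep the batch axis.
x = np.zeros((2, 5, 8), dtype=np.float32)
y = F.transpose(x, (0, 2, 1))
assert y.shape == (2, 8, 5)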
Example #1
Source File: dcgan_updater_base.py From tgan with MIT License
def _generate_fake_video(self, z_slow, z_fast):
    n_b, z_fast_dim, n_frames = z_fast.shape
    self.batchsize = n_b

    z_fast = F.reshape(F.transpose(
        z_fast, [0, 2, 1]), (n_b * n_frames, z_fast_dim))

    n_b, z_slow_dim = z_slow.shape
    z_slow = F.reshape(
        F.broadcast_to(
            F.reshape(z_slow, (n_b, 1, z_slow_dim)),
            (n_b, n_frames, z_slow_dim)),
        (n_b * n_frames, z_slow_dim))

    with chainer.using_config('train', True):
        fake_video = self.vgen(z_slow, z_fast)

    _, n_ch, h, w = fake_video.shape
    fake_video = F.transpose(
        F.reshape(fake_video, (n_b, n_frames, n_ch, h, w)),
        [0, 2, 1, 3, 4])
    return fake_video
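The two reshape/transpose pairs fold the frame axis into the batch axis, so the frame generator sees one plain batch of frames, and then unfold the output to (batch, channel, frame, height, width). A numpy walk-through of the fold, with hypothetical sizes:

import numpy as np

n_b, z_fast_dim, n_frames = 2, 3, 4
z_fast = np.zeros((n_b, z_fast_dim, n_frames), dtype=np.float32)
flat = z_fast.transpose(0, 2, 1).reshape(n_b * n_frames, z_fast_dim)
assert flat.shape == (8, 3)  # one row per (sample, frame) pair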
Example #2
Source File: net.py From pixcaler with MIT License
def __call__(self, x):
    r = self.r
    out = self.conv(x)
    batchsize = out.shape[0]
    in_channels = out.shape[1]
    out_channels = in_channels // (r ** 2)
    in_height = out.shape[2]
    in_width = out.shape[3]
    out_height = in_height * r
    out_width = in_width * r
    out = F.reshape(out, (batchsize, r, r, out_channels, in_height, in_width))
    out = F.transpose(out, (0, 3, 4, 1, 5, 2))
    out = F.reshape(out, (batchsize, out_channels, out_height, out_width))
    return out

# U-net https://arxiv.org/pdf/1611.07004v1.pdf
# convolution-batchnormalization-(dropout)-relu
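This reshape-transpose-reshape is the sub-pixel ("pixel shuffle") upscaling trick: the r * r factor stored in the channel axis is interleaved into the spatial axes. A numpy shape walk-through with hypothetical sizes (r=2, 16 input channels, an 8x8 map):

import numpy as np

out = np.zeros((1, 16, 8, 8), dtype=np.float32)
out = out.reshape(1, 2, 2, 4, 8, 8)    # (batch, r, r, out_ch, H, W)
out = out.transpose(0, 3, 4, 1, 5, 2)  # (batch, out_ch, H, r, W, r)
out = out.reshape(1, 4, 16, 16)        # interleave r into H and W
assert out.shape == (1, 4, 16, 16)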
Example #3
Source File: nets.py From treasure-boxes with MIT License
def block_embed(embed, x, dropout=0.0):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e
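A minimal usage sketch of the function above, assuming block_embed and its F (chainer.functions) import are in scope; the sizes are hypothetical (B=2 sentences padded to L=5, vocabulary 100, embedding dimension N=8):

import numpy as np
import chainer.links as L

embed = L.EmbedID(100, 8)
x = np.zeros((2, 5), dtype=np.int32)  # (B, L) int array of word ids
e = block_embed(embed, x)
assert e.shape == (2, 8, 5, 1)        # (B, N, L, 1)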
Example #4
Source File: nets.py From qb with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e
Example #5
Source File: net.py From chainer-partial_convolution_image_inpainting with MIT License
def __call__(self, x, mask):
    self.m.W.data = self.xp.array(self.maskW)  # mask windows are set to 1
    h = self.c(x * mask)  # (B, C, H, W)
    B, C, H, W = h.shape
    b = F.transpose(F.broadcast_to(self.c.b, (B, H, W, C)), (0, 3, 1, 2))
    h = h - b
    mask_sums = self.m(mask)
    mask_new = (self.xp.sign(mask_sums.data - 0.5) + 1.0) * 0.5
    mask_new_b = mask_new.astype("bool")
    mask_sums = F.where(
        mask_new_b, mask_sums,
        0.01 * Variable(self.xp.ones(mask_sums.shape).astype("f")))
    h = h / mask_sums + b
    mask_new = Variable(mask_new)
    h = F.where(mask_new_b, h,
                Variable(self.xp.zeros(h.shape).astype("f")))
    if self.bn:
        h = self.batchnorm(h)
    if self.noise:
        h = add_noise(h)
    if self.dropout:
        h = F.dropout(h)
    if self.activation is not None:
        h = self.activation(h)
    return h, mask_new
Example #6
Source File: block.py From Deep_VoiceChanger with MIT License
def __call__(self, x):
    if self.dr:
        x = F.dropout(x, self.dr)
    x = F.transpose(x, (0, 2, 1, 3))
    out_shape = list(x.shape)
    x = F.reshape(x, (-1, x.shape[2] * x.shape[3]))
    x = self.l(x)
    x = self.activation(x)
    out_shape[2] = self.out_ch
    x = F.reshape(x, out_shape)
    x = F.transpose(x, (0, 2, 1, 3))
    return x
Example #7
Source File: nets.py From contextual_augmentation with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e
Example #8
Source File: nets.py From contextual_augmentation with MIT License
def embed_xs_with_prediction(self, xs, labels=None, batch='concat'):
    predicted_exs = self.bilm.predict_embed(
        xs, self.embed.W,
        labels=labels,
        dropout=self.config['dropout'],
        mode=self.config['mode'],
        temp=self.config['temp'],
        word_lower_bound=self.config['word_lower_bound'],
        gold_lower_bound=self.config['gold_lower_bound'],
        gumbel=self.config['gumbel'],
        residual=self.config['residual'],
        wordwise=self.config['wordwise'],
        add_original=self.config['add_original'],
        augment_ratio=self.config['augment_ratio'])
    if batch == 'concat':
        predicted_ex_block = F.pad_sequence(predicted_exs, padding=0.)
        predicted_ex_block = F.transpose(
            predicted_ex_block, (0, 2, 1))[:, :, :, None]
        return predicted_ex_block
    elif batch == 'list':
        return predicted_exs
    else:
        raise NotImplementedError
Example #9
Source File: set2set.py From chainer-chemistry with MIT License
def __call__(self, h):
    # type: (chainer.Variable) -> chainer.Variable
    xp = cuda.get_array_module(h)
    mb, node, ch = h.shape  # type: int, int, int
    if self.q_star is None:
        self.q_star = [
            xp.zeros((1, self.in_channels * 2)).astype('f')
            for _ in range(mb)
        ]
    self.hx, self.cx, q = self.lstm_layer(self.hx, self.cx, self.q_star)
    # self.hx: (mb, mb, ch)
    # self.cx: (mb, mb, ch)
    # q: List[(1, ch) * mb]
    q = functions.stack(q)  # q: (mb, 1, ch)
    q_ = functions.transpose(q, axes=(0, 2, 1))  # q_: (mb, ch, 1)
    e = functions.matmul(h, q_)  # e: (mb, node, 1)
    a = functions.softmax(e)  # a: (mb, node, 1)
    a = functions.broadcast_to(a, h.shape)  # a: (mb, node, ch)
    r = functions.sum((a * h), axis=1, keepdims=True)  # r: (mb, 1, ch)
    q_star_ = functions.concat((q, r), axis=2)  # q_star_: (mb, 1, ch*2)
    self.q_star = functions.separate(q_star_)
    return functions.reshape(q_star_, (mb, ch * 2))
Example #10
Source File: nets.py From contextual_augmentation with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e
Example #11
Source File: irevnet.py From imgclsmob with MIT License
def inverse(self, y):
    scale_sqr = self.scale * self.scale
    batch, y_channels, y_height, y_width = y.shape
    assert (y_channels % scale_sqr == 0)
    x_channels = y_channels // scale_sqr
    x_height = y_height * self.scale
    x_width = y_width * self.scale

    x = F.transpose(y, axes=(0, 2, 3, 1))
    x = x.reshape(batch, y_height, y_width, scale_sqr, x_channels)
    d3_split_seq = F.split_axis(
        x, indices_or_sections=(x.shape[3] // self.scale), axis=3)
    d3_split_seq = [t.reshape(batch, y_height, x_width, x_channels)
                    for t in d3_split_seq]
    x = F.stack(d3_split_seq, axis=0)
    x = F.transpose(
        F.swapaxes(x, axis1=0, axis2=1),
        axes=(0, 2, 1, 3, 4)).reshape(batch, x_height, x_width, x_channels)
    x = F.transpose(x, axes=(0, 3, 1, 2))
    return x
Example #12
Source File: nets.py From vecto with Mozilla Public License 2.0
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e
Example #13
Source File: losses.py From EPG with MIT License
def process_trajectory(self, l):
    """This is the time-dependent convolution operation, applied to a
    trajectory (in order).
    """
    shp = l.shape[0]
    # First dim is batchsize=1, then either 1 channel for 2d conv or
    # n_feat channels for 1d conv.
    l = F.expand_dims(l, axis=0)
    l = F.transpose(l, (0, 2, 1))
    l = self.traj_c0(l)
    l = F.leaky_relu(l)
    l = self.traj_c1(l)
    l = F.leaky_relu(l)
    l = F.sum(l, axis=(0, 2)) / l.shape[0] / l.shape[2]
    l = F.expand_dims(l, axis=0)
    l = self.traj_d0(l)
    l = F.tile(l, (shp, 1))
    return l
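The expand_dims/transpose pair turns a (T, n_feat) trajectory into the channels-first (1, n_feat, T) layout that Chainer's 1-d convolutions expect. A minimal shape check with hypothetical sizes (T=10, n_feat=4):

import numpy as np

l = np.zeros((10, 4), dtype=np.float32)
l = l[np.newaxis]         # (1, 10, 4): add the batch axis
l = l.transpose(0, 2, 1)  # (1, 4, 10): channels first
assert l.shape == (1, 4, 10)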
Example #14
Source File: shape_transformer_to_2d.py From chainer-chemistry with MIT License
def inverse_transform(self, x):
    if x.ndim != 2:
        raise ValueError(
            "[ERROR] Unexpected value x.shape={}, 2-dim array is expected"
            .format(x.shape))
    if self.original_shape is None:
        raise AttributeError(
            '[Error] original_shape is None, call transform beforehand!')
    ndim = len(self.original_shape)
    axis = self.axis
    if axis < 0:
        axis += ndim
    inverse_transpose_order = [i for i in range(ndim - 1)]
    inverse_transpose_order.insert(axis, ndim - 1)
    x = functions.reshape(
        x, tuple([self.original_shape[i] for i in self.transpose_order]))
    x = functions.transpose(x, tuple(inverse_transpose_order))
    return x
Example #15
Source File: block.py From Deep_VoiceChanger with MIT License
def __call__(self, x):
    if self.dr:
        with chainer.using_config('train', True):
            x = F.dropout(x, self.dr)
    if self.gap:
        x = F.sum(x, axis=(2, 3))
    N = x.shape[0]
    # Below code copied from
    # https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
    feature = F.reshape(F.leaky_relu(x), (N, -1))
    m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
    m0 = F.broadcast_to(m, (N, self.B * self.C, N))
    m1 = F.transpose(m0, (2, 1, 0))
    d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
    d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
    h = F.concat([feature, d])
    h = self.l(h)
    return h
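The broadcast/transpose pair is what makes minibatch discrimination cheap: m0[i, :, j] holds sample i's features and m1[i, :, j] holds sample j's, so m0 - m1 yields every pairwise difference in one broadcast. A minimal numpy check of that property (hypothetical N=3 samples, B*C=4 features):

import numpy as np

m = np.arange(12, dtype=np.float32).reshape(3, 4, 1)
m0 = np.broadcast_to(m, (3, 4, 3))
m1 = m0.transpose(2, 1, 0)
diff = np.abs(m0 - m1)  # diff[i, :, j] == |features_i - features_j|
assert np.allclose(diff[0, :, 1], np.abs(m[0, :, 0] - m[1, :, 0]))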
Example #16
Source File: net.py From chainer-gan-lib with MIT License
def __call__(self, x):
    N = x.data.shape[0]
    h = F.leaky_relu(self.c0_0(x))
    h = F.leaky_relu(self.bn0_1(self.c0_1(h)))
    h = F.leaky_relu(self.bn1_0(self.c1_0(h)))
    h = F.leaky_relu(self.bn1_1(self.c1_1(h)))
    h = F.leaky_relu(self.bn2_0(self.c2_0(h)))
    h = F.leaky_relu(self.bn2_1(self.c2_1(h)))
    feature = F.reshape(F.leaky_relu(self.c3_0(h)), (N, 8192))
    m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
    m0 = F.broadcast_to(m, (N, self.B * self.C, N))
    m1 = F.transpose(m0, (2, 1, 0))
    d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
    d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
    h = F.concat([feature, d])
    return self.l4(h)
Example #17
Source File: tgan_updater_base.py From tgan with MIT License
def _generate_fake_video(self, z_slow, z_fast):
    n_b, z_fast_dim, n_frames = z_fast.shape
    self.batchsize = n_b

    z_fast = F.reshape(F.transpose(
        z_fast, [0, 2, 1]), (n_b * n_frames, z_fast_dim))

    n_b, z_slow_dim = z_slow.shape
    z_slow = F.reshape(
        F.broadcast_to(
            F.reshape(z_slow, (n_b, 1, z_slow_dim)),
            (n_b, n_frames, z_slow_dim)),
        (n_b * n_frames, z_slow_dim))

    with chainer.using_config('train', True):
        fake_video = self.vgen(z_slow, z_fast)

    _, n_ch, h, w = fake_video.shape
    fake_video = F.transpose(
        F.reshape(fake_video, (n_b, n_frames, n_ch, h, w)),
        [0, 2, 1, 3, 4])
    return fake_video
Example #18
Source File: transformer_text_localizer.py From kiss with GNU General Public License v3.0
def get_transform_params(self, features):
    batch_size, num_channels, feature_height, feature_weight = features.shape
    features = F.reshape(features, (batch_size, num_channels, -1))
    features = F.transpose(features, (0, 2, 1))

    target = chainer.Variable(
        self.xp.zeros((batch_size, 1, 6), dtype=chainer.get_dtype()))
    for _ in range(self.num_bboxes_to_localize):
        embedded_params = self.param_embedder(target.array, n_batch_axes=2)
        embedded_params = self.positional_encoding(embedded_params)
        decoded = self.decoder(embedded_params, features, None, self.mask)
        params = self.param_predictor(decoded, n_batch_axes=2)
        target = F.concat([target, params[:, -1:]])

    target = F.reshape(target[:, 1:], (-1,) + target.shape[2:])
    transform_params = rotation_dropout(
        F.reshape(target, (-1, 2, 3)), ratio=self.dropout_ratio)
    return transform_params
Example #19
Source File: attention.py From models with MIT License
def __call__(self, query, key, value, mask=None):
    """Perform attention on the value array, using the query and key
    parameters for calculating the attention mask.

    :param query: matrix of shape (batch_size, num_timesteps, transformer_size)
        that is used for attention mask calculation
    :param key: matrix of shape (batch_size, num_timesteps, transformer_size)
        that is used for attention mask calculation
    :param value: matrix of shape (batch_size, num_timesteps, transformer_size)
        that is used for attention calculation
    :param mask: mask that can be used to mask out parts of the feature maps
        and avoid attending to those parts
    :return: the attended feature map `value`.
    """
    if mask is not None:
        mask = mask[:, self.xp.newaxis, ...]
    batch_size = len(query)

    query, key, value = [
        self.project(linear, x, batch_size)
        for linear, x in zip(self.linears, (query, key, value))
    ]

    x, self.attention = self.attention_implementation(
        query, key, value, mask=mask, dropout_ratio=self.dropout_ratio)

    x = F.transpose(x, (0, 2, 1, 3))
    x = F.reshape(x, (batch_size, -1, self.num_heads * self.key_dimensionality))

    return self.linears[-1](x, n_batch_axes=2)
Example #20
Source File: elmo.py From models with MIT License
def _load_projection(self):
    cnn_options = self._options['char_cnn']
    filters = cnn_options['filters']
    n_filters = sum(f[1] for f in filters)

    with self.init_scope():
        self._projection = L.Linear(
            n_filters, self.output_dim, nobias=False)
    with h5py.File(cached_path(self._weight_file), 'r') as fin:
        weight = fin['CNN_proj']['W_proj'][...]
        bias = fin['CNN_proj']['b_proj'][...]
        self._projection.W.data[:] = numpy.transpose(weight)
        self._projection.b.data[:] = bias

    self._projection.W._requires_grad = self.requires_grad
    self._projection.b._requires_grad = self.requires_grad
Example #21
Source File: modeling.py From models with MIT License
def __call__(self, input_ids, input_mask, token_type_ids):
    final_hidden = self.bert.get_sequence_output(
        input_ids, input_mask, token_type_ids)
    batch_size = final_hidden.shape[0]
    seq_length = final_hidden.shape[1]
    hidden_size = final_hidden.shape[2]

    final_hidden_matrix = F.reshape(
        final_hidden, [batch_size * seq_length, hidden_size])

    logits = self.output(final_hidden_matrix)
    logits = F.reshape(logits, [batch_size, seq_length, 2])
    logits = logits - (1 - input_mask[:, :, None]) * 1000.  # ignore pads
    logits = F.transpose(logits, [2, 0, 1])

    unstacked_logits = F.separate(logits, axis=0)

    (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])
    return (start_logits, end_logits)
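The transpose to [2, 0, 1] moves the start/end axis to the front so F.separate can unstack the two score maps in one call. The same unstacking in plain numpy, with hypothetical batch_size=2 and seq_length=3:

import numpy as np

logits = np.zeros((2, 3, 2), dtype=np.float32)  # (batch, seq, 2)
stacked = logits.transpose(2, 0, 1)             # (2, batch, seq)
start_logits, end_logits = stacked[0], stacked[1]
assert start_logits.shape == end_logits.shape == (2, 3)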
Example #22
Source File: feature_propagation_block.py From chainer-pointnet with MIT License
def __call__(self, distances, points1, points2):
    """
    Args:
        distances (numpy.ndarray or cupy.ndarray):
            3-dim array (bs, num_point2, num_point1)
        points1 (Variable): 3-dim (batch_size, num_point1, ch1)
        points2 (Variable): 3-dim (batch_size, num_point2, ch2)
            points2 is deeper, rich feature. num_point1 > num_point2

    Returns (Variable): 3-dim (batch_size, num_point1, ch1+ch2)
    """
    # h: interpolated_points (batch_size, num_point1, ch1+ch2)
    h = self.interpolation(distances, points1, points2)
    # h: interpolated_points (batch_size, ch1+ch2, num_point1, 1)
    h = functions.transpose(h, (0, 2, 1))[:, :, :, None]
    for conv_block in self.feature_extractor_list:
        h = conv_block(h)
    h = functions.transpose(h[:, :, :, 0], (0, 2, 1))
    return h  # h (bs, num_point, ch')
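The (0, 2, 1) transpose plus the trailing None axis recast point features (bs, num_point, ch) as an image-like (bs, ch, num_point, 1) map that the 2-d convolution blocks accept, and the final transpose undoes it. A minimal shape sketch with hypothetical sizes:

import numpy as np

h = np.zeros((2, 128, 6), dtype=np.float32)  # (bs, num_point, ch)
h = h.transpose(0, 2, 1)[:, :, :, None]      # (bs, ch, num_point, 1)
assert h.shape == (2, 6, 128, 1)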
Example #23
Source File: set_abstraction_all_block.py From chainer-pointnet with MIT License
def __call__(self, coord_points, feature_points=None):
    # coord_points (batch_size, num_point, coord_dim)
    # feature_points (batch_size, num_point, ch)
    # num_point, ch: coord_dim

    # grouped_points (batch_size, k, num_sample, channel)
    # center_points (batch_size, k, coord_dim)
    grouped_points, center_points = self.sampling_grouping(
        coord_points, feature_points=feature_points)
    # set alias `h` -> (bs, channel, num_sample, k)
    # Note: transpose may be removed by optimizing shape sequence
    # for sampling_grouping
    h = functions.transpose(grouped_points, (0, 3, 2, 1))
    # h (bs, ch, num_sample_in_region, k=num_group)
    for conv_block in self.feature_extractor_list:
        h = conv_block(h)
    # TODO: try other option of pooling function
    h = functions.max(h, axis=2, keepdims=True)
    # h (bs, ch, 1, k=num_group)
    for conv_block in self.head_list:
        h = conv_block(h)
    h = functions.transpose(h[:, :, 0, :], (0, 2, 1))
    return center_points, h  # (bs, k, coord), h (bs, k, ch')
Example #24
Source File: fsns.py From see with GNU General Public License v3.0
def __call__(self, images, label=None):
    if self.uses_original_data:
        # handle each individual view as increase in batch size
        batch_size, num_channels, height, width = images.shape
        images = F.reshape(images, (batch_size, num_channels, height, 4, -1))
        images = F.transpose(images, (0, 3, 1, 2, 4))
        images = F.reshape(images, (batch_size * 4, num_channels, height, width // 4))

    batch_size = images.shape[0]
    h = self.localization_net(images)
    new_batch_size = h.shape[0]
    batch_size_increase_factor = new_batch_size // batch_size
    images = F.concat([images for _ in range(batch_size_increase_factor)], axis=0)
    if label is None:
        return self.recognition_net(images, h)

    return self.recognition_net(images, h, label)
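The reshape/transpose/reshape sequence slices a horizontal strip of four views into four separate batch entries. A numpy walk-through with a hypothetical FSNS-style input (batch 1, 3 channels, 100 px high, four 100 px views side by side):

import numpy as np

images = np.zeros((1, 3, 100, 400), dtype=np.float32)
images = images.reshape(1, 3, 100, 4, -1)    # split width into 4 views
images = images.transpose(0, 3, 1, 2, 4)     # move the view axis forward
images = images.reshape(1 * 4, 3, 100, 100)  # fold views into the batch
assert images.shape == (4, 3, 100, 100)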
Example #25
Source File: nets.py From chainer with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e
Example #26
Source File: yolov2_predict.py From YOLOv2 with MIT License
def __call__(self, orig_img):
    orig_input_height, orig_input_width, _ = orig_img.shape
    # img = cv2.resize(orig_img, (640, 640))
    img = reshape_to_yolo_size(orig_img)
    input_height, input_width, _ = img.shape
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, dtype=np.float32) / 255.0
    img = img.transpose(2, 0, 1)

    # forward
    x_data = img[np.newaxis, :, :, :]
    x = Variable(x_data)
    x, y, w, h, conf, prob = self.model.predict(x)

    # parse results
    _, _, _, grid_h, grid_w = x.shape
    x = F.reshape(x, (self.n_boxes, grid_h, grid_w)).data
    y = F.reshape(y, (self.n_boxes, grid_h, grid_w)).data
    w = F.reshape(w, (self.n_boxes, grid_h, grid_w)).data
    h = F.reshape(h, (self.n_boxes, grid_h, grid_w)).data
    conf = F.reshape(conf, (self.n_boxes, grid_h, grid_w)).data
    prob = F.transpose(
        F.reshape(prob, (self.n_boxes, self.n_classes, grid_h, grid_w)),
        (1, 0, 2, 3)).data
    detected_indices = (conf * prob).max(axis=0) > self.detection_thresh

    results = []
    for i in range(detected_indices.sum()):
        results.append({
            "label": self.labels[prob.transpose(1, 2, 3, 0)[detected_indices][i].argmax()],
            "probs": prob.transpose(1, 2, 3, 0)[detected_indices][i],
            "conf": conf[detected_indices][i],
            "objectness": conf[detected_indices][i] * prob.transpose(1, 2, 3, 0)[detected_indices][i].max(),
            "box": Box(
                x[detected_indices][i] * orig_input_width,
                y[detected_indices][i] * orig_input_height,
                w[detected_indices][i] * orig_input_width,
                h[detected_indices][i] * orig_input_height).crop_region(
                    orig_input_height, orig_input_width)
        })

    # nms
    nms_results = nms(results, self.iou_thresh)
    return nms_results
Example #27
Source File: pointnet_seg.py From chainer-pointnet with MIT License
def calc_trans_loss(t):
    # Loss to enforce the transformation as orthogonal matrix
    # t (batchsize, K, K) - transform matrix
    xp = cuda.get_array_module(t)
    bs, k1, k2 = t.shape
    assert k1 == k2
    mat_diff = functions.matmul(t, functions.transpose(t, (0, 2, 1)))
    mat_diff = mat_diff - xp.identity(k1, dtype=xp.float32)
    # divide by 2. is to make the behavior same with tf.
    # https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/nn/l2_loss
    return functions.sum(functions.batch_l2_norm_squared(mat_diff)) / 2.
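For an orthogonal t, the product t @ t.T is the identity and the loss above vanishes; a minimal numpy check with a 2x2 rotation matrix (arbitrary angle):

import numpy as np

theta = 0.3
t = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta),  np.cos(theta)]], dtype=np.float32)
assert np.allclose(t @ t.T, np.eye(2), atol=1e-6)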
Example #28
Source File: pointnet_seg.py From chainer-pointnet with MIT License
def __call__(self, x, t):
    h, t1, t2 = self.calc(x)
    # h: (bs, ch, N), t: (bs, N)
    # print('h', h.shape, 't', t.shape)
    bs, ch, n = h.shape
    h = functions.reshape(functions.transpose(h, (0, 2, 1)), (bs * n, ch))
    t = functions.reshape(t, (bs * n,))
    cls_loss = functions.softmax_cross_entropy(h, t)
    reporter.report({'cls_loss': cls_loss}, self)
    loss = cls_loss
    # Enforce the transformation as orthogonal matrix
    if self.trans and self.trans_lam1 >= 0:
        trans_loss1 = self.trans_lam1 * calc_trans_loss(t1)
        reporter.report({'trans_loss1': trans_loss1}, self)
        loss = loss + trans_loss1
    if self.trans and self.trans_lam2 >= 0:
        trans_loss2 = self.trans_lam2 * calc_trans_loss(t2)
        reporter.report({'trans_loss2': trans_loss2}, self)
        loss = loss + trans_loss2
    reporter.report({'loss': loss}, self)

    if self.compute_accuracy:
        acc = functions.accuracy(h, t)
        reporter.report({'accuracy': acc}, self)
    return loss
Example #29
Source File: attention.py From kiss with GNU General Public License v3.0
def __call__(self, query, key, value, mask=None):
    if mask is not None:
        mask = mask[:, self.xp.newaxis, ...]
    batch_size = len(query)

    query, key, value = [
        self.project(linear, x, batch_size)
        for linear, x in zip(self.linears, (query, key, value))
    ]

    x, self.attention = self.attention_implementation(
        query, key, value, mask=mask, dropout_ratio=self.dropout_ratio)

    x = F.transpose(x, (0, 2, 1, 3))
    x = F.reshape(x, (batch_size, -1, self.num_heads * self.key_dimensionality))

    return self.linears[-1](x, n_batch_axes=2)
Example #30
Source File: angular_loss.py From deep_metric_learning with MIT License
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray): Anchor vectors. Each vector
            in f must be l2 normalized.
        f_p (chainer.Variable or xp.ndarray): Positive vectors. Each vector
            in f_p must be l2 normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
    # term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))