Python chainer.functions.transpose() Examples

The following are 30 code examples of chainer.functions.transpose(), drawn from open-source projects. The originating project, source file, and license are noted above each example. You may also want to check out all available functions and classes of the module chainer.functions.
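Before diving into the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what F.transpose does: it permutes the axes of an array or Variable according to the axes argument, just like numpy.transpose. The shapes and values are illustrative only.

import numpy as np
import chainer.functions as F

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)   # shape (2, 3, 4)
y = F.transpose(x, (0, 2, 1))                          # shape (2, 4, 3)
assert y.shape == (2, 4, 3)
# (0, 2, 1) is its own inverse, so transposing again restores x.
assert np.array_equal(F.transpose(y, (0, 2, 1)).array, x)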
Example #1
Source File: dcgan_updater_base.py    From tgan with MIT License
def _generate_fake_video(self, z_slow, z_fast):
        n_b, z_fast_dim, n_frames = z_fast.shape
        self.batchsize = n_b
        z_fast = F.reshape(F.transpose(
            z_fast, [0, 2, 1]), (n_b * n_frames, z_fast_dim))

        n_b, z_slow_dim = z_slow.shape
        z_slow = F.reshape(
            F.broadcast_to(F.reshape(z_slow, (n_b, 1, z_slow_dim)),
                           (n_b, n_frames, z_slow_dim)),
            (n_b * n_frames, z_slow_dim))

        with chainer.using_config('train', True):
            fake_video = self.vgen(z_slow, z_fast)
            _, n_ch, h, w = fake_video.shape

        fake_video = F.transpose(
            F.reshape(fake_video, (n_b, n_frames, n_ch, h, w)),
            [0, 2, 1, 3, 4])
        return fake_video 
Example #2
Source File: net.py    From pixcaler with MIT License
def __call__(self, x):
        r = self.r
        out = self.conv(x)
        batchsize = out.shape[0]
        in_channels = out.shape[1]
        out_channels = in_channels // (r ** 2)
        in_height = out.shape[2]
        in_width = out.shape[3]
        out_height = in_height * r
        out_width = in_width * r

        out = F.reshape(out, (batchsize, r, r, out_channels, in_height, in_width))
        out = F.transpose(out, (0, 3, 4, 1, 5, 2))
        out = F.reshape(out, (batchsize, out_channels, out_height, out_width))
        return out


# U-net https://arxiv.org/pdf/1611.07004v1.pdf

# convolution-batchnormalization-(dropout)-relu 
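Example #2 above implements pixel shuffle (sub-pixel convolution): the reshape/transpose/reshape sequence rearranges r*r channel groups into an r-times larger spatial grid. A NumPy-only sketch of the same index gymnastics, with an assumed upscale factor r=2 and made-up sizes:

import numpy as np

b, c_out, h, w, r = 1, 1, 2, 2, 2
out = np.arange(b * r * r * c_out * h * w, dtype=np.float32)
out = out.reshape(b, r, r, c_out, h, w)
out = out.transpose(0, 3, 4, 1, 5, 2)      # (b, c_out, h, r, w, r)
out = out.reshape(b, c_out, h * r, w * r)  # upscaled to (b, c_out, 2h, 2w)
assert out.shape == (1, 1, 4, 4)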
Example #3
Source File: nets.py    From treasure-boxes with MIT License
def block_embed(embed, x, dropout=0.0):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
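A usage sketch for block_embed with made-up sizes (hypothetical vocabulary 100, embedding dimension N=50), just to confirm the documented (B, L) -> (B, N, L, 1) shape contract:

import numpy as np
import chainer.links as L
import chainer.functions as F

embed = L.EmbedID(100, 50)             # vocab size 100, embed dim N=50
x = np.zeros((8, 20), dtype=np.int32)  # B=8 sentences of padded length L=20
e = block_embed(embed, x, dropout=0.0)
assert e.shape == (8, 50, 20, 1)       # (B, N, L, 1), ready for 2D convolution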
Example #4
Source File: nets.py    From qb with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
Example #5
Source File: net.py    From chainer-partial_convolution_image_inpainting with MIT License
def __call__(self, x, mask):
        self.m.W.data = self.xp.array(self.maskW)  # mask windows are set to 1
        h = self.c(x * mask)  # (B, C, H, W)
        B, C, H, W = h.shape
        b = F.transpose(F.broadcast_to(self.c.b, (B, H, W, C)), (0, 3, 1, 2))
        h = h - b
        mask_sums = self.m(mask)
        mask_new = (self.xp.sign(mask_sums.data - 0.5) + 1.0) * 0.5
        mask_new_b = mask_new.astype("bool")

        mask_sums = F.where(mask_new_b, mask_sums,
                            0.01 * Variable(self.xp.ones(mask_sums.shape).astype("f")))
        h = h / mask_sums + b

        mask_new = Variable(mask_new)
        h = F.where(mask_new_b, h, Variable(self.xp.zeros(h.shape).astype("f")))

        if self.bn:
            h = self.batchnorm(h)
        if self.noise:
            h = add_noise(h)
        if self.dropout:
            h = F.dropout(h)
        if self.activation is not None:
            h = self.activation(h)
        return h, mask_new
Example #6
Source File: block.py    From Deep_VoiceChanger with MIT License
def __call__(self, x):
        if self.dr:
            x = F.dropout(x, self.dr)
        x = F.transpose(x, (0, 2, 1, 3))
        out_shape = list(x.shape)
        x = F.reshape(x, (-1, x.shape[2]*x.shape[3]))
        x = self.l(x)
        x = self.activation(x)
        out_shape[2] = self.out_ch
        x = F.reshape(x, out_shape)
        x = F.transpose(x, (0, 2, 1, 3))
        return x 
Example #7
Source File: nets.py    From contextual_augmentation with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
Example #8
Source File: nets.py    From contextual_augmentation with MIT License
def embed_xs_with_prediction(self, xs, labels=None, batch='concat'):
        predicted_exs = self.bilm.predict_embed(
            xs, self.embed.W,
            labels=labels,
            dropout=self.config['dropout'],
            mode=self.config['mode'],
            temp=self.config['temp'],
            word_lower_bound=self.config['word_lower_bound'],
            gold_lower_bound=self.config['gold_lower_bound'],
            gumbel=self.config['gumbel'],
            residual=self.config['residual'],
            wordwise=self.config['wordwise'],
            add_original=self.config['add_original'],
            augment_ratio=self.config['augment_ratio'])
        if batch == 'concat':
            predicted_ex_block = F.pad_sequence(predicted_exs, padding=0.)
            predicted_ex_block = F.transpose(
                predicted_ex_block, (0, 2, 1))[:, :, :, None]
            return predicted_ex_block
        elif batch == 'list':
            return predicted_exs
        else:
            raise NotImplementedError 
Example #9
Source File: set2set.py    From chainer-chemistry with MIT License
def __call__(self, h):
        # type: (chainer.Variable) -> chainer.Variable
        xp = cuda.get_array_module(h)
        mb, node, ch = h.shape  # type: int, int, int
        if self.q_star is None:
            self.q_star = [
                xp.zeros((1, self.in_channels * 2)).astype('f')
                for _ in range(mb)
            ]
        self.hx, self.cx, q = self.lstm_layer(self.hx, self.cx, self.q_star)
        # self.hx: (mb, mb, ch)
        # self.cx: (mb, mb, ch)
        # q: List[(1, ch) * mb]
        q = functions.stack(q)  # q: (mb, 1, ch)
        q_ = functions.transpose(q, axes=(0, 2, 1))  # q_: (mb, ch, 1)
        e = functions.matmul(h, q_)  # e: (mb, node, 1)
        a = functions.softmax(e)  # a: (mb, node, 1)
        a = functions.broadcast_to(a, h.shape)  # a: (mb, node, ch)
        r = functions.sum((a * h), axis=1, keepdims=True)  # r: (mb, 1, ch)
        q_star_ = functions.concat((q, r), axis=2)  # q_star_: (mb, 1, ch*2)
        self.q_star = functions.separate(q_star_)
        return functions.reshape(q_star_, (mb, ch * 2)) 
Example #10
Source File: nets.py    From contextual_augmentation with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
Example #11
Source File: irevnet.py    From imgclsmob with MIT License
def inverse(self, y):
        scale_sqr = self.scale * self.scale
        batch, y_channels, y_height, y_width = y.shape
        assert (y_channels % scale_sqr == 0)
        x_channels = y_channels // scale_sqr
        x_height = y_height * self.scale
        x_width = y_width * self.scale

        x = F.transpose(y, axes=(0, 2, 3, 1))
        x = x.reshape(batch, y_height, y_width, scale_sqr, x_channels)
        d3_split_seq = F.split_axis(x, indices_or_sections=(x.shape[3] // self.scale), axis=3)
        d3_split_seq = [t.reshape(batch, y_height, x_width, x_channels) for t in d3_split_seq]
        x = F.stack(d3_split_seq, axis=0)
        x = F.transpose(F.swapaxes(x, axis1=0, axis2=1), axes=(0, 2, 1, 3, 4)).reshape(
            batch, x_height, x_width, x_channels)
        x = F.transpose(x, axes=(0, 3, 1, 2))
        return x 
Example #12
Source File: nets.py    From vecto with Mozilla Public License 2.0
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
Example #13
Source File: losses.py    From EPG with MIT License
def process_trajectory(self, l):
        """This is the time-dependent convolution operation, applied to a trajectory (in order).
        """
        shp = l.shape[0]
        # First dim is batchsize=1, then either 1 channel for 2d conv or n_feat channels
        # for 1d conv.
        l = F.expand_dims(l, axis=0)
        l = F.transpose(l, (0, 2, 1))
        l = self.traj_c0(l)
        l = F.leaky_relu(l)
        l = self.traj_c1(l)
        l = F.leaky_relu(l)
        l = F.sum(l, axis=(0, 2)) / l.shape[0] / l.shape[2]
        l = F.expand_dims(l, axis=0)
        l = self.traj_d0(l)
        l = F.tile(l, (shp, 1))
        return l 
Example #14
Source File: shape_transformer_to_2d.py    From chainer-chemistry with MIT License
def inverse_transform(self, x):
        if x.ndim != 2:
            raise ValueError(
                "[ERROR] Unexpected value x.shape={}, 2-dim array is expected"
                .format(x.shape))
        if self.original_shape is None:
            raise AttributeError(
                '[Error] original_shape is None, call transform beforehand!')

        ndim = len(self.original_shape)
        axis = self.axis
        if axis < 0:
            axis += ndim
        inverse_transpose_order = [i for i in range(ndim - 1)]
        inverse_transpose_order.insert(axis, ndim-1)
        x = functions.reshape(x, tuple([self.original_shape[i]
                                        for i in self.transpose_order]))
        x = functions.transpose(x, tuple(inverse_transpose_order))
        return x 
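The hand-built inverse_transpose_order above can also be derived generically: if an array was transposed with permutation p, transposing with numpy.argsort(p) undoes it. A small standalone sketch (not from the original source):

import numpy as np
import chainer.functions as F

x = np.random.rand(2, 3, 4, 5).astype(np.float32)
order = (2, 0, 3, 1)
y = F.transpose(x, order)
inverse_order = tuple(int(i) for i in np.argsort(order))  # (1, 3, 0, 2)
assert np.array_equal(F.transpose(y, inverse_order).array, x)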
Example #15
Source File: block.py    From Deep_VoiceChanger with MIT License
def __call__(self, x):
        if self.dr:
            with chainer.using_config('train', True):
                x = F.dropout(x, self.dr)
        if self.gap:
            x = F.sum(x, axis=(2,3))
        N = x.shape[0]
        # The code below is copied from https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
        feature = F.reshape(F.leaky_relu(x), (N, -1))
        m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
        m0 = F.broadcast_to(m, (N, self.B * self.C, N))
        m1 = F.transpose(m0, (2, 1, 0))
        d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
        d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
        h = F.concat([feature, d])

        h = self.l(h)
        return h 
Example #16
Source File: net.py    From chainer-gan-lib with MIT License
def __call__(self, x):
        N = x.data.shape[0]
        h = F.leaky_relu(self.c0_0(x))
        h = F.leaky_relu(self.bn0_1(self.c0_1(h)))
        h = F.leaky_relu(self.bn1_0(self.c1_0(h)))
        h = F.leaky_relu(self.bn1_1(self.c1_1(h)))
        h = F.leaky_relu(self.bn2_0(self.c2_0(h)))
        h = F.leaky_relu(self.bn2_1(self.c2_1(h)))
        feature = F.reshape(F.leaky_relu(self.c3_0(h)), (N, 8192))
        m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
        m0 = F.broadcast_to(m, (N, self.B * self.C, N))
        m1 = F.transpose(m0, (2, 1, 0))
        d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
        d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
        h = F.concat([feature, d])

        return self.l4(h) 
Example #17
Source File: tgan_updater_base.py    From tgan with MIT License
def _generate_fake_video(self, z_slow, z_fast):
        n_b, z_fast_dim, n_frames = z_fast.shape
        self.batchsize = n_b
        z_fast = F.reshape(F.transpose(
            z_fast, [0, 2, 1]), (n_b * n_frames, z_fast_dim))

        n_b, z_slow_dim = z_slow.shape
        z_slow = F.reshape(
            F.broadcast_to(F.reshape(z_slow, (n_b, 1, z_slow_dim)),
                           (n_b, n_frames, z_slow_dim)),
            (n_b * n_frames, z_slow_dim))

        with chainer.using_config('train', True):
            fake_video = self.vgen(z_slow, z_fast)
            _, n_ch, h, w = fake_video.shape

        fake_video = F.transpose(
            F.reshape(fake_video, (n_b, n_frames, n_ch, h, w)),
            [0, 2, 1, 3, 4])
        return fake_video 
Example #18
Source File: transformer_text_localizer.py    From kiss with GNU General Public License v3.0
def get_transform_params(self, features):
        batch_size, num_channels, feature_height, feature_weight = features.shape
        features = F.reshape(features, (batch_size, num_channels, -1))
        features = F.transpose(features, (0, 2, 1))

        target = chainer.Variable(self.xp.zeros((batch_size, 1, 6), dtype=chainer.get_dtype()))

        for _ in range(self.num_bboxes_to_localize):
            embedded_params = self.param_embedder(target.array, n_batch_axes=2)
            embedded_params = self.positional_encoding(embedded_params)
            decoded = self.decoder(embedded_params, features, None, self.mask)
            params = self.param_predictor(decoded, n_batch_axes=2)
            target = F.concat([target, params[:, -1:]])

        target = F.reshape(target[:, 1:], (-1,) + target.shape[2:])
        transform_params = rotation_dropout(F.reshape(target, (-1, 2, 3)), ratio=self.dropout_ratio)
        return transform_params 
Example #19
Source File: attention.py    From models with MIT License
def __call__(self, query, key, value, mask=None):
        """
            Perform attention on the value array, using the query and key parameters for calculating the attention mask.
            :param query: matrix of shape (batch_size, num_timesteps, transformer_size) that is used for attention mask calculation
            :param key: matrix of shape (batch_size, num_timesteps, transformer_size) that is used for attention mask calculation
            :param value: matrix of shape (batch_size, num_timesteps, transformer_size) that is used for attention calculation
            :param mask: mask that can be used to mask out parts of the feature maps and avoid attending to those parts
            :return: the attended feature map `value`.
        """
        if mask is not None:
            mask = mask[:, self.xp.newaxis, ...]

        batch_size = len(query)

        query, key, value = [self.project(linear, x, batch_size) for linear, x in zip(self.linears, (query, key, value))]

        x, self.attention = self.attention_implementation(query, key, value, mask=mask, dropout_ratio=self.dropout_ratio)

        x = F.transpose(x, (0, 2, 1, 3))
        x = F.reshape(x, (batch_size, -1, self.num_heads * self.key_dimensionality))

        return self.linears[-1](x, n_batch_axes=2) 
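The transpose followed by reshape at the end of Example #19 is the standard multi-head "merge" step: (batch, heads, time, d_k) becomes (batch, time, heads, d_k), and the heads are then folded back into a single feature axis. A shape-only sketch with assumed sizes:

import numpy as np
import chainer.functions as F

batch, heads, time, d_k = 2, 4, 10, 16
x = np.zeros((batch, heads, time, d_k), dtype=np.float32)
x = F.transpose(x, (0, 2, 1, 3))            # (batch, time, heads, d_k)
x = F.reshape(x, (batch, -1, heads * d_k))  # (batch, time, heads * d_k)
assert x.shape == (2, 10, 64)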
Example #20
Source File: elmo.py    From models with MIT License
def _load_projection(self):
        cnn_options = self._options['char_cnn']
        filters = cnn_options['filters']
        n_filters = sum(f[1] for f in filters)

        with self.init_scope():
            self._projection = L.Linear(
                n_filters, self.output_dim, nobias=False)
        with h5py.File(cached_path(self._weight_file), 'r') as fin:
            weight = fin['CNN_proj']['W_proj'][...]
            bias = fin['CNN_proj']['b_proj'][...]
            self._projection.W.data[:] = numpy.transpose(weight)
            self._projection.b.data[:] = bias

            self._projection.W._requires_grad = self.requires_grad
            self._projection.b._requires_grad = self.requires_grad 
Example #21
Source File: modeling.py    From models with MIT License
def __call__(self, input_ids, input_mask, token_type_ids):
        final_hidden = self.bert.get_sequence_output(
            input_ids,
            input_mask,
            token_type_ids)
        batch_size = final_hidden.shape[0]
        seq_length = final_hidden.shape[1]
        hidden_size = final_hidden.shape[2]

        final_hidden_matrix = F.reshape(
            final_hidden, [batch_size * seq_length, hidden_size])

        logits = self.output(final_hidden_matrix)

        logits = F.reshape(logits, [batch_size, seq_length, 2])
        logits = logits - (1 - input_mask[:, :, None]) * 1000.  # ignore pads
        logits = F.transpose(logits, [2, 0, 1])

        unstacked_logits = F.separate(logits, axis=0)

        (start_logits, end_logits) = (unstacked_logits[0], unstacked_logits[1])
        return (start_logits, end_logits) 
Example #22
Source File: feature_propagation_block.py    From chainer-pointnet with MIT License
def __call__(self, distances, points1, points2):
        """

        Args:
            distances (numpy.ndarray or cupy.ndarray):
                3-dim array (bs, num_point2, num_point1)
            points1 (Variable): 3-dim (batch_size, num_point1, ch1)
            points2 (Variable): 3-dim (batch_size, num_point2, ch2)
                points2 is deeper, rich feature. num_point1 > num_point2

        Returns (Variable): 3-dim (batch_size, num_point1, ch1+ch2)
        """
        # h: interpolated_points (batch_size, num_point1, ch1+ch2)
        h = self.interpolation(distances, points1, points2)
        # h: interpolated_points (batch_size, ch1+ch2, num_point1, 1)
        h = functions.transpose(h, (0, 2, 1))[:, :, :, None]
        for conv_block in self.feature_extractor_list:
            h = conv_block(h)
        h = functions.transpose(h[:, :, :, 0], (0, 2, 1))
        return h  # h (bs, num_point, ch') 
Example #23
Source File: set_abstraction_all_block.py    From chainer-pointnet with MIT License
def __call__(self, coord_points, feature_points=None):
        # coord_points   (batch_size, num_point, coord_dim)
        # feature_points (batch_size, num_point, ch)
        # num_point, ch: coord_dim

        # grouped_points (batch_size, k, num_sample, channel)
        # center_points  (batch_size, k, coord_dim)
        grouped_points, center_points = self.sampling_grouping(
            coord_points, feature_points=feature_points)
        # set alias `h` -> (bs, channel, num_sample, k)
        # Note: the transpose could be removed by optimizing the shape sequence in sampling_grouping
        h = functions.transpose(grouped_points, (0, 3, 2, 1))
        # h (bs, ch, num_sample_in_region, k=num_group)
        for conv_block in self.feature_extractor_list:
            h = conv_block(h)
        # TODO: try other option of pooling function
        h = functions.max(h, axis=2, keepdims=True)
        # h (bs, ch, 1, k=num_group)
        for conv_block in self.head_list:
            h = conv_block(h)
        h = functions.transpose(h[:, :, 0, :], (0, 2, 1))
        return center_points, h  # (bs, k, coord), h (bs, k, ch') 
Example #24
Source File: fsns.py    From see with GNU General Public License v3.0
def __call__(self, images, label=None):
        if self.uses_original_data:
            # handle each individual view as increase in batch size
            batch_size, num_channels, height, width = images.shape
            images = F.reshape(images, (batch_size, num_channels, height, 4, -1))
            images = F.transpose(images, (0, 3, 1, 2, 4))
            images = F.reshape(images, (batch_size * 4, num_channels, height, width // 4))

        batch_size = images.shape[0]
        h = self.localization_net(images)
        new_batch_size = h.shape[0]
        batch_size_increase_factor = new_batch_size // batch_size
        images = F.concat([images for _ in range(batch_size_increase_factor)], axis=0)

        if label is None:
            return self.recognition_net(images, h)
        return self.recognition_net(images, h, label) 
Example #25
Source File: nets.py    From chainer with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
Example #26
Source File: yolov2_predict.py    From YOLOv2 with MIT License
def __call__(self, orig_img):
        orig_input_height, orig_input_width, _ = orig_img.shape
        #img = cv2.resize(orig_img, (640, 640))
        img = reshape_to_yolo_size(orig_img)
        input_height, input_width, _ = img.shape
        #img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.asarray(img, dtype=np.float32) / 255.0
        img = img.transpose(2, 0, 1)

        # forward
        x_data = img[np.newaxis, :, :, :]
        x = Variable(x_data)
        x, y, w, h, conf, prob = self.model.predict(x)

        # parse results
        _, _, _, grid_h, grid_w = x.shape
        x = F.reshape(x, (self.n_boxes, grid_h, grid_w)).data
        y = F.reshape(y, (self.n_boxes, grid_h, grid_w)).data
        w = F.reshape(w, (self.n_boxes, grid_h, grid_w)).data
        h = F.reshape(h, (self.n_boxes, grid_h, grid_w)).data
        conf = F.reshape(conf, (self.n_boxes, grid_h, grid_w)).data
        prob = F.transpose(F.reshape(prob, (self.n_boxes, self.n_classes, grid_h, grid_w)), (1, 0, 2, 3)).data
        detected_indices = (conf * prob).max(axis=0) > self.detection_thresh

        results = []
        for i in range(detected_indices.sum()):
            results.append({
                "label": self.labels[prob.transpose(1, 2, 3, 0)[detected_indices][i].argmax()],
                "probs": prob.transpose(1, 2, 3, 0)[detected_indices][i],
                "conf" : conf[detected_indices][i],
                "objectness": conf[detected_indices][i] * prob.transpose(1, 2, 3, 0)[detected_indices][i].max(),
                "box"  : Box(
                            x[detected_indices][i]*orig_input_width,
                            y[detected_indices][i]*orig_input_height,
                            w[detected_indices][i]*orig_input_width,
                            h[detected_indices][i]*orig_input_height).crop_region(orig_input_height, orig_input_width)
            })

        # nms
        nms_results = nms(results, self.iou_thresh)
        return nms_results 
Example #27
Source File: pointnet_seg.py    From chainer-pointnet with MIT License
def calc_trans_loss(t):
    # Loss that encourages the transform matrix to be orthogonal
    # t (batchsize, K, K) - transform matrix
    xp = cuda.get_array_module(t)
    bs, k1, k2 = t.shape
    assert k1 == k2
    mat_diff = functions.matmul(t, functions.transpose(t, (0, 2, 1)))
    mat_diff = mat_diff - xp.identity(k1, dtype=xp.float32)
    # Dividing by 2 makes the behavior match tf.nn.l2_loss:
    # https://www.tensorflow.org/versions/r1.1/api_docs/python/tf/nn/l2_loss
    return functions.sum(functions.batch_l2_norm_squared(mat_diff)) / 2. 
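A quick sanity check for calc_trans_loss (my own sketch, not from the repository): a batch of identity matrices is exactly orthogonal, so the regularizer must vanish.

import numpy as np

t = np.tile(np.eye(3, dtype=np.float32), (4, 1, 1))  # (bs=4, K=3, K=3)
loss = calc_trans_loss(t)
assert float(loss.array) == 0.0  # I @ I.T - I is all zeros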
Example #28
Source File: pointnet_seg.py    From chainer-pointnet with MIT License
def __call__(self, x, t):
        h, t1, t2 = self.calc(x)
        # h: (bs, ch, N), t: (bs, N)
        # print('h', h.shape, 't', t.shape)
        bs, ch, n = h.shape
        h = functions.reshape(functions.transpose(h, (0, 2, 1)), (bs * n, ch))
        t = functions.reshape(t, (bs * n,))
        cls_loss = functions.softmax_cross_entropy(h, t)
        reporter.report({'cls_loss': cls_loss}, self)

        loss = cls_loss
        # Enforce the transformation as orthogonal matrix
        if self.trans and self.trans_lam1 >= 0:
            trans_loss1 = self.trans_lam1 * calc_trans_loss(t1)
            reporter.report({'trans_loss1': trans_loss1}, self)
            loss = loss + trans_loss1
        if self.trans and self.trans_lam2 >= 0:
            trans_loss2 = self.trans_lam2 * calc_trans_loss(t2)
            reporter.report({'trans_loss2': trans_loss2}, self)
            loss = loss + trans_loss2
        reporter.report({'loss': loss}, self)

        if self.compute_accuracy:
            acc = functions.accuracy(h, t)
            reporter.report({'accuracy': acc}, self)
        return loss 
Example #29
Source File: attention.py    From kiss with GNU General Public License v3.0
def __call__(self, query, key, value, mask=None):
        if mask is not None:
            mask = mask[:, self.xp.newaxis, ...]

        batch_size = len(query)

        query, key, value = [self.project(linear, x, batch_size) for linear, x in zip(self.linears, (query, key, value))]

        x, self.attention = self.attention_implementation(query, key, value, mask=mask, dropout_ratio=self.dropout_ratio)

        x = F.transpose(x, (0, 2, 1, 3))
        x = F.reshape(x, (batch_size, -1, self.num_heads * self.key_dimensionality))

        return self.linears[-1](x, n_batch_axes=2) 
Example #30
Source File: angular_loss.py    From deep_metric_learning with MIT License
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.ndarray):
            Anchor vectors. Each vector in f must be L2-normalized.
        f_p (chainer.Variable or xp.ndarray):
            Positive vectors. Each vector in f_p must be L2-normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
#    term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))
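Finally, a usage sketch for angular_mc_loss with hypothetical shapes, L2-normalizing the inputs as the docstring requires. This assumes the module-level imports the snippet relies on (matmul and transpose from chainer.functions) are in place.

import numpy as np

n, d = 5, 8
f = np.random.rand(n, d).astype(np.float32)
f /= np.linalg.norm(f, axis=1, keepdims=True)    # anchors, L2-normalized
f_p = np.random.rand(n, d).astype(np.float32)
f_p /= np.linalg.norm(f_p, axis=1, keepdims=True)  # positives, L2-normalized
loss = angular_mc_loss(f, f_p, alpha=45)
print(float(loss.array))  # scalar loss value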