Python chainer.functions.softmax() Examples

The following are 30 code examples of chainer.functions.softmax(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chainer.functions, or try the search function.
Example #1
Source File:    From imgclsmob with MIT License 6 votes vote down vote up
def __call__(self, x):
    """Merge the branch outputs using softmax attention weights.

    The output of ``self.branches`` is summed over axis 1, pooled over all
    axes from 2 onward, passed through ``fc1``/``fc2``, reshaped to
    ``(batch, num_branches, out_channels)`` and normalised by
    ``self.softmax``. The resulting weights scale the branch outputs, which
    are then summed over axis 1 again.
    """
    branch_out = self.branches(x)

    # Summarise all branches: sum over axis 1, then pool with a kernel that
    # covers every remaining trailing axis.
    fused = F.sum(branch_out, axis=1)
    pooled = F.average_pooling_2d(fused, ksize=fused.shape[2:])

    attn = self.fc2(self.fc1(pooled))
    attn = F.reshape(
        attn, shape=(attn.shape[0], self.num_branches, self.out_channels))
    attn = self.softmax(attn)

    # Add singleton axes 3 and 4 so the weights broadcast against branch_out.
    attn = F.expand_dims(attn, axis=3)
    attn = F.expand_dims(attn, axis=4)

    return F.sum(branch_out * attn, axis=1)
Example #2
Source File:    From models with MIT License 6 votes vote down vote up
def get_gaussian_params(self, x):
    """Split the network output into Gaussian-mixture parameters.

    Returns:
        tuple: ``(pi, mu, log_var)`` where ``pi`` has shape
        ``(n_batch, gaussian_mixtures)`` and is softmax-normalised over
        axis 1, while ``mu`` and ``log_var`` both have shape
        ``(n_batch, gaussian_mixtures, input_dim)``.
    """
    n_mix = self.gaussian_mixtures
    mu_var_dim = n_mix * self.input_dim
    n_batch = x.shape[0]

    hidden = self.l2(F.tanh(self.l1(x)))

    # Column layout of `hidden`:
    #   [0, n_mix)                    -> mixing coefficients
    #   [n_mix, n_mix + mu_var_dim)   -> means
    #   [n_mix + mu_var_dim, end)     -> log variances
    mu_end = n_mix + mu_var_dim
    pi_raw = hidden[:, :n_mix]
    mu_raw = hidden[:, n_mix:mu_end]
    log_var_raw = hidden[:, mu_end:]

    pi = F.softmax(F.reshape(pi_raw, (n_batch, n_mix)), axis=1)

    component_shape = (n_batch, n_mix, self.input_dim)
    mu = F.reshape(mu_raw, component_shape)
    log_var = F.reshape(log_var_raw, component_shape)

    return pi, mu, log_var
Example #3
Source File:    From chainer-char-rnn with MIT License 6 votes vote down vote up
def forward_one_step(self, x_data, y_data, state, train=True, dropout_ratio=0.5):
    """Advance the two-layer LSTM by one step.

    Args:
        x_data: Input array, wrapped in a ``Variable`` and fed to ``self.embed``.
        y_data: Target array, wrapped in a ``Variable``; used for the loss.
        state (dict): Previous state with keys ``'c1'``, ``'h1'``, ``'c2'``, ``'h2'``.
        train (bool): When true, dropout is active and the loss is returned.
        dropout_ratio (float): Ratio passed to every ``F.dropout`` call.

    Returns:
        tuple: ``(new_state, loss)`` when ``train`` is true, otherwise
        ``(new_state, softmax_output)``.
    """
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data, volatile=not train)

    h0 = self.embed(x)
    h1_in = (self.l1_x(F.dropout(h0, ratio=dropout_ratio, train=train))
             + self.l1_h(state['h1']))
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = (self.l2_x(F.dropout(h1, ratio=dropout_ratio, train=train))
             + self.l2_h(state['h2']))
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(F.dropout(h2, ratio=dropout_ratio, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}

    if train:
        return state, F.softmax_cross_entropy(y, t)
    # Previously this return sat inside the `if train:` body after another
    # return, so it was unreachable and inference returned None.
    return state, F.softmax(y)
Example #4
Source File:    From chainerrl with MIT License 6 votes vote down vote up
def mellowmax(values, omega=1., axis=1):
    """Mellowmax function.

    This is a kind of softmax function that is, unlike the Boltzmann softmax,
    non-expansion.

    See: http://arxiv.org/abs/1612.05628

    Args:
        values (Variable or ndarray):
            Input values. Mellowmax is taken along ``axis`` (the second
            axis by default).
        omega (float):
            Parameter of mellowmax.
        axis (int):
            Axis along which mellowmax is taken.
    Returns:
        outputs (Variable)
    """
    n = values.shape[axis]
    # log-mean-exp: logsumexp minus log(n), rescaled by omega.
    return (F.logsumexp(omega * values, axis=axis) - np.log(n)) / omega
Example #5
Source File:    From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def attend(self, query, key, value, mask, minfs=None):
    """Compute masked attention weights and the attention-weighted sum.

    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)

    Args:
        query: Query tensor (see shapes above).
        key: Key tensor.
        value: Value tensor; the attention weights are broadcast to its shape.
        mask: Condition passed to ``F.where``; scores are kept where it is
            true and replaced by ``minfs`` elsewhere.
        minfs: Optional array of replacement scores. When ``None``, an array
            of ``-inf`` with the shape and dtype of the scores is created.

    Returns:
        The sum over axis 3 (kept as a singleton axis) of the weighted values.
    """
    # Calculate Attention Scores with Mask for Zero-padded Areas
    pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
    if minfs is None:
        minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype)
    pre_a = F.where(mask, pre_a, minfs)
    a = F.softmax(pre_a, axis=2)
    # If values in axis=2 are all -inf, they become nan; re-mask those to 0.
    # NOTE(review): the argument of isnan() was missing in SOURCE; the raw
    # array of `a` is the reconstructed operand -- confirm against upstream.
    a = F.where(self.xp.isnan(a.data),
                self.xp.zeros(a.shape, dtype=a.dtype), a)
    reshaped_a = a[:, None]  # (b, 1, dec_xl, enc_l)

    # Calculate Weighted Sum
    pre_c = F.broadcast_to(reshaped_a, value.shape) * value
    c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_xl, 1)
    return c
Example #6
Source File:    From see with GNU General Public License v3.0 6 votes vote down vote up
def decode_predictions(self, predictions):
    """Decode a sequence of per-step predictions into a space-joined string.

    Each time step is softmax-normalised, reduced with argmax over axis 1,
    stripped via ``self.loss_metrics.strip_prediction`` and mapped to
    characters; the blank symbol's character is removed from every word.

    Returns:
        str: The decoded words joined by single spaces, or ``''`` as soon as
        a step decodes to the single label ``0``.
    """
    # concat all individual predictions and slice for each time step
    predictions = F.concat([F.expand_dims(p, axis=0) for p in predictions], axis=0)

    metrics = self.loss_metrics
    blank_char = chr(metrics.char_map[str(metrics.blank_symbol)])

    words = []
    # NOTE(review): the `.data` operands below (device lookup and argmax) were
    # missing in SOURCE and are reconstructed -- confirm against upstream.
    with cuda.get_device_from_array(predictions.data):
        for prediction in F.separate(predictions, axis=0):
            prediction = F.squeeze(prediction, axis=0)
            prediction = F.softmax(prediction, axis=1)
            prediction = self.xp.argmax(prediction.data, axis=1)
            word = metrics.strip_prediction(prediction[self.xp.newaxis, ...])[0]
            if len(word) == 1 and word[0] == 0:
                return ''

            word = "".join(map(metrics.label_to_char, word))
            word = word.replace(blank_char, '')
            # Collect the decoded word; without this the result was always ''.
            words.append(word)

    return " ".join(words)
Example #7
Source File:    From ssai-cnn with MIT License 6 votes vote down vote up
def __call__(self, x, t):
    """Run the network; return the loss when training, else the softmax map.

    Args:
        x: Input batch.
        t: Target labels, used only when ``self.train`` is true.

    Returns:
        ``self.loss`` (softmax cross entropy, ``normalize=False``) when
        ``self.train`` is true, otherwise ``self.pred`` (softmax of the
        output).
    """
    h = F.relu(self.conv1(x))
    h = F.max_pooling_2d(h, 2, 1)
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.relu(self.fc4(h))
    h = self.fc5(h)
    # NOTE(review): SOURCE had a bare `[0]` as the first dimension; the batch
    # size of the input is the reconstructed operand -- confirm upstream.
    h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
    h = self.channelwise_inhibited(h)

    if self.train:
        self.loss = F.softmax_cross_entropy(h, t, normalize=False)
        return self.loss
    # The prediction branch used to sit after the return inside the `if`,
    # which made it unreachable.
    self.pred = F.softmax(h)
    return self.pred
Example #8
Source File:    From ssai-cnn with MIT License 6 votes vote down vote up
def __call__(self, x, t):
    """Run the network (with dropout after fc4); return loss or softmax map.

    Args:
        x: Input batch.
        t: Target labels, used only when ``self.train`` is true.

    Returns:
        ``self.loss`` (softmax cross entropy, ``normalize=False``) when
        ``self.train`` is true, otherwise ``self.pred`` (softmax of the
        output).
    """
    h = F.relu(self.conv1(x))
    h = F.max_pooling_2d(h, 2, 1)
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.dropout(F.relu(self.fc4(h)), train=self.train)
    h = self.fc5(h)
    # NOTE(review): SOURCE had a bare `[0]` as the first dimension; the batch
    # size of the input is the reconstructed operand -- confirm upstream.
    h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
    h = self.channelwise_inhibited(h)

    if self.train:
        self.loss = F.softmax_cross_entropy(h, t, normalize=False)
        return self.loss
    # The prediction branch used to sit after the return inside the `if`,
    # which made it unreachable.
    self.pred = F.softmax(h)
    return self.pred
Example #9
Source File:    From lda2vec with MIT License 6 votes vote down vote up
def proportions(self, doc_ids, softmax=False):
    """ Given an array of document indices, return a vector
    for each document of just the unnormalized topic weights.

    Args:
        doc_ids: Document indices passed to ``self.weights``.
        softmax (bool): When true, return randomly masked, renormalised
            softmax proportions instead of the raw weights.

    Returns:
        doc_weights : chainer.Variable
            Two dimensional topic weights of each document.
    """
    w = self.weights(doc_ids)
    if not softmax:
        return w

    # NOTE(review): the right-hand side of `size` and the second factor of
    # the product were missing in SOURCE. They are reconstructed as the shape
    # of `w` and the random 0/1 mask cast to float32 -- confirm upstream.
    size = w.data.shape
    mask = self.xp.random.random_integers(0, 1, size=size)
    y = F.softmax(w * self.temperature) * mask.astype('float32')
    # Renormalise each row; the epsilon guards rows that are fully masked.
    norm, y = F.broadcast(F.expand_dims(F.sum(y, axis=1), 1), y)
    return y / (norm + 1e-7)
Example #10
Source File:    From lda2vec with MIT License 6 votes vote down vote up
def __call__(self, doc_ids, update_only_docs=False):
    """ Given an array of document integer indices, returns a vector
    for each document. The vector is composed of topic weights projected
    onto topic vectors.

    Args:
        doc_ids : chainer.Variable
            One-dimensional batch vectors of IDs
        update_only_docs : bool
            Flag checked after the factors are computed; see the note below.

    Returns:
        doc_vector : chainer.Variable
            Batch of two-dimensional embeddings for every document.
    """
    # (batchsize, ) --> (batchsize, multinomial)
    proportions = self.proportions(doc_ids, softmax=True)
    # (batchsize, n_factors) * (n_factors, n_dim) --> (batchsize, n_dim)
    factors = F.dropout(self.factors(), ratio=self.dropout_ratio)
    if update_only_docs:
        # NOTE(review): the body of this branch was missing in SOURCE.
        # Cutting the graph at `factors` is a reconstruction that matches the
        # flag's name -- confirm against upstream.
        factors.unchain_backward()
    w_sum = F.matmul(proportions, factors)
    return w_sum
Example #11
Source File:    From Semantic-Segmentation-using-Adversarial-Networks with MIT License 6 votes vote down vote up
def forward(self, batch):
    """Run the discriminator on real and generated label maps for one batch.

    The ground truth is one-hot encoded from the second element of each pair,
    and the generator output is softmax-normalised before being paired with
    the input image. Results are stored on ``self`` (``y_fake``, ``y_real``,
    ``pred_label_map``, ``ground_truth``); nothing is returned.
    """
    onehot_labels = [self._onehot_encode(pair[1]) for pair in batch]

    input_img, ground_truth = self.converter(batch, self.device)
    ground_truth_onehot = self.converter(onehot_labels, self.device)

    input_img = Variable(input_img, volatile=not self.gen.train)
    ground_truth = Variable(ground_truth, volatile=not self.gen.train)
    ground_truth_onehot = Variable(
        ground_truth_onehot, volatile=not self.gen.train)

    # Real pair: image together with the one-hot ground truth.
    real_input = self._make_dis_input(input_img, ground_truth_onehot)
    self.y_real = self.dis(real_input)

    # Fake pair: image together with the generator's softmax output.
    self.pred_label_map = self.gen(input_img)
    fake_input = self._make_dis_input(
        input_img, F.softmax(self.pred_label_map))
    self.y_fake = self.dis(fake_input)

    self.ground_truth = ground_truth
Example #12
Source File:    From chainer-image-caption with MIT License 6 votes vote down vote up
def generate(net, image_model, image_path):
    """Beam-search token sequences for an image.

    Args:
        net: Language model; it is copied for every expanded candidate and
            called with the candidate's last token.
        image_model: Object whose ``feature(image_path)`` is evaluated.
        image_path: Path handed to ``image_model.feature``.

    Returns:
        list: Token lists of the surviving candidates, best first.
    """
    # NOTE(review): `feature` is computed but not used anywhere in SOURCE; a
    # statement that conditions `net` on it appears to be missing.
    feature = image_model.feature(image_path)
    candidates = [(net, [bos], 0)]

    for _ in range(max_length):
        next_candidates = []
        for prev_net, tokens, likelihood in candidates:
            if tokens[-1] == eos:
                # Finished hypotheses are carried over unchanged. Their model
                # slot is None, so they must not be expanded again.
                next_candidates.append((None, tokens, likelihood))
                continue
            net = prev_net.copy()
            x = xp.asarray([tokens[-1]]).astype(np.int32)
            y = F.softmax(net(x))
            # NOTE(review): the operand of to_cpu() was lost in SOURCE; the
            # first row of the softmax output is the reconstructed operand.
            token_likelihood = np.log(cuda.to_cpu(y.data[0]))
            order = token_likelihood.argsort()[-beam_width:][::-1]
            next_candidates.extend(
                (net, tokens + [token], likelihood + token_likelihood[token])
                for token in order)
        candidates = sorted(next_candidates, key=lambda c: -c[2])[:beam_width]
        if all(candidate[1][-1] == eos for candidate in candidates):
            # Every surviving hypothesis has ended; stop searching.
            break
    return [candidate[1] for candidate in candidates]
Example #13
Source File:    From chainerrl with MIT License 6 votes vote down vote up
def __call__(self, x):
    """Compute a distributional action value with a dueling head.

    The advantage stream is reshaped to ``(batch, n_actions, n_atoms)`` and
    centred by its mean over axis 1. The value stream is reshaped to
    ``(batch, 1, n_atoms)``. Their sum is softmax-normalised over axis 2.

    Returns:
        action_value.DistributionalDiscreteActionValue built from the
        resulting distribution and ``self.z_values``.
    """
    features = x
    for layer in self.conv_layers:
        features = self.activation(layer(features))

    batch_size = x.shape[0]

    features = self.activation(self.main_stream(features))
    h_a, h_v = F.split_axis(features, 2, axis=-1)

    # Advantage, centred across actions
    ya = F.reshape(self.a_stream(h_a),
                   (batch_size, self.n_actions, self.n_atoms))
    mean = F.sum(ya, axis=1, keepdims=True) / self.n_actions
    ya, mean = F.broadcast(ya, mean)
    ya -= mean

    # State value
    ys = F.reshape(self.v_stream(h_v), (batch_size, 1, self.n_atoms))
    ya, ys = F.broadcast(ya, ys)

    distribution = F.softmax(ya + ys, axis=2)
    return action_value.DistributionalDiscreteActionValue(
        distribution, self.z_values)
Example #14
Source File:    From see with GNU General Public License v3.0 5 votes vote down vote up
def calc_accuracy(self, x, t):
    """Return the mean word-level accuracy over all prediction groups.

    Args:
        x: Triple whose first element holds the batch predictions; the other
            two elements are ignored.
        t: Labels, reshaped to ``(batch_size, num_timesteps, -1)``.

    Returns:
        The average of the per-group accuracies, or 0 when there is none.
    """
    batch_predictions, _, _ = x
    self.xp = cuda.get_array_module(batch_predictions[0], t)
    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))
    accuracies = []

    for predictions, labels in zip(batch_predictions, F.separate(t, axis=1)):
        if isinstance(predictions, list):
            predictions = F.concat([F.expand_dims(p, axis=0) for p in predictions], axis=0)
        # NOTE(review): every `.data` operand in this block was missing in
        # SOURCE and is reconstructed -- confirm against upstream.
        with cuda.get_device_from_array(predictions.data):
            classification = F.softmax(predictions, axis=2)
            classification = classification.data
            classification = self.xp.argmax(classification, axis=2)
            classification = self.xp.transpose(classification, (1, 0))

            words = self.strip_prediction(classification)
            label_words = self.strip_prediction(labels.data)

            num_correct_words = 0
            for word, label in zip(words, label_words):
                word = "".join(map(self.label_to_char, word))
                label = "".join(map(self.label_to_char, label))
                if word == label:
                    num_correct_words += 1

            # Record the accuracy; without this the result was always 0.
            accuracies.append(num_correct_words / len(label_words))

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    return overall_accuracy
Example #15
Source File:    From chainer with MIT License 5 votes vote down vote up
def test_Softmax(self):
    """Check that a chain applying ``F.softmax`` passes ``assert_export_import_match``."""
    class Link(chainer.Chain):
        def forward(self, x):
            return F.softmax(x)

    model = Link()
    assert_export_import_match(model, self.x)
Example #16
Source File:    From chainer with MIT License 5 votes vote down vote up
def query(self, u):
    """Return ``u`` plus the attention-weighted read from the memory.

    ``self.TA`` and ``self.TC`` are looked up with reversed indices along
    axis 1 of the memory and added to ``self.m`` and ``self.c`` before the
    attention weights and the read-out are computed.
    """
    xp = backend.get_array_module(u)
    n_slots = self.m.shape[1]
    # Indices n_slots-1, ..., 0
    reversed_inds = xp.arange(n_slots - 1, -1, -1, dtype=numpy.int32)

    temporal_m = self.TA(reversed_inds)
    temporal_c = self.TC(reversed_inds)
    memory_in = self.m + F.broadcast_to(temporal_m, self.m.shape)
    memory_out = self.c + F.broadcast_to(temporal_c, self.c.shape)

    weights = F.softmax(F.matmul(memory_in, F.expand_dims(u, -1)))
    readout = F.matmul(F.swapaxes(memory_out, 2, 1), weights)
    readout = F.squeeze(readout, -1)
    return readout + u
Example #17
Source File:    From chainer with MIT License 5 votes vote down vote up
def predict(self, xs, softmax=False, argmax=False):
    """Encode ``xs`` and return the classifier output in the requested form.

    Args:
        xs: Inputs passed to ``self.encoder``.
        softmax (bool): Return softmax probabilities as a raw array.
        argmax (bool): Return the argmax over axis 1 as a raw array. Only
            consulted when ``softmax`` is false.

    Returns:
        The softmax array, the argmax array, or the raw output variable when
        neither flag is set.
    """
    concat_encodings = F.dropout(self.encoder(xs), ratio=self.dropout)
    concat_outputs = self.output(concat_encodings)
    if softmax:
        return F.softmax(concat_outputs).array
    if argmax:
        return self.xp.argmax(concat_outputs.array, axis=1)
    # Previously this return sat inside the `elif` body after another return,
    # so it was unreachable and the default call returned None.
    return concat_outputs
Example #18
Source File:    From see with GNU General Public License v3.0 5 votes vote down vote up
def calc_accuracy(self, x, t):
    """Return the mean word-level accuracy over all time steps.

    Args:
        x: Triple whose first element holds the per-step predictions; the
            other two elements are ignored.
        t: Labels, reshaped to ``(batch_size, num_timesteps, -1)``.

    Returns:
        The average of the per-step accuracies, or 0 when there is none.
    """
    batch_predictions, _, _ = x

    # concat all individual predictions and slice for each time step
    batch_predictions = F.concat([F.expand_dims(p, axis=0) for p in batch_predictions], axis=0)

    self.xp = cuda.get_array_module(batch_predictions[0], t)
    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    accuracies = []
    # NOTE(review): every `.data` operand in this block was missing in SOURCE
    # and is reconstructed -- confirm against upstream.
    with cuda.get_device_from_array(batch_predictions.data):
        for prediction, label in zip(F.separate(batch_predictions, axis=0), F.separate(t, axis=1)):
            classification = F.softmax(prediction, axis=2)
            classification = classification.data
            classification = self.xp.argmax(classification, axis=2)

            words = self.strip_prediction(classification)
            label_words = self.strip_prediction(label.data)

            num_correct_words = 0
            for word, label_word in zip(words, label_words):
                word = "".join(map(self.label_to_char, word))
                label_word = "".join(map(self.label_to_char, label_word))
                if word == label_word:
                    num_correct_words += 1

            # Record the accuracy; without this the result was always 0.
            accuracies.append(num_correct_words / len(label_words))

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    return overall_accuracy
Example #19
Source File:    From vat_chainer with MIT License 5 votes vote down vote up
def entropy_y_x(p_logit):
    """Return the entropy of softmax(p_logit), summed and divided by ``p_logit.shape[0]``."""
    probs = F.softmax(p_logit)
    log_probs = F.log_softmax(p_logit)
    n_rows = p_logit.shape[0]
    return -F.sum(probs * log_probs) / n_rows
Example #20
Source File:    From vat_chainer with MIT License 5 votes vote down vote up
def kl_categorical(p_logit, q_logit):
    """Return the mean KL divergence KL(softmax(p_logit) || softmax(q_logit)).

    Args:
        p_logit: Logits of the reference distribution (Variable or array).
        q_logit: Logits of the compared distribution.

    Returns:
        The per-row KL divergence (summed over axis 1), averaged over rows.
    """
    # NOTE(review): the Variable branch's argument, the `else:` line and the
    # final divisor were missing in SOURCE; they are reconstructed here --
    # confirm against upstream.
    if isinstance(p_logit, chainer.Variable):
        xp = cuda.get_array_module(p_logit.data)
    else:
        xp = cuda.get_array_module(p_logit)
    p = F.softmax(p_logit)
    _kl = F.sum(p * (F.log_softmax(p_logit) - F.log_softmax(q_logit)), 1)
    # Divide by the number of per-row KL terms to obtain the mean.
    return F.sum(_kl) / xp.prod(xp.array(_kl.shape))
Example #21
Source File:    From EPG with MIT License 5 votes vote down vote up
def __init__(self, env_dim, act_dim, inner_lr=None, **kwargs):
    """Build the policy network and one Adam state per backprop parameter.

    ``inner_lr`` is required despite its ``None`` default; it becomes the
    step size of every Adam instance.
    """
    assert inner_lr is not None
    super().__init__(env_dim, act_dim, 1, **kwargs)

    # env_dim -> 64 -> 64 -> act_dim, with a softmax output.
    layer_sizes = [env_dim, 64, 64, act_dim]
    self.pi = NN(layer_sizes, out_fn=F.softmax)

    adam_states = []
    for var in self.backprop_params:
        adam_states.append(Adam(var.shape, stepsize=inner_lr))
    self._lst_adam = adam_states
Example #22
Source File:    From vecto with Mozilla Public License 2.0 5 votes vote down vote up
def predict(self, xs, softmax=False, argmax=False):
    """Encode ``xs`` and return the classifier output in the requested form.

    Args:
        xs: Inputs passed to ``self.encoder``.
        softmax (bool): Return softmax probabilities as a raw array.
        argmax (bool): Return the argmax over axis 1 as a raw array. Only
            consulted when ``softmax`` is false.

    Returns:
        The softmax array, the argmax array, or the raw output variable when
        neither flag is set.
    """
    concat_encodings = F.dropout(self.encoder(xs), ratio=self.dropout)
    concat_outputs = self.output(concat_encodings)
    if softmax:
        return F.softmax(concat_outputs).data
    if argmax:
        # NOTE(review): the argmax operand was missing in SOURCE; the raw
        # array of the output is reconstructed, mirroring the softmax branch.
        return self.xp.argmax(concat_outputs.data, axis=1)
    # Previously this return sat inside the `elif` body after another return,
    # so it was unreachable.
    return concat_outputs
Example #23
Source File:    From cryptotrader with MIT License 5 votes vote down vote up
def sample(self):
    """Return the softmax of ``F.gaussian(self.mean, self.ln_var)``."""
    drawn = F.gaussian(self.mean, self.ln_var)
    return F.softmax(drawn)
Example #24
Source File:    From models with MIT License 5 votes vote down vote up
def forward(self, zs, xs):
    """Run the per-level RPN heads and fuse their outputs with learned weights.

    Args:
        zs: Per-level inputs, paired element-wise with ``xs``.
        xs: Per-level inputs.

    Returns:
        tuple: ``(conf, loc)``, each the ``self.weight_average`` of the
        per-level outputs under softmax-normalised weights.
    """
    confs = []
    locs = []
    for i, (z, x) in enumerate(zip(zs, xs)):
        # Heads are stored as attributes rpn3, rpn4, ...
        conf, loc = getattr(self, 'rpn' + str(i + 3))(z, x)
        # Collect each level's output. In SOURCE the results were discarded,
        # so empty lists were passed to weight_average.
        confs.append(conf)
        locs.append(loc)

    conf_weight = F.softmax(self.conf_weight, axis=0)
    loc_weight = F.softmax(self.loc_weight, axis=0)

    return (
        self.weight_average(conf_weight, confs),
        self.weight_average(loc_weight, locs))
Example #25
Source File:    From cryptotrader with MIT License 5 votes vote down vote up
def most_probable(self):
    """Return the softmax of ``self.mean``."""
    mode = F.softmax(self.mean)
    return mode
Example #26
Source File:    From cryptotrader with MIT License 5 votes vote down vote up
def __call__(self, x):
        # Concatenates a feature map with self.portvec(x) along axis 1, applies
        # self.conv and self.cashbias, and returns the softmax of the result.
        # NOTE(review): the right-hand side of the first assignment is missing
        # in SOURCE (extraction damage), so this block does not parse as-is.
        # Restore the expression that produces the initial `h` from upstream.
        h =
        h = F.concat([h, self.portvec(x)], axis=1)
        h = self.conv(h)
        h = self.cashbias(h)
        return F.softmax(h)
Example #27
Source File:    From models with MIT License 5 votes vote down vote up
def attention_implementation(self, query, key, value, mask=None, dropout_ratio=None):
    """Scaled dot-product attention.

    Args:
        query, key, value: Attention operands; ``key`` is transposed on its
            last two of four axes before the product with ``query``.
        mask: Optional mask. It is broadcast over the first two axes of the
            scores and cropped to the scores' last two axes; positions where
            it is false receive a score of -1e9.
        dropout_ratio: Optional dropout ratio applied to the probabilities.

    Returns:
        tuple: ``(attended_values, attention_probabilities)``.
    """
    key_t = F.transpose(key, (0, 1, 3, 2))
    scale = math.sqrt(self.key_dimensionality)
    scores = F.matmul(query, key_t) / scale

    if mask is not None:
        batch_size, num_heads, n_rows, n_cols = scores.shape
        mask = self.xp.array(mask)
        target_shape = (batch_size, num_heads) + mask.shape[2:]
        mask = self.xp.broadcast_to(mask, target_shape)
        mask = mask[:, :, :n_rows, :n_cols]
        # A large negative score drives the softmax weight towards zero.
        fill = self.xp.full_like(scores.array, -1e9)
        scores = F.where(mask, scores, fill)

    attention_probabilities = F.softmax(scores, axis=3)
    if dropout_ratio is not None:
        attention_probabilities = F.dropout(
            attention_probabilities, ratio=dropout_ratio)

    attended = F.matmul(attention_probabilities, value)
    return attended, attention_probabilities
Example #28
Source File:    From imgclsmob with MIT License 5 votes vote down vote up
def __call__(self, x, y):
        # Upsamples x via self.up, then gates y by one minus the per-position
        # maximum softmax confidence of x (max over axis 1) and adds x.
        x = self.up(x)
        # NOTE(review): the right-hand side of the next assignment is missing
        # in SOURCE (extraction damage), so this block does not parse as-is.
        # Restore the transformation applied to `x` here from upstream.
        x =
        w_conf = F.softmax(x)
        # Max over axis 1, re-expanded and broadcast back to x's shape.
        w_max = F.broadcast_to(F.expand_dims(F.max(w_conf, axis=1), axis=1), x.shape)
        x = y * (1 - w_max) + x
        return x
Example #29
Source File:    From ram with MIT License 5 votes vote down vote up
def __call__(self, x, t, train=True):
    """Run ``n_steps`` forward steps and return the total loss.

    Args:
        x: Input batch (array-like); converted with ``self.xp.asarray``.
        t: Target labels (array-like).
        train (bool): When true, the baseline and REINFORCE losses are added
            to the classification loss.

    Returns:
        ``self.loss``: softmax cross entropy, plus ``loss_base`` and
        ``loss_reinforce`` when ``train`` is true. ``self.accuracy`` is set
        as a side effect.
    """
    x = chainer.Variable(self.xp.asarray(x), volatile=not train)
    t = chainer.Variable(self.xp.asarray(t), volatile=not train)
    # NOTE(review): SOURCE read `bs =[0]` (operand lost); the leading
    # dimension of the input is the reconstructed batch size.
    bs = x.data.shape[0]  # batch size
    self.clear(bs, train)

    # init mean location
    loc = np.random.uniform(-1, 1, size=(bs, 2)).astype(np.float32)
    loc = chainer.Variable(self.xp.asarray(loc), volatile=not train)

    # forward n_steps time
    sum_ln_pi = 0
    self.forward(x, train, action=False, init_l=loc)
    for i in range(1, self.n_steps):
        # Only the final step produces the classification output.
        action = (i == self.n_steps - 1)
        loc, ln_pi, y, b = self.forward(x, train, action)
        if train:
            sum_ln_pi += ln_pi

    # loss with softmax cross entropy
    self.loss_action = F.softmax_cross_entropy(y, t)
    self.loss = self.loss_action
    self.accuracy = F.accuracy(y, t)

    if train:
        # reward: 1 where the predicted class equals the target, else 0
        # NOTE(review): both comparison operands were missing in SOURCE; the
        # raw arrays of `y` and `t` are reconstructed -- confirm upstream.
        conditions = self.xp.argmax(y.data, axis=1) == t.data
        r = self.xp.where(conditions, 1., 0.).astype(np.float32)

        # squared error between reward and baseline
        self.loss_base = F.mean_squared_error(r, b)
        self.loss += self.loss_base

        # loss with reinforce rule
        mean_ln_pi = sum_ln_pi / (self.n_steps - 1)
        self.loss_reinforce = F.sum(-mean_ln_pi * (r - b)) / bs
        self.loss += self.loss_reinforce

    return self.loss
Example #30
Source File:    From chainer with MIT License 5 votes vote down vote up
def setUp(self):
    """Build soft targets from ``self.x`` and the expected cross entropy.

    NOTE(review): ``self.x`` and ``self.reduce`` are read but not set here,
    so they must be prepared before this code runs -- a preceding setup call
    may be missing from SOURCE.
    """
    self.t = functions.softmax(self.x).array
    # NOTE(review): the closing argument of numpy.sum() was lost in SOURCE.
    # `axis=1` is reconstructed to match the default softmax axis.
    self.expect = numpy.sum(
        -self.t * functions.log_softmax(self.x).array, axis=1)
    if self.reduce == 'mean':
        self.expect = numpy.average(self.expect)