Python chainer.functions.embed_id() Examples

The following are 9 code examples of chainer.functions.embed_id(). Each example is taken from an open-source project; the source file and project are noted above each snippet. You may also want to check out all available functions/classes of the module chainer.functions.
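For orientation before the project examples: chainer.functions.embed_id(x, W, ignore_label=None) simply gathers rows of the weight matrix W by integer id. A minimal sketch with a made-up toy matrix:

import numpy as np
import chainer.functions as F

# Toy weight matrix: a 5-word vocabulary with 3-dimensional embeddings.
W = np.arange(15, dtype=np.float32).reshape(5, 3)
ids = np.array([0, 2, 4, -1], dtype=np.int32)

# embed_id gathers W[ids]; ids equal to ignore_label come back as zero rows.
e = F.embed_id(ids, W, ignore_label=-1)
print(e.shape)  # (4, 3)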
Example #1
Source File: nets.py    From contextual_augmentation with MIT License
def block_embed(embed, x, dropout=0.):
    """Embedding function followed by convolution

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Input variable, which
            is a :math:`(B, L)`-shaped int array. Its first dimension
            :math:`(B)` is assumed to be the *minibatch dimension*.
            The second dimension :math:`(L)` is the length of padded
            sentences.
        dropout (float): Dropout ratio.

    Returns:
        ~chainer.Variable: Output variable. A float array with shape
        of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions
        of word embedding.

    """
    e = embed(x)
    e = F.dropout(e, ratio=dropout)
    e = F.transpose(e, (0, 2, 1))
    e = e[:, :, :, None]
    return e 
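A possible way to call block_embed, with hypothetical sizes and assuming the usual imports of such a project (numpy as np, chainer.functions as F, chainer.links as L):

embed = L.EmbedID(1000, 50, ignore_label=-1)              # vocabulary 1000, embedding dim N = 50
x = np.random.randint(0, 1000, (8, 20)).astype(np.int32)  # B = 8 padded sentences of length L = 20
e = block_embed(embed, x, dropout=0.1)
print(e.shape)  # (8, 50, 20, 1)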
Example #2
Source File: nets.py    From contextual_augmentation with MIT License
def embed_seq_batch(embed, seq_batch, dropout=0., context=None):
    x_len = [len(seq) for seq in seq_batch]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(seq_batch, axis=0))
    ex = F.dropout(ex, dropout)
    if context is not None:
        ids = [embed.xp.full((l, ), i).astype('i')
               for i, l in enumerate(x_len)]
        ids = embed.xp.concatenate(ids, axis=0)
        cx = F.embed_id(ids, context)
        ex = F.concat([ex, cx], axis=1)
    exs = F.split_axis(ex, x_section, 0)
    return exs 
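A sketch of how embed_seq_batch might be driven, again with hypothetical sizes; the optional context argument holds one extra vector per sequence, which F.embed_id broadcasts to every token of that sequence before concatenation:

embed = L.EmbedID(1000, 50)
seq_batch = [np.array([1, 5, 3], dtype=np.int32),
             np.array([2, 7], dtype=np.int32)]
context = np.random.rand(len(seq_batch), 10).astype(np.float32)  # one 10-d context vector per sequence
exs = embed_seq_batch(embed, seq_batch, dropout=0.1, context=context)
print([e.shape for e in exs])  # [(3, 60), (2, 60)]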
Example #3
Source File: nets.py    From contextual_augmentation with MIT License
def sequence_embed(embed, xs, dropout=0.):
    """Efficient embedding function for variable-length sequences

    This output is equivalent to
    "return [F.dropout(embed(x), ratio=dropout) for x in xs]".
    However, calling embed and dropout once on the concatenated input is faster.

    Args:
        embed (callable): A :func:`~chainer.functions.embed_id` function
            or :class:`~chainer.links.EmbedID` link.
        xs (list of :class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): i-th element in the list is an input variable,
            which is a :math:`(L_i, )`-shaped int array.
        dropout (float): Dropout ratio.

    Returns:
        list of ~chainer.Variable: Output variables. i-th element in the
        list is an output variable, which is a :math:`(L_i, N)`-shaped
        float array. :math:`(N)` is the number of dimensions of word embedding.

    """
    x_len = [len(x) for x in xs]
    x_section = np.cumsum(x_len[:-1])
    ex = embed(F.concat(xs, axis=0))
    ex = F.dropout(ex, ratio=dropout)
    exs = F.split_axis(ex, x_section, 0)
    return exs 
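A hypothetical call that mirrors the docstring: two variable-length id sequences are concatenated, embedded and dropped out in one shot, then split back:

embed = L.EmbedID(1000, 50)
xs = [np.array([1, 5, 3], dtype=np.int32),
      np.array([2, 7], dtype=np.int32)]
exs = sequence_embed(embed, xs, dropout=0.1)
print([e.shape for e in exs])  # [(3, 50), (2, 50)]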
Example #4
Source File: lda2vec_model.py    From lda2vec with MIT License
def fit_partial(self, rdoc_ids, rword_indices, window=5,
                    update_only_docs=False):
        doc_ids, word_indices = move(self.xp, rdoc_ids, rword_indices)
        pivot_idx = next(move(self.xp, rword_indices[window: -window]))
        pivot = F.embed_id(pivot_idx, self.sampler.W)
        if update_only_docs:
            pivot.unchain_backward()
        doc_at_pivot = rdoc_ids[window: -window]
        doc = self.mixture(next(move(self.xp, doc_at_pivot)),
                           update_only_docs=update_only_docs)
        loss = 0.0
        start, end = window, rword_indices.shape[0] - window
        context = (F.dropout(doc, self.dropout_ratio) +
                   F.dropout(pivot, self.dropout_ratio))
        for frame in range(-window, window + 1):
            # Skip predicting the current pivot
            if frame == 0:
                continue
            # Predict word given context and pivot word
            # The target starts before the pivot
            targetidx = rword_indices[start + frame: end + frame]
            doc_at_target = rdoc_ids[start + frame: end + frame]
            doc_is_same = doc_at_target == doc_at_pivot
            rand = np.random.uniform(0, 1, doc_is_same.shape[0])
            mask = (rand > self.word_dropout_ratio).astype('bool')
            weight = np.logical_and(doc_is_same, mask).astype('int32')
            # Keep targetidx where weight is 1; replace with -1 where weight is 0
            targetidx = targetidx * weight + -1 * (1 - weight)
            target, = move(self.xp, targetidx)
            loss = self.sampler(context, target)
            loss.backward()
            if update_only_docs:
                # Wipe out any gradient accumulation on word vectors
                self.sampler.W.grad *= 0.0
        return loss.data 
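The F.embed_id(pivot_idx, self.sampler.W) line reuses the negative-sampling output weights as the word-embedding table, tying input and output vectors. That pattern in isolation, as a sketch with hypothetical sizes (chainer.links.NegativeSampling stores its weight W with shape (vocabulary, in_size)):

sampler = L.NegativeSampling(128, [10] * 1000, 5)   # in_size 128, 1000-word vocabulary, 5 negative samples
word_ids = np.array([3, 42, 7], dtype=np.int32)
vecs = F.embed_id(word_ids, sampler.W)              # (3, 128): rows of the sampler's weight matrix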
Example #5
Source File: lda2vec_model.py    From lda2vec with MIT License
def fit_partial(self, rsty_ids, raut_ids, rwrd_ids, window=5):
        sty_ids, aut_ids, wrd_ids = move(self.xp, rsty_ids, raut_ids, rwrd_ids)
        pivot_idx = next(move(self.xp, rwrd_ids[window: -window]))
        pivot = F.embed_id(pivot_idx, self.sampler.W)
        sty_at_pivot = rsty_ids[window: -window]
        aut_at_pivot = raut_ids[window: -window]
        sty = self.mixture_sty(next(move(self.xp, sty_at_pivot)))
        aut = self.mixture_aut(next(move(self.xp, aut_at_pivot)))
        loss = 0.0
        start, end = window, rwrd_ids.shape[0] - window
        context = sty + aut + F.dropout(pivot, self.dropout_ratio)
        for frame in range(-window, window + 1):
            # Skip predicting the current pivot
            if frame == 0:
                continue
            # Predict word given context and pivot word
            # The target starts before the pivot
            targetidx = rwrd_ids[start + frame: end + frame]
            sty_at_target = rsty_ids[start + frame: end + frame]
            aut_at_target = raut_ids[start + frame: end + frame]
            sty_is_same = sty_at_target == sty_at_pivot
            aut_is_same = aut_at_target == aut_at_pivot
            # Randomly dropout words (default is to never do this)
            rand = np.random.uniform(0, 1, sty_is_same.shape[0])
            mask = (rand > self.word_dropout_ratio).astype('bool')
            sty_and_aut_are_same = np.logical_and(sty_is_same, aut_is_same)
            weight = np.logical_and(sty_and_aut_are_same, mask).astype('int32')
            # Keep targetidx where weight is 1; replace with -1 where weight is 0
            targetidx = targetidx * weight + -1 * (1 - weight)
            target, = move(self.xp, targetidx)
            loss = self.sampler(context, target)
            loss.backward()
        return loss.data 
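The weight/targetidx arithmetic above implements the word dropout: targets whose style or author differs from the pivot's, or that are randomly dropped, are replaced by -1 to mark them as skipped. The same arithmetic on made-up toy values:

targetidx = np.array([4, 9, 2, 7], dtype=np.int32)
is_same = np.array([True, True, False, True])            # same style and author as the pivot?
rand = np.random.uniform(0, 1, is_same.shape[0])
mask = (rand > 0.0).astype('bool')                       # word_dropout_ratio = 0 keeps every word
weight = np.logical_and(is_same, mask).astype('int32')
print(targetidx * weight + -1 * (1 - weight))            # [ 4  9 -1  7]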
Example #6
Source File: elmo.py    From models with MIT License
def forward(self, inputs):
        """
        Compute context insensitive token embeddings for ELMo representations.

        Parameters
        ----------
        inputs: ``~chainer.Variable`` or ``ndarray``
            Shape ``(batch_size, sequence_length)`` of token ids representing the
            current batch.

        Returns
        -------
        Dict with keys:
        ``'token_embedding'``: ``~chainer.Variable``
            Shape ``(batch_size, sequence_length + 2, embedding_dim)`` array with context
            insensitive token representations.
        ``'mask'``: ``ndarray``
            Shape ``(batch_size, sequence_length + 2)`` array with the sequence mask.
        """
        # Add BOS/EOS
        # mask = ((inputs > 0).sum(axis=-1) > 0)
        mask = (inputs > 0)

        token_ids_with_bos_eos, mask_with_bos_eos = add_sentence_boundary_token_ids(
            inputs,
            mask,
            self._beginning_of_sentence_token,
            self._end_of_sentence_token
        )

        token_embedding = F.embed_id(
            token_ids_with_bos_eos,
            self._token_embedding_weights
        )

        # token_embedding: (batch_size, sequence_length + 2, embedding_dim)
        return {
            'mask': mask_with_bos_eos,
            'token_embedding': token_embedding
        } 
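Here F.embed_id is called directly with a fixed, precomputed weight matrix (the ELMo token embedding table) instead of going through a trainable EmbedID link. The same pattern as a sketch with hypothetical shapes:

weights = np.random.rand(262, 16).astype(np.float32)     # (vocab_size, embedding_dim), e.g. loaded from a checkpoint
token_ids = np.array([[2, 15, 30, 3]], dtype=np.int32)   # (batch_size, sequence_length)
emb = F.embed_id(token_ids, weights)                     # (1, 4, 16)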
Example #7
Source File: nets.py    From contextual_augmentation with MIT License
def predict_embed(self,
                      xs, embedW,
                      labels=None,
                      dropout=0.,
                      mode='sampling',
                      temp=1.,
                      word_lower_bound=0.,
                      gold_lower_bound=0.,
                      gumbel=True,
                      residual=0.,
                      wordwise=True,
                      add_original=0.,
                      augment_ratio=0.25):
        x_len = [len(x) for x in xs]
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            t_out_concat = self.encode(xs, labels=labels)
            prob_concat = self.output.output(t_out_concat).data
            prob_concat /= temp
            prob_concat += self.xp.random.gumbel(
                size=prob_concat.shape).astype('f')
            prob_concat = F.softmax(prob_concat).data

        out_concat = F.embed_id(
            self.xp.argmax(prob_concat, axis=1).astype(np.int32), embedW)

        # insert eos
        eos = embedW[0][None]
        new_out = []
        count = 0
        for i, x in enumerate(xs):
            new_out.append(eos)
            new_out.append(out_concat[count:count + len(x) - 2])
            new_out.append(eos)
            count += len(x) - 2
        out_concat = F.concat(new_out, axis=0)

        def embed_func(x): return F.embed_id(x, embedW, ignore_label=-1)
        raw_concat = F.concat(
            sequence_embed(embed_func, xs, self.dropout), axis=0)
        b, u = raw_concat.shape

        mask = self.xp.broadcast_to(
            (self.xp.random.rand(b, 1) < augment_ratio),
            raw_concat.shape)
        out_concat = F.where(mask, out_concat, raw_concat)

        x_len = [len(x) for x in xs]
        x_section = np.cumsum(x_len[:-1])
        out_concat = F.dropout(out_concat, dropout)
        exs = F.split_axis(out_concat, x_section, 0)
        return exs 
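The sampling step in predict_embed is Gumbel-max sampling: add Gumbel noise to the temperature-scaled logits, take the argmax, and embed the sampled ids. A stripped-down sketch with hypothetical sizes (taking the argmax of the noised logits directly is equivalent to taking it after the softmax):

logits = np.random.rand(6, 1000).astype(np.float32)             # (num_tokens, vocabulary)
gumbel = np.random.gumbel(size=logits.shape).astype(np.float32)
sampled = np.argmax(logits + gumbel, axis=1).astype(np.int32)   # one sampled word id per token
embedW = np.random.rand(1000, 50).astype(np.float32)            # (vocabulary, embedding dim)
out = F.embed_id(sampled, embedW, ignore_label=-1)              # (6, 50)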
Example #8
Source File: network.py    From ConvLSTM with MIT License
def __call__(self, x, t):
        self.e1.reset_state()
        self.e2.reset_state()
        self.e3.reset_state()

        We = self.xp.array([[i == j for i in range(self.n)] for j in range(self.n)], dtype=self.xp.float32)
        for i in range(x.shape[1]):

            # save input images
            if self.directory is not None:
                for j in range(x.shape[0]):
                    filename = self.directory + "input" + str(j) + "-" + str(i) + ".png"
                    self.save_image(x[j, i, :, :].data, filename)
                
            xi = F.embed_id(x[:, i, :, :], We)
            xi = F.transpose(xi, (0, 3, 1, 2))
            
            h1 = self.e1(xi)
            h2 = self.e2(h1)
            self.e3(h2)

        self.p1.reset_state(self.e1.pc, self.e1.ph)
        self.p2.reset_state(self.e2.pc, self.e2.ph)
        self.p3.reset_state(self.e3.pc, self.e3.ph)

        loss = None
        
        for i in range(t.shape[1]):
            xs = x.shape
            
            h1 = self.p1(Variable(self.xp.zeros((xs[0], self.n, xs[2], xs[3]), dtype=self.xp.float32)))
            h2 = self.p2(h1)
            h3 = self.p3(h2)

            h = F.concat((h1, h2, h3))
            ans = self.last(h)

            # save output and teacher images
            if self.directory is not None:
                for j in range(t.shape[0]):
                    filename = self.directory + "truth" + str(j) + "-" + str(i) + ".png"
                    self.save_image(t[j, i, :, :].data, filename)
                    filename = self.directory + "output" + str(j) + "-" + str(i) + ".png"
                    self.save_image(self.xp.argmax(ans[j, :, :, :].data, 0).astype(np.int32), filename)

            cur_loss = F.softmax_cross_entropy(ans, t[:, i, :, :])
            loss = cur_loss if loss is None else loss + cur_loss
            
        reporter.report({'loss': loss}, self)
        
        return loss 
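The We matrix built in __call__ is just an n x n identity, so F.embed_id(x[:, i, :, :], We) turns an integer label map into a one-hot feature map that the convolutional layers can consume. That trick in isolation, with hypothetical sizes:

n = 4
We = np.eye(n, dtype=np.float32)                                # (n, n) identity matrix
labels = np.random.randint(0, n, (2, 8, 8)).astype(np.int32)    # (batch, height, width) label map
onehot = F.embed_id(labels, We)                                 # (2, 8, 8, n)
onehot = F.transpose(onehot, (0, 3, 1, 2))                      # (2, n, 8, 8), NCHW layout for convolutions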
Example #9
Source File: bi_lstm.py    From deep-crf with MIT License
def __call__(self, x_data, x_char_data=None, x_additional=None):
        hx = None
        cx = None
        self.n_length = [len(_x) for _x in x_data]
        self.inds = np.argsort([-len(_x) for _x in x_data]).astype('i')

        if self.use_char:
            # CharCNN
            x_char_data_flat = []
            for _ in x_char_data:
                x_char_data_flat.extend(_)
            char_vecs = self.char_cnn(x_char_data_flat)
            char_index = self.char_cnn.char_index(self.n_length)

        xs = []
        for i, x in enumerate(x_data):
            x = my_variable(x, volatile=not self.train)
            x = self.word_embed(x)

            if self.use_char:
                x_char = F.embed_id(char_index[i], char_vecs, ignore_label=-1)
                x = F.concat([x, x_char], axis=1)

            if x_additional:
                for add_i in six.moves.xrange(self.n_add_feature):
                    x_add = x_additional[add_i][i]
                    x_add = my_variable(x_add, volatile=not self.train)
                    add_emb_layer = self.get_layer('add_embed_' + str(add_i))
                    x_add = add_emb_layer(x_add)
                    x = F.concat([x, x_add], axis=1)

            x = my_dropout(x, ratio=self.use_dropout, train=self.train)
            xs.append(x)

        _hy_f, _cy_f, h_vecs = self.rnn(hx=hx, cx=cx, xs=xs, )

        h_vecs = F.concat(h_vecs, axis=0)
        if self.use_dropout:
            h_vecs = my_dropout(h_vecs, ratio=self.use_dropout, train=self.train)

        # Label Predict
        output = self.output_layer(h_vecs)
        output_list = F.split_axis(output, output.data.shape[0], axis=0)

        return output_list
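In the character branch, char_vecs is a matrix holding one CharCNN feature vector per word of the whole batch; F.embed_id gathers the rows belonging to one sentence, with -1 indices producing zero rows for padding, and the result is concatenated with the word embeddings. A minimal sketch with hypothetical sizes:

char_vecs = np.random.rand(10, 30).astype(np.float32)          # CharCNN features for 10 words
char_index = np.array([0, 1, 2, -1], dtype=np.int32)           # rows for one sentence, -1 = padding
x_char = F.embed_id(char_index, char_vecs, ignore_label=-1)    # (4, 30), zero row for the pad
word_emb = np.random.rand(4, 100).astype(np.float32)           # word embeddings for the 4 tokens
x = F.concat([word_emb, x_char], axis=1)                       # (4, 130)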