Python beam search
60 Python code examples are found related to "beam search".
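Most of the examples below share the same core loop: keep the k highest-scoring partial sequences, expand each one with every candidate next token, and re-rank by accumulated log-probability. The following is a minimal, self-contained sketch of that idea, included here only for orientation; it is not taken from any of the listed projects, and the toy scoring table, helper names, and token ids are invented.

import numpy as np

def simple_beam_search(log_prob_fn, start_id, end_id, beam_size=3, max_len=20):
    """Generic beam search over a step function.

    log_prob_fn(prefix) must return a 1-D array of log-probabilities
    over the vocabulary for the next token given the prefix.
    """
    beams = [([start_id], 0.0)]          # (token prefix, cumulative log-prob)
    finished = []
    for _ in range(max_len):
        candidates = []
        for prefix, score in beams:
            log_probs = log_prob_fn(prefix)
            # Expand only the top beam_size tokens of each beam.
            for tok in np.argsort(log_probs)[-beam_size:]:
                candidates.append((prefix + [int(tok)], score + float(log_probs[tok])))
        # Keep the globally best beam_size candidates.
        candidates.sort(key=lambda c: c[1], reverse=True)
        beams = []
        for prefix, score in candidates[:beam_size]:
            (finished if prefix[-1] == end_id else beams).append((prefix, score))
        if not beams:
            break
    return sorted(finished or beams, key=lambda c: c[1], reverse=True)

# Toy usage: a fixed 5-token vocabulary with a random per-step distribution.
rng = np.random.default_rng(0)
table = np.log(rng.dirichlet(np.ones(5), size=50))
print(simple_beam_search(lambda p: table[len(p) % 50], start_id=0, end_id=4))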
Example 1
Source File: recognizer.py From attention-lvcsr with MIT License | 10 votes |
def beam_search(self, inputs, **kwargs):
    # When a recognizer is unpickled, self.beam_size is available
    # but beam search has to be recompiled.
    self.init_beam_search(self.beam_size)
    inputs = dict(inputs)
    max_length = int(self.bottom.num_time_steps(**inputs) /
                     self.max_decoded_length_scale)
    search_inputs = {}
    for var in self.inputs.values():
        search_inputs[var] = inputs.pop(var.name)[:, numpy.newaxis, ...]
    if inputs:
        raise Exception(
            'Unknown inputs passed to beam search: {}'.format(
                inputs.keys()))
    outputs, search_costs = self._beam_search.search(
        search_inputs, self.eos_label, max_length,
        ignore_first_eol=self.data_prepend_eos,
        **kwargs)
    return outputs, search_costs
Example 2
Source File: beam_search.py From MTMSN with Apache License 2.0 | 6 votes |
def beam_search(encoder_logits, mask, beam_size, max_count):
    max_len = sum(mask)
    # START
    top_seqs = [[(0, 1.0)]]
    # loop
    for i in range(1, max_len + 1):
        top_seqs = beam_search_step(i, encoder_logits, top_seqs, mask, beam_size, max_count)
    number_indices_list, sign_indices_list, scores_list = [], [], []
    for seq in top_seqs:
        number_indices, sign_indices = [], []
        for i, word in enumerate(seq):
            sign_index, score = word
            if sign_index > 0 and mask[i]:
                number_indices.append(i)
                sign_indices.append(sign_index)
        if number_indices == [] and sign_indices == []:
            continue
        number_indices_list.append(number_indices)
        sign_indices_list.append(sign_indices)
        seq_score = reduce_mul([_score for _, _score in seq])
        scores_list.append(seq_score)
    if scores_list != []:
        scores_list = softmax(np.array(scores_list))
    return number_indices_list, sign_indices_list, scores_list.tolist()
Example 3
Source File: snippets.py From bert4keras with Apache License 2.0 | 6 votes |
def beam_search(self, inputs, topk, states=None, min_ends=1):
    """Beam search decoding.
    Note: topk here is the beam size.
    Returns: the best decoded sequence.
    """
    inputs = [np.array([i]) for i in inputs]
    output_ids, output_scores = self.first_output_ids, np.zeros(1)
    for step in range(self.maxlen):
        scores, states = self.predict(
            inputs, output_ids, states, 'logits'
        )  # compute scores at the current step
        if step == 0:  # after the first step, repeat the inputs topk times
            inputs = [np.repeat(i, topk, axis=0) for i in inputs]
        scores = output_scores.reshape((-1, 1)) + scores  # accumulate scores
        indices = scores.argpartition(-topk, axis=None)[-topk:]  # keep only the topk
        indices_1 = indices // scores.shape[1]  # row indices
        indices_2 = (indices % scores.shape[1]).reshape((-1, 1))  # column indices
        output_ids = np.concatenate([output_ids[indices_1], indices_2], 1)  # update outputs
        output_scores = np.take_along_axis(
            scores, indices, axis=None
        )  # update scores
        end_counts = (output_ids == self.end_id).sum(1)  # count end tokens so far
        if output_ids.shape[1] >= self.minlen:  # minimum-length check
            best_one = output_scores.argmax()  # best-scoring candidate
            if end_counts[best_one] == min_ends:  # if it has already terminated
                return output_ids[best_one]  # output it directly
            else:  # otherwise keep only the unfinished sequences
                flag = (end_counts < min_ends)  # mark unfinished sequences
                if not flag.all():  # if some have finished
                    inputs = [i[flag] for i in inputs]  # drop finished sequences
                    output_ids = output_ids[flag]  # drop finished sequences
                    output_scores = output_scores[flag]  # drop finished sequences
                    end_counts = end_counts[flag]  # drop finished end counts
                    topk = flag.sum()  # shrink topk accordingly
    # reached max length; output the best sequence
    return output_ids[output_scores.argmax()]
Example 4
Source File: Sampler.py From pix2code with Apache License 2.0 | 6 votes |
def predict_beam_search(self, model, input_img, beam_width=3, require_sparse_label=True, sequence_length=150):
    predictions = START_TOKEN
    out_probas = []

    current_context = [self.voc.vocabulary[PLACEHOLDER]] * (self.context_length - 1)
    current_context.append(self.voc.vocabulary[START_TOKEN])
    if require_sparse_label:
        current_context = Utils.sparsify(current_context, self.output_size)

    beam = BeamSearch(beam_width=beam_width)

    self.recursive_beam_search(model, input_img, current_context, beam, beam.root, sequence_length)

    predicted_sequence, probas_sequence = beam.search()

    for k in range(0, len(predicted_sequence)):
        prediction = predicted_sequence[k]
        probas = probas_sequence[k]
        out_probas.append(probas)

        predictions += self.voc.token_lookup[prediction]

    return predictions, out_probas
Example 5
Source File: beam_search.py From MTMSN with Apache License 2.0 | 6 votes |
def beam_search_step(step, encoder_logits, top_seqs, mask, k, max_count):
    all_seqs = []
    for seq in top_seqs:
        seq_score = reduce_mul([_score for _, _score in seq])
        # get current step using encoder_context & seq
        current_step = decode_step(step, encoder_logits)
        for i, word in enumerate(current_step):
            if i >= k:
                break
            word_index, word_score = word
            score = seq_score * word_score
            rs_seq = seq + [word]
            all_seqs.append((rs_seq, score))
    all_seqs = sorted(all_seqs, key=lambda seq: seq[1], reverse=True)
    # Expression constraint
    filtered_seqs = [seq for seq, _ in all_seqs if check_exceed(seq, mask, max_count)]
    # topk_seqs = [seq for seq in all_seqs[:k]]
    topk_seqs = [seq for seq in filtered_seqs[:k]]
    return topk_seqs
Example 6
Source File: seq2seq.py From seq2seq with Apache License 2.0 | 6 votes |
def beamSearchInfer(self, sample, k):
    samples = []
    decoder_input = Variable(torch.LongTensor([[sample[0][-1]]]))
    if USE_CUDA:
        decoder_input = decoder_input.cuda()
    sequence, pre_scores, fin_scores, ave_scores, decoder_context, decoder_hidden, decoder_attention, encoder_outputs = sample
    decoder_output, decoder_context, decoder_hidden, decoder_attention = self.decoder(decoder_input, decoder_context, decoder_hidden, encoder_outputs)

    # choose topk
    topk = decoder_output.data.topk(self.top_k)
    for k in range(self.top_k):
        topk_prob = topk[0][0][k]
        topk_index = int(topk[1][0][k])
        pre_scores += topk_prob
        fin_scores = pre_scores - (k - 1) * self.alpha
        samples.append([sequence + [topk_index], pre_scores, fin_scores, ave_scores,
                        decoder_context, decoder_hidden, decoder_attention, encoder_outputs])
    return samples
Example 7
Source File: beamSearch.py From pycodesuggest with MIT License | 6 votes |
def beam_search_tree(self, session, root):
    def beam(tree_node):
        feed_dict = {
            self.model.input_data: np.array([np.array([tree_node.token_id])]),
            self.model.initial_state: tree_node.state
        }
        probabilities, state = session.run([self.model.predict, self.model.final_state], feed_dict)
        best_k_indices = best_k(probabilities[0], self.beam_width)
        for token_idx in best_k_indices:
            probability = probabilities[0][token_idx]
            tree_node.add_child(BeamSearchTreeNode(token_idx, state, probability))

    def beam_search_recursive(tree, current_depth):
        if current_depth < self.depth:
            for child in tree.children:
                beam(child)
                beam_search_recursive(child, current_depth + 1)

    beam(root)
    beam_search_recursive(root, 1)
    return root
Example 8
Source File: fix_beam.py From Aegean with Academic Free License v3.0 | 5 votes |
def search_beam(hdulist):
    """
    Will search the beam info from the HISTORY
    :param hdulist:
    :return:
    """
    header = hdulist[0].header
    history = header['HISTORY']
    history_str = str(history)
    # AIPS   CLEAN BMAJ=  1.2500E-02 BMIN=  1.2500E-02 BPA=   0.00
    if 'BMAJ' in history_str:
        return True
    else:
        return False
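The function above only inspects FITS HISTORY cards, so it can be exercised without real observation data. A small usage sketch, assuming astropy is installed and search_beam above is in scope; the HISTORY string is made up to mimic the AIPS format quoted in the comment:

from astropy.io import fits

# Build a minimal HDU list whose header carries an AIPS-style CLEAN HISTORY card.
hdu = fits.PrimaryHDU()
hdu.header.add_history('AIPS   CLEAN BMAJ=  1.2500E-02 BMIN=  1.2500E-02 BPA=   0.00')
print(search_beam(fits.HDUList([hdu])))      # True, 'BMAJ' appears in the HISTORY

hdu2 = fits.PrimaryHDU()
hdu2.header.add_history('no beam information here')
print(search_beam(fits.HDUList([hdu2])))     # False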
Example 9
Source File: core.py From transformer-keras with Apache License 2.0 | 5 votes |
def beam_search_text_decode(self, texts, k=5, delimiter=' '):
    assert self.src_tokenizer is not None
    sequences = self.src_tokenizer.texts_to_sequences(texts)
    return self.beam_search_sequence_decode(sequences, k, delimiter)
Example 10
Source File: beamsearch_runner.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License | 5 votes |
def beam_search_runner_range(
        output_series: str,
        decoder: BeamSearchDecoder,
        max_rank: int = None,
        postprocess: Callable[[List[str]], List[str]] = None) -> List[BeamSearchRunner]:
    """Return beam search runners for a range of ranks from 1 to max_rank.

    This means there is max_rank output series where the n-th series
    contains the n-th best hypothesis from the beam search.

    Args:
        output_series: Prefix of output series.
        decoder: Beam search decoder shared by all runners.
        max_rank: Maximum rank of the hypotheses.
        postprocess: Series-level postprocess applied on output.

    Returns:
        List of beam search runners getting hypotheses with rank from 1 to
        max_rank.
    """
    check_argument_types()
    if max_rank is None:
        max_rank = decoder.beam_size

    if max_rank > decoder.beam_size:
        raise ValueError(
            ("The maximum rank ({}) cannot be "
             "bigger than beam size {}.").format(
                 max_rank, decoder.beam_size))

    return [BeamSearchRunner("{}.rank{:03d}".format(output_series, r),
                             decoder, r, postprocess)
            for r in range(1, max_rank + 1)]
Example 11
Source File: predict.py From torch-light with MIT License | 5 votes |
def beam_search(self, w_scores, end_seqs, top_seqs):
    max_scores, max_idxs = w_scores.sort(-1, descending=True)
    max_scores = (max_scores[:, :self.beam_size]).tolist()
    max_idxs = (max_idxs[:, :self.beam_size]).tolist()
    all_seqs, seen = [], []
    for index, seq in enumerate(top_seqs):
        seq_idxs, word_index, seq_score = seq
        if seq_idxs[-1] == EOS:
            all_seqs += [(seq, seq_score, True)]
            continue
        for score, widx in zip(max_scores[index], max_idxs[index]):
            idx = self.widx2didx(widx)
            seq_idxs, word_index, seq_score = copy.deepcopy(seq)
            seq_score += score
            seq_idxs += [idx]
            word_index += [widx]
            if word_index not in seen:
                seen.append(word_index)
                all_seqs += [((seq_idxs, word_index, seq_score), seq_score, idx == EOS)]
    all_seqs += [((seq[0], seq[1], seq[-1]), seq[-1], True) for seq in end_seqs]
    top_seqs = sorted(all_seqs, key=lambda seq: seq[1], reverse=True)[:self.beam_size]
    all_done, done_nums = self.check_all_done(top_seqs)
    top_seqs = [seq for seq, _, _ in top_seqs]
    return top_seqs, all_done, self.beam_size - done_nums
Example 12
Source File: agent_factory.py From neural-symbolic-machines with Apache License 2.0 | 5 votes |
def beam_search(self, envs=None, beam_size=1, use_cache=False, greedy=False):
    """Returns Actions, rewards, obs and probs."""
    samples = beam_search(
        self.model, envs, beam_size=beam_size, use_cache=use_cache, greedy=greedy)
    return samples
Example 13
Source File: beam_search.py From athena with Apache License 2.0 | 5 votes |
def beam_search_score(self, candidate_holder, encoder_outputs):
    """Call the time propagating function, fetch the acoustic score at the current step.

    If needed, call the auxiliary scorer and update cand_states in candidate_holder.

    Args:
        candidate_holder: its cand_seqs and cand_logits are needed by the
            transformer decoder to calculate the output.
            type: CandidateHolder
        encoder_outputs: the encoder outputs from the transformer encoder.
            type: tuple, (encoder_outputs, input_mask)
    """
    cand_logits = tf.TensorArray(
        tf.float32, size=0, dynamic_size=True, clear_after_read=False
    )
    cand_logits = cand_logits.unstack(
        tf.transpose(candidate_holder.cand_logits, [1, 0, 2])
    )
    cand_seqs = tf.TensorArray(
        tf.float32, size=0, dynamic_size=True, clear_after_read=False
    )
    cand_seqs = cand_seqs.unstack(tf.transpose(candidate_holder.cand_seqs, [1, 0]))
    logits, new_cand_logits, states = self.decoder_one_step(
        cand_logits, cand_seqs, self.states, encoder_outputs
    )
    new_states = candidate_holder.cand_states
    self.states = states
    cand_scores = tf.expand_dims(candidate_holder.cand_scores, axis=1)
    Z = tf.reduce_logsumexp(logits, axis=(1,), keepdims=True)
    logprobs = logits - Z
    new_scores = logprobs + cand_scores  # shape: (cand_num, num_syms)
    if self.scorers:
        for scorer in self.scorers:
            other_scores, new_states = scorer.score(candidate_holder, new_scores)
            if other_scores is not None:
                new_scores += other_scores
    new_cand_logits = tf.transpose(new_cand_logits.stack(), [1, 0, 2])
    return new_scores, new_cand_logits, new_states
Example 14
Source File: experiment.py From neural-symbolic-machines with Apache License 2.0 | 5 votes |
def beam_search_eval(agent, envs, writer=None):
    env_batch_size = FLAGS.eval_batch_size

    env_iterator = data_utils.BatchIterator(
        dict(envs=envs), shuffle=False, batch_size=env_batch_size)
    dev_samples = []
    dev_samples_in_beam = []
    for j, batch_dict in enumerate(env_iterator):
        t1 = time.time()
        batch_envs = batch_dict['envs']
        tf.logging.info('=' * 50)
        tf.logging.info('eval, batch {}: {} envs'.format(j, len(batch_envs)))
        new_samples_in_beam = agent.beam_search(
            batch_envs, beam_size=FLAGS.eval_beam_size)
        dev_samples_in_beam += new_samples_in_beam
        tf.logging.info('{} samples in beam, batch {}.'.format(
            len(new_samples_in_beam), j))
        t2 = time.time()
        tf.logging.info('{} sec used in evaluator batch {}.'.format(t2 - t1, j))

    # Account for beam search where the beam doesn't
    # contain any examples without error, which will make
    # len(dev_samples) smaller than len(envs).
    dev_samples = select_top(dev_samples_in_beam)
    dev_avg_return, dev_avg_len = agent.evaluate(
        dev_samples, writer=writer, true_n=len(envs))
    tf.logging.info('{} samples in non-empty beam.'.format(len(dev_samples)))
    tf.logging.info('true n is {}'.format(len(envs)))
    tf.logging.info('{} questions in dev set.'.format(len(envs)))
    tf.logging.info('{} dev avg return.'.format(dev_avg_return))
    tf.logging.info('dev: avg return: {}, avg length: {}.'.format(
        dev_avg_return, dev_avg_len))
    return dev_avg_return, dev_samples, dev_samples_in_beam
Example 15
Source File: decoders.py From mead-baseline with Apache License 2.0 | 5 votes |
def beam_search(self, encoder_outputs, **kwargs):
    alpha = kwargs.get('alpha')
    if alpha is not None:
        kwargs['length_penalty'] = partial(gnmt_length_penalty, alpha=alpha)
    return RNNDecoder.BeamSearch(parent=self, **kwargs)(encoder_outputs)
Example 16
Source File: model_ensemble.py From deepQuest with BSD 3-Clause "New" or "Revised" License | 5 votes |
def BeamSearchNet(self):
    """
    DEPRECATED, use predictBeamSearchNet() instead.
    """
    print "WARNING!: deprecated function, use predictBeamSearchNet() instead"
    return self.predictBeamSearchNet()
Example 17
Source File: tsd_net.py From tatk with Apache License 2.0 | 5 votes |
def beam_search_decode(self, pz_dec_outs, u_enc_out, m_tm1, u_input_np, last_hidden, degree_input,
                       bspan_index):
    vars = torch.split(pz_dec_outs, 1, dim=1), torch.split(u_enc_out, 1, dim=1), torch.split(
        m_tm1, 1, dim=1), torch.split(last_hidden, 1, dim=1), torch.split(degree_input, 1, dim=0)
    decoded = []
    for i, (pz_dec_out_s, u_enc_out_s, m_tm1_s, last_hidden_s, degree_input_s) in enumerate(zip(*vars)):
        decoded_s = self.beam_search_decode_single(pz_dec_out_s, u_enc_out_s, m_tm1_s,
                                                   u_input_np[:, i].reshape((-1, 1)),
                                                   last_hidden_s, degree_input_s, bspan_index[i])
        decoded.append(decoded_s)
    return [list(_.view(-1)) for _ in decoded]
Example 18
Source File: decoding.py From jsalt-2019-mt-tutorial with MIT License | 5 votes |
def beam_search(
    model,
    src_tokens,
    beam_size=1,
    max_len=200,
    device=None,
):
    # Either decode on the model's device or on the specified device
    # (in which case move the model accordingly)
    if device is None:
        device = list(model.parameters())[0].device
    else:
        model = model.to(device)
    # TODO 4: implement beam search
    # Hints:
    #  - For each beam you need to keep track of at least:
    #    1. The previously generated tokens
    #    2. The decoder state
    #    3. The score (log probability of the generated tokens)
    #  - Be careful of how many decoding steps you need to perform at each step
    #  - Think carefully about the stopping criterion (there are 2)
    #  - As a sanity check you can verify that setting beam_size to 1 returns
    #    the same result as greedy decoding
    raise NotImplementedError("TODO 4")
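The exercise above is intentionally left unimplemented in the source. The following is one possible sketch of what its hints describe, not the tutorial's reference solution. To keep it self-contained it assumes a hypothetical decode_fn(prev_token, state) that returns per-token log-probabilities and the next decoder state, rather than the tutorial's model class.

import torch

def beam_search_sketch(decode_fn, bos_idx, eos_idx, beam_size=4, max_len=200):
    # Each beam keeps (1) generated tokens, (2) decoder state, (3) cumulative log-prob.
    beams = [([bos_idx], None, 0.0)]
    finished = []
    for _ in range(max_len):                        # stopping criterion 1: length limit
        candidates = []
        for tokens, state, score in beams:
            log_probs, new_state = decode_fn(tokens[-1], state)
            top_lp, top_idx = log_probs.topk(beam_size)
            for lp, idx in zip(top_lp.tolist(), top_idx.tolist()):
                candidates.append((tokens + [idx], new_state, score + lp))
        candidates.sort(key=lambda b: b[2], reverse=True)
        beams = []
        for cand in candidates[:beam_size]:
            (finished if cand[0][-1] == eos_idx else beams).append(cand)
        if not beams:                               # stopping criterion 2: every beam has ended
            break
    best = max(finished or beams, key=lambda b: b[2])
    return best[0]

# Toy check: with beam_size=1 this sketch reduces to greedy decoding.
def toy_decode_fn(prev_token, state):
    logits = torch.randn(10)                        # fake 10-token vocabulary
    return torch.log_softmax(logits, dim=-1), state

print(beam_search_sketch(toy_decode_fn, bos_idx=0, eos_idx=9, beam_size=1, max_len=5))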
Example 19
Source File: model.py From Image-Caption-Generator with MIT License | 5 votes |
def generate_caption_beam_search(model, tokenizer, image, max_length, beam_index=3):
    # in_text --> [[idx, prob]]; prob=0 initially
    in_text = [[tokenizer.texts_to_sequences(['startseq'])[0], 0.0]]
    while len(in_text[0][0]) < max_length:
        tempList = []
        for seq in in_text:
            padded_seq = pad_sequences([seq[0]], maxlen=max_length)
            preds = model.predict([image, padded_seq], verbose=0)
            # Take the top (i.e. highest-probability) `beam_index` predictions
            top_preds = np.argsort(preds[0])[-beam_index:]
            # Extend the sequence with each of the top `beam_index` predictions
            for word in top_preds:
                next_seq, prob = seq[0][:], seq[1]
                next_seq.append(word)
                # Update probability
                prob += preds[0][word]
                # Append as input for generating the next word
                tempList.append([next_seq, prob])
        in_text = tempList
        # Sorting according to the probabilities
        in_text = sorted(in_text, reverse=False, key=lambda l: l[1])
        # Take the top words
        in_text = in_text[-beam_index:]
    in_text = in_text[-1][0]
    final_caption_raw = [int_to_word(i, tokenizer) for i in in_text]
    final_caption = []
    for word in final_caption_raw:
        if word == 'endseq':
            break
        else:
            final_caption.append(word)
    final_caption.append('endseq')
    return ' '.join(final_caption)
Example 20
Source File: beam_search.py From texar-pytorch with Apache License 2.0 | 5 votes |
def beam_search(
        symbols_to_logits_fn: Callable[[torch.Tensor, State], Tuple[torch.Tensor, State]],
        initial_ids: torch.LongTensor,
        beam_size: int,
        decode_length: int,
        vocab_size: int,
        alpha: float,
        eos_id: int,
        states: State,
        stop_early: bool = True) -> Tuple[torch.LongTensor, torch.Tensor]:
    ...
Example 21
Source File: model.py From RL-based-Graph2Seq-for-NQG with Apache License 2.0 | 5 votes |
def beam_search(batch, network, vocab, config):
    with torch.no_grad():
        ext_vocab_size = batch['oov_dict'].ext_vocab_size if batch['oov_dict'] else None

        hypotheses = batch_beam_search(network, batch, ext_vocab_size,
                                       config['beam_size'],
                                       min_out_len=config['min_out_len'],
                                       max_out_len=config['max_out_len'],
                                       len_in_words=config['out_len_in_words'],
                                       block_ngram_repeat=config['block_ngram_repeat'])
        to_decode = [each[0].tokens[1:] for each in hypotheses]  # the first token is SOS
        decoded_batch = batch_decoded_index2word(to_decode, vocab, batch['oov_dict'])
        return decoded_batch
Example 22
Source File: callbacks.py From deepQuest with BSD 3-Clause "New" or "Revised" License | 5 votes |
def checkDefaultParamsBeamSearch(params):
    required_params = ['model_inputs', 'model_outputs', 'dataset_inputs', 'dataset_outputs']
    default_params = {'max_batch_size': 50,
                      'beam_size': 5,
                      'maxlen': 30,
                      'normalize': False,
                      'words_so_far': False,
                      'n_parallel_loaders': 5,
                      'optimized_search': False,
                      'temporally_linked': False,
                      'link_index_id': 'link_index',
                      'state_below_index': -1,
                      'pos_unk': False,
                      'max_eval_samples': None,
                      'search_pruning': False,
                      'normalize_probs': False,
                      'alpha_factor': 0.0,
                      'coverage_penalty': False,
                      'length_penalty': False,
                      'length_norm_factor': 0.0,
                      'coverage_norm_factor': 0.0,
                      'output_max_length_depending_on_x': False,
                      'output_max_length_depending_on_x_factor': 3,
                      'output_min_length_depending_on_x': False,
                      'output_min_length_depending_on_x_factor': 2
                      }

    for k, v in params.iteritems():
        if k in default_params.keys() or k in required_params:
            default_params[k] = v
    for k in required_params:
        if k not in default_params:
            raise Exception('The beam search parameter ' + k + ' must be specified.')
    return default_params


###################################################
# Performance evaluation callbacks
###################################################
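Because checkDefaultParamsBeamSearch only overlays the user-supplied values on the defaults and then checks for the required keys, it can be exercised on its own. A small usage sketch; the input/output names below are invented for illustration, and the snippet follows the project's Python 2 style (iteritems, print statements):

params = {'model_inputs': ['source_text', 'state_below'],
          'model_outputs': ['target_text'],
          'dataset_inputs': ['source_text', 'state_below'],
          'dataset_outputs': ['target_text'],
          'beam_size': 12,            # override one default
          'normalize_probs': True}

search_params = checkDefaultParamsBeamSearch(params)
print search_params['beam_size']      # 12 (overridden)
print search_params['maxlen']         # 30 (default kept)

# Omitting a required key such as 'model_outputs' raises:
# Exception: The beam search parameter model_outputs must be specified.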
Example 23
Source File: neuralSearch.py From TikZ with GNU General Public License v3.0 | 5 votes |
def beamSearchGraph(self, problem, initialProgram, size, steps):
    frontier = [initialProgram]

    for step in range(steps):
        newFrontier = []
        for f in frontier:
            for _, candidate in self.beam(self.residual(problem, self.evaluate(f)), f, size):
                # print "STEP = %s; PARENT = %s; CHILD = %s;"%(step,f,candidate)
                newFrontier.append(candidate)
        # newFrontier = removeDuplicateStrings(newFrontier)
        newFrontier = [(self.value(problem, f), f) for f in newFrontier]
        newFrontier.sort(reverse=True)
        print "New frontier ( > 0):"
        for v, f in newFrontier:
            if v > 0.0:
                print "V = ", v, "\t", f
            if self.solvesTask(problem, f):
                print "SOLVED TASK!"
                return
        print "(end of new frontier)"
        print

        # import pdb
        # pdb.set_trace()

        frontier = [f for v, f in newFrontier[:size]]
        print "Step %d of graph search:" % step
        for f in frontier:
            print f
        print "(end of step)"
        print
Example 24
Source File: Beam.py From Transformer with Apache License 2.0 | 5 votes |
def beam_search(src, model, SRC, TRG, opt):
    outputs, e_outputs, log_scores = init_vars(src, model, SRC, TRG, opt)
    eos_tok = TRG.vocab.stoi['<eos>']
    src_mask = (src != SRC.vocab.stoi['<pad>']).unsqueeze(-2)
    ind = None
    for i in range(2, opt.max_len):
        trg_mask = nopeak_mask(i, opt)

        out = model.out(model.decoder(outputs[:, :i], e_outputs, src_mask, trg_mask))
        out = F.softmax(out, dim=-1)

        outputs, log_scores = k_best_outputs(outputs, out, log_scores, i, opt.k)

        ones = (outputs == eos_tok).nonzero()  # Occurrences of end symbols for all input sentences.
        sentence_lengths = torch.zeros(len(outputs), dtype=torch.long).cuda()
        for vec in ones:
            i = vec[0]
            if sentence_lengths[i] == 0:  # First end symbol has not been found yet
                sentence_lengths[i] = vec[1]  # Position of first end symbol

        num_finished_sentences = len([s for s in sentence_lengths if s > 0])

        if num_finished_sentences == opt.k:
            alpha = 0.7
            div = 1 / (sentence_lengths.type_as(log_scores) ** alpha)
            _, ind = torch.max(log_scores * div, 1)
            ind = ind.data[0]
            break

    if ind is None:
        length = (outputs[0] == eos_tok).nonzero()[0]
        return ' '.join([TRG.vocab.itos[tok] for tok in outputs[0][1:length]])
    else:
        length = (outputs[ind] == eos_tok).nonzero()[0]
        return ' '.join([TRG.vocab.itos[tok] for tok in outputs[ind][1:length]])
Example 25
Source File: unsup_net.py From SEDST with MIT License | 5 votes |
def beam_search_decode(self, pz_dec_outs, pz_proba, u_enc_out, m_tm1, last_hidden, eos_token_id,
                       flag=False):
    vars = torch.split(pz_dec_outs, 1, dim=1), torch.split(pz_proba, 1, dim=1), \
           torch.split(u_enc_out, 1, dim=1), torch.split(m_tm1, 1, dim=1), \
           torch.split(last_hidden, 1, dim=1)
    decoded = []
    for pz_dec_out_s, pz_proba_s, u_enc_out_s, m_tm1_s, last_hidden_s in zip(*vars):
        decoded_s = self.beam_search_decode_single(pz_dec_out_s, pz_proba_s, u_enc_out_s, m_tm1_s,
                                                   last_hidden_s, eos_token_id, flag)
        decoded.append(decoded_s)
    return [list(_.view(-1)) for _ in decoded]
Example 26
Source File: model.py From Image-Caption-Generator with MIT License | 5 votes |
def evaluate_model_beam_search(model, images, captions, tokenizer, max_length, beam_index=3):
    actual, predicted = list(), list()
    for image_id, caption_list in tqdm(captions.items()):
        yhat = generate_caption_beam_search(model, tokenizer, images[image_id], max_length, beam_index=beam_index)
        ground_truth = [caption.split() for caption in caption_list]
        actual.append(ground_truth)
        predicted.append(yhat.split())
    print('BLEU Scores :')
    print('A perfect match results in a score of 1.0, whereas a perfect mismatch results in a score of 0.0.')
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))
Example 27
Source File: base_decoder.py From lingvo with Apache License 2.0 | 5 votes |
def BeamSearchDecodeWithTheta(self, theta, encoder_outputs, num_hyps_per_beam_override=0):
    return self.beam_search.BeamSearchDecode(theta, encoder_outputs,
                                             num_hyps_per_beam_override,
                                             self._InitBeamSearchStateCallback,
                                             self._PreBeamSearchStepCallback,
                                             self._PostBeamSearchStepCallback)
Example 29
Source File: base_decoder.py From lingvo with Apache License 2.0 | 5 votes |
def BeamSearchDecode(self, encoder_outputs, num_hyps_per_beam_override=0):
    """Performs beam search based decoding.

    Args:
      encoder_outputs: the outputs of the encoder.
      num_hyps_per_beam_override: If set to a value <= 0, this parameter is
        ignored. If set to a value > 0, then this value will be used to
        override p.num_hyps_per_beam.

    Returns:
      `.BeamSearchDecodeOutput`, A namedtuple whose elements are tensors.
    """
    return self.BeamSearchDecodeWithTheta(self.theta, encoder_outputs,
                                          num_hyps_per_beam_override)
Example 30
Source File: seq2seq_atten.py From video_captioning_rl with MIT License | 5 votes |
def beam_search(self, frames, flengths, beam_size=5):
    video_features = self.encoder.forward(frames, flengths)
    predicted_target = self.decoder.beam_search(video_features, flengths, beam_size=beam_size)
    return predicted_target


# Based on tutorials/08 - Language Model
# RNN Based Language Model
Example 31
Source File: seq2seq.py From speech with Apache License 2.0 | 4 votes |
def beam_search(self, batch, beam_size=10, max_len=200):
    x, y = self.collate(*batch)
    start_tok = y.data[0, 0]
    end_tok = y.data[0, -1]  # TODO
    if self.is_cuda:
        x = x.cuda()
        y = y.cuda()
    x = self.encode(x)
    y = y[:, 0:1].clone()

    beam = [((start_tok,), 0, None)]
    complete = []
    for _ in range(max_len):
        new_beam = []
        for hyp, score, state in beam:
            y[0] = hyp[-1]
            out, state = self.decode_step(x, y, state=state, softmax=True)
            out = out.cpu().data.numpy().squeeze(axis=0).tolist()
            for i, p in enumerate(out):
                new_score = score + p
                new_hyp = hyp + (i,)
                new_beam.append((new_hyp, new_score, state))
        new_beam = sorted(new_beam, key=lambda x: x[1], reverse=True)

        # Remove complete hypotheses
        for cand in new_beam[:beam_size]:
            if cand[0][-1] == end_tok:
                complete.append(cand)

        beam = filter(lambda x: x[0][-1] != end_tok, new_beam)
        beam = beam[:beam_size]
        if len(beam) == 0:
            break

        # Stopping criteria:
        # complete contains beam_size more probable
        # candidates than anything left in the beam
        if sum(c[1] > beam[0][1] for c in complete) >= beam_size:
            break

    complete = sorted(complete, key=lambda x: x[1], reverse=True)
    if len(complete) == 0:
        complete = beam
    hyp, score, _ = complete[0]
    return [hyp]
Example 32
Source File: beam_search.py From object_detection_with_tensorflow with MIT License | 4 votes |
def BeamSearch(self, sess, enc_inputs, enc_seqlen):
    """Performs beam search for decoding.

    Args:
      sess: tf.Session, session
      enc_inputs: ndarray of shape (enc_length, 1), the document ids to encode
      enc_seqlen: ndarray of shape (1), the length of the sequence

    Returns:
      hyps: list of Hypothesis, the best hypotheses found by beam search,
          ordered by score
    """

    # Run the encoder and extract the outputs and final state.
    enc_top_states, dec_in_state = self._model.encode_top_state(
        sess, enc_inputs, enc_seqlen)
    # Replicate the initial states K times for the first step.
    hyps = [Hypothesis([self._start_token], 0.0, dec_in_state)] * self._beam_size
    results = []

    steps = 0
    while steps < self._max_steps and len(results) < self._beam_size:
        latest_tokens = [h.latest_token for h in hyps]
        states = [h.state for h in hyps]

        topk_ids, topk_log_probs, new_states = self._model.decode_topk(
            sess, latest_tokens, enc_top_states, states)
        # Extend each hypothesis.
        all_hyps = []
        # The first step takes the best K results from first hyps. Following
        # steps take the best K results from K*K hyps.
        num_beam_source = 1 if steps == 0 else len(hyps)
        for i in xrange(num_beam_source):
            h, ns = hyps[i], new_states[i]
            for j in xrange(self._beam_size * 2):
                all_hyps.append(h.Extend(topk_ids[i, j], topk_log_probs[i, j], ns))

        # Filter and collect any hypotheses that have the end token.
        hyps = []
        for h in self._BestHyps(all_hyps):
            if h.latest_token == self._end_token:
                # Pull the hypothesis off the beam if the end token is reached.
                results.append(h)
            else:
                # Otherwise continue to extend the hypothesis.
                hyps.append(h)
            if len(hyps) == self._beam_size or len(results) == self._beam_size:
                break

        steps += 1

    if steps == self._max_steps:
        results.extend(hyps)

    return self._BestHyps(results)
Example 33
Source File: top.py From bert-multitask-learning with MIT License | 4 votes |
def beam_search_decode(self, features, hidden_feature, mode, problem_name):
    # prepare inputs to attention
    key = 'ori_seq' if self.params.label_transfer else 'seq'
    encoder_outputs = hidden_feature[key]
    max_seq_len = self.params.max_seq_len
    embedding_table = hidden_feature['embed_table']
    token_type_ids = features['segment_ids']
    num_classes = self.params.num_classes[problem_name]
    batch_size = modeling.get_shape_list(
        encoder_outputs, expected_rank=3)[0]
    hidden_size = self.params.bert_config.hidden_size

    if self.params.problem_type[problem_name] == 'seq2seq_text':
        embedding_table = hidden_feature['embed_table']
    else:
        embedding_table = tf.get_variable(
            'tag_embed_table', shape=[num_classes, hidden_size])

    symbol_to_logit_fn = self._get_symbol_to_logit_fn(
        max_seq_len=max_seq_len,
        embedding_table=embedding_table,
        token_type_ids=token_type_ids,
        decoder=self.decoder,
        num_classes=num_classes,
        encoder_output=encoder_outputs,
        input_mask=features['input_mask'],
        params=self.params
    )

    # create cache for fast decode
    cache = {
        str(layer): {
            "key_layer": tf.zeros([batch_size, 0, hidden_size]),
            "value_layer": tf.zeros([batch_size, 0, hidden_size]),
        } for layer in range(self.params.decoder_num_hidden_layers)}
    # cache['encoder_outputs'] = encoder_outputs
    # cache['encoder_decoder_attention_mask'] = features['input_mask']
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    decode_ids, _, _ = beam_search.beam_search(
        symbols_to_logits_fn=symbol_to_logit_fn,
        initial_ids=initial_ids,
        states=cache,
        vocab_size=self.params.num_classes[problem_name],
        beam_size=self.params.beam_size,
        alpha=self.params.beam_search_alpha,
        decode_length=self.params.decode_max_seq_len,
        eos_id=self.params.eos_id[problem_name])

    # Get the top sequence for each batch element
    top_decoded_ids = decode_ids[:, 0, 1:]
    self.prob = top_decoded_ids
    return self.prob
Example 34
Source File: evaluate.py From keyphrase-gan with MIT License | 4 votes |
def evaluate_beam_search(generator, one2many_data_loader, opt, delimiter_word='<sep>'):
    #score_dict_all = defaultdict(list)  # {'precision@5':[],'recall@5':[],'f1_score@5':[],'num_matches@5':[],'precision@10':[],'recall@10':[],'f1score@10':[],'num_matches@10':[]}
    # file for storing the predicted keyphrases
    if opt.pred_file_prefix == "":
        pred_output_file = open(os.path.join(opt.pred_path, "predictions.txt"), "w")
    else:
        pred_output_file = open(os.path.join(opt.pred_path, "%s_predictions.txt" % opt.pred_file_prefix), "w")
    # debug
    interval = 1000

    with torch.no_grad():
        start_time = time.time()
        for batch_i, batch in enumerate(one2many_data_loader):
            if (batch_i + 1) % interval == 0:
                print("Batch %d: Time for running beam search on %d batches : %.1f" % (batch_i + 1, interval, time_since(start_time)))
                sys.stdout.flush()
                start_time = time.time()

            src, src_lens, src_mask, src_oov, oov_lists, src_str_list, trg_str_2dlist, _, _, _, _, original_idx_list, title, title_oov, title_lens, title_mask = batch
            """
            src: a LongTensor containing the word indices of source sentences, [batch, src_seq_len], with oov words replaced by unk idx
            src_lens: a list containing the length of src sequences for each batch, with len=batch
            src_mask: a FloatTensor, [batch, src_seq_len]
            src_oov: a LongTensor containing the word indices of source sentences, [batch, src_seq_len], contains the index of oov words (used by copy)
            oov_lists: a list of oov words for each src, 2dlist
            """
            src = src.to(opt.device)
            src_mask = src_mask.to(opt.device)
            src_oov = src_oov.to(opt.device)
            if opt.title_guided:
                title = title.to(opt.device)
                title_mask = title_mask.to(opt.device)
                # title_oov = title_oov.to(opt.device)

            beam_search_result = generator.beam_search(src, src_lens, src_oov, src_mask, oov_lists, opt.word2idx, opt.max_eos_per_output_seq, title=title, title_lens=title_lens, title_mask=title_mask)
            pred_list = preprocess_beam_search_result(beam_search_result, opt.idx2word, opt.vocab_size, oov_lists, opt.word2idx[pykp.io.EOS_WORD], opt.word2idx[pykp.io.UNK_WORD], opt.replace_unk, src_str_list)
            # list of {"sentences": [], "scores": [], "attention": []}

            # recover the original order in the dataset
            seq_pairs = sorted(zip(original_idx_list, src_str_list, trg_str_2dlist, pred_list, oov_lists), key=lambda p: p[0])
            original_idx_list, src_str_list, trg_str_2dlist, pred_list, oov_lists = zip(*seq_pairs)

            # Process every src in the batch
            for src_str, trg_str_list, pred, oov in zip(src_str_list, trg_str_2dlist, pred_list, oov_lists):
                # src_str: a list of words; trg_str: a list of keyphrases, each keyphrase is a list of words
                # pred_seq_list: a list of sequence objects, sorted by scores
                # oov: a list of oov words
                pred_str_list = pred['sentences']  # predicted sentences from a single src, a list of list of word, with len=[beam_size, out_seq_len], does not include the final <EOS>
                #print(pred_str_list)
                pred_score_list = pred['scores']
                pred_attn_list = pred['attention']  # a list of FloatTensor[output sequence length, src_len], with len = [n_best]

                if opt.one2many:
                    all_keyphrase_list = []  # a list of word list contains all the keyphrases in the top max_n sequences decoded by beam search
                    for word_list in pred_str_list:
                        all_keyphrase_list += split_word_list_by_delimiter(word_list, delimiter_word, opt.separate_present_absent, pykp.io.PEOS_WORD)
                    #not_duplicate_mask = check_duplicate_keyphrases(all_keyphrase_list)
                    #pred_str_list = [word_list for word_list, is_keep in zip(all_keyphrase_list, not_duplicate_mask) if is_keep]
                    pred_str_list = all_keyphrase_list

                # output the predicted keyphrases to a file
                pred_print_out = ''
                #pred_str_list = remove_duplications(pred_str_list)
                #print(pred_str_list)
                for word_list_i, word_list in enumerate(pred_str_list):
                    word_list = convert_to_string_list(word_list, opt.idx2word)
                    if word_list_i < len(pred_str_list) - 1:
                        pred_print_out += '%s;' % ' '.join(word_list)
                    else:
                        pred_print_out += '%s' % ' '.join(word_list)
                pred_print_out += '\n'
                pred_output_file.write(pred_print_out)

    pred_output_file.close()
    print("done!")
Example 35
Source File: caption_generator.py From object_detection_kitti with Apache License 2.0 | 4 votes |
def beam_search(self, sess, encoded_image):
    """Runs beam search caption generation on a single image.

    Args:
      sess: TensorFlow Session object.
      encoded_image: An encoded image string.

    Returns:
      A list of Caption sorted by descending score.
    """
    # Feed in the image to get the initial state.
    initial_state = self.model.feed_image(sess, encoded_image)

    initial_beam = Caption(
        sentence=[self.vocab.start_id],
        state=initial_state[0],
        logprob=0.0,
        score=0.0,
        metadata=[""])
    partial_captions = TopN(self.beam_size)
    partial_captions.push(initial_beam)
    complete_captions = TopN(self.beam_size)

    # Run beam search.
    for _ in range(self.max_caption_length - 1):
        partial_captions_list = partial_captions.extract()
        partial_captions.reset()
        input_feed = np.array([c.sentence[-1] for c in partial_captions_list])
        state_feed = np.array([c.state for c in partial_captions_list])

        softmax, new_states, metadata = self.model.inference_step(sess, input_feed, state_feed)

        for i, partial_caption in enumerate(partial_captions_list):
            word_probabilities = softmax[i]
            state = new_states[i]
            # For this partial caption, get the beam_size most probable next words.
            words_and_probs = list(enumerate(word_probabilities))
            words_and_probs.sort(key=lambda x: -x[1])
            words_and_probs = words_and_probs[0:self.beam_size]
            # Each next word gives a new partial caption.
            for w, p in words_and_probs:
                if p < 1e-12:
                    continue  # Avoid log(0).
                sentence = partial_caption.sentence + [w]
                logprob = partial_caption.logprob + math.log(p)
                score = logprob
                if metadata:
                    metadata_list = partial_caption.metadata + [metadata[i]]
                else:
                    metadata_list = None
                if w == self.vocab.end_id:
                    if self.length_normalization_factor > 0:
                        score /= len(sentence) ** self.length_normalization_factor
                    beam = Caption(sentence, state, logprob, score, metadata_list)
                    complete_captions.push(beam)
                else:
                    beam = Caption(sentence, state, logprob, score, metadata_list)
                    partial_captions.push(beam)
        if partial_captions.size() == 0:
            # We have run out of partial candidates; happens when beam_size = 1.
            break

    # If we have no complete captions then fall back to the partial captions.
    # But never output a mixture of complete and partial captions because a
    # partial caption could have a higher score than all the complete captions.
    if not complete_captions.size():
        complete_captions = partial_captions

    return complete_captions.extract(sort=True)
Example 36
Source File: eval.py From hapi with Apache License 2.0 | 4 votes |
def beam_search(FLAGS):
    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
    fluid.enable_dygraph(device) if FLAGS.dynamic else None

    model = Seq2SeqAttInferModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes,
        beam_size=FLAGS.beam_size)

    inputs = [
        Input(
            [None, 1, 48, 384], "float32", name="pixel"),
        Input(
            [None, None], "int64", name="label_in")
    ]
    labels = [
        Input(
            [None, None], "int64", name="label_out"),
        Input(
            [None, None], "float32", name="mask")
    ]
    model.prepare(
        loss_function=None,
        metrics=SeqBeamAccuracy(),
        inputs=inputs,
        labels=labels,
        device=device)
    model.load(FLAGS.init_model)

    test_dataset = data.test()
    test_collate_fn = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    test_sampler = data.BatchSampler(
        test_dataset,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    test_loader = fluid.io.DataLoader(
        test_dataset,
        batch_sampler=test_sampler,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=test_collate_fn)

    model.evaluate(
        eval_data=test_loader,
        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
Example 37
Source File: beam_search.py From adviser with GNU General Public License v3.0 | 4 votes |
def beam_search(x: torch.Tensor, sos: int, eos: int, beam_size: int, vocab_size: int,
                scorers: Dict[str, ScorerInterface], weights: Dict[str, float],
                token_list: List[str] = None, maxlenratio: float = 0.0, minlenratio: float = 0.0,
                pre_beam_ratio: float = 1.5, pre_beam_score_key: str = "decoder") -> list:
    """Perform beam search with scorers.

    Args:
        x (torch.Tensor): Encoded speech feature (T, D)
        sos (int): Start of sequence id
        eos (int): End of sequence id
        beam_size (int): The number of hypotheses kept during search
        vocab_size (int): The number of vocabulary
        scorers (dict[str, ScorerInterface]): Dict of decoder modules
            e.g., Decoder, CTCPrefixScorer, LM
            The scorer will be ignored if it is `None`
        weights (dict[str, float]): Dict of weights for each scorers
            The scorer will be ignored if its weight is 0
        token_list (list[str]): List of tokens for debug log
        maxlenratio (float): Input length ratio to obtain max output length.
            If maxlenratio=0.0 (default), it uses a end-detect function
            to automatically find maximum hypothesis lengths
        minlenratio (float): Input length ratio to obtain min output length.
        pre_beam_score_key (str): key of scores to perform pre-beam search
        pre_beam_ratio (float): beam size in the pre-beam search will be
            `int(pre_beam_ratio * beam_size)`

    Returns:
        list: N-best decoding results

    """
    ret = BeamSearch(
        scorers, weights,
        beam_size=beam_size,
        vocab_size=vocab_size,
        pre_beam_ratio=pre_beam_ratio,
        pre_beam_score_key=pre_beam_score_key,
        sos=sos,
        eos=eos,
        token_list=token_list,
    ).forward(
        x=x, maxlenratio=maxlenratio, minlenratio=minlenratio)
    return [h.asdict() for h in ret]
Example 38
Source File: CaptioningModel.py From speaksee with BSD 3-Clause "New" or "Revised" License | 4 votes |
def beam_search(self, images, seq_len, eos_idx, beam_size, out_size=1, *args):
    device = images.device
    b_s = images.size(0)
    images_shape = images.shape
    state = self.init_state(b_s, device)

    seq_mask = images.data.new_ones((b_s, beam_size, 1))
    seq_logprob = images.data.new_zeros((b_s, 1, 1))
    outputs = []
    log_probs = []
    selected_words = None

    for t in range(seq_len):
        cur_beam_size = 1 if t == 0 else beam_size

        word_logprob, state = self.step(t, state, selected_words, images, None, *args, mode='feedback')
        old_seq_logprob = seq_logprob
        word_logprob = word_logprob.view(b_s, cur_beam_size, -1)
        seq_logprob = seq_logprob + word_logprob

        # Mask sequence if it reaches EOS
        if t > 0:
            mask = (selected_words.view(b_s, cur_beam_size) != eos_idx).float().unsqueeze(-1)
            seq_mask = seq_mask * mask
            word_logprob = word_logprob * seq_mask.expand_as(word_logprob)
            old_seq_logprob = old_seq_logprob.expand_as(seq_logprob).contiguous()
            old_seq_logprob[:, :, 1:] = -999
            seq_logprob = seq_mask * seq_logprob + old_seq_logprob * (1 - seq_mask)

        selected_logprob, selected_idx = torch.sort(seq_logprob.view(b_s, -1), -1, descending=True)
        selected_logprob, selected_idx = selected_logprob[:, :beam_size], selected_idx[:, :beam_size]

        selected_beam = selected_idx / seq_logprob.shape[-1]
        selected_words = selected_idx - selected_beam * seq_logprob.shape[-1]

        new_state = []
        for s in state:
            shape = [int(sh) for sh in s.shape]
            beam = selected_beam
            for _ in shape[1:]:
                beam = beam.unsqueeze(-1)
            s = torch.gather(s.view(*([b_s, cur_beam_size] + shape[1:])), 1,
                             beam.expand(*([b_s, beam_size] + shape[1:])))
            s = s.view(*([-1, ] + shape[1:]))
            new_state.append(s)
        state = tuple(new_state)

        images_exp_shape = (b_s, cur_beam_size) + images_shape[1:]
        images_red_shape = (b_s * beam_size, ) + images_shape[1:]
        selected_beam_red_size = (b_s, beam_size) + tuple(1 for _ in range(len(images_exp_shape) - 2))
        selected_beam_exp_size = (b_s, beam_size) + images_exp_shape[2:]
        images_exp = images.view(images_exp_shape)
        selected_beam_exp = selected_beam.view(selected_beam_red_size).expand(selected_beam_exp_size)
        images = torch.gather(images_exp, 1, selected_beam_exp).view(images_red_shape)

        seq_logprob = selected_logprob.unsqueeze(-1)
        seq_mask = torch.gather(seq_mask, 1, selected_beam.unsqueeze(-1))
        outputs = list(torch.gather(o, 1, selected_beam.unsqueeze(-1)) for o in outputs)
        outputs.append(selected_words.unsqueeze(-1))

        this_word_logprob = torch.gather(word_logprob, 1,
                                         selected_beam.unsqueeze(-1).expand(b_s, beam_size, word_logprob.shape[-1]))
        this_word_logprob = torch.gather(this_word_logprob, 2, selected_words.unsqueeze(-1))
        log_probs = list(torch.gather(o, 1, selected_beam.unsqueeze(-1).expand(b_s, beam_size, 1)) for o in log_probs)
        log_probs.append(this_word_logprob)
        selected_words = selected_words.view(-1)

    # Sort result
    seq_logprob, sort_idxs = torch.sort(seq_logprob, 1, descending=True)
    outputs = torch.cat(outputs, -1)
    outputs = torch.gather(outputs, 1, sort_idxs.expand(b_s, beam_size, seq_len))
    log_probs = torch.cat(log_probs, -1)
    log_probs = torch.gather(log_probs, 1, sort_idxs.expand(b_s, beam_size, seq_len))

    outputs = outputs.contiguous()[:, :out_size]
    log_probs = log_probs.contiguous()[:, :out_size]
    if out_size == 1:
        outputs = outputs.squeeze(1)
        log_probs = log_probs.squeeze(1)

    return outputs, log_probs
Example 39
Source File: seq2seq.py From seq2seq with Apache License 2.0 | 4 votes |
def beamSearchDecoder(self, input_variable):
    input_length = input_variable.size()[0]
    encoder_hidden = self.encoder.init_hidden()
    encoder_outputs, encoder_hidden = self.encoder(input_variable, encoder_hidden)

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))
    decoder_context = Variable(torch.zeros(1, self.decoder.hidden_size))
    decoder_hidden = encoder_hidden
    if USE_CUDA:
        decoder_input = decoder_input.cuda()
        decoder_context = decoder_context.cuda()
    decoder_output, decoder_context, decoder_hidden, decoder_attention = self.decoder(decoder_input, decoder_context, decoder_hidden, encoder_outputs)

    topk = decoder_output.data.topk(self.top_k)
    samples = [[] for i in range(self.top_k)]
    dead_k = 0
    final_samples = []
    for index in range(self.top_k):
        topk_prob = topk[0][0][index]
        topk_index = int(topk[1][0][index])
        samples[index] = [[topk_index], topk_prob, 0, 0,
                          decoder_context, decoder_hidden, decoder_attention, encoder_outputs]

    for _ in range(self.max_length):
        tmp = []
        for index in range(len(samples)):
            tmp.extend(self.beamSearchInfer(samples[index], index))
        samples = []

        # keep only the top k candidates
        df = pd.DataFrame(tmp)
        df.columns = ['sequence', 'pre_socres', 'fin_scores', "ave_scores",
                      "decoder_context", "decoder_hidden", "decoder_attention", "encoder_outputs"]
        sequence_len = df.sequence.apply(lambda x: len(x))
        df['ave_scores'] = df['fin_scores'] / sequence_len
        df = df.sort_values('ave_scores', ascending=False).reset_index().drop(['index'], axis=1)
        df = df[:(self.top_k - dead_k)]
        for index in range(len(df)):
            group = df.ix[index]
            if group.tolist()[0][-1] == 1:
                final_samples.append(group.tolist())
                df = df.drop([index], axis=0)
                dead_k += 1
                print("drop {}, {}".format(group.tolist()[0], dead_k))
        samples = df.values.tolist()
        if len(samples) == 0:
            break

    if len(final_samples) < self.top_k:
        final_samples.extend(samples[:(self.top_k - dead_k)])
    return final_samples
Example 40
Source File: generate.py From subword-qac with MIT License | 4 votes |
def beam_search(model, hidden, input, best_score, off, beam_size, branching_factor, max_suffix_len):
    bsz = best_score.size(0)
    batch_idx = torch.arange(bsz).to(device)

    prev_beam_idxs = []
    new_token_idxs = []
    end_scores = []
    end_prev_beam_idxs = []

    for i in range(max_suffix_len):
        output, hidden = model(input, hidden=hidden)     # output: (1, batch * beam, ntoken)
        logp = F.log_softmax(output.squeeze(0), 1)       # logp: (batch * beam, t)
        if i == 0 and off is not None:
            logp.masked_fill_(off.unsqueeze(1).repeat(1, beam_size, 1).view(bsz * beam_size, -1), -float('inf'))
        score = logp + best_score.view(-1).unsqueeze(1)  # score: (batch * beam, t)

        end_score = score[:, 2].view(-1, beam_size)
        prev_end_score = end_scores[-1] if i > 0 else \
            torch.zeros((bsz, beam_size), dtype=torch.float).fill_(-float('inf')).to(device)
        end_score, end_prev_beam_idx = torch.cat((end_score, prev_end_score), 1).sort(-1, descending=True)
        end_score = end_score[:, :beam_size]                   # end_score: (batch, beam)
        end_prev_beam_idx = end_prev_beam_idx[:, :beam_size]   # end_prev_beam_idx: (batch, beam)
        end_scores.append(end_score)
        end_prev_beam_idxs.append(end_prev_beam_idx)

        score[:, 2].fill_(-float('inf'))
        val, idx0 = score.topk(branching_factor, 1)             # (batch * beam, f)
        val = val.view(-1, beam_size * branching_factor)        # (batch, beam * f)
        idx0 = idx0.view(-1, beam_size * branching_factor)      # (batch, beam * f)
        best_score, idx1 = val.topk(beam_size, 1)               # (batch, beam * f) -> (batch, beam)
        prev_beam_idx = idx1 // branching_factor                # (batch, beam)
        new_token_idx = idx0.gather(1, idx1)                    # (batch, beam)
        prev_beam_idxs.append(prev_beam_idx)
        new_token_idxs.append(new_token_idx)

        input = new_token_idx.view(1, -1)
        hidden_idx = (prev_beam_idx + batch_idx.unsqueeze(1).mul(beam_size)).view(-1)
        hidden = [(h.index_select(0, hidden_idx), c.index_select(0, hidden_idx)) for h, c in hidden]

        if (best_score[:, 0] < end_score[:, -1]).all():
            break

    max_suffix_len = i + 1
    tokens = torch.ones(bsz, beam_size, max_suffix_len, dtype=torch.long).to(device).mul(2)  # tokens: (batch, beam, L)
    pos = (beam_size + torch.arange(beam_size)).unsqueeze(0).repeat(bsz, 1).to(device)       # pos: (batch, beam)
    for i in reversed(range(max_suffix_len)):
        end = pos >= beam_size
        for j in range(bsz):
            tokens[j, 1 - end[j], i] = new_token_idxs[i][j, pos[j, 1 - end[j]]]
            pos[j][1 - end[j]] = prev_beam_idxs[i][j, pos[j, 1 - end[j]]]
            pos[j][end[j]] = end_prev_beam_idxs[i][j, pos[j, end[j]] - beam_size]
    decode_len = (tokens != 2).sum(2).max(1)[0]
    return tokens, end_scores[-1], decode_len
Example 41
Source File: base_model.py From image_captioning with MIT License | 4 votes |
def beam_search(self, sess, image_files, vocabulary):
    """Use beam search to generate the captions for a batch of images."""
    # Feed in the images to get the contexts and the initial LSTM states
    config = self.config
    images = self.image_loader.load_images(image_files)
    contexts, initial_memory, initial_output = sess.run(
        [self.conv_feats, self.initial_memory, self.initial_output],
        feed_dict={self.images: images})

    partial_caption_data = []
    complete_caption_data = []
    for k in range(config.batch_size):
        initial_beam = CaptionData(sentence=[],
                                   memory=initial_memory[k],
                                   output=initial_output[k],
                                   score=1.0)
        partial_caption_data.append(TopN(config.beam_size))
        partial_caption_data[-1].push(initial_beam)
        complete_caption_data.append(TopN(config.beam_size))

    # Run beam search
    for idx in range(config.max_caption_length):
        partial_caption_data_lists = []
        for k in range(config.batch_size):
            data = partial_caption_data[k].extract()
            partial_caption_data_lists.append(data)
            partial_caption_data[k].reset()

        num_steps = 1 if idx == 0 else config.beam_size
        for b in range(num_steps):
            if idx == 0:
                last_word = np.zeros((config.batch_size), np.int32)
            else:
                last_word = np.array([pcl[b].sentence[-1] for pcl in partial_caption_data_lists],
                                     np.int32)

            last_memory = np.array([pcl[b].memory for pcl in partial_caption_data_lists], np.float32)
            last_output = np.array([pcl[b].output for pcl in partial_caption_data_lists], np.float32)

            memory, output, scores = sess.run(
                [self.memory, self.output, self.probs],
                feed_dict={self.contexts: contexts,
                           self.last_word: last_word,
                           self.last_memory: last_memory,
                           self.last_output: last_output})

            # Find the beam_size most probable next words
            for k in range(config.batch_size):
                caption_data = partial_caption_data_lists[k][b]
                words_and_scores = list(enumerate(scores[k]))
                words_and_scores.sort(key=lambda x: -x[1])
                words_and_scores = words_and_scores[0:config.beam_size + 1]

                # Append each of these words to the current partial caption
                for w, s in words_and_scores:
                    sentence = caption_data.sentence + [w]
                    score = caption_data.score * s
                    beam = CaptionData(sentence, memory[k], output[k], score)
                    if vocabulary.words[w] == '.':
                        complete_caption_data[k].push(beam)
                    else:
                        partial_caption_data[k].push(beam)

    results = []
    for k in range(config.batch_size):
        if complete_caption_data[k].size() == 0:
            complete_caption_data[k] = partial_caption_data[k]
        results.append(complete_caption_data[k].extract(sort=True))

    return results
Example 42
Source File: core.py From transformer-keras with Apache License 2.0 | 4 votes |
def beam_search_sequence_decode(self, input_seq, topk=5, delimiter=' '):
    assert len(input_seq) == 1  # Only one sequence is currently supported
    assert self.tgt_tokenizer is not None
    if self.decode_model is None:
        self.make_fast_decode_model()

    src_seq = self.seq_to_matrix(input_seq)                 # [1, T_s]
    src_seq = src_seq.repeat(topk, axis=0)                  # [1 * k, T_s]
    enc_out = self.encode_model.predict_on_batch(src_seq)   # [1 * k, T_s, model_dim]

    tgt_tokenizer = self.tgt_tokenizer
    start_token_id = tgt_tokenizer.word_index[tgt_tokenizer.start_token]
    end_token_id = tgt_tokenizer.word_index[tgt_tokenizer.end_token]

    target_seq = np.zeros((topk, self.tgt_max_len))         # [1 * k, T_t]
    target_seq[:, 0] = start_token_id

    sequences = [([], 0.0)]
    final_results = []

    for i in range(self.tgt_max_len - 1):
        if len(final_results) >= topk:
            break
        output = self.decode_model.predict_on_batch([src_seq, enc_out, target_seq])  # [1 * k, T_t, model_dim]
        k_cur_output = output[:, i, :]                      # [1 * k, model_dim]

        all_candidates = []
        for k, cur_output in zip(range(len(sequences)), k_cur_output):
            seq, score = sequences[k]
            # Find a complete sentence, add to the final result.
            if target_seq[k, i] == end_token_id:
                final_results.append((seq[:-1], score))
                continue
            # Other sentences will be generated among the remaining candidates.
            wsorted = sorted(list(enumerate(cur_output)), key=lambda x: x[-1], reverse=True)
            for wid, wp in wsorted[:topk]:
                all_candidates.append((seq + [wid], score + wp))

        ordered = sorted(all_candidates, key=lambda tup: tup[1], reverse=True)
        sequences = ordered[:topk]
        for kk, cc in enumerate(sequences):
            seq, score = cc
            target_seq[kk, 1: len(seq) + 1] = seq

    # Extend if last word is not end_token.
    final_results.extend(sequences)
    final_results = [(x, y / (len(x) + 1)) for x, y in final_results]
    final_results = sorted(final_results, key=lambda tup: tup[1], reverse=True)[:topk]

    ori_split = tgt_tokenizer.split
    tgt_tokenizer.split = delimiter
    sequences = [(tgt_tokenizer.sequences_to_texts([x])[0], y) for x, y in final_results]
    tgt_tokenizer.split = ori_split
    return sequences
Example 43
Source File: beam_search.py From beam_search with MIT License | 4 votes |
def beam_search(initial_state_function, generate_function, X, start_id, end_id,
                beam_width=4, num_hypotheses=1, max_length=50):
    """Beam search for neural network sequence to sequence (encoder-decoder) models.

    :param initial_state_function: A function that takes X as input and returns state
        (2-dimensional numpy array with 1 row representing decoder recurrent layer state;
        currently supports only one recurrent layer).
    :param generate_function: A function that takes X, Y_tm1 (1-dimensional numpy array of
        token indices in decoder vocabulary generated at previous step) and state_tm1
        (2-dimensional numpy array of previous step decoder recurrent layer states) as input
        and returns state_t (2-dimensional numpy array of current step decoder recurrent layer
        states), p_t (2-dimensional numpy array of decoder softmax outputs) and optional extras
        (e.g. attention weights at current step).
    :param X: List of input token indices in encoder vocabulary.
    :param start_id: Index of <start sequence> token in decoder vocabulary.
    :param end_id: Index of <end sequence> token in decoder vocabulary.
    :param beam_width: Beam size. Default 4.
    :param num_hypotheses: Number of hypotheses to generate. Default 1.
    :param max_length: Length limit for generated sequence. Default 50.
    """
    if isinstance(X, list) or X.ndim == 1:
        X = np.array([X], dtype=np.int32).T
    assert X.ndim == 2 and X.shape[1] == 1, "X should be a column array with shape (input-sequence-length, 1)"

    next_fringe = [Node(parent=None, state=initial_state_function(X), value=start_id, cost=0.0, extras=None)]
    hypotheses = []

    for _ in range(max_length):
        fringe = []
        for n in next_fringe:
            if n.value == end_id:
                hypotheses.append(n)
            else:
                fringe.append(n)

        if not fringe:
            break

        Y_tm1 = np.array([n.value for n in fringe], dtype=np.int32)
        state_tm1 = np.array([n.state for n in fringe], dtype=np.float32)
        state_t, p_t, extras_t = generate_function(X, Y_tm1, state_tm1)
        Y_t = np.argsort(p_t, axis=1)[:, -beam_width:]  # no point in taking more than fits in the beam
        next_fringe = []
        for Y_t_n, p_t_n, extras_t_n, state_t_n, n in zip(Y_t, p_t, extras_t, state_t, fringe):
            Y_nll_t_n = -np.log(p_t_n[Y_t_n])
            for y_t_n, y_nll_t_n in zip(Y_t_n, Y_nll_t_n):
                n_new = Node(parent=n, state=state_t_n, value=y_t_n, cost=y_nll_t_n, extras=extras_t_n)
                next_fringe.append(n_new)
        next_fringe = sorted(next_fringe, key=lambda n: n.cum_cost)[:beam_width]  # may move this into loop to save memory

    hypotheses.sort(key=lambda n: n.cum_cost)
    return hypotheses[:num_hypotheses]
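A toy usage sketch for the function above. The model functions below are fakes that just return random, correctly shaped arrays following the docstring's contract; it assumes the beam_search function and its Node helper from this module are in scope, and the vocabulary and state sizes are invented for illustration.

import numpy as np

VOCAB_SIZE = 6      # toy decoder vocabulary size (invented)
STATE_DIM = 8       # toy decoder state size (invented)

def toy_initial_state_function(X):
    # One row of decoder recurrent state, as required by the docstring.
    return np.zeros((1, STATE_DIM), dtype=np.float32)

def toy_generate_function(X, Y_tm1, state_tm1):
    n = Y_tm1.shape[0]
    state_t = state_tm1 + 0.1                       # fake state update
    logits = np.random.rand(n, VOCAB_SIZE)
    logits[:, 1] += 1.0                             # bias toward the end token so the search terminates
    p_t = logits / logits.sum(axis=1, keepdims=True)
    extras_t = [None] * n                           # e.g. attention weights in a real model
    return state_t, p_t, extras_t

X = [3, 2, 4, 2, 5]                                 # toy encoder token ids
hypotheses = beam_search(toy_initial_state_function, toy_generate_function, X,
                         start_id=0, end_id=1, beam_width=4, num_hypotheses=2, max_length=10)
for h in hypotheses:
    print(h.cum_cost)                               # cumulative negative log-likelihood of each hypothesis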
Example 44
Source File: caption_generator.py From models with Apache License 2.0 | 4 votes |
def beam_search(self, sess, encoded_image):
    """Runs beam search caption generation on a single image.

    Args:
      sess: TensorFlow Session object.
      encoded_image: An encoded image string.

    Returns:
      A list of Caption sorted by descending score.
    """
    # Feed in the image to get the initial state.
    initial_state = self.model.feed_image(sess, encoded_image)

    initial_beam = Caption(
        sentence=[self.vocab.start_id],
        state=initial_state[0],
        logprob=0.0,
        score=0.0,
        metadata=[""])
    partial_captions = TopN(self.beam_size)
    partial_captions.push(initial_beam)
    complete_captions = TopN(self.beam_size)

    # Run beam search.
    for _ in range(self.max_caption_length - 1):
        partial_captions_list = partial_captions.extract()
        partial_captions.reset()
        input_feed = np.array([c.sentence[-1] for c in partial_captions_list])
        state_feed = np.array([c.state for c in partial_captions_list])

        softmax, new_states, metadata = self.model.inference_step(sess, input_feed, state_feed)

        for i, partial_caption in enumerate(partial_captions_list):
            word_probabilities = softmax[i]
            state = new_states[i]
            # For this partial caption, get the beam_size most probable next words.
            # Sort the indexes with numpy, select the last self.beam_size
            # (3 by default) (ie, the most likely) and then reverse the sorted
            # indexes with [::-1] to sort them from higher to lower.
            most_likely_words = np.argsort(word_probabilities)[:-self.beam_size][::-1]
            for w in most_likely_words:
                p = word_probabilities[w]
                if p < 1e-12:
                    continue  # Avoid log(0).
                sentence = partial_caption.sentence + [w]
                logprob = partial_caption.logprob + math.log(p)
                score = logprob
                if metadata:
                    metadata_list = partial_caption.metadata + [metadata[i]]
                else:
                    metadata_list = None
                if w == self.vocab.end_id:
                    if self.length_normalization_factor > 0:
                        score /= len(sentence) ** self.length_normalization_factor
                    beam = Caption(sentence, state, logprob, score, metadata_list)
                    complete_captions.push(beam)
                else:
                    beam = Caption(sentence, state, logprob, score, metadata_list)
                    partial_captions.push(beam)
        if partial_captions.size() == 0:
            # We have run out of partial candidates; happens when beam_size = 1.
            break

    # If we have no complete captions then fall back to the partial captions.
    # But never output a mixture of complete and partial captions because a
    # partial caption could have a higher score than all the complete captions.
    if not complete_captions.size():
        complete_captions = partial_captions

    return complete_captions.extract(sort=True)
Example 45
Source File: seq_alignment.py From libri-light with MIT License | 4 votes |
def beam_search(score_preds, nKeep, blankLabel):
    T, P = score_preds.shape
    beams = set([''])
    pb_t_1 = {"": 1}
    pnb_t_1 = {"": 0}

    def getLastNumber(b):
        return int(b.split(',')[-1])

    for t in range(T):
        nextBeams = set()
        pb_t = {}
        pnb_t = {}
        for i_beam, b in enumerate(beams):
            if b not in pb_t:
                pb_t[b] = 0
                pnb_t[b] = 0

            if len(b) > 0:
                pnb_t[b] += pnb_t_1[b] * score_preds[t, getLastNumber(b)]
            pb_t[b] = (pnb_t_1[b] + pb_t_1[b]) * score_preds[t, blankLabel]
            nextBeams.add(b)

            for c in range(P):
                if c == blankLabel:
                    continue

                b_ = b + "," + str(c)
                if b_ not in pb_t:
                    pb_t[b_] = 0
                    pnb_t[b_] = 0

                if b != "" and getLastNumber(b) == c:
                    pnb_t[b_] += pb_t_1[b] * score_preds[t, c]
                else:
                    pnb_t[b_] += (pb_t_1[b] + pnb_t_1[b]) * score_preds[t, c]
                nextBeams.add(b_)

        allPreds = [(pb_t[b] + pnb_t[b], b) for b in nextBeams]
        allPreds.sort(reverse=True)

        beams = [x[1] for x in allPreds[:nKeep]]
        pb_t_1 = deepcopy(pb_t)
        pnb_t_1 = deepcopy(pnb_t)

    output = []
    for score, x in allPreds[:nKeep]:
        output.append((score, [int(y) for y in x.split(',') if len(y) > 0]))
    return output
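A quick usage sketch for the CTC-style prefix beam search above: feed it a (T, P) matrix of per-frame label probabilities and it returns the nKeep best label sequences with blanks and repeats collapsed. The posterior matrix below is random, purely for illustration, and the snippet assumes beam_search from seq_alignment.py is in scope.

import numpy as np

T, P = 20, 5                                            # 20 frames, 4 labels + 1 blank (toy sizes)
rng = np.random.default_rng(0)
posteriors = rng.random((T, P))
posteriors /= posteriors.sum(axis=1, keepdims=True)     # normalize each frame to a distribution

for score, labels in beam_search(posteriors, nKeep=3, blankLabel=P - 1):
    print(f"score={score:.4e}  labels={labels}")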