Python nltk.translate.bleu_score.SmoothingFunction() Examples
The following are 30 code examples of nltk.translate.bleu_score.SmoothingFunction(). You can go to the original project or source file by following the links above each example, or check out all available functions and classes of the module nltk.translate.bleu_score.
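Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how a SmoothingFunction method is typically passed to sentence_bleu; the token lists are illustrative only.

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

hypothesis = "the cat sat on the mat".split()
reference = "the cat is on the mat".split()

# method1 adds a small epsilon to zero n-gram counts so that short or
# imperfect hypotheses do not collapse the geometric mean to zero.
smoothie = SmoothingFunction(epsilon=1e-12).method1
print(sentence_bleu([reference], hypothesis, smoothing_function=smoothie))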
Example #1
Source File: metrics.py From ParlAI with MIT License | 8 votes |
def compute(guess: str, answers: List[str], k: int = 4) -> Optional['BleuMetric']:
    """
    Compute approximate BLEU score between guess and a set of answers.
    """
    if nltkbleu is None:
        # bleu library not installed, just return a default value
        return None
    # Warning: BLEU calculation *should* include proper tokenization and
    # punctuation etc. We're using the normalize_answer for everything though,
    # so we're over-estimating our BLEU scores. Also note that NLTK's bleu is
    # going to be slower than fairseq's (which is written in C), but fairseq's
    # requires that everything be in arrays of ints (i.e. as tensors). NLTK's
    # works with strings, which is better suited for this module.
    weights = [1 / k for _ in range(k)]
    score = nltkbleu.sentence_bleu(
        [normalize_answer(a).split(" ") for a in answers],
        normalize_answer(guess).split(" "),
        smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
        weights=weights,
    )
    return BleuMetric(score)
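A usage sketch of the same call pattern; normalize_answer is ParlAI-specific, so it is replaced here by a hypothetical lowercasing stand-in, and the resulting scores will differ from ParlAI's.

from typing import List
import nltk.translate.bleu_score as nltkbleu

def simple_normalize(text: str) -> str:
    # Stand-in for ParlAI's normalize_answer (hypothetical simplification).
    return text.lower()

def bleu_k(guess: str, answers: List[str], k: int = 4) -> float:
    weights = [1 / k for _ in range(k)]
    return nltkbleu.sentence_bleu(
        [simple_normalize(a).split(" ") for a in answers],
        simple_normalize(guess).split(" "),
        smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
        weights=weights,
    )

print(bleu_k("the cat sat on the mat", ["a cat sat on the mat"], k=2))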
Example #2
Source File: seq2seq.py From chainer with MIT License | 6 votes |
def __call__(self, trainer):
    device = self.device

    with chainer.no_backprop_mode():
        references = []
        hypotheses = []
        for i in range(0, len(self.test_data), self.batch):
            sources, targets = zip(*self.test_data[i:i + self.batch])
            references.extend([[t.tolist()] for t in targets])

            sources = [device.send(x) for x in sources]
            ys = [y.tolist()
                  for y in self.model.translate(sources, self.max_length)]
            hypotheses.extend(ys)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1)
    chainer.report({self.key: bleu})
Example #3
Source File: precision_recall.py From cotk with Apache License 2.0 | 6 votes |
def _score(self, gen: List[int], reference: List[int]) -> float:
    '''Return a BLEU score \in [0, 1] to calculate BLEU-ngram precision and recall.

    Arguments:
        gen (list): list of generated word ids.
        reference (list): list of word ids of a reference.

    Here is an Example:

        >>> gen = [4,5]
        >>> reference = [5,6]
        >>> self._score(gen, reference)
        0.150  # assume self.weights = [0.25,0.25,0.25,0.25]
    '''
    gen = self._replace_unk(gen)
    return sentence_bleu([reference], gen, self.weights, SmoothingFunction().method1)
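The docstring example can be reproduced directly with NLTK, since BLEU operates on any hashable tokens (here, word ids); this sketch assumes uniform 4-gram weights, and the exact value depends on the smoothing.

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

gen = [4, 5]
reference = [5, 6]
weights = [0.25, 0.25, 0.25, 0.25]
score = sentence_bleu([reference], gen, weights, SmoothingFunction().method1)
print(score)  # a value in [0, 1]; roughly 0.150 per the docstring above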
Example #4
Source File: metrics.py From KBRD with MIT License | 6 votes |
def _bleu(guess, answers):
    """Compute approximate BLEU score between guess and a set of answers."""
    if nltkbleu is None:
        # bleu library not installed, just return a default value
        return None
    # Warning: BLEU calculation *should* include proper tokenization and
    # punctuation etc. We're using the normalize_answer for everything though,
    # so we're over-estimating our BLEU scores. Also note that NLTK's bleu is
    # going to be slower than fairseq's (which is written in C), but fairseq's
    # requires that everything be in arrays of ints (i.e. as tensors). NLTK's
    # works with strings, which is better suited for this module.
    return nltkbleu.sentence_bleu(
        [normalize_answer(a).split(" ") for a in answers],
        normalize_answer(guess).split(" "),
        smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
    )
Example #5
Source File: metrics.py From neural_chat with MIT License | 6 votes |
def _bleu(guess, answers):
    """Compute approximate BLEU score between guess and a set of answers."""
    if nltkbleu is None:
        # bleu library not installed, just return a default value
        return None
    # Warning: BLEU calculation *should* include proper tokenization and
    # punctuation etc. We're using the normalize_answer for everything though,
    # so we're over-estimating our BLEU scores. Also note that NLTK's bleu is
    # going to be slower than fairseq's (which is written in C), but fairseq's
    # requires that everything be in arrays of ints (i.e. as tensors). NLTK's
    # works with strings, which is better suited for this module.
    return nltkbleu.sentence_bleu(
        [normalize_answer(a).split(" ") for a in answers],
        normalize_answer(guess).split(" "),
        smoothing_function=nltkbleu.SmoothingFunction(epsilon=1e-12).method1,
    )
Example #6
Source File: utils.py From quick-nlp with MIT License | 6 votes |
def print_batch(learner: Learner, modeldata: ModelData, input_field, output_field, num_batches=1, num_sentences=-1,
                is_test=False, num_beams=1, weights=None, smoothing_function=None):
    predictions, targets, inputs = learner.predict_with_targs_and_inputs(is_test=is_test, num_beams=num_beams)
    weights = (1 / 3., 1 / 3., 1 / 3.) if weights is None else weights
    smoothing_function = SmoothingFunction().method1 if smoothing_function is None else smoothing_function
    blue_scores = []
    for batch_num, (input, target, prediction) in enumerate(zip(inputs, targets, predictions)):
        inputs_str: BatchBeamTokens = modeldata.itos(input, input_field)
        predictions_str: BatchBeamTokens = modeldata.itos(prediction, output_field)
        targets_str: BatchBeamTokens = modeldata.itos(target, output_field)
        for index, (inp, targ, pred) in enumerate(zip(inputs_str, targets_str, predictions_str)):
            blue_score = sentence_bleu([targ], pred, smoothing_function=smoothing_function, weights=weights)
            print(
                f'batch: {batch_num} sample : {index}\ninput: {" ".join(inp)}\ntarget: {" ".join(targ)}\nprediction: {" ".join(pred)}\nbleu: {blue_score}\n\n')
            blue_scores.append(blue_score)
            if 0 < num_sentences <= index - 1:
                break
        if 0 < num_batches <= batch_num - 1:
            break
    print(f'mean bleu score: {np.mean(blue_scores)}')
Example #7
Source File: seq2seq_chainerio.py From pfio with MIT License | 6 votes |
def forward(self, trainer):
    with chainer.no_backprop_mode():
        references = []
        hypotheses = []
        for i in range(0, len(self.test_data), self.batch):
            sources, targets = zip(*self.test_data[i:i + self.batch])
            references.extend([[t.tolist()] for t in targets])

            sources = [
                chainer.dataset.to_device(self.device, x) for x in sources]
            ys = [y.tolist()
                  for y in self.model.translate(sources, self.max_length)]
            hypotheses.extend(ys)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1)
    chainer.report({self.key: bleu})
Example #8
Source File: seq2seq.py From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __call__(self, trainer):
    print('## Calculate BLEU')
    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            references = []
            hypotheses = []
            for i in range(0, len(self.test_data), self.batch):
                sources, targets = zip(*self.test_data[i:i + self.batch])
                references.extend([[t.tolist()] for t in targets])

                sources = [
                    chainer.dataset.to_device(self.device, x) for x in sources]
                ys = [y.tolist()
                      for y in self.model.translate(sources, self.max_length)]
                hypotheses.extend(ys)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1) * 100
    print('BLEU:', bleu)
    reporter.report({self.key: bleu})
Example #9
Source File: seq2seq.py From chainer with MIT License | 6 votes |
def __call__(self, trainer):
    with chainer.no_backprop_mode():
        references = []
        hypotheses = []
        for i in range(0, len(self.test_data), self.batch):
            sources, targets = zip(*self.test_data[i:i + self.batch])
            references.extend([[t.tolist()] for t in targets])

            sources = [
                chainer.dataset.to_device(self.device, x) for x in sources]
            ys = [y.tolist()
                  for y in self.model.translate(sources, self.max_length)]
            hypotheses.extend(ys)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1)
    reporter.report({self.key: bleu})
Example #10
Source File: test_bleu.py From cotk with Apache License 2.0 | 6 votes |
def get_bleu(self, dataloader, input, reference_key, gen_key):
    refs = []
    gens = []
    for gen_sen, resp_sen in zip(input[gen_key], input[reference_key]):
        gen_sen_processed = dataloader.trim_in_ids(gen_sen)
        resp_sen_processed = dataloader.trim_in_ids(resp_sen[1:])
        refs.append(resp_sen_processed)
        gens.append(gen_sen_processed)
    gens = replace_unk(gens)
    bleu_irl_bw, bleu_irl_fw = [], []
    for i in range(len(gens)):
        bleu_irl_fw.append(sentence_bleu(refs, gens[i], smoothing_function=SmoothingFunction().method1))
    for i in range(len(refs)):
        bleu_irl_bw.append(sentence_bleu(gens, refs[i], smoothing_function=SmoothingFunction().method1))

    fw_bleu = (1.0 * sum(bleu_irl_fw) / len(bleu_irl_fw))
    bw_bleu = (1.0 * sum(bleu_irl_bw) / len(bleu_irl_bw))
    return 2.0 * bw_bleu * fw_bleu / (fw_bleu + bw_bleu)
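The forward/backward construction above is a precision/recall-style BLEU combined by a harmonic mean; a minimal sketch with plain token lists (the dataloader-specific trimming and unk replacement are omitted, and the sequences are illustrative):

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

refs = [["a", "b", "c"], ["a", "c", "d"]]
gens = [["a", "b", "d"], ["b", "c", "d"]]
smoothie = SmoothingFunction().method1

# Forward: how well each generated sentence matches the reference pool.
bleu_fw = [sentence_bleu(refs, g, smoothing_function=smoothie) for g in gens]
# Backward: how well each reference is covered by the generated pool.
bleu_bw = [sentence_bleu(gens, r, smoothing_function=smoothie) for r in refs]

fw, bw = sum(bleu_fw) / len(bleu_fw), sum(bleu_bw) / len(bleu_bw)
print(2.0 * fw * bw / (fw + bw))  # harmonic mean of the two averages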
Example #11
Source File: bleu_metrics.py From dialog-eval with MIT License | 6 votes |
def __init__(self, smoothing):
    '''
    Params:
      :smoothing: Smoothing method for bleu.
    '''
    self.metrics = {'bleu-1': [],
                    'bleu-2': [],
                    'bleu-3': [],
                    'bleu-4': []}

    self.smoothing = [bleu_score.SmoothingFunction().method0,
                      bleu_score.SmoothingFunction().method1,
                      bleu_score.SmoothingFunction().method2,
                      bleu_score.SmoothingFunction().method3,
                      bleu_score.SmoothingFunction().method4,
                      bleu_score.SmoothingFunction().method5,
                      bleu_score.SmoothingFunction().method6,
                      bleu_score.SmoothingFunction().method7]
    self.smoothing = self.smoothing[smoothing]

# Calculate metrics for one example.
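A sketch of how such an index-selected smoothing method might then fill the bleu-1 to bleu-4 buckets; only the metric keys mirror the dictionary above, the index and sentences are illustrative.

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

smoothing_index = 1  # e.g. selects method1, as in the list above
smoothing = [getattr(SmoothingFunction(), f"method{i}") for i in range(8)][smoothing_index]

reference = "the cat is on the mat".split()
hypothesis = "the cat sat on the mat".split()

metrics = {}
for n in range(1, 5):
    weights = tuple(1 / n for _ in range(n))
    metrics[f"bleu-{n}"] = sentence_bleu([reference], hypothesis,
                                         weights=weights,
                                         smoothing_function=smoothing)
print(metrics)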
Example #12
Source File: bleu.py From dialogbot with Apache License 2.0 | 6 votes |
def bleu(answer_file, standard_answer_file):
    # Read both files as UTF-8 text.
    rf_answer = open(answer_file, 'r', encoding="utf-8")
    rf_standard_answer = open(standard_answer_file, 'r', encoding="utf-8")
    answer_lines = rf_answer.readlines()
    standard_answer_lines = rf_standard_answer.readlines()
    # compute score
    scores = []
    for i in range(len(answer_lines)):
        candidate = list(answer_lines[i].strip())
        each_score = 0
        for j in range(10):
            references = []
            standard_answer_line = standard_answer_lines[i * 11 + j].strip().split('\t')
            references.append(list(standard_answer_line[0].strip()))
            standard_score = standard_answer_line[1]
            bleu_score = sentence_bleu(references, candidate, weights=(0.35, 0.45, 0.1, 0.1),
                                       smoothing_function=SmoothingFunction().method1)
            each_score = bleu_score * float(standard_score) + each_score
        scores.append(each_score / 10)
    rf_answer.close()
    rf_standard_answer.close()
    score_final = sum(scores) / float(len(answer_lines))
    precision_score = round(score_final, 6)
    return precision_score
Example #13
Source File: metric.py From MultiTurnDialogZoo with MIT License | 6 votes |
def cal_BLEU_nltk(refer, candidate, ngram=1):
    '''
    SmoothingFunction refer to https://github.com/PaddlePaddle/models/blob/a72760dff8574fe2cb8b803e01b44624db3f3eff/PaddleNLP/Research/IJCAI2019-MMPMS/mmpms/utils/metrics.py
    '''
    smoothie = SmoothingFunction().method7
    if ngram == 1:
        weight = (1, 0, 0, 0)
    elif ngram == 2:
        weight = (0.5, 0.5, 0, 0)
    elif ngram == 3:
        weight = (0.33, 0.33, 0.33, 0)
    elif ngram == 4:
        weight = (0.25, 0.25, 0.25, 0.25)
    return sentence_bleu(refer, candidate,
                         weights=weight,
                         smoothing_function=smoothie)

# BLEU of nlg-eval
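The weight tuples above select BLEU-1 through BLEU-4; a small usage sketch with illustrative tokens (note that refer must already be a list of token-list references for sentence_bleu):

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

refer = [["how", "are", "you", "doing"]]   # list of references
candidate = ["how", "are", "you"]
smoothie = SmoothingFunction().method7

for ngram, weight in [(1, (1, 0, 0, 0)), (2, (0.5, 0.5, 0, 0)),
                      (3, (0.33, 0.33, 0.33, 0)), (4, (0.25, 0.25, 0.25, 0.25))]:
    score = sentence_bleu(refer, candidate, weights=weight,
                          smoothing_function=smoothie)
    print(f"BLEU-{ngram}: {score:.4f}")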
Example #14
Source File: evaluators.py From ConvLab with MIT License | 6 votes |
def get_report(self):
    tokenize = lambda x: x.split()
    print('Generate report for {} samples'.format(len(self.hyps)))
    refs, hyps = [], []
    tp, fp, fn = 0, 0, 0
    for label, hyp in zip(self.labels, self.hyps):
        ref_tokens = [BOS] + tokenize(label.replace(SYS, '').replace(USR, '').strip()) + [EOS]
        hyp_tokens = [BOS] + tokenize(hyp.replace(SYS, '').replace(USR, '').strip()) + [EOS]
        refs.append([ref_tokens])
        hyps.append(hyp_tokens)
        ref_entities = self._parse_entities(ref_tokens)
        hyp_entities = self._parse_entities(hyp_tokens)
        tpp, fpp, fnn = self._get_tp_fp_fn(ref_entities, hyp_entities)
        tp += tpp
        fp += fpp
        fn += fnn
    # bleu = corpus_bleu(refs, hyps, smoothing_function=SmoothingFunction().method1)
    bleu = BLEUScorer().score(hyps, refs)
    prec, rec, f1 = self._get_prec_recall(tp, fp, fn)
    report = "\nBLEU score {}\nEntity precision {:.4f} recall {:.4f} and f1 {:.4f}\n".format(bleu, prec, rec, f1)
    return report, bleu, prec, rec, f1
Example #15
Source File: evaluators.py From ConvLab with MIT License | 6 votes |
def get_report(self):
    tokenize = get_tokenize()
    print('Generate report for {} samples'.format(len(self.hyps)))
    refs, hyps = [], []
    for label, hyp in zip(self.labels, self.hyps):
        # label = label.replace(EOS, '')
        # hyp = hyp.replace(EOS, '')
        # ref_tokens = tokenize(label)[1:]
        # hyp_tokens = tokenize(hyp)[1:]
        ref_tokens = tokenize(label)
        hyp_tokens = tokenize(hyp)
        refs.append([ref_tokens])
        hyps.append(hyp_tokens)
    bleu = corpus_bleu(refs, hyps, smoothing_function=SmoothingFunction().method1)
    report = '\n===== BLEU = %f =====\n' % (bleu,)
    return '\n===== REPORT FOR DATASET {} ====={}'.format(self.data_name, report)
Example #16
Source File: evaluators.py From NeuralDialog-LaRL with Apache License 2.0 | 6 votes |
def get_report(self):
    tokenize = get_tokenize()
    print('Generate report for {} samples'.format(len(self.hyps)))
    refs, hyps = [], []
    for label, hyp in zip(self.labels, self.hyps):
        # label = label.replace(EOS, '')
        # hyp = hyp.replace(EOS, '')
        # ref_tokens = tokenize(label)[1:]
        # hyp_tokens = tokenize(hyp)[1:]
        ref_tokens = tokenize(label)
        hyp_tokens = tokenize(hyp)
        refs.append([ref_tokens])
        hyps.append(hyp_tokens)
    bleu = corpus_bleu(refs, hyps, smoothing_function=SmoothingFunction().method1)
    report = '\n===== BLEU = %f =====\n' % (bleu,)
    return '\n===== REPORT FOR DATASET {} ====={}'.format(self.data_name, report)
Example #17
Source File: evaluators.py From NeuralDialog-LaRL with Apache License 2.0 | 6 votes |
def get_report(self):
    tokenize = lambda x: x.split()
    print('Generate report for {} samples'.format(len(self.hyps)))
    refs, hyps = [], []
    tp, fp, fn = 0, 0, 0
    for label, hyp in zip(self.labels, self.hyps):
        ref_tokens = [BOS] + tokenize(label.replace(SYS, '').replace(USR, '').strip()) + [EOS]
        hyp_tokens = [BOS] + tokenize(hyp.replace(SYS, '').replace(USR, '').strip()) + [EOS]
        refs.append([ref_tokens])
        hyps.append(hyp_tokens)
        ref_entities = self._parse_entities(ref_tokens)
        hyp_entities = self._parse_entities(hyp_tokens)
        tpp, fpp, fnn = self._get_tp_fp_fn(ref_entities, hyp_entities)
        tp += tpp
        fp += fpp
        fn += fnn
    # bleu = corpus_bleu(refs, hyps, smoothing_function=SmoothingFunction().method1)
    bleu = BLEUScorer().score(hyps, refs)
    prec, rec, f1 = self._get_prec_recall(tp, fp, fn)
    report = "\nBLEU score {}\nEntity precision {:.4f} recall {:.4f} and f1 {:.4f}\n".format(bleu, prec, rec, f1)
    return report, bleu, prec, rec, f1
Example #18
Source File: metrics.py From deepAPI with MIT License | 6 votes |
def sim_bleu(self, hyps, ref):
    """
    :param ref - a list of tokens of the reference
    :param hyps - a list of tokens of the hypothesis

    :return maxbleu - recall bleu
    :return avgbleu - precision bleu
    """
    scores = []
    for hyp in hyps:
        try:
            # scores.append(sentence_bleu([ref], hyp, smoothing_function=SmoothingFunction().method7,
            #                             weights=[1./4, 1./4, 1./4, 1./4]))
            scores.append(smoothed_bleu(list(bleu_stats(hyp, ref))))
        except:
            scores.append(0.0)
    return np.max(scores), np.mean(scores)
Example #19
Source File: test_bleu.py From cotk with Apache License 2.0 | 5 votes |
def get_bleu(self, dataloader, input, reference_key, gen_key):
    refs = []
    gens = []
    for gen_sen, resp_sen in zip(input[gen_key], input[reference_key]):
        gen_sen_processed = dataloader.trim_in_ids(gen_sen)
        resp_sen_processed = dataloader.trim_in_ids(resp_sen[1:])
        refs.append([resp_sen_processed])
        gens.append(gen_sen_processed)
    gens = replace_unk(gens)
    return corpus_bleu(refs, gens, smoothing_function=SmoothingFunction().method3)
Example #20
Source File: test_bleu.py From cotk with Apache License 2.0 | 5 votes |
def get_bleu(self, dataloader, input, reference_key, gen_key):
    refs = []
    gens = []
    for i in range(len(input[reference_key])):
        for resp_sen, gen_sen in zip(input[reference_key][i], input[gen_key][i]):
            gen_sen_processed = dataloader.trim_in_ids(gen_sen)
            resp_sen_processed = dataloader.trim_in_ids(resp_sen)
            gens.append(gen_sen_processed)
            refs.append([resp_sen_processed[1:]])
    gens = replace_unk(gens)
    return corpus_bleu(refs, gens, smoothing_function=SmoothingFunction().method3)
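Both variants above feed corpus_bleu a list of reference lists, with one inner list of references per hypothesis; a sketch of that calling convention with illustrative word ids:

from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

# One inner list of references per hypothesis.
refs = [[[5, 6, 7, 8, 9]], [[3, 4, 5, 6]]]
gens = [[5, 6, 7, 9, 9], [3, 4, 5, 6]]
print(corpus_bleu(refs, gens, smoothing_function=SmoothingFunction().method3))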
Example #21
Source File: bleu.py From cotk with Apache License 2.0 | 5 votes |
def close(self) -> Dict[str, Any]:
    '''Return a dict which contains

    * **bleu**: bleu value.
    * **bleu hashvalue**: hash value for bleu metric, same hash value stands
      for same evaluation settings.
    '''
    result = super().close()
    if (not self.hyps) or (not self.refs):
        raise RuntimeError("The metric has not been forwarded data correctly.")

    if self.tokenizer:
        self._do_tokenize()

    if "unk" in self.dataloader.get_special_tokens_mapping():
        self.hyps = replace_unk(self.hyps, self.dataloader.get_special_tokens_mapping()["unk"])
    try:
        weights = np.ones(self.ngram) / self.ngram
        result.update({"bleu": corpus_bleu(self.refs, self.hyps, weights=weights,
                                           smoothing_function=SmoothingFunction().method3),
                       "bleu hashvalue": self._hashvalue()})
    except ZeroDivisionError as _:
        if not self.ignore_smoothing_error:
            raise ZeroDivisionError("Bleu smoothing divided by zero. This is a known bug of corpus_bleu, "
                                    "usually caused when there is only one sample and the sample length is 1.") from None
        result.update({"bleu": 0, "bleu hashvalue": self._hashvalue()})
    return result
Example #22
Source File: bleu.py From cotk with Apache License 2.0 | 5 votes |
def close(self) -> Dict[str, Any]:
    '''Return a dict which contains

    * **bleu**: bleu value.
    * **bleu hashvalue**: hash value for bleu metric, same hash value stands
      for same evaluation settings.
    '''
    result = super().close()
    if (not self.hyps) or (not self.refs):
        raise RuntimeError("The metric has not been forwarded data correctly.")

    self.hyps = replace_unk(self.hyps, self.dataloader.unk_id)
    self._hash_unordered_list(self.refs)
    try:
        result.update({"bleu": corpus_bleu(self.refs, self.hyps,
                                           smoothing_function=SmoothingFunction().method3),
                       "bleu hashvalue": self._hashvalue()})
    except ZeroDivisionError as _:
        if not self.ignore_smoothing_error:
            raise ZeroDivisionError("Bleu smoothing divided by zero. This is a known bug of corpus_bleu, "
                                    "usually caused when there is only one sample and the sample length is 1.")
        result.update({"bleu": 0, "bleu hashvalue": self._hashvalue()})
    return result
Example #23
Source File: test_bleu.py From cotk with Apache License 2.0 | 5 votes |
def get_self_bleu(self, dataloader, input, gen_key):
    gens = []
    for gen_sen in input[gen_key]:
        gen_sen_processed = dataloader.trim_in_ids(gen_sen)
        gens.append(gen_sen_processed)

    refs = copy.deepcopy(gens)
    _refs = replace_unk(refs)
    bleu_irl = []
    for i in range(len(gens)):
        bleu_irl.append(sentence_bleu(
            refs[:i] + refs[i + 1:], _refs[i], smoothing_function=SmoothingFunction().method1))

    return 1.0 * sum(bleu_irl) / len(bleu_irl)
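Self-BLEU, as computed above, scores each generated sentence against all the other generated sentences; a compact sketch without the unk handling, with illustrative sentences:

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

gens = [
    "the cat sat on the mat".split(),
    "a cat sat on a mat".split(),
    "the dog slept on the rug".split(),
]
smoothie = SmoothingFunction().method1
self_bleu = [
    sentence_bleu(gens[:i] + gens[i + 1:], gens[i], smoothing_function=smoothie)
    for i in range(len(gens))
]
print(sum(self_bleu) / len(self_bleu))  # higher values mean less diverse output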
Example #24
Source File: evaluate.py From tatk with Apache License 2.0 | 5 votes |
def get_bleu4(dialog_acts, golden_utts, gen_utts):
    das2utts = {}
    for das, utt, gen in zip(dialog_acts, golden_utts, gen_utts):
        utt = utt.lower()
        gen = gen.lower()
        for da, svs in das.items():
            domain, act = da.split('-')
            if act == 'Request' or domain == 'general':
                continue
            else:
                for s, v in sorted(svs, key=lambda x: x[0]):
                    if s == 'Internet' or s == 'Parking' or s == 'none' or v == 'none':
                        continue
                    else:
                        v = v.lower()
                        if (' ' + v in utt) or (v + ' ' in utt):
                            utt = utt.replace(v, '{}-{}'.format(da, s), 1)
                        if (' ' + v in gen) or (v + ' ' in gen):
                            gen = gen.replace(v, '{}-{}'.format(da, s), 1)
        hash_key = ''
        for da in sorted(das.keys()):
            for s, v in sorted(das[da], key=lambda x: x[0]):
                hash_key += da + '-' + s + ';'
        das2utts.setdefault(hash_key, {'refs': [], 'gens': []})
        das2utts[hash_key]['refs'].append(utt)
        das2utts[hash_key]['gens'].append(gen)
    # pprint(das2utts)
    refs, gens = [], []
    for das in das2utts.keys():
        for gen in das2utts[das]['gens']:
            refs.append([s.split() for s in das2utts[das]['refs']])
            gens.append(gen.split())
    bleu = corpus_bleu(refs, gens, weights=(0.25, 0.25, 0.25, 0.25),
                       smoothing_function=SmoothingFunction().method1)
    return bleu
Example #25
Source File: evaluators.py From NeuralDialog-ZSDG with Apache License 2.0 | 5 votes |
def get_report(self, include_error=False):
    reports = []
    tokenize = get_tokenize()

    for domain, labels in self.domain_labels.items():
        predictions = self.domain_hyps[domain]
        self.logger.info("Generate report for {} for {} samples".format(domain, len(predictions)))
        refs, hyps = [], []
        # find entity precision, recall and f1
        tp, fp, fn = 0.0, 0.0, 0.0

        for label, hyp in zip(labels, predictions):
            label = label.replace(EOS, '').replace(BOS, '')
            hyp = hyp.replace(EOS, '').replace(BOS, '')
            ref_tokens = tokenize(label)[2:]
            hyp_tokens = tokenize(hyp)[2:]

            refs.append([ref_tokens])
            hyps.append(hyp_tokens)

            label_ents = self.pred_ents(label, tokenize, None)
            hyp_ents = self.pred_ents(hyp, tokenize, None)
            # hyp_ents = list(set(hyp_ents))

            ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ents, hyp_ents)
            tp += ttpp
            fp += ffpp
            fn += ffnn

        ent_precision, ent_recall, ent_f1 = self._get_prec_recall(tp, fp, fn)

        # compute corpus level scores
        bleu = bleu_score.corpus_bleu(refs, hyps, smoothing_function=SmoothingFunction().method1)
        report = "\nDomain: %s BLEU %f\n Entity precision %f recall %f and f1 %f\n" \
                 % (domain, bleu, ent_precision, ent_recall, ent_f1)
        reports.append(report)

    return "\n==== REPORT===={report}".format(report="========".join(reports))
Example #26
Source File: metrics.py From quick-nlp with MIT License | 5 votes |
def bleu_score(preds, targs, stoi=None):
    sf = SmoothingFunction().method1
    preds = torch.max(preds, dim=-1)[1][:-1]
    bleus = []
    # Score every target/prediction pair, choosing the n-gram order by target length.
    for targ, pred in zip(to_np(targs), to_np(preds)):
        if len(targ) > 2:
            bleu = sentence_bleu([targ], pred, smoothing_function=sf, weights=(1 / 3., 1 / 3., 1 / 3.))
        elif len(targ) == 2:
            bleu = sentence_bleu([targ], pred, smoothing_function=sf, weights=(0.5, 0.5))
        else:
            bleu = sentence_bleu([targ], pred, smoothing_function=sf, weights=(1.0,))
        bleus.append(bleu)
    # The original ended with a bare return; returning the mean score is the presumed intent.
    return np.mean(bleus)
Example #27
Source File: utils.py From quick-nlp with MIT License | 5 votes |
def print_dialogue_batch(learner: Learner, modeldata: ModelData, input_field, output_field, num_batches=1,
                         num_sentences=-1, is_test=False, num_beams=1, smoothing_function=None, weights=None):
    weights = (1 / 3., 1 / 3., 1 / 3.) if weights is None else weights
    smoothing_function = SmoothingFunction().method1 if smoothing_function is None else smoothing_function
    predictions, targets, inputs = learner.predict_with_targs_and_inputs(is_test=is_test, num_beams=num_beams)
    blue_scores = []
    for batch_num, (input, target, prediction) in enumerate(zip(inputs, targets, predictions)):
        input = np.transpose(input, [1, 2, 0])  # transpose number of utterances to beams [sl, bs, nb]
        inputs_str: BatchBeamTokens = modeldata.itos(input, input_field)
        inputs_str: List[str] = ["\n".join(conv) for conv in inputs_str]
        predictions_str: BatchBeamTokens = modeldata.itos(prediction, output_field)
        targets_str: BatchBeamTokens = modeldata.itos(target, output_field)
        for index, (inp, targ, pred) in enumerate(zip(inputs_str, targets_str, predictions_str)):
            if targ[0].split() == pred[0].split()[1:]:
                blue_score = 1
            else:
                blue_score = sentence_bleu([targ[0].split()], pred[0].split()[1:],
                                           smoothing_function=smoothing_function,
                                           weights=weights)
            print(
                f'BATCH: {batch_num} SAMPLE : {index}\nINPUT:\n{"".join(inp)}\nTARGET:\n{"".join(targ)}\nPREDICTON:\n{"".join(pred)}\nblue: {blue_score}\n\n')
            blue_scores.append(blue_score)
            if 0 < num_sentences <= index - 1:
                break
        if 0 < num_batches <= batch_num - 1:
            break
    print(f'bleu score: mean: {np.mean(blue_scores)}, std: {np.std(blue_scores)}')
Example #28
Source File: bleu.py From DeepPavlov with Apache License 2.0 | 5 votes |
def bleu_advanced(y_true: List[Any], y_predicted: List[Any],
                  weights: Tuple = (1,), smoothing_function=SMOOTH.method1,
                  auto_reweigh=False, penalty=True) -> float:
    """Calculate BLEU score

    Parameters:
        y_true: list of reference tokens
        y_predicted: list of query tokens
        weights: n-gram weights
        smoothing_function: SmoothingFunction
        auto_reweigh: Option to re-normalize the weights uniformly
        penalty: either enable brevity penalty or not

    Return:
        BLEU score
    """
    bleu_measure = sentence_bleu([y_true], y_predicted, weights, smoothing_function, auto_reweigh)

    hyp_len = len(y_predicted)
    hyp_lengths = hyp_len
    ref_lengths = closest_ref_length([y_true], hyp_len)

    bpenalty = brevity_penalty(ref_lengths, hyp_lengths)

    if penalty is True or bpenalty == 0:
        return bleu_measure

    return bleu_measure / bpenalty
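The penalty switch above relies on two helpers that nltk.translate.bleu_score also exposes; a sketch showing how the brevity penalty can be computed and divided back out, with illustrative token lists:

from nltk.translate.bleu_score import (SmoothingFunction, sentence_bleu,
                                       brevity_penalty, closest_ref_length)

y_true = "the cat is on the mat".split()
y_predicted = "the cat sat".split()

# Unigram BLEU with smoothing; the brevity penalty is already folded into this score.
score = sentence_bleu([y_true], y_predicted, weights=(1,),
                      smoothing_function=SmoothingFunction().method1)
bp = brevity_penalty(closest_ref_length([y_true], len(y_predicted)), len(y_predicted))
print(score, bp, score / bp if bp != 0 else score)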
Example #29
Source File: test_bleu.py From V1EngineeringInc-Docs with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def test_corpus_bleu(self):
    ref_file = find('models/wmt15_eval/ref.ru')
    hyp_file = find('models/wmt15_eval/google.ru')
    mteval_output_file = find('models/wmt15_eval/mteval-13a.output')

    # Reads the BLEU scores from the `mteval-13a.output` file.
    # The order of the list corresponds to the order of the ngrams.
    with open(mteval_output_file, 'r') as mteval_fin:
        # The numbers are located in the last 2nd line of the file.
        # The first and 2nd item in the list are the score and system names.
        mteval_bleu_scores = map(float, mteval_fin.readlines()[-2].split()[1:-1])

    with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
        with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
            # Whitespace tokenize the file.
            # Note: split() automatically strip().
            hypothesis = list(map(lambda x: x.split(), hyp_fin))
            # Note that the corpus_bleu input is list of list of references.
            references = list(map(lambda x: [x.split()], ref_fin))
            # Without smoothing.
            for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
                nltk_bleu = corpus_bleu(
                    references, hypothesis, weights=(1.0 / i,) * i
                )
                # Check that the BLEU scores difference is less than 0.005 .
                # Note: This is an approximate comparison; as much as
                # +/- 0.01 BLEU might be "statistically significant",
                # the actual translation quality might not be.
                assert abs(mteval_bleu - nltk_bleu) < 0.005

            # With the same smoothing method used in mteval-v13a.pl
            chencherry = SmoothingFunction()
            for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
                nltk_bleu = corpus_bleu(
                    references,
                    hypothesis,
                    weights=(1.0 / i,) * i,
                    smoothing_function=chencherry.method3,
                )
                assert abs(mteval_bleu - nltk_bleu) < 0.005
Example #30
Source File: utils.py From Deep-Reinforcement-Learning-Hands-On with MIT License | 5 votes |
def calc_bleu_many(cand_seq, ref_sequences):
    sf = bleu_score.SmoothingFunction()
    return bleu_score.sentence_bleu(ref_sequences, cand_seq,
                                    smoothing_function=sf.method1,
                                    weights=(0.5, 0.5))
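The same call pattern as the helper above, inlined with illustrative token lists (multiple references for a single candidate):

from nltk.translate import bleu_score

candidate = "the cat sat on the mat".split()
references = ["the cat is on the mat".split(),
              "there is a cat on the mat".split()]

sf = bleu_score.SmoothingFunction()
score = bleu_score.sentence_bleu(references, candidate,
                                 smoothing_function=sf.method1,
                                 weights=(0.5, 0.5))
print(score)  # bigram BLEU against the closest-matching reference n-grams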