Python rouge.Rouge() Examples
The following are 18 code examples of rouge.Rouge(), collected from open-source projects. The source file and project are noted above each example. You may also want to check out all available functions/classes of the module rouge.
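Most of the examples below use the PyPI rouge package, where Rouge().get_scores(hyps, refs) returns per-pair ROUGE-1/2/L precision, recall, and F1 (or a single averaged dict with avg=True). A minimal sketch of that usage, with made-up strings:

from rouge import Rouge

hyp = "the cat sat on the mat"
ref = "the cat was sitting on the mat"

rouge = Rouge()
print(rouge.get_scores(hyp, ref))                # list with one dict: 'rouge-1'/'rouge-2'/'rouge-l', each with 'f'/'p'/'r'
print(rouge.get_scores([hyp], [ref], avg=True))  # single dict, averaged over all pairs

A few examples instead use the py-rouge package, which is imported under the same name but configured through constructor arguments such as metrics and max_n.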
Example #1
Source File: train_and_eval.py From structured-neural-summarization with MIT License
def compute_rouge(predictions, targets):
    predictions = [" ".join(prediction).lower() for prediction in predictions]
    predictions = [prediction if prediction else "EMPTY" for prediction in predictions]
    targets = [" ".join(target).lower() for target in targets]
    targets = [target if target else "EMPTY" for target in targets]
    rouge = Rouge()
    scores = rouge.get_scores(hyps=predictions, refs=targets, avg=True)
    return scores['rouge-2']['f']
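A hypothetical call, with tokenized predictions and targets as the function expects:

compute_rouge(
    predictions=[["the", "cat", "sat", "on", "the", "mat"]],
    targets=[["the", "cat", "was", "sitting", "on", "the", "mat"]],
)  # returns the averaged ROUGE-2 F1 as a float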
Example #2
Source File: metrics.py From summarus with Apache License 2.0
def calc_metrics(refs, hyps, language, metric="all", meteor_jar=None):
    metrics = dict()
    metrics["count"] = len(hyps)
    metrics["ref_example"] = refs[-1]
    metrics["hyp_example"] = hyps[-1]
    # Wrap plain-string references in singleton lists for BLEU/METEOR.
    many_refs = [[r] if not isinstance(r, list) else r for r in refs]
    if metric in ("bleu", "all"):
        metrics["bleu"] = corpus_bleu(many_refs, hyps)
    if metric in ("rouge", "all"):
        rouge = Rouge()
        scores = rouge.get_scores(hyps, refs, avg=True)
        metrics.update(scores)
    if metric in ("meteor", "all") and meteor_jar is not None and os.path.exists(meteor_jar):
        meteor = Meteor(meteor_jar, language=language)
        metrics["meteor"] = meteor.compute_score(hyps, many_refs)
    if metric in ("duplicate_ngrams", "all"):
        metrics["duplicate_ngrams"] = dict()
        metrics["duplicate_ngrams"].update(calc_duplicate_n_grams_rate(hyps))
    return metrics
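A hypothetical invocation restricted to ROUGE, so the BLEU/METEOR branches are skipped (assumes the summarus module's imports are available):

calc_metrics(
    refs=["the cat was sitting on the mat"],
    hyps=["the cat sat on the mat"],
    language="en",
    metric="rouge",
)  # dict with count, ref/hyp examples, and averaged rouge-1/2/l scores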
Example #3
Source File: interpolation_helper.py From texar with Apache License 2.0
def calc_reward(refs, hypo, unk_id, metric):
    """Calculate the reward given hypo and refs. Returns the BLEU score if
    metric is 'bleu', or the sum of (ROUGE-1, ROUGE-2, ROUGE-L) F-scores if
    metric is 'rouge'.
    """
    if len(hypo) == 0 or len(refs[0]) == 0:
        return 0.
    for i in range(len(hypo)):
        assert isinstance(hypo[i], int)
        if hypo[i] == unk_id:
            hypo[i] = -1
    if metric == 'bleu':
        return 0.01 * sentence_bleu(
            references=refs, hypothesis=hypo, smooth=True)
    else:
        ref_str = ' '.join([str(word) for word in refs[0]])
        hypo_str = ' '.join([str(word) for word in hypo])
        rouge_scores = \
            rouge.get_scores(hyps=[hypo_str], refs=[ref_str], avg=True)
        return sum([value['f'] for key, value in rouge_scores.items()])
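A hypothetical call with integer token ids; this assumes a module-level rouge = Rouge() instance, as in the source file:

calc_reward(refs=[[4, 8, 15, 16]], hypo=[4, 8, 15, 23], unk_id=0, metric='rouge')
# returns the sum of ROUGE-1/2/L F1 computed over the stringified ids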
Example #4
Source File: metrics.py From neural_chat with MIT License
def _rouge(guess, answers):
    """Compute ROUGE score between guess and *any* answers. Return the best."""
    global rouge
    if rouge is None:
        return None, None, None
    evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    try:
        scores = [
            evaluator.get_scores(normalize_answer(guess), normalize_answer(a))
            for a in answers
        ]
    except LookupError:
        warn_once(
            'ROUGE requires nltk punkt tokenizer. Please run '
            '`python -c "import nltk; nltk.download(\'punkt\')"`'
        )
        rouge = None
        return None, None, None
    scores_rouge1 = [score['rouge-1']['r'] for score in scores]
    scores_rouge2 = [score['rouge-2']['r'] for score in scores]
    scores_rougeL = [score['rouge-l']['r'] for score in scores]
    return max(scores_rouge1), max(scores_rouge2), max(scores_rougeL)
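Note the constructor arguments here: metrics=['rouge-n', 'rouge-l'] with max_n=2 is the API of the py-rouge package (also imported as rouge), not the plainer rouge package used in most other examples. A minimal sketch of that variant, with hypothetical strings:

import rouge  # py-rouge

evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
scores = evaluator.get_scores("the cat sat", "the cat was sitting")
print(scores['rouge-2']['r'])  # recall, as selected in the example above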
Example #5
Source File: rouge.py From neuralmonkey with BSD 3-Clause "New" or "Revised" License
def __init__(self,
             rouge_type: str,
             name: str = "ROUGE") -> None:
    check_argument_types()
    super().__init__(name)
    if rouge_type.lower() not in ["1", "2", "l"]:
        raise ValueError(
            ("Invalid type of rouge metric '{}', "
             "must be '1', '2' or 'L'").format(rouge_type))
    self.rouge_type = rouge_type.lower()
    self.rouge = rouge.Rouge()
Example #6
Source File: build_oracle.py From summarus with Apache License 2.0
def build_oracle_records(records, nrows=None, lower=True):
    references = []
    predictions = []
    rouge = Rouge()
    new_records = []
    for i, record in enumerate(records):
        if nrows is not None and i >= nrows:
            break
        summary = record["summary"]
        summary = summary if not lower else summary.lower()
        references.append(summary)
        text = record["text"]
        calc_score = lambda x, y: calc_single_score(x, y, rouge)
        predicted_summary, sentences, oracle_indices = build_oracle_summary_greedy(
            text, summary, calc_score=calc_score)
        predictions.append(predicted_summary)
        oracle_indices = [1 if i in oracle_indices else 0 for i in range(len(sentences))]
        new_record = copy.copy(record)
        new_record["sentences"] = sentences
        new_record["oracle"] = oracle_indices
        new_records.append(new_record)
    print_metrics(references, predictions)
    return new_records
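The greedy oracle selects the subset of text sentences that maximizes ROUGE against the reference summary, yielding binary extraction labels for training. A hypothetical record, assuming the summarus helpers are importable:

records = [{"summary": "The cat sat.",
            "text": "The cat sat on the mat. It then fell asleep."}]
new_records = build_oracle_records(records)
# each record gains "sentences" and a parallel 0/1 "oracle" label list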
Example #7
Source File: metric.py From MultiTurnDialogZoo with MIT License
def cal_ROUGE(refer, candidate):
    if len(candidate) == 0:
        candidate = ['<unk>']
    elif len(candidate) == 1:
        candidate.append('<unk>')
    if len(refer) == 0:
        refer = ['<unk>']
    elif len(refer) == 1:
        refer.append('<unk>')
    rouge = Rouge()
    scores = rouge.get_scores(' '.join(candidate), ' '.join(refer))
    return scores[0]['rouge-2']['f']
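The <unk> padding above guards against inputs too short for bigram overlap. A hypothetical call with token lists:

cal_ROUGE(
    refer="the cat was sitting on the mat".split(),
    candidate="the cat sat on the mat".split(),
)  # returns the ROUGE-2 F1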
Example #8
Source File: test_basic.py From rouge with Apache License 2.0
def setUp(self):
    self.hyp_path = './tests/hyp.txt'
    self.ref_path = './tests/ref.txt'
    self.data_path = './tests/data.json'
    with open(self.data_path) as f:
        self.data = json.load(f)
    self.rouge = rouge.Rouge()
    self.files_rouge = rouge.FilesRouge()
Example #9
Source File: rouge.py From vizseq with MIT License
def _get_sent_rouge(
    hypothesis: List[str],
    references: List[List[str]],
    rouge_type: str,
    extra_args: Optional[Dict[str, str]] = None
) -> List[float]:
    assert rouge_type in {'rouge-1', 'rouge-2', 'rouge-l'}
    _rouge_type = 'rouge-l' if rouge_type == 'rouge-l' else 'rouge-n'
    _max_n = 1 if rouge_type == 'rouge-1' else 2
    # Transpose corpus-major references into one reference list per hypothesis.
    joint_references = [list(r) for r in zip(*references)]
    scores = _rouge.Rouge(
        metrics=[_rouge_type], max_n=_max_n, apply_avg=False
    ).get_scores(hypothesis, joint_references)
    return [s[STATS_TYPE][0] for s in scores[rouge_type]]
Example #10
Source File: controller.py From dgm_latent_bow with MIT License
def __init__(self, config):
    """Initialization from the configuration"""
    self.mode = config.controller_mode
    self.model_name = config.model_name
    self.model_name_version = config.model_name + "_" + config.model_version
    self.start_epoch = config.start_epoch
    self.num_epoch = config.num_epoch
    self.write_output = config.write_output
    self.batch_size = config.batch_size
    self.print_interval = config.train_print_interval
    self.gpu_id = config.gpu_id
    self.drop_out = config.drop_out
    self.dec_start_id = config.dec_start_id
    self.dec_end_id = config.dec_end_id
    self.model_path = config.model_path
    self.output_path = config.output_path
    self.random_seed = config.random_seed
    self.bow_pred_method = config.bow_pred_method
    self.train_log = TrainingLog(config)
    self.id2word = None
    self.target_metrics = config.target_metrics
    self.lm_load_path = config.lm_load_path
    self.rouge_evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l'], max_n=2)
    self.save_ckpt = config.save_ckpt
    self.eval_metrics_list = config.eval_metrics_list
    self.log_metrics = config.log_metrics
    self.gumbel_samples = config.gumbel_samples
    self.is_gumbel = config.is_gumbel
Example #11
Source File: utils.py From fastNLP with Apache License 2.0
def rouge_all(hyps, refer):
    rouge = Rouge()
    score = rouge.get_scores(hyps, refer)[0]
    # mean_score = np.mean([score["rouge-1"]["f"], score["rouge-2"]["f"], score["rouge-l"]["f"]])
    return score
Example #12
Source File: utils.py From fastNLP with Apache License 2.0
def rouge_eval(hyps, refer):
    rouge = Rouge()
    try:
        score = rouge.get_scores(hyps, refer)[0]
        mean_score = np.mean([score["rouge-1"]["f"],
                              score["rouge-2"]["f"],
                              score["rouge-l"]["f"]])
    except Exception:
        # Fall back to 0.0 when scoring fails (e.g. on empty input).
        mean_score = 0.0
    return mean_score
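For instance (hypothetical strings):

rouge_eval(hyps="the cat sat on the mat",
           refer="the cat was sitting on the mat")
# returns the mean of ROUGE-1/2/L F1, or 0.0 on failure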
Example #13
Source File: Metric.py From fastNLP with Apache License 2.0
def get_metric(self, reset=True):
    logger.info("[INFO] Hyps and Refer number is %d, %d", len(self.hyps), len(self.refers))
    if len(self.hyps) == 0 or len(self.refers) == 0:
        logger.error("During testing, no hyps or refers is selected!")
        return
    rouge = Rouge()
    scores_all = rouge.get_scores(self.hyps, self.refers, avg=True)
    if reset:
        self.hyps = []
        self.refers = []
    logger.info(scores_all)
    return scores_all
Example #14
Source File: rouge_calculated.py From Pointer-Generator with MIT License
def rouge(sys, ref):
    rouge = Rouge()
    return rouge.get_scores(sys, ref, avg=True)
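A hypothetical call; with avg=True the result is a single dict of averaged ROUGE-1/2/L scores:

rouge(sys=["the cat sat on the mat"],
      ref=["the cat was sitting on the mat"])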
Example #15
Source File: evaluate.py From Counterfactual-StoryRW with MIT License
def eval_rouge(instances: List[CFRInstance]):
    references = []
    hypotheses = []
    evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l', 'rouge-w'],
                            max_n=4,
                            limit_length=True,
                            length_limit=100,
                            length_limit_type='words',
                            apply_avg=True,
                            apply_best=False,
                            alpha=0.5,  # Default F1_score
                            weight_factor=1.2,
                            stemming=True)
    by_instance = []
    for instance in instances:
        _r = [_clean_text(g) for g in instance.gold_cf_endings]
        _h = _clean_text(instance.predicted_ending)
        references.append(_r)
        hypotheses.append(_h)
        try:
            by_instance.append(evaluator.get_scores(_h, _r))
        except Exception:
            by_instance.append({})
    scores = evaluator.get_scores(hypotheses, references)
    return {'rouge_all': scores, 'rouge_by_instance': by_instance}
Example #16
Source File: compute_rouge.py From nlp-recipes with MIT License
def compute_rouge_python(cand, ref, is_input_files=False, language="en"):
    """
    Computes ROUGE scores using the python package (https://pypi.org/project/py-rouge/).

    Args:
        cand (list or str): If `is_input_files` is `False`, `cand` is a list of strings
            containing predicted summaries. If `is_input_files` is `True`, `cand` is the
            path to the file containing the predicted summaries.
        ref (list or str): If `is_input_files` is `False`, `ref` is a list of strings
            containing reference summaries. If `is_input_files` is `True`, `ref` is the
            path to the file containing the reference summaries.
        is_input_files (bool, optional): If True, inputs are file names. Otherwise, inputs
            are lists of predicted and reference summaries. Defaults to False.
        language (str, optional): Language of the input text. Supported values are "en"
            and "hi". Defaults to "en".

    Returns:
        dict: Dictionary of ROUGE scores.
    """
    supported_languages = ["en", "hi"]
    if language not in supported_languages:
        raise Exception(
            "Language {0} is not supported. Supported languages are: {1}.".format(
                language, supported_languages
            )
        )

    if is_input_files:
        candidates = [line.strip() for line in open(cand, encoding="utf-8")]
        references = [line.strip() for line in open(ref, encoding="utf-8")]
    else:
        candidates = cand
        references = ref

    print("Number of candidates: {}".format(len(candidates)))
    print("Number of references: {}".format(len(references)))
    assert len(candidates) == len(references)

    if language == "en":
        evaluator = Rouge(
            metrics=["rouge-n", "rouge-l"], max_n=2, limit_length=False, apply_avg=True
        )
    else:
        evaluator = RougeExt(
            metrics=["rouge-n", "rouge-l"], max_n=2, limit_length=False,
            apply_avg=True, language=language,
        )

    scores = evaluator.get_scores(candidates, [[it] for it in references])
    return scores
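A hypothetical call with in-memory lists (assuming the module's Rouge import from py-rouge):

scores = compute_rouge_python(
    cand=["the cat sat on the mat"],
    ref=["the cat was sitting on the mat"],
)
print(scores["rouge-2"]["f"])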
Example #17
Source File: metrics.py From ParlAI with MIT License
def compute_many(
    guess: str, answers: List[str]
) -> Tuple[
    Optional['RougeMetric'], Optional['RougeMetric'], Optional['RougeMetric']
]:
    """
    Compute ROUGE score between guess and *any* answer.

    Done with compute_many due to increased efficiency.

    :return: (rouge-1, rouge-2, rouge-L)
    """
    # possible global initialization
    global rouge
    if rouge is None:
        return None, None, None
    if RougeMetric._evaluator is None:
        RougeMetric._evaluator = rouge.Rouge(
            metrics=['rouge-n', 'rouge-l'], max_n=2
        )
    try:
        scores = [
            RougeMetric._evaluator.get_scores(
                normalize_answer(guess), normalize_answer(a)
            )
            for a in answers
        ]
    except LookupError:
        warn_once(
            'ROUGE requires nltk punkt tokenizer. Please run '
            '`python -c "import nltk; nltk.download(\'punkt\')"`'
        )
        return None, None, None

    scores_rouge1 = max(score['rouge-1']['r'] for score in scores)
    scores_rouge2 = max(score['rouge-2']['r'] for score in scores)
    scores_rougeL = max(score['rouge-l']['r'] for score in scores)
    return (
        RougeMetric(scores_rouge1),
        RougeMetric(scores_rouge2),
        RougeMetric(scores_rougeL),
    )
Example #18
Source File: rouge_evaluator.py From structured-neural-summarization with MIT License
def run(args):
    references_file = args['REFERENCES_FILE']
    predictions_file = args['PREDICTIONS_FILE']
    file_type = args['--format'] or 'jsonl'
    case_sensitive = args.get('--case_sensitive', False)

    if file_type != 'textfolder':
        references = extract_sentences(references_file, file_type, case_sensitive)
        predictions = extract_sentences(predictions_file, file_type, case_sensitive)
    else:
        references = extract_sentences_from_folder(references_file, case_sensitive)
        predictions = extract_sentences_from_folder(predictions_file, case_sensitive)

    assert len(references) == len(predictions), \
        'References and predictions are not of the same length: reference: %s, predictions: %s' \
        % (len(references), len(predictions))

    if not args['--use-rouge155']:
        from rouge import Rouge
        rouge = Rouge()
        scores = rouge.get_scores(hyps=predictions, refs=references, avg=True)
        print(scores)
    else:
        import pyrouge
        with tempfile.TemporaryDirectory() as data_dir:
            # First convert to single files
            ref_dir = os.path.join(data_dir, 'references')
            os.makedirs(ref_dir)
            dec_dir = os.path.join(data_dir, 'decoded')
            os.makedirs(dec_dir)
            for i, (decoded, reference) in enumerate(zip(predictions, references)):
                with open(os.path.join(ref_dir, '%06d_reference.txt' % i), 'w') as f:
                    f.write(reference.replace('.', '.\n'))
                with open(os.path.join(dec_dir, '%06d_decoded.txt' % i), 'w') as f:
                    f.write(decoded.replace('.', '.\n'))

            r = pyrouge.Rouge155()
            r.model_filename_pattern = '#ID#_reference.txt'
            r.system_filename_pattern = r'(\d+)_decoded.txt'
            r.model_dir = ref_dir
            r.system_dir = dec_dir
            logging.getLogger('global').setLevel(logging.WARNING)  # silence pyrouge logging
            rouge_results = r.convert_and_evaluate()
            results_dict = r.output_to_dict(rouge_results)
            print(results_dict)
            print()

            log_str = ""
            for x in ["1", "2", "l"]:
                log_str += "\nROUGE-%s:\n" % x
                for y in ["f_score", "recall", "precision"]:
                    key = "rouge_%s_%s" % (x, y)
                    key_cb = key + "_cb"
                    key_ce = key + "_ce"
                    val = results_dict[key]
                    val_cb = results_dict[key_cb]
                    val_ce = results_dict[key_ce]
                    log_str += "%s: %.4f with confidence interval (%.4f, %.4f)\n" % (
                        key, val, val_cb, val_ce)
            print(log_str)