Python nltk.metrics.distance.edit_distance() Examples
The following are 12 code examples of nltk.metrics.distance.edit_distance().
Each example comes from an open-source project; the source file, project, and license are listed above each snippet. You may also want to check out the other functions and classes available in the nltk.metrics.distance module.
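For orientation, here is a minimal standalone call, assuming only that nltk is installed; the expected return values are noted in the comments:

from nltk.metrics.distance import edit_distance

print(edit_distance("kitten", "sitting"))              # 3 (the classic Levenshtein example)
print(edit_distance("ab", "ba"))                       # 2: two substitutions
print(edit_distance("ab", "ba", transpositions=True))  # 1: a transposition counts as one edit
print(edit_distance("ab", "ad", substitution_cost=2))  # 2: substitution weighted at 2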
Example #1
Source File: test_distance.py From persephone with Apache License 2.0
def seq_cases():
    """ Cases are of the form (reference, hypothesis, substitution_cost, dist). """
    hardcoded_seqs = [
        ("", "", 1, 0),
        ("ab", "ad", 1, 1),
        ("abde", "abcde", 1, 1),
        ([1, 3, 5], [], 1, 3),
        ([1, 3, 5], [3], 1, 2),
    ]
    # Here we assume the nltk.metrics.distance implementation is correct.
    generated_seqs = []
    for length in range(25):
        for _ in range(10):
            length2 = random.randint(0, int(length * 1.5))
            s1 = rand_str(length)
            s2 = rand_str(length2)
            sub_cost = random.randint(0, 3)
            dist = distance.edit_distance(s1, s2, substitution_cost=sub_cost)
            generated_seqs.append((s1, s2, sub_cost, dist))
    return hardcoded_seqs + generated_seqs
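The rand_str helper and the random import come from the surrounding test module and are not shown here. The hardcoded cases can be checked directly against nltk; a quick sanity check of the tuples above:

from nltk.metrics import distance

assert distance.edit_distance("ab", "ad", substitution_cost=1) == 1
assert distance.edit_distance("abde", "abcde", substitution_cost=1) == 1  # one insertion
assert distance.edit_distance([1, 3, 5], [3], substitution_cost=1) == 2   # works on lists, not just strings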
Example #2
Source File: decoder.py From g-tensorflow-models with Apache License 2.0
def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))
    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]
    return distance.edit_distance(''.join(new_decode), ''.join(new_target))
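The word-to-character mapping turns a word-level comparison into a plain string edit distance. A rough standalone illustration of the same trick (the decode/target strings are made up, and, as in the method above, the result is the raw word-level edit distance rather than a value normalized by the reference length):

from nltk.metrics import distance

decode = "the cat sat"
target = "the cat sat down"
words = set(decode.split() + target.split())
word2char = dict(zip(words, range(len(words))))
new_decode = ''.join(chr(word2char[w]) for w in decode.split())
new_target = ''.join(chr(word2char[w]) for w in target.split())
print(distance.edit_distance(new_decode, new_target))  # 1: one word was inserted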
Example #3
Source File: decoder.py From models with Apache License 2.0
def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))
    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]
    return distance.edit_distance(''.join(new_decode), ''.join(new_target))
Example #4
Source File: decoder.py From multilabel-image-classification-tensorflow with MIT License
def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))
    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]
    return distance.edit_distance(''.join(new_decode), ''.join(new_target))
Example #5
Source File: methods_training_graph.py From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License
def process_rel_candidate_for_drop_led(relnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_rel):
    simple_sentence = " ".join(simple_sentences)

    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())

    temp_nodeset, temp_filtered_mod_pos = boxer_graph.drop_relation(nodeset, relnode_candidate, filtered_mod_pos)
    sentence_after_drop = boxer_graph.extract_main_sentence(temp_nodeset, main_sent_dict, temp_filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())

    isDrop = compare_edit_distance(opr_drop_rel, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop

# functions : Drop-MOD Candidate
Example #6
Source File: methods_training_graph.py From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License
def process_mod_candidate_for_drop_led(modcand_to_process, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_mod):
    simple_sentence = " ".join(simple_sentences)

    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())

    modcand_position_to_process = modcand_to_process[0]
    temp_filtered_mod_pos = filtered_mod_pos[:] + [modcand_position_to_process]
    sentence_after_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, temp_filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())

    isDrop = compare_edit_distance(opr_drop_mod, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop

# functions : Drop-OOD Candidate
Example #7
Source File: methods_training_graph.py From Sentence-Simplification-ACL14 with BSD 3-Clause "New" or "Revised" License
def process_ood_candidate_for_drop_led(oodnode_candidate, filtered_mod_pos, nodeset, simple_sentences, main_sent_dict, boxer_graph, opr_drop_ood):
    simple_sentence = " ".join(simple_sentences)

    sentence_before_drop = boxer_graph.extract_main_sentence(nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_before_drop = edit_distance(sentence_before_drop.split(), simple_sentence.split())

    temp_nodeset = nodeset[:]
    temp_nodeset.remove(oodnode_candidate)
    sentence_after_drop = boxer_graph.extract_main_sentence(temp_nodeset, main_sent_dict, filtered_mod_pos)
    edit_dist_after_drop = edit_distance(sentence_after_drop.split(), simple_sentence.split())

    isDrop = compare_edit_distance(opr_drop_ood, edit_dist_after_drop, edit_dist_before_drop)
    return isDrop
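All three drop-candidate functions above (Examples #5-#7) score a candidate by comparing word-level edit distances before and after the drop. boxer_graph and compare_edit_distance are project-specific helpers not shown on this page; the core measurement can be illustrated on its own (the sentences below are invented):

from nltk.metrics.distance import edit_distance

simple = "the red dog barked".split()
before_drop = "the big red dog barked loudly".split()
after_drop = "the red dog barked".split()

print(edit_distance(before_drop, simple))  # 2: two extra tokens relative to the simple sentence
print(edit_distance(after_drop, simple))   # 0: dropping them reproduces the simple sentence exactly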
Example #8
Source File: utils.py From persephone with Apache License 2.0
def batch_per(hyps: Sequence[Sequence[T]], refs: Sequence[Sequence[T]]) -> float:
    """ Calculates the phoneme error rate of a batch."""
    macro_per = 0.0
    for i in range(len(hyps)):
        ref = [phn_i for phn_i in refs[i] if phn_i != 0]
        hyp = [phn_i for phn_i in hyps[i] if phn_i != 0]
        macro_per += distance.edit_distance(ref, hyp) / len(ref)
    return macro_per / len(hyps)
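Sequence and the type variable T come from the original module's imports (not shown here), and 0 is treated as a padding index. A small hand-worked version of the same computation, with made-up hypothesis and reference index sequences:

from nltk.metrics import distance

hyps = [[1, 2, 3, 0, 0], [4, 5, 0]]
refs = [[1, 2, 4, 0, 0], [4, 5, 6, 0]]
macro_per = 0.0
for hyp, ref in zip(hyps, refs):
    ref = [p for p in ref if p != 0]  # strip the padding index
    hyp = [p for p in hyp if p != 0]
    macro_per += distance.edit_distance(ref, hyp) / len(ref)
print(macro_per / len(hyps))  # 0.333...: each utterance has one error against a 3-phoneme reference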
Example #9
Source File: decoder.py From g-tensorflow-models with Apache License 2.0
def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    return distance.edit_distance(decode, target)
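As with wer above, the method returns the raw character-level edit distance rather than a value normalized by the target length. For example:

from nltk.metrics import distance

print(distance.edit_distance("recognise", "recognize"))  # 1: a single character substitution
# A length-normalized CER would divide by len(target), e.g. 1 / 9 here.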
Example #10
Source File: decoder.py From models with Apache License 2.0
def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    return distance.edit_distance(decode, target)
Example #11
Source File: matcher.py From text-matcher with GNU General Public License v3.0
def edit_ratio(self, wordA, wordB):
    """ Computes the number of edits required to transform one
    (stemmed already, probably) word into another word, and adjusts
    for the average number of letters in each.

    Examples:
        color, colour: 0.1818181818
        theater, theatre: 0.2857
        day, today: 0.5
        foobar, foo56bar: 0.2857
    """
    distance = editDistance(wordA, wordB)
    averageLength = (len(wordA) + len(wordB)) / 2
    return distance / averageLength
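editDistance here is presumably nltk's edit_distance imported under that name in matcher.py (the import is not shown on this page). A standalone sketch of the same ratio, calling nltk directly:

from nltk.metrics.distance import edit_distance

def edit_ratio(wordA, wordB):
    # Same computation as the method above, without the class.
    return edit_distance(wordA, wordB) / ((len(wordA) + len(wordB)) / 2)

print(edit_ratio("theater", "theatre"))  # 2 edits / 7 average letters ≈ 0.2857
print(edit_ratio("day", "today"))        # 2 edits / 4 average letters = 0.5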
Example #12
Source File: decoder.py From multilabel-image-classification-tensorflow with MIT License
def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    return distance.edit_distance(decode, target)