Python Levenshtein.editops() Examples
The following are 7 code examples of Levenshtein.editops().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module Levenshtein, or try the search function.
Example #1
Source File: measures.py From jiwer with Apache License 2.0 | 6 votes |
def _get_operation_counts(
    source_string: str, destination_string: str
) -> Tuple[int, int, int, int]:
    """
    Count the edit operations (delete, insert, replace) needed to turn the
    source string into the destination string.

    The number of hits is the length of the source string minus the number
    of deletions and substitutions.

    :param source_string: the source string to transform into the destination string
    :param destination_string: the destination to transform the source string into
    :return: a tuple of #hits, #substitutions, #deletions, #insertions
    """
    # Single pass over the edit script, tallying each operation by its name.
    tally = {"replace": 0, "delete": 0, "insert": 0}
    for operation in Levenshtein.editops(source_string, destination_string):
        kind = operation[0]
        if kind in tally:
            tally[kind] += 1

    substitutions = tally["replace"]
    deletions = tally["delete"]
    insertions = tally["insert"]

    # Every source character that was neither replaced nor deleted is a hit.
    hits = len(source_string) - (substitutions + deletions)

    return hits, substitutions, deletions, insertions
Example #2
Source File: error_analysis.py From stanza-old with Apache License 2.0 | 6 votes |
def print_error_analysis():
    """Print a visualization of every mispredicted instance of the 'eval' run.

    Options are read through ``config.options(read=True)``. An instance counts
    as an error when its ``'output'`` field differs from the paired prediction.
    When ``options.max_examples`` is positive and smaller than the number of
    errors, that many errors are sampled without replacement; otherwise all
    of them are shown. With ``options.html`` set, the output is wrapped in a
    minimal HTML document.
    """
    options = config.options(read=True)
    output = get_output(options.run_dir, 'eval')

    # Gather (input, prediction, gold) triples for every wrong prediction.
    errors = []
    for inst, pred in zip(output.data, output.predictions):
        if inst['output'] != pred:
            errors.append((inst['input'], pred, inst['output']))

    num_errors = len(errors)
    if 0 < options.max_examples < num_errors:
        indices = np.random.choice(np.arange(num_errors),
                                   size=options.max_examples,
                                   replace=False)
    else:
        indices = range(num_errors)

    if options.html:
        print('<!DOCTYPE html>')
        print('<html><head><title>Error analysis</title><meta charset="utf-8" /></head><body>')

    for idx in indices:
        source_text, predicted, gold = [unicode(field).strip() for field in errors[idx]]
        # Edit script that turns the gold output into the prediction.
        ops = lev.editops(gold, predicted)
        print_visualization(source_text, predicted, gold, ops, html=options.html)

    if options.html:
        print('</body></html>')
Example #3
Source File: error_analysis.py From stanza-old with Apache License 2.0 | 6 votes |
def print_visualization(input_seq, pred_output_seq, gold_output_seq,
                        editops, html=False):
    """Print the input, prediction and gold sequences with edits highlighted.

    ``editops`` is a sequence of ``(optype, gold_idx, pred_idx)`` triples.
    The gold sequence is highlighted at the gold indices and the prediction
    at the prediction indices. With ``html`` set, the three lines are wrapped
    in a ``<p>`` element and separated by ``<br />`` tags.
    """
    ops = list(editops)
    gold_highlights = [gold_idx for _, gold_idx, _ in ops]
    pred_highlights = [pred_idx for _, _, pred_idx in ops]

    # NOTE(review): the input is highlighted at the prediction-side indices;
    # confirm that this is intended.
    input_seq = highlight(input_seq, pred_highlights, 'cyan', html=html)
    pred_output_seq = highlight(pred_output_seq, pred_highlights, 'red', html=html)
    gold_output_seq = highlight(gold_output_seq, gold_highlights, 'yellow', html=html)

    if html:
        print('<p>')
    line_break = u' <br />' if html else u''

    uprint(input_seq + line_break)
    uprint(pred_output_seq + line_break)
    uprint(gold_output_seq)

    if html:
        print('</p>')
    print('')
Example #4
Source File: transcription_error.py From patter with MIT License | 5 votes |
def _get_word_errors(s1, s2):
    """Count word-level delete/insert/replace operations between two strings.

    Each distinct whitespace-separated word is mapped to a single character,
    so that the character-level edit operations computed on the encoded
    strings correspond to word-level edits.

    Returns a dict with the keys "delete", "insert" and "replace".
    """
    words1 = s1.split()
    words2 = s2.split()

    # One integer code per distinct word across both strings.
    vocabulary = set(words1 + words2)
    word2char = {word: code for code, word in enumerate(vocabulary)}

    encoded1 = ''.join(chr(word2char[word]) for word in words1)
    encoded2 = ''.join(chr(word2char[word]) for word in words2)

    errors = dict.fromkeys(("delete", "insert", "replace"), 0)
    for op in Lev.editops(encoded1, encoded2):
        errors[op[0]] += 1
    return errors
Example #5
Source File: transcription_error.py From patter with MIT License | 5 votes |
def _get_char_errors(s1, s2):
    """Count character-level delete/insert/replace operations between two strings.

    Spaces are removed from both strings before the edit operations are
    computed. Returns a dict with the keys "delete", "insert" and "replace".
    """
    compact1 = s1.replace(' ', '')
    compact2 = s2.replace(' ', '')

    errors = dict.fromkeys(("delete", "insert", "replace"), 0)
    for op in Lev.editops(compact1, compact2):
        kind = op[0]
        errors[kind] += 1
    return errors
Example #6
Source File: test_align.py From circleseq with GNU Affero General Public License v3.0 | 5 votes |
def main():
    """Print the distance, edit operations and matching blocks of two fixed sequences."""
    # Alternative input pair, kept for reference:
    #   'GCCTGAGTCCGAGCAGAAGAAGAAGGGCTCCCATCACATCAAC'
    #   'GAGTCGAGCAGAAGAAGAANGG'
    source = 'AATGTGTGTCTGCTGGAAGCTCCTATTCTTCCGCCATTTTCCAGTCCTCCAGAAGTTTCCTGATGGTCCATGTCTGAATTAGACACCCCTCTTCTTTGTTCCAGTTGCACCTGTAATTCTTCAGCATAGTACTTCTTAAACTGTTTTTAA'
    target = 'TTTNCTGATGGTCCATGTCTGTTACTC'

    distance = l.distance(source, target)
    print(distance)

    print(l.editops(source, target))

    # matching_blocks takes the edit script followed by both strings.
    blocks = l.matching_blocks(l.editops(source, target), source, target)
    print(blocks)
Example #7
Source File: analyse.py From avsr-tf1 with GNU General Public License v3.0 | 4 votes |
def compute_uer_confusion_matrix(predictions_dict, labels_dict, unit_dict):
    """Accumulate a unit-level confusion matrix and plot edit-op positions.

    For every entry of ``labels_dict`` the matching prediction is aligned to
    the label with ``Levenshtein.editops(prediction, label)``. Each edit
    operation and each untouched prediction character adds one count to the
    confusion matrix, whose two extra columns hold deletions and insertions.
    The source-side position of each edit, divided by the prediction length,
    is collected for predictions of at least 40 characters and passed to
    ``plot_edit_ops_histogram``.

    :param predictions_dict: mapping from key to predicted sequence; must
        contain every key of ``labels_dict``
    :param labels_dict: mapping from key to reference sequence
    :param unit_dict: mapping whose values are unit symbols; entries whose
        value is 'GO', 'EOS', 'MASK' or 'END' are ignored
    :return: None
    :raises ValueError: if an edit operation has an unknown name
    """
    special_units = ('GO', 'EOS', 'MASK', 'END')
    slim_dict = {key: val for key, val in unit_dict.items()
                 if val not in special_units}
    vocab_size = len(slim_dict)
    # symbol -> unit id; rows/columns below are addressed as ``id - 1``.
    # NOTE(review): assumes the retained ids are 1..vocab_size -- confirm.
    invdict = {v: k for k, v in slim_dict.items()}

    # One column per unit, plus one for deletions and one for insertions.
    conf_matrix = np.zeros(shape=(vocab_size, vocab_size + 2))
    delete_col = vocab_size
    insert_col = vocab_size + 1

    edit_ops_indices = []
    # NOTE(review): the two lists below are filled but never read here.
    edit_ops_at_word_boundaries = []
    edit_ops_not_at_word_boundaries = []

    for key, label in labels_dict.items():
        label_str = ''.join(_strip_extra_chars(label))
        prediction_str = ''.join(_strip_extra_chars(predictions_dict[key]))
        prediction_len = len(prediction_str)

        # Prediction positions touched by a delete/replace; a set keeps the
        # membership test in the final loop O(1) instead of O(len(list)).
        seen_positions = set()

        for op in Levenshtein.editops(prediction_str, label_str):
            opname = op[0]

            if prediction_len >= 40:
                # store all errors in the source (prediction) string
                edit_ops_indices.append(op[1] / prediction_len)

            if opname == 'delete':
                source_unit = prediction_str[op[1]]
                mat_col = delete_col
                seen_positions.add(op[1])
                if source_unit == ' ':
                    edit_ops_at_word_boundaries.append(source_unit)
                else:
                    edit_ops_not_at_word_boundaries.append(source_unit)
            elif opname == 'insert':
                # the inserted unit does not exist in the source string
                source_unit = label_str[op[2]]
                mat_col = insert_col
            elif opname == 'replace':
                source_unit = prediction_str[op[1]]
                dest_unit = label_str[op[2]]
                mat_col = invdict[dest_unit] - 1
                seen_positions.add(op[1])
                if source_unit == ' ':
                    edit_ops_at_word_boundaries.append(source_unit)
                else:
                    edit_ops_not_at_word_boundaries.append(source_unit)
            else:
                raise ValueError('unknown opname {}'.format(opname))

            mat_row = invdict[source_unit] - 1
            conf_matrix[mat_row, mat_col] += 1

        # Every prediction character not deleted or replaced is a correct match.
        for idx, symbol in enumerate(prediction_str):
            if idx not in seen_positions:
                mat_pos = invdict[symbol] - 1
                conf_matrix[mat_pos, mat_pos] += 1

    # NOTE(review): conf_matrix is neither returned nor plotted at present;
    # the plotting call below was already disabled in the original.
    # plot_confusion_matrix(conf_matrix, invdict)
    plot_edit_ops_histogram(edit_ops_indices)