Python editdistance.eval() Examples
The following are 30 code examples of editdistance.eval().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module editdistance, or try the search function.
Example #1
Source File: rule.py From YaYaGen with BSD 2-Clause "Simplified" License | 7 votes |
def __is_wrong_permission(self, permission):
    """
    Return True if the permission contains a typo

    A permission counts as a typo when it is absent from
    ``YaraRule.PERMISSION_SET`` but lies at an edit distance of 1 to 3 from
    one of its entries.  Both outcomes are remembered on the class: typos go
    to ``YaraRule.WRONG_PERMISSION_DICT`` (mapped to the matching standard
    permission) and names with no close match are added to
    ``YaraRule.CACHE_NNSTD_PERMISSION_DICT``.
    """
    # Standard permissions are never typos.
    if permission in YaraRule.PERMISSION_SET:
        return False

    # Reuse earlier verdicts before paying for the distance scan.
    if permission in YaraRule.WRONG_PERMISSION_DICT:
        return True
    if permission in YaraRule.CACHE_NNSTD_PERMISSION_DICT:
        return False

    for candidate in YaraRule.PERMISSION_SET:
        if 0 < editdistance.eval(permission, candidate) <= 3:
            YaraRule.WRONG_PERMISSION_DICT[permission] = candidate
            return True

    # No standard permission is close enough: remember it as non-standard.
    YaraRule.CACHE_NNSTD_PERMISSION_DICT.add(permission)
    return False
Example #2
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #3
Source File: e2e_st.py From espnet with Apache License 2.0 | 6 votes |
def encode(self, x):
    """Encode acoustic features.

    Switches the model to eval mode, subsamples the frames of ``x`` by
    ``self.subsample[0]`` and runs the result through ``self.enc`` as a
    batch of one utterance.

    :param ndarray x: input acoustic feature (T, D)
    :return: encoder outputs
    :rtype: torch.Tensor
    """
    self.eval()
    # the length is taken before subsampling, as in the original code
    lengths = [x.shape[0]]

    # subsample frame
    frames = x[:: self.subsample[0], :]

    # borrow device and dtype from the first model parameter
    ref_param = next(self.parameters())
    feats = torch.as_tensor(frames, device=ref_param.device, dtype=ref_param.dtype)

    # add a leading batch axis so the encoder sees a one-utterance batch
    batch = feats.contiguous().unsqueeze(0)

    # 1. encoder
    encoded, _, _ = self.enc(batch, lengths)
    return encoded.squeeze(0)
Example #4
Source File: e2e_asr.py From espnet with Apache License 2.0 | 6 votes |
def enhance(self, xs):
    """Forward only in the frontend stage.

    The model is put in eval mode for the duration of the call and its
    previous training mode is restored afterwards, even if the frontend
    raises.

    :param ndarray xs: input acoustic feature (T, C, F)
    :return: enhanced feature and mask as numpy arrays, plus the input
        lengths as an int64 numpy array
    :rtype: tuple
    :raises RuntimeError: if the model has no frontend
    """
    if self.frontend is None:
        raise RuntimeError("Frontend does't exist")

    prev = self.training
    self.eval()
    try:
        # lengths are measured before subsampling
        ilens = np.fromiter((xx.shape[0] for xx in xs), dtype=np.int64)

        # subsample frame
        xs = [xx[:: self.subsample[0], :] for xx in xs]
        xs = [to_device(self, to_torch_tensor(xx).float()) for xx in xs]
        xs_pad = pad_list(xs, 0.0)
        enhanced, hlensm, mask = self.frontend(xs_pad, ilens)
        return enhanced.cpu().numpy(), mask.cpu().numpy(), ilens
    finally:
        # put the model back in training mode if that is how we found it
        if prev:
            self.train()
Example #5
Source File: evaluation.py From neutralizing-bias with MIT License | 6 votes |
def inference_metrics(model, src, tgt, config):
    """Decode the dataset and compute evaluation metrics.

    Returns ``(metrics, inputs, preds, ground_truths, auxs)`` where the
    last four are lists of space-joined token sequences.
    """
    decoded = decode_dataset(
        model, src, tgt, config, k=config['eval']['precision_recall_k'])
    inputs, preds, top_k_preds, ground_truths, auxs, raw_srcs = decoded

    classifier = models.TextClassifier.from_pickle(
        config['eval']['classifier_path'])

    # metrics are computed on the token sequences, before joining
    metrics = get_metrics(
        raw_srcs, preds, ground_truths,
        top_k_preds=top_k_preds, classifier=classifier)

    joined_inputs, joined_preds, joined_truths, joined_auxs = (
        [' '.join(tokens) for tokens in column]
        for column in (inputs, preds, ground_truths, auxs)
    )
    return metrics, joined_inputs, joined_preds, joined_truths, joined_auxs
Example #6
Source File: image_ocr.py From pCVR with Apache License 2.0 | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #7
Source File: e2e_asr_common.py From espnet with Apache License 2.0 | 6 votes |
def calculate_cer(self, seqs_hat, seqs_true):
    """Calculate sentence-level CER score.

    Spaces are stripped from both texts, then the summed character edit
    distance is divided by the summed reference length.

    :param list seqs_hat: prediction
    :param list seqs_true: reference
    :return: average sentence-level CER score
    :rtype: float
    """
    total_edits = 0
    total_ref_chars = 0
    for idx, hyp_text in enumerate(seqs_hat):
        ref_text = seqs_true[idx]
        hyp = hyp_text.replace(" ", "")
        ref = ref_text.replace(" ", "")
        total_edits += editdistance.eval(hyp, ref)
        total_ref_chars += len(ref)
    return float(total_edits) / total_ref_chars
Example #8
Source File: e2e_asr_common.py From espnet with Apache License 2.0 | 6 votes |
def calculate_wer(self, seqs_hat, seqs_true):
    """Calculate sentence-level WER score.

    Both texts are split on whitespace, then the summed word edit distance
    is divided by the summed reference word count.

    :param list seqs_hat: prediction
    :param list seqs_true: reference
    :return: average sentence-level WER score
    :rtype: float
    """
    total_edits = 0
    total_ref_words = 0
    for idx, hyp_text in enumerate(seqs_hat):
        ref_text = seqs_true[idx]
        hyp = hyp_text.split()
        ref = ref_text.split()
        total_edits += editdistance.eval(hyp, ref)
        total_ref_words += len(ref)
    return float(total_edits) / total_ref_words
Example #9
Source File: vocab.py From ancient-text-restoration with Apache License 2.0 | 6 votes |
def edit_distance_batch(hyp, tar, tar_len, eos_idx):
    """Return the mean length-normalised edit distance over a batch.

    Each hypothesis and target row is truncated at its first ``eos_idx``
    before being compared.  A hypothesis without EOS is used in full; a
    target without EOS keeps the length already stored in ``tar_len``.

    Args:
        hyp: 2-D array of hypothesis token ids, one row per sample.
        tar: 2-D array of target token ids, one row per sample.
        tar_len: per-sample target lengths; updated IN PLACE with the EOS
            position when the target row contains EOS.
        eos_idx: token id marking end of sequence.

    Returns:
        np.float32: sum of per-sample (edit distance / target length)
        divided by the batch size.
    """
    cer = 0.
    bs = hyp.shape[0]
    for i in range(bs):
        # filter hyp for eos
        # np.argmax yields 0 when nothing matches, so EOS presence has to
        # be tested explicitly rather than through the result's size.
        hyp_is_eos = hyp[i] == eos_idx
        if hyp_is_eos.any():
            hyp_len = np.argmax(hyp_is_eos, axis=0)
        else:
            hyp_len = hyp[i].size

        # filter tar for eos
        tar_is_eos = tar[i] == eos_idx
        if tar_is_eos.any():
            tar_len[i] = np.argmax(tar_is_eos, axis=0)

        cer += editdistance.eval(hyp[i, :hyp_len], tar[i, :tar_len[i]]) / float(tar_len[i])
    return np.float32(cer / bs)
Example #10
Source File: distance.py From ABXpy with MIT License | 6 votes |
def edit_distance(x, y):
    """Levenshtein Distance

    The "feature" dimension is along the columns and the "time" dimension
    along the lines of arrays x and y.

    Returns 0 when both arrays are empty and ``np.inf`` when exactly one of
    them is empty.
    """
    def as_nested_tuple(item):
        # editdistance is fed tuples, so convert arrays recursively;
        # anything that cannot be iterated is returned unchanged
        try:
            return tuple(as_nested_tuple(sub) for sub in item)
        except TypeError:
            return item

    rows_x = x.shape[0]
    rows_y = y.shape[0]
    if rows_x > 0 and rows_y > 0:
        # x and y are not empty
        return editdistance.eval(as_nested_tuple(x), as_nested_tuple(y))
    # both empty -> identical; only one empty -> infinitely far apart
    return 0 if rows_x == rows_y else np.inf
Example #11
Source File: comparison.py From ws-backend-community with GNU General Public License v3.0 | 6 votes |
def compare_strings_by_edit_distance(first=None, second=None):
    """
    Compute the edit distance between two strings.
    :param first: The first string to compare.
    :param second: The second string to compare.
    :return: The value returned by editdistance.eval for the two arguments.
    """
    distance = editdistance.eval(first, second)
    return distance

# Class Methods

# Public Methods

# Protected Methods

# Private Methods

# Properties

# Representation and Comparison
Example #12
Source File: e2e_asr_common.py From espnet with Apache License 2.0 | 6 votes |
def calculate_wer(self, seqs_hat, seqs_true):
    """Calculate sentence-level WER score for transducer model.

    Each element is split on whitespace with ``.split()``, so the elements
    are handled as text here.

    Args:
        seqs_hat: predictions, one text per sentence
        seqs_true: references, indexed in step with ``seqs_hat``

    Returns:
        (float): summed word edit distance divided by the summed
            reference word count

    """
    total_edits = 0
    total_ref_words = 0
    for idx, hyp_text in enumerate(seqs_hat):
        ref_text = seqs_true[idx]
        hyp = hyp_text.split()
        ref = ref_text.split()
        total_edits += editdistance.eval(hyp, ref)
        total_ref_words += len(ref)
    return float(total_edits) / total_ref_words
Example #13
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #14
Source File: text_similarity_transformers.py From driverlessai-recipes with Apache License 2.0 | 6 votes |
def transform(self, X: dt.Frame):
    """Return the word-level edit distance between the first two columns.

    Each cell is converted to ``str``, lower-cased and split on whitespace
    before the two token lists of a row are compared.

    :param X: frame whose first two columns hold the texts to compare
    :return: numpy array with one distance per row; ``-1`` marks rows for
        which the distance could not be computed
    """
    import editdistance

    frame = X.to_pandas()
    text1_arr = frame.iloc[:, 0].values
    text2_arr = frame.iloc[:, 1].values

    output = []
    for text1, text2 in zip(text1_arr, text2_arr):
        try:
            tokens1 = str(text1).lower().split()
            tokens2 = str(text2).lower().split()
            output.append(editdistance.eval(tokens1, tokens2))
        except Exception:
            # best-effort per row, but let KeyboardInterrupt/SystemExit
            # propagate instead of being swallowed by a bare except
            output.append(-1)
    return np.array(output)
Example #15
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #16
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #17
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #18
Source File: BKTree.py From CTCDecoder with MIT License | 6 votes |
def _query(self, node, txt, tolerance):
    """Collect all texts in the subtree at ``node`` within ``tolerance`` of ``txt``.

    A node is indexed as ``node[0]`` (its text) and ``node[1]`` (a mapping
    from edge distance to child node).
    """
    # handle empty root node
    if node is None:
        return []

    node_text, children = node[0], node[1]

    # distance between query and current node
    dist = ed.eval(node_text, txt)

    # add current node to result if within tolerance
    matches = [node_text] if dist <= tolerance else []

    # only descend into children whose edge lies in the tolerance band
    low, high = dist - tolerance, dist + tolerance
    for edge, child in children.items():
        if low <= edge <= high:
            matches += self._query(child, txt, tolerance)

    return matches
Example #19
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #20
Source File: test.py From crnn-pytorch with BSD 2-Clause "Simplified" License | 6 votes |
def main(data_path, abc, seq_proj, backend, snapshot, input_size, gpu, visualize):
    """Evaluate a saved model and print its accuracy and mean edit distance.

    :param data_path: dataset location; when None a ``TestDataset`` built
        from ``abc`` is used instead of a ``TextDataset``
    :param abc: alphabet handed to ``TestDataset``
    :param seq_proj: projection size formatted as ``"<int>x<int>"``
    :param backend: forwarded to ``load_model``
    :param snapshot: forwarded to ``load_model``
    :param input_size: input size formatted as ``"<int>x<int>"``
    :param gpu: value written to ``CUDA_VISIBLE_DEVICES``; an empty string
        disables CUDA
    :param visualize: forwarded to ``test``
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    # compare by value: `is not ''` tests object identity, which is not
    # guaranteed for equal strings
    cuda = gpu != ''

    input_size = [int(x) for x in input_size.split('x')]
    transform = Compose([
        Rotation(),
        Resize(size=(input_size[0], input_size[1]))
    ])
    if data_path is not None:
        data = TextDataset(data_path=data_path, mode="test", transform=transform)
    else:
        data = TestDataset(transform=transform, abc=abc)

    seq_proj = [int(x) for x in seq_proj.split('x')]
    net = load_model(data.get_abc(), seq_proj, backend, snapshot, cuda).eval()
    acc, avg_ed = test(net, data, data.get_abc(), cuda, visualize)
    print("Accuracy: {}".format(acc))
    print("Edit distance: {}".format(avg_ed))
Example #21
Source File: utils.py From Decoupled-attention-network with MIT License | 6 votes |
def add_iter(self, output, out_length, label_length, labels):
    """Accumulate character- and word-level statistics for one batch.

    ``output`` is decoded with ``self.de.decode``; each decoded text is
    compared with the label at the same index.  Words are the
    ``'|'``-separated pieces of a text.  When ``self.case_sensitive`` is
    false both texts are lower-cased first, and the lower-cased values are
    written back into ``labels`` and the decoded list.

    Updates ``total_samples``, ``distance_C``, ``distance_W``, ``total_C``,
    ``total_W`` and ``correct`` on ``self``.
    """
    self.total_samples += label_length.size()[0]
    prdt_texts, _ = self.de.decode(output, out_length)
    for i in range(len(prdt_texts)):
        if not self.case_sensitive:
            prdt_texts[i] = prdt_texts[i].lower()
            labels[i] = labels[i].lower()

        label_words = labels[i].split('|')
        prdt_words = prdt_texts[i].split('|')

        # Number each distinct word by first appearance so the word-level
        # distance can be computed on integer ids.
        word_ids = {}
        for word in label_words + prdt_words:
            word_ids.setdefault(word, len(word_ids))
        l_words = [word_ids[word] for word in label_words]
        p_words = [word_ids[word] for word in prdt_words]

        self.distance_C += ed.eval(labels[i], prdt_texts[i])
        self.distance_W += ed.eval(l_words, p_words)
        self.total_C += len(labels[i])
        self.total_W += len(l_words)
        if labels[i] == prdt_texts[i]:
            self.correct += 1
Example #22
Source File: image_ocr.py From DeepLearning_Wavelet-LSTM with MIT License | 6 votes |
def show_edit_distance(self, num):
    """Print mean and length-normalised edit distance over ``num`` samples.

    Batches are pulled from ``self.text_img_gen`` and decoded with
    ``decode_batch`` until ``num`` samples have been scored against their
    ``'source_str'`` ground truth.
    """
    remaining = num
    total_ed = 0.0
    total_norm_ed = 0.0
    while remaining > 0:
        batch = next(self.text_img_gen)[0]
        # never score more samples than are still requested
        take = min(batch['the_input'].shape[0], remaining)
        decoded = decode_batch(self.test_func, batch['the_input'][0:take])
        for idx in range(take):
            truth = batch['source_str'][idx]
            dist = float(editdistance.eval(decoded[idx], truth))
            total_ed += dist
            total_norm_ed += dist / len(truth)
        remaining -= take
    mean_ed = total_ed / num
    mean_norm_ed = total_norm_ed / num
    report = '\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f' % (
        num, mean_ed, mean_norm_ed)
    print(report)
Example #23
Source File: util.py From AfterQC with MIT License | 6 votes |
def editDistance(s1, s2):
    """Return the Levenshtein distance between ``s1`` and ``s2``.

    Uses the ``editdistance`` module when ``EDIT_DISTANCE_MODULE_EXISTS`` is
    set, otherwise the ctypes implementation when
    ``EDIT_DISTANCE_CTYPES_LOADED`` is set, and otherwise a pure-Python
    dynamic-programming table.
    """
    # check if editdistance module loaded
    if EDIT_DISTANCE_MODULE_EXISTS:
        return editdistance.eval(s1, s2)
    elif EDIT_DISTANCE_CTYPES_LOADED:
        return ed_ctypes.edit_distance(s1, len(s1), s2, len(s2))

    # tbl[i][j] holds the distance between s1[:i] and s2[:j]
    m = len(s1) + 1
    n = len(s2) + 1
    tbl = [[0] * n for _ in range(m)]
    for i in range(m):
        tbl[i][0] = i
    for j in range(n):
        tbl[0][j] = j

    for i in range(1, m):
        for j in range(1, n):
            cost = 0 if s1[i - 1] == s2[j - 1] else 1
            tbl[i][j] = min(tbl[i][j - 1] + 1,
                            tbl[i - 1][j] + 1,
                            tbl[i - 1][j - 1] + cost)

    # index the final cell explicitly instead of relying on the values the
    # loop variables happen to hold after the loops
    return tbl[m - 1][n - 1]
Example #24
Source File: node_utils.py From gtos with MIT License | 6 votes |
def get_frames(self, lemma):
    """
    Given a lemma, return its candidate frames in ranked order.
    If the lemma is a frequent senseless node or has no entry in the
    lemma-frame map, return a single element list [lemma].

    Frames are sorted in DESCENDING order of the key
    (edit distance between the frame without its ``-NN`` suffix and the
    lemma, negated two-digit sense number or 0 when there is no suffix).
    NOTE(review): descending order puts the most distant frame first;
    confirm that this is the intended ranking.
    """
    if lemma in self.frequent_senseless_nodes or lemma not in self.lemma_frame_map:
        return [lemma]

    def ranking_key(frame):
        stem_distance = editdistance.eval(re.sub(r'-\d\d$', '', frame), lemma)
        sense_rank = -int(frame[-2:]) if re.search(r'-\d\d$', frame) else 0
        return stem_distance, sense_rank

    return sorted(self.lemma_frame_map[lemma], key=ranking_key, reverse=True)
Example #25
Source File: MorseDecoder.py From LSTM_morse with MIT License | 5 votes |
def validate(model, loader):
    """Validate the network on the loader's validation set.

    Runs ``model.inferBatch`` on every batch, prints a per-sample
    comparison with the ground truth and a final summary.

    Returns:
        tuple: ``(charErrorRate, wordAccuracy)``.  When no ground-truth
        characters were seen the rates cannot be computed and the
        defaults ``(float('inf'), 0)`` are returned.
    """
    print('Validate NN')
    loader.validationSet()
    charErrorRate = float('inf')
    numCharErr = 0
    numCharTotal = 0
    numWordOK = 0
    numWordTotal = 0
    wordAccuracy = 0
    while loader.hasNext():
        iterInfo = loader.getIteratorInfo()
        print('Batch:', iterInfo[0],'/', iterInfo[1])
        batch = loader.getNext()
        (recognized, probability) = model.inferBatch(batch)
        print(recognized, probability)

        print('Ground truth -> Recognized')
        for i in range(len(recognized)):
            numWordOK += 1 if batch.gtTexts[i] == recognized[i] else 0
            numWordTotal += 1
            dist = editdistance.eval(recognized[i], batch.gtTexts[i])
            numCharErr += dist
            numCharTotal += len(batch.gtTexts[i])
            print('[OK]' if dist==0 else '[ERR:%d]' % dist,'"' + batch.gtTexts[i] + '"', '->', '"' + recognized[i] + '"')

    # print validation result
    try:
        charErrorRate = numCharErr / numCharTotal
        wordAccuracy = numWordOK / numWordTotal
        print('Character error rate: {:4.1f}%. Word accuracy: {:4.1f}%.'.format(charErrorRate*100.0, wordAccuracy*100.0))
        print('numCharTotal:{} numWordTotal:{}'.format(numCharTotal,numWordTotal))
    except ZeroDivisionError:
        # nothing to average over; keep the default rates and only
        # report the totals
        print('numCharTotal:{} numWordTotal:{}'.format(numCharTotal,numWordTotal))
    return charErrorRate, wordAccuracy
Example #26
Source File: evaluation.py From neutralizing-bias with MIT License | 5 votes |
def get_edit_distance(hypotheses, reference):
    """Return the summed edit distance of the paired items divided by ``len(hypotheses)``.

    Items are paired with ``zip``, so surplus entries in the longer
    sequence are ignored.
    """
    total = sum(
        editdistance.eval(hyp, ref)
        for hyp, ref in zip(hypotheses, reference)
    )
    return total * 1.0 / len(hypotheses)
Example #27
Source File: main.py From SimpleHTR with MIT License | 5 votes |
def validate(model, loader):
    """Validate the network on the loader's validation set.

    Prints a per-sample comparison with the ground truth and a summary
    line, then returns the character error rate (total edit distance
    divided by total ground-truth characters).
    """
    print('Validate NN')
    loader.validationSet()

    char_errors = 0
    char_count = 0
    words_ok = 0
    word_count = 0
    while loader.hasNext():
        progress = loader.getIteratorInfo()
        print('Batch:', progress[0],'/', progress[1])
        batch = loader.getNext()
        (recognized, _) = model.inferBatch(batch)

        print('Ground truth -> Recognized')
        for idx, guess in enumerate(recognized):
            truth = batch.gtTexts[idx]
            if truth == guess:
                words_ok += 1
            word_count += 1
            dist = editdistance.eval(guess, truth)
            char_errors += dist
            char_count += len(truth)
            status = '[OK]' if dist==0 else '[ERR:%d]' % dist
            print(status,'"' + truth + '"', '->', '"' + guess + '"')

    # print validation result
    charErrorRate = char_errors / char_count
    wordAccuracy = words_ok / word_count
    print('Character error rate: %f%%. Word accuracy: %f%%.' % (charErrorRate*100.0, wordAccuracy*100.0))
    return charErrorRate
Example #28
Source File: callbacks.py From LipNet with MIT License | 5 votes |
def get_mean_character_error_rate(self, data):
    """Delegate to ``self.get_mean_tuples`` using ``editdistance.eval``.

    The second argument passed on is the mean length of the second element
    of every item in ``data``.
    NOTE(review): presumably items are (prediction, truth) pairs and the
    mean length is a normaliser - confirm against ``get_mean_tuples``.
    """
    second_lengths = [len(item[1]) for item in data]
    mean_length = np.mean(second_lengths)
    return self.get_mean_tuples(data, mean_length, editdistance.eval)
Example #29
Source File: distance.py From panphon with MIT License | 5 votes |
def fast_levenshtein_distance_div_maxlen(self, source, target):
    """Levenshtein distance divided by maxlen

    Args:
        source (unicode): source word
        target (unicode): target word

    Returns:
        float: minimum number of Levenshtein edits required to get from
               `source` to `target` divided by the length of the longest
               of these arguments; 0.0 when both arguments are empty
    """
    maxlen = max(len(source), len(target))
    if maxlen == 0:
        # two empty words are identical; avoid dividing by zero
        return 0.0
    return int(editdistance.eval(source, target)) / maxlen
Example #30
Source File: distance.py From panphon with MIT License | 5 votes |
def fast_levenshtein_distance(self, source, target):
    """Levenshtein distance computed with ``editdistance.eval``

    Args:
        source (unicode): source word
        target (unicode): target word

    Returns:
        int: minimum number of Levenshtein edits required to get from
             `source` to `target`
    """
    edits = editdistance.eval(source, target)
    return int(edits)