Python tensorflow.edit_distance() Examples
The following are 27 code examples of tensorflow.edit_distance(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet. You may also want to check out the other available functions and classes of the tensorflow module.
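
For orientation, here is a minimal, self-contained sketch (TF 2.x eager mode, with made-up toy label sequences) of what tf.edit_distance computes: given a hypothesis SparseTensor and a truth SparseTensor, it returns one Levenshtein distance per sequence, optionally normalized by the length of the truth sequence. Most of the examples below wrap exactly this call inside CTC speech- or text-recognition pipelines.

import tensorflow as tf

# Toy integer label batches; zeros act as padding and are dropped by
# tf.sparse.from_dense, which treats zero entries as implicit.
hypothesis = tf.sparse.from_dense(tf.constant([[1, 2, 3, 0],
                                               [7, 8, 0, 0]]))
truth = tf.sparse.from_dense(tf.constant([[1, 2, 4, 0],
                                          [7, 8, 9, 0]]))

# normalize=True divides each raw distance by the length of the truth sequence.
per_sequence = tf.edit_distance(hypothesis, truth, normalize=True)
print(per_sequence.numpy())  # approximately [0.333 0.333]; each pair differs by one edit out of 3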
Example #1
Source File: metrics.py From rnnt-speech-recognition with MIT License

def error_rate(y_true, decoded):
    y_true_shape = tf.shape(y_true)
    decoded_shape = tf.shape(decoded)

    max_length = tf.maximum(y_true_shape[-1], decoded_shape[-1])

    if y_true.dtype == tf.string:
        truth = string_to_sparse(y_true)
    else:
        truth = tf.sparse.from_dense(y_true)

    if decoded.dtype == tf.string:
        hypothesis = string_to_sparse(decoded)
    else:
        hypothesis = tf.sparse.from_dense(decoded)

    err = tf.edit_distance(hypothesis, truth, normalize=False)
    err_norm = err / tf.cast(max_length, dtype=tf.float32)

    return err_norm
Example #2
Source File: edit_distance.py From tensorflow_end2end_speech_recognition with MIT License

def compute_edit_distance(session, labels_true_st, labels_pred_st):
    """Compute edit distance per mini-batch.
    Args:
        session:
        labels_true_st: A `SparseTensor` of ground truth
        labels_pred_st: A `SparseTensor` of prediction
    Returns:
        edit_distances: list of edit distance of each utterance
    """
    indices, values, dense_shape = labels_true_st
    labels_true_pl = tf.SparseTensor(indices, values, dense_shape)
    indices, values, dense_shape = labels_pred_st
    labels_pred_pl = tf.SparseTensor(indices, values, dense_shape)
    edit_op = tf.edit_distance(labels_pred_pl, labels_true_pl, normalize=True)
    edit_distances = session.run(edit_op)

    return edit_distances
Example #3
Source File: metrics.py From athena with Apache License 2.0

def update_state(self, sparse_predictions, samples, logit_length=None):
    """ Accumulate errors and counts """
    validated_label = tf.cast(
        tf.sparse.from_dense(samples["output"]), dtype=tf.int64
    )
    labels_counter = tf.cast(tf.shape(validated_label.values)[0], tf.float32)

    num_errs = tf.edit_distance(
        sparse_predictions, validated_label, normalize=False
    )
    num_errs = tf.reduce_sum(num_errs)
    if self.rank_size > 1:
        num_errs = hvd.allreduce(num_errs, average=False)
        labels_counter = hvd.allreduce(labels_counter, average=False)
    self.error_count(num_errs)
    self.total_count(labels_counter)
    return num_errs, labels_counter
Example #4
Source File: init_model.py From ZASR_tensorflow with Apache License 2.0

def loss(self):
    """ Define loss
    return
    """
    # ctc loss
    with tf.name_scope('loss'):
        self.avg_loss = tf.reduce_mean(ctc_ops.ctc_loss(self.text, self.logits, self.seq_length))
        tf.summary.scalar('loss', self.avg_loss)
    # [optimizer]
    with tf.name_scope('train'):
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.hyparam.learning_rate).minimize(self.avg_loss)
    with tf.name_scope("decode"):
        self.decoded, log_prob = ctc_ops.ctc_beam_search_decoder(self.logits, self.seq_length, merge_repeated=False)
    with tf.name_scope("ctc_beam_search_decode"):
        self.prob = tf.nn.softmax(self.logits, dim=0)
        # keep the same dims as the decoder: {batch_size, time_step, n_character}
        self.prob = tf.transpose(self.prob, [1, 0, 2])
        self.decoder = LM_decoder(self.hyparam.alpha, self.hyparam.beta, self.hyparam.lang_model_path, self.words)
    with tf.name_scope("accuracy"):
        self.distance = tf.edit_distance(tf.cast(self.decoded[0], tf.int32), self.text)
        # compute label error rate (accuracy)
        self.label_err = tf.reduce_mean(self.distance, name='label_error_rate')
        tf.summary.scalar('accuracy', self.label_err)
Example #5
Source File: tf_train_ctc.py From RNN-Tutorial with Apache License 2.0

def setup_summary_statistics(self):
    # Create a placeholder for the summary statistics
    with tf.name_scope("accuracy"):
        # Compute the edit (Levenshtein) distance of the top path
        distance = tf.edit_distance(
            tf.cast(self.decoded[0], tf.int32), self.targets)

        # Compute the label error rate (accuracy)
        self.ler = tf.reduce_mean(distance, name='label_error_rate')
        self.ler_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.train_ler_op = tf.summary.scalar(
            "train_label_error_rate", self.ler_placeholder)
        self.dev_ler_op = tf.summary.scalar(
            "validation_label_error_rate", self.ler_placeholder)
        self.test_ler_op = tf.summary.scalar(
            "test_label_error_rate", self.ler_placeholder)
Example #6
Source File: measure_modules.py From ludwig with Apache License 2.0

def edit_distance(targets, target_seq_length, predictions_sequence,
                  predictions_seq_length, output_feature_name):
    predicts = to_sparse(predictions_sequence,
                         predictions_seq_length,
                         tf.shape(predictions_sequence)[1])
    labels = to_sparse(targets,
                       target_seq_length,
                       tf.shape(targets)[1])
    edit_distance = tf.edit_distance(
        predicts, labels,
        name='edit_distance_{}'.format(output_feature_name))
    mean_edit_distance = tf.reduce_mean(
        edit_distance,
        name='mean_edit_distance_{}'.format(output_feature_name))
    return edit_distance, mean_edit_distance
Example #7
Source File: train-timit.py From tensorpack with Apache License 2.0

def build_graph(self, feat, labelidx, labelvalue, labelshape, seqlen):
    label = tf.SparseTensor(labelidx, labelvalue, labelshape)
    cell = rnn.MultiRNNCell([rnn.LSTMBlockCell(num_units=HIDDEN) for _ in range(NLAYER)])

    initial = cell.zero_state(tf.shape(feat)[0], tf.float32)

    outputs, last_state = tf.nn.dynamic_rnn(cell, feat, seqlen, initial,
                                            dtype=tf.float32, scope='rnn')  # o: b x t x HIDDEN

    output = tf.reshape(outputs, [-1, HIDDEN])  # (Bxt) x rnnsize
    logits = FullyConnected('fc', output, NR_CLASS, activation=tf.identity,
                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
    logits = tf.reshape(logits, (BATCH, -1, NR_CLASS))

    loss = tf.nn.ctc_loss(label, logits, seqlen, time_major=False)

    cost = tf.reduce_mean(loss, name='cost')

    logits = tf.transpose(logits, [1, 0, 2])

    if self.training:
        # beam search is too slow to run in training
        predictions = tf.cast(
            tf.nn.ctc_greedy_decoder(logits, seqlen)[0][0], tf.int32)
    else:
        predictions = tf.cast(
            tf.nn.ctc_beam_search_decoder(logits, seqlen)[0][0], tf.int32)
    err = tf.edit_distance(predictions, label, normalize=True)
    err.set_shape([None])
    err = tf.reduce_mean(err, name='error')
    summary.add_moving_summary(err, cost)
    return cost
Example #8
Source File: metrics.py From athena with Apache License 2.0

def update_state(self, sparse_predictions, samples, logit_length=None):
    """ Accumulate errors and counts """
    validated_label = tf.cast(
        tf.sparse.from_dense(samples["output"]), dtype=tf.int64
    )
    labels_counter = tf.cast(tf.shape(validated_label.values)[0], tf.float32)

    num_errs = tf.edit_distance(
        sparse_predictions, validated_label, normalize=False
    )
    num_errs = tf.reduce_sum(num_errs)
    self.error_count(num_errs)
    self.total_count(labels_counter)
    return num_errs, labels_counter
Example #9
Source File: model.py From ctc-asr with MIT License

def error_rates_fn(labels, originals, decoded, decoded_texts):
    """Calculate edit distance and word error rate.

    Args:
        labels (tf.SparseTensor or tf.Tensor): Integer SparseTensor containing the target.
            With dense shape [batch_size, time (target)].
            Dense Tensors are converted into SparseTensors if `FLAGS.use_warp_ctc == True`.
        originals (tf.Tensor): String Tensor of shape [batch_size] with the original plaintext.
        decoded (tf.Tensor): Integer tensor of the decoded output labels.
        decoded_texts (tf.Tensor): String tensor with the decoded output labels converted to normal text.

    Returns:
        tf.Tensor: Edit distances for the batch.
        tf.Tensor: Mean edit distance.
        tf.Tensor: Word error rates for the batch.
        tf.Tensor: Word error rate.
    """
    # Edit distances and average edit distance.
    edit_distances = tf.edit_distance(decoded, labels)
    mean_edit_distance = tf.reduce_mean(edit_distances)

    # Word error rates for the batch and average word error rate (WER).
    wers, wer = tf.py_func(metrics.wer_batch, [originals, decoded_texts],
                           [TF_FLOAT, TF_FLOAT], name='py_wer_batch')

    return edit_distances, mean_edit_distance, wers, wer
Example #10
Source File: model_fn.py From cnn_lstm_ctc_ocr with GNU General Public License v3.0

def _get_testing(rnn_logits, sequence_length, label, label_length,
                 continuous_eval, lexicon, lexicon_prior):
    """Create ops for testing (all scalars):
       loss: CTC loss function value,
       label_error: batch level edit distance on beam search max
       sequence_error: batch level sequence error rate
    """
    with tf.name_scope("train"):
        # Reduce by mean (rather than sum) if doing continuous evaluation
        batch_loss = model.ctc_loss_layer(rnn_logits, label, sequence_length,
                                          reduce_mean=continuous_eval)
    with tf.name_scope("test"):
        predictions, _ = _get_output(rnn_logits, sequence_length,
                                     lexicon, lexicon_prior)

        hypothesis = tf.cast(predictions[0], tf.int32)  # for edit_distance

        # Per-sequence statistic
        num_label_errors = tf.edit_distance(hypothesis, label, normalize=False)

        # Per-batch summary counts
        batch_num_label_errors = tf.reduce_sum(num_label_errors)
        batch_num_sequence_errors = tf.count_nonzero(num_label_errors, axis=0)
        batch_num_labels = tf.reduce_sum(label_length)

        # Wide integer type casts (prefer unsigned, but truediv dislikes those)
        batch_num_label_errors = tf.cast(batch_num_label_errors, tf.int64)
        batch_num_sequence_errors = tf.cast(batch_num_sequence_errors, tf.int64)
        batch_num_labels = tf.cast(batch_num_labels, tf.int64)

    return batch_loss, batch_num_label_errors, batch_num_sequence_errors, \
        batch_num_labels, predictions
Example #11
Source File: losses.py From deep_lip_reading with Apache License 2.0

def cer(y_true, y_pred, return_all=False):
    labels_pred_sparse = one_hot_labels_to_sparse(y_pred)
    labels_true_sparse = one_hot_labels_to_sparse(y_true)

    ed = tf.edit_distance(tf.cast(labels_pred_sparse, tf.int32), labels_true_sparse)
    cer = tf.reduce_mean(ed)

    if return_all:
        return cer, ed
    else:
        return cer
Example #12
Source File: CTCModel.py From CTCModel with MIT License

def tf_edit_distance(hypothesis, truth, norm=False):
    """ Edit distance using tensorflow

    inputs are tf.Sparse_tensors
    """
    return tf.edit_distance(hypothesis, truth, normalize=norm, name='edit_distance')
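
For reference, here is one hypothetical way such a wrapper can be exercised in TF 2.x eager mode; the toy dense batches and the tf.sparse.from_dense conversion are illustrative and not part of the CTCModel project:

truth = tf.sparse.from_dense(tf.constant([[1, 2, 3, 0], [4, 5, 0, 0]]))
hypothesis = tf.sparse.from_dense(tf.constant([[1, 2, 4, 0], [4, 5, 6, 0]]))

# With norm=True each distance is divided by the truth-sequence length.
print(tf_edit_distance(hypothesis, truth, norm=True).numpy())  # e.g. [0.333 0.5]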
Example #13
Source File: metrics_utils.py From listen-attend-and-spell with Apache License 2.0

def edit_distance(hypothesis, truth, eos_id, mapping=None):
    if mapping:
        mapping = tf.convert_to_tensor(mapping)
        hypothesis = tf.nn.embedding_lookup(mapping, hypothesis)
        truth = tf.nn.embedding_lookup(mapping, truth)

    hypothesis = dense_to_sparse(hypothesis, eos_id, merge_repeated=True)
    truth = dense_to_sparse(truth, eos_id, merge_repeated=True)

    return tf.edit_distance(hypothesis, truth, normalize=True)
Example #14
Source File: ed.py From Automatic_Speech_Recognition with MIT License

def get_edit_distance(hyp_arr, truth_arr, mode='train'):
    ''' calculate edit distance
    '''
    graph = tf.Graph()
    with graph.as_default():
        truth = tf.sparse_placeholder(tf.int32)
        hyp = tf.sparse_placeholder(tf.int32)
        editDist = tf.edit_distance(hyp, truth, normalize=True)

    with tf.Session(graph=graph) as session:
        truthTest = list_to_sparse_tensor(truth_arr, mode)
        hypTest = list_to_sparse_tensor(hyp_arr, mode)
        feedDict = {truth: truthTest, hyp: hypTest}
        dist = session.run(editDist, feed_dict=feedDict)

    return dist
Example #15
Source File: test.py From cnn_lstm_ctc_ocr_for_ICPR with GNU General Public License v3.0

def _get_testing(rnn_logits, sequence_length, label, label_length):
    """Create ops for testing (all scalars):
       loss: CTC loss function value,
       label_error: Batch-normalized edit distance on beam search max
       sequence_error: Batch-normalized sequence error rate
    """
    with tf.name_scope("train"):
        loss = model.ctc_loss_layer(rnn_logits, label, sequence_length)
    with tf.name_scope("test"):
        predictions, _ = tf.nn.ctc_beam_search_decoder(rnn_logits,
                                                       sequence_length,
                                                       beam_width=128,
                                                       top_paths=1,
                                                       merge_repeated=True)
        hypothesis = tf.cast(predictions[0], tf.int32)  # for edit_distance
        label_errors = tf.edit_distance(hypothesis, label, normalize=False)
        sequence_errors = tf.count_nonzero(label_errors, axis=0)
        total_label_error = tf.reduce_sum(label_errors)
        total_labels = tf.reduce_sum(label_length)
        label_error = tf.truediv(total_label_error,
                                 tf.cast(total_labels, tf.float32),
                                 name='label_error')
        sequence_error = tf.truediv(tf.cast(sequence_errors, tf.int32),
                                    tf.shape(label_length)[0],
                                    name='sequence_error')
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('label_error', label_error)
        tf.summary.scalar('sequence_error', sequence_error)

    return loss, label_error, sequence_error
Example #16
Source File: models.py From kaggle_speech_recognition with MIT License

def cal_perf(pred, sparse_labels):
    """Helper function to calculate edit distance and accuracy.
    """
    edist = tf.edit_distance(tf.cast(pred[0], tf.int32), sparse_labels,
                             normalize=False)
    acc = tf.reduce_mean(tf.cast(tf.equal(edist, 0), tf.float32))
    return edist, acc
Example #17
Source File: edit_distance_op_test.py From deep_image_model with Apache License 2.0

def _testEditDistanceST(
        self, hypothesis_st, truth_st, normalize, expected_output,
        expected_shape, expected_err_re=None):
    edit_distance = tf.edit_distance(
        hypothesis=hypothesis_st, truth=truth_st, normalize=normalize)

    if expected_err_re is None:
        self.assertEqual(edit_distance.get_shape(), expected_shape)
        output = edit_distance.eval()
        self.assertAllClose(output, expected_output)
    else:
        with self.assertRaisesOpError(expected_err_re):
            edit_distance.eval()
Example #18
Source File: tensorflow_model.py From calamari with Apache License 2.0

def create_solver(self):
    def sparse_targets(targets, targets_length):
        return tf.cast(K.ctc_label_dense_to_sparse(targets, math_ops.cast(
            K.flatten(targets_length), dtype='int32')), 'int32')

    def create_cer(sparse_decoded, sparse_targets):
        return tf.edit_distance(tf.cast(sparse_decoded, tf.int32), sparse_targets, normalize=True)

    # Note for codec change: the codec size is derived upon creation, therefore the ctc ops must be created
    # using the true codec size (the W/B-Matrix may change its shape however during loading/codec change
    # to match the true codec size
    loss = KL.Lambda(lambda args: K.ctc_batch_cost(*args), output_shape=(1,), name='ctc')(
        (self.targets, self.softmax, self.output_seq_len, self.targets_length))
    self.sparse_targets = KL.Lambda(lambda args: sparse_targets(*args), name='sparse_targets')(
        (self.targets, self.targets_length))
    self.cer = KL.Lambda(lambda args: create_cer(*args), output_shape=(1,), name='cer')(
        (self.sparse_decoded, self.sparse_targets))

    if self.network_proto.solver == NetworkParams.MOMENTUM_SOLVER:
        optimizer = keras.optimizers.SGD(self.network_proto.learning_rate,
                                         self.network_proto.momentum,
                                         clipnorm=self.network_proto.clipping_norm)
    elif self.network_proto.solver == NetworkParams.ADAM_SOLVER:
        optimizer = keras.optimizers.Adam(self.network_proto.learning_rate,
                                          clipnorm=self.network_proto.clipping_norm)
    else:
        raise Exception("Unknown solver of type '%s'" % self.network_proto.solver)

    def ctc_loss(t, p):
        return p

    model = Model(inputs=[self.targets, self.input_data, self.input_length, self.targets_length],
                  outputs=[loss])
    model.compile(optimizer=optimizer,
                  loss={'ctc': ctc_loss},
                  )

    return model
Example #19
Source File: DeepSpeech.py From uai-sdk with Apache License 2.0

def calculate_mean_edit_distance_and_loss(model_feeder, tower, dropout):
    r'''
    This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
    Next to total and average loss it returns the mean edit distance,
    the decoded result and the batch's original Y.
    '''
    # Obtain the next batch of data
    batch_x, batch_seq_len, batch_y = model_feeder.next_batch(tower)

    # Calculate the logits of the batch using BiRNN
    logits = BiRNN(batch_x, tf.to_int64(batch_seq_len), dropout)

    # Compute the CTC loss using either TensorFlow's `ctc_loss` or Baidu's `warp_ctc_loss`.
    if FLAGS.use_warpctc:
        total_loss = tf.contrib.warpctc.warp_ctc_loss(labels=batch_y, inputs=logits,
                                                      sequence_length=batch_seq_len)
    else:
        total_loss = tf.nn.ctc_loss(labels=batch_y, inputs=logits, sequence_length=batch_seq_len)

    # Calculate the average loss across the batch
    avg_loss = tf.reduce_mean(total_loss)

    # Beam search decode the batch
    decoded, _ = decode_with_lm(logits, batch_seq_len, merge_repeated=False, beam_width=FLAGS.beam_width)

    # Compute the edit (Levenshtein) distance
    distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)

    # Compute the mean edit distance
    mean_edit_distance = tf.reduce_mean(distance)

    # Finally we return the
    # - calculated total and
    # - average losses,
    # - the Levenshtein distance,
    # - the recognition mean edit distance,
    # - the decoded batch and
    # - the original batch_y (which contains the verified transcriptions).
    return total_loss, avg_loss, distance, mean_edit_distance, decoded, batch_y


# Adam Optimization
# =================
# In contrast to 'Deep Speech: Scaling up end-to-end speech recognition'
# (http://arxiv.org/abs/1412.5567),
# in which 'Nesterov's Accelerated Gradient Descent'
# (www.cs.toronto.edu/~fritz/absps/momentum.pdf) was used,
# we will use the Adam method for optimization (http://arxiv.org/abs/1412.6980),
# because, generally, it requires less fine-tuning.
Example #20
Source File: DeepSpeech.py From AVSR-Deep-Speech with GNU General Public License v2.0

def calculate_mean_edit_distance_and_loss(batch_set, dropout):
    r'''
    This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
    Next to total and average loss it returns the mean edit distance,
    the decoded result and the batch's original Y.
    '''
    # Obtain the next batch of data
    batch_x, batch_seq_len, batch_y = batch_set.next_batch()

    # Calculate the logits of the batch using BiRNN
    logits = BiRNN(batch_x, tf.to_int64(batch_seq_len), dropout)

    # Compute the CTC loss using either TensorFlow's `ctc_loss` or Baidu's `warp_ctc_loss`.
    if FLAGS.use_warpctc:
        total_loss = tf.contrib.warpctc.warp_ctc_loss(labels=batch_y, inputs=logits,
                                                      sequence_length=batch_seq_len)
    else:
        total_loss = tf.nn.ctc_loss(labels=batch_y, inputs=logits, sequence_length=batch_seq_len)

    # Calculate the average loss across the batch
    avg_loss = tf.reduce_mean(total_loss)

    # Beam search decode the batch
    decoded, _ = tf.nn.ctc_beam_search_decoder(logits, batch_seq_len, merge_repeated=False)

    # Compute the edit (Levenshtein) distance
    distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)

    # Compute the mean edit distance
    mean_edit_distance = tf.reduce_mean(distance)

    # Finally we return the
    # - calculated total and
    # - average losses,
    # - the Levenshtein distance,
    # - the recognition mean edit distance,
    # - the decoded batch and
    # - the original batch_y (which contains the verified transcriptions).
    return total_loss, avg_loss, distance, mean_edit_distance, decoded, batch_y


# Adam Optimization
# =================
# In contrast to 'Deep Speech: Scaling up end-to-end speech recognition'
# (http://arxiv.org/abs/1412.5567),
# in which 'Nesterov's Accelerated Gradient Descent'
# (www.cs.toronto.edu/~fritz/absps/momentum.pdf) was used,
# we will use the Adam method for optimization (http://arxiv.org/abs/1412.6980),
# because, generally, it requires less fine-tuning.
Example #21
Source File: DeepSpeech_RHL_AVSR.py From AVSR-Deep-Speech with GNU General Public License v2.0

def calculate_mean_edit_distance_and_loss(batch_set, dropout):
    r'''
    This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
    Next to total and average loss it returns the mean edit distance,
    the decoded result and the batch's original Y.
    '''
    # Obtain the next batch of data
    batch_x, batch_seq_len, batch_y = batch_set.next_batch()

    # Calculate the logits of the batch using BiRNN
    logits = BiRNN(batch_x, tf.to_int64(batch_seq_len), dropout)

    # Compute the CTC loss using either TensorFlow's `ctc_loss` or Baidu's `warp_ctc_loss`.
    if FLAGS.use_warpctc:
        total_loss = tf.contrib.warpctc.warp_ctc_loss(labels=batch_y, inputs=logits,
                                                      sequence_length=batch_seq_len)
    else:
        total_loss = tf.nn.ctc_loss(labels=batch_y, inputs=logits, sequence_length=batch_seq_len)

    # Calculate the average loss across the batch
    avg_loss = tf.reduce_mean(total_loss)

    # Beam search decode the batch
    decoded, _ = tf.nn.ctc_beam_search_decoder(logits, batch_seq_len, merge_repeated=False)

    # Compute the edit (Levenshtein) distance
    distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)

    # Compute the mean edit distance
    mean_edit_distance = tf.reduce_mean(distance)

    # Finally we return the
    # - calculated total and
    # - average losses,
    # - the Levenshtein distance,
    # - the recognition mean edit distance,
    # - the decoded batch and
    # - the original batch_y (which contains the verified transcriptions).
    return total_loss, avg_loss, distance, mean_edit_distance, decoded, batch_y


# Adam Optimization
# =================
# In contrast to 'Deep Speech: Scaling up end-to-end speech recognition'
# (http://arxiv.org/abs/1412.5567),
# in which 'Nesterov's Accelerated Gradient Descent'
# (www.cs.toronto.edu/~fritz/absps/momentum.pdf) was used,
# we will use the Adam method for optimization (http://arxiv.org/abs/1412.6980),
# because, generally, it requires less fine-tuning.
Example #22
Source File: DeepSpeech_RHL.py From AVSR-Deep-Speech with GNU General Public License v2.0

def calculate_mean_edit_distance_and_loss(batch_set, dropout):
    r'''
    This routine beam search decodes a mini-batch and calculates the loss and mean edit distance.
    Next to total and average loss it returns the mean edit distance,
    the decoded result and the batch's original Y.
    '''
    # Obtain the next batch of data
    batch_x, batch_seq_len, batch_y = batch_set.next_batch()

    # Calculate the logits of the batch using BiRNN
    logits = BiRNN(batch_x, tf.to_int64(batch_seq_len), dropout)

    # Compute the CTC loss using either TensorFlow's `ctc_loss` or Baidu's `warp_ctc_loss`.
    if FLAGS.use_warpctc:
        total_loss = tf.contrib.warpctc.warp_ctc_loss(labels=batch_y, inputs=logits,
                                                      sequence_length=batch_seq_len)
    else:
        total_loss = tf.nn.ctc_loss(labels=batch_y, inputs=logits, sequence_length=batch_seq_len)

    # Calculate the average loss across the batch
    avg_loss = tf.reduce_mean(total_loss)

    # Beam search decode the batch
    decoded, _ = tf.nn.ctc_beam_search_decoder(logits, batch_seq_len, merge_repeated=False)

    # Compute the edit (Levenshtein) distance
    distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)

    # Compute the mean edit distance
    mean_edit_distance = tf.reduce_mean(distance)

    # Finally we return the
    # - calculated total and
    # - average losses,
    # - the Levenshtein distance,
    # - the recognition mean edit distance,
    # - the decoded batch and
    # - the original batch_y (which contains the verified transcriptions).
    return total_loss, avg_loss, distance, mean_edit_distance, decoded, batch_y


# Adam Optimization
# =================
# In contrast to 'Deep Speech: Scaling up end-to-end speech recognition'
# (http://arxiv.org/abs/1412.5567),
# in which 'Nesterov's Accelerated Gradient Descent'
# (www.cs.toronto.edu/~fritz/absps/momentum.pdf) was used,
# we will use the Adam method for optimization (http://arxiv.org/abs/1412.6980),
# because, generally, it requires less fine-tuning.
Example #23
Source File: metrics.py From training_results_v0.5 with Apache License 2.0

def sequence_edit_distance(predictions,
                           labels,
                           weights_fn=common_layers.weights_nonzero):
  """Average edit distance, ignoring padding 0s.

  The score returned is the edit distance divided by the total length of
  reference truth and the weight returned is the total length of the truth.

  Args:
    predictions: Tensor of shape [`batch_size`, `length`, 1, `num_classes`] and
        type tf.float32 representing the logits, 0-padded.
    labels: Tensor of shape [`batch_size`, `length`, 1, 1] and type tf.int32
        representing the labels of same length as logits and 0-padded.
    weights_fn: ignored. The weights returned are the total length of the
        ground truth labels, excluding 0-paddings.

  Returns:
    (edit distance / reference length, reference length)

  Raises:
    ValueError: if weights_fn is not common_layers.weights_nonzero.
  """
  if weights_fn is not common_layers.weights_nonzero:
    raise ValueError("Only weights_nonzero can be used for this metric.")

  with tf.variable_scope("edit_distance", values=[predictions, labels]):
    # Transform logits into sequence classes by taking max at every step.
    predictions = tf.to_int32(
        tf.squeeze(tf.argmax(predictions, axis=-1), axis=(2, 3)))
    nonzero_idx = tf.where(tf.not_equal(predictions, 0))
    sparse_outputs = tf.SparseTensor(nonzero_idx,
                                     tf.gather_nd(predictions, nonzero_idx),
                                     tf.shape(predictions, out_type=tf.int64))
    labels = tf.squeeze(labels, axis=(2, 3))
    nonzero_idx = tf.where(tf.not_equal(labels, 0))
    label_sparse_outputs = tf.SparseTensor(nonzero_idx,
                                           tf.gather_nd(labels, nonzero_idx),
                                           tf.shape(labels, out_type=tf.int64))
    distance = tf.reduce_sum(
        tf.edit_distance(sparse_outputs, label_sparse_outputs, normalize=False))
    reference_length = tf.to_float(common_layers.shape_list(nonzero_idx)[0])
    return distance / reference_length, reference_length
Example #24
Source File: metrics.py From training_results_v0.5 with Apache License 2.0

def sequence_edit_distance(predictions,
                           labels,
                           weights_fn=common_layers.weights_nonzero):
  """Average edit distance, ignoring padding 0s.

  The score returned is the edit distance divided by the total length of
  reference truth and the weight returned is the total length of the truth.

  Args:
    predictions: Tensor of shape [`batch_size`, `length`, 1, `num_classes`] and
        type tf.float32 representing the logits, 0-padded.
    labels: Tensor of shape [`batch_size`, `length`, 1, 1] and type tf.int32
        representing the labels of same length as logits and 0-padded.
    weights_fn: ignored. The weights returned are the total length of the
        ground truth labels, excluding 0-paddings.

  Returns:
    (edit distance / reference length, reference length)

  Raises:
    ValueError: if weights_fn is not common_layers.weights_nonzero.
  """
  if weights_fn is not common_layers.weights_nonzero:
    raise ValueError("Only weights_nonzero can be used for this metric.")

  with tf.variable_scope("edit_distance", values=[predictions, labels]):
    # Transform logits into sequence classes by taking max at every step.
    predictions = tf.to_int32(
        tf.squeeze(tf.argmax(predictions, axis=-1), axis=(2, 3)))
    nonzero_idx = tf.where(tf.not_equal(predictions, 0))
    sparse_outputs = tf.SparseTensor(nonzero_idx,
                                     tf.gather_nd(predictions, nonzero_idx),
                                     tf.shape(predictions, out_type=tf.int64))
    labels = tf.squeeze(labels, axis=(2, 3))
    nonzero_idx = tf.where(tf.not_equal(labels, 0))
    label_sparse_outputs = tf.SparseTensor(nonzero_idx,
                                           tf.gather_nd(labels, nonzero_idx),
                                           tf.shape(labels, out_type=tf.int64))
    distance = tf.reduce_sum(
        tf.edit_distance(sparse_outputs, label_sparse_outputs, normalize=False))
    reference_length = tf.to_float(common_layers.shape_list(nonzero_idx)[0])
    return distance / reference_length, reference_length
Example #25
Source File: metrics.py From BERT with Apache License 2.0

def word_error_rate(raw_predictions,
                    labels,
                    lookup=None,
                    weights_fn=common_layers.weights_nonzero):
  """Calculate word error rate.

  Args:
    raw_predictions: The raw predictions.
    labels: The actual labels.
    lookup: A tf.constant mapping indices to output tokens.
    weights_fn: Weighting function.

  Returns:
    The word error rate.
  """

  def from_tokens(raw, lookup_):
    gathered = tf.gather(lookup_, tf.cast(raw, tf.int32))
    joined = tf.regex_replace(tf.reduce_join(gathered, axis=1), b"<EOS>.*", b"")
    cleaned = tf.regex_replace(joined, b"_", b" ")
    tokens = tf.string_split(cleaned, " ")
    return tokens

  def from_characters(raw, lookup_):
    """Convert ascii+2 encoded codes to string-tokens."""
    corrected = tf.bitcast(
        tf.clip_by_value(tf.subtract(raw, 2), 0, 255), tf.uint8)
    gathered = tf.gather(lookup_, tf.cast(corrected, tf.int32))[:, :, 0]
    joined = tf.reduce_join(gathered, axis=1)
    cleaned = tf.regex_replace(joined, b"\0", b"")
    tokens = tf.string_split(cleaned, " ")
    return tokens

  if lookup is None:
    lookup = tf.constant([chr(i) for i in range(256)])
    convert_fn = from_characters
  else:
    convert_fn = from_tokens

  if weights_fn is not common_layers.weights_nonzero:
    raise ValueError("Only weights_nonzero can be used for this metric.")

  with tf.variable_scope("word_error_rate", values=[raw_predictions, labels]):
    raw_predictions = tf.squeeze(
        tf.argmax(raw_predictions, axis=-1), axis=(2, 3))
    labels = tf.squeeze(labels, axis=(2, 3))

    reference = convert_fn(labels, lookup)
    predictions = convert_fn(raw_predictions, lookup)

    distance = tf.reduce_sum(
        tf.edit_distance(predictions, reference, normalize=False))
    reference_length = tf.cast(
        tf.size(reference.values, out_type=tf.int32), dtype=tf.float32)

    return distance / reference_length, reference_length
Example #26
Source File: metrics.py From BERT with Apache License 2.0

def sequence_edit_distance(predictions,
                           labels,
                           weights_fn=common_layers.weights_nonzero):
  """Average edit distance, ignoring padding 0s.

  The score returned is the edit distance divided by the total length of
  reference truth and the weight returned is the total length of the truth.

  Args:
    predictions: Tensor of shape [`batch_size`, `length`, 1, `num_classes`] and
        type tf.float32 representing the logits, 0-padded.
    labels: Tensor of shape [`batch_size`, `length`, 1, 1] and type tf.int32
        representing the labels of same length as logits and 0-padded.
    weights_fn: ignored. The weights returned are the total length of the
        ground truth labels, excluding 0-paddings.

  Returns:
    (edit distance / reference length, reference length)

  Raises:
    ValueError: if weights_fn is not common_layers.weights_nonzero.
  """
  if weights_fn is not common_layers.weights_nonzero:
    raise ValueError("Only weights_nonzero can be used for this metric.")

  with tf.variable_scope("edit_distance", values=[predictions, labels]):
    # Transform logits into sequence classes by taking max at every step.
    predictions = tf.to_int32(
        tf.squeeze(tf.argmax(predictions, axis=-1), axis=(2, 3)))
    nonzero_idx = tf.where(tf.not_equal(predictions, 0))
    sparse_outputs = tf.SparseTensor(nonzero_idx,
                                     tf.gather_nd(predictions, nonzero_idx),
                                     tf.shape(predictions, out_type=tf.int64))
    labels = tf.squeeze(labels, axis=(2, 3))
    nonzero_idx = tf.where(tf.not_equal(labels, 0))
    label_sparse_outputs = tf.SparseTensor(nonzero_idx,
                                           tf.gather_nd(labels, nonzero_idx),
                                           tf.shape(labels, out_type=tf.int64))
    distance = tf.reduce_sum(
        tf.edit_distance(sparse_outputs, label_sparse_outputs, normalize=False))
    reference_length = tf.to_float(common_layers.shape_list(nonzero_idx)[0])
    return distance / reference_length, reference_length
Example #27
Source File: metrics.py From fine-lm with MIT License

def sequence_edit_distance(predictions,
                           labels,
                           weights_fn=common_layers.weights_nonzero):
  """Average edit distance, ignoring padding 0s.

  The score returned is the edit distance divided by the total length of
  reference truth and the weight returned is the total length of the truth.

  Args:
    predictions: Tensor of shape [`batch_size`, `length`, 1, `num_classes`] and
        type tf.float32 representing the logits, 0-padded.
    labels: Tensor of shape [`batch_size`, `length`, 1, 1] and type tf.int32
        representing the labels of same length as logits and 0-padded.
    weights_fn: ignored. The weights returned are the total length of the
        ground truth labels, excluding 0-paddings.

  Returns:
    (edit distance / reference length, reference length)

  Raises:
    ValueError: if weights_fn is not common_layers.weights_nonzero.
  """
  if weights_fn is not common_layers.weights_nonzero:
    raise ValueError("Only weights_nonzero can be used for this metric.")

  with tf.variable_scope("edit_distance", values=[predictions, labels]):
    # Transform logits into sequence classes by taking max at every step.
    predictions = tf.to_int32(
        tf.squeeze(tf.argmax(predictions, axis=-1), axis=(2, 3)))
    nonzero_idx = tf.where(tf.not_equal(predictions, 0))
    sparse_outputs = tf.SparseTensor(nonzero_idx,
                                     tf.gather_nd(predictions, nonzero_idx),
                                     tf.shape(predictions, out_type=tf.int64))
    labels = tf.squeeze(labels, axis=(2, 3))
    nonzero_idx = tf.where(tf.not_equal(labels, 0))
    label_sparse_outputs = tf.SparseTensor(nonzero_idx,
                                           tf.gather_nd(labels, nonzero_idx),
                                           tf.shape(labels, out_type=tf.int64))
    distance = tf.reduce_sum(
        tf.edit_distance(sparse_outputs, label_sparse_outputs, normalize=False))
    reference_length = tf.to_float(common_layers.shape_list(nonzero_idx)[0])
    return distance / reference_length, reference_length