Python keras.backend.ctc_decode() Examples
The following are 16 code examples of keras.backend.ctc_decode(), drawn from open-source projects; the source file, project, and license are noted above each example.
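Most of the examples below follow the same basic pattern: pass the softmax output and the per-sample sequence lengths to K.ctc_decode, which returns a pair of (decoded label tensors, log probabilities), then evaluate the first decoded tensor to obtain the best path. A minimal, self-contained sketch of that pattern (the toy shapes and random data here are illustrative only, not taken from any of the projects below):

import numpy as np
from keras import backend as K

# Toy softmax output: 1 sample, 8 time steps, 5 classes (the last class is the CTC blank).
y_pred = np.random.rand(1, 8, 5).astype(np.float32)
y_pred /= y_pred.sum(axis=-1, keepdims=True)

input_length = np.array([8], dtype=np.int32)  # number of valid time steps per sample

# ctc_decode returns a pair: a list of decoded label tensors (one per path)
# and a tensor of log probabilities, one row per sample.
decoded, log_probs = K.ctc_decode(y_pred, input_length,
                                  greedy=True, beam_width=100, top_paths=1)
best_path = K.get_value(decoded[0])  # shape (samples, max_decoded_length)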
Example #1
Source File: CTCModel.py From CTCModel with MIT License | 6 votes |
def ctc_complete_decoding_lambda_func(args, **arguments):
    """
    Complete CTC decoding using Keras (function K.ctc_decode)
    :param args: y_pred, input_length
    :param arguments: greedy, beam_width, top_paths
    :return: K.ctc_decode with dtype='float32'
    """
    # import tensorflow as tf  # required when loading a saved model

    y_pred, input_length = args
    my_params = arguments

    assert K.backend() == 'tensorflow'

    return K.cast(K.ctc_decode(y_pred, tf.squeeze(input_length),
                               greedy=my_params['greedy'],
                               beam_width=my_params['beam_width'],
                               top_paths=my_params['top_paths'])[0][0],
                  dtype='float32')
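Judging by its (args, **arguments) signature, this function is meant to be the body of a keras.layers.Lambda layer, with the decoding hyper-parameters passed through the layer's arguments dict. A sketch of how it might be wired up; the input shapes and layer name are assumptions for illustration, not taken from CTCModel itself:

from keras.layers import Input, Lambda

# Assumed inputs: a softmax output (60 classes is arbitrary) and per-sample lengths.
y_pred = Input(shape=(None, 60), name='softmax_output')
input_length = Input(shape=(1,), dtype='int64', name='input_length')

# The arguments dict supplies greedy / beam_width / top_paths to the function above.
decoded = Lambda(ctc_complete_decoding_lambda_func,
                 output_shape=(None,),
                 name='CTCDecoding',
                 arguments={'greedy': False,
                            'beam_width': 100,
                            'top_paths': 1})([y_pred, input_length])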
Example #2
Source File: speech_recognition.py From parrots with Apache License 2.0 | 6 votes |
def predict(self, data_input, input_len):
    """
    Predict the result.
    :param data_input:
    :param input_len:
    :return: the list of pinyin symbols recognized from the speech
    """
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    with self.graph.as_default():
        base_pred = self.base_model.predict(x=x_in)
        base_pred = base_pred[:, :, :]
        decoder = K.ctc_decode(base_pred, in_len, greedy=True,
                               beam_width=100, top_paths=1)
        result = K.get_value(decoder[0][0])[0]
    return result
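The with self.graph.as_default(): block suggests the model is served from a thread other than the one that loaded it; in graph-mode TensorFlow, the graph captured at load time must be re-entered before calling predict. A minimal sketch of that pattern, assuming TF1-style graph mode (the class and attribute names here follow the snippet but are otherwise illustrative):

import tensorflow as tf

class Recognizer:
    def __init__(self, base_model):
        self.base_model = base_model
        # Capture the graph the model was built/loaded into.
        self.graph = tf.get_default_graph()

    def predict(self, x_in):
        # Re-enter that graph, even when called from another thread.
        with self.graph.as_default():
            return self.base_model.predict(x=x_in)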
Example #3
Source File: utils.py From CRNN with MIT License | 5 votes |
def evaluate(self):
    correct_predictions = 0
    correct_char_predictions = 0

    x_val, y_val = self.val_generator[np.random.randint(
        0, int(self.val_generator.nb_samples / self.val_generator.batch_size))]
    # x_val, y_val = next(self.val_generator)

    y_pred = self.prediction_model.predict(x_val)
    shape = y_pred[:, 2:, :].shape
    ctc_decode = K.ctc_decode(y_pred[:, 2:, :],
                              input_length=np.ones(shape[0]) * shape[1])[0][0]
    ctc_out = K.get_value(ctc_decode)[:, :self.label_len]

    for i in range(self.val_generator.batch_size):
        print(ctc_out[i])
        result_str = ''.join([self.characters[c] for c in ctc_out[i]])
        result_str = result_str.replace('-', '')
        if result_str == y_val[i]:
            correct_predictions += 1
        print(result_str, y_val[i])
        for c1, c2 in zip(result_str, y_val[i]):
            if c1 == c2:
                correct_char_predictions += 1

    return correct_predictions / self.val_generator.batch_size, correct_char_predictions
Example #4
Source File: eval.py From CRNN with MIT License | 5 votes |
def predict_text(model, img):
    y_pred = model.predict(img[np.newaxis, :, :, :])
    shape = y_pred[:, 2:, :].shape
    ctc_decode = K.ctc_decode(y_pred[:, 2:, :],
                              input_length=np.ones(shape[0]) * shape[1])[0][0]
    ctc_out = K.get_value(ctc_decode)[:, :cfg.label_len]
    result_str = ''.join([cfg.characters[c] for c in ctc_out[0]])
    result_str = result_str.replace('-', '')
    return result_str
Example #5
Source File: decoders.py From LipNet with MIT License | 5 votes |
def _decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    """Decodes the output of a softmax.

    Can use either greedy search (also known as best path)
    or a constrained dictionary search.

    # Arguments
        y_pred: tensor `(samples, time_steps, num_categories)`
            containing the prediction, or output of the softmax.
        input_length: tensor `(samples, )` containing the sequence length for
            each batch item in `y_pred`.
        greedy: perform much faster best-path search if `true`.
            This does not use a dictionary.
        beam_width: if `greedy` is `false`: a beam search decoder will be used
            with a beam of this width.
        top_paths: if `greedy` is `false`, how many of the most probable paths
            will be returned.

    # Returns
        Tuple:
            List: if `greedy` is `true`, returns a list of one element that
                contains the decoded sequence.
                If `false`, returns the `top_paths` most probable
                decoded sequences.
                Important: blank labels are returned as `-1`.
            Tensor `(top_paths, )` that contains
                the log probability of each decoded sequence.
    """
    decoded = K.ctc_decode(y_pred=y_pred, input_length=input_length,
                           greedy=greedy, beam_width=beam_width,
                           top_paths=top_paths)
    paths = [path.eval(session=K.get_session()) for path in decoded[0]]
    logprobs = decoded[1].eval(session=K.get_session())
    return (paths, logprobs)
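A usage sketch for _decode, assuming y_pred and input_length tensors shaped as documented above (the top_paths value is illustrative):

# Fast greedy (best-path) decoding: one decoded sequence per sample.
paths, logprobs = _decode(y_pred, input_length)

# Beam search: the 3 most probable decodings per sample.
paths, logprobs = _decode(y_pred, input_length,
                          greedy=False, beam_width=100, top_paths=3)

# Blank labels come back as -1 and usually need to be stripped before
# mapping indices to characters.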
Example #6
Source File: speech_model_01.py From ASR_WORD with GNU Affero General Public License v3.0 | 5 votes |
def predict(self, data_input, input_len):
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros(shape=[batch_size, 2000, self.FEATURE_LENGTH, 1], dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #7
Source File: speech_model_02.py From ASR_WORD with GNU Affero General Public License v3.0 | 5 votes |
def predict(self, data_input, input_len):
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros(shape=[batch_size, 2000, self.FEATURE_LENGTH, 1], dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #8
Source File: utils.py From Vietnamese_Handwriting_Recognition with MIT License | 5 votes |
def decode_predict_ctc(out, chars, top_paths=1):
    results = []
    beam_width = 5
    if beam_width < top_paths:
        beam_width = top_paths
    for i in range(top_paths):
        labels = K.get_value(
            K.ctc_decode(
                out,
                input_length=np.ones(out.shape[0]) * out.shape[1],
                greedy=False,
                beam_width=beam_width,
                top_paths=top_paths
            )[0][i]
        )[0]
        text = labels_to_text(chars, labels)
        results.append(text)
    return results
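labels_to_text is defined elsewhere in the Vietnamese_Handwriting_Recognition project. A plausible minimal version, offered here as an assumption rather than the project's actual code, treats chars as an index-to-character table and drops the -1 entries that K.ctc_decode uses for blanks:

def labels_to_text(chars, labels):
    # Drop -1 entries (CTC blank / padding) and map the rest through the table.
    return ''.join(chars[int(c)] for c in labels if int(c) != -1)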
Example #9
Source File: SpeechModel251.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 5 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)

    # Evaluate in a TF1-compat session, then reset the graph to free resources.
    r1 = r[0][0].eval(session=tf.compat.v1.Session())
    tf.compat.v1.reset_default_graph()
    return r1[0]
Example #10
Source File: SpeechModel261_p.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #11
Source File: SpeechModel252.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #12
Source File: SpeechModel251_p.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #13
Source File: SpeechModel24.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #14
Source File: SpeechModel25.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #15
Source File: SpeechModel261.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1
Example #16
Source File: SpeechModel26.py From ASRT_SpeechRecognition with GNU General Public License v3.0 | 4 votes |
def Predict(self, data_input, input_len):
    '''
    Predict the result; returns the list of recognized pinyin symbols.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=float)  # np.float was removed in NumPy 1.24

    for i in range(batch_size):
        x_in[i, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)
    base_pred = base_pred[:, :, :]
    # base_pred = base_pred[:, 2:, :]

    r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100, top_paths=1)
    r1 = K.get_value(r[0][0])
    r1 = r1[0]
    return r1