Python speech_recognition.AudioFile() Examples
The following are 10 code examples of speech_recognition.AudioFile(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the speech_recognition module.
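Before looking at the examples, here is a minimal sketch of the typical AudioFile() workflow: open a WAV, AIFF, or FLAC file as a source, record it into an AudioData object, and pass that to one of the recognizer's recognize_* methods. The file name and language code below are placeholders.

import speech_recognition as sr

recognizer = sr.Recognizer()
# AudioFile accepts a path (or file-like object) to a WAV, AIFF, or FLAC file.
with sr.AudioFile("sample.wav") as source:      # placeholder file name
    audio = recognizer.record(source)           # read the entire file into an AudioData object

try:
    print(recognizer.recognize_google(audio, language="en-US"))
except sr.UnknownValueError:
    print("Speech was unintelligible")
except sr.RequestError as e:
    print("API request failed: {0}".format(e))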
Example #1
Source File: google_stt.py, from selene-backend (GNU Affero General Public License v3.0), 6 votes
def _call_google_stt(self):
    """Use the audio data from the request to call the Google STT API

    We need to replicate the first 16 bytes in the audio due to a bug with
    the Google speech recognition library that removes the first 16 bytes
    from the flac file we are sending.
    """
    lang = self.request.args['lang']
    audio = self.request.data
    with AudioFile(BytesIO(audio[:16] + audio)) as source:
        recording = self.recognizer.record(source)
    response = self.recognizer.recognize_google(
        recording, key=self.google_stt_key, language=lang, show_all=True
    )
    return response
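The same in-memory pattern works outside a Flask handler as well. Here is a minimal sketch, assuming flac_bytes already holds a valid FLAC (or WAV) payload; recognize_flac_bytes is a hypothetical helper, not part of selene-backend, and it does not replicate the project's 16-byte workaround.

import io
import speech_recognition as sr

def recognize_flac_bytes(flac_bytes, key=None, language="en-US"):
    # AudioFile also accepts a file-like object, so raw bytes can be wrapped
    # in BytesIO instead of being written to a temporary file first.
    recognizer = sr.Recognizer()
    with sr.AudioFile(io.BytesIO(flac_bytes)) as source:
        audio = recognizer.record(source)
    return recognizer.recognize_google(audio, key=key, language=language, show_all=True)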
Example #2
Source File: rebreakcaptcha.py, from rebreakcaptcha (MIT License), 6 votes
def speech_to_text(self, audio_source):
    # Initialize a new recognizer with the audio in memory as source
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_source) as source:
        audio = recognizer.record(source)  # read the entire audio file

    audio_output = ""
    # recognize speech using Google Speech Recognition
    try:
        audio_output = recognizer.recognize_google(audio)
        print("[{0}] Google Speech Recognition: ".format(self.current_iteration) + audio_output)

        # Check if we got harder audio captcha
        if any(character.isalpha() for character in audio_output):
            # Use Houndify to detect the harder audio captcha
            print("[{0}] Fallback to Houndify!".format(self.current_iteration))
            audio_output = self.string_to_digits(
                recognizer.recognize_houndify(audio,
                                              client_id=HOUNDIFY_CLIENT_ID,
                                              client_key=HOUNDIFY_CLIENT_KEY))
            print("[{0}] Houndify: ".format(self.current_iteration) + audio_output)
    except sr.UnknownValueError:
        print("[{0}] Google Speech Recognition could not understand audio".format(self.current_iteration))
    except sr.RequestError as e:
        print("[{0}] Could not request results from Google Speech Recognition service; {1}".format(
            self.current_iteration, e))

    return audio_output
Example #3
Source File: stt.py, from personal-backend (Apache License 2.0), 6 votes
def get_stt_routes(app):
    @app.route("/" + API_VERSION + "/stt", methods=['POST'])
    @noindex
    @requires_auth
    def stt():
        flac_audio = request.data
        lang = str(request.args.get("lang", "en-us"))
        with NamedTemporaryFile() as fp:
            fp.write(flac_audio)
            with AudioFile(fp.name) as source:
                audio = recognizer.record(source)  # read the entire audio file
            utterance = engine.execute(audio, language=lang)
        return json.dumps([utterance])

    return app
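Example #3 records the entire file; record() also accepts optional offset and duration arguments when only part of a file is needed. A minimal sketch, with a placeholder file name and arbitrary timings:

import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile("clip.wav") as source:        # placeholder file name
    # skip the first 2 seconds, then capture only the next 5 seconds
    segment = recognizer.record(source, offset=2, duration=5)
print(recognizer.recognize_google(segment))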
Example #4
Source File: apis.py, from asr-study (MIT License), 6 votes
def recognize_from_api(audio, api, name='API', safe=True, **kwargs):
    if not isinstance(audio, sr.AudioData):
        with sr.AudioFile(audio) as source:
            audio = r.record(source)
    try:
        return api(audio, **kwargs)
    except sr.UnknownValueError as e:
        if not safe:
            raise e
        return "\t%s could not understand audio" % name
    except sr.RequestError as e:
        if not safe:
            raise e
        return "\tCould not request results from %s service; %s" % (name, e)
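One possible way to call this helper, assuming the module-level recognizer r that the function relies on; the WAV path is a placeholder. Any bound recognize_* method can be passed in, since the helper simply calls api(audio, **kwargs).

r = sr.Recognizer()

print(recognize_from_api("sample.wav", r.recognize_google, name="Google"))
print(recognize_from_api("sample.wav", r.recognize_sphinx, name="Sphinx", safe=False))  # needs pocketsphinx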
Example #5
Source File: utils.py, from ExpressBot (GNU General Public License v2.0), 5 votes
def recognition(wav_file):
    r = sr.Recognizer()
    with sr.AudioFile(wav_file) as source:
        audio = r.record(source)
    # recognize speech using Google Speech Recognition
    try:
        return r.recognize_google(audio, language='cmn-Hans-CN')
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio."
    except sr.RequestError as e:
        return "Could not request results from Google Speech Recognition service; {0}".format(e)
Example #6
Source File: Utils.py, from kalliope (GNU General Public License v3.0), 5 votes
def __init__(self, audio_file=None):
    """
    Thread used to catch an audio sample from the microphone and pass it to a callback method
    """
    super(SpeechRecognition, self).__init__()
    self.recognizer = sr.Recognizer()
    self.microphone = sr.Microphone()
    self.callback = None
    self.stop_thread = None
    self.kill_yourself = False
    self.audio_stream = None

    # get global configuration
    sl = SettingLoader()
    self.settings = sl.settings

    if audio_file is None:
        # audio file not set, we need to capture a sample from the microphone
        with self.microphone as source:
            if self.settings.options.adjust_for_ambient_noise_second > 0:
                # threshold is calculated from capturing ambient sound
                logger.debug("[SpeechRecognition] threshold calculated by "
                             "capturing ambient noise during %s seconds" %
                             self.settings.options.adjust_for_ambient_noise_second)
                Utils.print_info("[SpeechRecognition] capturing ambient sound during %s seconds" %
                                 self.settings.options.adjust_for_ambient_noise_second)
                self.recognizer.adjust_for_ambient_noise(
                    source, duration=self.settings.options.adjust_for_ambient_noise_second)
            else:
                # threshold is defined manually
                logger.debug("[SpeechRecognition] threshold defined by settings: %s" %
                             self.settings.options.energy_threshold)
                self.recognizer.energy_threshold = self.settings.options.energy_threshold

            Utils.print_info("[SpeechRecognition] Threshold set to: %s" % self.recognizer.energy_threshold)
    else:
        # audio file provided
        with sr.AudioFile(audio_file) as source:
            self.audio_stream = self.recognizer.record(source)  # read the entire audio file
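Outside of kalliope's settings machinery, the two threshold strategies in the snippet above boil down to the following minimal sketch; the duration and threshold values are arbitrary placeholders.

import speech_recognition as sr

recognizer = sr.Recognizer()
microphone = sr.Microphone()

with microphone as source:
    # Option 1: derive the energy threshold from a short sample of ambient noise...
    recognizer.adjust_for_ambient_noise(source, duration=1)
    # ...or, Option 2: set the threshold manually instead.
    # recognizer.energy_threshold = 4000
    audio = recognizer.listen(source)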
Example #7
Source File: youtube_helpers.py, from KTSpeechCrawler (MIT License), 5 votes
def _get_transcript_google_web_asr(t):
    import tempfile
    try:
        with tempfile.NamedTemporaryFile(suffix=".wav") as f:
            extract_audio_part_segment(t["video_file"], t["ts_start"], t["ts_end"], f.name)
            r = sr.Recognizer()
            with sr.AudioFile(f.name) as source:
                audio = r.record(source)
            return r.recognize_google(audio)
    except Exception as e:
        print(e)
        return None
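extract_audio_part_segment is a KTSpeechCrawler helper; one plausible way to implement something like it is with ffmpeg via subprocess, sketched below under the assumption that ffmpeg is on PATH. This is not the project's actual implementation.

import subprocess

def extract_audio_part_segment(video_file, ts_start, ts_end, out_wav):
    # Hypothetical stand-in: cut [ts_start, ts_end] from the video's audio
    # track and write 16 kHz mono PCM WAV, which AudioFile can read.
    subprocess.check_call([
        "ffmpeg", "-y",
        "-i", video_file,
        "-ss", str(ts_start),
        "-to", str(ts_end),
        "-ar", "16000", "-ac", "1",
        out_wav,
    ])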
Example #8
Source File: speech_recognition.py, from macaw (MIT License), 5 votes
def speech_to_text(self, file_path):
    print(file_path)
    wav_file_name = ogg_to_wav(file_path)
    with sr.AudioFile(wav_file_name) as source:
        audio = self.asr.record(source)
    try:
        text = self.asr.recognize_google(audio)
        os.remove(wav_file_name)
        return text
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
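ogg_to_wav is macaw's own helper. A rough equivalent using pydub (which in turn needs ffmpeg installed) might look like the sketch below; this is an assumption about its behavior, not macaw's actual implementation.

from pydub import AudioSegment

def ogg_to_wav(ogg_path):
    # Hypothetical stand-in: convert an OGG file to WAV so that
    # speech_recognition.AudioFile can open it.
    wav_path = ogg_path.rsplit(".", 1)[0] + ".wav"
    AudioSegment.from_ogg(ogg_path).export(wav_path, format="wav")
    return wav_path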
Example #9
Source File: audio.py, from uncaptcha (MIT License), 4 votes
def getNum(audio_file, results_dict, digit_num=0, ans=[]):
    global r
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)  # read the entire audio file

    manage_vars = multiprocessing.Manager()
    ret_vals = manage_vars.dict()
    results_dict_threaded = manage_vars.dict()
    results = []
    threads = []
    timed = manage_vars.dict()
    for api in apis:
        timed[api] = manage_vars.list()

    apis_func = [googleCloud, sphinx, wit, bing, google, ibm]
    i = 0
    start = time.time()
    for api in apis_func:
        t = multiprocessing.Process(target=api, args=(audio, ret_vals, i, results_dict_threaded, timed))
        threads.append(t)
        t.start()
        i += 1
    for thread in threads:
        thread.join()
    end = time.time()
    print "getnumtime", end - start
    print timed
    results_dict["time" + str(digit_num)] = end - start

    # merge the results with the past results
    for name in results_dict_threaded.keys():
        if name in results_dict:
            results_dict[name] += results_dict_threaded[name]
        else:
            results_dict[name] = results_dict_threaded[name]

    # print(ret_vals)
    i = 0
    for key in ret_vals.keys():
        results.append(ret_vals[key])
    # logging.debug(results)
    resultsFiltered = filter(None, results)
    results = []
    for result in resultsFiltered:
        digits = [digit for digit in str(result)]
        results += digits
    # logging.debug(results)
    results = sorted(results, key=results.count, reverse=True)
    if not results:
        logging.debug("FOUND NOTHING")
        ans[digit_num] = DEFAULT
        return DEFAULT
    else:
        # print(results[0])
        logging.info("DETERMINED AS: " + str(results[0]))
        print ans
        print digit_num
        ans[digit_num] = results[0]
        return results[0]
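The fan-out in Example #9 relies on project-specific wrappers (googleCloud, sphinx, wit, bing, google, ibm). Stripped down, the core idea of running several engines over the same AudioData looks roughly like the sketch below; the file name and API key are placeholders, and some engines need extra packages or credentials.

import speech_recognition as sr

r = sr.Recognizer()
with sr.AudioFile("digit.wav") as source:       # placeholder file name
    audio = r.record(source)

engines = {
    "sphinx": lambda: r.recognize_sphinx(audio),                  # offline, needs pocketsphinx
    "google": lambda: r.recognize_google(audio),                  # free Google web API
    "wit":    lambda: r.recognize_wit(audio, key="WIT_AI_KEY"),   # placeholder key
}

results = {}
for name, call in engines.items():
    try:
        results[name] = call()
    except (sr.UnknownValueError, sr.RequestError):
        results[name] = None
print(results)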
Example #10
Source File: live-rec-test.py, from KerasDeepSpeech (GNU Affero General Public License v3.0), 4 votes
def startloop(rec_number):
    ##read in data from csv
    # df = pd.read_csv(TRANSCRIPT_SOURCE, sep=',', header=None)

    #HEADERS
    wav_filename = []
    wav_filesize = []
    transcript = []

    # print("when ready press enter to start recording and then ctrl+c to stop")
    # time.sleep(1)

    trans = str(raw_input('please type the exact words you will speak (for WER calculation), '
                          'or press enter to use Google Transcribe for WER calc\n:'))
    trans = clean(trans)
    if trans == "":
        trans = "N/A"
    print("Transcript is:", trans)

    inputvar = str(raw_input('ready? press enter to begin recording and ctrl+c to stop'))
    filename = "rec"
    if inputvar == "":
        r = record(filename, OUTPUT_DIR, trans)
        # inputcheck = str(raw_input('press enter if you are happy, or r to redo.'))
        wav_filename.append(r)
        wav_filesize.append(os.path.getsize(r))

        if trans == "N/A":
            r = sr.Recognizer()
            with sr.AudioFile(AUDIO_FILE) as source:
                audio = r.record(source)  # read the entire audio file
            trans = r.recognize_google(audio)
            trans = trans.lower()

        transcript.append(trans)

    a = {'wav_filename': wav_filename,
         'wav_filesize': wav_filesize,
         'transcript': transcript}
    df_train = pd.DataFrame(a, columns=['wav_filename', 'wav_filesize', 'transcript'], dtype=int)
    df_train.to_csv("./data/live/live.csv", sep=',', header=True, index=False, encoding='ascii')