Python speech_recognition.AudioFile() Examples

The following are 10 code examples of speech_recognition.AudioFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module speech_recognition, or try the search function.
Example #1
Source File: google_stt.py    From selene-backend with GNU Affero General Public License v3.0 6 votes vote down vote up
def _call_google_stt(self):
        """Send the audio payload of the current request to the Google STT API.

        The first 16 bytes of the payload are duplicated at the front of the
        stream handed to the recognizer.  This works around a bug in the
        Google speech recognition library, which strips the first 16 bytes
        from the FLAC file being sent.

        The language is read from the ``lang`` request argument.  The value
        produced by ``recognize_google(..., show_all=True)`` is returned
        unchanged.
        """
        language = self.request.args['lang']
        payload = self.request.data

        # Prefix the payload with a copy of its own first 16 bytes (see above).
        padded_stream = BytesIO(payload[:16] + payload)
        with AudioFile(padded_stream) as audio_source:
            captured = self.recognizer.record(audio_source)

        return self.recognizer.recognize_google(
            captured,
            key=self.google_stt_key,
            language=language,
            show_all=True,
        )
Example #2
Source File: rebreakcaptcha.py    From rebreakcaptcha with MIT License 6 votes vote down vote up
def speech_to_text(self, audio_source):
        """Transcribe an audio captcha and return the recognized text.

        The audio is first sent to Google Speech Recognition.  If the result
        contains any alphabetic character, the audio is sent to Houndify
        instead and that result is passed through ``self.string_to_digits``.

        :param audio_source: anything accepted by ``sr.AudioFile``.
        :return: the transcription; an empty string when Google could not
            produce one.  If the Houndify fallback fails, the Google
            transcription obtained before it is returned.
        """
        # Initialize a new recognizer with the audio in memory as source
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_source) as source:
            audio = recognizer.record(source)  # read the entire audio file

        audio_output = ""
        # recognize speech using Google Speech Recognition
        try:
            audio_output = recognizer.recognize_google(audio)
            print("[{0}] Google Speech Recognition: ".format(self.current_iteration) + audio_output)
            # Check if we got harder audio captcha
            if any(character.isalpha() for character in audio_output):
                # Use Houndify to detect the harder audio captcha
                print("[{0}] Fallback to Houndify!".format(self.current_iteration))
                audio_output = self.string_to_digits(recognizer.recognize_houndify(audio, client_id=HOUNDIFY_CLIENT_ID, client_key=HOUNDIFY_CLIENT_KEY))
                print("[{0}] Houndify: ".format(self.current_iteration) + audio_output)
        except sr.UnknownValueError:
            print("[{0}] Google Speech Recognition could not understand audio".format(self.current_iteration))
        except sr.RequestError as e:
            # Both placeholders must be filled by a single format() call;
            # chaining two calls raised IndexError on the first one.
            print("[{0}] Could not request results from Google Speech Recognition service; {1}".format(self.current_iteration, e))

        return audio_output
Example #3
Source File: stt.py    From personal-backend with Apache License 2.0 6 votes vote down vote up
def get_stt_routes(app):
    """Register the speech-to-text endpoint on *app* and return *app*.

    The route is ``POST /<API_VERSION>/stt``.  The request body is treated as
    the audio to transcribe, and the optional ``lang`` query argument
    (default ``"en-us"``) is forwarded to the STT engine.
    """
    @app.route("/" + API_VERSION + "/stt", methods=['POST'])
    @noindex
    @requires_auth
    def stt():
        """Transcribe the posted audio and return a JSON list with the result."""
        flac_audio = request.data
        lang = str(request.args.get("lang", "en-us"))
        with NamedTemporaryFile() as fp:
            fp.write(flac_audio)
            # AudioFile reopens the file by name, so the buffered bytes must
            # reach the file before it is read; without the flush the
            # recognizer could see a truncated or empty file.
            fp.flush()
            with AudioFile(fp.name) as source:
                audio = recognizer.record(source)  # read the entire audio file

            utterance = engine.execute(audio, language=lang)
        return json.dumps([utterance])

    return app
Example #4
Source File: apis.py    From asr-study with MIT License 6 votes vote down vote up
def recognize_from_api(audio, api, name='API', safe=True, **kwargs):
    """Run the recognizer callable *api* on *audio* and return its result.

    :param audio: an ``sr.AudioData`` instance, or anything accepted by
        ``sr.AudioFile`` (it is then read in full with the module-level
        recognizer ``r``).
    :param api: callable invoked as ``api(audio, **kwargs)``.
    :param name: label for the API, used in the error messages.
    :param safe: when true, ``sr.UnknownValueError`` and ``sr.RequestError``
        are converted into a tab-prefixed message string; when false they are
        re-raised.
    :param kwargs: forwarded unchanged to *api*.
    :return: the value returned by *api*, or an error message string.
    """
    if not isinstance(audio, sr.AudioData):
        with sr.AudioFile(audio) as source:
            audio = r.record(source)
    try:
        return api(audio, **kwargs)
    except sr.UnknownValueError:
        if not safe:
            raise
        return "\t%s could not understand audio" % (name,)
    except sr.RequestError as e:
        if not safe:
            raise
        # The original template mixed "%s" with a "{0}" placeholder and
        # passed two arguments to a single "%s", which raised TypeError
        # instead of producing the message.
        return "\tCould not request results from %s service; %s" % (name, e)
Example #5
Source File: utils.py    From ExpressBot with GNU General Public License v2.0 5 votes vote down vote up
def recognition(wav_file):
    """Transcribe *wav_file* with Google Speech Recognition (``cmn-Hans-CN``).

    Returns the recognized text.  When recognition fails, a descriptive
    message string is returned instead of raising.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_file) as audio_source:
        captured = recognizer.record(audio_source)

    # recognize speech using Google Speech Recognition
    try:
        transcript = recognizer.recognize_google(captured, language='cmn-Hans-CN')
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio.4C7"
    except sr.RequestError as err:
        return "Could not request results from Google Speech Recognition service.4C7; {0}".format(err)
    else:
        return transcript
Example #6
Source File: Utils.py    From kalliope with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, audio_file=None):
        """
        Thread used to capture audio from the microphone and pass it to a callback method.

        :param audio_file: optional audio file. When provided, it is read in
            full into ``self.audio_stream``; otherwise the recognizer's energy
            threshold is set up using the microphone and the loaded settings.
        """
        super(SpeechRecognition, self).__init__()
        self.recognizer = sr.Recognizer()
        self.microphone = sr.Microphone()
        self.callback = None
        self.stop_thread = None
        self.kill_yourself = False
        self.audio_stream = None

        # get global configuration
        self.settings = SettingLoader().settings

        if audio_file is not None:
            # audio file provided: read the entire file, nothing to calibrate
            with sr.AudioFile(audio_file) as file_source:
                self.audio_stream = self.recognizer.record(file_source)
            return

        # audio file not set, we need to capture a sample from the microphone
        with self.microphone as mic_source:
            ambient_seconds = self.settings.options.adjust_for_ambient_noise_second
            if ambient_seconds > 0:
                # threshold is calculated from capturing ambient sound
                logger.debug("[SpeechRecognition] threshold calculated by "
                             "capturing ambient noise during %s seconds" % ambient_seconds)
                Utils.print_info("[SpeechRecognition] capturing ambient sound during %s seconds" % ambient_seconds)
                self.recognizer.adjust_for_ambient_noise(mic_source, duration=ambient_seconds)
            else:
                # threshold is defined manually
                manual_threshold = self.settings.options.energy_threshold
                logger.debug("[SpeechRecognition] threshold defined by settings: %s" % manual_threshold)
                self.recognizer.energy_threshold = manual_threshold

            Utils.print_info("[SpeechRecognition] Threshold set to: %s" % self.recognizer.energy_threshold)
Example #7
Source File: youtube_helpers.py    From KTSpeechCrawler with MIT License 5 votes vote down vote up
def _get_transcript_google_web_asr(t):
    """Transcribe one segment of a video with Google's web speech API.

    *t* is a mapping providing ``"video_file"``, ``"ts_start"`` and
    ``"ts_end"``.  The segment is extracted into a temporary ``.wav`` file,
    which is then read in full and sent to ``recognize_google``.

    Returns the transcript, or ``None`` if any exception occurs (the
    exception is printed).
    """
    import tempfile
    try:
        with tempfile.NamedTemporaryFile(suffix=".wav") as wav_tmp:
            segment_path = wav_tmp.name
            extract_audio_part_segment(t["video_file"], t["ts_start"], t["ts_end"], segment_path)

            recognizer = sr.Recognizer()
            with sr.AudioFile(segment_path) as audio_source:
                captured = recognizer.record(audio_source)
                transcript = recognizer.recognize_google(captured)
            return transcript
    except Exception as err:
        print(err)
        return None
Example #8
Source File: speech_recognition.py    From macaw with MIT License 5 votes vote down vote up
def speech_to_text(self, file_path):
        """Convert an OGG recording to WAV and transcribe it with Google.

        :param file_path: path handed to ``ogg_to_wav``; it is also printed.
        :return: the recognized text, or ``None`` when Google could not
            understand the audio or the request failed (a message is printed
            in both cases).
        """
        print(file_path)
        wav_file_name = ogg_to_wav(file_path)
        try:
            with sr.AudioFile(wav_file_name) as source:
                audio = self.asr.record(source)
            try:
                return self.asr.recognize_google(audio)
            except sr.UnknownValueError:
                print("Google Speech Recognition could not understand audio")
            except sr.RequestError as e:
                print("Could not request results from Google Speech Recognition service; {0}".format(e))
        finally:
            # The converted WAV file is only an intermediate artifact.
            # Remove it on every path, not just after a successful
            # recognition, so failed requests do not leave files behind.
            os.remove(wav_file_name)
        return None
Example #9
Source File: audio.py    From uncaptcha with MIT License 4 votes vote down vote up
def getNum(audio_file, results_dict, digit_num=0, ans=None):
    """Recognize one spoken digit by querying several speech APIs in parallel.

    The audio file is read in full, then every function in the API list is
    started in its own process with the arguments
    ``(audio, ret_vals, index, results_dict_threaded, timed)``.  After the
    processes finish, the characters of all truthy values found in
    ``ret_vals`` are pooled and the most frequent character wins.

    :param audio_file: anything accepted by ``sr.AudioFile``.
    :param results_dict: mapping updated in place: it receives the elapsed
        time under ``"time<digit_num>"`` and is merged with the per-call
        results collected from the worker processes.
    :param digit_num: position of the digit; used in the timing key and as
        the index written in *ans*.
    :param ans: optional list receiving the result at index *digit_num*.
        When omitted, nothing is stored.  (The former default ``[]`` always
        raised ``IndexError`` on that assignment.)
    :return: the winning character, or ``DEFAULT`` when no API produced
        anything.
    """
    # NOTE(review): the recognizer is published as a module-level global;
    # presumably other functions in this module read ``r`` -- confirm.
    global r
    r = sr.Recognizer()

    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)  # read the entire audio file

    # Manager-backed containers so the worker processes can report back.
    manage_vars = multiprocessing.Manager()
    ret_vals = manage_vars.dict()
    results_dict_threaded = manage_vars.dict()
    timed = manage_vars.dict()
    for api in apis:
        timed[api] = manage_vars.list()

    apis_func = [googleCloud, sphinx, wit, bing, google, ibm]
    workers = []
    start = time.time()
    for index, api in enumerate(apis_func):
        worker = multiprocessing.Process(
            target=api,
            args=(audio, ret_vals, index, results_dict_threaded, timed),
        )
        workers.append(worker)
        worker.start()

    end = start
    for worker in workers:
        worker.join()
        # Progress is reported after each process finishes; the last value
        # of ``end`` is the total elapsed time.
        end = time.time()
        print("getnumtime %s" % (end - start))
        print(timed)
    results_dict["time" + str(digit_num)] = end - start

    # merge the results with the past results
    for name in results_dict_threaded.keys():
        if name in results_dict:
            results_dict[name] += results_dict_threaded[name]
        else:
            results_dict[name] = results_dict_threaded[name]

    # Pool the individual characters of every non-empty API answer.
    results = []
    for key in ret_vals.keys():
        value = ret_vals[key]
        if value:
            results.extend(str(value))

    # Most frequent character first; the sort is stable, so ties keep their
    # original relative order.
    results = sorted(results, key=results.count, reverse=True)
    if not results:
        logging.debug("FOUND NOTHING")
        best = DEFAULT
    else:
        best = results[0]
        logging.info("DETERMINED AS: " + str(best))
        print(ans)
        print(digit_num)

    if ans is not None:
        ans[digit_num] = best
    return best
Example #10
Source File: live-rec-test.py    From KerasDeepSpeech with GNU Affero General Public License v3.0 4 votes vote down vote up
def startloop(rec_number):
    """Record one utterance interactively and write its metadata to a CSV file.

    The user is asked for the transcript they are about to speak.  If the
    cleaned answer is empty, it is replaced by ``"N/A"`` and, after
    recording, by a lower-cased Google transcription.  Recording only happens
    when the user confirms with a bare Enter.  The CSV
    ``./data/live/live.csv`` is written in every case.

    :param rec_number: not used by this function.
    """
    recorded_paths = []
    recorded_sizes = []
    transcripts = []

    trans = clean(str(raw_input('please type the exact words you will speak (for WER calculation), or press enter to use Google Transcribe for WER calc\n:')))
    trans = "N/A" if trans == "" else trans

    print("Transcript is:", trans)

    inputvar = str(raw_input('ready? press enter to begin recording and ctrl+c to stop'))
    filename = "rec"

    if inputvar == "":
        wav_path = record(filename, OUTPUT_DIR, trans)
        recorded_paths.append(wav_path)
        recorded_sizes.append(os.path.getsize(wav_path))

        if trans == "N/A":
            # No transcript was typed: fall back to Google's transcription.
            # NOTE(review): this reads AUDIO_FILE, not the path returned by
            # record() -- confirm both refer to the same file.
            recognizer = sr.Recognizer()
            with sr.AudioFile(AUDIO_FILE) as source:
                audio = recognizer.record(source)  # read the entire audio file
                trans = recognizer.recognize_google(audio).lower()

        transcripts.append(trans)

    column_order = ['wav_filename', 'wav_filesize', 'transcript']
    frame_data = {
        'wav_filename': recorded_paths,
        'wav_filesize': recorded_sizes,
        'transcript': transcripts,
    }

    df_train = pd.DataFrame(frame_data, columns=column_order, dtype=int)
    df_train.to_csv("./data/live/live.csv", sep=',', header=True, index=False, encoding='ascii')