Python speech_recognition.AudioFile() Examples
The following are 10 code examples of speech_recognition.AudioFile(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the speech_recognition module.
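Before looking at the examples, here is a minimal sketch of the typical AudioFile() workflow: open a WAV, AIFF, or FLAC file as a source, record it into an AudioData object, and pass that to one of the recognizer's recognize_* methods. The file name and language code below are placeholders.

import speech_recognition as sr

recognizer = sr.Recognizer()
# AudioFile accepts a path (or file-like object) to a WAV, AIFF, or FLAC file.
with sr.AudioFile("sample.wav") as source:      # placeholder file name
    audio = recognizer.record(source)           # read the entire file into an AudioData object

try:
    print(recognizer.recognize_google(audio, language="en-US"))
except sr.UnknownValueError:
    print("Speech was unintelligible")
except sr.RequestError as e:
    print("API request failed: {0}".format(e))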
Example #1
Source File: google_stt.py, from selene-backend (GNU Affero General Public License v3.0), 6 votes
def _call_google_stt(self):
    """Use the audio data from the request to call the Google STT API

    We need to replicate the first 16 bytes in the audio due to a bug with
    the Google speech recognition library that removes the first 16 bytes
    from the flac file we are sending.
    """
    lang = self.request.args['lang']
    audio = self.request.data
    with AudioFile(BytesIO(audio[:16] + audio)) as source:
        recording = self.recognizer.record(source)
    response = self.recognizer.recognize_google(
        recording, key=self.google_stt_key, language=lang, show_all=True
    )
    return response
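The same in-memory pattern works outside a Flask handler as well. Here is a minimal sketch, assuming flac_bytes already holds a valid FLAC (or WAV) payload; recognize_flac_bytes is a hypothetical helper, not part of selene-backend, and it does not replicate the project's 16-byte workaround.

import io
import speech_recognition as sr

def recognize_flac_bytes(flac_bytes, key=None, language="en-US"):
    # AudioFile also accepts a file-like object, so raw bytes can be wrapped
    # in BytesIO instead of being written to a temporary file first.
    recognizer = sr.Recognizer()
    with sr.AudioFile(io.BytesIO(flac_bytes)) as source:
        audio = recognizer.record(source)
    return recognizer.recognize_google(audio, key=key, language=language, show_all=True)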
Example #2
Source File: rebreakcaptcha.py, from rebreakcaptcha (MIT License), 6 votes
def speech_to_text(self, audio_source):
    # Initialize a new recognizer with the audio in memory as source
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_source) as source:
        audio = recognizer.record(source)  # read the entire audio file

    audio_output = ""
    # recognize speech using Google Speech Recognition
    try:
        audio_output = recognizer.recognize_google(audio)
        print("[{0}] Google Speech Recognition: ".format(self.current_iteration) + audio_output)

        # Check if we got harder audio captcha
        if any(character.isalpha() for character in audio_output):
            # Use Houndify to detect the harder audio captcha
            print("[{0}] Fallback to Houndify!".format(self.current_iteration))
            audio_output = self.string_to_digits(
                recognizer.recognize_houndify(audio,
                                              client_id=HOUNDIFY_CLIENT_ID,
                                              client_key=HOUNDIFY_CLIENT_KEY))
            print("[{0}] Houndify: ".format(self.current_iteration) + audio_output)
    except sr.UnknownValueError:
        print("[{0}] Google Speech Recognition could not understand audio".format(self.current_iteration))
    except sr.RequestError as e:
        print("[{0}] Could not request results from Google Speech Recognition service; {1}".format(
            self.current_iteration, e))

    return audio_output
Example #3
Source File: stt.py, from personal-backend (Apache License 2.0), 6 votes
def get_stt_routes(app):
    @app.route("/" + API_VERSION + "/stt", methods=['POST'])
    @noindex
    @requires_auth
    def stt():
        flac_audio = request.data
        lang = str(request.args.get("lang", "en-us"))
        with NamedTemporaryFile() as fp:
            fp.write(flac_audio)
            with AudioFile(fp.name) as source:
                audio = recognizer.record(source)  # read the entire audio file
            utterance = engine.execute(audio, language=lang)
        return json.dumps([utterance])

    return app
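Example #3 records the entire file; record() also accepts optional offset and duration arguments when only part of a file is needed. A minimal sketch, with a placeholder file name and arbitrary timings:

import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile("clip.wav") as source:        # placeholder file name
    # skip the first 2 seconds, then capture only the next 5 seconds
    segment = recognizer.record(source, offset=2, duration=5)
print(recognizer.recognize_google(segment))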
Example #4
Source File: apis.py, from asr-study (MIT License), 6 votes
def recognize_from_api(audio, api, name='API', safe=True, **kwargs):
    if not isinstance(audio, sr.AudioData):
        with sr.AudioFile(audio) as source:
            audio = r.record(source)
    try:
        return api(audio, **kwargs)
    except sr.UnknownValueError as e:
        if not safe:
            raise e
        return "\t%s could not understand audio" % name
    except sr.RequestError as e:
        if not safe:
            raise e
        return "\tCould not request results from %s service; %s" % (name, e)
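One possible way to call this helper, assuming the module-level recognizer r that the function relies on; the WAV path is a placeholder. Any bound recognize_* method can be passed in, since the helper simply calls api(audio, **kwargs).

r = sr.Recognizer()

print(recognize_from_api("sample.wav", r.recognize_google, name="Google"))
print(recognize_from_api("sample.wav", r.recognize_sphinx, name="Sphinx", safe=False))  # needs pocketsphinx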
Example #5
Source File: utils.py, from ExpressBot (GNU General Public License v2.0), 5 votes
def recognition(wav_file):
    r = sr.Recognizer()
    with sr.AudioFile(wav_file) as source:
        audio = r.record(source)
    # recognize speech using Google Speech Recognition
    try:
        return r.recognize_google(audio, language='cmn-Hans-CN')
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio."
    except sr.RequestError as e:
        return "Could not request results from Google Speech Recognition service; {0}".format(e)
Example #6
Source File: Utils.py, from kalliope (GNU General Public License v3.0), 5 votes
def __init__(self, audio_file=None):
    """
    Thread used to catch an audio sample from the microphone and pass it to a callback method
    """
    super(SpeechRecognition, self).__init__()
    self.recognizer = sr.Recognizer()
    self.microphone = sr.Microphone()
    self.callback = None
    self.stop_thread = None
    self.kill_yourself = False
    self.audio_stream = None

    # get global configuration
    sl = SettingLoader()
    self.settings = sl.settings

    if audio_file is None:
        # audio file not set, we need to capture a sample from the microphone
        with self.microphone as source:
            if self.settings.options.adjust_for_ambient_noise_second > 0:
                # threshold is calculated from capturing ambient sound
                logger.debug("[SpeechRecognition] threshold calculated by "
                             "capturing ambient noise during %s seconds" %
                             self.settings.options.adjust_for_ambient_noise_second)
                Utils.print_info("[SpeechRecognition] capturing ambient sound during %s seconds" %
                                 self.settings.options.adjust_for_ambient_noise_second)
                self.recognizer.adjust_for_ambient_noise(
                    source, duration=self.settings.options.adjust_for_ambient_noise_second)
            else:
                # threshold is defined manually
                logger.debug("[SpeechRecognition] threshold defined by settings: %s" %
                             self.settings.options.energy_threshold)
                self.recognizer.energy_threshold = self.settings.options.energy_threshold

            Utils.print_info("[SpeechRecognition] Threshold set to: %s" % self.recognizer.energy_threshold)
    else:
        # audio file provided
        with sr.AudioFile(audio_file) as source:
            self.audio_stream = self.recognizer.record(source)  # read the entire audio file
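Outside of kalliope's settings machinery, the two threshold strategies in the snippet above boil down to the following minimal sketch; the duration and threshold values are arbitrary placeholders.

import speech_recognition as sr

recognizer = sr.Recognizer()
microphone = sr.Microphone()

with microphone as source:
    # Option 1: derive the energy threshold from a short sample of ambient noise...
    recognizer.adjust_for_ambient_noise(source, duration=1)
    # ...or, Option 2: set the threshold manually instead.
    # recognizer.energy_threshold = 4000
    audio = recognizer.listen(source)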
Example #7
Source File: youtube_helpers.py, from KTSpeechCrawler (MIT License), 5 votes
def _get_transcript_google_web_asr(t):
    import tempfile
    try:
        with tempfile.NamedTemporaryFile(suffix=".wav") as f:
            extract_audio_part_segment(t["video_file"], t["ts_start"], t["ts_end"], f.name)
            r = sr.Recognizer()
            with sr.AudioFile(f.name) as source:
                audio = r.record(source)
            return r.recognize_google(audio)
    except Exception as e:
        print(e)
        return None
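extract_audio_part_segment is a KTSpeechCrawler helper; one plausible way to implement something like it is with ffmpeg via subprocess, sketched below under the assumption that ffmpeg is on PATH. This is not the project's actual implementation.

import subprocess

def extract_audio_part_segment(video_file, ts_start, ts_end, out_wav):
    # Hypothetical stand-in: cut [ts_start, ts_end] from the video's audio
    # track and write 16 kHz mono PCM WAV, which AudioFile can read.
    subprocess.check_call([
        "ffmpeg", "-y",
        "-i", video_file,
        "-ss", str(ts_start),
        "-to", str(ts_end),
        "-ar", "16000", "-ac", "1",
        out_wav,
    ])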
Example #8
Source File: speech_recognition.py, from macaw (MIT License), 5 votes
def speech_to_text(self, file_path):
    print(file_path)
    wav_file_name = ogg_to_wav(file_path)
    with sr.AudioFile(wav_file_name) as source:
        audio = self.asr.record(source)
    try:
        text = self.asr.recognize_google(audio)
        os.remove(wav_file_name)
        return text
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
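ogg_to_wav is macaw's own helper. A rough equivalent using pydub (which in turn needs ffmpeg installed) might look like the sketch below; this is an assumption about its behavior, not macaw's actual implementation.

from pydub import AudioSegment

def ogg_to_wav(ogg_path):
    # Hypothetical stand-in: convert an OGG file to WAV so that
    # speech_recognition.AudioFile can open it.
    wav_path = ogg_path.rsplit(".", 1)[0] + ".wav"
    AudioSegment.from_ogg(ogg_path).export(wav_path, format="wav")
    return wav_path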
Example #9
Source File: audio.py, from uncaptcha (MIT License), 4 votes
def getNum(audio_file, results_dict, digit_num=0, ans=[]):
    global r
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)  # read the entire audio file

    manage_vars = multiprocessing.Manager()
    ret_vals = manage_vars.dict()
    results_dict_threaded = manage_vars.dict()
    results = []
    threads = []
    timed = manage_vars.dict()
    for api in apis:
        timed[api] = manage_vars.list()

    apis_func = [googleCloud, sphinx, wit, bing, google, ibm]
    i = 0
    start = time.time()
    for api in apis_func:
        t = multiprocessing.Process(target=api, args=(audio, ret_vals, i, results_dict_threaded, timed))
        threads.append(t)
        t.start()
        i += 1
    for thread in threads:
        thread.join()
    end = time.time()
    print "getnumtime", end - start
    print timed
    results_dict["time" + str(digit_num)] = end - start

    # merge the results with the past results
    for name in results_dict_threaded.keys():
        if name in results_dict:
            results_dict[name] += results_dict_threaded[name]
        else:
            results_dict[name] = results_dict_threaded[name]

    # print(ret_vals)
    i = 0
    for key in ret_vals.keys():
        results.append(ret_vals[key])
    # logging.debug(results)
    resultsFiltered = filter(None, results)
    results = []
    for result in resultsFiltered:
        digits = [digit for digit in str(result)]
        results += digits
    # logging.debug(results)
    results = sorted(results, key=results.count, reverse=True)
    if not results:
        logging.debug("FOUND NOTHING")
        ans[digit_num] = DEFAULT
        return DEFAULT
    else:
        # print(results[0])
        logging.info("DETERMINED AS: " + str(results[0]))
        print ans
        print digit_num
        ans[digit_num] = results[0]
        return results[0]
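The fan-out in Example #9 relies on project-specific wrappers (googleCloud, sphinx, wit, bing, google, ibm). Stripped down, the core idea of running several engines over the same AudioData looks roughly like the sketch below; the file name and API key are placeholders, and some engines need extra packages or credentials.

import speech_recognition as sr

r = sr.Recognizer()
with sr.AudioFile("digit.wav") as source:       # placeholder file name
    audio = r.record(source)

engines = {
    "sphinx": lambda: r.recognize_sphinx(audio),                  # offline, needs pocketsphinx
    "google": lambda: r.recognize_google(audio),                  # free Google web API
    "wit":    lambda: r.recognize_wit(audio, key="WIT_AI_KEY"),   # placeholder key
}

results = {}
for name, call in engines.items():
    try:
        results[name] = call()
    except (sr.UnknownValueError, sr.RequestError):
        results[name] = None
print(results)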
Example #10
Source File: live-rec-test.py, from KerasDeepSpeech (GNU Affero General Public License v3.0), 4 votes
def startloop(rec_number):
    ##read in data from csv
    # df = pd.read_csv(TRANSCRIPT_SOURCE, sep=',', header=None)

    #HEADERS
    wav_filename = []
    wav_filesize = []
    transcript = []

    # print("when ready press enter to start recording and then ctrl+c to stop")
    # time.sleep(1)

    trans = str(raw_input('please type the exact words you will speak (for WER calculation), '
                          'or press enter to use Google Transcribe for WER calc\n:'))
    trans = clean(trans)
    if trans == "":
        trans = "N/A"
    print("Transcript is:", trans)

    inputvar = str(raw_input('ready? press enter to begin recording and ctrl+c to stop'))
    filename = "rec"
    if inputvar == "":
        r = record(filename, OUTPUT_DIR, trans)
        # inputcheck = str(raw_input('press enter if you are happy, or r to redo.'))
        wav_filename.append(r)
        wav_filesize.append(os.path.getsize(r))

        if trans == "N/A":
            r = sr.Recognizer()
            with sr.AudioFile(AUDIO_FILE) as source:
                audio = r.record(source)  # read the entire audio file
            trans = r.recognize_google(audio)
            trans = trans.lower()

        transcript.append(trans)

    a = {'wav_filename': wav_filename,
         'wav_filesize': wav_filesize,
         'transcript': transcript}
    df_train = pd.DataFrame(a, columns=['wav_filename', 'wav_filesize', 'transcript'], dtype=int)
    df_train.to_csv("./data/live/live.csv", sep=',', header=True, index=False, encoding='ascii')