Python Examples of pyaudio.paFloat32

Source File: vad.py From audio with BSD 2-Clause "Simplified" License

11 votes

def __enter__(self):
        """Open the audio input stream and return this object.

        Creates a PyAudio instance, opens a single-channel float32 input
        stream on ``self._device`` at ``self._rate`` with buffers of
        ``self._chunk`` frames, and clears the ``closed`` flag.
        """
        interface = pyaudio.PyAudio()
        self._audio_interface = interface
        open_stream = interface.open

        stream_settings = {
            # 32-bit float samples (pyaudio.paInt16 would be the 16-bit
            # integer alternative).
            "format": pyaudio.paFloat32,
            # The API currently only supports 1-channel (mono) audio
            # https://goo.gl/z757pE
            "channels": 1,
            "rate": self._rate,
            "input": True,
            "frames_per_buffer": self._chunk,
            "input_device_index": self._device,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            "stream_callback": self._fill_buffer,
        }
        self._audio_stream = open_stream(**stream_settings)

        self.closed = False
        return self

Source File: playback.py From python-musical with MIT License

5 votes

def pyaudio_play(data, rate=44100):
    ''' Send audio array to pyaudio for playback

    Args:
        data: NumPy array of samples; it is converted to float32 and
            written to a single-channel output stream.
        rate: Sampling rate passed to the output stream.
    '''
    import pyaudio

    # ``ndarray.tostring()`` is a deprecated alias of ``tobytes()``; use the
    # supported name. Convert up front so bad input fails before a device
    # is opened.
    samples = data.astype(numpy.float32).tobytes()

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paFloat32, channels=1, rate=rate, output=True)
        try:
            stream.write(samples)
        finally:
            # Release the stream even if the write fails.
            stream.close()
    finally:
        p.terminate()

Source File: record_utils.py From speechT with Apache License 2.0

5 votes

def __init__(self, rate=16000, threshold=0.03, chunk_size=1024):
    """Store the recording settings and create the PyAudio handle.

    Args:
        rate: Stored as ``self.rate``.
        threshold: Stored as ``self.threshold``.
        chunk_size: Stored as ``self.chunk_size``.
    """
    self.rate, self.threshold, self.chunk_size = rate, threshold, chunk_size
    # Samples are handled as 32-bit floats.
    self.format = pyaudio.paFloat32
    self._pyaudio = pyaudio.PyAudio()

Source File: listener.py From pineal with GNU Affero General Public License v3.0

5 votes

def listen(device='default', channels=2, frames_per_buffer=1024, rate=44100):
    """Start capturing audio in a background thread into the global ``audio_data``.

    ``audio_data`` is (re)created as a ``(channels, frames_per_buffer)`` array
    of zeros and overwritten by the stream callback every time a buffer
    arrives.

    Args:
        device: Key looked up in the mapping returned by ``get_devices``.
        channels: Number of input channels to open.
        frames_per_buffer: Frames delivered to the callback per buffer.
        rate: Sampling rate passed to the input stream.
    """
    global audio_data
    audio_data = np.zeros([channels, frames_per_buffer])

    pa = pyaudio.PyAudio()
    devices = get_devices(pa)
    input_device = devices[device]

    def stream_cb(in_data, frame_count, time_info, status):
        # np.frombuffer is the supported replacement for the deprecated
        # binary mode of np.fromstring.
        samples = np.frombuffer(in_data, dtype=np.float32)
        # Rows are frames and columns are channels after the reshape;
        # transpose to one row per channel to match audio_data's layout.
        audio_data[:, :] = samples.reshape((frame_count, channels)).T

        return (in_data, pyaudio.paContinue)

    stream = pa.open(format=pyaudio.paFloat32,
                     channels=channels,
                     rate=rate,
                     frames_per_buffer=frames_per_buffer,
                     input=True,
                     input_device_index=input_device,
                     stream_callback=stream_cb)

    def idle():
        # Keep the stream alive until the thread is torn down, then release
        # the stream and the PyAudio handle.
        stream.start_stream()

        try:
            while True:
                sleep(1)
        finally:
            stream.stop_stream()
            stream.close()
            pa.terminate()

    # Daemon thread, so it does not keep the interpreter alive on exit.
    t = Thread(target=idle)
    t.daemon = True
    t.start()

Source File: brain_beat.py From accel-brain-code with GNU General Public License v2.0

5 votes

def play_beat(
        self,
        frequencys,
        play_time,
        sample_rate=44100,
        volume=0.01
    ):
        '''
        Play a beat under the conditions given by the arguments.

        Args:
            frequencys:     tuple of (left frequency (Hz), right frequency (Hz))
            play_time:      playback time (seconds)
            sample_rate:    sample rate
            volume:         volume

        Returns:
            void
        '''

        # Build both channels first so that malformed input fails before an
        # audio device is opened.
        left_frequency, right_frequency = frequencys
        left_chunk = self.__create_chunk(left_frequency, play_time, sample_rate)
        right_chunk = self.__create_chunk(right_frequency, play_time, sample_rate)

        # Base object of the audio library this method depends on
        audio = pyaudio.PyAudio()
        try:
            # Two-channel float32 output stream
            stream = audio.open(
                format=pyaudio.paFloat32,
                channels=2,
                rate=sample_rate,
                output=True
            )
            try:
                self.write_stream(stream, left_chunk, right_chunk, volume)
                stream.stop_stream()
            finally:
                # Release the stream even if writing fails.
                stream.close()
        finally:
            audio.terminate()

Source File: audio.py From LedFx with MIT License

5 votes

def activate(self):
        """Set up the FFT processing state and open the audio input stream.

        Creates the PyAudio handle if needed, configures the optional
        pre-emphasis filter and the phase vocoder from ``self._config``, logs
        the available input devices, and starts a single-channel float32
        input stream that delivers buffers to ``self._audio_sample_callback``.
        """

        if self._audio is None:
            self._audio = pyaudio.PyAudio()

        config = self._config

        # Setup a pre-emphasis filter to help balance the highs
        self.pre_emphasis = None
        if config['pre_emphasis']:
            self.pre_emphasis = aubio.digital_filter(3)
            self.pre_emphasis.set_biquad(1., -config['pre_emphasis'], 0, 0, 0)

        # Computed once so the phase vocoder and the stream's buffer size
        # always agree.
        frames_per_buffer = config['mic_rate'] // config['sample_rate']

        # Setup the phase vocoder to perform a windowed FFT
        self._phase_vocoder = aubio.pvoc(config['fft_size'], frames_per_buffer)
        self._frequency_domain_null = aubio.cvec(config['fft_size'])
        self._frequency_domain = self._frequency_domain_null
        self._frequency_domain_x = np.linspace(0, config['mic_rate'], (config["fft_size"] // 2) + 1)

        # Log every device on host API 0 that has at least one input channel.
        # This is informational only; the stream below is opened on the
        # configured device index regardless of what is listed here.
        _LOGGER.info("Audio Input Devices:")
        info = self._audio.get_host_api_info_by_index(0)
        for i in range(0, info.get('deviceCount')):
            device_info = self._audio.get_device_info_by_host_api_device_index(0, i)
            if device_info.get('maxInputChannels') > 0:
                _LOGGER.info("  [{}] {}".format(i, device_info.get('name')))

        # Open the audio stream and start processing the input
        self._stream = self._audio.open(
            input_device_index=config['device_index'],
            format=pyaudio.paFloat32,
            channels=1,
            rate=config['mic_rate'],
            input=True,
            frames_per_buffer=frames_per_buffer,
            stream_callback=self._audio_sample_callback)
        self._stream.start_stream()

        _LOGGER.info("Audio source opened.")

Source File: SoundCard.py From urh with GNU General Public License v3.0

5 votes

def prepare_sync_receive(cls, ctrl_connection: Connection):
        """Open the two-channel float32 input stream and report the outcome.

        On success the stream is stored on ``cls.pyaudio_stream``, a success
        message is sent over ``ctrl_connection`` and 0 is returned. On any
        exception the error is logged, a failure message is sent, and the
        function falls off the end (returning None).
        """
        try:
            open_stream = cls.pyaudio_handle.open
            stream_options = {
                "format": pyaudio.paFloat32,
                "channels": 2,
                "rate": cls.SAMPLE_RATE,
                "input": True,
                "frames_per_buffer": cls.CHUNK_SIZE,
            }
            cls.pyaudio_stream = open_stream(**stream_options)
            ctrl_connection.send("Successfully started pyaudio stream")
            return 0
        except Exception as error:
            logger.exception(error)
            ctrl_connection.send("Failed to start pyaudio stream")

Source File: SoundCard.py From urh with GNU General Public License v3.0

5 votes

def prepare_sync_send(cls, ctrl_connection: Connection):
        """Open the two-channel float32 output stream and report the outcome.

        On success the stream is stored on ``cls.pyaudio_stream``, a success
        message is sent over ``ctrl_connection`` and 0 is returned. On any
        exception the error is logged, a failure message is sent, and the
        function falls off the end (returning None).
        """
        try:
            open_stream = cls.pyaudio_handle.open
            stream_options = {
                "format": pyaudio.paFloat32,
                "channels": 2,
                "rate": cls.SAMPLE_RATE,
                "frames_per_buffer": cls.CHUNK_SIZE,
                "output": True,
            }
            cls.pyaudio_stream = open_stream(**stream_options)
            ctrl_connection.send("Successfully started pyaudio stream")
            return 0
        except Exception as error:
            logger.exception(error)
            ctrl_connection.send("Failed to start pyaudio stream")

Source File: record.py From robust_audio_ae with BSD 2-Clause "Simplified" License

5 votes

def main(out_path, in_path):
    """Run a full-duplex audio session and save what was captured to ``in_path``.

    The audio at ``out_path`` is loaded into the global ``outdata`` (padded
    with silence on both sides), a combined input/output stream is run until
    the global ``frame`` counter reaches the required number of buffers, and
    the bytes collected in the global ``indata`` are de-interleaved, written
    to a temporary WAV file and converted with ffmpeg to 16-bit PCM at
    ``in_path``.

    NOTE(review): ``frame`` and ``indata`` are presumably updated by the
    module-level ``callback``, which is not visible here -- confirm.

    Args:
        out_path: Path of the audio file loaded into ``outdata``.
        in_path: Path ffmpeg writes the converted recording to.
    """
    global frame, indata, outdata

    pa = pyaudio.PyAudio()
    stream = pa.open(format=pyaudio.paFloat32, channels=nchannel, rate=fs, input=True, output=True, frames_per_buffer=chunk, stream_callback=callback)

    outdata = np.array(librosa.load(out_path, fs)[0], dtype=np.float32)
    nframe = int(math.ceil((len(outdata) + reverb * 1.5) / chunk))
    outdata = np.concatenate([np.zeros(int(reverb * 0.5), dtype=np.float32), outdata, np.zeros(reverb * 2, dtype=np.float32)])

    stream.start_stream()
    # NOTE(review): this spins at full CPU until enough buffers have been
    # processed; left unchanged because the callback that advances ``frame``
    # is outside this block.
    while frame < nframe:
        pass
    stream.stop_stream()

    rcv_data = np.frombuffer(b''.join(indata), dtype=np.float32)

    # De-interleave in one step: copy as many samples as were captured into a
    # zero-padded buffer of the expected length, then view it as
    # (samples, channels) and transpose to (channels, samples). Samples that
    # were never captured stay zero, exactly as before.
    nsample = len(outdata) + reverb
    interleaved = np.zeros(nsample * nchannel, dtype=np.float32)
    ncopy = min(len(rcv_data), len(interleaved))
    interleaved[:ncopy] = rcv_data[:ncopy]
    rcv_sig = np.ascontiguousarray(interleaved.reshape((nsample, nchannel)).T)

    if nchannel == 1:
        rcv_sig = rcv_sig.reshape((-1, ))

    # mkstemp returns an open file descriptor; close it so it is not leaked
    # (only the path is used below).
    fd, tmppath = tempfile.mkstemp()
    os.close(fd)
    try:
        librosa.output.write_wav(tmppath, rcv_sig, fs)
        # NOTE(review): the paths are interpolated into a shell command
        # unquoted, so paths containing spaces or shell metacharacters will
        # break or be interpreted by the shell.
        commands.getoutput('ffmpeg -y -i %s -acodec pcm_s16le -ac %d -ar %d %s' % (tmppath, nchannel, fs, in_path))
    finally:
        # Remove the temporary file even if writing or conversion fails.
        os.remove(tmppath)

    stream.close()
    pa.terminate()

Source File: recording.py From speechless with MIT License

4 votes

def record(self):
        """Records from the microphone and returns the trimmed, normalized samples.

        Chunks of ``self.chunk_size`` frames are read as 32-bit floats. The
        very first chunk is discarded. Recording is considered started at the
        first non-silent chunk and stops once the run of consecutive silent
        chunks after that exceeds ``self.silence_until_terminate_in_s``
        seconds. The collected chunks are concatenated, passed through
        ``self._trim_silence`` and ``self._normalize``, and returned.
        """

        print("Wait in silence to begin recording; wait in silence to terminate")

        import pyaudio

        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=pyaudio.paFloat32, channels=1, rate=self.sample_rate, input=True, output=True,
                            frames_per_buffer=self.chunk_size)
            try:
                silent_chunk_count = 0
                has_recording_started = False
                first_chunk_dropped = False
                chunks = []

                while True:
                    chunk_as_array = array.array('f', stream.read(self.chunk_size))

                    # drop first, as it is often loud noise
                    if not first_chunk_dropped:
                        first_chunk_dropped = True
                        continue

                    if byteorder == 'big':
                        chunk_as_array.byteswap()

                    chunk = numpy.array(chunk_as_array)

                    chunks.append(chunk)

                    silent = self._is_silent(chunk)
                    print("Silent: " + str(silent))

                    if has_recording_started:
                        if silent:
                            silent_chunk_count += 1
                            # Compare in samples: silent chunks * chunk size
                            # against the allowed silence in seconds * rate.
                            if silent_chunk_count * self.chunk_size > self.silence_until_terminate_in_s * self.sample_rate:
                                break
                        else:
                            silent_chunk_count = 0
                    elif not silent:
                        has_recording_started = True
            finally:
                # Release the stream even if reading is interrupted
                # (for example by an exception or Ctrl-C).
                stream.stop_stream()
                stream.close()
                print("Stopped recording.")
        finally:
            p.terminate()

        return self._normalize(self._trim_silence(concatenate(chunks)))

Source File: TestSoundCard.py From urh with GNU General Public License v3.0

4 votes

def test_pyaudio():
    """Record 100 buffers of two-channel float32 audio, then play them back.

    No device index is given for either stream. Progress is reported with
    ``print``; nothing is asserted or returned.
    """
    import pyaudio

    CHUNK = 1024
    # Settings shared by the recording and the playback stream.
    common = {"format": pyaudio.paFloat32, "channels": 2, "rate": 48000}

    recorder = pyaudio.PyAudio()
    in_stream = recorder.open(input=True, frames_per_buffer=CHUNK, **common)

    print("* recording")
    frames = [in_stream.read(CHUNK) for _ in range(100)]
    print("* done recording")

    in_stream.stop_stream()
    in_stream.close()
    recorder.terminate()
    data = b''.join(frames)

    print("* playing")

    player = pyaudio.PyAudio()
    out_stream = player.open(output=True, **common)

    # Feed the recording back in slices of CHUNK bytes.
    offset = 0
    total = len(data)
    while offset < total:
        out_stream.write(data[offset:offset + CHUNK])
        offset += CHUNK

    out_stream.stop_stream()
    out_stream.close()

    player.terminate()

    print("* done playing")

Source File: streaming_transcribe.py From rnnt-speech-recognition with MIT License

4 votes

def main(args):
    """Load the checkpointed model and transcribe audio from an input stream.

    Hyperparameters and the encoder are loaded from the directory that
    contains ``args.checkpoint``. Each buffer delivered by the input stream is
    decoded in the stream callback, and the running transcription (kept in
    the global ``LAST_OUTPUT``) is printed whenever it grows.
    """

    checkpoint_dir = os.path.dirname(os.path.realpath(args.checkpoint))
    hparams = model_utils.load_hparams(checkpoint_dir)

    _, tok_to_text, vocab_size = encoding.get_encoder(
        encoder_dir=checkpoint_dir,
        hparams=hparams)
    hparams[HP_VOCAB_SIZE.name] = vocab_size

    # Stateful model, built before the weights are loaded into it.
    model = build_keras_model(hparams, stateful=True)
    model.load_weights(args.checkpoint)
    decoder_fn = decoding.greedy_decode_fn(model, hparams)

    audio_interface = pyaudio.PyAudio()

    def on_audio(in_data, frame_count, time_info, status):
        global LAST_OUTPUT

        # Raw bytes -> float32 samples -> features with a leading axis added.
        samples = tf.io.decode_raw(in_data, out_type=tf.float32)
        features = preprocessing.preprocess_audio(
            audio=samples,
            sample_rate=SAMPLE_RATE,
            hparams=hparams)
        batched_features = tf.expand_dims(features, axis=0)

        tokens = decoder_fn(batched_features)[0]
        previous = LAST_OUTPUT
        transcription = previous + tok_to_text(tokens).numpy().decode('utf8')

        # Only print when this buffer added new text.
        if transcription != LAST_OUTPUT:
            LAST_OUTPUT = transcription
            print(transcription)

        return in_data, pyaudio.paContinue

    stream = audio_interface.open(
        format=pyaudio.paFloat32,
        channels=NUM_CHANNELS,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=CHUNK_SIZE,
        stream_callback=on_audio)

    print('Listening...')
    stream.start_stream()

    # Block the main thread while the callback does the work.
    while stream.is_active():
        time.sleep(0.1)

    stream.stop_stream()
    stream.close()
    audio_interface.terminate()

Python pyaudio.paFloat32 Examples