Python Examples of pydub.AudioSegment.from_mp3

Source File: cfp.py From Melody-extraction-with-melodic-segnet with MIT License

7 votes

def load_audio(filepath, sr=None, mono=True, dtype='float32'):
    """Load an audio file as an array, optionally down-mixed and resampled.

    MP3 input is decoded with pydub into a temporary WAV file which is then
    read with ``sf.read``; any other input is passed to ``sf.read`` directly.

    :param filepath: path of the audio file to load
    :param sr: target sampling rate; when falsy the file's own rate is kept
    :param mono: when true, multi-channel audio is averaged over axis 1
    :param dtype: dtype the returned samples are cast to
    :returns: tuple ``(x, fs)`` of samples and sampling rate
    """
    # Test the extension, not a substring: "'.mp3' in filepath" also matched
    # directory names and missed upper-case extensions.
    if filepath.lower().endswith('.mp3'):
        from pydub import AudioSegment
        import tempfile
        import os
        mp3 = AudioSegment.from_mp3(filepath)
        fd, path = tempfile.mkstemp(suffix='.wav')
        # mkstemp hands back an open OS-level descriptor; close it right away
        # so it is not leaked (the file is re-opened by name below).
        os.close(fd)
        try:
            mp3.export(path, format="wav")
            del mp3
            x, fs = sf.read(path)
        finally:
            # Remove the temporary file even when decoding/reading fails.
            os.remove(path)
    else:
        x, fs = sf.read(filepath)

    if mono and x.ndim > 1:
        x = np.mean(x, axis=1)
    if sr and sr != fs:
        x = scipy.signal.resample_poly(x, sr, fs)
        fs = sr
    x = x.astype(dtype)

    return x, fs

Source File: bot.py From wukong-itchat with MIT License

7 votes

def convert_mp3_to_wav(mp3_path):
    """
    Convert an mp3 file to wav.

    The wav file is written next to the source, with only the file
    extension swapped for ``.wav``.

    :param mp3_path: path of the mp3 file
    :returns: path of the wav file, or None when ``mp3_path`` does not exist
    """
    if not os.path.exists(mp3_path):
        logging.critical("文件错误 {}".format(mp3_path))
        return None
    # Swap only the extension: str.replace would also rewrite ".mp3" inside
    # directory names, and would leave the path untouched (so the export
    # overwrote the source) for inputs such as "a.MP3".
    target = os.path.splitext(mp3_path)[0] + ".wav"
    AudioSegment.from_mp3(mp3_path).export(target, format="wav")
    return target

Source File: test_accuracy.py From nyumaya_audio_recognition with Apache License 2.0

6 votes

def load_audio_file(filename, resize=False):
    """Load an audio file with pydub and normalise it.

    The decoder format is picked from the (case-insensitive) file extension.
    The loaded segment is converted to the module-level ``samplerate``, one
    channel and a sample width of 2 bytes.

    :param filename: path of the audio file
    :param resize: unused; kept so existing callers keep working
    :returns: ``(sound, duration_seconds)``, or ``(None, None)`` when the
        extension is not supported or the file cannot be loaded
    """
    # (extension, format name handed to AudioSegment.from_file)
    known_formats = (
        ('.mp3', 'mp3'),
        ('.wav', 'wav'),
        ('.ogg', 'ogg'),
        ('.flac', 'flac'),
        ('.3gp', '3gp'),
        ('.3g', '3gp'),
    )
    try:
        lowered = filename.lower()
        audio_format = next(
            (fmt for ext, fmt in known_formats if lowered.endswith(ext)),
            None,
        )
        if audio_format is None:
            # Previously this case only "worked" through an AttributeError
            # on None swallowed by a bare except.
            print("Couldn't load file")
            return None, None

        sound = AudioSegment.from_file(filename, audio_format)
        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except Exception:
        # Deliberately best-effort, but no longer traps KeyboardInterrupt or
        # SystemExit the way the bare except did.
        print("Couldn't load file")
        return None, None

    return sound, duration

Source File: tts.py From personal-backend with Apache License 2.0

6 votes

def convert(mp3):
    """Convert an mp3 file to wav and return the path of the wav file.

    The wav file is written next to the source, with only the file
    extension swapped for ``.wav``.

    :param mp3: path of the mp3 file
    :returns: path of the exported wav file
    """
    import os

    # Compute the target once and touch only the extension: str.replace
    # would rewrite every ".mp3" in the path and, for a path without a
    # lower-case ".mp3", export on top of the source file.
    wav = os.path.splitext(mp3)[0] + ".wav"
    AudioSegment.from_mp3(mp3).export(wav, format="wav")
    return wav

Source File: baidu_voice.py From WeixinBot with Apache License 2.0

5 votes

def use_cloud(self, sourcefile_path, token):
        """Post the audio frames of a WAV file to the Baidu speech endpoint.

        The response body is handed to ``self.dump_res`` through pycurl's
        write callback; this method itself returns nothing.

        :param sourcefile_path: path of the WAV file to send
        :param token: access token appended to the request URL
        """
        fp = wave.open(sourcefile_path, 'rb')
        try:
            nf = fp.getnframes()
            audio_data = fp.readframes(nf)
        finally:
            # The wave reader was previously never closed.
            fp.close()
        # NOTE(review): assumes 2 bytes per frame (16-bit mono) — confirm
        f_len = nf * 2

        cuid = "xxxxxxxxxx"  # my xiaomi phone MAC
        srv_url = 'http://vop.baidu.com/server_api' + '?cuid=' + cuid + '&token=' + token
        http_header = [
            'Content-Type: audio/pcm; rate=8000',
            'Content-Length: %d' % f_len
        ]

        c = pycurl.Curl()
        try:
            c.setopt(pycurl.URL, str(srv_url))  # curl doesn't support unicode
            c.setopt(c.HTTPHEADER, http_header)  # must be list, not dict
            c.setopt(c.POST, 1)
            c.setopt(c.CONNECTTIMEOUT, 30)
            c.setopt(c.TIMEOUT, 30)
            c.setopt(c.WRITEFUNCTION, self.dump_res)
            c.setopt(c.POSTFIELDS, audio_data)
            c.setopt(c.POSTFIELDSIZE, f_len)
            c.perform()  # pycurl.perform() has no return val
        finally:
            # Release the curl handle even when the transfer fails.
            c.close()

Source File: io.py From auditok with MIT License

5 votes

def _load_with_pydub(filename, audio_format):
    """Decode a compressed audio file with pydub into a buffer source.

    Video files are accepted as well, in which case their audio track(s)
    are extracted and loaded. This is an internal helper and should not be
    called directly; use :func:`from_file` instead.

    :Parameters:

    `filename`:
        path to audio file.
    `audio_format`:
        string, audio file format (e.g. raw, webm, wav, ogg)
    """
    # Formats with a dedicated pydub loader; anything else goes through the
    # generic ``AudioSegment.from_file``.
    dedicated_loaders = {
        "mp3": AudioSegment.from_mp3,
        "ogg": AudioSegment.from_ogg,
        "flv": AudioSegment.from_flv,
    }
    try:
        loader = dedicated_loaders[audio_format]
    except KeyError:
        loader = AudioSegment.from_file

    decoded = loader(filename)
    return BufferAudioSource(
        data=decoded.raw_data,
        sampling_rate=decoded.frame_rate,
        sample_width=decoded.sample_width,
        channels=decoded.channels,
    )

Source File: speech_recognition.py From macaw with MIT License

5 votes

def mp3_to_ogg(input_file_name): # caller should delete the file afterwards.
    """Convert an mp3 file to an Opus-encoded ogg temporary file.

    :param input_file_name: path of the mp3 file to convert
    :returns: path of the temporary ogg file; the caller owns it and must
        delete it when done
    """
    import os

    ogg_file = tempfile.NamedTemporaryFile(delete=False)
    # Only the reserved name is needed. Close the handle before exporting so
    # the path can be re-opened for writing (an open NamedTemporaryFile
    # cannot be re-opened by name on Windows).
    ogg_file.close()
    try:
        AudioSegment.from_mp3(input_file_name).export(
            ogg_file.name, format='ogg', parameters=["-acodec", "libopus"])
    except Exception:
        # The caller never learns the path on failure, so clean up here.
        os.remove(ogg_file.name)
        raise
    return ogg_file.name

Source File: feature.py From mgc-django with GNU General Public License v2.0

5 votes

def extract(file):
    """
    Extract a feature vector from an audio file.

    The first 30 seconds of the input are exported as a wav file, MFCCs are
    computed from it, and the per-coefficient means are concatenated with the
    main and upper diagonals of the MFCC covariance matrix.

    NOTE: a ``.wav`` input is overwritten in place by its 30-second clip
    (unchanged from the previous behaviour).

    :param file: path of the audio file
    :returns: feature row vector of shape ``(1, n)``, or None when feature
        extraction fails (the error is printed)
    """
    # splitext instead of "file[:-3]": the slice assumed a 3-character
    # extension and produced e.g. "song.fwav" for "song.flac".
    root, ext = os.path.splitext(file)
    file_format = ext[1:].lower()
    is_wav = file_format == 'wav'
    wav_path = file if is_wav else root + ".wav"
    created = False  # True only once this call has written a separate wav

    try:
        song = AudioSegment.from_file(file, file_format)
        song = song[:30 * 1000]
        song.export(wav_path, format="wav")
        created = not is_wav
        file = wav_path
    except Exception as e:
        # Best effort: fall through and try to read the input as wav as-is.
        print(e)

    try:
        (rate, data) = scipy.io.wavfile.read(file)
        mfcc_feat = mfcc(data, rate)
        mm = np.transpose(mfcc_feat)
        mf = np.mean(mm, axis=1)
        cf = np.cov(mm)
        # Mean vector followed by every diagonal k >= 0 of the covariance
        # matrix, built in one concatenate instead of repeated np.append.
        # (Original author's note: the result has 104 entries — depends on
        # the mfcc configuration, TODO confirm.)
        ff = np.concatenate(
            [mf] + [np.diag(cf, i) for i in range(mm.shape[0])])
        return ff.reshape(1, -1)
    except Exception as e:
        print(e)
        return None
    finally:
        # Remove only the wav this call created. The old code removed
        # whatever ``file`` pointed at, which could be the caller's original
        # input when conversion had failed, and leaked the wav on errors.
        if created:
            try:
                os.remove(wav_path)
            except OSError as e:
                print(e)

Source File: audio.py From deep-voice-conversion with MIT License

5 votes

def mp3_to_wav(src_path, tar_path):
    """
    Read mp3 file from source path, convert it to wav and write it to target path.
    Necessary libraries: ffmpeg, libav.

    A relative ``tar_path`` is resolved against the directory of ``src_path``.
    The process working directory is left untouched.

    :param src_path: source mp3 file path
    :param tar_path: target wav file path
    """
    src_path = os.path.abspath(src_path)
    # Resolve paths explicitly instead of os.chdir(): chdir mutates
    # process-wide state, raises for a bare filename (empty dirname) and
    # makes a relative src_path point at the wrong location afterwards.
    # os.path.join leaves an absolute tar_path unchanged.
    tar_path = os.path.join(os.path.dirname(src_path), tar_path)
    AudioSegment.from_mp3(src_path).export(tar_path, format='wav')

Source File: rebreakcaptcha.py From rebreakcaptcha with MIT License

5 votes

def get_challenge_audio(self, url):
        """Download the challenge audio and return it as an in-memory WAV.

        :param url: address of the mp3 challenge audio
        :returns: ``io.BytesIO`` holding the WAV data, positioned at offset 0
        """
        # Fetch the mp3 payload and keep it in memory only
        response = requests.get(url)
        mp3_stream = io.BytesIO(response.content)

        # Re-encode as WAV into a second in-memory buffer
        wav_stream = io.BytesIO()
        AudioSegment.from_mp3(mp3_stream).export(wav_stream, format="wav")

        # Rewind so the consumer reads from the beginning
        wav_stream.seek(0)
        return wav_stream

Source File: voice2txt.py From ns4_chatbot with Apache License 2.0

5 votes

def v2t(voice_data):
    """Convert mp3 voice data to text with the configured recognition engine.

    The audio is re-encoded as wav into ``<config.wxbot_cache_path>/out.wav``
    and the wav bytes are passed to the Baidu helper when
    ``config.voice2txt_engine`` is ``"baidu"``, otherwise to the Xunfei helper.

    :param voice_data: mp3-encoded audio bytes
    :returns: whatever the selected engine helper returns, or None on failure
    """
    try:
        wav_file_path = config.wxbot_cache_path + "/out.wav"
        audio = AudioSegment.from_mp3(BytesIO(voice_data))
        audio.export(wav_file_path, format="wav")
        with open(wav_file_path, 'rb') as f:
            data = f.read()
        if config.voice2txt_engine == "baidu":
            return _covert2text_baidu(data)
        return _covert2text_xunfei(data)
    except Exception as e:
        # logger.exception(msg, *args) %-formats msg with args; passing the
        # exception as msg and the text as an argument made logging itself
        # fail with a formatting error. Log a single message instead (the
        # traceback is attached automatically).
        logger.exception("转化语音识别失败" + str(e))
        return None

Source File: baidu_voice.py From WeixinBot with Apache License 2.0

5 votes

def getOutput(sourcefile_path, targetfile_path):
    """Convert an mp3 file to wav, run it through VoiceService, return the result.

    :param sourcefile_path: path of the source mp3 file
    :param targetfile_path: path the wav file is written to
    :returns: the ``'result'`` entry of the JSON document in the service buffer
    """
    import time

    AudioSegment.from_mp3(sourcefile_path).export(targetfile_path, format="wav")
    voiceService = VoiceService()
    voiceService.voicepro(targetfile_path)
    # Poll for completion, yielding the CPU between checks; the previous
    # tight "while True" loop spun at full speed until isOk was set.
    while not voiceService.isOk:
        time.sleep(0.05)
    usage = json.loads(voiceService.buff)
    return usage['result']

Source File: audiogrep.py From audiogrep with MIT License

5 votes

def _unused_output_path(directory, stem, extension):
    '''Return the first ``stem[_N].extension`` path in directory that does not exist yet.'''
    number = 0
    while True:
        filename = stem
        if number:
            filename += "_" + str(number)
        path = os.path.join(directory, filename + '.' + extension)
        if not os.path.exists(path):
            return path
        # file already exists, increment name and try again
        number += 1


def extract_words(files):
    ''' Extracts individual words from files and exports them to individual files.

    Every word listed in a file's timestamps is cut out of that file and
    written to ``extracted_words/<word>[_N].<ext>``. Only ``.mp3`` and
    ``.wav`` sources are handled; other files are reported and skipped.

    :param files: iterable of audio file paths
    '''
    output_directory = 'extracted_words'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for f in files:
        lowered = f.lower()
        if lowered.endswith('.mp3'):
            file_format = 'mp3'
            source_segment = AudioSegment.from_mp3(f)
        elif lowered.endswith('.wav'):
            file_format = 'wav'
            source_segment = AudioSegment.from_wav(f)
        else:
            # The old check ("not file_format or source_segment") was
            # inverted and did not skip, so unsupported files crashed below
            # on a None segment.
            print('Unsupported audio format for ' + f)
            continue

        # Only this file's timestamps apply to this file's audio; passing
        # the whole ``files`` list sliced every file with every transcript.
        # NOTE(review): assumes convert_timestamps accepts a one-element
        # list just like the full list it received before — confirm.
        sentences = convert_timestamps([f])
        for s in sentences:
            for word in s['words']:
                text = word[0]
                start = float(word[1]) * 1000
                end = float(word[2]) * 1000
                audio = AudioSegment.silent(duration=end - start)
                audio = audio.overlay(source_segment[start:end])
                output_path = _unused_output_path(
                    output_directory, text, file_format)
                print('Exporting to: ' + output_path)
                audio.export(output_path, format=file_format)

Source File: music.py From P2P-music-sharing with MIT License

5 votes

def convert_to_music(bytes):
    """Decode mp3 bytes and re-encode them as a 192k mp3 in memory.

    :param bytes: mp3-encoded audio data
    """
    song = AudioSegment.from_file(io.BytesIO(bytes), format="mp3")
    # export() writes binary data, so the in-memory target has to be a
    # BytesIO; a StringIO rejects bytes with a TypeError.
    output = io.BytesIO()
    song.export(output, format="mp3", bitrate="192k")
    # NOTE(review): the decoded copy is never used and ``output`` is
    # discarded; the call is kept because it still raises when the file is
    # missing — confirm the intended behaviour.
    AudioSegment.from_mp3(cwd + "/music/copy.mp3")
    print("Done")

Source File: music.py From P2P-music-sharing with MIT License

5 votes

def convert(song=song_path):
    """Decode an mp3 file and return its raw audio data.

    :param song: path of the mp3 file; defaults to the module-level
        ``song_path`` as it was when this function was defined
    :returns: the decoded raw audio data
    """
    # Use the public ``raw_data`` property rather than the private ``_data``
    # attribute it exposes.
    return AudioSegment.from_mp3(song).raw_data

Source File: prepro.py From voice-vector with MIT License

5 votes

def mp3_to_wav(src_path, tar_path):
    """
    Read mp3 file from source path, convert it to wav and write it to target path.
    Necessary libraries: ffmpeg, libav.

    A relative ``tar_path`` is resolved against the directory of ``src_path``.
    The process working directory is left untouched.

    :param src_path: source mp3 file path
    :param tar_path: target wav file path
    """
    src_path = os.path.abspath(src_path)
    # Resolve paths explicitly instead of os.chdir(): chdir mutates
    # process-wide state, raises for a bare filename (empty dirname) and
    # makes a relative src_path point at the wrong location afterwards.
    # os.path.join leaves an absolute tar_path unchanged.
    tar_path = os.path.join(os.path.dirname(src_path), tar_path)
    AudioSegment.from_mp3(src_path).export(tar_path, format='wav')

Source File: audio.py From voice-vector with MIT License

5 votes

def mp3_to_wav(src_path, tar_path):
    """
    Read mp3 file from source path, convert it to wav and write it to target path.
    Necessary libraries: ffmpeg, libav.

    A relative ``tar_path`` is resolved against the directory of ``src_path``.
    The process working directory is left untouched.

    :param src_path: source mp3 file path
    :param tar_path: target wav file path
    """
    src_path = os.path.abspath(src_path)
    # Resolve paths explicitly instead of os.chdir(): chdir mutates
    # process-wide state, raises for a bare filename (empty dirname) and
    # makes a relative src_path point at the wrong location afterwards.
    # os.path.join leaves an absolute tar_path unchanged.
    tar_path = os.path.join(os.path.dirname(src_path), tar_path)
    AudioSegment.from_mp3(src_path).export(tar_path, format='wav')

Source File: voice.py From Jarvis with MIT License

5 votes

def text_to_speech(self, speech):
        """Speak the given text aloud.

        The text is stripped of ANSI escape sequences, synthesised with gTTS
        into ``voice.mp3`` in the current directory, played back, and the
        file is removed afterwards.

        :param speech: text to speak
        """
        speech = remove_ansi_escape_seq(speech)
        tts = gTTS(speech, lang="en")
        tts.save("voice.mp3")
        try:
            audio = AudioSegment.from_mp3('voice.mp3')
            playback.play(audio)
        finally:
            # Remove the scratch file even when decoding or playback fails.
            os.remove("voice.mp3")

Source File: audio.py From parallel-wavenet-vocoder with MIT License

5 votes

def mp3_to_wav(src_path, tar_path):
    """
    Read mp3 file from source path, convert it to wav and write it to target path.
    Necessary libraries: ffmpeg, libav.

    A relative ``tar_path`` is resolved against the directory of ``src_path``.
    The process working directory is left untouched.

    :param src_path: source mp3 file path
    :param tar_path: target wav file path
    """
    src_path = os.path.abspath(src_path)
    # Resolve paths explicitly instead of os.chdir(): chdir mutates
    # process-wide state, raises for a bare filename (empty dirname) and
    # makes a relative src_path point at the wrong location afterwards.
    # os.path.join leaves an absolute tar_path unchanged.
    tar_path = os.path.join(os.path.dirname(src_path), tar_path)
    AudioSegment.from_mp3(src_path).export(tar_path, format='wav')

Source File: streaming_microphone.py From alexa-voice-service-client with MIT License

4 votes

def main(client_id, secret, refresh_token):
    """Run an interactive loop that streams microphone audio to Alexa.

    Microphone input is captured through a PyAudio callback stream into an
    in-memory buffer, the buffer is sent with ``send_audio_file``, and any
    'Speak' / 'Play' directive that comes back is decoded as mp3 and played.
    The loop runs until it is interrupted; the audio stream and the PyAudio
    instance are released on the way out.

    :param client_id: client id passed to ``AlexaClient``
    :param secret: secret passed to ``AlexaClient``
    :param refresh_token: refresh token passed to ``AlexaClient``
    """
    alexa_client = AlexaClient(
        client_id=client_id,
        secret=secret,
        refresh_token=refresh_token,
    )

    p = pyaudio.PyAudio()

    # Stream callback: appends each captured chunk to the current
    # ``input_buffer``. The name is resolved at call time from the enclosing
    # scope, so it always refers to the most recently assigned buffer.
    def callback(in_data, frame_count, time_info, status):
        input_buffer.write(in_data)
        return (in_data, pyaudio.paContinue)

    # Input stream is created stopped (start=False); it is started only
    # after ``input_buffer`` has been assigned inside the try block below.
    stream = p.open(
        rate=16000,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        stream_callback=callback,
        frames_per_buffer=128,
        start=False
    )

    # Set from an 'ExpectSpeech' directive and passed along with the next
    # request; None otherwise.
    dialog_request_id = None

    try:
        print('listening. Press CTRL + C to exit.')
        input_buffer = io.BytesIO()
        stream.start_stream()
        print('Say something to Alexa.')
        alexa_client.connect()
        while True:
            directives = alexa_client.send_audio_file(
                input_buffer,
                dialog_request_id=dialog_request_id
            )
            # Stop capturing while the response is handled.
            stream.stop_stream()
            if directives:
                dialog_request_id = None
                print('Alexa\'s turn.')
                for directive in directives:
                    if directive.name == 'ExpectSpeech':
                        dialog_request_id = directive.dialog_request_id
                    if directive.name in ['Speak', 'Play']:
                        # The attachment is decoded as mp3 and played back.
                        output_buffer = io.BytesIO(directive.audio_attachment)
                        track = AudioSegment.from_mp3(output_buffer)
                        play(track)
                # Start the next turn with an empty capture buffer.
                input_buffer = io.BytesIO()
            stream.start_stream()
            print('Your turn. Say something.')
            time.sleep(1)
    finally:
        # Always release the audio resources, including on CTRL + C.
        stream.stop_stream()
        stream.close()
        p.terminate()

Source File: audiogrep.py From audiogrep with MIT License

4 votes

def compose(segments, out='out.mp3', padding=0, crossfade=0, layer=False):
    '''Stitches together a new audio track from the given segments.

    Each segment is cut from its source file (``.wav`` or ``.mp3``) and is
    either appended to the track or, with ``layer=True``, overlaid on a
    silent track as long as the longest segment. Segments that cannot be
    processed are skipped. The result is exported to ``out`` in the format
    named by its file extension.

    :param segments: list of dicts with 'start' and 'end' (seconds) and
        'file'; a 'duration' key (milliseconds) is added to every segment used
    :param out: output file path
    :param padding: milliseconds of silence added after each segment
    :param crossfade: crossfade in milliseconds between appended segments
    :param layer: overlay the segments instead of concatenating them
    :returns: the list of segments that were actually used
    '''

    files = {}

    working_segments = []

    audio = AudioSegment.empty()

    # Guard against an empty segment list: max() of nothing raises.
    if layer and segments:
        total_time = max(s['end'] - s['start'] for s in segments) * 1000
        audio = AudioSegment.silent(duration=total_time)

    for s in segments:
        try:
            start = s['start'] * 1000
            end = s['end'] * 1000
            f = s['file'].replace('.transcription.txt', '')
            if f not in files:
                if f.endswith('.wav'):
                    files[f] = AudioSegment.from_wav(f)
                elif f.endswith('.mp3'):
                    files[f] = AudioSegment.from_mp3(f)

            segment = files[f][start:end]

            print(start, end, f)

            if layer:
                audio = audio.overlay(segment, times=1)
            elif working_segments:
                # Crossfade only once something has been added. Keying this
                # on the loop index made every later append fail (crossfade
                # longer than the still-empty track) when the first segment
                # had been skipped.
                audio = audio.append(segment, crossfade=crossfade)
            else:
                audio = audio + segment

            if padding > 0:
                audio = audio + AudioSegment.silent(duration=padding)

            s['duration'] = len(segment)
            working_segments.append(s)
        except Exception:
            # Best effort: skip segments that cannot be loaded or cut, but
            # let KeyboardInterrupt / SystemExit through (the bare except
            # swallowed those too).
            continue

    audio.export(out, format=os.path.splitext(out)[1].replace('.', ''))
    return working_segments

Python pydub.AudioSegment.from_mp3() Examples