Python pydub.AudioSegment.from_wav() Examples
The following are 17 code examples of pydub.AudioSegment.from_wav(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pydub.AudioSegment, or try the search function.
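
Before diving into the examples, here is a minimal, self-contained sketch of what AudioSegment.from_wav() gives you (file names are placeholders): pydub loads the WAV into an AudioSegment, slicing is done in milliseconds, and export() writes any format your ffmpeg build supports.

from pydub import AudioSegment

# Load a WAV file into an AudioSegment (path is a placeholder).
sound = AudioSegment.from_wav("input.wav")

# Slicing is by milliseconds: take the first two seconds.
first_two_seconds = sound[:2000]

# Inspect basic properties, then export.
print(sound.duration_seconds, sound.frame_rate, sound.channels)
first_two_seconds.export("clip.wav", format="wav")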
Example #1
Source File: youtube_processor.py From honk with MIT License | 6 votes |
def segment_audio(self, file_name, segments):
    audio_data = AudioSegment.from_wav(file_name)
    # Compute the output prefix once. (The original recomputed it from
    # file_name inside the loop but also reassigned file_name there, so every
    # iteration after the first derived its prefix from the previous output path.)
    file_prefix = os.path.basename(file_name).split('.')[0]
    for segment in segments:
        assert segment[0] < segment[1]
        center = round((segment[0] + segment[1]) / 2)
        padding = round(self.audio_length / 2)
        if center < padding:
            start_time = 0
        else:
            start_time = center - padding
        end_time = start_time + self.audio_length
        # pydub slice indices are milliseconds
        audio_segment = audio_data[start_time:end_time]
        out_name = os.path.join(self.output_dir, file_prefix + "_" + str(start_time) + "~" + str(end_time) + ".wav")
        print(out_name)
        audio_segment.export(out_name, format="wav")
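
The (start, end) pairs and self.audio_length above are all in milliseconds, because pydub slices by milliseconds. A standalone sketch of the windowing logic, with a silent segment standing in for the loaded file and a hypothetical detected segment:

from pydub import AudioSegment

# Sketch of the centered, clamped window used above (all values in ms).
audio = AudioSegment.silent(duration=5000)  # stands in for from_wav(...)
audio_length = 1000                         # window length
segment = (250, 900)                        # hypothetical (start_ms, end_ms)
center = round((segment[0] + segment[1]) / 2)
start = max(0, center - audio_length // 2)
window = audio[start:start + audio_length]
print(len(window))  # len() of an AudioSegment is its duration in ms: 1000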
Example #2
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithPause(self, image_file, audio_file, pause):
    audio_file = audio_file.replace("\\", "/")
    f = sf.SoundFile(audio_file)
    audio_clip = AudioSegment.from_wav(audio_file)
    duration = (len(f) / f.samplerate) + pause / 1000
    audio_clip_with_pause = audio_clip + AudioSegment.silent(duration=pause)
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration)
Example #3
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithTransition(self, image_file, audio_file, transition_file):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(r"%s" % audio_file)
        f = sf.SoundFile(r"%s" % audio_file)
    except Exception as e:
        print(e)
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    duration = (len(f) / f.samplerate)
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration)
    self.transitions.append((transition_file, len(self.imageframes) - 1, duration_in_ms / 1000))
Example #4
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = r"%s" % audio_file
    f = sf.SoundFile(audio_file)
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
    except:
        print("error with frame audio transition pause for %s" % audio_file)
        audio_clip = AudioSegment.silent(duration=pause)
    duration = (len(f) / f.samplerate)
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration + (pause / 1000))
    self.transitions.append((transition_file, len(self.imageframes) - 1, (duration_in_ms / 1000) + (pause / 1000)))
Example #5
Source File: main.py From HanTTS with MIT License | 6 votes |
def synthesize(self, text, src, dst):
    """
    Synthesize .wav from text.
    src is the folder that contains all syllable .wav files.
    dst is the destination folder for the synthesized file.
    """
    print("Synthesizing ...")
    delay = 0
    increment = 355  # milliseconds
    pause = 500  # pause for punctuation
    syllables = lazy_pinyin(text, style=pypinyin.TONE3)
    # initialize to complete silence; each character takes up ~500 ms
    result = AudioSegment.silent(duration=500 * len(text))
    for syllable in syllables:
        path = src + syllable + ".wav"
        sound_file = Path(path)
        # insert 500 ms of silence for punctuation marks
        if syllable in TextToSpeech.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            continue
        # skip sound files that don't exist
        if not sound_file.is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment
    directory = dst
    if not os.path.exists(directory):
        os.makedirs(directory)
    result.export(directory + "generated.wav", format="wav")
    print("Exported.")
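
The core trick here (and in Example #9 below) is overlaying each clip onto a pre-allocated silent canvas at a fixed position rather than concatenating, so every syllable starts on a regular 355 ms grid regardless of its own length. A minimal sketch of that pattern, using silent clips as stand-ins for syllable files:

from pydub import AudioSegment

canvas = AudioSegment.silent(duration=3 * 355)  # room for three slots
clip = AudioSegment.silent(duration=200)        # stands in for a syllable .wav
position = 0
for _ in range(3):
    canvas = canvas.overlay(clip, position=position)  # position is in ms
    position += 355
canvas.export("grid.wav", format="wav")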
Example #6
Source File: test_accuracy.py From nyumaya_audio_recognition with Apache License 2.0 | 6 votes |
def load_audio_file(filename, resize=False):
    sound = None
    try:
        if filename.endswith('.mp3') or filename.endswith('.MP3'):
            sound = AudioSegment.from_mp3(filename)
        elif filename.endswith('.wav') or filename.endswith('.WAV'):
            sound = AudioSegment.from_wav(filename)
        elif filename.endswith('.ogg'):
            sound = AudioSegment.from_ogg(filename)
        elif filename.endswith('.flac'):
            sound = AudioSegment.from_file(filename, "flac")
        elif filename.endswith('.3gp'):
            sound = AudioSegment.from_file(filename, "3gp")
        elif filename.endswith('.3g'):
            sound = AudioSegment.from_file(filename, "3gp")
        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except:
        print("Couldn't load file")
        return None, None
    return sound, duration
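
A hypothetical call, assuming the function above is in scope and that samplerate is a module-level constant as it is in the source project:

samplerate = 16000  # assumed; defined at module level in the source project
sound, duration = load_audio_file("sample.wav")
if sound is not None:
    print("loaded %.2f s of mono 16-bit audio at 16 kHz" % duration)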
Example #7
Source File: segment_mosei.py From Self-Supervised-Speech-Pretraining-and-Representation-Learning with MIT License | 5 votes |
def segment_mosei(args):
    output_dir = args.output_path
    mosei_summary = os.path.join(output_dir, 'mosei_no_semi.csv')
    flac_dir = os.path.join(output_dir, 'flac')
    assert os.path.exists(mosei_summary), 'Output path should already be created with a mosei_no_semi.csv inside it'
    for target_dir in [flac_dir]:
        if os.path.exists(target_dir):
            decision = input(f'{target_dir} already exists. Remove it? [Y/N]: ')
            if decision.upper() == 'Y':
                shutil.rmtree(target_dir)
                print(f'{target_dir} removed')
            else:
                print('Abort')
                exit(0)
        os.makedirs(target_dir)
    df = pd.read_csv(mosei_summary)
    for index, row in df.iterrows():
        underscore = row.key
        wavname = f'{row.filename}.wav'
        wavpath = os.path.join(args.data_path, wavname)
        assert os.path.exists(wavpath), f'wav not exists: {wavpath}'
        wav = AudioSegment.from_wav(wavpath)
        start = int(row.start * 1000)
        end = int(row.end * 1000)
        assert start >= 0, f'{underscore} has negative start time'
        assert end >= 0, f'{underscore} has negative end time'
        seg_wav = wav[start:end]
        seg_flacpath = os.path.join(flac_dir, f'{underscore}.flac')
        seg_wav.export(seg_flacpath, format='flac',
                       parameters=['-ac', '1', '-sample_fmt', 's16', '-ar', '16000'])
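
The export call above forwards raw ffmpeg flags through pydub's parameters argument, which is how the FLAC output ends up mono, 16-bit, 16 kHz. A minimal isolated sketch of that pass-through (file names are placeholders):

from pydub import AudioSegment

wav = AudioSegment.from_wav("utterance.wav")
wav.export("utterance.flac", format="flac",
           parameters=["-ac", "1", "-sample_fmt", "s16", "-ar", "16000"])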
Example #8
Source File: audiogrep.py From audiogrep with MIT License | 5 votes |
def extract_words(files):
    '''
    Extracts individual words from files and exports them to individual files.
    '''
    output_directory = 'extracted_words'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    for f in files:
        file_format = None
        source_segment = None
        if f.lower().endswith('.mp3'):
            file_format = 'mp3'
            source_segment = AudioSegment.from_mp3(f)
        elif f.lower().endswith('.wav'):
            file_format = 'wav'
            source_segment = AudioSegment.from_wav(f)
        # skip unsupported formats (fixed: the original tested
        # `not file_format or source_segment` and never skipped)
        if not file_format or not source_segment:
            print('Unsupported audio format for ' + f)
            continue
        sentences = convert_timestamps(files)
        for s in sentences:
            for word in s['words']:
                start = float(word[1]) * 1000
                end = float(word[2]) * 1000
                word = word[0]
                total_time = end - start
                audio = AudioSegment.silent(duration=total_time)
                audio = audio.overlay(source_segment[start:end])
                number = 0
                output_path = None
                while True:
                    output_filename = word
                    if number:
                        output_filename += "_" + str(number)
                    output_filename = output_filename + '.' + file_format
                    output_path = os.path.join(output_directory, output_filename)
                    if not os.path.exists(output_path):
                        # this file doesn't exist, so we can continue
                        break
                    # file already exists, increment name and try again
                    number += 1
                print('Exporting to: ' + output_path)
                audio.export(output_path, format=file_format)
Example #9
Source File: tts.py From parrots with Apache License 2.0 | 5 votes |
def synthesize(self, input_text='', output_wav_path=''):
    """
    Synthesize a .wav file from text.
    input_text: the text to synthesize
    output_wav_path: the path to save the synthesized file to
    """
    delay = 0
    increment = 355  # milliseconds
    pause = 500  # pause for punctuation
    syllables = lazy_pinyin(input_text, style=pypinyin.TONE3)
    # initialize to complete silence; each character takes up ~500 ms
    result = AudioSegment.silent(duration=500 * len(input_text))
    for syllable in syllables:
        path = os.path.join(self.syllables_dir, syllable + ".wav")
        sound_file = Path(path)
        # insert 500 ms of silence for punctuation marks
        if syllable in self.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            continue
        # skip sound files that don't exist
        if not sound_file.is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment
    if not output_wav_path:
        output_wav_path = 'out.wav'
    result.export(output_wav_path, format="wav")
    default_logger.debug("Exported:" + output_wav_path)
    return result
Example #10
Source File: dataset_tools.py From DeepMusicClassification with MIT License | 5 votes |
def slice_audio(audio_file, end=3):
    """ Slices a single audio file into 3 second chunks """
    start = 0
    end *= 1000
    audio = AudioSegment.from_wav(audio_file)
    slices = []
    for i in range(10):
        audio_slice = audio[start:end]
        slices.append(audio_slice)
        start += 3000
        end += 3000
    return slices
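
Note that the loop always produces exactly ten slices, so the input is assumed to be at least 30 seconds long (pydub silently returns a shorter, or empty, segment past the end). A hypothetical usage that writes the chunks out (file names are placeholders):

for i, chunk in enumerate(slice_audio("song.wav")):
    chunk.export("chunk_%02d.wav" % i, format="wav")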
Example #11
Source File: utils.py From fake-voice-detection with Apache License 2.0 | 5 votes |
def get_durations_from_dir(audio_dir, file_extension='.wav'):
    durations = list()
    for root, dirs, filenames in os.walk(audio_dir):
        for file_name in filenames:
            if file_extension in file_name:
                file_path = os.path.join(root, file_name)
                audio = AudioSegment.from_wav(file_path)
                duration = audio.duration_seconds
                durations.append(duration)
    return np.array(durations)
Example #12
Source File: midi2voice.py From midi2voice with MIT License | 5 votes |
def sinsyFix(wavPath, tempo):
    song = AudioSegment.from_wav(wavPath)
    # Delete the extra 4 beats of silence at the beginning of the file
    song = song[int(1000 * 4 * 60 / tempo):]
    song.export(wavPath, format="wav")
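
For example, at tempo = 120 BPM one beat lasts 60/120 = 0.5 s, so the slice drops int(1000 * 4 * 60 / 120) = 2000 ms of leading silence.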
Example #13
Source File: generator.py From Speech-Hacker with Apache License 2.0 | 5 votes |
def audio_generator(dict_dir, text, output_dest):
    with open(dict_dir + "/myDict.py") as f:
        myDict = ast.literal_eval(f.read())
    textList = text.split(" ")
    mainList = []
    for i in textList:
        if i in myDict.keys():
            mainList.append(AudioSegment.from_wav(dict_dir + "/" + myDict[i]))
    # Check to see if at least one word was generated
    if mainList == []:
        raise Exception('\033[91m' + "None of the words you entered was" +
                        " spoken by your figure." + '\033[0m')
    # If a file with the default name exists, create a new name with a new suffix
    res = 0
    while os.path.exists(output_dest + "/output" + str(res) + ".wav"):
        res += 1
    mainAudio = mainList[0]
    # Concatenate selected audio words
    for i in range(1, len(mainList)):
        mainAudio += mainList[i]
    # Export the joined audio
    mainAudio.export(output_dest + '/output' + str(res) + '.wav', format="wav")
    if os.path.exists(output_dest + "/output" + str(res) + ".wav"):
        print('\033[94m' + "Speech-Hacker: " + "Your audio was generated at: " +
              output_dest + "/output" + str(res) + ".wav" + '\033[0m')
    else:
        print("Speech-Hacker: " + '\033[91m' +
              "Failed to generate your requested audio." + '\033[0m')
Example #14
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 5 votes |
def addFrame(self, image_file, audio_file):
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(r"%s" % audio_file)
        f = sf.SoundFile(r"%s" % audio_file)
    except Exception as e:
        print(e)
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    duration = len(f) / f.samplerate
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip)
    self.durations.append(duration)
Example #15
Source File: sound_spleeter.py From dnn-model-services with MIT License | 4 votes |
def spleeter(audio_url=None, audio=None):
    try:
        audio_data = audio
        if audio_url:
            # Link
            if "http://" in audio_url or "https://" in audio_url:
                header = {'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:9.0) Gecko/20100101 Firefox/10.0'}
                # Check that the audio file is under 10 Mb
                r = requests.head(audio_url, headers=header, allow_redirects=True)
                size = r.headers.get('content-length', 0)
                size = int(size) / float(1 << 20)
                log.info("File size: {:.2f} Mb".format(size))
                if size > 10:
                    return {"error": "Input audio file is too large! (max 10Mb)"}
                r = requests.get(audio_url, headers=header, allow_redirects=True)
                audio_data = r.content
            # Base64
            elif len(audio_url) > 500:
                audio_data = base64.b64decode(audio_url)
        log.info("Preparing directories...")
        tmp_dir = "/tmp/" + generate_uid() + "/"
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        audio_path = generate_uid() + ".audio"
        with open(tmp_dir + audio_path, "wb") as f:
            f.write(audio_data)
        log.info("Preparing Spleeter...")
        # Using embedded configuration.
        separator = Separator("spleeter:2stems")
        separator.separate_to_file(tmp_dir + audio_path, tmp_dir)
        if os.path.exists(tmp_dir + audio_path):
            os.remove(tmp_dir + audio_path)
        # Getting the output files content
        out_dir = tmp_dir + audio_path.replace(".audio", "") + "/"
        output_vocals = out_dir + "vocals.wav"
        AudioSegment.from_wav(output_vocals).export(out_dir + "vocals.mp3", format="mp3")
        with open(out_dir + "vocals.mp3", "rb") as fv:
            vocals = fv.read()
        output_accomp = out_dir + "accompaniment.wav"
        AudioSegment.from_wav(output_accomp).export(out_dir + "accompaniment.mp3", format="mp3")
        with open(out_dir + "accompaniment.mp3", "rb") as fa:
            accomp = fa.read()
        # Deleting the files output directory
        shutil.rmtree(tmp_dir)
        return {"vocals": vocals, "accomp": accomp}
    except Exception as e:
        log.error(e)
        traceback.print_exc()
        return {"error": str(e)}
Example #16
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 4 votes |
def renderVideo(self):
    clips = self.videoformat.renderClips(self.content, self.title)
    self.videoformat.createMovie(clips, self)
    self.background_music_name = self.videoformat.music
    clips = []
    for i, transition in enumerate(self.transitions):
        print("Putting together clip (%s/%s)" % (i + 1, len(self.transitions)))
        transition_file_name = transition[0]
        last_image_index = transition[1]
        transition_duration = transition[2]
        if i == 0:
            clip = ImageSequenceClip(self.imageframes[0:last_image_index + 1],
                                     durations=self.durations[0:last_image_index + 1])
            combined_sounds = sum(self.audiofiles[0:last_image_index + 1])
            audio_name = "%s/%s%s.wav" % (settings.tempPath, "atestaudio", i)
            combined_sounds.export(audio_name, format="wav")
            video_clip = VideoFileClip(transition_file_name).fx(afx.volumex, settings.voice_volume)
            audio_clip = AudioFileClip(audio_name)
            clip = clip.set_audio(audio_clip)
            clip_with_interval = concatenate_videoclips([clip, video_clip])
            clips.append(clip_with_interval)
        else:
            prev_image_index = self.transitions[i - 1][1]
            clip = ImageSequenceClip(self.imageframes[prev_image_index + 1:last_image_index + 1],
                                     durations=self.durations[prev_image_index + 1:last_image_index + 1])
            combined_sounds = sum(self.audiofiles[prev_image_index + 1:last_image_index + 1])
            audio_name = "%s/%s%s.wav" % (settings.tempPath, "atestaudio", i)
            combined_sounds.export(audio_name, format="wav")
            video_clip = VideoFileClip(transition_file_name).fx(afx.volumex, settings.voice_volume)
            audio_clip = AudioFileClip(audio_name)
            clip = clip.set_audio(audio_clip)
            clip_with_interval = concatenate_videoclips([clip, video_clip])
            clips.append(clip_with_interval)
    main_vid_duration = 0
    for i in range(1, len(clips), 1):
        main_vid_duration += clips[i].duration
    print("Generating Audio Loop (%s) " % main_vid_duration)
    print("Using Audio Loop %s" % self.background_music_name)
    music_loop = afx.audio_loop(
        AudioFileClip(self.background_music_name).fx(afx.volumex, settings.background_music_volume),
        duration=int(main_vid_duration))
    music_loop.to_audiofile("%s/music-loop.wav" % settings.tempPath)
    pause_time = int(clips[0].duration * 1000)
    print("Adding pause to start of Audio Loop (%s) " % (pause_time / 1000))
    audio_clip = AudioSegment.from_wav("%s/music-loop.wav" % settings.tempPath)
    new_audio = AudioSegment.silent(duration=pause_time) + audio_clip
    new_audio.export("%s/music-loop2.wav" % settings.tempPath, format='wav')
    # here we are combining the first clip with the last
    print("Combining all Video Clips %s" % (pause_time / 1000))
    main_vid_combined = concatenate_videoclips(clips)
    main_vid_with_audio = main_vid_combined.set_audio(
        CompositeAudioClip([main_vid_combined.audio,
                            AudioFileClip("%s/music-loop2.wav" % settings.tempPath)]))
    folder_location = settings.finishedvideosdirectory + "/vid%s" % self.scriptno
    if not os.path.exists(folder_location):
        os.makedirs(folder_location)
    print("Writing video to location %s" % folder_location)
    main_vid_with_audio.write_videofile("%s/%s.mp4" % (folder_location, "vid%s" % self.scriptno),
                                        threads=4, fps=settings.movieFPS,
                                        temp_audiofile=settings.currentPath + "\\temp.mp3")
    return folder_location
Example #17
Source File: audiogrep.py From audiogrep with MIT License | 4 votes |
def compose(segments, out='out.mp3', padding=0, crossfade=0, layer=False):
    '''Stitches together a new audiotrack'''
    files = {}
    working_segments = []
    audio = AudioSegment.empty()
    if layer:
        total_time = max([s['end'] - s['start'] for s in segments]) * 1000
        audio = AudioSegment.silent(duration=total_time)
    for i, s in enumerate(segments):
        try:
            start = s['start'] * 1000
            end = s['end'] * 1000
            f = s['file'].replace('.transcription.txt', '')
            if f not in files:
                if f.endswith('.wav'):
                    files[f] = AudioSegment.from_wav(f)
                elif f.endswith('.mp3'):
                    files[f] = AudioSegment.from_mp3(f)
            segment = files[f][start:end]
            print(start, end, f)
            if layer:
                audio = audio.overlay(segment, times=1)
            else:
                if i > 0:
                    audio = audio.append(segment, crossfade=crossfade)
                else:
                    audio = audio + segment
            if padding > 0:
                audio = audio + AudioSegment.silent(duration=padding)
            s['duration'] = len(segment)
            working_segments.append(s)
        except:
            continue
    audio.export(out, format=os.path.splitext(out)[1].replace('.', ''))
    return working_segments
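
The output format is derived from the extension of out, and each segment dict needs start and end times in seconds plus the transcription file its media came from. A hypothetical call (file names invented for illustration):

# Hypothetical usage: times are in seconds; 'file' is the transcription
# path, from which compose() derives the matching .mp3/.wav media file.
segments = [
    {'start': 0.0, 'end': 1.5, 'file': 'interview.mp3.transcription.txt'},
    {'start': 4.2, 'end': 5.0, 'file': 'interview.mp3.transcription.txt'},
]
compose(segments, out='supercut.mp3', padding=100)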