Python pydub.AudioSegment.from_wav() Examples
The following are 17 code examples of pydub.AudioSegment.from_wav(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pydub.AudioSegment, or try the search function.
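
Before diving into the examples, here is a minimal, self-contained sketch of what AudioSegment.from_wav() gives you (file names are placeholders): pydub loads the WAV into an AudioSegment, slicing is done in milliseconds, and export() writes any format your ffmpeg build supports.

from pydub import AudioSegment

# Load a WAV file into an AudioSegment (path is a placeholder).
sound = AudioSegment.from_wav("input.wav")

# Slicing is by milliseconds: take the first two seconds.
first_two_seconds = sound[:2000]

# Inspect basic properties, then export.
print(sound.duration_seconds, sound.frame_rate, sound.channels)
first_two_seconds.export("clip.wav", format="wav")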
Example #1
Source File: youtube_processor.py From honk with MIT License | 6 votes |
def segment_audio(self, file_name, segments):
    audio_data = AudioSegment.from_wav(file_name)
    # Compute the output prefix once. (The original recomputed it from
    # file_name inside the loop but also reassigned file_name there, so every
    # iteration after the first derived its prefix from the previous output path.)
    file_prefix = os.path.basename(file_name).split('.')[0]
    for segment in segments:
        assert segment[0] < segment[1]
        center = round((segment[0] + segment[1]) / 2)
        padding = round(self.audio_length / 2)
        if center < padding:
            start_time = 0
        else:
            start_time = center - padding
        end_time = start_time + self.audio_length
        # pydub slice indices are milliseconds
        audio_segment = audio_data[start_time:end_time]
        out_name = os.path.join(self.output_dir, file_prefix + "_" + str(start_time) + "~" + str(end_time) + ".wav")
        print(out_name)
        audio_segment.export(out_name, format="wav")
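
The (start, end) pairs and self.audio_length above are all in milliseconds, because pydub slices by milliseconds. A standalone sketch of the windowing logic, with a silent segment standing in for the loaded file and a hypothetical detected segment:

from pydub import AudioSegment

# Sketch of the centered, clamped window used above (all values in ms).
audio = AudioSegment.silent(duration=5000)  # stands in for from_wav(...)
audio_length = 1000                         # window length
segment = (250, 900)                        # hypothetical (start_ms, end_ms)
center = round((segment[0] + segment[1]) / 2)
start = max(0, center - audio_length // 2)
window = audio[start:start + audio_length]
print(len(window))  # len() of an AudioSegment is its duration in ms: 1000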
Example #2
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithPause(self, image_file, audio_file, pause):
    audio_file = audio_file.replace("\\", "/")
    f = sf.SoundFile(audio_file)
    audio_clip = AudioSegment.from_wav(audio_file)
    duration = (len(f) / f.samplerate) + pause / 1000
    audio_clip_with_pause = audio_clip + AudioSegment.silent(duration=pause)
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration)
Example #3
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithTransition(self, image_file, audio_file, transition_file):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(r"%s" % audio_file)
        f = sf.SoundFile(r"%s" % audio_file)
    except Exception as e:
        print(e)
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    duration = (len(f) / f.samplerate)
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration)
    self.transitions.append((transition_file, len(self.imageframes) - 1, duration_in_ms / 1000))
Example #4
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = r"%s" % audio_file
    f = sf.SoundFile(audio_file)
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
    except:
        print("error with frame audio transition pause for %s" % audio_file)
        audio_clip = AudioSegment.silent(duration=pause)
    duration = (len(f) / f.samplerate)
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration + (pause / 1000))
    self.transitions.append((transition_file, len(self.imageframes) - 1, (duration_in_ms / 1000) + (pause / 1000)))
Example #5
Source File: main.py From HanTTS with MIT License | 6 votes |
def synthesize(self, text, src, dst):
    """
    Synthesize .wav from text.
    src is the folder that contains all syllable .wav files.
    dst is the destination folder for the synthesized file.
    """
    print("Synthesizing ...")
    delay = 0
    increment = 355  # milliseconds
    pause = 500  # pause for punctuation
    syllables = lazy_pinyin(text, style=pypinyin.TONE3)
    # initialize to complete silence; each character takes up ~500 ms
    result = AudioSegment.silent(duration=500 * len(text))
    for syllable in syllables:
        path = src + syllable + ".wav"
        sound_file = Path(path)
        # insert 500 ms of silence for punctuation marks
        if syllable in TextToSpeech.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            continue
        # skip sound files that don't exist
        if not sound_file.is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment
    directory = dst
    if not os.path.exists(directory):
        os.makedirs(directory)
    result.export(directory + "generated.wav", format="wav")
    print("Exported.")
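
The core trick here (and in Example #9 below) is overlaying each clip onto a pre-allocated silent canvas at a fixed position rather than concatenating, so every syllable starts on a regular 355 ms grid regardless of its own length. A minimal sketch of that pattern, using silent clips as stand-ins for syllable files:

from pydub import AudioSegment

canvas = AudioSegment.silent(duration=3 * 355)  # room for three slots
clip = AudioSegment.silent(duration=200)        # stands in for a syllable .wav
position = 0
for _ in range(3):
    canvas = canvas.overlay(clip, position=position)  # position is in ms
    position += 355
canvas.export("grid.wav", format="wav")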
Example #6
Source File: test_accuracy.py From nyumaya_audio_recognition with Apache License 2.0 | 6 votes |
def load_audio_file(filename, resize=False):
    sound = None
    try:
        if filename.endswith('.mp3') or filename.endswith('.MP3'):
            sound = AudioSegment.from_mp3(filename)
        elif filename.endswith('.wav') or filename.endswith('.WAV'):
            sound = AudioSegment.from_wav(filename)
        elif filename.endswith('.ogg'):
            sound = AudioSegment.from_ogg(filename)
        elif filename.endswith('.flac'):
            sound = AudioSegment.from_file(filename, "flac")
        elif filename.endswith('.3gp'):
            sound = AudioSegment.from_file(filename, "3gp")
        elif filename.endswith('.3g'):
            sound = AudioSegment.from_file(filename, "3gp")
        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except:
        print("Couldn't load file")
        return None, None
    return sound, duration
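
A hypothetical call, assuming the function above is in scope and that samplerate is a module-level constant as it is in the source project:

samplerate = 16000  # assumed; defined at module level in the source project
sound, duration = load_audio_file("sample.wav")
if sound is not None:
    print("loaded %.2f s of mono 16-bit audio at 16 kHz" % duration)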
Example #7
Source File: segment_mosei.py From Self-Supervised-Speech-Pretraining-and-Representation-Learning with MIT License | 5 votes |
def segment_mosei(args):
    output_dir = args.output_path
    mosei_summary = os.path.join(output_dir, 'mosei_no_semi.csv')
    flac_dir = os.path.join(output_dir, 'flac')
    assert os.path.exists(mosei_summary), 'Output path should already be created with a mosei_no_semi.csv inside it'
    for target_dir in [flac_dir]:
        if os.path.exists(target_dir):
            decision = input(f'{target_dir} already exists. Remove it? [Y/N]: ')
            if decision.upper() == 'Y':
                shutil.rmtree(target_dir)
                print(f'{target_dir} removed')
            else:
                print('Abort')
                exit(0)
        os.makedirs(target_dir)
    df = pd.read_csv(mosei_summary)
    for index, row in df.iterrows():
        underscore = row.key
        wavname = f'{row.filename}.wav'
        wavpath = os.path.join(args.data_path, wavname)
        assert os.path.exists(wavpath), f'wav not exists: {wavpath}'
        wav = AudioSegment.from_wav(wavpath)
        start = int(row.start * 1000)
        end = int(row.end * 1000)
        assert start >= 0, f'{underscore} has negative start time'
        assert end >= 0, f'{underscore} has negative end time'
        seg_wav = wav[start:end]
        seg_flacpath = os.path.join(flac_dir, f'{underscore}.flac')
        seg_wav.export(seg_flacpath, format='flac',
                       parameters=['-ac', '1', '-sample_fmt', 's16', '-ar', '16000'])
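
The export call above forwards raw ffmpeg flags through pydub's parameters argument, which is how the FLAC output ends up mono, 16-bit, 16 kHz. A minimal isolated sketch of that pass-through (file names are placeholders):

from pydub import AudioSegment

wav = AudioSegment.from_wav("utterance.wav")
wav.export("utterance.flac", format="flac",
           parameters=["-ac", "1", "-sample_fmt", "s16", "-ar", "16000"])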
Example #8
Source File: audiogrep.py From audiogrep with MIT License | 5 votes |
def extract_words(files):
    '''
    Extracts individual words from files and exports them to individual files.
    '''
    output_directory = 'extracted_words'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    for f in files:
        file_format = None
        source_segment = None
        if f.lower().endswith('.mp3'):
            file_format = 'mp3'
            source_segment = AudioSegment.from_mp3(f)
        elif f.lower().endswith('.wav'):
            file_format = 'wav'
            source_segment = AudioSegment.from_wav(f)
        # skip unsupported formats (fixed: the original tested
        # `not file_format or source_segment` and never skipped)
        if not file_format or not source_segment:
            print('Unsupported audio format for ' + f)
            continue
        sentences = convert_timestamps(files)
        for s in sentences:
            for word in s['words']:
                start = float(word[1]) * 1000
                end = float(word[2]) * 1000
                word = word[0]
                total_time = end - start
                audio = AudioSegment.silent(duration=total_time)
                audio = audio.overlay(source_segment[start:end])
                number = 0
                output_path = None
                while True:
                    output_filename = word
                    if number:
                        output_filename += "_" + str(number)
                    output_filename = output_filename + '.' + file_format
                    output_path = os.path.join(output_directory, output_filename)
                    if not os.path.exists(output_path):
                        # this file doesn't exist, so we can continue
                        break
                    # file already exists, increment name and try again
                    number += 1
                print('Exporting to: ' + output_path)
                audio.export(output_path, format=file_format)
Example #9
Source File: tts.py From parrots with Apache License 2.0 | 5 votes |
def synthesize(self, input_text='', output_wav_path=''):
    """
    Synthesize a .wav file from text.
    input_text: the text to synthesize
    output_wav_path: the path to save the synthesized file to
    """
    delay = 0
    increment = 355  # milliseconds
    pause = 500  # pause for punctuation
    syllables = lazy_pinyin(input_text, style=pypinyin.TONE3)
    # initialize to complete silence; each character takes up ~500 ms
    result = AudioSegment.silent(duration=500 * len(input_text))
    for syllable in syllables:
        path = os.path.join(self.syllables_dir, syllable + ".wav")
        sound_file = Path(path)
        # insert 500 ms of silence for punctuation marks
        if syllable in self.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            continue
        # skip sound files that don't exist
        if not sound_file.is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment
    if not output_wav_path:
        output_wav_path = 'out.wav'
    result.export(output_wav_path, format="wav")
    default_logger.debug("Exported:" + output_wav_path)
    return result
Example #10
Source File: dataset_tools.py From DeepMusicClassification with MIT License | 5 votes |
def slice_audio(audio_file, end=3):
    """ Slices a single audio file into 3 second chunks """
    start = 0
    end *= 1000
    audio = AudioSegment.from_wav(audio_file)
    slices = []
    for i in range(10):
        audio_slice = audio[start:end]
        slices.append(audio_slice)
        start += 3000
        end += 3000
    return slices
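
Note that the loop always produces exactly ten slices, so the input is assumed to be at least 30 seconds long (pydub silently returns a shorter, or empty, segment past the end). A hypothetical usage that writes the chunks out (file names are placeholders):

for i, chunk in enumerate(slice_audio("song.wav")):
    chunk.export("chunk_%02d.wav" % i, format="wav")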
Example #11
Source File: utils.py From fake-voice-detection with Apache License 2.0 | 5 votes |
def get_durations_from_dir(audio_dir, file_extension='.wav'):
    durations = list()
    for root, dirs, filenames in os.walk(audio_dir):
        for file_name in filenames:
            if file_extension in file_name:
                file_path = os.path.join(root, file_name)
                audio = AudioSegment.from_wav(file_path)
                duration = audio.duration_seconds
                durations.append(duration)
    return np.array(durations)
Example #12
Source File: midi2voice.py From midi2voice with MIT License | 5 votes |
def sinsyFix(wavPath, tempo):
    song = AudioSegment.from_wav(wavPath)
    # Delete the extra 4 beats of silence at the beginning of the file
    song = song[int(1000 * 4 * 60 / tempo):]
    song.export(wavPath, format="wav")
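
For example, at tempo = 120 BPM one beat lasts 60/120 = 0.5 s, so the slice drops int(1000 * 4 * 60 / 120) = 2000 ms of leading silence.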
Example #13
Source File: generator.py From Speech-Hacker with Apache License 2.0 | 5 votes |
def audio_generator(dict_dir, text, output_dest):
    with open(dict_dir + "/myDict.py") as f:
        myDict = ast.literal_eval(f.read())
    textList = text.split(" ")
    mainList = []
    for i in textList:
        if i in myDict.keys():
            mainList.append(AudioSegment.from_wav(dict_dir + "/" + myDict[i]))
    # Check to see if at least one word was generated
    if mainList == []:
        raise Exception('\033[91m' + "None of the words you entered was" +
                        " spoken by your figure." + '\033[0m')
    # If a file with the default name exists, create a new name with a new suffix
    res = 0
    while os.path.exists(output_dest + "/output" + str(res) + ".wav"):
        res += 1
    mainAudio = mainList[0]
    # Concatenate selected audio words
    for i in range(1, len(mainList)):
        mainAudio += mainList[i]
    # Export the joined audio
    mainAudio.export(output_dest + '/output' + str(res) + '.wav', format="wav")
    if os.path.exists(output_dest + "/output" + str(res) + ".wav"):
        print('\033[94m' + "Speech-Hacker: " + "Your audio was generated at: " +
              output_dest + "/output" + str(res) + ".wav" + '\033[0m')
    else:
        print("Speech-Hacker: " + '\033[91m' +
              "Failed to generate your requested audio." + '\033[0m')
Example #14
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 5 votes |
def addFrame(self, image_file, audio_file):
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(r"%s" % audio_file)
        f = sf.SoundFile(r"%s" % audio_file)
    except Exception as e:
        print(e)
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    duration = len(f) / f.samplerate
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip)
    self.durations.append(duration)
Example #15
Source File: sound_spleeter.py From dnn-model-services with MIT License | 4 votes |
def spleeter(audio_url=None, audio=None):
    try:
        audio_data = audio
        if audio_url:
            # Link
            if "http://" in audio_url or "https://" in audio_url:
                header = {'User-Agent': 'Mozilla/5.0 (Windows NT x.y; Win64; x64; rv:9.0) Gecko/20100101 Firefox/10.0'}
                # Check that the audio file is under 10 Mb
                r = requests.head(audio_url, headers=header, allow_redirects=True)
                size = r.headers.get('content-length', 0)
                size = int(size) / float(1 << 20)
                log.info("File size: {:.2f} Mb".format(size))
                if size > 10:
                    return {"error": "Input audio file is too large! (max 10Mb)"}
                r = requests.get(audio_url, headers=header, allow_redirects=True)
                audio_data = r.content
            # Base64
            elif len(audio_url) > 500:
                audio_data = base64.b64decode(audio_url)
        log.info("Preparing directories...")
        tmp_dir = "/tmp/" + generate_uid() + "/"
        if not os.path.exists(tmp_dir):
            os.makedirs(tmp_dir)
        audio_path = generate_uid() + ".audio"
        with open(tmp_dir + audio_path, "wb") as f:
            f.write(audio_data)
        log.info("Preparing Spleeter...")
        # Using embedded configuration.
        separator = Separator("spleeter:2stems")
        separator.separate_to_file(tmp_dir + audio_path, tmp_dir)
        if os.path.exists(tmp_dir + audio_path):
            os.remove(tmp_dir + audio_path)
        # Getting the output files content
        out_dir = tmp_dir + audio_path.replace(".audio", "") + "/"
        output_vocals = out_dir + "vocals.wav"
        AudioSegment.from_wav(output_vocals).export(out_dir + "vocals.mp3", format="mp3")
        with open(out_dir + "vocals.mp3", "rb") as fv:
            vocals = fv.read()
        output_accomp = out_dir + "accompaniment.wav"
        AudioSegment.from_wav(output_accomp).export(out_dir + "accompaniment.mp3", format="mp3")
        with open(out_dir + "accompaniment.mp3", "rb") as fa:
            accomp = fa.read()
        # Deleting the files output directory
        shutil.rmtree(tmp_dir)
        return {"vocals": vocals, "accomp": accomp}
    except Exception as e:
        log.error(e)
        traceback.print_exc()
        return {"error": str(e)}
Example #16
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 4 votes |
def renderVideo(self):
    clips = self.videoformat.renderClips(self.content, self.title)
    self.videoformat.createMovie(clips, self)
    self.background_music_name = self.videoformat.music
    clips = []
    for i, transition in enumerate(self.transitions):
        print("Putting together clip (%s/%s)" % (i + 1, len(self.transitions)))
        transition_file_name = transition[0]
        last_image_index = transition[1]
        transition_duration = transition[2]
        if i == 0:
            clip = ImageSequenceClip(self.imageframes[0:last_image_index + 1],
                                     durations=self.durations[0:last_image_index + 1])
            combined_sounds = sum(self.audiofiles[0:last_image_index + 1])
            audio_name = "%s/%s%s.wav" % (settings.tempPath, "atestaudio", i)
            combined_sounds.export(audio_name, format="wav")
            video_clip = VideoFileClip(transition_file_name).fx(afx.volumex, settings.voice_volume)
            audio_clip = AudioFileClip(audio_name)
            clip = clip.set_audio(audio_clip)
            clip_with_interval = concatenate_videoclips([clip, video_clip])
            clips.append(clip_with_interval)
        else:
            prev_image_index = self.transitions[i - 1][1]
            clip = ImageSequenceClip(self.imageframes[prev_image_index + 1:last_image_index + 1],
                                     durations=self.durations[prev_image_index + 1:last_image_index + 1])
            combined_sounds = sum(self.audiofiles[prev_image_index + 1:last_image_index + 1])
            audio_name = "%s/%s%s.wav" % (settings.tempPath, "atestaudio", i)
            combined_sounds.export(audio_name, format="wav")
            video_clip = VideoFileClip(transition_file_name).fx(afx.volumex, settings.voice_volume)
            audio_clip = AudioFileClip(audio_name)
            clip = clip.set_audio(audio_clip)
            clip_with_interval = concatenate_videoclips([clip, video_clip])
            clips.append(clip_with_interval)
    main_vid_duration = 0
    for i in range(1, len(clips), 1):
        main_vid_duration += clips[i].duration
    print("Generating Audio Loop (%s) " % main_vid_duration)
    print("Using Audio Loop %s" % self.background_music_name)
    music_loop = afx.audio_loop(
        AudioFileClip(self.background_music_name).fx(afx.volumex, settings.background_music_volume),
        duration=int(main_vid_duration))
    music_loop.to_audiofile("%s/music-loop.wav" % settings.tempPath)
    pause_time = int(clips[0].duration * 1000)
    print("Adding pause to start of Audio Loop (%s) " % (pause_time / 1000))
    audio_clip = AudioSegment.from_wav("%s/music-loop.wav" % settings.tempPath)
    new_audio = AudioSegment.silent(duration=pause_time) + audio_clip
    new_audio.export("%s/music-loop2.wav" % settings.tempPath, format='wav')
    # here we are combining the first clip with the last
    print("Combining all Video Clips %s" % (pause_time / 1000))
    main_vid_combined = concatenate_videoclips(clips)
    main_vid_with_audio = main_vid_combined.set_audio(
        CompositeAudioClip([main_vid_combined.audio,
                            AudioFileClip("%s/music-loop2.wav" % settings.tempPath)]))
    folder_location = settings.finishedvideosdirectory + "/vid%s" % self.scriptno
    if not os.path.exists(folder_location):
        os.makedirs(folder_location)
    print("Writing video to location %s" % folder_location)
    main_vid_with_audio.write_videofile("%s/%s.mp4" % (folder_location, "vid%s" % self.scriptno),
                                        threads=4, fps=settings.movieFPS,
                                        temp_audiofile=settings.currentPath + "\\temp.mp3")
    return folder_location
Example #17
Source File: audiogrep.py From audiogrep with MIT License | 4 votes |
def compose(segments, out='out.mp3', padding=0, crossfade=0, layer=False):
    '''Stitches together a new audiotrack'''
    files = {}
    working_segments = []
    audio = AudioSegment.empty()
    if layer:
        total_time = max([s['end'] - s['start'] for s in segments]) * 1000
        audio = AudioSegment.silent(duration=total_time)
    for i, s in enumerate(segments):
        try:
            start = s['start'] * 1000
            end = s['end'] * 1000
            f = s['file'].replace('.transcription.txt', '')
            if f not in files:
                if f.endswith('.wav'):
                    files[f] = AudioSegment.from_wav(f)
                elif f.endswith('.mp3'):
                    files[f] = AudioSegment.from_mp3(f)
            segment = files[f][start:end]
            print(start, end, f)
            if layer:
                audio = audio.overlay(segment, times=1)
            else:
                if i > 0:
                    audio = audio.append(segment, crossfade=crossfade)
                else:
                    audio = audio + segment
            if padding > 0:
                audio = audio + AudioSegment.silent(duration=padding)
            s['duration'] = len(segment)
            working_segments.append(s)
        except:
            continue
    audio.export(out, format=os.path.splitext(out)[1].replace('.', ''))
    return working_segments
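
The output format is derived from the extension of out, and each segment dict needs start and end times in seconds plus the transcription file its media came from. A hypothetical call (file names invented for illustration):

# Hypothetical usage: times are in seconds; 'file' is the transcription
# path, from which compose() derives the matching .mp3/.wav media file.
segments = [
    {'start': 0.0, 'end': 1.5, 'file': 'interview.mp3.transcription.txt'},
    {'start': 4.2, 'end': 5.0, 'file': 'interview.mp3.transcription.txt'},
]
compose(segments, out='supercut.mp3', padding=100)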