Python pydub.AudioSegment.from_file() Examples
The following are 30 code examples of pydub.AudioSegment.from_file(), collected from open-source projects. The originating project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the pydub.AudioSegment module.
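
Before the project examples, here is a minimal usage sketch of AudioSegment.from_file() itself. The file names are placeholders, and the snippet assumes ffmpeg (or libav) is installed for non-wav formats:

from pydub import AudioSegment

# Load a file; pydub can infer the format from the extension,
# or it can be passed explicitly via the format argument.
song = AudioSegment.from_file("example.mp3", format="mp3")

print(song.frame_rate, song.channels, song.sample_width, song.duration_seconds)

# AudioSegment objects are immutable: conversions return new segments.
mono_16k = song.set_channels(1).set_frame_rate(16000)
mono_16k.export("example_16k_mono.wav", format="wav")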
Example #1
Source File: audio.py, from parallel-wavenet-vocoder (MIT License), 8 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #2
Source File: data_maker.py, from AudioNet (MIT License), 7 votes

def makechunks(path):
    folders = glob.glob(path + '*')
    for folder in folders:
        waves = glob.glob(folder + '/' + '*.wav')
        print('w', waves)
        if len(waves) == 0:
            return 10
        for i in waves:
            w = i
            myaudio = AudioSegment.from_file(i, 'wav')
            chunk_length_ms = 20000
            chunks = make_chunks(myaudio, chunk_length_ms)
            print(chunks)
            for i, chunk in enumerate(chunks):
                chunk_name = w.split('.')[0] + "chunk{0}.wav".format(i)
                print(chunk_name)
                print("exporting", chunk_name)
                chunk.export(folder + '/' + chunk_name, format="wav")
Example #3
Source File: audioBasicIO.py, from pyAudioAnalysis (Apache License 2.0), 7 votes

def read_audio_generic(input_file):
    """
    Function to read audio files with the following extensions
    [".mp3", ".wav", ".au", ".ogg"]
    """
    sampling_rate = -1
    signal = np.array([])
    try:
        audiofile = AudioSegment.from_file(input_file)
        data = np.array([])
        if audiofile.sample_width == 2:
            data = numpy.fromstring(audiofile._data, numpy.int16)
        elif audiofile.sample_width == 4:
            data = numpy.fromstring(audiofile._data, numpy.int32)

        if data.size > 0:
            sampling_rate = audiofile.frame_rate
            temp_signal = []
            for chn in list(range(audiofile.channels)):
                temp_signal.append(data[chn::audiofile.channels])
            signal = numpy.array(temp_signal).T
    except:
        print("Error: file not found or other I/O error. (DECODING FAILED)")
    return sampling_rate, signal
Example #4
Source File: testing.py, from Jamais-Vu (MIT License), 6 votes

def get_length_audio(audiopath, extension):
    """
    Returns length of audio in seconds.
    Returns None if format isn't supported or in case of error.
    """
    try:
        audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
    except:
        print "Error in get_length_audio(): %s" % traceback.format_exc()
        return None
    return int(len(audio) / 1000.0)
Example #5
Source File: utils.py, from bard (GNU General Public License v3.0), 6 votes

def decodeAudio(filething):
    if hasattr(filething, 'seek'):
        filething.seek(0)
        filecontents = filething.read()
        data, properties = bard_audiofile.decode(data=filecontents)
    else:
        data, properties = bard_audiofile.decode(path=filething)

    if config['enable_internal_checks']:
        FILES_PYDUB_CANT_DECODE_RIGHT = \
            ['/mnt/DD8/media/mp3/id13/k3/software_libre-hq.ogg']
        if hasattr(filething, 'seek'):
            filething.seek(0)
        audio_segment = AudioSegment.from_file(filething)

        if (audio_segment.raw_data != data
                and filething not in FILES_PYDUB_CANT_DECODE_RIGHT):
            with open('/tmp/decoded-song-pydub.raw', 'wb') as f:
                f.write(audio_segment.raw_data)
            with open('/tmp/decoded-song-bard_audiofile.raw', 'wb') as f:
                f.write(data)
            raise Exception('DECODED AUDIO IS DIFFERENT BETWEEN '
                            'BARD_AUDIOFILE AND PYDUB')

        print('bard_audiofile/pydub decode check ' +
              TerminalColors.Ok + 'OK' + TerminalColors.ENDC)

    return data, DecodedAudioPropertiesTupleFromDict(properties)
Example #6
Source File: audio.py, from deep-voice-conversion (MIT License), 6 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #7
Source File: song.py, from bard (GNU General Public License v3.0), 6 votes

def calculateSilences(self, threshold=None, min_length=None):
    try:
        audio_segment = AudioSegment.from_file(self.path())
    except PydubException as exc:
        print('Error processing:', self.path(), ':', exc)
        raise
    self._audioSha256sum = calculateSHA256_data(audio_segment.raw_data)
    thr = threshold or Song.silence_threshold
    minlen = min_length or Song.min_silence_length
    silences = detect_silence_at_beginning_and_end(audio_segment,
                                                   min_silence_len=minlen,
                                                   silence_thresh=thr)
    if silences:
        silence1, silence2 = silences
        self._silenceAtStart = (silence1[1] - silence1[0]) / 1000
        self._silenceAtEnd = (silence2[1] - silence2[0]) / 1000
Example #8
Source File: VinylBackgroundSpectrogramGenerator.py, from crnn-lid (GNU General Public License v3.0), 6 votes

def audioToSpectrogram(self, file, pixel_per_sec, height):
    noise_file_index = random.randint(1, len(NOISE_FILES_LENGTH))
    noise_file_name = "vinyl_noise/normalized-noise{}.wav".format(noise_file_index)

    with tempfile.NamedTemporaryFile(suffix='.wav') as noisy_speech_file:
        noise = AudioSegment.from_file(noise_file_name)
        speech = AudioSegment.from_file(file)
        speech.apply_gain(noise.dBFS - speech.dBFS)
        noisy_speech = speech.overlay(noise - 10, loop=True)
        noisy_speech.export(noisy_speech_file.name, format="wav")
        # shutil.copyfile(noisy_speech_file.name, os.path.join("/extra/tom/news2/debug", "mixed_" + os.path.basename(noisy_speech_file.name)))

        with tempfile.NamedTemporaryFile(suffix='.png') as image_file:
            command = "{} -n remix 1 rate 10k spectrogram -y {} -X {} -m -r -o {}".format(
                noisy_speech_file.name, height, pixel_per_sec, image_file.name)
            sox.core.sox([command])
            # spectrogram can be inspected at image_file.name
            image = Image.open(image_file.name)
            return np.array(image)
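
A note on the speech.apply_gain(...) call above: AudioSegment objects are immutable, so apply_gain() returns a new, gain-adjusted segment rather than modifying speech in place, and that line discards the result as written. If the intent is to match the speech level to the noise level before overlaying, the return value would need to be assigned back, roughly:

speech = speech.apply_gain(noise.dBFS - speech.dBFS)  # keep the returned segment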
Example #9
Source File: MusicBackgroundSpectrogramGenerator.py, from crnn-lid (GNU General Public License v3.0), 6 votes

def audioToSpectrogram(self, file, pixel_per_sec, height):
    noise_file_index = random.randint(1, 190)
    noise_file_name = "/data/tom/backgroundmusic/normalized-background_{}.wav".format(noise_file_index)

    with tempfile.NamedTemporaryFile(suffix='.wav') as noisy_speech_file:
        noise = AudioSegment.from_file(noise_file_name)
        speech = AudioSegment.from_file(file)
        # speech.apply_gain(noise.dBFS - speech.dBFS)
        noisy_speech = speech.overlay(noise - 5, loop=True)
        noisy_speech.export(noisy_speech_file.name, format="wav")
        # shutil.copyfile(noisy_speech_file.name, os.path.join("/extra/tom/news2/debug", "mixed_" + os.path.basename(noisy_speech_file.name)))

        with tempfile.NamedTemporaryFile(suffix='.png') as image_file:
            command = "{} -n remix 1 rate 10k spectrogram -y {} -X {} -m -r -o {}".format(
                noisy_speech_file.name, height, pixel_per_sec, image_file.name)
            sox.core.sox([command])
            # spectrogram can be inspected at image_file.name
            image = Image.open(image_file.name)
            return np.array(image)
Example #10
Source File: fft_analyser.py, from code-jam-5 (MIT License), 6 votes

def reset_media(self):
    """Resets the media to the currently playing song."""
    audio_file = self.player.currentMedia().canonicalUrl().path()
    if os.name == 'nt' and audio_file.startswith('/'):
        audio_file = audio_file[1:]
    if audio_file:
        try:
            self.song = AudioSegment.from_file(audio_file).set_channels(1)
        except PermissionError:
            self.start_animate = False
        else:
            self.samples = np.array(self.song.get_array_of_samples())
            self.max_sample = self.samples.max()
            self.maximum_amp = self.max_sample // 4
            self.points = np.zeros(self.resolution)
            self.start_animate = True
    else:
        self.start_animate = False
Example #11
Source File: prepro.py, from voice-vector (MIT License), 6 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #12
Source File: audio.py, from voice-vector (MIT License), 6 votes

def prepro_audio(source_path, target_path, format=None, sr=None, db=None):
    """
    Read a wav, change sample rate, format, and average decibel and write to target path.
    :param source_path: source wav file path
    :param target_path: target wav file path
    :param sr: sample rate.
    :param format: output audio format.
    :param db: decibel.
    """
    sound = AudioSegment.from_file(source_path, format)
    if sr:
        sound = sound.set_frame_rate(sr)
    if db:
        change_dBFS = db - sound.dBFS
        sound = sound.apply_gain(change_dBFS)
    sound.export(target_path, 'wav')
Example #13
Source File: decoder.py, from Jamais-Vu (MIT License), 6 votes

def read(filename, limit=None):
    """
    Reads any file supported by pydub (ffmpeg) and returns the data contained
    within. If file reading fails due to input being a 24-bit wav file,
    wavio is used as a backup.

    Can be optionally limited to a certain amount of seconds from the start
    of the file by specifying the `limit` parameter. This is the amount of
    seconds from the start of the file.

    returns: (channels, samplerate)
    """
    # pydub does not support 24-bit wav files, use wavio when this occurs
    try:
        audiofile = AudioSegment.from_file(filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        fs, _, audiofile = wavio.readwav(filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        audiofile = audiofile.T
        audiofile = audiofile.astype(np.int16)

        channels = []
        for chn in audiofile:
            channels.append(chn)

    return channels, audiofile.frame_rate, unique_hash(filename)
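
This example (like Examples #14 and #23 below) converts pydub's raw bytes with np.fromstring, which newer NumPy releases have deprecated. A minimal sketch of the same per-channel split using np.frombuffer, assuming 16-bit samples and a placeholder path:

from pydub import AudioSegment
import numpy as np

audiofile = AudioSegment.from_file("track.mp3")  # placeholder path
data = np.frombuffer(audiofile._data, dtype=np.int16)  # assumes sample_width == 2
channels = [data[chn::audiofile.channels] for chn in range(audiofile.channels)]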
Example #14
Source File: reader_file.py, from audio-fingerprint-identifying-python (MIT License), 6 votes

def parse_audio(self):
    limit = None
    # limit = 10
    songname, extension = os.path.splitext(os.path.basename(self.filename))

    try:
        audiofile = AudioSegment.from_file(self.filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        print('audioop.error')
        pass
        # fs, _, audiofile = wavio.readwav(filename)
        # if limit:
        #     audiofile = audiofile[:limit * 1000]
        # audiofile = audiofile.T
        # audiofile = audiofile.astype(np.int16)
        # channels = []
        # for chn in audiofile:
        #     channels.append(chn)

    return {
        "songname": songname,
        "extension": extension,
        "channels": channels,
        "Fs": audiofile.frame_rate,
        "file_hash": self.parse_file_hash()
    }
Example #15
Source File: test_accuracy.py, from nyumaya_audio_recognition (Apache License 2.0), 6 votes

def load_audio_file(filename, resize=False):
    sound = None
    try:
        if filename.endswith('.mp3') or filename.endswith('.MP3'):
            sound = AudioSegment.from_mp3(filename)
        elif filename.endswith('.wav') or filename.endswith('.WAV'):
            sound = AudioSegment.from_wav(filename)
        elif filename.endswith('.ogg'):
            sound = AudioSegment.from_ogg(filename)
        elif filename.endswith('.flac'):
            sound = AudioSegment.from_file(filename, "flac")
        elif filename.endswith('.3gp'):
            sound = AudioSegment.from_file(filename, "3gp")
        elif filename.endswith('.3g'):
            sound = AudioSegment.from_file(filename, "3gp")

        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except:
        print("Couldn't load file")
        return None, None

    return sound, duration
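
As a side note, the per-extension dispatch above can usually be collapsed, since from_file() accepts all of these formats and can typically infer the format from the file extension. A rough sketch, with samplerate standing in for the module-level constant used above:

sound = AudioSegment.from_file(filename)  # mp3/wav/ogg/flac/3gp alike
sound = sound.set_frame_rate(samplerate).set_channels(1).set_sample_width(2)
duration = sound.duration_seconds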
Example #16
Source File: io.py, from auditok (MIT License), 5 votes

def get_audio_source(input=None, **kwargs):
    """
    Create and return an AudioSource from input.

    Parameters:

    `input` : str, bytes, "-" or None
        Source to read audio data from. If str, it should be a path to a
        valid audio file. If bytes, it is interpreted as raw audio data.
        If equals to "-", raw data will be read from stdin. If None, read
        audio data from microphone using PyAudio.
    """
    if input == "-":
        return StdinAudioSource(*_get_audio_parameters(kwargs))

    if isinstance(input, bytes):
        return BufferAudioSource(input, *_get_audio_parameters(kwargs))

    # read data from a file
    if input is not None:
        return from_file(filename=input, **kwargs)

    # read data from microphone via pyaudio
    else:
        frames_per_buffer = kwargs.get("frames_per_buffer", 1024)
        input_device_index = kwargs.get("input_device_index")
        return PyAudioSource(
            *_get_audio_parameters(kwargs),
            frames_per_buffer=frames_per_buffer,
            input_device_index=input_device_index
        )
Example #17
Source File: voice.py, from voice-corpus-tool (Mozilla Public License 2.0), 5 votes

def augment_sample(augmentation):
    index, src_file, dst_file, overlays, gain = augmentation
    orig_seg = AudioSegment.from_file(src_file, format="wav")
    aug_seg = AudioSegment.silent(duration=len(orig_seg))
    for overlay in overlays:
        offset, overlay_file = overlay
        overlay_seg = AudioSegment.from_file(overlay_file, format="wav")
        if offset < 0:
            overlay_seg = overlay_seg[-offset:]
            offset = 0
        aug_seg = aug_seg.overlay(overlay_seg, position=offset)
    aug_seg = aug_seg + (orig_seg.dBFS - aug_seg.dBFS + gain)
    orig_seg = orig_seg.overlay(aug_seg)
    orig_seg.export(dst_file, format="wav")
    return (index, dst_file)
Example #18
Source File: voice.py, from voice-corpus-tool (Mozilla Public License 2.0), 5 votes

def _compr(self, kbit):
    def add_compr(s):
        with tempfile.TemporaryFile() as f:
            seg = s.read_audio_segment()
            seg.export(f, format='mp3', bitrate='%dk' % kbit)
            f.seek(0, 0)
            s.write_audio_segment(AudioSegment.from_file(f, format='mp3'))
    self._map('Adding compression artifacts...', self.samples, add_compr, worker_count=1)
    log('Applied compression artifacts to %d samples in buffer.' % len(self.samples))
Example #19
Source File: voice.py, from voice-corpus-tool (Mozilla Public License 2.0), 5 votes

def read_audio_segment(self):
    self.write()
    return AudioSegment.from_file(self.file.filename, format="wav")
Example #20
Source File: io.py, from auditok (MIT License), 5 votes

def _load_with_pydub(filename, audio_format):
    """
    Open compressed audio file using pydub. If a video file is passed, its
    audio track(s) are extracted and loaded. This function should not be
    called directely, use :func:`from_file` instead.

    :Parameters:

    `filename`: path to audio file.
    `audio_format`: string, audio file format (e.g. raw, webm, wav, ogg)
    """
    func_dict = {
        "mp3": AudioSegment.from_mp3,
        "ogg": AudioSegment.from_ogg,
        "flv": AudioSegment.from_flv,
    }
    open_function = func_dict.get(audio_format, AudioSegment.from_file)
    segment = open_function(filename)
    return BufferAudioSource(
        data=segment.raw_data,
        sampling_rate=segment.frame_rate,
        sample_width=segment.sample_width,
        channels=segment.channels,
    )
Example #21
Source File: feature.py, from mgc-django (GNU General Public License v2.0), 5 votes

def extract(file):
    """
    Extracts audio from a given file
    First the audio is converted into wav format
    """
    s = file.split('.')
    file_format = s[len(s) - 1]

    try:
        song = AudioSegment.from_file(file, file_format)
        # song = AudioSegment.from_mp3(file)
        song = song[:30 * 1000]
        song.export(file[:-3] + "wav", format="wav")
        file = file[:-3] + "wav"
    except Exception as e:
        print(e)

    try:
        (rate, data) = scipy.io.wavfile.read(file)
        mfcc_feat = mfcc(data, rate)
        # redusing mfcc dimension to 104
        mm = np.transpose(mfcc_feat)
        mf = np.mean(mm, axis=1)
        cf = np.cov(mm)
        ff = mf
        # ff is a vector of size 104
        for i in range(mm.shape[0]):
            ff = np.append(ff, np.diag(cf, i))

        if file_format != 'wav':
            os.remove(file)
        return ff.reshape(1, -1)
    except Exception as e:
        print(e)
Example #22
Source File: utils.py, from bard (GNU General Public License v3.0), 5 votes

def calculateAudioTrackSHA256_pyav(path):
    data, properties = audioSamplesFromAudioFile(path)
    audioSha256sum = calculateSHA256_data(data)
    # print('size:', len(audio_segment.raw_data))

    if config['enable_internal_checks']:
        if hasattr(path, 'seek'):
            path.seek(0)
        audio_segment = AudioSegment.from_file(path)
        pydubAudioSha256sum = calculateSHA256_data(audio_segment.raw_data)

        if audio_segment.raw_data != data or \
                pydubAudioSha256sum != audioSha256sum:
            raise Exception('SHA256sum IS DIFFERENT BETWEEN PYAV AND PYDUB')

        print('pyav/pydub decode check ' + TerminalColors.Ok + 'OK' +
              TerminalColors.ENDC)

    return audioSha256sum, data, properties
Example #23
Source File: filereader.py, from shazam-demo (MIT License), 5 votes

def parse_audio(self):
    limit = None
    songname, extension = os.path.splitext(os.path.basename(self.filename))

    try:
        audiofile = AudioSegment.from_file(self.filename)

        if limit:
            audiofile = audiofile[:limit * 1000]

        data = np.fromstring(audiofile._data, np.int16)

        channels = []
        for chn in xrange(audiofile.channels):
            channels.append(data[chn::audiofile.channels])

        fs = audiofile.frame_rate
    except audioop.error:
        print('audioop.error')
        pass

    return {
        "songname": songname,
        "extension": extension,
        "channels": channels,
        "Fs": audiofile.frame_rate,
        "file_hash": self.parse_file_hash()
    }
Example #24
Source File: utils.py, from bard (GNU General Public License v3.0), 5 votes

def calculateAudioTrackSHA256_pydub(path):
    audio_segment = AudioSegment.from_file(path)
    audioSha256sum = calculateSHA256_data(audio_segment.raw_data)
    # print('size:', len(audio_segment.raw_data))
    return audioSha256sum
Example #25
Source File: backup.py, from bard (GNU General Public License v3.0), 5 votes

def remoteFileAudioSha256Sum(path, sftp):
    data = remoteFile(path, sftp)
    if not data:
        return None
    audio_segment = AudioSegment.from_file(data)
    return calculateSHA256_data(audio_segment.raw_data)
Example #26
Source File: audiotools.py, from kur (Apache License 2.0), 5 votes

def get_mime_type(filename):
    """ Returns the MIME type associated with a particular audio file.
    """
    try:
        import magic
    except ImportError:
        if get_mime_type.warn:
            logger.warning('Python package "magic" could not be loaded, '
                'possibly because system library "libmagic" could not be '
                'found. We are falling back on our own heuristics.')
            get_mime_type.warn = False
        ext = os.path.splitext(filename)[1].lower()
        return {
            '.wav' : 'audio/x-wav',
            '.mp3' : 'audio/mpeg',
            '.flac' : 'audio/x-flac'
        }.get(ext, 'unknown')
    else:
        # Read off magic numbers and return MIME types
        mime_magic = magic.Magic(mime=True)
        ftype = mime_magic.from_file(filename)
        if isinstance(ftype, bytes):
            ftype = ftype.decode('utf-8')

        # If we are dealing with a symlink, read the link
        # and try again with the target file. We do this in
        # a while loop to cover the case of symlinks which
        # point to other symlinks.
        current_filename = filename
        while ftype == 'inode/symlink':
            current_filename = os.readlink(current_filename)
            ftype = mime_magic.from_file(current_filename)
            ftype = ftype.decode('utf-8') if isinstance(ftype, bytes) else ftype

        return ftype
Example #27
Source File: audiotools.py, from kur (Apache License 2.0), 5 votes

def load_pydub(filename):
    """ Loads an MP3 or FLAC file.
    """
    try:
        from pydub import AudioSegment
        data = AudioSegment.from_file(filename)
    except ImportError:
        logger.exception('"pydub" is a required Python dependency for '
            'handling this audio file: %s.', filename)
        raise
    except FileNotFoundError:
        if os.path.isfile(filename):
            raise DependencyError()
        else:
            raise

    if data.channels > 1:
        data = functools.reduce(
            lambda x, y: x.overlay(y),
            data.split_to_mono()
        )

    raw = data.get_array_of_samples()
    raw = numpy.frombuffer(raw, dtype=raw.typecode)

    return {
        'signal' : raw,
        'sample_rate' : data.frame_rate,
        'sample_width' : data.sample_width * 8,
        'channels' : data.channels
    }
Example #28
Source File: audio.py, from Speech_emotion_recognition_BLSTM (MIT License), 5 votes

def extract_audio_track(self):
    for video in glob.glob(self._video_path + '*.mp4'):
        wav_filename = self._out_path + os.path.splitext(os.path.basename(video))[0] + '.wav'
        AudioSegment.from_file(video).export(wav_filename, format='wav')
Example #29
Source File: pydub_utils.py, from nnabla (Apache License 2.0), 5 votes

def auread(path, channel_first=False, raw_format_param=None):
    """
    Read audio with pydub module.

    Args:
        path (str or 'file object'): File path or object to read from.
            Currently only support .wav format audio, .raw format audio
            could be read only when additional params are provided.
        channel_first (bool):
            This argument specifies the shape of audio is whether
            (samples, channels) or (channels, samples).
            Default value is False, which means the audio shape shall
            be (samples, channels).
        raw_format_param (object):
            If audio is raw format, user should provide this object,
            example:
                {
                    'sample_width': 2,
                    'channels': 2,
                    'frame_rate': 44100
                }

    Returns:
        numpy.ndarray
    """
    _auread_before(path, raw_format_param)
    filepath = path if isinstance(path, str) else path.name
    audio_format = os.path.splitext(filepath)[-1][1:]
    if audio_format == 'raw':
        audio = AudioSegment.from_file(
            path, format=audio_format, **raw_format_param)
    else:
        audio = AudioSegment.from_file(path, format=audio_format)
    audio_arr = get_nparray_from_pydub(audio)
    if audio_arr.dtype.itemsize == 1 and audio_format == 'wav':
        # 8-bit wav file value should in uint8 range, but pydub read it as int8
        audio_arr = audio_arr.astype(np.uint8)
    return _auread_after(audio_arr, channel_first)
Example #30
Source File: utils.py, from fake-voice-detection (Apache License 2.0), 5 votes

def convert_to_flac(dir_path):
    for file_path in os.listdir(dir_path):
        if file_path.split('.')[-1] != "flac":
            read_file = AudioSegment.from_file(os.path.join(dir_path, file_path),
                                               file_path.split('.')[-1])
            os.remove(os.path.join(dir_path, file_path))
            base_name = file_path.split('.')[:-1]
            # read_file = read_file.set_channels(8)
            # base_name = ".".join(base_name)
            read_file.export(os.path.join(dir_path, f"{base_name[0]}.flac"), format="flac")