Python soundfile.SoundFile() Examples
The following are 28 code examples of soundfile.SoundFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module soundfile, or try the search function.
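Before diving into the project code, here is a minimal sketch of the core API that every example below builds on; the file name is a placeholder and the dtype is one common choice:

import soundfile as sf

# Opening a SoundFile exposes header metadata without decoding any samples.
with sf.SoundFile('example.wav') as f:  # placeholder path
    print(f.samplerate)  # frames per second
    print(f.channels)    # number of channels
    print(len(f))        # length in frames
    audio = f.read(dtype='float32')  # numpy array: (frames,) or (frames, channels)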
Example #1
Source File: zip_io.py From pykaldi2 with MIT License | 6 votes |
def read_wav(self, wavfilename):
    # '@/' in the name marks a file stored inside a zip archive.
    if wavfilename.find('@' + '/') >= 0:
        zip_obj, file_inzip = self.get_zip_obj_and_filename(wavfilename, zip_mode='r')
        byte_chunk = zip_obj.read(file_inzip)
        byte_stream = io.BytesIO(byte_chunk)
        with soundfile.SoundFile(byte_stream, 'r') as f:
            fs_read = f.samplerate
            x = f.read()
    else:
        with soundfile.SoundFile(wavfilename, 'r') as f:
            fs_read = f.samplerate
            x = f.read()
    if fs_read != self.fs:
        x = resampy.resample(x, fs_read, self.fs)
        fs_read = self.fs
    return fs_read, x.astype(self.dtype)
Example #2
Source File: audioread.py From pb_chime5 with MIT License | 6 votes |
def audio_shape(path):
    """
    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_shape(path)
    122111
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_shape(path)  # correct for multichannel
    (8, 960000)
    >>> audioread(path)[0].shape
    (8, 960000)
    """
    with soundfile.SoundFile(str(path)) as f:
        channels = f.channels
        if channels == 1:
            return len(f)
        else:
            return channels, len(f)
Example #3
Source File: preprocess_wham.py From asteroid with MIT License | 6 votes |
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        samples = sf.SoundFile(wav_path)
        file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4)
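A recurring pattern in these asteroid preprocessing scripts (this example and the two that follow) is len(sf.SoundFile(wav_path)) to get the frame count without decoding audio, which leaves the file handle for the garbage collector to reclaim. A sketch of an alternative using the public soundfile.info() helper, which closes the file itself, assuming a recent python-soundfile release:

import soundfile as sf

def wav_length(wav_path):
    # soundfile.info() reads only the header and closes the file again;
    # .frames is the same value as len(sf.SoundFile(wav_path)).
    return sf.info(wav_path).frames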
Example #4
Source File: preprocess_wsj0mix.py From asteroid with MIT License | 6 votes |
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        samples = sf.SoundFile(wav_path)
        file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4)
Example #5
Source File: preprocess_kinect_wsj.py From asteroid with MIT License | 6 votes |
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        samples = sf.SoundFile(wav_path)
        file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4)
Example #6
Source File: segment.py From training with Apache License 2.0 | 6 votes |
def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
              duration=0, trim=False):
    """
    Load a file supported by librosa and return as an AudioSegment.
    :param filename: path of file to load
    :param target_sr: the desired sample rate
    :param int_values: if true, load samples as 32-bit integers
    :param offset: offset in seconds when loading audio
    :param duration: duration in seconds when loading audio
    :return: numpy array of samples
    """
    with sf.SoundFile(filename, 'r') as f:
        dtype = 'int32' if int_values else 'float32'
        sample_rate = f.samplerate
        if offset > 0:
            f.seek(int(offset * sample_rate))
        if duration > 0:
            samples = f.read(int(duration * sample_rate), dtype=dtype)
        else:
            samples = f.read(dtype=dtype)
    samples = samples.transpose()
    return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
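The seek/read combination in from_file() is also useful on its own for reading a window of audio without decoding the whole file. A minimal sketch under the same conventions (read_window is a hypothetical helper name):

import soundfile as sf

def read_window(path, offset, duration):
    # Read `duration` seconds of audio starting `offset` seconds into the file.
    with sf.SoundFile(path, 'r') as f:
        f.seek(int(offset * f.samplerate))
        return f.read(int(duration * f.samplerate), dtype='float32'), f.samplerate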
Example #7
Source File: segment.py From NeMo with Apache License 2.0 | 6 votes |
def segment_from_file(cls, audio_file, target_sr=None, n_segments=0, trim=False):
    """Grabs n_segments samples from audio_file, starting at a random position
    in the file as opposed to a specified offset.

    Note that audio_file can be either the file path, or a file-like object.
    """
    with sf.SoundFile(audio_file, 'r') as f:
        sample_rate = f.samplerate
        if n_segments > 0 and len(f) > n_segments:
            max_audio_start = len(f) - n_segments
            audio_start = random.randint(0, max_audio_start)
            f.seek(audio_start)
            samples = f.read(n_segments, dtype='float32')
        else:
            samples = f.read(dtype='float32')
    samples = samples.transpose()
    return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
Example #8
Source File: segment.py From NeMo with Apache License 2.0 | 6 votes |
def from_file(
    cls, audio_file, target_sr=None, int_values=False, offset=0, duration=0, trim=False,
):
    """
    Load a file supported by librosa and return as an AudioSegment.
    :param audio_file: path of file to load
    :param target_sr: the desired sample rate
    :param int_values: if true, load samples as 32-bit integers
    :param offset: offset in seconds when loading audio
    :param duration: duration in seconds when loading audio
    :return: numpy array of samples
    """
    with sf.SoundFile(audio_file, 'r') as f:
        dtype = 'int32' if int_values else 'float32'
        sample_rate = f.samplerate
        if offset > 0:
            f.seek(int(offset * sample_rate))
        if duration > 0:
            samples = f.read(int(duration * sample_rate), dtype=dtype)
        else:
            samples = f.read(dtype=dtype)
    samples = samples.transpose()
    return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
Example #9
Source File: write_files.py From sms_wsj with MIT License | 6 votes |
def audio_read(example):
    """
    :param example: example dict
    :return: example dict with audio_data added
    """
    audio_keys = ['rir', 'speech_source']
    keys = list(example['audio_path'].keys())
    example['audio_data'] = dict()
    for audio_key in audio_keys:
        assert audio_key in keys, (
            f'Trying to read {audio_key} but only {keys} are available'
        )
        audio_data = list()
        for wav_file in example['audio_path'][audio_key]:
            with soundfile.SoundFile(wav_file, mode='r') as f:
                audio_data.append(f.read().T)
        example['audio_data'][audio_key] = np.array(audio_data)
    return example
Example #10
Source File: Metadata.py From AdversarialAudioSeparation with MIT License | 6 votes |
def get_audio_metadata(audioPath, sphereType=False):
    '''
    Returns sampling rate, number of channels and duration of an audio file
    :param audioPath:
    :param sphereType:
    :return:
    '''
    ext = os.path.splitext(audioPath)[1][1:].lower()
    if ext == "aiff" or sphereType:  # SPHERE headers for the TIMIT dataset
        audio = scikits.audiolab.Sndfile(audioPath)
        sr = audio.samplerate
        channels = audio.channels
        duration = float(audio.nframes) / float(audio.samplerate)
    elif ext == "mp3":  # Use ffmpeg/ffprobe
        sr, channels, duration = get_mp3_metadata(audioPath)
    else:
        snd_file = SoundFile(audioPath, mode='r')
        inf = snd_file._info
        sr = inf.samplerate
        channels = inf.channels
        duration = float(inf.frames) / float(inf.samplerate)
    return int(sr), int(channels), float(duration)
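The example above reaches into the private snd_file._info struct; the same metadata is available through public attributes. A sketch of an equivalent soundfile-only branch (get_audio_metadata_public is a hypothetical name, and it covers only formats libsndfile can open):

import soundfile as sf

def get_audio_metadata_public(audio_path):
    # samplerate, channels and len() (length in frames) are public attributes.
    with sf.SoundFile(audio_path, mode='r') as f:
        return f.samplerate, f.channels, len(f) / f.samplerate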
Example #11
Source File: Input.py From AdversarialAudioSeparation with MIT License | 6 votes |
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    snd_file = SoundFile(audio_path, mode='r')
    inf = snd_file._info
    audio_sr = inf.samplerate

    snd_file.seek(start_frame)
    audio = snd_file.read(end_frame - start_frame, dtype='float32')
    snd_file.close()
    audio = audio.T  # Transpose axes to (channels, frames)

    # Convert to mono if desired
    if mono and len(audio.shape) > 1 and audio.shape[0] > 1:
        audio = np.mean(audio, axis=0)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        audio = Utils.resample(audio, audio_sr, sample_rate)
        audio_sr = sample_rate

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr
Example #12
Source File: segment.py From inference with Apache License 2.0 | 6 votes |
def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
              duration=0, trim=False):
    """
    Load a file supported by librosa and return as an AudioSegment.
    :param filename: path of file to load
    :param target_sr: the desired sample rate
    :param int_values: if true, load samples as 32-bit integers
    :param offset: offset in seconds when loading audio
    :param duration: duration in seconds when loading audio
    :return: numpy array of samples
    """
    with sf.SoundFile(filename, 'r') as f:
        dtype = 'int32' if int_values else 'float32'
        sample_rate = f.samplerate
        if offset > 0:
            f.seek(int(offset * sample_rate))
        if duration > 0:
            samples = f.read(int(duration * sample_rate), dtype=dtype)
        else:
            samples = f.read(dtype=dtype)
    samples = samples.transpose()
    return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
Example #13
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithPause(self, image_file, audio_file, pause):
    audio_file = audio_file.replace("\\", "/")
    f = sf.SoundFile(audio_file)
    audio_clip = AudioSegment.from_wav(audio_file)
    duration = (len(f) / f.samplerate) + pause / 1000
    audio_clip_with_pause = audio_clip + AudioSegment.silent(duration=pause)
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration)
Example #14
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithTransition(self, image_file, audio_file, transition_file):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(r"%s" % audio_file)
        f = sf.SoundFile(r"%s" % audio_file)
    except Exception as e:
        print(e)
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    duration = len(f) / f.samplerate
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration)
    self.transitions.append((transition_file, len(self.imageframes) - 1, duration_in_ms / 1000))
Example #15
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 6 votes |
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
    media_info = MediaInfo.parse(transition_file)
    duration_in_ms = media_info.tracks[0].duration
    audio_file = r"%s" % audio_file
    f = sf.SoundFile(audio_file)
    try:
        audio_clip = AudioSegment.from_wav(audio_file)
    except:
        print("error with frame audio transition pause for %s" % audio_file)
        audio_clip = AudioSegment.silent(duration=pause)
    duration = len(f) / f.samplerate
    audio_clip_with_pause = audio_clip
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip_with_pause)
    self.durations.append(duration + (pause / 1000))
    self.transitions.append((transition_file, len(self.imageframes) - 1,
                             (duration_in_ms / 1000) + (pause / 1000)))
Example #16
Source File: Input.py From vimss with GNU General Public License v3.0 | 6 votes |
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    snd_file = SoundFile(audio_path, mode='r')
    inf = snd_file._info
    audio_sr = inf.samplerate

    start_read = max(start_frame, 0)
    pad_front = -min(start_frame, 0)
    end_read = min(end_frame, inf.frames)
    pad_back = max(end_frame - inf.frames, 0)

    snd_file.seek(start_read)
    audio = snd_file.read(end_read - start_read, dtype='float32', always_2d=True)  # (num_frames, channels)
    snd_file.close()

    # Pad if necessary (start_frame or end_frame out of bounds)
    audio = np.pad(audio, [(pad_front, pad_back), (0, 0)], mode="constant", constant_values=0.0)

    # Convert to mono if desired
    if mono:
        audio = np.mean(audio, axis=1, keepdims=True)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        res_length = int(np.ceil(float(audio.shape[0]) * float(sample_rate) / float(audio_sr)))
        audio = np.pad(audio, [(1, 1), (0, 0)], mode="reflect")  # Pad audio first
        audio = librosa.resample(audio.T, audio_sr, sample_rate, res_type="kaiser_fast").T
        skip = (audio.shape[0] - res_length) // 2
        audio = audio[skip:skip + res_length, :]

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr
Example #17
Source File: stt_utils.py From training_results_v0.6 with Apache License 2.0 | 5 votes |
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14, overwrite=False,
                          save_feature_as_csvfile=False):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """
    csvfilename = filename.replace(".wav", ".csv")
    if (os.path.isfile(csvfilename) is False) or overwrite:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
            if audio.ndim >= 2:
                audio = np.mean(audio, 1)
            if max_freq is None:
                max_freq = sample_rate / 2
            if max_freq > sample_rate / 2:
                raise ValueError("max_freq must not be greater than half of "
                                 "sample rate")
            if step > window:
                raise ValueError("step size must not be greater than window size")
            hop_length = int(0.001 * step * sample_rate)
            fft_length = int(0.001 * window * sample_rate)
            pxx, freqs = spectrogram(
                audio, fft_length=fft_length, sample_rate=sample_rate,
                hop_length=hop_length)
            ind = np.where(freqs <= max_freq)[0][-1] + 1
            res = np.transpose(np.log(pxx[:ind, :] + eps))
            if save_feature_as_csvfile:
                np.savetxt(csvfilename, res)
            return res
    else:
        return np.loadtxt(csvfilename)
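For intuition on the millisecond-to-sample conversion used above, a worked check assuming 16 kHz audio (the sample rate is an assumption for illustration; the projects involved use various rates):

sample_rate = 16000                              # assumed for illustration
step, window = 10, 20                            # milliseconds, as in the defaults
hop_length = int(0.001 * step * sample_rate)     # 0.010 s * 16000 Hz = 160 samples
fft_length = int(0.001 * window * sample_rate)   # 0.020 s * 16000 Hz = 320 samples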
Example #18
Source File: generatemovie.py From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License | 5 votes |
def addFrame(self, image_file, audio_file):
    audio_file = audio_file.replace("\\", "/")
    try:
        audio_clip = AudioSegment.from_wav(r"%s" % audio_file)
        f = sf.SoundFile(r"%s" % audio_file)
    except Exception as e:
        print(e)
        audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
        f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
    duration = len(f) / f.samplerate
    self.imageframes.append(image_file)
    self.audiofiles.append(audio_clip)
    self.durations.append(duration)
Example #19
Source File: audioread.py From pb_chime5 with MIT License | 5 votes |
def audio_channels(path):
    """
    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_channels(path)
    1
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_channels(path)  # correct for multichannel
    8
    """
    with soundfile.SoundFile(str(path)) as f:
        return f.channels
Example #20
Source File: audioread.py From pb_chime5 with MIT License | 5 votes |
def audio_length(path, unit='samples'):
    """
    Args:
        path:
        unit:

    Returns:

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_length(path)
    122111
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_length(path)  # correct for multichannel
    960000
    >>> with soundfile.SoundFile(str(path)) as f:
    ...     print(f.read().shape)
    (960000, 8)
    """
    # params = soundfile.info(str(path))
    # return int(params.samplerate * params.duration)

    if unit == 'samples':
        with soundfile.SoundFile(str(path)) as f:
            return len(f)
    elif unit == 'seconds':
        with soundfile.SoundFile(str(path)) as f:
            return len(f) / f.samplerate
    else:
        raise ValueError(unit)
Example #21
Source File: stt_utils.py From SNIPER-mxnet with Apache License 2.0 | 5 votes |
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14, overwrite=False,
                          save_feature_as_csvfile=False):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """
    csvfilename = filename.replace(".wav", ".csv")
    if (os.path.isfile(csvfilename) is False) or overwrite:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
            if audio.ndim >= 2:
                audio = np.mean(audio, 1)
            if max_freq is None:
                max_freq = sample_rate / 2
            if max_freq > sample_rate / 2:
                raise ValueError("max_freq must not be greater than half of "
                                 "sample rate")
            if step > window:
                raise ValueError("step size must not be greater than window size")
            hop_length = int(0.001 * step * sample_rate)
            fft_length = int(0.001 * window * sample_rate)
            pxx, freqs = spectrogram(
                audio, fft_length=fft_length, sample_rate=sample_rate,
                hop_length=hop_length)
            ind = np.where(freqs <= max_freq)[0][-1] + 1
            res = np.transpose(np.log(pxx[:ind, :] + eps))
            if save_feature_as_csvfile:
                np.savetxt(csvfilename, res)
            return res
    else:
        return np.loadtxt(csvfilename)
Example #22
Source File: preprocess_dns.py From asteroid with MIT License | 5 votes |
def preprocess_dns(in_dir, out_dir='./data'):
    """ Create json file from dataset folder.

    Args:
        in_dir (str): Location of the DNS data
        out_dir (str): Where to save the json files.
    """
    # Get all file ids
    clean_wavs = glob.glob(os.path.join(in_dir, 'clean/*.wav'))
    clean_dic = make_wav_id_dict(clean_wavs)
    mix_wavs = glob.glob(os.path.join(in_dir, 'noisy/*.wav'))
    mix_dic = make_wav_id_dict(mix_wavs)
    noise_wavs = glob.glob(os.path.join(in_dir, 'noise/*.wav'))
    noise_dic = make_wav_id_dict(noise_wavs)
    assert clean_dic.keys() == mix_dic.keys() == noise_dic.keys()
    file_infos = {k: dict(mix=mix_dic[k],
                          clean=clean_dic[k],
                          noise=noise_dic[k],
                          snr=get_snr_from_mix_path(mix_dic[k]),
                          file_len=len(sf.SoundFile(mix_dic[k])))
                  for k in clean_dic.keys()}
    # Save to JSON
    with open(os.path.join(out_dir, 'file_infos.json'), 'w') as f:
        json.dump(file_infos, f, indent=2)
Example #23
Source File: preprocess_whamr.py From asteroid with MIT License | 5 votes |
def preprocess_one_dir(in_dir):
    """ Create a list of lists for one condition; each inner list contains
    [path, wav_length]. """
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        samples = sf.SoundFile(wav_path)
        file_infos.append((wav_path, len(samples)))
    return file_infos
Example #24
Source File: utils.py From DeepXi with Mozilla Public License 2.0 | 5 votes |
def batch_list(file_dir, list_name, data_path='data', make_new=False):
    """
    Places the file paths and wav lengths of an audio file into a dictionary,
    which is then appended to a list. 'glob' is used to support Unix style
    pathname pattern expansions. Checks if the training list has already been
    saved, and loads it.

    Argument/s:
        file_dir - directory containing the audio files.
        list_name - name for the list.
        data_path - path to store pickle files.
        make_new - re-create list.

    Returns:
        batch_list - list of file paths and wav length.
    """
    extension = ['*.wav', '*.flac', '*.mp3']
    if not make_new:
        if os.path.exists(data_path + '/' + list_name + '_list_' + platform.node() + '.p'):
            print('Loading ' + list_name + ' list...')
            with open(data_path + '/' + list_name + '_list_' + platform.node() + '.p', 'rb') as f:
                batch_list = pickle.load(f)
            if batch_list[0]['file_path'].find(file_dir) != -1:
                print(list_name + ' list has a total of %i entries.' % (len(batch_list)))
                return batch_list
    print('Creating ' + list_name + ' list...')
    batch_list = []
    for i in extension:
        for j in glob.glob(os.path.join(file_dir, i)):
            f = SoundFile(j)
            wav_len = f.seek(0, SEEK_END)
            if wav_len == -1:
                wav, _ = read_wav(j)  # fall back to decoding the file if seeking fails.
                wav_len = len(wav)
            batch_list.append({'file_path': j, 'wav_len': wav_len})  # append dictionary.
    if not os.path.exists(data_path):
        os.makedirs(data_path)  # make directory.
    with open(data_path + '/' + list_name + '_list_' + platform.node() + '.p', 'wb') as f:
        pickle.dump(batch_list, f)
    print('The ' + list_name + ' list has a total of %i entries.' % (len(batch_list)))
    return batch_list
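The seek(0, SEEK_END) call above returns the new absolute position in frames, so for a seekable file it should agree with len(f); the read_wav fallback only matters when seeking fails. A small sketch of that invariant (the path is a placeholder):

import soundfile as sf
from soundfile import SEEK_END

with sf.SoundFile('example.flac') as f:  # placeholder path
    if f.seekable():
        # seek() returns the new position; seeking to the end yields the frame count.
        assert f.seek(0, SEEK_END) == len(f)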
Example #25
Source File: utils.py From emotion-recognition-using-speech with MIT License | 5 votes |
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result
Example #26
Source File: utils.py From ba-dls-deepspeech with Apache License 2.0 | 5 votes |
def spectrogram_from_file(filename, step=10, window=20, max_freq=None, eps=1e-14):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """
    with soundfile.SoundFile(filename) as sound_file:
        audio = sound_file.read(dtype='float32')
        sample_rate = sound_file.samplerate
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             "sample rate")
        if step > window:
            raise ValueError("step size must not be greater than window size")
        hop_length = int(0.001 * step * sample_rate)
        fft_length = int(0.001 * window * sample_rate)
        pxx, freqs = spectrogram(
            audio, fft_length=fft_length, sample_rate=sample_rate,
            hop_length=hop_length)
        ind = np.where(freqs <= max_freq)[0][-1] + 1
        return np.transpose(np.log(pxx[:ind, :] + eps))
Example #27
Source File: test_deformers.py From muda with ISC License | 5 votes |
def __test_shifted_impulse(jam_orig, jam_new, ir_files, orig_duration, n_fft, rolloff_value):
    # Delayed impulse
    with psf.SoundFile(str(ir_files), mode='r') as soundf:
        ir_data = soundf.read()
        ir_sr = soundf.samplerate

    # Delay the impulse signal by zero-padding it with 1 second of zeros
    ir_data_delayed = np.pad(ir_data, (ir_sr, 0), mode='constant')

    # Dump the delayed audio file
    psf.write('tests/data/ir_file_delayed.wav', ir_data_delayed, ir_sr)

    D_delayed = muda.deformers.IRConvolution(ir_files='tests/data/ir_file_delayed.wav',
                                             n_fft=n_fft, rolloff_value=rolloff_value)

    for jam_shifted in D_delayed.transform(jam_orig):
        # Verify that the delayed annotations (chords here) stay in a valid range
        # __test_duration(jam_orig, jam_shifted, orig_duration)
        shifted_data = jam_shifted.search(namespace='chord')[0].data
        delayed_data = jam_new.search(namespace='chord')[0].data
        for i in range(len(shifted_data)):
            # For each observation, verify its onset time has been shifted by 1 s
            isclose_(1.00, shifted_data[i][0] - delayed_data[i][0])
Example #28
Source File: test_deformers.py From muda with ISC License | 5 votes |
def test_background(noise, n_samples, weight_min, weight_max, jam_fixture):
    D = muda.deformers.BackgroundNoise(files=noise,
                                       n_samples=n_samples,
                                       weight_min=weight_min,
                                       weight_max=weight_max)
    jam_orig = deepcopy(jam_fixture)
    orig_duration = librosa.get_duration(**jam_orig.sandbox.muda['_audio'])

    n_out = 0
    for jam_new in D.transform(jam_orig):
        assert jam_new is not jam_fixture
        __test_effect(jam_orig, jam_fixture)

        assert not np.allclose(jam_orig.sandbox.muda['_audio']['y'],
                               jam_new.sandbox.muda['_audio']['y'])

        d_state = jam_new.sandbox.muda.history[-1]['state']
        filename = d_state['filename']
        start = d_state['start']
        stop = d_state['stop']

        with psf.SoundFile(str(filename), mode='r') as soundf:
            max_index = len(soundf)
            noise_sr = soundf.samplerate

        assert 0 <= start < stop
        assert start < stop <= max_index
        assert ((stop - start) / float(noise_sr)) == orig_duration

        __test_effect(jam_orig, jam_new)
        n_out += 1

    assert n_out == n_samples

    # Serialization test
    D2 = muda.deserialize(muda.serialize(D))
    __test_params(D, D2)