Python librosa.resample() Examples
The following are 25 code examples of librosa.resample().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module librosa, or try the search function.
Example #1
Source File: rapSpeaker.py From rapLyrics with MIT License | 11 votes |
def doFileStuff(line, isSlow):
    """Speak ``line`` with gTTS and write two WAV renderings of it.

    The text is synthesized to ``placeholder.mp3``, decoded once with librosa,
    and resampled twice:

    * ``placeholder.wav`` -- resampled to ``SAMPLE_RATE``.
    * ``lowPlaceholder.wav`` -- resampled to ``SAMPLE_RATE * LOW_FACTOR`` but
      written with ``SAMPLE_RATE`` as its rate, so its playback duration is
      scaled by ``LOW_FACTOR`` relative to the first file.

    Parameters
    ----------
    line : str
        Text to synthesize.
    isSlow : bool
        Forwarded to gTTS as ``slow``.

    Returns
    -------
    The samples resampled to ``SAMPLE_RATE`` (what was written to
    ``placeholder.wav`` before the soundfile round-trip).
    """
    gTTS(text=line, lang='en', slow=isSlow).save("placeholder.mp3")

    # Decode the MP3 once; both renderings start from the same samples, so a
    # second librosa.load of the same file would only repeat the work.
    y, source_sr = librosa.load("placeholder.mp3")

    data = librosa.resample(y, source_sr, SAMPLE_RATE)
    librosa.output.write_wav('placeholder.wav', data, SAMPLE_RATE)
    # NOTE(review): the read/write round-trip through soundfile presumably
    # re-encodes the file with soundfile's default subtype -- confirm.
    d, sr = sf.read('placeholder.wav')
    sf.write('placeholder.wav', d, sr)

    lowData = librosa.resample(y, source_sr, SAMPLE_RATE * LOW_FACTOR)
    # Deliberately labelled with SAMPLE_RATE rather than the resample target.
    librosa.output.write_wav('lowPlaceholder.wav', lowData, SAMPLE_RATE)
    d, sr = sf.read('lowPlaceholder.wav')
    sf.write('lowPlaceholder.wav', d, sr)

    return data
Example #2
Source File: Input.py From vimss with GNU General Public License v3.0 | 6 votes |
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    """Read a frame range from an audio file, zero-padding out-of-range parts.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to open.
    start_frame : int
        First frame to read; a negative value zero-pads the front.
    end_frame : int
        One past the last frame to read; a value beyond the end of the file
        zero-pads the back.
    mono : bool
        If True, average all channels into one (the channel axis is kept).
    sample_rate : number or None
        If given and different from the file's rate, resample to this rate.
    clip : bool
        If True, limit the samples to [-1, 1].

    Returns
    -------
    audio : np.ndarray
        Array laid out as (num_frames, channels).
    audio_sr : int
        The sample rate reported by the file. NOTE: this is the file's native
        rate even when the returned audio was resampled to ``sample_rate``.
    """
    # The context manager releases the file handle even if seek/read raises.
    with SoundFile(audio_path, mode='r') as snd_file:
        inf = snd_file._info
        audio_sr = inf.samplerate
        total_frames = inf.frames

        start_read = max(start_frame, 0)
        pad_front = -min(start_frame, 0)
        end_read = min(end_frame, total_frames)
        pad_back = max(end_frame - total_frames, 0)

        snd_file.seek(start_read)
        audio = snd_file.read(end_read - start_read, dtype='float32', always_2d=True)  # (num_frames, channels)

    # Pad if necessary (start_frame or end_frame out of bounds)
    audio = np.pad(audio, [(pad_front, pad_back), (0, 0)], mode="constant", constant_values=0.0)

    # Convert to mono if desired
    if mono:
        audio = np.mean(audio, axis=1, keepdims=True)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        res_length = int(np.ceil(float(audio.shape[0]) * float(sample_rate) / float(audio_sr)))
        # Reflect-pad one frame on each side before resampling, then cut the
        # centre res_length frames back out of the result.
        audio = np.pad(audio, [(1, 1), (0, 0)], mode="reflect")
        audio = librosa.resample(audio.T, audio_sr, sample_rate, res_type="kaiser_fast").T
        skip = (audio.shape[0] - res_length) // 2
        audio = audio[skip:skip + res_length, :]

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr
Example #3
Source File: audio_signal.py From nussl with MIT License | 6 votes |
def sample_rate(self):
    """
    ``int`` Sample rate associated with this object. If audio was read from a file, the sample
    rate will be set to the sample rate associated with the file. If this object was
    initialized from an array then the sample rate is set upon init.

    Notes:
        This property is read-only and cannot be set directly. To change the sample rate,
        use :func:`resample`.

    See Also:
        * :func:`resample` to change the sample rate and resample the audio data.

        * :func:`load_audio_from_array` to read audio from an array and set the sample rate.

        * :var:`nussl.constants.DEFAULT_SAMPLE_RATE` the default sample rate for *nussl*
          if not specified
    """
    return self._sample_rate
Example #4
Source File: base.py From pumpp with ISC License | 6 votes |
def transform(self, y, sr):
    '''Extract features from an audio signal, resampling it first if needed.

    Parameters
    ----------
    y : np.ndarray
        The audio signal

    sr : number > 0
        The native sampling rate of y; when it differs from ``self.sr`` the
        signal is resampled to ``self.sr`` before feature extraction.

    Returns
    -------
    dict
        The result of merging the single feature dictionary produced by
        ``transform_audio``.

    See Also
    --------
    transform_audio
    '''
    signal = y if sr == self.sr else resample(y, sr, self.sr)
    features = self.transform_audio(signal)
    return self.merge([features])
Example #5
Source File: prepare_data.py From music_transcription_MAPS with MIT License | 6 votes |
def read_audio(path, target_fs=None):
    """Load an audio file as a one-dimensional sequence.

    Multi-dimensional data is averaged over axis 1, and the result is
    resampled when a different target rate is requested.

    Args:
      path: string, path of audio.
      target_fs: int or None, rate to resample to; None keeps the file's rate.

    Returns:
      audio: 1 dimension audio sequence.
      fs: sampling rate of the returned audio.
    """
    audio, fs = soundfile.read(path)

    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    # Nothing to do when no target is given or the file already matches it.
    if target_fs is None or fs == target_fs:
        return audio, fs

    resampled = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
    return resampled, target_fs
Example #6
Source File: perturb.py From espnet with Apache License 2.0 | 5 votes |
def __call__(self, x, uttid=None, train=True):
    """Perturb ``x`` by resampling it with a ratio, in training mode only.

    When ``train`` is false the input is returned untouched. Otherwise the
    ratio is looked up in ``self.utt2ratio`` (if ``self.accept_uttid``) or
    drawn from ``self.state.uniform(self.lower, self.upper)``. If
    ``self.keep_length`` is set, the result is centre-cropped or zero-padded
    along its first axis to the length of the input.
    """
    if not train:
        return x

    x = x.astype(numpy.float32)
    ratio = (
        self.utt2ratio[uttid]
        if self.accept_uttid
        else self.state.uniform(self.lower, self.upper)
    )

    # Note1: resample requires the sampling-rate of input and output,
    #        but actually only the ratio is used.
    y = librosa.resample(x, ratio, 1, res_type=self.res_type)

    if not self.keep_length or len(y) == len(x):
        return y

    diff = abs(len(x) - len(y))
    lead, trail = diff // 2, (diff + 1) // 2

    if len(y) > len(x):
        # Output is longer than the input: drop the surplus from both ends.
        return y[lead:-trail]

    # Output is shorter: zero-pad both ends.
    # Assume the time-axis is the first: (Time, Channel)
    pad_width = [(lead, trail)] + [(0, 0)] * (y.ndim - 1)
    return numpy.pad(y, pad_width=pad_width, constant_values=0, mode="constant")
Example #7
Source File: prepare_data.py From dcase2017_task4_cvssp with MIT License | 5 votes |
def read_audio(path, target_fs=None):
    """Read an audio file, average it over axis 1 if multi-dimensional, and
    resample it to ``target_fs`` when that is given and differs from the
    file's rate.

    Returns a ``(audio, fs)`` tuple where ``fs`` is the rate of the returned
    samples.
    """
    audio, fs = soundfile.read(path)
    is_multichannel = audio.ndim > 1
    if is_multichannel:
        audio = np.mean(audio, axis=1)

    wants_resample = target_fs is not None and fs != target_fs
    if wants_resample:
        audio, fs = librosa.resample(audio, orig_sr=fs, target_sr=target_fs), target_fs
    return audio, fs


# Write wav
Example #8
Source File: extract_audioset_embedding.py From audioset_classification with MIT License | 5 votes |
def read_audio(path, target_fs=None):
    """Return ``(audio, fs)`` for the file at ``path``.

    Data with more than one dimension is averaged over axis 1. When
    ``target_fs`` is set and differs from the file's rate, the audio is
    resampled and ``target_fs`` is returned as the rate.
    """
    samples, native_fs = soundfile.read(path)
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    if target_fs is None or native_fs == target_fs:
        return samples, native_fs

    return librosa.resample(samples, orig_sr=native_fs, target_sr=target_fs), target_fs


### Feature extraction.
Example #9
Source File: utils.py From Wave-U-Net-Pytorch with MIT License | 5 votes |
def resample(audio, orig_sr, new_sr, mode="numpy"):
    """Resample ``audio`` from ``orig_sr`` to ``new_sr`` with librosa.

    The input is returned as-is when both rates are equal. A ``torch.Tensor``
    input is detached and converted to a NumPy array before resampling. With
    ``mode == "pytorch"`` the resampled result is wrapped in a tensor;
    otherwise the NumPy result is returned.
    """
    if orig_sr == new_sr:
        return audio

    samples = audio.detach().cpu().numpy() if isinstance(audio, torch.Tensor) else audio
    resampled = librosa.resample(samples, orig_sr, new_sr, res_type='kaiser_fast')
    return torch.tensor(resampled) if mode == "pytorch" else resampled
Example #10
Source File: audio.py From Resemblyzer with Apache License 2.0 | 5 votes |
def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray], source_sr: Optional[int]=None):
    """
    Prepares a waveform, given either on disk or in memory, for the model: it is resampled to
    the configured sampling rate, volume-normalized, and has its long silences shortened.

    :param fpath_or_wav: either a filepath to an audio file (many extensions are supported, not
    just .wav), or the waveform as a numpy array of floats.
    :param source_sr: if passing an audio waveform, the sampling rate of the waveform before
    preprocessing. After preprocessing, the waveform's sampling rate will match the data
    hyperparameters. If passing a filepath, the sampling rate will be automatically detected and
    this argument will be ignored.
    """
    # Load the wav from disk if needed
    given_path = isinstance(fpath_or_wav, (str, Path))
    if given_path:
        wav, source_sr = librosa.load(str(fpath_or_wav), sr=None)
    else:
        wav = fpath_or_wav

    # Resample the wav (skipped only when no source rate is known)
    if source_sr is not None:
        wav = librosa.resample(wav, source_sr, sampling_rate)

    # Apply the preprocessing: normalize volume and shorten long silences
    normalized = normalize_volume(wav, audio_norm_target_dBFS, increase_only=True)
    return trim_long_silences(normalized)
Example #11
Source File: utilities.py From dcase2018_task1 with MIT License | 5 votes |
def read_audio(path, target_fs=None):
    """Read ``path`` with soundfile and return ``(audio, fs)``.

    Audio with more than one dimension is reduced by averaging over axis 1.
    If ``target_fs`` is provided and is not the file's rate, the audio is
    resampled to it and the returned rate is ``target_fs``.
    """
    audio, fs = soundfile.read(path)

    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    if target_fs is not None:
        if fs != target_fs:
            audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
            fs = target_fs

    return audio, fs
Example #12
Source File: audio.py From dcase_util with MIT License | 5 votes |
def resample(self, target_fs, scale=True, res_type='kaiser_best'):
    """Resample the stored audio data to a new sampling rate.

    Does nothing when ``target_fs`` already equals ``self.fs``.

    Parameters
    ----------
    target_fs : int
        Target sampling rate

    scale : bool
        Scale the resampled signal to have approximately equal total energy
        (see `librosa.core.resample`).
        Default value True

    res_type : str
        Resample type (see `librosa.core.resample`)
        Default value 'kaiser_best'

    Returns
    -------
    self

    """
    if target_fs == self.fs:
        return self

    # Store the data in Fortran (column-major) order before resampling.
    self._data = numpy.asfortranarray(self._data)

    resample_args = {
        'y': self._data,
        'orig_sr': self.fs,
        'target_sr': target_fs,
        'scale': scale,
        'res_type': res_type,
    }
    self._data = librosa.resample(**resample_args)
    self.fs = target_fs

    return self
Example #13
Source File: Utils.py From Wave-U-Net with MIT License | 5 votes |
def resample(audio, orig_sr, new_sr):
    """Resample ``audio`` from ``orig_sr`` to ``new_sr``.

    The array is transposed before the librosa call and transposed back
    afterwards, so the result has the same axis order as the input.
    """
    transposed = audio.T
    resampled = librosa.resample(transposed, orig_sr, new_sr)
    return resampled.T
Example #14
Source File: base.py From Sound-of-Pixels with MIT License | 5 votes |
def _load_audio(self, path, center_timestamp, nearest_resample=False):
    """Load a fixed-length clip of ``self.audLen`` samples centred on a timestamp.

    A path ending in ``'silent'`` yields an all-zero clip. Otherwise the file
    is loaded, tiled if shorter than ``self.audSec`` seconds, reduced to
    ``self.audRate`` when its rate is higher (by striding when
    ``nearest_resample`` is true, otherwise with librosa), and a window around
    ``center_timestamp`` is copied into the output. For the ``'train'`` split
    the clip is scaled by a random factor; samples are limited to [-1, 1].
    """
    half_len = self.audLen // 2
    audio = np.zeros(self.audLen, dtype=np.float32)

    # silent
    if path.endswith('silent'):
        return audio

    # load audio
    audio_raw, rate = self._load_audio_file(path)

    # repeat if audio is too short
    n_available = audio_raw.shape[0]
    if n_available < rate * self.audSec:
        repeats = int(rate * self.audSec / n_available) + 1
        audio_raw = np.tile(audio_raw, repeats)

    # resample (only when the source rate is above the target rate)
    if rate > self.audRate:
        if nearest_resample:
            step = rate // self.audRate
            audio_raw = audio_raw[::step]
        else:
            audio_raw = librosa.resample(audio_raw, rate, self.audRate)

    # crop N seconds
    len_raw = audio_raw.shape[0]
    center = int(center_timestamp * self.audRate)
    start = max(0, center - half_len)
    end = min(len_raw, center + half_len)

    # Place the excerpt so that `center` lands in the middle of the output.
    dst_begin = half_len - (center - start)
    dst_end = half_len + (end - center)
    audio[dst_begin:dst_end] = audio_raw[start:end]

    # randomize volume
    if self.split == 'train':
        audio *= random.random() + 0.5  # 0.5-1.5

    audio[audio > 1.] = 1.
    audio[audio < -1.] = -1.

    return audio
Example #15
Source File: 02-compute-mel-specs.py From kaggle-freesound-audio-tagging with MIT License | 5 votes |
def compute_melspec(filename, indir, outdir):
    """Convert one saved waveform into a log-mel spectrogram file.

    Loads ``indir + filename + '.npy'``, resamples it from 44100 to 22050,
    computes a 64-band mel spectrogram, converts it with ``power_to_db`` and
    saves the result to ``outdir + filename + '.npy'``.
    """
    source_sr, target_sr = 44100, 22050
    npy_name = filename + '.npy'

    wav = np.load(indir + npy_name)
    wav = librosa.resample(wav, source_sr, target_sr)

    melspec = librosa.feature.melspectrogram(
        wav,
        sr=target_sr,
        n_fft=1764,
        hop_length=220,
        n_mels=64,
    )
    logmel = librosa.core.power_to_db(melspec)

    np.save(outdir + npy_name, logmel)
Example #16
Source File: feature_description.py From Audio-Vision with MIT License | 5 votes |
def mel(features, path, dataset=None):
    """
    Extract a normalized mel-spectrogram from the audio at ``path``.

    ``features`` must be a dictionary holding every attribute read below; the
    first element of each entry is used. The mel filterbank is built once and
    kept in the module-level global ``melW`` for later calls.

    Raises an ``Exception`` when the sampling rate of the loaded audio differs
    from ``features['fs'][0]``.
    """
    global melW

    def first(key):
        # Each setting is stored as a sequence; only its first element is used.
        return features[key][0]

    fsx = first('fs')
    n_mels = first('n_mels')
    fmin = first('fmin')
    fmax = first('fmax')
    mono = first('mono')
    hamming_window = first('hamming_window')
    noverlap = first('noverlap')
    detrend = first('detrend')
    return_onesided = first('return_onesided')
    mode = first('mode')

    wav, fs = read_audio('librosa', path, dataset)
    wav = convert_mono(wav, mono)
    if fs != fsx:
        raise Exception("Assertion Error. Sampling rate Found {} Expected {}".format(fs,fsx))

    ham_win = np.hamming(hamming_window)
    f, t, X = signal.spectral.spectrogram(
        wav, fs,
        window=ham_win,
        nperseg=hamming_window,
        noverlap=noverlap,
        detrend=detrend,
        return_onesided=return_onesided,
        mode=mode,
    )
    X = X.T

    # define global melW, avoid init melW every time, to speed up.
    # NOTE(review): the cached filterbank is reused even if fs, n_mels, fmin or
    # fmax differ on a later call -- confirm that callers never vary them.
    if globals().get('melW') is None:
        melW = librosa.filters.mel(
            fs,
            n_fft=hamming_window,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax,
        )
        # Scale each filter so that its peak is 1.
        melW /= np.max(melW, axis=-1)[:, None]

    X = np.dot(X, melW.T)
    X = X[:, 0:]  # slice from column 0 onwards, kept from the original
    return feature_normalize(X)
Example #17
Source File: utilities.py From dcase2019_task2 with MIT License | 5 votes |
def read_audio(audio_path, target_fs=None):
    """Load ``audio_path`` and return ``(audio, fs)``.

    Multi-dimensional audio is averaged over axis 1. When ``target_fs`` is
    given and differs from the file's sampling rate, the audio is resampled
    and ``target_fs`` becomes the returned rate.
    """
    waveform, file_fs = soundfile.read(audio_path)

    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    if target_fs is None or file_fs == target_fs:
        return waveform, file_fs

    waveform = librosa.resample(waveform, orig_sr=file_fs, target_sr=target_fs)
    return waveform, target_fs
Example #18
Source File: audio_signal.py From nussl with MIT License | 5 votes |
def resample(self, new_sample_rate, **kwargs):
    """
    Resample the data in :attr:`audio_data` to the new sample rate provided by
    :param:`new_sample_rate`. If the :param:`new_sample_rate` is the same as
    :attr:`sample_rate`, a warning is issued and the data is left untouched.

    Args:
        new_sample_rate (int): The new sample rate of :attr:`audio_data`.
        kwargs: Keyword arguments to librosa.resample.

    """
    if new_sample_rate == self.sample_rate:
        warnings.warn('Cannot resample to the same sample rate.')
        return

    # Resample every channel independently and stack the results.
    self.audio_data = np.array([
        librosa.resample(channel, self.sample_rate, new_sample_rate, **kwargs)
        for channel in self.get_channels()
    ])
    self.original_signal_length = self.signal_length
    self._sample_rate = new_sample_rate

##################################################
#              Channel Utilities
##################################################
Example #19
Source File: audio.py From signaltrain with GNU General Public License v3.0 | 4 votes |
def read_audio_file(filename, sr=44100, mono=True, norm=False, device='cpu', dtype=np.float32, warn=True, fix_and_overwrite=False):
    """
    Read an audio file, preferring scipy's fast WAV reader and falling back to
    librosa when scipy raises a ``WavFileWarning``.

    On the scipy path only the first channel is kept when ``mono`` is set,
    int16 samples are divided by 32767 to give floats, and the signal is
    resampled if the file's rate differs from ``sr``. When
    ``fix_and_overwrite`` is set and the file needed resampling or the librosa
    fallback, the processed signal is written back over ``filename``.

    Returns ``(signal, sr)`` with ``signal`` cast to ``dtype`` and, if
    ``norm`` is set, divided by its peak absolute value (when non-zero).
    ``device`` is accepted but not used here.
    """
    # first try to read via scipy, because it's fast
    used_scipy = False
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("error")  # scipy throws warnings which should be errors
        try:
            read_sr, signal = wavfile.read(filename)
        except wavfile.WavFileWarning:
            if warn:
                print("read_audio_file: Warning raised by scipy. ",end="")
        else:
            used_scipy = True

    # The librosa fallback always counts as "worth rewriting"; the scipy path
    # only does so when a resample was needed.
    rewrite_candidate = not used_scipy

    if used_scipy:
        if mono and signal.ndim > 1:
            signal = signal[:, 0]  # keeps the first channel only
        if isinstance(signal[0], np.int16):
            # change from [-32767..32767] to [-1..1]
            signal = np.array(signal / 32767.0, dtype=dtype)
        if read_sr != int(sr):
            print(f"read_audio_file: Got sample rate of {read_sr} Hz instead of {sr} Hz requested. Resampling.")
            signal = librosa.resample(signal, read_sr*1.0, sr*1.0, res_type='kaiser_fast')
            rewrite_candidate = True
    else:
        # try librosa; it's slower but general
        if warn:
            print("Trying librosa.")
        signal, read_sr = librosa.core.load(filename, mono=mono, sr=sr, res_type='kaiser_fast')

    if fix_and_overwrite and rewrite_candidate:
        print(f"    Overwriting {filename} (so we don't have to use process as much again)")
        write_audio_file(filename, signal, sr)

    if signal.dtype != dtype:
        signal = signal.astype(dtype, copy=False)

    if norm:
        absmax = np.max(np.abs(signal))
        if absmax > 0:
            signal = signal / absmax

    return signal, sr
Example #20
Source File: audio_io.py From synvae with MIT License | 4 votes |
def wav_data_to_samples(wav_data, sample_rate):
    """Decode PCM-formatted WAV bytes into a NumPy array of samples.

    scipy reads the WAV data; librosa does the mono conversion and resampling.

    Args:
      wav_data: WAV audio data to read.
      sample_rate: The number of samples per second at which the audio will be
          returned. Resampling will be performed if necessary.

    Returns:
      A numpy array of audio samples at the specified rate, in float32 format.
      Two-channel input is mixed down to a single channel.

    Raises:
      AudioIOReadError: If scipy is unable to read the WAV data.
      AudioIOError: If the sample format is unsupported or audio processing
          fails.
    """
    try:
        # Read the wav file, converting sample rate & number of channels.
        native_sr, y = scipy.io.wavfile.read(six.BytesIO(wav_data))
    except Exception as e:  # pylint: disable=broad-except
        raise AudioIOReadError(e)

    if y.dtype == np.int16:
        # Convert to float32.
        y = int16_samples_to_float32(y)
    elif y.dtype != np.float32:
        # Anything other than int16 or float32 is rejected.
        raise AudioIOError(
            'WAV file not 16-bit or 32-bit float PCM, unsupported')

    try:
        # Convert to mono and the desired sample rate.
        is_stereo = y.ndim == 2 and y.shape[1] == 2
        if is_stereo:
            y = librosa.to_mono(y.T)
        if native_sr != sample_rate:
            y = librosa.resample(y, native_sr, sample_rate)
    except Exception as e:  # pylint: disable=broad-except
        raise AudioIOError(e)

    return y
Example #21
Source File: prepare_spectrograms.py From dcase_task2 with MIT License | 4 votes |
def process(self, file_path, **kwargs):
    """Compute mel spectrograms for the audio file at ``file_path``.

    All settings are fixed inside the function (32 kHz, mono, 1024-point FFT,
    hop of 192 samples, 128 mel bands, perceptual weighting rather than log
    scaling). Returns an array with one spectrogram per channel of the loaded
    signal. ``kwargs`` are accepted but not used.
    """
    n_fft = 1024
    sr = 32000
    mono = True
    log_spec = False
    n_mels = 128
    hop_length = 192
    fmax = None

    sig, sr = librosa.load(file_path, sr=sr, mono=mono)
    if mono:
        # Add a leading axis so the mono signal is iterated as one channel.
        sig = sig[np.newaxis]

    def channel_spectrogram(y):
        # compute stft and keep only amplitudes
        magnitude = np.abs(
            librosa.stft(y, n_fft=n_fft, hop_length=hop_length, win_length=None,
                         window='hann', center=True, pad_mode='reflect')
        )

        # spectrogram weighting
        if log_spec:
            weighted = np.log10(magnitude + 1)
        else:
            freqs = librosa.core.fft_frequencies(sr=sr, n_fft=n_fft)
            weighted = librosa.perceptual_weighting(
                magnitude**2, freqs, ref=1.0, amin=1e-10, top_db=99.0)

        # apply mel filterbank
        mel_spec = librosa.feature.melspectrogram(
            S=weighted, sr=sr, n_mels=n_mels, fmax=fmax)
        return np.asarray(mel_spec)

    return np.asarray([channel_spectrogram(y) for y in sig])
Example #22
Source File: background.py From muda with ISC License | 4 votes |
def slice_clip(filename, start, stop, n_samples, sr, mono=True):
    """Extract a fragment of audio from a file.

    pysoundfile is used to seek directly to ``start`` rather than loading the
    entire stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    start : int
        The sample index of `filename` at which the audio fragment should start

    stop : int
        The sample index of `filename` at which the audio fragment should stop
        (e.g. y = audio[start:stop])

    n_samples : int > 0
        The length the returned fragment is fixed to

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled from `filename`

    Notes
    -----
    NOTE(review): earlier documentation listed ``ValueError`` for source files
    shorter than the requested length; no explicit check is visible in this
    function -- confirm the behaviour of the calls below.
    """
    with psf.SoundFile(str(filename), mode="r") as soundf:
        soundf.seek(start)
        y = soundf.read(stop - start).T
        native_sr = soundf.samplerate

    if mono:
        y = librosa.to_mono(y)

    # Resample from the file's rate to the requested one
    y = librosa.resample(y, native_sr, sr)

    # Clip to the target length exactly
    return librosa.util.fix_length(y, n_samples)
Example #23
Source File: music_processor.py From aurora-sdk-mac with Apache License 2.0 | 4 votes |
def process_music_data(data_in, is_fft, is_mel, n_out_bins, n_fft, n_mel, is_energy, is_visual):
    """Turn a raw buffer of float32 samples into FFT/mel bins and an energy value.

    Parameters
    ----------
    data_in : bytes-like
        Raw native-endian float32 samples (``len(data_in) / 4`` samples).
    is_fft, is_mel : bool
        Select the spectral output. ``is_fft`` takes precedence when both are
        set; when neither is set the spectral output is all zeros.
    n_out_bins : int
        Number of output bins.
    n_fft : int
        FFT size, also used as the hop length (non-overlapping frames).
    n_mel : int
        Number of mel bands for the mel output.
    is_energy : bool
        Compute the summed squared amplitude, scaled by 2**5.
    is_visual : bool
        Pass the decoded samples to ``visualizer``.

    Returns
    -------
    fft_output : np.ndarray of uint8
    energy_output : uint16 scalar when ``is_energy`` is set, otherwise a
        length-2 zero array of uint16.
    """
    # np.fromstring's binary mode is deprecated and the 'Float32' dtype alias
    # is gone from current NumPy; frombuffer reads the same bytes. The copy
    # keeps the array writable and independent of the input buffer, as the
    # fromstring result was.
    data_np = np.frombuffer(data_in, dtype=np.float32).copy()

    # visualizer
    if is_visual:
        visualizer(data_np)

    # energy
    if is_energy:
        energy = np.abs(data_np) ** 2
        energy = energy.sum()
        energy *= 2**5
        energy_output = energy.astype(np.uint16)
    else:
        energy_output = np.zeros(2).astype(np.uint16)

    # fft or mel
    if is_fft or is_mel:
        global sample_rate

        # down-sample by 4, with filtering, energy not scaled
        data_np = librosa.resample(data_np, sample_rate, sample_rate/4, res_type='kaiser_fast')

        # short time fft over n_fft samples
        fft_data = librosa.stft(data_np, n_fft, hop_length=n_fft, center=False)

        # calculate FFT or Mel
        if is_fft:
            fft_data_mag = np.abs(fft_data[0:n_fft // 2]) ** 2
            fft_data_mag *= 2**3
            fft_output = get_output_fft_bins(fft_data_mag, n_out_bins)
        else:
            fft_data_mag = np.abs(fft_data)**2
            fft_data_mag *= 2**2
            mel_data = librosa.feature.melspectrogram(S=fft_data_mag, sr=sample_rate / 4, n_mels=n_mel)
            fft_output = get_output_fft_bins(mel_data, n_out_bins)

        # output uint8_t
        fft_output = fft_output.astype(np.uint8)
    else:
        fft_output = np.zeros(n_out_bins).astype(np.uint8)

    return fft_output, energy_output
Example #24
Source File: Evaluate.py From vimss with GNU General Public License v3.0 | 4 votes |
def predict_track(model_config, sess, mix_audio, mix_sr, sep_input_shape, sep_output_shape, separator_sources, mix_context):
    '''
    Outputs source estimates for a given input mixture signal mix_audio [n_frames, n_channels] and a given Tensorflow session and placeholders belonging to the prediction network.
    It iterates through the track, collecting segment-wise predictions to form the output.
    :param model_config: Model configuration dictionary
    :param sess: Tensorflow session used to run the network inference
    :param mix_audio: [n_frames, n_channels] audio signal (numpy array). Can have higher sampling rate or channels than the model supports, will be downsampled correspondingly.
    :param mix_sr: Sampling rate of mix_audio
    :param sep_input_shape: Input shape of separator ([batch_size, num_samples, num_channels])
    :param sep_output_shape: Output shape of separator ([batch_size, num_samples, num_channels])
    :param separator_sources: List of Tensorflow tensors that represent the output of the separator network
    :param mix_context: Input tensor of the network
    :return: List with one [n_frames, n_channels] prediction array per source, at the model's expected sampling rate
    '''
    # Convert the mixture to the channel layout and sampling rate the model expects
    assert(len(mix_audio.shape) == 2)
    if model_config["mono_downmix"]:
        mix_audio = np.mean(mix_audio, axis=1, keepdims=True)
    else:
        if mix_audio.shape[1] == 1:  # Duplicate channels if input is mono but model is stereo
            mix_audio = np.tile(mix_audio, [1, 2])

    mix_audio = librosa.resample(mix_audio.T, mix_sr, model_config["expected_sr"], res_type="kaiser_fast").T

    # Preallocate source predictions (same shape as input mixture)
    source_time_frames = mix_audio.shape[0]
    source_preds = [np.zeros(mix_audio.shape, np.float32) for _ in range(model_config["num_sources"])]

    input_time_frames = sep_input_shape[1]
    output_time_frames = sep_output_shape[1]

    # Pad mixture across time at beginning and end so that neural network can make prediction at the beginning and end of signal.
    # Floor division keeps the pad width an integer: np.pad and the slicing below reject the float that "/" yields on Python 3.
    pad_time_frames = (input_time_frames - output_time_frames) // 2
    mix_audio_padded = np.pad(mix_audio, [(pad_time_frames, pad_time_frames), (0, 0)], mode="constant", constant_values=0.0)

    # Iterate over the mixture in output-sized steps, fetch network prediction
    # NOTE(review): a track shorter than output_time_frames makes source_pos negative below - confirm callers never pass one.
    for source_pos in range(0, source_time_frames, output_time_frames):
        # If this output patch would reach over the end of the source signal, set it so we predict the very end of the output, then stop
        if source_pos + output_time_frames > source_time_frames:
            source_pos = source_time_frames - output_time_frames

        # Prepare mixture excerpt by selecting time interval
        mix_part = mix_audio_padded[source_pos:source_pos + input_time_frames, :]
        mix_part = np.expand_dims(mix_part, axis=0)

        source_parts = sess.run(separator_sources, feed_dict={mix_context: mix_part})

        # Save predictions
        for i in range(model_config["num_sources"]):
            source_preds[i][source_pos:source_pos + output_time_frames] = source_parts[i][0, :, :]

    return source_preds
Example #25
Source File: audio.py From amen with BSD 2-Clause "Simplified" License | 4 votes |
def __init__(
    self,
    file_path=None,
    raw_samples=None,
    convert_to_mono=False,
    sample_rate=44100,
    analysis_sample_rate=22050,
):
    """
    Audio constructor.
    Opens a file path, loads the audio with librosa, and prepares the features

    Parameters
    ----------

    file_path: string
        path to the audio file to load

    raw_samples: np.array
        samples to use for audio output; used only when no file_path is given

    convert_to_mono: boolean
        (optional) converts the file to mono on loading

    sample_rate: number > 0 [scalar]
        (optional) sample rate to pass to librosa; for raw_samples, the rate
        the samples are taken to be at.

    analysis_sample_rate: number > 0 [scalar]
        (optional) sample rate of the mono signal stored in
        analysis_samples.

    Returns
    ------
    An Audio object

    Raises
    ------
    ValueError
        If neither file_path nor raw_samples is provided.
    """

    if file_path:
        y, sr = librosa.load(file_path, mono=convert_to_mono, sr=sample_rate)
    elif raw_samples is not None:
        # This assumes that we're passing in raw_samples
        # directly from another Audio's raw_samples.
        y = raw_samples
        sr = sample_rate
    else:
        # Without this branch the code below fails on an unbound `y`.
        raise ValueError("Audio requires either file_path or raw_samples")

    self.file_path = file_path
    self.sample_rate = float(sr)
    self.analysis_sample_rate = float(analysis_sample_rate)
    self.num_channels = y.ndim
    self.duration = librosa.get_duration(y=y, sr=sr)

    # Analysis runs on a mono mix at the (typically lower) analysis rate.
    self.analysis_samples = librosa.resample(
        librosa.to_mono(y), sr, self.analysis_sample_rate, res_type='kaiser_best'
    )
    self.raw_samples = np.atleast_2d(y)

    self.zero_indexes = self._create_zero_indexes()
    self.features = self._create_features()
    self.timings = self._create_timings()