Python librosa.amplitude_to_db() Examples
The following are 30 code examples of librosa.amplitude_to_db(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module librosa, or try the search function.
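Before the examples, a minimal sketch of typical usage: librosa.amplitude_to_db converts a magnitude (amplitude) spectrogram to decibels, computing 20 * log10(S / ref). With ref=np.max, the loudest bin maps to 0 dB.

import numpy as np
import librosa

t = np.arange(22050) / 22050.0
y = np.sin(2 * np.pi * 440.0 * t)              # one second of a 440 Hz tone
S = np.abs(librosa.stft(y))                    # magnitude spectrogram
S_db = librosa.amplitude_to_db(S, ref=np.max)  # peak mapped to 0 dB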
Example #1
Source File: audio.py From parallel-wavenet-vocoder with MIT License | 6 votes |
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, max_db=None, min_db=None, time_first=True, **kwargs):
    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if max_db and min_db else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db
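This snippet (and the similar ones in Examples #6, #8 and #9) relies on a normalize_db helper that is not shown. A minimal sketch of what such a helper might look like, assuming it clips to [min_db, max_db] and rescales to [0, 1]; the actual project helper may differ:

import numpy as np

def normalize_db(db, max_db, min_db):
    # Clip to the target dynamic range, then rescale to [0, 1].
    norm = np.clip(db, min_db, max_db)
    return (norm - min_db) / (max_db - min_db)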
Example #2
Source File: features.py From msaf with MIT License | 6 votes |
def compute_features(self):
    """Actual implementation of the features.

    Returns
    -------
    mfcc: np.array(N, F)
        The features, each row representing a feature vector for a given
        time frame/beat.
    """
    S = librosa.feature.melspectrogram(self._audio,
                                       sr=self.sr,
                                       n_fft=self.n_fft,
                                       hop_length=self.hop_length,
                                       n_mels=self.n_mels)
    log_S = librosa.amplitude_to_db(S, ref=self.ref_power)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=self.n_mfcc).T
    return mfcc
Example #3
Source File: attn_visualize.py From KoSpeech with Apache License 2.0 | 6 votes |
def parse_audio(audio_path):
    sound = load_audio(audio_path, DEL_SILENCE)

    spectrogram = librosa.feature.melspectrogram(sound, SAMPLE_RATE, n_mels=N_MELS, n_fft=N_FFT, hop_length=HOP_LENGTH)
    spectrogram = librosa.amplitude_to_db(spectrogram, ref=np.max)

    if NORMALIZE:
        mean = np.mean(spectrogram)
        std = np.std(spectrogram)
        spectrogram -= mean
        spectrogram /= std

    spectrogram = spectrogram[:, ::-1]
    spectrogram = torch.FloatTensor(np.ascontiguousarray(np.swapaxes(spectrogram, 0, 1)))

    return spectrogram
Example #4
Source File: feature.py From KoSpeech with Apache License 2.0 | 6 votes |
def __init__(self, sample_rate=16000, n_mels=80, window_size=20, stride=10, feature_extract_by='librosa'):
    self.sample_rate = sample_rate
    self.n_mels = n_mels
    self.n_fft = int(sample_rate * 0.001 * window_size)
    self.hop_length = int(sample_rate * 0.001 * stride)
    self.feature_extract_by = feature_extract_by.lower()

    if self.feature_extract_by == 'torchaudio':
        self.transforms = torchaudio.transforms.MelSpectrogram(
            sample_rate=sample_rate,
            win_length=window_size,
            hop_length=self.hop_length,
            n_fft=self.n_fft,
            n_mels=n_mels
        )
        self.amplitude_to_db = torchaudio.transforms.AmplitudeToDB()
Example #5
Source File: feature.py From KoSpeech with Apache License 2.0 | 6 votes |
def __call__(self, signal):
    if self.feature_extract_by == 'torchaudio':
        melspectrogram = self.transforms(torch.FloatTensor(signal))
        melspectrogram = self.amplitude_to_db(melspectrogram)
        melspectrogram = melspectrogram.numpy()

    elif self.feature_extract_by == 'librosa':
        melspectrogram = librosa.feature.melspectrogram(
            y=signal,
            sr=self.sample_rate,
            n_mels=self.n_mels,
            n_fft=self.n_fft,
            hop_length=self.hop_length
        )
        melspectrogram = librosa.amplitude_to_db(melspectrogram, ref=np.max)

    else:
        raise ValueError("Unsupported library : {0}".format(self.feature_extract_by))

    return melspectrogram
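Examples #4 and #5 are the constructor and call operator of a single feature-extractor class whose name is not shown. A hedged usage sketch, using SpectrogramExtractor as a purely hypothetical name for that class:

import numpy as np

# Hypothetical class name; the actual KoSpeech class is not shown above.
extractor = SpectrogramExtractor(sample_rate=16000, n_mels=80, feature_extract_by='librosa')
signal = np.random.randn(16000).astype(np.float32)  # one second of dummy audio
mel_db = extractor(signal)  # (n_mels, n_frames) dB-scaled mel spectrogram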
Example #6
Source File: audio.py From deep-voice-conversion with MIT License | 6 votes |
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, normalize=False, max_db=None, min_db=None, time_first=True, **kwargs):
    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if normalize else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db
Example #7
Source File: test_audio.py From emlearn with MIT License | 6 votes |
def test_melfilter_librosa():
    filename = librosa.util.example_audio_file()
    y, sr = librosa.load(filename, offset=1.0, duration=0.3)

    n_fft = 1024
    hop_length = 256
    fmin = 500
    fmax = 5000
    n_mels = 16

    spec = numpy.abs(librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length))**2
    spec1 = spec[:, 0]

    ref = librosa.feature.melspectrogram(S=spec1, sr=sr, norm=None, htk=True,
                                         n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
    out = eml_audio.melfilter(spec1, sr, n_fft, n_mels, fmin, fmax)

    fig, (ref_ax, out_ax) = plt.subplots(2)

    def specshow(d, ax):
        s = librosa.amplitude_to_db(d, ref=numpy.max)
        librosa.display.specshow(s, ax=ax, x_axis='time')

    specshow(ref.reshape(-1, 1), ax=ref_ax)
    specshow(out.reshape(-1, 1), ax=out_ax)
    fig.savefig('melfilter.librosa.png')

    assert ref.shape == out.shape
    numpy.testing.assert_allclose(ref, out, rtol=0.01)
Example #8
Source File: feature_extract.py From voice-vector with MIT License | 6 votes |
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, normalize=False, max_db=None, min_db=None, time_first=True, **kwargs):
    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if normalize else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db
Example #9
Source File: audio.py From voice-vector with MIT License | 6 votes |
def wav2melspec_db(wav, sr, n_fft, win_length, hop_length, n_mels, normalize=False, max_db=None, min_db=None, time_first=True, **kwargs):
    # Mel-spectrogram
    mel_spec = wav2melspec(wav, sr, n_fft, win_length, hop_length, n_mels, time_first=False, **kwargs)

    # Decibel
    mel_db = librosa.amplitude_to_db(mel_spec)

    # Normalization
    mel_db = normalize_db(mel_db, max_db, min_db) if normalize else mel_db

    # Time-axis first
    if time_first:
        mel_db = mel_db.T  # (t, n_mels)

    return mel_db
Example #10
Source File: utils.py From nussl with MIT License | 6 votes |
def visualize_spectrogram(audio_signal, ch=0, do_mono=False, x_axis='time', y_axis='linear', **kwargs):
    """
    Wrapper around `librosa.display.specshow` for usage with AudioSignals.

    Args:
        audio_signal (AudioSignal): AudioSignal to plot
        ch (int, optional): Which channel to plot. Defaults to 0.
        do_mono (bool, optional): Make the AudioSignal mono. Defaults to False.
        x_axis (str, optional): x_axis argument to librosa.display.specshow. Defaults to 'time'.
        y_axis (str, optional): y_axis argument to librosa.display.specshow. Defaults to 'linear'.
        kwargs: Additional keyword arguments to librosa.display.specshow.
    """
    import librosa.display

    if do_mono:
        audio_signal = audio_signal.to_mono(overwrite=False)

    data = librosa.amplitude_to_db(np.abs(audio_signal.stft()), ref=np.max)
    librosa.display.specshow(data[..., ch], x_axis=x_axis, y_axis=y_axis,
                             sr=audio_signal.sample_rate,
                             hop_length=audio_signal.stft_params.hop_length,
                             **kwargs)
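A hedged usage sketch for the wrapper above, assuming nussl's AudioSignal constructor accepts a file path ('mix.wav' is a placeholder; the exact API may vary between nussl versions):

import matplotlib.pyplot as plt
import nussl

signal = nussl.AudioSignal('mix.wav')  # placeholder path
visualize_spectrogram(signal, do_mono=True, y_axis='log')
plt.show()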
Example #11
Source File: audio.py From Multilingual_Text_to_Speech with MIT License | 5 votes |
def spectrogram(y, mel=False):
    """Convert waveform to log-magnitude spectrogram."""
    if hp.use_preemphasis:
        y = preemphasis(y)
    wf = ms_to_frames(hp.stft_window_ms)
    hf = ms_to_frames(hp.stft_shift_ms)
    S = np.abs(librosa.stft(y, n_fft=hp.num_fft, hop_length=hf, win_length=wf))
    if mel:
        S = librosa.feature.melspectrogram(S=S, sr=hp.sample_rate, n_mels=hp.num_mels)
    return amplitude_to_db(S)
Example #12
Source File: audio.py From Multilingual_Text_to_Speech with MIT License | 5 votes |
def amplitude_to_db(x):
    """Convert amplitude to decibels."""
    return librosa.amplitude_to_db(x, ref=np.max, top_db=None)
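Because ref=np.max scales everything relative to the loudest value, the output peaks at 0 dB and all other values are negative; top_db=None disables the default clipping of the dynamic range at 80 dB below the peak. A minimal sketch illustrating both effects:

import numpy as np
import librosa

x = np.array([1.0, 0.1, 1e-5])
print(librosa.amplitude_to_db(x, ref=np.max, top_db=None))
# -> [0., -20., -100.]: peak at 0 dB, quiet values not clipped
print(librosa.amplitude_to_db(x, ref=np.max))
# -> [0., -20., -80.]: default top_db=80.0 clips at 80 dB below the peak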
Example #13
Source File: utils.py From PyTorch_Speaker_Verification with BSD 3-Clause "New" or "Revised" License | 5 votes |
def mfccs_and_spec(wav_file, wav_process=False, calc_mfccs=False, calc_mag_db=False):
    sound_file, _ = librosa.core.load(wav_file, sr=hp.data.sr)
    window_length = int(hp.data.window * hp.data.sr)
    hop_length = int(hp.data.hop * hp.data.sr)
    duration = hp.data.tisv_frame * hp.data.hop + hp.data.window

    # Cut silence and fix length
    if wav_process:
        sound_file, index = librosa.effects.trim(sound_file, frame_length=window_length, hop_length=hop_length)
        length = int(hp.data.sr * duration)
        sound_file = librosa.util.fix_length(sound_file, length)

    spec = librosa.stft(sound_file, n_fft=hp.data.nfft, hop_length=hop_length, win_length=window_length)
    mag_spec = np.abs(spec)

    mel_basis = librosa.filters.mel(hp.data.sr, hp.data.nfft, n_mels=hp.data.nmels)
    mel_spec = np.dot(mel_basis, mag_spec)

    mag_db = librosa.amplitude_to_db(mag_spec)
    # dB mel spectrogram
    mel_db = librosa.amplitude_to_db(mel_spec).T

    mfccs = None
    if calc_mfccs:
        mfccs = np.dot(librosa.filters.dct(40, mel_db.shape[0]), mel_db).T

    return mfccs, mel_db, mag_db
Example #14
Source File: data_tools.py From Speech-enhancement with MIT License | 5 votes |
def audio_to_magnitude_db_and_phase(n_fft, hop_length_fft, audio):
    """Take an audio signal, convert it to a spectrogram, and return the magnitude in dB and the phase."""

    stftaudio = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length_fft)
    stftaudio_magnitude, stftaudio_phase = librosa.magphase(stftaudio)

    stftaudio_magnitude_db = librosa.amplitude_to_db(
        stftaudio_magnitude, ref=np.max)

    return stftaudio_magnitude_db, stftaudio_phase
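Going the other way, librosa.db_to_amplitude inverts the dB scaling. A minimal round-trip sketch ('audio' is a placeholder 1-D signal, e.g. from librosa.load); because the function above uses ref=np.max, the original peak magnitude must be kept separately to recover absolute scale:

import numpy as np
import librosa

mag = np.abs(librosa.stft(audio, n_fft=512, hop_length=128))
mag_db = librosa.amplitude_to_db(mag, ref=np.max)

# db_to_amplitude returns magnitudes relative to ref; pass the stored peak
# to recover absolute scale. The round trip is exact only for values within
# the default top_db=80 dB of the peak (quieter bins were clipped).
mag_rec = librosa.db_to_amplitude(mag_db, ref=mag.max())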
Example #15
Source File: plotting.py From DeepSpectrum with GNU General Public License v3.0 | 5 votes |
def plot_spectrogram(audio_data, sr, nfft=None, delta=None, **kwargs):
    spectrogram = librosa.stft(audio_data,
                               n_fft=nfft,
                               hop_length=int(nfft / 2),
                               center=False)
    if delta:
        spectrogram = librosa.feature.delta(spectrogram, order=delta)
    spectrogram = librosa.amplitude_to_db(spectrogram, ref=np.max, top_db=None)
    return _create_plot(spectrogram, sr, nfft, **kwargs)
Example #16
Source File: datautils.py From panotti with MIT License | 5 votes |
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96), ref_power=1.0)[np.newaxis, np.newaxis, :, :]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis, :, :, np.newaxis]  # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis, np.newaxis, :, :]
    '''
    return melgram
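One caveat worth noting here: librosa.feature.melspectrogram returns a power spectrogram by default (power=2.0), and the matching converter for power values is librosa.power_to_db. amplitude_to_db applies 20 * log10 rather than 10 * log10, so it doubles the dB values of an already-squared input. A minimal sketch of the distinction:

import numpy as np
import librosa

p = np.array([1.0, 0.01])            # power-scale values
print(librosa.power_to_db(p))        # [0., -20.]  (10 * log10)
print(librosa.amplitude_to_db(p))    # [0., -40.]  (20 * log10, doubled)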
Example #17
Source File: audio.py From deep-voice-conversion with MIT License | 5 votes |
def amp2db(amp):
    return librosa.amplitude_to_db(amp)
Example #18
Source File: audio.py From Multilingual_Text_to_Speech with MIT License | 5 votes |
def linear_to_mel(S):
    """Convert a linear spectrogram to a mel spectrogram (this does not return
    the same result as the mel_spec method, due to the dB -> amplitude conversion)."""
    S = db_to_amplitude(S)
    S = librosa.feature.melspectrogram(S=S, sr=hp.sample_rate, n_mels=hp.num_mels)
    return amplitude_to_db(S)
Example #19
Source File: audio.py From parallel-wavenet-vocoder with MIT License | 5 votes |
def amp2db(amp):
    return librosa.amplitude_to_db(amp)
Example #20
Source File: preprocessing.py From rnnt-speech-recognition with MIT License | 5 votes |
def plot_spec(spec, sr, transcription, name):
    spec_db = librosa.amplitude_to_db(spec, ref=np.max)

    plt.figure(figsize=(12, 4))
    # hop_length must be an integer number of samples (10 ms here).
    librosa.display.specshow(spec_db, sr=sr, x_axis='time', y_axis='mel',
                             hop_length=int(sr * 0.01))
    plt.colorbar(format='%+02.0f dB')
    plt.savefig('figs/{}.png'.format(name))
    plt.clf()
Example #21
Source File: feat_ext.py From LIVE_SER with Apache License 2.0 | 5 votes |
def extract_log_spectrogram_frame(self, frames, file=None, sr=16000, n_fft=512, hop_length=512):
    #spec = librosa.feature.logfsgram(y=frames, sr=sr, S=None, n_fft=n_fft, hop_length=hop_length)
    spec = np.abs(librosa.stft(frames, n_fft=n_fft))
    # Note: spec**2 is a power spectrogram, so librosa.power_to_db would be the
    # matching converter; amplitude_to_db applies 20*log10 to the squared values.
    log_spec = librosa.amplitude_to_db(spec**2)
    log_spec = log_spec.T

    if file is not None:
        np.savetxt(file, log_spec, fmt='%.8e', delimiter=';', newline='\n', header='', footer='')

    return log_spec
Example #22
Source File: features.py From msaf with MIT License | 5 votes |
def compute_features(self):
    """Actual implementation of the features.

    Returns
    -------
    cqt: np.array(N, F)
        The features, each row representing a feature vector for a given
        time frame/beat.
    """
    linear_cqt = np.abs(librosa.cqt(
        self._audio, sr=self.sr, hop_length=self.hop_length,
        n_bins=self.n_bins, norm=self.norm,
        filter_scale=self.filter_scale)) ** 2
    cqt = librosa.amplitude_to_db(linear_cqt, ref=self.ref_power).T
    return cqt
Example #23
Source File: audio.py From BirdCLEF-Baseline with MIT License | 5 votes |
def stft(sig, rate, shape=(128, 256), fmin=500, fmax=15000, normalize=True):
    # shape = (height, width) in pixels

    # STFT-Spec parameters
    N_FFT = int((rate * shape[0] * 2) / abs(fmax - fmin)) + 1
    P_MIN = int(float(N_FFT / 2) / rate * fmin) + 1
    P_MAX = int(float(N_FFT / 2) / rate * fmax) + 1
    HOP_LEN = len(sig) // (shape[1] - 1)

    # Librosa stft-spectrum
    spec = librosa.core.stft(sig, hop_length=HOP_LEN, n_fft=N_FFT, window='hamm')

    # Convert power spec to dB scale (compute dB relative to peak power)
    spec = librosa.amplitude_to_db(librosa.core.magphase(spec)[0], ref=np.max, top_db=80)

    # Trim to desired shape using cutoff frequencies
    spec = spec[P_MIN:P_MAX, :shape[1]]

    # Flip spectrum vertically (only for better visualization, low freq. at bottom)
    spec = spec[::-1, ...]

    # Normalize values between 0 and 1
    if normalize:
        spec -= spec.min()
        if not spec.max() == 0:
            spec /= spec.max()
        else:
            spec = np.clip(spec, 0, 1)

    return spec.astype('float32')
Example #24
Source File: audio.py From BirdCLEF-Baseline with MIT License | 5 votes |
def melspec(sig, rate, shape=(128, 256), fmin=500, fmax=15000, normalize=True, preemphasis=0.95):
    # shape = (height, width) in pixels

    # Mel-Spec parameters
    SAMPLE_RATE = rate
    N_FFT = shape[0] * 8  # = window length
    N_MELS = shape[0]
    HOP_LEN = len(sig) // (shape[1] - 1)
    FMAX = fmax
    FMIN = fmin

    # Preemphasis as in python_speech_features by James Lyons
    if preemphasis:
        sig = np.append(sig[0], sig[1:] - preemphasis * sig[:-1])

    # Librosa mel-spectrum
    melspec = librosa.feature.melspectrogram(y=sig, sr=SAMPLE_RATE, hop_length=HOP_LEN,
                                             n_fft=N_FFT, n_mels=N_MELS, fmax=FMAX, fmin=FMIN, power=1.0)

    # Convert power spec to dB scale (compute dB relative to peak power)
    melspec = librosa.amplitude_to_db(melspec, ref=np.max, top_db=80)

    # Flip spectrum vertically (only for better visualization, low freq. at bottom)
    melspec = melspec[::-1, ...]

    # Trim to desired shape if too large
    melspec = melspec[:shape[0], :shape[1]]

    # Normalize values between 0 and 1
    if normalize:
        melspec -= melspec.min()
        if not melspec.max() == 0:
            melspec /= melspec.max()
        else:
            melspec = np.clip(melspec, 0, 1)

    return melspec.astype('float32')
Example #25
Source File: mel.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the Mel spectrogram

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, n_mels)
            The Mel spectrogram
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
                                 n_fft=self.n_fft,
                                 hop_length=self.hop_length,
                                 n_mels=self.n_mels,
                                 fmax=self.fmax))

    mel = fix_length(mel, n_frames)

    if self.log:
        mel = amplitude_to_db(mel, ref=np.max)

    # Type convert
    mel = to_dtype(mel, self.dtype)

    return {'mag': mel.T[self.idx]}
Example #26
Source File: cqt.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the HCQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins, n_harmonics)
            The CQT magnitude

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    cqtm, phase = [], []

    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    for h in self.harmonics:
        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin * h,
                n_bins=(self.n_octaves * self.over_sample * 12),
                bins_per_octave=(self.over_sample * 12))

        C = fix_length(C, n_frames)

        C, P = magphase(C)
        if self.log:
            C = amplitude_to_db(C, ref=np.max)

        cqtm.append(C)
        phase.append(P)

    cqtm = to_dtype(np.asarray(cqtm), self.dtype)
    phase = to_dtype(np.angle(np.asarray(phase)), self.dtype)

    return {'mag': self._index(cqtm),
            'phase': self._index(phase)}
Example #27
Source File: cqt.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    dphase = phase_diff(np.angle(phase).T[self.idx], self.conv)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'dphase': to_dtype(dphase, self.dtype)}
Example #28
Source File: cqt.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the CQT

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape = (n_frames, n_bins)
            The CQT magnitude

        data['phase']: np.ndarray, shape = mag.shape
            The CQT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
            fmin=self.fmin,
            n_bins=(self.n_octaves * self.over_sample * 12),
            bins_per_octave=(self.over_sample * 12))

    C = fix_length(C, n_frames)

    cqtm, phase = magphase(C)
    if self.log:
        cqtm = amplitude_to_db(cqtm, ref=np.max)

    return {'mag': to_dtype(cqtm.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase).T[self.idx], self.dtype)}
Example #29
Source File: fft.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase differential.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude

        data['dphase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)

    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    phase = phase_diff(np.angle(phase.T)[self.idx], self.conv)

    return {'mag': to_dtype(mag.T[self.idx], self.dtype),
            'dphase': to_dtype(phase, self.dtype)}
Example #30
Source File: fft.py From pumpp with ISC License | 5 votes |
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude

        data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)

    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    return {'mag': to_dtype(mag.T[self.idx], self.dtype),
            'phase': to_dtype(np.angle(phase.T)[self.idx], self.dtype)}