Python librosa.power_to_db() Examples
The following are 30 code examples of librosa.power_to_db(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module librosa, or try the search function.
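Before the examples, here is a minimal, self-contained sketch of the call itself: power_to_db converts a power spectrogram to decibel units. The file name 'audio.wav' and the parameter values are illustrative placeholders, not taken from any project below; ref, amin, and top_db are librosa's documented keyword arguments.

import numpy as np
import librosa

# Load audio and build a mel power spectrogram ('audio.wav' is a placeholder path).
y, sr = librosa.load('audio.wav')
S = librosa.feature.melspectrogram(y=y, sr=sr)

# Convert power to dB: 10 * log10(S / ref), with values floored at amin before
# the log, and the output clipped to at most top_db below its maximum.
S_db = librosa.power_to_db(S, ref=np.max, amin=1e-10, top_db=80.0)

Most examples below follow exactly this pattern: build a (mel) power spectrogram, then log-scale it with power_to_db before feeding it to MFCC extraction, onset detection, plotting, or a neural network.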
Example #1
Source File: test_rythm.py From audiomate with MIT License | 7 votes |
def test_compute_cleanup_after_one_utterance(self):
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED
    S = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    onsets = librosa.onset.onset_strength(S=S, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onsets, sr=sr, win_length=11, center=True).T

    # ACTUAL
    tgram_step = pipeline.Tempogram(win_length=11)

    # FIRST RUN
    tgrams = tgram_step.process_frames(frames, sr, last=True)
    assert np.allclose(tgrams, exp_tgram)

    # SECOND RUN
    tgrams = tgram_step.process_frames(frames, sr, last=True)
    assert np.allclose(tgrams, exp_tgram)
Example #2
Source File: test_onset.py From audiomate with MIT License | 6 votes |
def test_compute(self):
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED
    S = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    exp_onsets = librosa.onset.onset_strength(S=S, center=False).T
    exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

    # ACTUAL
    onset = pipeline.OnsetStrength()
    onsets = onset.process_frames(frames, sr, last=True)

    assert np.allclose(onsets, exp_onsets)
Example #3
Source File: data.py From magenta with Apache License 2.0 | 6 votes |
def wav_to_spec(wav_audio, hparams):
    """Transforms the contents of a wav file into a series of spectrograms."""
    if hparams.spec_type == 'raw':
        spec = _wav_to_framed_samples(wav_audio, hparams)
    else:
        if hparams.spec_type == 'cqt':
            spec = _wav_to_cqt(wav_audio, hparams)
        elif hparams.spec_type == 'mel':
            spec = _wav_to_mel(wav_audio, hparams)
        else:
            raise ValueError('Invalid spec_type: {}'.format(hparams.spec_type))

        if hparams.spec_log_amplitude:
            spec = librosa.power_to_db(spec)

    return spec
Example #4
Source File: spec_augment_tensorflow.py From Speech-Transformer with MIT License | 6 votes |
def visualization_tensor_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # session for plotting
    sess = tf.InteractiveSession()
    mel_spectrogram = mel_spectrogram.eval()

    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #5
Source File: preprocessing.py From speechT with Apache License 2.0 | 6 votes |
def calc_power_spectrogram(audio_data, samplerate, n_mels=128, n_fft=512, hop_length=160):
    """
    Calculate power spectrogram from the given raw audio data

    Args:
      audio_data: numpy array of raw audio wave
      samplerate: the sample rate of the `audio_data`
      n_mels: the number of mels to generate
      n_fft: the window size of the fft
      hop_length: the hop length for the window

    Returns:
      the spectrogram in the form [time, n_mels]
    """
    spectrogram = librosa.feature.melspectrogram(audio_data, sr=samplerate, n_mels=n_mels,
                                                 n_fft=n_fft, hop_length=hop_length)

    # convert to log scale (dB)
    log_spectrogram = librosa.power_to_db(spectrogram, ref=np.max)

    # normalize
    normalized_spectrogram = normalize(log_spectrogram)

    return normalized_spectrogram.T
Example #6
Source File: test_onset.py From audiomate with MIT License | 6 votes |
def test_compute_online(self):
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)

    # EXPECTED
    y_pad = np.pad(y, (0, 1024), mode='constant', constant_values=0)
    S = np.abs(librosa.stft(y_pad, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    exp_onsets = librosa.onset.onset_strength(S=S, center=False).T
    exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

    # ACTUAL
    test_file = tracks.FileTrack('idx', test_file_path)
    onset = pipeline.OnsetStrength()
    onset_gen = onset.process_track_online(test_file, 2048, 1024, chunk_size=5)
    chunks = list(onset_gen)
    onsets = np.vstack(chunks)

    print(onsets.shape, exp_onsets.shape)

    assert np.allclose(onsets, exp_onsets)
Example #7
Source File: test_rythm.py From audiomate with MIT License | 6 votes |
def test_compute_online(self):
    # Data: 41523 samples, 16 kHz
    # yields 40 frames with frame-size 2048 and hop-size 1024
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)

    # EXPECTED
    y_pad = np.pad(y, (0, 1024), mode='constant', constant_values=0)
    S = np.abs(librosa.stft(y_pad, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    onsets = librosa.onset.onset_strength(S=S, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onsets, sr=sr, win_length=4, center=True).T

    # ACTUAL
    test_file = tracks.FileTrack('idx', test_file_path)
    tgram_step = pipeline.Tempogram(win_length=4)
    tgram_gen = tgram_step.process_track_online(test_file, 2048, 1024, chunk_size=5)
    chunks = list(tgram_gen)
    tgrams = np.vstack(chunks)

    assert np.allclose(tgrams, exp_tgram)
Example #8
Source File: plotting.py From DeepSpectrum with GNU General Public License v3.0 | 6 votes |
def plot_mel_spectrogram(audio_data, sr, nfft=None, melbands=64, delta=None, **kwargs):
    spectrogram = y_limited_spectrogram(audio_data, sr=sr, nfft=nfft, ylim=kwargs['ylim'])
    kwargs['scale'] = 'mel'
    if delta:
        spectrogram = librosa.feature.delta(spectrogram, order=delta)
    spectrogram = librosa.feature.melspectrogram(S=np.abs(spectrogram)**2, sr=sr, n_mels=melbands)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max, top_db=None)
    return _create_plot(spectrogram, sr, nfft, **kwargs)
Example #9
Source File: onset.py From audiomate with MIT License | 6 votes |
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
    # Compute mel-spectrogram
    power_spec = np.abs(spectral.stft_from_frames(chunk.data.T)) ** 2
    mel = np.abs(librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate))
    mel_power = librosa.power_to_db(mel)

    # Compute onset strengths
    oenv = librosa.onset.onset_strength(S=mel_power, center=False)

    # Switch dimensions and add dimension to have frames
    oenv = oenv.T.reshape(oenv.shape[0], -1)

    # Remove context
    oenv = oenv[chunk.left_context:oenv.shape[0] - chunk.right_context]

    return oenv
Example #10
Source File: feature_extraction.py From Sound-Recognition-Tutorial with Apache License 2.0 | 6 votes |
def extract_mfcc(y, sr, size=3):
    """
    extract MFCC feature
    :param y: np.ndarray [shape=(n,)], real-valued the input signal (audio time series)
    :param sr: sample rate of 'y'
    :param size: the length (seconds) of random crop from original audio, default as 3 seconds
    :return: MFCC feature
    """
    # normalization
    y = y.astype(np.float32)
    normalization_factor = 1 / np.max(np.abs(y))
    y = y * normalization_factor

    # random crop
    start = random.randint(0, len(y) - size * sr)
    y = y[start: start + size * sr]

    # extract log mel spectrogram #####
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(melspectrogram), n_mfcc=20)
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta_delta = librosa.feature.delta(mfcc_delta)
    mfcc_comb = np.concatenate([mfcc, mfcc_delta, mfcc_delta_delta], axis=0)

    return mfcc_comb
Example #11
Source File: feature_extraction.py From Sound-Recognition-Tutorial with Apache License 2.0 | 6 votes |
def extract_logmel(y, sr, size=3):
    """
    extract log mel spectrogram feature
    :param y: the input signal (audio time series)
    :param sr: sample rate of 'y'
    :param size: the length (seconds) of random crop from original audio, default as 3 seconds
    :return: log-mel spectrogram feature
    """
    # normalization
    y = y.astype(np.float32)
    normalization_factor = 1 / np.max(np.abs(y))
    y = y * normalization_factor

    # random crop
    start = random.randint(0, len(y) - size * sr)
    y = y[start: start + size * sr]

    # extract log mel spectrogram #####
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024, n_mels=60)
    logmelspec = librosa.power_to_db(melspectrogram)

    return logmelspec
Example #12
Source File: create_patches.py From tartarus with MIT License | 6 votes |
def prepare_testset(dataset_name):
    spec_folder = common.SPECTRO_PATH + SPECTRO_FOLDER + "/"
    test_folder = common.DATA_DIR + '/spectro_%s_testset/' % dataset_name
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)
    items = open(common.DATASETS_DIR + '/items_index_test_%s.tsv' % dataset_name).read().splitlines()
    testset = []
    testset_index = []
    for t, track_id in enumerate(items):
        if MSD:
            msd_folder = track_id[2] + "/" + track_id[3] + "/" + track_id[4] + "/"
        else:
            msd_folder = ""
        file = spec_folder + msd_folder + track_id + ".pk"
        try:
            # load pickled spectrogram, convert to dB scale and re-pickle
            spec = pickle.load(open(file, 'rb'))
            spec = librosa.power_to_db(np.abs(spec) ** 2, ref=np.max).T
            pickle.dump(spec, open(test_folder + track_id + ".pk", "wb"))
            testset.append(track_id)
            testset_index.append(t)
            if t % 1000 == 0:
                print(t)
        except:
            print("no exist", file)
Example #13
Source File: singlelayer.py From EUSIPCO2017 with GNU Affero General Public License v3.0 | 6 votes |
def compute_spectrograms(filename):
    out_rate = 12000
    N_FFT = 512
    HOP_LEN = 256

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate * 3:  # if less than 3 seconds - can't process
        raise Exception("Audio duration is too short")

    logam = librosa.power_to_db
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=frames, sr=out_rate, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MEL_BANDS) ** 2,
              ref=1.0)

    # now going through spectrogram with the stride of the segment duration
    for start_idx in range(0, x.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield x[:, start_idx:start_idx + SEGMENT_DUR]
Example #14
Source File: melgram.py From mxnet-audio with MIT License | 5 votes |
def melgram_v1(audio_file_path, to_file):
    sig, fs = librosa.load(audio_file_path)
    pylab.axis('off')  # no axis
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
    S = librosa.feature.melspectrogram(y=sig, sr=fs)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()
Example #15
Source File: audio_transforms.py From htmpapers with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    stft = data['stft']
    sample_rate = data['sample_rate']
    n_fft = data['n_fft']
    mel_basis = librosa.filters.mel(sample_rate, n_fft, self.n_mels)
    s = np.dot(mel_basis, np.abs(stft)**2.0)
    data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
    return data
Example #16
Source File: audio_transforms.py From htmpapers with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    samples = data['samples']
    sample_rate = data['sample_rate']
    s = librosa.feature.melspectrogram(samples, sr=sample_rate, n_mels=self.n_mels)
    data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
    return data
Example #17
Source File: features.py From dcase_util with MIT License | 5 votes |
def extract(self, y):
    """Extract features for the audio signal.

    Parameters
    ----------
    y : numpy.ndarray [shape=(n,)]
        Audio signal

    Returns
    -------
    numpy.ndarray [shape=(n_mels, t)]
        mfccs

    """
    spectrogram = self.get_spectrogram(
        y=y,
        n_fft=self.n_fft,
        win_length_samples=self.win_length_samples,
        hop_length_samples=self.hop_length_samples,
        spectrogram_type=self.spectrogram_type,
        center=True,
        window=self.window
    )

    mel_spectrum = numpy.dot(self.mel_basis, spectrogram)

    mfccs = librosa.feature.mfcc(
        S=librosa.power_to_db(mel_spectrum),
        n_mfcc=self.n_mfcc
    )

    if self.omit_zeroth:
        # Remove first coefficient
        mfccs = mfccs[1:, :]

    return mfccs
Example #18
Source File: melgram.py From keras-audio with MIT License | 5 votes |
def melgram_v1(audio_file_path, to_file):
    sig, fs = librosa.load(audio_file_path)
    pylab.axis('off')  # no axis
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
    S = librosa.feature.melspectrogram(y=sig, sr=fs)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()
Example #19
Source File: Plot.py From Wave-U-Net with MIT License | 5 votes |
def draw_spectrogram(example_wav="musb_005_angela thomas wade_audio_model_without_context_cut_28234samples_61002samples_93770samples_126538.wav"):
    y, sr = Utils.load(example_wav, sr=None)
    spec = np.abs(librosa.stft(y, 512, 256, 512))
    norm_spec = librosa.power_to_db(spec**2)
    black_time_frames = np.array([28234, 61002, 93770, 126538]) / 256.0

    fig, ax = plt.subplots()
    img = ax.imshow(norm_spec)
    plt.vlines(black_time_frames, [0, 0, 0, 0], [10, 10, 10, 10], colors="red", lw=2, alpha=0.5)
    plt.vlines(black_time_frames, [256, 256, 256, 256], [246, 246, 246, 246], colors="red", lw=2, alpha=0.5)

    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    plt.colorbar(img, cax=cax)

    ax.xaxis.set_label_position("bottom")
    #ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x * 256.0 / sr))
    #ax.xaxis.set_major_formatter(ticks_x)
    ax.xaxis.set_major_locator(ticker.FixedLocator([i * sr / 256. for i in range(len(y) // sr + 1)]))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter([str(i) for i in range(len(y) // sr + 1)]))

    ax.yaxis.set_major_locator(ticker.FixedLocator([float(i) * 2000.0 / (sr / 2.0) * 256. for i in range(6)]))
    ax.yaxis.set_major_formatter(ticker.FixedFormatter([str(i * 2) for i in range(6)]))

    ax.set_xlabel("t (s)")
    ax.set_ylabel('f (KHz)')
    fig.set_size_inches(7., 3.)
    fig.savefig("spectrogram_example.pdf", bbox_inches='tight')
Example #20
Source File: spectrogram.py From cocktail-party with MIT License | 5 votes |
def signal_to_mel_spectrogram(self, audio_signal, log=True, get_phase=False):
    signal = audio_signal.get_data(channel_index=0)
    D = librosa.core.stft(signal, n_fft=self._N_FFT, hop_length=self._HOP_LENGTH)
    magnitude, phase = librosa.core.magphase(D)

    mel_spectrogram = np.dot(self._MEL_FILTER, magnitude)
    mel_spectrogram = mel_spectrogram ** 2

    if log:
        mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    if get_phase:
        return mel_spectrogram, phase
    else:
        return mel_spectrogram
Example #21
Source File: spec_augment_pytorch.py From Speech-Transformer with MIT License | 5 votes |
def visualization_spectrogram(mel_spectrogram, title):
    """visualizing result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #22
Source File: feature_extraction.py From deepsaber with GNU General Public License v3.0 | 5 votes |
def extract_features_mel(y, sr, hop, mel_dim=100):
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_dim, hop_length=hop)  # C2 is 65.4 Hz
    features = librosa.power_to_db(mel, ref=np.max)
    return features
Example #23
Source File: spec_augment_tensorflow.py From Speech-Transformer with MIT License | 5 votes |
def visualization_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #24
Source File: audio.py From MelNet with MIT License | 5 votes |
def pre_spec(self, x):
    return self.normalize(librosa.power_to_db(x) - self.hp.audio.ref_level_db)
Example #25
Source File: feature_extraction.py From deepsaber with GNU General Public License v3.0 | 5 votes |
def extract_features_multi_mel(y, sr=44100.0, hop=512, nffts=[1024, 2048, 4096], mel_dim=100):
    featuress = []
    for nfft in nffts:
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_dim, n_fft=nfft, hop_length=hop)  # C2 is 65.4 Hz
        features = librosa.power_to_db(mel, ref=np.max)
        featuress.append(features)
    features = np.stack(featuress, axis=1)
    return features
Example #26
Source File: spec_augment_pytorch.py From SpecAugment with Apache License 2.0 | 5 votes |
def visualization_spectrogram(mel_spectrogram, title):
    """visualizing result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #27
Source File: audio.py From argus-freesound with MIT License | 5 votes |
def audio_to_melspectrogram(audio):
    spectrogram = librosa.feature.melspectrogram(audio,
                                                 sr=config.sampling_rate,
                                                 n_mels=config.n_mels,
                                                 hop_length=config.hop_length,
                                                 n_fft=config.n_fft,
                                                 fmin=config.fmin,
                                                 fmax=config.fmax)
    spectrogram = librosa.power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram
Example #28
Source File: spectral.py From audiomate with MIT License | 5 votes |
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
    power_spec = np.abs(stft_from_frames(chunk.data.T)) ** 2

    mel = librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate)
    mel_power = librosa.power_to_db(mel)
    mfcc = librosa.feature.mfcc(S=mel_power, n_mfcc=self.n_mfcc)

    return mfcc.T
Example #29
Source File: magnitude_scaling.py From audiomate with MIT License | 5 votes |
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
    return librosa.power_to_db(chunk.data.T, ref=self.ref, amin=self.amin, top_db=self.top_db).T
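Example #29 forwards power_to_db's three tuning knobs unchanged from the processing step's configuration. As a hedged illustration of what they do (the toy array and values below are arbitrary, not taken from audiomate):

import numpy as np
import librosa

S = np.array([[1e-12, 1.0, 100.0]])  # toy power "spectrogram"
# ref sets the 0 dB reference, amin floors the input before the log to avoid
# log(0), and top_db clips the output to at most top_db below its maximum.
print(librosa.power_to_db(S, ref=1.0, amin=1e-10, top_db=80.0))
# -> roughly [[-60.   0.  20.]]: the 1e-12 bin is first floored to amin (-100 dB),
#    then clipped at the maximum (20 dB) minus top_db (80 dB), i.e. -60 dB.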
Example #30
Source File: test_spectral.py From audiomate with MIT License | 5 votes |
def test_compute(self):
    samples = np.arange(8096).astype(np.float32)

    D = np.abs(librosa.core.stft(samples, n_fft=2048, hop_length=512, center=False)) ** 2
    mel = librosa.feature.melspectrogram(S=D, sr=16000, n_mels=128)
    expected = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=13).T

    frames = librosa.util.frame(samples, frame_length=2048, hop_length=512).T

    mfcc = pipeline.MFCC(n_mfcc=13, n_mels=128)
    res = mfcc.process_frames(frames, sampling_rate=16000)

    assert np.array_equal(expected, res)