Python librosa.logamplitude() Examples
The following are 17 code examples of librosa.logamplitude().
You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the librosa module.
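Note: librosa.logamplitude() was deprecated and later removed from librosa (it is gone as of librosa 0.6) in favor of librosa.amplitude_to_db() and librosa.power_to_db(); the comments in Example #8 below make the same point. As a minimal sketch, not taken from any of the projects below, here is the modern equivalent of the common logamplitude(S, ref_power=np.max) pattern; the input file name audio.wav is a placeholder:

import numpy as np
import librosa

# Hypothetical input file; any format supported by audioread works.
y, sr = librosa.load('audio.wav', sr=12000)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=96, hop_length=256)

# librosa < 0.6:   log_S = librosa.logamplitude(S, ref_power=np.max)
# librosa >= 0.6:  melspectrogram returns a power spectrogram, so use
# power_to_db; the old ref_power keyword is named ref in the new API.
log_S = librosa.power_to_db(S, ref=np.max)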
Example #1
Source File: melspec.py From Deep-Music-Tagger with MIT License
def __extract_melspec(audio_fpath, audio_fname):
    """
    Using librosa to calculate log mel spectrogram values
    and scipy.misc to draw and store them (in grayscale).

    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    # Load sound file
    y, sr = librosa.load(audio_fpath, sr=12000)

    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y, sr=sr, hop_length=256, n_mels=96)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.logamplitude(S, ref_power=np.max)

    spectr_fname = audio_fname + '.png'
    subdir_path = __get_subdir(spectr_fname)

    # Draw log values matrix in grayscale
    scipy.misc.toimage(log_S).save(subdir_path.format(spectr_fname))
Example #2
Source File: rosa_loader.py From crnn-lid with GNU General Public License v3.0
def process_file(self, file_path):
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256

    src, sr = librosa.load(file_path, sr=SR)  # whole signal

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    mel_spectrogram = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                                    n_fft=N_FFT, n_mels=N_MELS) ** 2,
                            ref_power=1.0)
    mel_spectrogram = np.expand_dims(mel_spectrogram, -1)  # for 10secs shape (96, 469, 1)

    return mel_spectrogram
Example #3
Source File: han16.py From EUSIPCO2017 with GNU Affero General Public License v3.0
def compute_spectrograms(filename):
    out_rate = 22050

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate:  # if less than 1 second - can't process
        raise Exception("Audio duration is too short")

    normalized_audio = _normalize(frames)
    melspectr = librosa.feature.melspectrogram(y=normalized_audio, sr=out_rate,
                                               n_mels=N_MEL_BANDS, fmax=out_rate/2)
    logmelspectr = librosa.logamplitude(melspectr**2, ref_power=1.0)

    # now going through spectrogram with the stride of the segment duration
    for start_idx in range(0, logmelspectr.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield logmelspectr[:, start_idx:start_idx + SEGMENT_DUR]
Example #4
Source File: feature-converter.py From Content-based-Music-Recommendation with Apache License 2.0
def extract_features(basedir, extension='.au'):
    features = []
    labels = []
    # iterate over all files in all subdirectories of the base directory
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + extension))
        # apply function to all files
        for f in files:
            genre = f.split('/')[4].split('.')[0]
            if (genre == 'hiphop' or genre == 'rock' or
                    genre == 'pop' or genre == 'country'):
                print genre
                # Extract the mel-spectrogram
                y, sr = librosa.load(f)
                # Let's make and display a mel-scaled power (energy-squared) spectrogram
                mel_spec = librosa.feature.melspectrogram(y, sr=sr, n_mels=128,
                                                          hop_length=1024, n_fft=2048)
                # Convert to log scale (dB). We'll use the peak power as reference.
                log_mel_spec = librosa.logamplitude(mel_spec, ref_power=np.max)
                # make dimensions of the array even 128x1292
                log_mel_spec = np.resize(log_mel_spec, (128, 644))
                print log_mel_spec.shape
                # store into feature array
                features.append(log_mel_spec.flatten())
                # print len(np.array(log_mel_spec.T.flatten()))
                # Extract label
                label = genreDict.get(genre)
                labels.append(label)
            else:
                pass

    features = np.asarray(features).reshape(len(features), 82432)
    print features.shape
    print len(labels)
    return (features, one_hot_encode(labels))
Example #5
Source File: preproccess.py From MusicGenreClassification with MIT License
def prepossessingAudio(audioPath, ppFilePath):
    print 'Prepossessing ' + audioPath

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            # featuresArray.append(mfcc)
            featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print 'storing pp file: ' + ppFilePath
    f = open(ppFilePath, 'w')
    f.write(pickle.dumps(featuresArray))
    f.close()
Example #6
Source File: preproccess.py From MusicGenreClassification with MIT License
def prepossessingAudio(audioPath, ppFilePath):
    print 'Prepossessing ' + audioPath

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            featuresArray.append(mfcc)
            # featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print 'storing pp file: ' + ppFilePath
    f = open(ppFilePath, 'w')
    f.write(pickle.dumps(featuresArray))
    f.close()
Example #7
Source File: audio_processor.py From Music-Genre-Classification-with-Deep-Learning with MIT License
def compute_melgram(audio_path):
    ''' Compute a mel-spectrogram and returns it in a shape of (1,1,96,1366), where
    96 == #mel-bins and 1366 == #time frame

    parameters
    ----------
    audio_path: path for the audio file.
                Any format supported by audioread will work.
    More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frame..

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA*SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA*SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample-n_sample_fit)/2:(n_sample+n_sample_fit)/2]
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret
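A note on this example (the same compute_melgram appears in Example #10, and Example #15 trims the same way): the center-trimming slice src[(n_sample-n_sample_fit)/2:(n_sample+n_sample_fit)/2] relies on Python 2 integer division; under Python 3, / yields a float and NumPy rejects non-integer slice indices. A minimal Python-3-safe sketch of the same trim, assuming src, n_sample, and n_sample_fit as defined above:

# Floor division keeps the slice indices integral under both Python 2 and 3.
start = (n_sample - n_sample_fit) // 2
src = src[start:start + n_sample_fit]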
Example #8
Source File: datautils.py From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    # melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,   # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #     sr=sr, n_mels=96), ref_power=1.0)[np.newaxis, np.newaxis, :, :]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis, :, :, np.newaxis]   # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis, np.newaxis, :, :]
    '''
    return melgram
Example #9
Source File: preprocess_data.py From audio-classifier-keras-cnn with MIT License
def preprocess_dataset(inpath="Samples/", outpath="Preproc/"):

    if not os.path.exists(outpath):
        os.mkdir(outpath, 0o755)   # make a new directory for preproc'd files

    class_names = get_class_names(path=inpath)   # get the names of the subdirectories
    nb_classes = len(class_names)
    print("class_names = ", class_names)
    for idx, classname in enumerate(class_names):   # go through the subdirs

        if not os.path.exists(outpath + classname):
            os.mkdir(outpath + classname, 0o755)   # make a new subdirectory for preproc class

        class_files = os.listdir(inpath + classname)
        n_files = len(class_files)
        n_load = n_files
        print(' class name = {:14s} - {:3d}'.format(classname, idx),
              ", ", n_files, " files in this class", sep="")

        printevery = 20
        for idx2, infilename in enumerate(class_files):
            audio_path = inpath + classname + '/' + infilename
            if (0 == idx2 % printevery):
                print('\r Loading class: {:14s} ({:2d} of {:2d} classes)'.format(classname, idx + 1, nb_classes),
                      ", file ", idx2 + 1, " of ", n_load, ": ", audio_path, sep="")
            # start = timer()

            aud, sr = librosa.load(audio_path, sr=None)
            melgram = librosa.logamplitude(librosa.feature.melspectrogram(aud, sr=sr, n_mels=96),
                                           ref_power=1.0)[np.newaxis, np.newaxis, :, :]

            outfile = outpath + classname + '/' + infilename + '.npy'
            np.save(outfile, melgram)
Example #10
Source File: audio_processor.py From music-auto_tagging-keras with MIT License
def compute_melgram(audio_path):
    ''' Compute a mel-spectrogram and returns it in a shape of (1,1,96,1366), where
    96 == #mel-bins and 1366 == #time frame

    parameters
    ----------
    audio_path: path for the audio file.
                Any format supported by audioread will work.
    More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frame..

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA*SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA*SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample-n_sample_fit)/2:(n_sample+n_sample_fit)/2]
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret
Example #11
Source File: 03_autoencoding_and_tsne.py From Convolutional-Autoencoder-Music-Similarity with MIT License
def readFile(filenbr):
    # Load data as array, noting that the log amplitude must be taken to scale the values
    spec = librosa.logamplitude(np.loadtxt(str(filenbr) + '.csv', delimiter=','),
                                ref_power=np.max)
    x_train = spec.astype('float32') / 255.
    x_train = np.reshape(x_train, (512, 2584, 1))
    # Test data will be the same as training data
    return x_train
Example #12
Source File: 02_wav_features_and_spectrogram.py From Convolutional-Autoencoder-Music-Similarity with MIT License
def plotSpectrogram(self, mels=512, maxfreq=30000):
    # Plot the Mel power-scaled frequency spectrum, with any factor of 128
    # frequency bins and 512 frames (frame default)
    mel = librosa.feature.melspectrogram(y=self.wav, sr=self.samplefreq,
                                         n_mels=mels, fmax=maxfreq)
    librosa.display.specshow(librosa.logamplitude(mel, ref_power=np.max),
                             y_axis='mel', fmax=maxfreq, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Power-Scaled Frequency Spectrogram')
    plt.tight_layout()
    plt.show()
    return mel
Example #13
Source File: melspec.py From Deep-Music-Tagger with MIT License
def __extract_hpss_melspec(audio_fpath, audio_fname):
    """
    Extension of :func:`__extract_melspec`.

    Not used as it's about ten times slower, but if you have resources, try it out.

    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    y, sr = librosa.load(audio_fpath, sr=44100)

    # Harmonic-percussive source separation
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    S_h = librosa.feature.melspectrogram(y_harmonic, sr=sr, n_mels=128)
    S_p = librosa.feature.melspectrogram(y_percussive, sr=sr, n_mels=128)

    log_S_h = librosa.logamplitude(S_h, ref_power=np.max)
    log_S_p = librosa.logamplitude(S_p, ref_power=np.max)

    spectr_fname_h = (audio_fname + '_h.png')
    spectr_fname_p = (audio_fname + '_p.png')
    subdir_path = __get_subdir(audio_fname)

    scipy.misc.toimage(log_S_h).save(subdir_path.format(spectr_fname_h))
    scipy.misc.toimage(log_S_p).save(subdir_path.format(spectr_fname_p))
Example #14
Source File: data_analysis.py From Sound-Recognition-Tutorial with Apache License 2.0
def plot_spectrum(sound_files, sound_names):
    """plot log power spectrum"""
    i = 1
    fig = plt.figure(figsize=(20, 64))
    for f, n in zip(sound_files, sound_names):
        y, sr = librosa.load(os.path.join('./data/esc10/audio/', f))
        plt.subplot(10, 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(y)) ** 2, ref_power=np.max)
        librosa.display.specshow(D, sr=sr, y_axis='log')
        plt.title(n + ' - ' + 'Spectrum')
        i += 1
    plt.tight_layout(pad=10)
    plt.show()
Example #15
Source File: audio_conv_utils.py From deep-learning-models with MIT License
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # trim the signal at the center
    if n_sample < n_sample_wanted:  # if too short
        src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
    elif n_sample > n_sample_wanted:  # if too long
        src = src[(n_sample - n_sample_wanted) / 2:
                  (n_sample + n_sample_wanted) / 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)
    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        x = np.expand_dims(x, axis=3)
    return x
Example #16
Source File: extract_feature.py From end2end_AU_speech with MIT License
def extract_one_file(videofile, audiofile):
    print(" --- " + videofile)
    ### return mfcc, fbank
    # get video FPS
    nFrames, fps = get_fps(videofile)
    # load audio
    data, sr = librosa.load(audiofile, sr=44100)  # data is np.float32
    # number of audio samples per video frame
    nSamPerFrame = int(math.floor(float(sr) / fps))
    # number of samples per 0.025s
    n25sSam = int(math.ceil(float(sr) * 0.025))
    # number of samples per step
    nSamPerStep = 512  # int(math.floor(float(sr) * 0.01))
    # number of steps per frame
    nStepsPerFrame = 3  # int(math.floor(float(nSamPerFrame) / float(nSamPerStep)))
    # real frame size
    nFrameSize = (nStepsPerFrame - 1) * nSamPerStep + n25sSam
    # initial position in the sound stream
    # initPos negative means we need zero padding at the front.
    curPos = nSamPerFrame - nFrameSize
    mfccs = []
    melspecs = []
    chromas = []
    for f in range(0, nFrames):
        # extract features
        frameData, nextPos = extract_one_frame_data(data, curPos, nFrameSize, nSamPerFrame)
        curPos = nextPos
        S = librosa.feature.melspectrogram(frameData, sr, n_mels=128, hop_length=nSamPerStep)
        # 1st is log mel spectrogram
        log_S = librosa.logamplitude(S, ref_power=np.max)
        # 2nd is MFCC and its deltas
        mfcc = librosa.feature.mfcc(y=frameData, sr=sr, hop_length=nSamPerStep, n_mfcc=13)
        delta_mfcc = librosa.feature.delta(mfcc)
        delta2_mfcc = librosa.feature.delta(delta_mfcc)
        # 3rd is chroma
        chroma = librosa.feature.chroma_cqt(frameData, sr, hop_length=nSamPerStep)
        full_mfcc = np.concatenate([mfcc[:, 0:3].flatten(),
                                    delta_mfcc[:, 0:3].flatten(),
                                    delta2_mfcc[:, 0:3].flatten()])
        mfccs.append(full_mfcc.tolist())
        melspecs.append(log_S[:, 0:3].flatten().tolist())
        chromas.append(chroma[:, 0:3].flatten().tolist())
    return (mfccs, melspecs, chromas)
Example #17
Source File: utils.py From time-domain-neural-audio-style-transfer with Apache License 2.0
def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(audio,
                         n_fft=n_fft,
                         win_length=n_fft,
                         hop_length=hop_length,
                         center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (librosa.logamplitude(mag**2, amin=1e-13, top_db=peak,
                                ref_power=np.max) / peak) + 1
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    plt.register_cmap(cmap=my_mask)
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
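For this last example, the equivalent call under librosa >= 0.6 would be power_to_db, with ref_power renamed to ref; the amin and top_db keywords carry over unchanged. A sketch under that assumption:

# librosa >= 0.6 equivalent of the logamplitude call above:
mag = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak,
                           ref=np.max) / peak) + 1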