Python librosa.logamplitude() Examples

The following are 17 code examples of librosa.logamplitude(), collected from open-source projects. The source file, project, and license for each example are listed above its code. Note that logamplitude() has since been deprecated and removed from librosa (as of 0.6.0); its replacements are librosa.amplitude_to_db() for magnitude spectrograms and librosa.power_to_db() for power spectrograms, with the ref_power argument renamed to ref.
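As a quick orientation: most examples below follow the same pattern. Here is a minimal sketch of it against both the old and the new API (the input file name is hypothetical; the last line assumes librosa >= 0.6):

import numpy as np
import librosa

y, sr = librosa.load('audio.wav', sr=12000)

# Mel power spectrogram, as used throughout the examples below
S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=256, n_mels=96)

# Old API (librosa < 0.6), as in the examples:
#   log_S = librosa.logamplitude(S, ref_power=np.max)
# Modern equivalent:
log_S = librosa.power_to_db(S, ref=np.max)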
Example #1
Source File: melspec.py    From Deep-Music-Tagger with MIT License
def __extract_melspec(audio_fpath, audio_fname):
    """
    Using librosa to calculate log mel spectrogram values
    and scipy.misc to draw and store them (in grayscale).

    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    # Load sound file
    y, sr = librosa.load(audio_fpath, sr=12000)

    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y, sr=sr, hop_length=256, n_mels=96)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.logamplitude(S, ref_power=np.max)

    spectr_fname = audio_fname + '.png'
    subdir_path = __get_subdir(spectr_fname)

    # Draw log values matrix in grayscale
    scipy.misc.toimage(log_S).save(subdir_path.format(spectr_fname)) 
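scipy.misc.toimage() used above has since been removed from SciPy. A rough stand-in with Pillow, assuming the default min-to-max byte scaling that toimage applied, might look like:

import numpy as np
from PIL import Image

def save_grayscale_png(log_S, out_path):
    # Rescale the dB matrix to 0-255 and write an 8-bit grayscale image
    lo, hi = log_S.min(), log_S.max()
    img = np.uint8(255 * (log_S - lo) / (hi - lo))
    Image.fromarray(img).save(out_path)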
Example #2
Source File: rosa_loader.py    From crnn-lid with GNU General Public License v3.0
def process_file(self, file_path):

        # mel-spectrogram parameters
        SR = 12000
        N_FFT = 512
        N_MELS = 96
        HOP_LEN = 256

        src, sr = librosa.load(file_path, sr=SR)  # whole signal

        logam = librosa.logamplitude
        melgram = librosa.feature.melspectrogram
        mel_spectrogram = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                                        n_fft=N_FFT, n_mels=N_MELS) ** 2,
                                ref_power=1.0)

        mel_spectrogram = np.expand_dims(mel_spectrogram, -1)

        # for 10secs shape (96, 469, 1)
        return mel_spectrogram 
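The shape comment checks out: a 10 s clip at SR = 12000 has 120000 samples, and with HOP_LEN = 256 the centered STFT produces 1 + 120000 // 256 = 469 frames, hence (96, 469, 1) after the channel axis is appended.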
Example #3
Source File: han16.py    From EUSIPCO2017 with GNU Affero General Public License v3.0
def compute_spectrograms(filename):
    out_rate = 22050

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate:
        # if less than 1 second - can't process
        raise Exception("Audio duration is too short")

    normalized_audio = _normalize(frames)
    melspectr = librosa.feature.melspectrogram(y=normalized_audio, sr=out_rate, n_mels=N_MEL_BANDS, fmax=out_rate/2)
    logmelspectr = librosa.logamplitude(melspectr**2, ref_power=1.0)

    # now going through spectrogram with the stride of the segment duration
    for start_idx in range(0, logmelspectr.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield logmelspectr[:, start_idx:start_idx + SEGMENT_DUR] 
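N_MEL_BANDS and SEGMENT_DUR are module-level constants in the original project. A minimal sketch of collecting the generator's output into a batch array (track name hypothetical):

import numpy as np

segments = np.stack(list(compute_spectrograms('track.mp3')))
# shape: (n_segments, N_MEL_BANDS, SEGMENT_DUR); any final partial window is dropped by the loop above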
Example #4
Source File: feature-converter.py    From Content-based-Music-Recommendation with Apache License 2.0
def extract_features(basedir,extension='.au'):
    features=[]
    labels=[]
    # iterate over all files in all subdirectories of the base directory
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root,'*'+extension))
        # apply function to all files
        for f in files :
            genre = f.split('/')[4].split('.')[0]

            if (genre == 'hiphop' or genre == 'rock' or genre == 'pop' or genre == 'country'):
                print(genre)
                # Extract the mel-spectrogram
                y, sr = librosa.load(f)
                # Let's make and display a mel-scaled power (energy-squared) spectrogram
                mel_spec = librosa.feature.melspectrogram(y, sr=sr,n_mels=128,hop_length=1024,n_fft=2048)
                # Convert to log scale (dB). We'll use the peak power as reference.
                log_mel_spec = librosa.logamplitude(mel_spec, ref_power=np.max)
                #resize to a uniform 128x644 (= 82432 values) so every clip flattens to the same length
                log_mel_spec = np.resize(log_mel_spec,(128,644))
                print(log_mel_spec.shape)
                #store into feature array
                features.append(log_mel_spec.flatten())
                # print len(np.array(log_mel_spec.T.flatten()))
                # Extract label
                label = genreDict.get(genre)
                labels.append(label)
            else:
                pass
    features = np.asarray(features).reshape(len(features),82432)
    print(features.shape)
    print(len(labels))

    return (features, one_hot_encode(labels)) 
Example #5
Source File: preproccess.py    From MusicGenreClassification with MIT License
def prepossessingAudio(audioPath, ppFilePath):
    print('Prepossessing ' + audioPath)

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            # featuresArray.append(mfcc)

            featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print('storing pp file: ' + ppFilePath)

    f = open(ppFilePath, 'wb')  # binary mode: pickled data is bytes
    f.write(pickle.dumps(featuresArray))
    f.close() 
Example #6
Source File: preproccess.py    From MusicGenreClassification with MIT License
def prepossessingAudio(audioPath, ppFilePath):
    print('Prepossessing ' + audioPath)

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            featuresArray.append(mfcc)

            # featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print('storing pp file: ' + ppFilePath)

    f = open(ppFilePath, 'wb')  # binary mode: pickled data is bytes
    f.write(pickle.dumps(featuresArray))
    f.close() 
Example #7
Source File: audio_processor.py    From Music-Genre-Classification-with-Deep-Learning with MIT License
def compute_melgram(audio_path):
    ''' Computes a mel-spectrogram and returns it with shape (1, 1, 96, 1366), where
    96 == #mel-bins and 1366 == #time frames

    parameters
    ----------
    audio_path: path for the audio file.
                Any format supported by audioread will work.
    More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load

    '''

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # chosen to give exactly 1366 frames

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA*SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA*SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample-n_sample_fit)//2:(n_sample+n_sample_fit)//2]  # // keeps the slice indices integral on Python 3
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret 
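The DURA constant is chosen so the frame count comes out exactly: 29.12 s x 12000 Hz = 349440 samples, and with hop_length = 256 the centered STFT yields 1 + 349440 // 256 = 1366 frames, matching the documented (1, 1, 96, 1366) shape.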
Example #8
Source File: datautils.py    From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96),ref_power=1.0)[np.newaxis,np.newaxis,:,:]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis,:,:,np.newaxis]     # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis,np.newaxis,:,:]
    '''
    return melgram 
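One caveat: melspectrogram() returns a power spectrogram by default, and amplitude_to_db(S) equals power_to_db(S**2), so the new call doubles the dB scale relative to the old logamplitude(..., ref_power=1.0) line. If exact parity with the old output matters, a like-for-like sketch would be:

melgram = librosa.power_to_db(librosa.feature.melspectrogram(
    y=mono_sig, sr=sr, n_mels=n_mels), ref=1.0)[np.newaxis, :, :, np.newaxis]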
Example #9
Source File: preprocess_data.py    From audio-classifier-keras-cnn with MIT License
def preprocess_dataset(inpath="Samples/", outpath="Preproc/"):

    if not os.path.exists(outpath):
        os.mkdir(outpath, 0o755)   # make a new directory for preproc'd files (0o755 is valid on Python 2.6+ and 3)

    class_names = get_class_names(path=inpath)   # get the names of the subdirectories
    nb_classes = len(class_names)
    print("class_names = ",class_names)
    for idx, classname in enumerate(class_names):   # go through the subdirs

        if not os.path.exists(outpath+classname):
            os.mkdir(outpath+classname, 0o755)   # make a new subdirectory for this class

        class_files = os.listdir(inpath+classname)
        n_files = len(class_files)
        n_load = n_files
        print(' class name = {:14s} - {:3d}'.format(classname,idx),
            ", ",n_files," files in this class",sep="")

        printevery = 20
        for idx2, infilename in enumerate(class_files):
            audio_path = inpath + classname + '/' + infilename
            if (0 == idx2 % printevery):
                print('\r Loading class: {:14s} ({:2d} of {:2d} classes)'.format(classname,idx+1,nb_classes),
                       ", file ",idx2+1," of ",n_load,": ",audio_path,sep="")
            #start = timer()
            aud, sr = librosa.load(audio_path, sr=None)
            melgram = librosa.logamplitude(librosa.feature.melspectrogram(aud, sr=sr, n_mels=96),ref_power=1.0)[np.newaxis,np.newaxis,:,:]
            outfile = outpath + classname + '/' + infilename+'.npy'
            np.save(outfile,melgram) 
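Each saved array has the channels-first shape (1, 1, 96, T) thanks to the double np.newaxis. Reading one back is a single call (the path is hypothetical):

melgram = np.load('Preproc/dog_bark/sample.wav.npy')
print(melgram.shape)   # (1, 1, 96, T)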
Example #10
Source File: audio_processor.py    From music-auto_tagging-keras with MIT License
def compute_melgram(audio_path):
    ''' Computes a mel-spectrogram and returns it with shape (1, 1, 96, 1366), where
    96 == #mel-bins and 1366 == #time frames

    parameters
    ----------
    audio_path: path for the audio file.
                Any format supported by audioread will work.
    More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load

    '''

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # chosen to give exactly 1366 frames

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA*SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA*SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample-n_sample_fit)//2:(n_sample+n_sample_fit)//2]  # // keeps the slice indices integral on Python 3
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret 
Example #11
Source File: 03_autoencoding_and_tsne.py    From Convolutional-Autoencoder-Music-Similarity with MIT License
def readFile(filenbr):
    #Load the saved spectrogram data as an array; the log amplitude is taken to scale the values
    spec = librosa.logamplitude(np.loadtxt(str(filenbr) + '.csv', delimiter=','), ref_power=np.max)
    x_train = spec.astype('float32') / 255.
    x_train = np.reshape(x_train, (512, 2584, 1))
    #Test data will be the same as training data
    return x_train 
Example #12
Source File: 02_wav_features_and_spectrogram.py    From Convolutional-Autoencoder-Music-Similarity with MIT License
def plotSpectrogram(self, mels=512, maxfreq=30000):
        #Plot the Mel power-scaled frequency spectrogram; mels sets the number of mel bins (512 here) and maxfreq the upper frequency limit
        mel = librosa.feature.melspectrogram(y=self.wav, sr=self.samplefreq, n_mels=mels, fmax=maxfreq)
        librosa.display.specshow(librosa.logamplitude(mel, ref_power=np.max), y_axis='mel', fmax=maxfreq, x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel Power-Scaled Frequency Spectrogram')
        plt.tight_layout()
        plt.show()
        return mel 
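On current librosa the same figure can be drawn by swapping the deprecated call for power_to_db (assumes librosa >= 0.6):

librosa.display.specshow(librosa.power_to_db(mel, ref=np.max), y_axis='mel', fmax=maxfreq, x_axis='time')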
Example #13
Source File: melspec.py    From Deep-Music-Tagger with MIT License
def __extract_hpss_melspec(audio_fpath, audio_fname):
    """
    Extension of :func:`__extract_melspec`.
    Not used as it's about ten times slower, but
    if you have resources, try it out.

    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    y, sr = librosa.load(audio_fpath, sr=44100)

    # Harmonic-percussive source separation
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    S_h = librosa.feature.melspectrogram(y_harmonic, sr=sr, n_mels=128)
    S_p = librosa.feature.melspectrogram(y_percussive, sr=sr, n_mels=128)

    log_S_h = librosa.logamplitude(S_h, ref_power=np.max)
    log_S_p = librosa.logamplitude(S_p, ref_power=np.max)

    spectr_fname_h = (audio_fname + '_h.png')
    spectr_fname_p = (audio_fname + '_p.png')

    subdir_path = __get_subdir(audio_fname)

    scipy.misc.toimage(log_S_h).save(subdir_path.format(spectr_fname_h))
    scipy.misc.toimage(log_S_p).save(subdir_path.format(spectr_fname_p)) 
Example #14
Source File: data_analysis.py    From Sound-Recognition-Tutorial with Apache License 2.0
def plot_spectrum(sound_files, sound_names):
    """plot log power spectrum"""
    i = 1
    fig = plt.figure(figsize=(20, 64))
    for f, n in zip(sound_files, sound_names):
        y, sr = librosa.load(os.path.join('./data/esc10/audio/', f))
        plt.subplot(10, 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(y)) ** 2, ref_power=np.max)
        librosa.display.specshow(D, sr=sr, y_axis='log')
        plt.title(n + ' - ' + 'Spectrum')

        i += 1

    plt.tight_layout(pad=10)
    plt.show() 
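In current librosa the logamplitude line above can be written either way; both give dB relative to the peak:

D = librosa.power_to_db(np.abs(librosa.stft(y)) ** 2, ref=np.max)
# equivalently: D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)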
Example #15
Source File: audio_conv_utils.py    From deep-learning-models with MIT License
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # trim the signal at the center
    if n_sample < n_sample_wanted:  # if too short
        src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
    elif n_sample > n_sample_wanted:  # if too long
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]  # // keeps the slice indices integral on Python 3

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)   # (1, 96, 1366), channels first
    elif dim_ordering == 'tf':
        x = np.expand_dims(x, axis=-1)  # (96, 1366, 1), channels last; axis=3 is out of range for a 2-D array on modern NumPy
    return x 
Example #16
Source File: extract_feature.py    From end2end_AU_speech with MIT License
def extract_one_file(videofile, audiofile):
    print (" --- " + videofile)
    ### return mfcc, fbank
    # get video FPS
    nFrames, fps = get_fps(videofile)
    # load audio
    data, sr = librosa.load(audiofile, sr=44100) # data is np.float32
    # number of audio samples per video frame
    nSamPerFrame = int(math.floor(float(sr) / fps))
    # number of samples per 0.025s
    n25sSam = int(math.ceil(float(sr) * 0.025))
    # number of sample per step
    nSamPerStep = 512  #int(math.floor(float(sr) * 0.01))
    # number of steps per frame
    nStepsPerFrame = 3 #int(math.floor(float(nSamPerFrame) / float(nSamPerStep)))
    # real frame size
    nFrameSize = (nStepsPerFrame - 1) * nSamPerStep + n25sSam
    # initial position in the audio stream; a negative value means
    # the first frame needs zero padding at the front
    curPos = nSamPerFrame - nFrameSize
    mfccs = []
    melspecs = []
    chromas = []
    for f in range(0,nFrames):
        # extract features
        frameData, nextPos = extract_one_frame_data(data, curPos, nFrameSize, nSamPerFrame)
        curPos = nextPos
        S = librosa.feature.melspectrogram(frameData, sr, n_mels=128, hop_length=nSamPerStep)
        # 1st is log mel spectrogram
        log_S = librosa.logamplitude(S, ref_power=np.max)
        # 2nd is MFCC and its deltas
        mfcc = librosa.feature.mfcc(y=frameData, sr=sr, hop_length=nSamPerStep, n_mfcc=13)
        delta_mfcc = librosa.feature.delta(mfcc)
        delta2_mfcc = librosa.feature.delta(delta_mfcc)
        # 3rd is chroma
        chroma = librosa.feature.chroma_cqt(frameData, sr, hop_length=nSamPerStep)        

        full_mfcc = np.concatenate([mfcc[:,0:3].flatten(), delta_mfcc[:,0:3].flatten(), delta2_mfcc[:,0:3].flatten()])
        mfccs.append(full_mfcc.tolist())
        melspecs.append(log_S[:,0:3].flatten().tolist())
        chromas.append(chroma[:,0:3].flatten().tolist())
    return (mfccs, melspecs, chromas) 
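For a sense of the window geometry, assume a 30 fps video: nSamPerFrame = floor(44100 / 30) = 1470, n25sSam = ceil(44100 * 0.025) = 1103, and nFrameSize = 2 * 512 + 1103 = 2127 samples, so each feature window covers about 48 ms of audio and advances by one video frame (1470 samples) per step.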
Example #17
Source File: utils.py    From time-domain-neural-audio-style-transfer with Apache License 2.0
def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(
            audio,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (librosa.logamplitude(
        mag**2, amin=1e-13, top_db=peak, ref_power=np.max) / peak) + 1
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    plt.register_cmap(cmap=my_mask)
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
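The deprecated call maps directly onto power_to_db; in current librosa the magnitude normalization line becomes:

mag = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1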