Python librosa.logamplitude() Examples
The following are 17 code examples of librosa.logamplitude().
You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the librosa module.
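Note: librosa.logamplitude() was deprecated and later removed from librosa (it is gone as of librosa 0.6) in favor of librosa.amplitude_to_db() and librosa.power_to_db(); the comments in Example #8 below make the same point. As a minimal sketch, not taken from any of the projects below, here is the modern equivalent of the common logamplitude(S, ref_power=np.max) pattern; the input file name audio.wav is a placeholder:

import numpy as np
import librosa

# Hypothetical input file; any format supported by audioread works.
y, sr = librosa.load('audio.wav', sr=12000)
S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=96, hop_length=256)

# librosa < 0.6:   log_S = librosa.logamplitude(S, ref_power=np.max)
# librosa >= 0.6:  melspectrogram returns a power spectrogram, so use
# power_to_db; the old ref_power keyword is named ref in the new API.
log_S = librosa.power_to_db(S, ref=np.max)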
Example #1
Source File: melspec.py From Deep-Music-Tagger with MIT License
def __extract_melspec(audio_fpath, audio_fname):
    """
    Using librosa to calculate log mel spectrogram values
    and scipy.misc to draw and store them (in grayscale).

    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    # Load sound file
    y, sr = librosa.load(audio_fpath, sr=12000)

    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y, sr=sr, hop_length=256, n_mels=96)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.logamplitude(S, ref_power=np.max)

    spectr_fname = audio_fname + '.png'
    subdir_path = __get_subdir(spectr_fname)

    # Draw log values matrix in grayscale
    scipy.misc.toimage(log_S).save(subdir_path.format(spectr_fname))
Example #2
Source File: rosa_loader.py From crnn-lid with GNU General Public License v3.0
def process_file(self, file_path):
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256

    src, sr = librosa.load(file_path, sr=SR)  # whole signal

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    mel_spectrogram = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                                    n_fft=N_FFT, n_mels=N_MELS) ** 2,
                            ref_power=1.0)
    mel_spectrogram = np.expand_dims(mel_spectrogram, -1)  # for 10secs shape (96, 469, 1)

    return mel_spectrogram
Example #3
Source File: han16.py From EUSIPCO2017 with GNU Affero General Public License v3.0
def compute_spectrograms(filename):
    out_rate = 22050

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate:  # if less than 1 second - can't process
        raise Exception("Audio duration is too short")

    normalized_audio = _normalize(frames)
    melspectr = librosa.feature.melspectrogram(y=normalized_audio, sr=out_rate,
                                               n_mels=N_MEL_BANDS, fmax=out_rate/2)
    logmelspectr = librosa.logamplitude(melspectr**2, ref_power=1.0)

    # now going through spectrogram with the stride of the segment duration
    for start_idx in range(0, logmelspectr.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield logmelspectr[:, start_idx:start_idx + SEGMENT_DUR]
Example #4
Source File: feature-converter.py From Content-based-Music-Recommendation with Apache License 2.0
def extract_features(basedir, extension='.au'):
    features = []
    labels = []
    # iterate over all files in all subdirectories of the base directory
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + extension))
        # apply function to all files
        for f in files:
            genre = f.split('/')[4].split('.')[0]
            if (genre == 'hiphop' or genre == 'rock' or
                    genre == 'pop' or genre == 'country'):
                print genre
                # Extract the mel-spectrogram
                y, sr = librosa.load(f)
                # Let's make and display a mel-scaled power (energy-squared) spectrogram
                mel_spec = librosa.feature.melspectrogram(y, sr=sr, n_mels=128,
                                                          hop_length=1024, n_fft=2048)
                # Convert to log scale (dB). We'll use the peak power as reference.
                log_mel_spec = librosa.logamplitude(mel_spec, ref_power=np.max)
                # make dimensions of the array even 128x1292
                log_mel_spec = np.resize(log_mel_spec, (128, 644))
                print log_mel_spec.shape
                # store into feature array
                features.append(log_mel_spec.flatten())
                # print len(np.array(log_mel_spec.T.flatten()))
                # Extract label
                label = genreDict.get(genre)
                labels.append(label)
            else:
                pass

    features = np.asarray(features).reshape(len(features), 82432)
    print features.shape
    print len(labels)
    return (features, one_hot_encode(labels))
Example #5
Source File: preproccess.py From MusicGenreClassification with MIT License
def prepossessingAudio(audioPath, ppFilePath):
    print 'Prepossessing ' + audioPath

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            # featuresArray.append(mfcc)
            featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print 'storing pp file: ' + ppFilePath
    f = open(ppFilePath, 'w')
    f.write(pickle.dumps(featuresArray))
    f.close()
Example #6
Source File: preproccess.py From MusicGenreClassification with MIT License
def prepossessingAudio(audioPath, ppFilePath):
    print 'Prepossessing ' + audioPath

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            featuresArray.append(mfcc)
            # featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print 'storing pp file: ' + ppFilePath
    f = open(ppFilePath, 'w')
    f.write(pickle.dumps(featuresArray))
    f.close()
Example #7
Source File: audio_processor.py From Music-Genre-Classification-with-Deep-Learning with MIT License
def compute_melgram(audio_path):
    ''' Compute a mel-spectrogram and returns it in a shape of (1,1,96,1366), where
    96 == #mel-bins and 1366 == #time frame

    parameters
    ----------
    audio_path: path for the audio file.
                Any format supported by audioread will work.
    More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frame..

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA*SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA*SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample-n_sample_fit)/2:(n_sample+n_sample_fit)/2]
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret
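A note on this example (the same compute_melgram appears in Example #10, and Example #15 trims the same way): the center-trimming slice src[(n_sample-n_sample_fit)/2:(n_sample+n_sample_fit)/2] relies on Python 2 integer division; under Python 3, / yields a float and NumPy rejects non-integer slice indices. A minimal Python-3-safe sketch of the same trim, assuming src, n_sample, and n_sample_fit as defined above:

# Floor division keeps the slice indices integral under both Python 2 and 3.
start = (n_sample - n_sample_fit) // 2
src = src[start:start + n_sample_fit]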
Example #8
Source File: datautils.py From panotti with MIT License
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    # melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,   # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #     sr=sr, n_mels=96), ref_power=1.0)[np.newaxis, np.newaxis, :, :]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis, :, :, np.newaxis]   # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis, np.newaxis, :, :]
    '''
    return melgram
Example #9
Source File: preprocess_data.py From audio-classifier-keras-cnn with MIT License
def preprocess_dataset(inpath="Samples/", outpath="Preproc/"):

    if not os.path.exists(outpath):
        os.mkdir(outpath, 0o755)   # make a new directory for preproc'd files

    class_names = get_class_names(path=inpath)   # get the names of the subdirectories
    nb_classes = len(class_names)
    print("class_names = ", class_names)
    for idx, classname in enumerate(class_names):   # go through the subdirs

        if not os.path.exists(outpath + classname):
            os.mkdir(outpath + classname, 0o755)   # make a new subdirectory for preproc class

        class_files = os.listdir(inpath + classname)
        n_files = len(class_files)
        n_load = n_files
        print(' class name = {:14s} - {:3d}'.format(classname, idx),
              ", ", n_files, " files in this class", sep="")

        printevery = 20
        for idx2, infilename in enumerate(class_files):
            audio_path = inpath + classname + '/' + infilename
            if (0 == idx2 % printevery):
                print('\r Loading class: {:14s} ({:2d} of {:2d} classes)'.format(classname, idx + 1, nb_classes),
                      ", file ", idx2 + 1, " of ", n_load, ": ", audio_path, sep="")
            # start = timer()

            aud, sr = librosa.load(audio_path, sr=None)
            melgram = librosa.logamplitude(librosa.feature.melspectrogram(aud, sr=sr, n_mels=96),
                                           ref_power=1.0)[np.newaxis, np.newaxis, :, :]

            outfile = outpath + classname + '/' + infilename + '.npy'
            np.save(outfile, melgram)
Example #10
Source File: audio_processor.py From music-auto_tagging-keras with MIT License
def compute_melgram(audio_path):
    ''' Compute a mel-spectrogram and returns it in a shape of (1,1,96,1366), where
    96 == #mel-bins and 1366 == #time frame

    parameters
    ----------
    audio_path: path for the audio file.
                Any format supported by audioread will work.
    More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frame..

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA*SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA*SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        src = src[(n_sample-n_sample_fit)/2:(n_sample+n_sample_fit)/2]
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS)**2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret
Example #11
Source File: 03_autoencoding_and_tsne.py From Convolutional-Autoencoder-Music-Similarity with MIT License
def readFile(filenbr):
    # Load data as array, noting that the log amplitude must be taken to scale the values
    spec = librosa.logamplitude(np.loadtxt(str(filenbr) + '.csv', delimiter=','),
                                ref_power=np.max)
    x_train = spec.astype('float32') / 255.
    x_train = np.reshape(x_train, (512, 2584, 1))
    # Test data will be the same as training data
    return x_train
Example #12
Source File: 02_wav_features_and_spectrogram.py From Convolutional-Autoencoder-Music-Similarity with MIT License
def plotSpectrogram(self, mels=512, maxfreq=30000):
    # Plot the Mel power-scaled frequency spectrum, with any factor of 128
    # frequency bins and 512 frames (frame default)
    mel = librosa.feature.melspectrogram(y=self.wav, sr=self.samplefreq,
                                         n_mels=mels, fmax=maxfreq)
    librosa.display.specshow(librosa.logamplitude(mel, ref_power=np.max),
                             y_axis='mel', fmax=maxfreq, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Power-Scaled Frequency Spectrogram')
    plt.tight_layout()
    plt.show()
    return mel
Example #13
Source File: melspec.py From Deep-Music-Tagger with MIT License
def __extract_hpss_melspec(audio_fpath, audio_fname):
    """
    Extension of :func:`__extract_melspec`.

    Not used as it's about ten times slower, but if you have resources, try it out.

    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    y, sr = librosa.load(audio_fpath, sr=44100)

    # Harmonic-percussive source separation
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    S_h = librosa.feature.melspectrogram(y_harmonic, sr=sr, n_mels=128)
    S_p = librosa.feature.melspectrogram(y_percussive, sr=sr, n_mels=128)

    log_S_h = librosa.logamplitude(S_h, ref_power=np.max)
    log_S_p = librosa.logamplitude(S_p, ref_power=np.max)

    spectr_fname_h = (audio_fname + '_h.png')
    spectr_fname_p = (audio_fname + '_p.png')
    subdir_path = __get_subdir(audio_fname)

    scipy.misc.toimage(log_S_h).save(subdir_path.format(spectr_fname_h))
    scipy.misc.toimage(log_S_p).save(subdir_path.format(spectr_fname_p))
Example #14
Source File: data_analysis.py From Sound-Recognition-Tutorial with Apache License 2.0
def plot_spectrum(sound_files, sound_names):
    """plot log power spectrum"""
    i = 1
    fig = plt.figure(figsize=(20, 64))
    for f, n in zip(sound_files, sound_names):
        y, sr = librosa.load(os.path.join('./data/esc10/audio/', f))
        plt.subplot(10, 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(y)) ** 2, ref_power=np.max)
        librosa.display.specshow(D, sr=sr, y_axis='log')
        plt.title(n + ' - ' + 'Spectrum')
        i += 1
    plt.tight_layout(pad=10)
    plt.show()
Example #15
Source File: audio_conv_utils.py From deep-learning-models with MIT License
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # trim the signal at the center
    if n_sample < n_sample_wanted:  # if too short
        src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
    elif n_sample > n_sample_wanted:  # if too long
        src = src[(n_sample - n_sample_wanted) / 2:
                  (n_sample + n_sample_wanted) / 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)
    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        x = np.expand_dims(x, axis=3)
    return x
Example #16
Source File: extract_feature.py From end2end_AU_speech with MIT License
def extract_one_file(videofile, audiofile):
    print(" --- " + videofile)
    ### return mfcc, fbank
    # get video FPS
    nFrames, fps = get_fps(videofile)
    # load audio
    data, sr = librosa.load(audiofile, sr=44100)  # data is np.float32
    # number of audio samples per video frame
    nSamPerFrame = int(math.floor(float(sr) / fps))
    # number of samples per 0.025s
    n25sSam = int(math.ceil(float(sr) * 0.025))
    # number of samples per step
    nSamPerStep = 512  # int(math.floor(float(sr) * 0.01))
    # number of steps per frame
    nStepsPerFrame = 3  # int(math.floor(float(nSamPerFrame) / float(nSamPerStep)))
    # real frame size
    nFrameSize = (nStepsPerFrame - 1) * nSamPerStep + n25sSam
    # initial position in the sound stream
    # initPos negative means we need zero padding at the front.
    curPos = nSamPerFrame - nFrameSize
    mfccs = []
    melspecs = []
    chromas = []
    for f in range(0, nFrames):
        # extract features
        frameData, nextPos = extract_one_frame_data(data, curPos, nFrameSize, nSamPerFrame)
        curPos = nextPos
        S = librosa.feature.melspectrogram(frameData, sr, n_mels=128, hop_length=nSamPerStep)
        # 1st is log mel spectrogram
        log_S = librosa.logamplitude(S, ref_power=np.max)
        # 2nd is MFCC and its deltas
        mfcc = librosa.feature.mfcc(y=frameData, sr=sr, hop_length=nSamPerStep, n_mfcc=13)
        delta_mfcc = librosa.feature.delta(mfcc)
        delta2_mfcc = librosa.feature.delta(delta_mfcc)
        # 3rd is chroma
        chroma = librosa.feature.chroma_cqt(frameData, sr, hop_length=nSamPerStep)
        full_mfcc = np.concatenate([mfcc[:, 0:3].flatten(),
                                    delta_mfcc[:, 0:3].flatten(),
                                    delta2_mfcc[:, 0:3].flatten()])
        mfccs.append(full_mfcc.tolist())
        melspecs.append(log_S[:, 0:3].flatten().tolist())
        chromas.append(chroma[:, 0:3].flatten().tolist())
    return (mfccs, melspecs, chromas)
Example #17
Source File: utils.py From time-domain-neural-audio-style-transfer with Apache License 2.0
def rainbowgram(path,
                ax,
                peak=70.0,
                use_cqt=False,
                n_fft=1024,
                hop_length=256,
                sr=22050,
                over_sample=4,
                res_factor=0.8,
                octaves=5,
                notes_per_octave=10):
    audio = librosa.load(path, sr=sr)[0]
    if use_cqt:
        C = librosa.cqt(audio,
                        sr=sr,
                        hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(audio,
                         n_fft=n_fft,
                         win_length=n_fft,
                         hop_length=hop_length,
                         center=True)
    mag, phase = librosa.core.magphase(C)
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi
    mag = (librosa.logamplitude(mag**2, amin=1e-13, top_db=peak,
                                ref_power=np.max) / peak) + 1
    cdict = {
        'red': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'blue': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
        'alpha': ((0.0, 1.0, 1.0), (1.0, 0.0, 0.0))
    }
    my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)
    plt.register_cmap(cmap=my_mask)
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
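For this last example, the equivalent call under librosa >= 0.6 would be power_to_db, with ref_power renamed to ref; the amin and top_db keywords carry over unchanged. A sketch under that assumption:

# librosa >= 0.6 equivalent of the logamplitude call above:
mag = (librosa.power_to_db(mag**2, amin=1e-13, top_db=peak,
                           ref=np.max) / peak) + 1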