Python Examples of librosa.display

Source File: 02_wav_features_and_spectrogram.py From Convolutional-Autoencoder-Music-Similarity with MIT License

6 votes

def plotTempogram(self):
        """
        The tempogram visualizes the rhythm (pattern recurrence), using the 
        onset envelope, oenv, to determine the start points for the patterns.
        """
        oenv = librosa.onset.onset_strength(y=self.wav, sr=self.samplefreq, hop_length=512)
        tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=self.samplefreq, hop_length=512)
        librosa.display.specshow(tempogram, sr=self.samplefreq, hop_length=512, x_axis='time', y_axis='tempo')
        plt.colorbar()
        plt.title('Tempogram')
        plt.tight_layout()
        plt.show()
        plt.plot(oenv, label='Onset strength')
        plt.title('Onset Strength Over Time')
        plt.xlabel('Time')
        plt.ylabel('Onset Strength')
        plt.show()
        return tempogram

Source File: plotting.py From DeepSpectrum with GNU General Public License v3.0

6 votes

def _create_plot(spectrogram,
                 sr,
                 nfft,
                 ylim=None,
                 cmap='viridis',
                 scale='linear',
                 **kwargs):
    if not ylim:
        ylim = sr / 2
    spectrogram_axes = librosa.display.specshow(spectrogram,
                                                hop_length=int(nfft / 2),
                                                fmax=ylim,
                                                sr=sr,
                                                cmap=cmap,
                                                y_axis=scale,
                                                x_axis='time')
    if scale == 'linear':
        spectrogram_axes.set_ylim(0, ylim)

    return spectrogram_axes

Source File: helpers.py From pychorus with MIT License

6 votes

def draw_lines(num_samples, sample_rate, lines):
    """Debugging function to draw detected lines in black"""
    lines_matrix = np.zeros((num_samples, num_samples))
    for line in lines:
        lines_matrix[line.lag:line.lag + 4, line.start:line.end + 1] = 1

    # Import here since this function is only for debugging
    import librosa.display
    import matplotlib.pyplot as plt
    librosa.display.specshow(
        lines_matrix,
        y_axis='time',
        x_axis='time',
        sr=sample_rate / (N_FFT / 2048))
    plt.colorbar()
    plt.set_cmap("hot_r")
    plt.show()

Source File: spec_augment_tensorflow.py From Speech-Transformer with MIT License

6 votes

def visualization_tensor_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment
    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """

    # session for plotting
    sess = tf.InteractiveSession()
    mel_spectrogram = mel_spectrogram.eval()

    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max), y_axis='mel', fmax=8000,
                             x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

Source File: utils.py From nussl with MIT License

6 votes

def visualize_spectrogram(audio_signal, ch=0, do_mono=False, x_axis='time', 
                          y_axis='linear',  **kwargs):
    """
    Wrapper around `librosa.display.specshow` for usage with AudioSignals.
    
    Args:
        audio_signal (AudioSignal): AudioSignal to plot
        ch (int, optional): Which channel to plot. Defaults to 0.
        do_mono (bool, optional): Make the AudioSignal mono. Defaults to False.
        x_axis (str, optional): x_axis argument to librosa.display.specshow. Defaults to 'time'.
        y_axis (str, optional): y_axis argument to librosa.display.specshow. Defaults to 'linear'.
        kwargs: Additional keyword arguments to librosa.display.specshow.
    """
    import librosa.display

    if do_mono:
        audio_signal = audio_signal.to_mono(overwrite=False)
    
    data = librosa.amplitude_to_db(np.abs(audio_signal.stft()), ref=np.max)
    librosa.display.specshow(data[..., ch], x_axis=x_axis, y_axis=y_axis, 
        sr=audio_signal.sample_rate, hop_length=audio_signal.stft_params.hop_length,
        **kwargs)

Source File: utils.py From nussl with MIT License

6 votes

def visualize_waveform(audio_signal, ch=0, do_mono=False, x_axis='time', **kwargs):
    """
    Wrapper around `librosa.display.waveplot` for usage with AudioSignals.
    
    Args:
        audio_signal (AudioSignal): AudioSignal to plot
        ch (int, optional): Which channel to plot. Defaults to 0.
        do_mono (bool, optional): Make the AudioSignal mono. Defaults to False.
        x_axis (str, optional): x_axis argument to librosa.display.waveplot. Defaults to 'time'.
        kwargs: Additional keyword arguments to librosa.display.waveplot.
    """
    import librosa.display
    import matplotlib.pyplot as plt

    if do_mono:
        audio_signal = audio_signal.to_mono(overwrite=False)
    
    data = np.asfortranarray(audio_signal.audio_data[ch])
    librosa.display.waveplot(data, sr=audio_signal.sample_rate, x_axis=x_axis, **kwargs)
    plt.ylabel('Amplitude')

Source File: feature_extraction_functions.py From Build-CNN-or-LSTM-or-CNNLSTM-with-speech-features with MIT License

6 votes

def save2png(time_step,frame_width,wav,feature_type,num_filters,num_feature_columns,head_folder,delta=False,dom_freq=False,noise_wavefile=None,vad = True):
    feats = coll_feats_manage_timestep(time_step,frame_width,wav,feature_type,num_filters,num_feature_columns,head_folder,delta=delta,dom_freq=dom_freq,noise_wavefile=noise_wavefile,vad = vad)
    
    #transpose the features to go from left to right in time:
    feats = np.transpose(feats)
    
    #create graph and save to png
    plt.clf()
    librosa.display.specshow(feats)
    if noise_wavefile:
        noise = True
    else:
        noise = False
    plt.title("{}: {} timesteps, frame width of {}".format(wav,time_step,frame_width))
    plt.tight_layout(pad=0)
    pic_path = "{}{}_vad{}_noise{}_delta{}_domfreq{}".format(feature_type,num_feature_columns,vad,noise,delta,dom_freq)
    path = unique_path(Path(head_folder), pic_path+"{:03d}.png")
    plt.savefig(path)

    return True

Source File: test_audio.py From emlearn with MIT License

6 votes

def test_melfilter_librosa():
    filename = librosa.util.example_audio_file()
    y, sr = librosa.load(filename, offset=1.0, duration=0.3)
    n_fft = 1024
    hop_length = 256
    fmin = 500
    fmax = 5000
    n_mels = 16

    spec = numpy.abs(librosa.core.stft(y, n_fft=n_fft, hop_length=hop_length))**2
    spec1 = spec[:,0]

    ref = librosa.feature.melspectrogram(S=spec1, sr=sr, norm=None, htk=True, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
    out = eml_audio.melfilter(spec1, sr, n_fft, n_mels, fmin, fmax)

    fig, (ref_ax, out_ax) = plt.subplots(2)
    def specshow(d, ax):
        s = librosa.amplitude_to_db(d, ref=numpy.max)
        librosa.display.specshow(s, ax=ax, x_axis='time')
    specshow(ref.reshape(-1, 1), ax=ref_ax)
    specshow(out.reshape(-1, 1), ax=out_ax)
    fig.savefig('melfilter.librosa.png')

    assert ref.shape == out.shape
    numpy.testing.assert_allclose(ref, out, rtol=0.01)

Source File: audio.py From speech-music-detection with MIT License

5 votes

def display_mel_bands(bands, figsize=None):
    plt.figure(figsize=figsize)
    librosa.display.specshow(bands, y_axis='mel', x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log-scale Mel spectrogram')
    plt.tight_layout()
    plt.show()

Source File: 02_wav_features_and_spectrogram.py From Convolutional-Autoencoder-Music-Similarity with MIT License

5 votes

def plotMFCCs(self):
        """
        The Mel Frequency Cepstral Coefficient is a measure of timbre
        """
        mfccs = librosa.feature.mfcc(y=self.wav, sr=self.samplefreq)
        librosa.display.specshow(mfccs, x_axis='time')
        plt.colorbar()
        plt.title('MFCC')
        plt.tight_layout()
        plt.show()
        return mfccs

Source File: audio.py From speech-music-detection with MIT License

5 votes

def display_spec(spec, figsize=None):
    plt.figure(figsize=figsize)
    spec = librosa.amplitude_to_db(spec, ref=np.max)
    librosa.display.specshow(spec, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log-frequency power spectrogram')
    plt.tight_layout()
    plt.show()

Source File: spec_augment_tensorflow.py From Speech-Transformer with MIT License

5 votes

def visualization_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment
    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max), y_axis='mel', fmax=8000,
                             x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()

Source File: spec_augment_pytorch.py From Speech-Transformer with MIT License

5 votes

def visualization_spectrogram(mel_spectrogram, title):
    """visualizing result of specAugment
    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max), y_axis='mel', fmax=8000,
                             x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

Source File: melgram.py From keras-audio with MIT License

5 votes

def melgram_v1(audio_file_path, to_file):
    sig, fs = librosa.load(audio_file_path)

    pylab.axis('off')  # no axis
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
    S = librosa.feature.melspectrogram(y=sig, sr=fs)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()

Source File: melgram.py From keras-audio with MIT License

5 votes

def melgram_v2(audio_file_path):
    # Load sound file
    y, sr = librosa.load(audio_file_path)

    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.core.amplitude_to_db(S, ref=np.max)

    # Make a new figure
    plt.figure(figsize=(12, 4))

    # Display the spectrogram on a mel scale
    # sample rate and hop length parameters are used to render the time axis
    librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

    # Put a descriptive title on the plot
    plt.title('mel power spectrogram')

    # draw a color bar
    plt.colorbar(format='%+02.0f dB')

    # Make the figure layout compact
    plt.tight_layout()
    plt.show()

Source File: similarity_matrix.py From pychorus with MIT License

5 votes

def display(self):
        import librosa.display
        import matplotlib.pyplot as plt
        librosa.display.specshow(
            self.matrix,
            y_axis='time',
            x_axis='time',
            sr=self.sample_rate / (N_FFT / 2048))
        plt.colorbar()
        plt.set_cmap("hot_r")
        plt.show()

Source File: melgram.py From mxnet-audio with MIT License

5 votes

def melgram_v1(audio_file_path, to_file):
    sig, fs = librosa.load(audio_file_path)

    pylab.axis('off')  # no axis
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
    S = librosa.feature.melspectrogram(y=sig, sr=fs)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()

Source File: melgram.py From mxnet-audio with MIT License

5 votes

def melgram_v2(audio_file_path):
    # Load sound file
    y, sr = librosa.load(audio_file_path)

    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.core.amplitude_to_db(S, ref=np.max)

    # Make a new figure
    plt.figure(figsize=(12, 4))

    # Display the spectrogram on a mel scale
    # sample rate and hop length parameters are used to render the time axis
    librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

    # Put a descriptive title on the plot
    plt.title('mel power spectrogram')

    # draw a color bar
    plt.colorbar(format='%+02.0f dB')

    # Make the figure layout compact
    plt.tight_layout()
    plt.show()

Source File: preprocessing.py From rnnt-speech-recognition with MIT License

5 votes

def plot_spec(spec, sr, transcription, name):

    spec_db = librosa.amplitude_to_db(spec, ref=np.max)

    plt.figure(figsize=(12,4))
    librosa.display.specshow(spec_db, sr=sr,
        x_axis='time', y_axis='mel',
        hop_length=sr * 0.01)
    plt.colorbar(format='%+02.0f dB')
    plt.savefig('figs/{}.png'.format(name))
    plt.clf()

Source File: audio.py From speech-music-detection with MIT License

5 votes

def display_waveform(audio, figname="Waveform", figsize=None):
    plt.figure(figsize=figsize)
    librosa.display.waveplot(audio)
    plt.title(figname)
    plt.show()

Source File: data_analysis.py From Sound-Recognition-Tutorial with Apache License 2.0

5 votes

def plot_wave(sound_files, sound_names):
    """plot wave"""
    i = 1
    fig = plt.figure(figsize=(20, 64))
    for f, n in zip(sound_files, sound_names):
        y, sr = librosa.load(os.path.join('./data/esc10/audio/', f))
        plt.subplot(10, 1, i)
        librosa.display.waveplot(y, sr, x_axis=None)
        plt.title(n + ' - ' + 'Wave')

        i += 1

    plt.tight_layout(pad=10)
    plt.show()

Source File: 02_wav_features_and_spectrogram.py From Convolutional-Autoencoder-Music-Similarity with MIT License

5 votes

def plotSpectrogram(self, mels=512, maxfreq=30000):
        #Plot the Mel power-scaled frequency spectrum, with any factor of 128 frequency bins and 512 frames (frame default)
        mel = librosa.feature.melspectrogram(y=self.wav, sr=self.samplefreq, n_mels=mels, fmax=maxfreq)
        librosa.display.specshow(librosa.logamplitude(mel, ref_power=np.max), y_axis='mel', fmax=maxfreq, x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel Power-Scaled Frequency Spectrogram')
        plt.tight_layout()
        plt.show()
        return mel

Source File: 02_wav_features_and_spectrogram.py From Convolutional-Autoencoder-Music-Similarity with MIT License

5 votes

def plotChromagram(self):
        #Get chromagram of frequencies
        chroma = librosa.feature.chroma_stft(y=self.wav, sr=self.samplefreq)
        librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
        plt.colorbar()
        plt.title('Chromagram')
        plt.tight_layout()
        plt.show()
        return chroma

Source File: spec_augment_pytorch.py From SpecAugment with Apache License 2.0

5 votes

def visualization_spectrogram(mel_spectrogram, title):
    """visualizing result of SpecAugment
    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max), y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

Source File: spec_augment_tensorflow.py From SpecAugment with Apache License 2.0

5 votes

def visualization_tensor_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """

    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max), y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

Source File: spec_augment_tensorflow.py From SpecAugment with Apache License 2.0

5 votes

def visualization_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max), y_axis='mel', fmax=8000, x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()

Source File: train_data.py From subsync with Apache License 2.0

5 votes

def plot_mfcc(mfcc):
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(mfcc, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')
    plt.tight_layout()
    plt.show()

Source File: audio.py From argus-freesound with MIT License

5 votes

def read_as_melspectrogram(file_path, time_stretch=1.0, pitch_shift=0.0,
                           debug_display=False):
    x = read_audio(file_path)
    if time_stretch != 1.0:
        x = librosa.effects.time_stretch(x, time_stretch)

    if pitch_shift != 0.0:
        librosa.effects.pitch_shift(x, config.sampling_rate, n_steps=pitch_shift)

    mels = audio_to_melspectrogram(x)
    if debug_display:
        import IPython
        IPython.display.display(IPython.display.Audio(x, rate=config.sampling_rate))
        show_melspectrogram(mels)
    return mels

Source File: audio.py From argus-freesound with MIT License

5 votes

def show_melspectrogram(mels, title='Log-frequency power spectrogram'):
    import matplotlib.pyplot as plt

    librosa.display.specshow(mels, x_axis='time', y_axis='mel',
                             sr=config.sampling_rate, hop_length=config.hop_length,
                             fmin=config.fmin, fmax=config.fmax)
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.show()

Source File: data_analysis.py From Sound-Recognition-Tutorial with Apache License 2.0

5 votes

def plot_spectrum(sound_files, sound_names):
    """plot log power spectrum"""
    i = 1
    fig = plt.figure(figsize=(20, 64))
    for f, n in zip(sound_files, sound_names):
        y, sr = librosa.load(os.path.join('./data/esc10/audio/', f))
        plt.subplot(10, 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(y)) ** 2, ref_power=np.max)
        librosa.display.specshow(D, sr=sr, y_axis='log')
        plt.title(n + ' - ' + 'Spectrum')

        i += 1

    plt.tight_layout(pad=10)
    plt.show()

Python librosa.display() Examples