Python librosa.power_to_db() Examples
The following are 30 code examples of librosa.power_to_db(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module librosa, or try the search function.
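Before the examples, here is a minimal, self-contained sketch of the call itself: power_to_db converts a power spectrogram to decibel units. The file name 'audio.wav' and the parameter values are illustrative placeholders, not taken from any project below; ref, amin, and top_db are librosa's documented keyword arguments.

import numpy as np
import librosa

# Load audio and build a mel power spectrogram ('audio.wav' is a placeholder path).
y, sr = librosa.load('audio.wav')
S = librosa.feature.melspectrogram(y=y, sr=sr)

# Convert power to dB: 10 * log10(S / ref), with values floored at amin before
# the log, and the output clipped to at most top_db below its maximum.
S_db = librosa.power_to_db(S, ref=np.max, amin=1e-10, top_db=80.0)

Most examples below follow exactly this pattern: build a (mel) power spectrogram, then log-scale it with power_to_db before feeding it to MFCC extraction, onset detection, plotting, or a neural network.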
Example #1
Source File: test_rythm.py From audiomate with MIT License | 7 votes |
def test_compute_cleanup_after_one_utterance(self):
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED
    S = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    onsets = librosa.onset.onset_strength(S=S, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onsets, sr=sr, win_length=11, center=True).T

    # ACTUAL
    tgram_step = pipeline.Tempogram(win_length=11)

    # FIRST RUN
    tgrams = tgram_step.process_frames(frames, sr, last=True)
    assert np.allclose(tgrams, exp_tgram)

    # SECOND RUN
    tgrams = tgram_step.process_frames(frames, sr, last=True)
    assert np.allclose(tgrams, exp_tgram)
Example #2
Source File: test_onset.py From audiomate with MIT License | 6 votes |
def test_compute(self):
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)
    frames = librosa.util.frame(y, frame_length=2048, hop_length=1024).T

    # EXPECTED
    S = np.abs(librosa.stft(y, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    exp_onsets = librosa.onset.onset_strength(S=S, center=False).T
    exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

    # ACTUAL
    onset = pipeline.OnsetStrength()
    onsets = onset.process_frames(frames, sr, last=True)

    assert np.allclose(onsets, exp_onsets)
Example #3
Source File: data.py From magenta with Apache License 2.0 | 6 votes |
def wav_to_spec(wav_audio, hparams):
    """Transforms the contents of a wav file into a series of spectrograms."""
    if hparams.spec_type == 'raw':
        spec = _wav_to_framed_samples(wav_audio, hparams)
    else:
        if hparams.spec_type == 'cqt':
            spec = _wav_to_cqt(wav_audio, hparams)
        elif hparams.spec_type == 'mel':
            spec = _wav_to_mel(wav_audio, hparams)
        else:
            raise ValueError('Invalid spec_type: {}'.format(hparams.spec_type))

        if hparams.spec_log_amplitude:
            spec = librosa.power_to_db(spec)

    return spec
Example #4
Source File: spec_augment_tensorflow.py From Speech-Transformer with MIT License | 6 votes |
def visualization_tensor_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # session for plotting
    sess = tf.InteractiveSession()
    mel_spectrogram = mel_spectrogram.eval()

    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #5
Source File: preprocessing.py From speechT with Apache License 2.0 | 6 votes |
def calc_power_spectrogram(audio_data, samplerate, n_mels=128, n_fft=512, hop_length=160):
    """
    Calculate power spectrogram from the given raw audio data

    Args:
      audio_data: numpy array of raw audio wave
      samplerate: the sample rate of the `audio_data`
      n_mels: the number of mels to generate
      n_fft: the window size of the fft
      hop_length: the hop length for the window

    Returns:
      the spectrogram in the form [time, n_mels]
    """
    spectrogram = librosa.feature.melspectrogram(audio_data, sr=samplerate, n_mels=n_mels,
                                                 n_fft=n_fft, hop_length=hop_length)

    # convert to log scale (dB)
    log_spectrogram = librosa.power_to_db(spectrogram, ref=np.max)

    # normalize
    normalized_spectrogram = normalize(log_spectrogram)

    return normalized_spectrogram.T
Example #6
Source File: test_onset.py From audiomate with MIT License | 6 votes |
def test_compute_online(self):
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)

    # EXPECTED
    y_pad = np.pad(y, (0, 1024), mode='constant', constant_values=0)
    S = np.abs(librosa.stft(y_pad, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    exp_onsets = librosa.onset.onset_strength(S=S, center=False).T
    exp_onsets = exp_onsets.reshape(exp_onsets.shape[0], 1)

    # ACTUAL
    test_file = tracks.FileTrack('idx', test_file_path)
    onset = pipeline.OnsetStrength()
    onset_gen = onset.process_track_online(test_file, 2048, 1024, chunk_size=5)
    chunks = list(onset_gen)
    onsets = np.vstack(chunks)

    print(onsets.shape, exp_onsets.shape)

    assert np.allclose(onsets, exp_onsets)
Example #7
Source File: test_rythm.py From audiomate with MIT License | 6 votes |
def test_compute_online(self):
    # Data: 41523 samples, 16 kHz
    # yields 40 frames with frame-size 2048 and hop-size 1024
    test_file_path = resources.sample_wav_file('wav_1.wav')
    y, sr = librosa.load(test_file_path, sr=None)

    # EXPECTED
    y_pad = np.pad(y, (0, 1024), mode='constant', constant_values=0)
    S = np.abs(librosa.stft(y_pad, center=False, n_fft=2048, hop_length=1024)) ** 2
    S = librosa.feature.melspectrogram(S=S, n_mels=128, sr=sr)
    S = librosa.power_to_db(S)
    onsets = librosa.onset.onset_strength(S=S, center=False)
    exp_tgram = librosa.feature.tempogram(onset_envelope=onsets, sr=sr, win_length=4, center=True).T

    # ACTUAL
    test_file = tracks.FileTrack('idx', test_file_path)
    tgram_step = pipeline.Tempogram(win_length=4)
    tgram_gen = tgram_step.process_track_online(test_file, 2048, 1024, chunk_size=5)
    chunks = list(tgram_gen)
    tgrams = np.vstack(chunks)

    assert np.allclose(tgrams, exp_tgram)
Example #8
Source File: plotting.py From DeepSpectrum with GNU General Public License v3.0 | 6 votes |
def plot_mel_spectrogram(audio_data, sr, nfft=None, melbands=64, delta=None, **kwargs):
    spectrogram = y_limited_spectrogram(audio_data, sr=sr, nfft=nfft, ylim=kwargs['ylim'])
    kwargs['scale'] = 'mel'
    if delta:
        spectrogram = librosa.feature.delta(spectrogram, order=delta)
    spectrogram = librosa.feature.melspectrogram(S=np.abs(spectrogram)**2, sr=sr, n_mels=melbands)
    spectrogram = librosa.power_to_db(spectrogram, ref=np.max, top_db=None)
    return _create_plot(spectrogram, sr, nfft, **kwargs)
Example #9
Source File: onset.py From audiomate with MIT License | 6 votes |
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
    # Compute mel-spectrogram
    power_spec = np.abs(spectral.stft_from_frames(chunk.data.T)) ** 2
    mel = np.abs(librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate))
    mel_power = librosa.power_to_db(mel)

    # Compute onset strengths
    oenv = librosa.onset.onset_strength(S=mel_power, center=False)

    # Switch dimensions and add dimension to have frames
    oenv = oenv.T.reshape(oenv.shape[0], -1)

    # Remove context
    oenv = oenv[chunk.left_context:oenv.shape[0] - chunk.right_context]

    return oenv
Example #10
Source File: feature_extraction.py From Sound-Recognition-Tutorial with Apache License 2.0 | 6 votes |
def extract_mfcc(y, sr, size=3):
    """
    extract MFCC feature
    :param y: np.ndarray [shape=(n,)], real-valued the input signal (audio time series)
    :param sr: sample rate of 'y'
    :param size: the length (seconds) of random crop from original audio, default as 3 seconds
    :return: MFCC feature
    """
    # normalization
    y = y.astype(np.float32)
    normalization_factor = 1 / np.max(np.abs(y))
    y = y * normalization_factor

    # random crop
    start = random.randint(0, len(y) - size * sr)
    y = y[start: start + size * sr]

    # extract log mel spectrogram #####
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024)
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(melspectrogram), n_mfcc=20)
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta_delta = librosa.feature.delta(mfcc_delta)
    mfcc_comb = np.concatenate([mfcc, mfcc_delta, mfcc_delta_delta], axis=0)

    return mfcc_comb
Example #11
Source File: feature_extraction.py From Sound-Recognition-Tutorial with Apache License 2.0 | 6 votes |
def extract_logmel(y, sr, size=3):
    """
    extract log mel spectrogram feature
    :param y: the input signal (audio time series)
    :param sr: sample rate of 'y'
    :param size: the length (seconds) of random crop from original audio, default as 3 seconds
    :return: log-mel spectrogram feature
    """
    # normalization
    y = y.astype(np.float32)
    normalization_factor = 1 / np.max(np.abs(y))
    y = y * normalization_factor

    # random crop
    start = random.randint(0, len(y) - size * sr)
    y = y[start: start + size * sr]

    # extract log mel spectrogram #####
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=1024, n_mels=60)
    logmelspec = librosa.power_to_db(melspectrogram)

    return logmelspec
Example #12
Source File: create_patches.py From tartarus with MIT License | 6 votes |
def prepare_testset(dataset_name):
    spec_folder = common.SPECTRO_PATH + SPECTRO_FOLDER + "/"
    test_folder = common.DATA_DIR + '/spectro_%s_testset/' % dataset_name
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)
    items = open(common.DATASETS_DIR + '/items_index_test_%s.tsv' % dataset_name).read().splitlines()
    testset = []
    testset_index = []
    for t, track_id in enumerate(items):
        if MSD:
            msd_folder = track_id[2] + "/" + track_id[3] + "/" + track_id[4] + "/"
        else:
            msd_folder = ""
        file = spec_folder + msd_folder + track_id + ".pk"
        try:
            # load pickled spectrogram, convert to dB scale and re-pickle
            spec = pickle.load(open(file, 'rb'))
            spec = librosa.power_to_db(np.abs(spec) ** 2, ref=np.max).T
            pickle.dump(spec, open(test_folder + track_id + ".pk", "wb"))
            testset.append(track_id)
            testset_index.append(t)
            if t % 1000 == 0:
                print(t)
        except:
            print("no exist", file)
Example #13
Source File: singlelayer.py From EUSIPCO2017 with GNU Affero General Public License v3.0 | 6 votes |
def compute_spectrograms(filename):
    out_rate = 12000
    N_FFT = 512
    HOP_LEN = 256

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate * 3:  # if less than 3 seconds - can't process
        raise Exception("Audio duration is too short")

    logam = librosa.power_to_db
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=frames, sr=out_rate, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MEL_BANDS) ** 2,
              ref=1.0)

    # now going through spectrogram with the stride of the segment duration
    for start_idx in range(0, x.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield x[:, start_idx:start_idx + SEGMENT_DUR]
Example #14
Source File: melgram.py From mxnet-audio with MIT License | 5 votes |
def melgram_v1(audio_file_path, to_file):
    sig, fs = librosa.load(audio_file_path)
    pylab.axis('off')  # no axis
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
    S = librosa.feature.melspectrogram(y=sig, sr=fs)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()
Example #15
Source File: audio_transforms.py From htmpapers with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    stft = data['stft']
    sample_rate = data['sample_rate']
    n_fft = data['n_fft']
    mel_basis = librosa.filters.mel(sample_rate, n_fft, self.n_mels)
    s = np.dot(mel_basis, np.abs(stft)**2.0)
    data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
    return data
Example #16
Source File: audio_transforms.py From htmpapers with GNU Affero General Public License v3.0 | 5 votes |
def __call__(self, data):
    samples = data['samples']
    sample_rate = data['sample_rate']
    s = librosa.feature.melspectrogram(samples, sr=sample_rate, n_mels=self.n_mels)
    data['mel_spectrogram'] = librosa.power_to_db(s, ref=np.max)
    return data
Example #17
Source File: features.py From dcase_util with MIT License | 5 votes |
def extract(self, y):
    """Extract features for the audio signal.

    Parameters
    ----------
    y : numpy.ndarray [shape=(n,)]
        Audio signal

    Returns
    -------
    numpy.ndarray [shape=(n_mels, t)]
        mfccs

    """
    spectrogram = self.get_spectrogram(
        y=y,
        n_fft=self.n_fft,
        win_length_samples=self.win_length_samples,
        hop_length_samples=self.hop_length_samples,
        spectrogram_type=self.spectrogram_type,
        center=True,
        window=self.window
    )

    mel_spectrum = numpy.dot(self.mel_basis, spectrogram)

    mfccs = librosa.feature.mfcc(
        S=librosa.power_to_db(mel_spectrum),
        n_mfcc=self.n_mfcc
    )

    if self.omit_zeroth:
        # Remove first coefficient
        mfccs = mfccs[1:, :]

    return mfccs
Example #18
Source File: melgram.py From keras-audio with MIT License | 5 votes |
def melgram_v1(audio_file_path, to_file):
    sig, fs = librosa.load(audio_file_path)
    pylab.axis('off')  # no axis
    pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
    S = librosa.feature.melspectrogram(y=sig, sr=fs)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    pylab.savefig(to_file, bbox_inches=None, pad_inches=0)
    pylab.close()
Example #19
Source File: Plot.py From Wave-U-Net with MIT License | 5 votes |
def draw_spectrogram(example_wav="musb_005_angela thomas wade_audio_model_without_context_cut_28234samples_61002samples_93770samples_126538.wav"):
    y, sr = Utils.load(example_wav, sr=None)
    spec = np.abs(librosa.stft(y, 512, 256, 512))
    norm_spec = librosa.power_to_db(spec**2)
    black_time_frames = np.array([28234, 61002, 93770, 126538]) / 256.0

    fig, ax = plt.subplots()
    img = ax.imshow(norm_spec)
    plt.vlines(black_time_frames, [0, 0, 0, 0], [10, 10, 10, 10], colors="red", lw=2, alpha=0.5)
    plt.vlines(black_time_frames, [256, 256, 256, 256], [246, 246, 246, 246], colors="red", lw=2, alpha=0.5)

    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)
    plt.colorbar(img, cax=cax)

    ax.xaxis.set_label_position("bottom")
    #ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x * 256.0 / sr))
    #ax.xaxis.set_major_formatter(ticks_x)
    ax.xaxis.set_major_locator(ticker.FixedLocator([i * sr / 256. for i in range(len(y) // sr + 1)]))
    ax.xaxis.set_major_formatter(ticker.FixedFormatter([str(i) for i in range(len(y) // sr + 1)]))

    ax.yaxis.set_major_locator(ticker.FixedLocator([float(i) * 2000.0 / (sr / 2.0) * 256. for i in range(6)]))
    ax.yaxis.set_major_formatter(ticker.FixedFormatter([str(i * 2) for i in range(6)]))

    ax.set_xlabel("t (s)")
    ax.set_ylabel('f (KHz)')
    fig.set_size_inches(7., 3.)
    fig.savefig("spectrogram_example.pdf", bbox_inches='tight')
Example #20
Source File: spectrogram.py From cocktail-party with MIT License | 5 votes |
def signal_to_mel_spectrogram(self, audio_signal, log=True, get_phase=False):
    signal = audio_signal.get_data(channel_index=0)
    D = librosa.core.stft(signal, n_fft=self._N_FFT, hop_length=self._HOP_LENGTH)
    magnitude, phase = librosa.core.magphase(D)

    mel_spectrogram = np.dot(self._MEL_FILTER, magnitude)
    mel_spectrogram = mel_spectrogram ** 2

    if log:
        mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    if get_phase:
        return mel_spectrogram, phase
    else:
        return mel_spectrogram
Example #21
Source File: spec_augment_pytorch.py From Speech-Transformer with MIT License | 5 votes |
def visualization_spectrogram(mel_spectrogram, title):
    """visualizing result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #22
Source File: feature_extraction.py From deepsaber with GNU General Public License v3.0 | 5 votes |
def extract_features_mel(y, sr, hop, mel_dim=100):
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_dim, hop_length=hop)  # C2 is 65.4 Hz
    features = librosa.power_to_db(mel, ref=np.max)
    return features
Example #23
Source File: spec_augment_tensorflow.py From Speech-Transformer with MIT License | 5 votes |
def visualization_spectrogram(mel_spectrogram, title):
    """visualizing first one result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :, 0], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #24
Source File: audio.py From MelNet with MIT License | 5 votes |
def pre_spec(self, x):
    return self.normalize(librosa.power_to_db(x) - self.hp.audio.ref_level_db)
Example #25
Source File: feature_extraction.py From deepsaber with GNU General Public License v3.0 | 5 votes |
def extract_features_multi_mel(y, sr=44100.0, hop=512, nffts=[1024, 2048, 4096], mel_dim=100):
    featuress = []
    for nfft in nffts:
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=mel_dim, n_fft=nfft, hop_length=hop)  # C2 is 65.4 Hz
        features = librosa.power_to_db(mel, ref=np.max)
        featuress.append(features)
    features = np.stack(featuress, axis=1)
    return features
Example #26
Source File: spec_augment_pytorch.py From SpecAugment with Apache License 2.0 | 5 votes |
def visualization_spectrogram(mel_spectrogram, title):
    """visualizing result of SpecAugment

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    # Show mel-spectrogram using librosa's specshow.
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max),
                             y_axis='mel', fmax=8000, x_axis='time')
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
Example #27
Source File: audio.py From argus-freesound with MIT License | 5 votes |
def audio_to_melspectrogram(audio):
    spectrogram = librosa.feature.melspectrogram(audio,
                                                 sr=config.sampling_rate,
                                                 n_mels=config.n_mels,
                                                 hop_length=config.hop_length,
                                                 n_fft=config.n_fft,
                                                 fmin=config.fmin,
                                                 fmax=config.fmax)
    spectrogram = librosa.power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram
Example #28
Source File: spectral.py From audiomate with MIT License | 5 votes |
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
    power_spec = np.abs(stft_from_frames(chunk.data.T)) ** 2

    mel = librosa.feature.melspectrogram(S=power_spec, n_mels=self.n_mels, sr=sampling_rate)
    mel_power = librosa.power_to_db(mel)
    mfcc = librosa.feature.mfcc(S=mel_power, n_mfcc=self.n_mfcc)

    return mfcc.T
Example #29
Source File: magnitude_scaling.py From audiomate with MIT License | 5 votes |
def compute(self, chunk, sampling_rate, corpus=None, utterance=None):
    return librosa.power_to_db(chunk.data.T, ref=self.ref, amin=self.amin, top_db=self.top_db).T
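Example #29 forwards power_to_db's three tuning knobs unchanged from the processing step's configuration. As a hedged illustration of what they do (the toy array and values below are arbitrary, not taken from audiomate):

import numpy as np
import librosa

S = np.array([[1e-12, 1.0, 100.0]])  # toy power "spectrogram"
# ref sets the 0 dB reference, amin floors the input before the log to avoid
# log(0), and top_db clips the output to at most top_db below its maximum.
print(librosa.power_to_db(S, ref=1.0, amin=1e-10, top_db=80.0))
# -> roughly [[-60.   0.  20.]]: the 1e-12 bin is first floored to amin (-100 dB),
#    then clipped at the maximum (20 dB) minus top_db (80 dB), i.e. -60 dB.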
Example #30
Source File: test_spectral.py From audiomate with MIT License | 5 votes |
def test_compute(self):
    samples = np.arange(8096).astype(np.float32)

    D = np.abs(librosa.core.stft(samples, n_fft=2048, hop_length=512, center=False)) ** 2
    mel = librosa.feature.melspectrogram(S=D, sr=16000, n_mels=128)
    expected = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=13).T

    frames = librosa.util.frame(samples, frame_length=2048, hop_length=512).T

    mfcc = pipeline.MFCC(n_mfcc=13, n_mels=128)
    res = mfcc.process_frames(frames, sampling_rate=16000)

    assert np.array_equal(expected, res)