Python Examples of librosa.to

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

6 votes

def test_pitch_shift():
    shift_amount = 4
    step_size = 24
    pitch_shift_audio = pitch_shift(mono_audio, shift_amount, step_size=step_size)
    test_pitch_shift = librosa.effects.pitch_shift(
        librosa.to_mono(mono_audio.raw_samples),
        mono_audio.sample_rate,
        shift_amount,
        bins_per_octave=step_size,
    )
    test_pitch_shift_audio = Audio(
        raw_samples=test_pitch_shift, sample_rate=mono_audio.sample_rate
    )
    assert np.allclose(
        pitch_shift_audio.raw_samples,
        test_pitch_shift_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )

Source File: deformation.py From amen with BSD 2-Clause "Simplified" License

6 votes

def harmonic_separation(audio, margin=3.0):
    """
    Wraps librosa's `harmonic` function, and returns a new Audio object.
    Note that this folds to mono.

    Parameters
    ---------
    audio : Audio
        The Audio object to act on.

    margin : float
        The larger the margin, the larger the separation.
        The default is `3.0`.
    """
    harmonic = librosa.effects.harmonic(
        librosa.to_mono(audio.raw_samples), margin=margin
    )
    harmonic_audio = Audio(raw_samples=harmonic, sample_rate=audio.sample_rate)

    return harmonic_audio

Source File: deformation.py From amen with BSD 2-Clause "Simplified" License

6 votes

def percussive_separation(audio, margin=3.0):
    """
    Wraps librosa's `percussive` function, and returns a new Audio object.
    Note that this folds to mono.

    Parameters
    ---------
    audio : Audio
        The Audio object to act on.

    margin : float
        The larger the margin, the larger the separation.
        The default is `3.0`.
    """
    percussive = librosa.effects.percussive(
        librosa.to_mono(audio.raw_samples), margin=margin
    )
    percussive_audio = Audio(raw_samples=percussive, sample_rate=audio.sample_rate)

    return percussive_audio

Source File: audio.py From audiomate with MIT License

6 votes

def process_buffer(buffer, n_channels):
    """
    Merge the read blocks and resample if necessary.

    Args:
        buffer (list): A list of blocks of samples.
        n_channels (int): The number of channels of the input data.

    Returns:
        np.array: The samples
    """
    samples = np.concatenate(buffer)

    if n_channels > 1:
        samples = samples.reshape((-1, n_channels)).T
        samples = librosa.to_mono(samples)

    return samples

Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License

6 votes

def load_wavs(filenames, sr):

    wavs_mono = list()
    wavs_src1 = list()
    wavs_src2 = list()

    for filename in filenames:
        wav, _ = librosa.load(filename, sr = sr, mono = False)
        assert (wav.ndim == 2) and (wav.shape[0] == 2), 'Require wav to have two channels'
        wav_mono = librosa.to_mono(wav) * 2 # Cancelling average
        wav_src1 = wav[0, :]
        wav_src2 = wav[1, :]
        wavs_mono.append(wav_mono)
        wavs_src1.append(wav_src1)
        wavs_src2.append(wav_src2)

    return wavs_mono, wavs_src1, wavs_src2

Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License

6 votes

def get_random_wav(filename, sr, duration):

    # Get a random range from wav

    wav, _ = librosa.load(filename, sr = sr, mono = False)
    print(wav)
    assert (wav.ndim == 2) and (wav.shape[0] == 2), 'Require wav to have two channels'

    wav_pad = pad_wav(wav = wav, sr = sr, duration = duration)
    wav_sample = sample_range(wav = wav, sr = sr, duration = duration)

    wav_sample_mono = librosa.to_mono(wav_sample)
    wav_sample_src1 = wav_sample[0, :]
    wav_sample_src2 = wav_sample[1, :]

    return wav_sample_mono, wav_sample_src1, wav_sample_src2

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def test_time_stretch():
    stretch_amount = 1.5
    time_stretch_audio = time_stretch(mono_audio, stretch_amount)
    test_time_stretch = librosa.effects.time_stretch(
        librosa.to_mono(mono_audio.raw_samples), stretch_amount
    )
    test_time_stretch_audio = Audio(
        raw_samples=test_time_stretch, sample_rate=mono_audio.sample_rate
    )
    assert np.allclose(
        time_stretch_audio.raw_samples,
        test_time_stretch_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def test_harmonic():
    harmonic_audio = harmonic_separation(mono_audio)
    test_harmonic = librosa.effects.harmonic(
        librosa.to_mono(mono_audio.raw_samples), margin=3.0
    )
    test_harmonic_audio = Audio(
        raw_samples=test_harmonic, sample_rate=mono_audio.sample_rate
    )
    assert np.allclose(
        harmonic_audio.raw_samples,
        test_harmonic_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )

Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def test_percussive():
    percussive_audio = percussive_separation(mono_audio)
    test_percussive = librosa.effects.percussive(
        librosa.to_mono(mono_audio.raw_samples), margin=3.0
    )
    test_percussive_audio = Audio(
        raw_samples=test_percussive, sample_rate=mono_audio.sample_rate
    )
    assert np.allclose(
        percussive_audio.raw_samples,
        test_percussive_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )

Source File: deformation.py From amen with BSD 2-Clause "Simplified" License

5 votes

def pitch_shift(audio, steps, step_size=12):
    """
    Wraps librosa's `pitch_shift` function, and returns a new Audio object.
    Note that this folds to mono.

    Parameters
    ---------
    audio : Audio
        The Audio object to act on.

    steps : float
        The pitch shift amount.
        The default unit is semitones, as set by `step_size`.

    step_size : float > 0
        The number of equal-tempered steps per octave.
        The default is semitones, as set by `step_size=12`.
        Quarter-tones, for example, would be `step_size=24`.
    """
    shifted = librosa.effects.pitch_shift(
        librosa.to_mono(audio.raw_samples),
        audio.sample_rate,
        steps,
        bins_per_octave=step_size,
    )
    stretched_audio = Audio(raw_samples=shifted, sample_rate=audio.sample_rate)

    return stretched_audio

Source File: audio.py From amen with BSD 2-Clause "Simplified" License

4 votes

def __init__(
        self,
        file_path=None,
        raw_samples=None,
        convert_to_mono=False,
        sample_rate=44100,
        analysis_sample_rate=22050,
    ):
        """
        Audio constructor.
        Opens a file path, loads the audio with librosa, and prepares the features

        Parameters
        ----------

        file_path: string
            path to the audio file to load

        raw_samples: np.array
            samples to use for audio output

        convert_to_mono: boolean
            (optional) converts the file to mono on loading

        sample_rate: number > 0 [scalar]
            (optional) sample rate to pass to librosa.


        Returns
        ------
        An Audio object
        """

        if file_path:
            y, sr = librosa.load(file_path, mono=convert_to_mono, sr=sample_rate)
        elif raw_samples is not None:
            # This assumes that we're passing in raw_samples
            # directly from another Audio's raw_samples.
            y = raw_samples
            sr = sample_rate

        self.file_path = file_path
        self.sample_rate = float(sr)
        self.analysis_sample_rate = float(analysis_sample_rate)
        self.num_channels = y.ndim
        self.duration = librosa.get_duration(y=y, sr=sr)

        self.analysis_samples = librosa.resample(
            librosa.to_mono(y), sr, self.analysis_sample_rate, res_type='kaiser_best'
        )
        self.raw_samples = np.atleast_2d(y)

        self.zero_indexes = self._create_zero_indexes()
        self.features = self._create_features()
        self.timings = self._create_timings()

Source File: pncc.py From PNCC with MIT License

4 votes

def pncc(audio_wave, n_fft=512, sr=16000, winlen=0.020, winstep=0.010,
         n_mels=128, n_pncc=13, weight_N=4, power=2):

    pre_emphasis_signal = scipy.signal.lfilter([1.0, -0.97], 1, audio_wave)
    mono_wave = to_mono(pre_emphasis_signal.T)
    stft_pre_emphasis_signal = np.abs(stft(mono_wave,
                                           n_fft=n_fft,
                                           hop_length=int(sr * winstep),
                                           win_length=int(sr * winlen),
                                           window=np.ones(int(sr * winlen)),
                                           center=False)) ** power

    mel_filter = np.abs(filters.mel(sr, n_fft=n_fft, n_mels=n_mels)) ** power
    power_stft_signal = np.dot(stft_pre_emphasis_signal.T, mel_filter.T)

    medium_time_power = medium_time_power_calculation(power_stft_signal)

    lower_envelope = asymmetric_lawpass_filtering(
        medium_time_power, 0.999, 0.5)

    subtracted_lower_envelope = medium_time_power - lower_envelope

    rectified_signal = halfwave_rectification(subtracted_lower_envelope)

    floor_level = asymmetric_lawpass_filtering(rectified_signal)

    temporal_masked_signal = temporal_masking(rectified_signal)

    final_output = switch_excitation_or_non_excitation(
        temporal_masked_signal, floor_level, lower_envelope,
        medium_time_power)

    spectral_weight_smoothing = weight_smoothing(
        final_output, medium_time_power, L=n_mels)

    transfer_function = time_frequency_normalization(
        power_stft_signal,
        spectral_weight_smoothing)

    normalized_power = mean_power_normalization(
        transfer_function, final_output, L=n_mels)

    power_law_nonlinearity = power_function_nonlinearity(normalized_power)

    dct = np.dot(power_law_nonlinearity, filters.dct(
        n_pncc, power_law_nonlinearity.shape[1]).T)

    return dct

Source File: audio_io.py From synvae with MIT License

4 votes

def wav_data_to_samples(wav_data, sample_rate):
  """Read PCM-formatted WAV data and return a NumPy array of samples.

  Uses scipy to read and librosa to process WAV data. Audio will be converted to
  mono if necessary.

  Args:
    wav_data: WAV audio data to read.
    sample_rate: The number of samples per second at which the audio will be
        returned. Resampling will be performed if necessary.

  Returns:
    A numpy array of audio samples, single-channel (mono) and sampled at the
    specified rate, in float32 format.

  Raises:
    AudioIOReadError: If scipy is unable to read the WAV data.
    AudioIOError: If audio processing fails.
  """
  try:
    # Read the wav file, converting sample rate & number of channels.
    native_sr, y = scipy.io.wavfile.read(six.BytesIO(wav_data))
  except Exception as e:  # pylint: disable=broad-except
    raise AudioIOReadError(e)

  if y.dtype == np.int16:
    # Convert to float32.
    y = int16_samples_to_float32(y)
  elif y.dtype == np.float32:
    # Already float32.
    pass
  else:
    raise AudioIOError(
        'WAV file not 16-bit or 32-bit float PCM, unsupported')
  try:
    # Convert to mono and the desired sample rate.
    if y.ndim == 2 and y.shape[1] == 2:
      y = y.T
      y = librosa.to_mono(y)
    if native_sr != sample_rate:
      y = librosa.resample(y, native_sr, sample_rate)
  except Exception as e:  # pylint: disable=broad-except
    raise AudioIOError(e)
  return y

Source File: sox.py From muda with ISC License

4 votes

def __sox(y, sr, *args):
    """Execute sox

    Parameters
    ----------
    y : np.ndarray
        Audio time series

    sr : int > 0
        Sampling rate of `y`

    *args
        Additional arguments to sox

    Returns
    -------
    y_out : np.ndarray
        `y` after sox transformation
    """

    assert sr > 0

    fdesc, infile = tempfile.mkstemp(suffix=".wav")
    os.close(fdesc)
    fdesc, outfile = tempfile.mkstemp(suffix=".wav")
    os.close(fdesc)

    # Dump the audio
    psf.write(infile, y, sr)

    try:
        arguments = ["sox", infile, outfile, "-q"]
        arguments.extend(args)

        subprocess.check_call(arguments)

        y_out, sr = psf.read(outfile)
        y_out = y_out.T
        if y.ndim == 1:
            y_out = librosa.to_mono(y_out)

    finally:
        os.unlink(infile)
        os.unlink(outfile)

    return y_out

Source File: background.py From muda with ISC License

4 votes

def slice_clip(filename, start, stop, n_samples, sr, mono=True):
    """Slice a fragment of audio from a file.

    This uses pysoundfile to efficiently seek without
    loading the entire stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    start : int
        The sample index of `filename` at which the audio fragment should start

    stop : int
        The sample index of `filename` at which the audio fragment should stop (e.g. y = audio[start:stop])

    n_samples : int > 0
        The number of samples to load

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled from `filename`

    Raises
    ------
    ValueError
        If the source file is shorter than the requested length

    """

    with psf.SoundFile(str(filename), mode="r") as soundf:
        n_target = stop - start

        soundf.seek(start)

        y = soundf.read(n_target).T

        if mono:
            y = librosa.to_mono(y)

        # Resample to initial sr
        y = librosa.resample(y, soundf.samplerate, sr)

        # Clip to the target length exactly
        y = librosa.util.fix_length(y, n_samples)

        return y

Python librosa.to_mono() Examples