Python librosa.to_mono() Examples
The following are 15 code examples of librosa.to_mono().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module librosa, or try the search function.
Example #1
Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License | 6 votes |
def test_pitch_shift():
    """Check that `pitch_shift` agrees with calling librosa directly.

    The module-level `mono_audio` fixture is shifted by 4 steps at 24 steps
    per octave through the wrapper, and the result is compared against
    `librosa.effects.pitch_shift` applied to the same mono-folded samples.
    """
    shift_amount = 4
    step_size = 24
    pitch_shift_audio = pitch_shift(mono_audio, shift_amount, step_size=step_size)

    # `sr` and `n_steps` are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and the same keyword names are accepted by older releases.
    expected_samples = librosa.effects.pitch_shift(
        librosa.to_mono(mono_audio.raw_samples),
        sr=mono_audio.sample_rate,
        n_steps=shift_amount,
        bins_per_octave=step_size,
    )
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )

    assert np.allclose(
        pitch_shift_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )
Example #2
Source File: deformation.py From amen with BSD 2-Clause "Simplified" License | 6 votes |
def harmonic_separation(audio, margin=3.0):
    """
    Run librosa's `harmonic` effect on an Audio object and wrap the result.

    The input samples are folded to mono before the effect is applied.

    Parameters
    ---------
    audio : Audio
        The Audio object whose `raw_samples` are processed.

    margin : float
        Passed through to `librosa.effects.harmonic` as `margin`.
        The default is `3.0`.

    Returns
    ------
    A new Audio object built from the harmonic samples, using the sample
    rate of `audio`.
    """
    mono_samples = librosa.to_mono(audio.raw_samples)
    harmonic_samples = librosa.effects.harmonic(mono_samples, margin=margin)
    return Audio(raw_samples=harmonic_samples, sample_rate=audio.sample_rate)
Example #3
Source File: deformation.py From amen with BSD 2-Clause "Simplified" License | 6 votes |
def percussive_separation(audio, margin=3.0):
    """
    Run librosa's `percussive` effect on an Audio object and wrap the result.

    The input samples are folded to mono before the effect is applied.

    Parameters
    ---------
    audio : Audio
        The Audio object whose `raw_samples` are processed.

    margin : float
        Passed through to `librosa.effects.percussive` as `margin`.
        The default is `3.0`.

    Returns
    ------
    A new Audio object built from the percussive samples, using the sample
    rate of `audio`.
    """
    mono_samples = librosa.to_mono(audio.raw_samples)
    percussive_samples = librosa.effects.percussive(mono_samples, margin=margin)
    return Audio(raw_samples=percussive_samples, sample_rate=audio.sample_rate)
Example #4
Source File: audio.py From audiomate with MIT License | 6 votes |
def process_buffer(buffer, n_channels):
    """
    Join the buffered blocks and fold multi-channel data down to mono.

    Args:
        buffer (list): A list of blocks of samples.
        n_channels (int): The number of channels of the input data.

    Returns:
        np.array: The concatenated samples. When ``n_channels`` is greater
        than one, the data is reshaped to ``(n_channels, -1)`` (interleaved
        input is assumed by the reshape) and passed through
        ``librosa.to_mono``.
    """
    merged = np.concatenate(buffer)

    # Single-channel input needs no downmix.
    if n_channels <= 1:
        return merged

    # Rows of the reshaped array hold one frame each; transpose so that
    # each row holds one channel, which is the layout to_mono expects.
    channels_first = merged.reshape((-1, n_channels)).T
    return librosa.to_mono(channels_first)
Example #5
Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License | 6 votes |
def load_wavs(filenames, sr):
    """Load two-channel audio files and split each into mix and channels.

    Every file is loaded with `librosa.load(..., mono=False)` at sampling
    rate `sr` and must come back as a 2-D array with exactly two rows.

    Returns three lists, in file order:
      * the mono mix of each file, multiplied by 2 to cancel the averaging
        done by `librosa.to_mono`,
      * the first channel of each file,
      * the second channel of each file.
    """
    mixes = []
    first_channels = []
    second_channels = []

    for path in filenames:
        stereo, _ = librosa.load(path, sr=sr, mono=False)
        assert (stereo.ndim == 2) and (stereo.shape[0] == 2), 'Require wav to have two channels'

        # to_mono averages the channels; doubling cancels that average.
        mixes.append(librosa.to_mono(stereo) * 2)
        first_channels.append(stereo[0, :])
        second_channels.append(stereo[1, :])

    return mixes, first_channels, second_channels
Example #6
Source File: preprocess.py From Singing_Voice_Separation_RNN with MIT License | 6 votes |
def get_random_wav(filename, sr, duration):
    """Load a two-channel file and return a sampled excerpt of it.

    The file is loaded with `librosa.load(..., mono=False)` at sampling rate
    `sr`, passed through `pad_wav`, and then `sample_range` picks the excerpt.

    Returns a tuple `(mono, src1, src2)`: the `librosa.to_mono` fold of the
    excerpt, its first channel, and its second channel.
    """
    wav, _ = librosa.load(filename, sr=sr, mono=False)
    assert (wav.ndim == 2) and (wav.shape[0] == 2), 'Require wav to have two channels'

    wav_pad = pad_wav(wav=wav, sr=sr, duration=duration)
    # NOTE(review): the previous code computed wav_pad but then sampled from
    # the unpadded `wav`, leaving the padding unused. Sampling from the padded
    # signal is the presumed intent -- confirm against pad_wav / sample_range.
    wav_sample = sample_range(wav=wav_pad, sr=sr, duration=duration)

    wav_sample_mono = librosa.to_mono(wav_sample)
    wav_sample_src1 = wav_sample[0, :]
    wav_sample_src2 = wav_sample[1, :]

    return wav_sample_mono, wav_sample_src1, wav_sample_src2
Example #7
Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License | 5 votes |
def test_time_stretch():
    """Check that `time_stretch` agrees with calling librosa directly.

    The module-level `mono_audio` fixture is stretched by a factor of 1.5
    through the wrapper, and the result is compared against
    `librosa.effects.time_stretch` applied to the same mono-folded samples.
    """
    stretch_amount = 1.5
    time_stretch_audio = time_stretch(mono_audio, stretch_amount)

    # `rate` is passed by keyword: librosa >= 0.10 made it keyword-only, and
    # the same keyword name is accepted by older releases.
    expected_samples = librosa.effects.time_stretch(
        librosa.to_mono(mono_audio.raw_samples), rate=stretch_amount
    )
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )

    assert np.allclose(
        time_stretch_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )
Example #8
Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License | 5 votes |
def test_harmonic():
    """Check that `harmonic_separation` agrees with calling librosa directly.

    Both paths use a margin of 3.0 on the module-level `mono_audio` fixture.
    """
    actual_audio = harmonic_separation(mono_audio)

    mono_samples = librosa.to_mono(mono_audio.raw_samples)
    expected_samples = librosa.effects.harmonic(mono_samples, margin=3.0)
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )

    samples_match = np.allclose(
        actual_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )
    assert samples_match
Example #9
Source File: test_deformation.py From amen with BSD 2-Clause "Simplified" License | 5 votes |
def test_percussive():
    """Check that `percussive_separation` agrees with calling librosa directly.

    Both paths use a margin of 3.0 on the module-level `mono_audio` fixture.
    """
    actual_audio = percussive_separation(mono_audio)

    mono_samples = librosa.to_mono(mono_audio.raw_samples)
    expected_samples = librosa.effects.percussive(mono_samples, margin=3.0)
    expected_audio = Audio(
        raw_samples=expected_samples, sample_rate=mono_audio.sample_rate
    )

    samples_match = np.allclose(
        actual_audio.raw_samples,
        expected_audio.raw_samples,
        rtol=1e-3,
        atol=1e-4,
    )
    assert samples_match
Example #10
Source File: deformation.py From amen with BSD 2-Clause "Simplified" License | 5 votes |
def pitch_shift(audio, steps, step_size=12):
    """
    Wraps librosa's `pitch_shift` function, and returns a new Audio object.
    Note that this folds to mono.

    Parameters
    ---------
    audio : Audio
        The Audio object to act on.

    steps : float
        The pitch shift amount.
        The default unit is semitones, as set by `step_size`.

    step_size : float > 0
        The number of equal-tempered steps per octave.
        The default is semitones, as set by `step_size=12`.
        Quarter-tones, for example, would be `step_size=24`.

    Returns
    ------
    A new Audio object holding the shifted samples at the sample rate of
    `audio`.
    """
    # `sr` and `n_steps` are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and the same keyword names are accepted by older releases.
    shifted = librosa.effects.pitch_shift(
        librosa.to_mono(audio.raw_samples),
        sr=audio.sample_rate,
        n_steps=steps,
        bins_per_octave=step_size,
    )
    shifted_audio = Audio(raw_samples=shifted, sample_rate=audio.sample_rate)

    return shifted_audio
Example #11
Source File: audio.py From amen with BSD 2-Clause "Simplified" License | 4 votes |
def __init__(
    self,
    file_path=None,
    raw_samples=None,
    convert_to_mono=False,
    sample_rate=44100,
    analysis_sample_rate=22050,
):
    """
    Audio constructor.
    Opens a file path, loads the audio with librosa, and prepares the features

    Parameters
    ----------

    file_path: string
        path to the audio file to load

    raw_samples: np.array
        samples to use for audio output; only used when `file_path` is not
        given

    convert_to_mono: boolean
        (optional) converts the file to mono on loading

    sample_rate: number > 0 [scalar]
        (optional) sample rate to pass to librosa.

    analysis_sample_rate: number > 0 [scalar]
        (optional) sample rate that the mono analysis samples are
        resampled to.

    Returns
    ------
    An Audio object

    Raises
    ------
    ValueError
        If neither `file_path` nor `raw_samples` is provided.
    """
    if file_path:
        y, sr = librosa.load(file_path, mono=convert_to_mono, sr=sample_rate)
    elif raw_samples is not None:
        # This assumes that we're passing in raw_samples
        # directly from another Audio's raw_samples.
        y = raw_samples
        sr = sample_rate
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `y`; fail early with a clear message instead.
        raise ValueError("Audio requires either file_path or raw_samples")

    # Channel-major 2-D view of the samples; a 1-D signal becomes (1, n).
    samples_2d = np.atleast_2d(y)

    self.file_path = file_path
    self.sample_rate = float(sr)
    self.analysis_sample_rate = float(analysis_sample_rate)
    # Count rows of the 2-D view rather than using y.ndim: a mono signal
    # that is already shaped (1, n) has ndim == 2 but only one channel.
    self.num_channels = samples_2d.shape[0]
    self.duration = librosa.get_duration(y=y, sr=sr)
    # orig_sr / target_sr are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and the same keyword names are accepted by older releases.
    self.analysis_samples = librosa.resample(
        librosa.to_mono(y),
        orig_sr=sr,
        target_sr=self.analysis_sample_rate,
        res_type='kaiser_best',
    )
    self.raw_samples = samples_2d

    self.zero_indexes = self._create_zero_indexes()
    self.features = self._create_features()
    self.timings = self._create_timings()
Example #12
Source File: pncc.py From PNCC with MIT License | 4 votes |
def pncc(audio_wave, n_fft=512, sr=16000, winlen=0.020, winstep=0.010,
         n_mels=128, n_pncc=13, weight_N=4, power=2):
    """Run the PNCC feature pipeline on `audio_wave` and return the result.

    Steps, in order: pre-emphasis filter, mono fold, STFT magnitude raised
    to `power`, mel filter-bank projection, then the chain of helper stages
    defined elsewhere in this file (medium-time power, asymmetric low-pass
    filtering, half-wave rectification, temporal masking, excitation
    switching, weight smoothing, time-frequency normalisation, mean power
    normalisation, power-function non-linearity), and finally a projection
    onto `n_pncc` DCT basis vectors.

    `winlen` and `winstep` are multiplied by `sr` to get the STFT window
    and hop lengths in samples. `weight_N` is accepted but not used by
    this function.
    """
    # Window and hop sizes in samples.
    win_length = int(sr * winlen)
    hop_length = int(sr * winstep)

    emphasized = scipy.signal.lfilter([1.0, -0.97], 1, audio_wave)
    mono = to_mono(emphasized.T)

    spectrum = stft(
        mono,
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        window=np.ones(win_length),
        center=False,
    )
    power_spectrum = np.abs(spectrum) ** power
    mel_weights = np.abs(filters.mel(sr, n_fft=n_fft, n_mels=n_mels)) ** power
    mel_power = np.dot(power_spectrum.T, mel_weights.T)

    medium_power = medium_time_power_calculation(mel_power)
    envelope = asymmetric_lawpass_filtering(medium_power, 0.999, 0.5)
    rectified = halfwave_rectification(medium_power - envelope)

    floor = asymmetric_lawpass_filtering(rectified)
    masked = temporal_masking(rectified)
    switched = switch_excitation_or_non_excitation(
        masked, floor, envelope, medium_power)

    smoothed = weight_smoothing(switched, medium_power, L=n_mels)
    transfer = time_frequency_normalization(mel_power, smoothed)
    normalized = mean_power_normalization(transfer, switched, L=n_mels)
    nonlinear = power_function_nonlinearity(normalized)

    dct_basis = filters.dct(n_pncc, nonlinear.shape[1])
    return np.dot(nonlinear, dct_basis.T)
Example #13
Source File: audio_io.py From synvae with MIT License | 4 votes |
def wav_data_to_samples(wav_data, sample_rate):
    """Read PCM-formatted WAV data and return a NumPy array of samples.

    Uses scipy to read and librosa to process WAV data. Audio will be
    converted to mono if necessary.

    Args:
        wav_data: WAV audio data to read.
        sample_rate: The number of samples per second at which the audio
            will be returned. Resampling will be performed if necessary.

    Returns:
        A numpy array of audio samples, single-channel (mono) and sampled
        at the specified rate, in float32 format.

    Raises:
        AudioIOReadError: If scipy is unable to read the WAV data.
        AudioIOError: If the sample format is neither int16 nor float32, or
            if audio processing fails.
    """
    try:
        # Read the wav file; channel and sample-rate conversion happen below.
        native_sr, y = scipy.io.wavfile.read(six.BytesIO(wav_data))
    except Exception as e:  # pylint: disable=broad-except
        raise AudioIOReadError(e)

    if y.dtype == np.int16:
        # Convert to float32.
        y = int16_samples_to_float32(y)
    elif y.dtype != np.float32:
        raise AudioIOError(
            'WAV file not 16-bit or 32-bit float PCM, unsupported')

    try:
        # Convert to mono and the desired sample rate.
        if y.ndim == 2:
            # Any 2-D input is downmixed, not only the 2-channel case that
            # was handled before; otherwise a (frames, channels) array would
            # reach resample() unchanged. The transpose assumes the
            # (frames, channels) layout that scipy.io.wavfile.read documents.
            y = librosa.to_mono(y.T)
        if native_sr != sample_rate:
            # Keyword arguments: librosa >= 0.10 made orig_sr / target_sr
            # keyword-only; older releases accept the same names.
            y = librosa.resample(y, orig_sr=native_sr, target_sr=sample_rate)
    except Exception as e:  # pylint: disable=broad-except
        raise AudioIOError(e)
    return y
Example #14
Source File: sox.py From muda with ISC License | 4 votes |
def __sox(y, sr, *args):
    """Execute sox

    `y` is written to a temporary WAV file, sox is run on it with the extra
    arguments, and the output file is read back. Both temporary files are
    removed whether or not the call succeeds.

    Parameters
    ----------
    y : np.ndarray
        Audio time series

    sr : int > 0
        Sampling rate of `y`

    *args
        Additional arguments to sox

    Returns
    -------
    y_out : np.ndarray
        `y` after sox transformation

    Raises
    ------
    subprocess.CalledProcessError
        If sox exits with a non-zero status.
    """
    assert sr > 0

    fdesc, infile = tempfile.mkstemp(suffix=".wav")
    os.close(fdesc)
    try:
        fdesc, outfile = tempfile.mkstemp(suffix=".wav")
        os.close(fdesc)
        try:
            # Dump the audio. This sits inside the try blocks so that a
            # failed write does not leave the temporary files behind.
            psf.write(infile, y, sr)

            arguments = ["sox", infile, outfile, "-q"]
            arguments.extend(args)
            subprocess.check_call(arguments)

            # The sample rate of the output file is not needed here.
            y_out, _ = psf.read(outfile)
            y_out = y_out.T
            if y.ndim == 1:
                # Input was 1-D, so fold the output back to 1-D as well.
                y_out = librosa.to_mono(y_out)
        finally:
            os.unlink(outfile)
    finally:
        os.unlink(infile)

    return y_out
Example #15
Source File: background.py From muda with ISC License | 4 votes |
def slice_clip(filename, start, stop, n_samples, sr, mono=True):
    """Slice a fragment of audio from a file.

    This uses pysoundfile to efficiently seek without loading the entire
    stream.

    Parameters
    ----------
    filename : str
        Path to the input file

    start : int
        The sample index of `filename` at which the audio fragment should
        start

    stop : int
        The sample index of `filename` at which the audio fragment should
        stop (e.g. y = audio[start:stop])

    n_samples : int > 0
        The number of samples to load

    sr : int > 0
        The target sampling rate

    mono : bool
        Ensure monophonic audio

    Returns
    -------
    y : np.ndarray [shape=(n_samples,)]
        A fragment of audio sampled from `filename`

    Notes
    -----
    NOTE(review): an earlier version of this docstring promised a
    ValueError when the source file is shorter than the requested length.
    No such check is visible in this function; the result is only forced
    to `n_samples` by `librosa.util.fix_length`. Confirm the intended
    behaviour.
    """
    with psf.SoundFile(str(filename), mode="r") as soundf:
        n_target = stop - start

        soundf.seek(start)
        y = soundf.read(n_target).T

        if mono:
            y = librosa.to_mono(y)

        # Resample from the file's rate to the requested sr. Keyword
        # arguments are used because librosa >= 0.10 made them keyword-only;
        # older releases accept the same names.
        y = librosa.resample(y, orig_sr=soundf.samplerate, target_sr=sr)

        # Clip to the target length exactly
        y = librosa.util.fix_length(y, size=n_samples)

        return y