Python soundfile.SoundFile() Examples

The following are 28 code examples of soundfile.SoundFile(), collected from open-source projects. Each example is attributed to its original project and source file. You may also want to check out all of the other available functions and classes of the soundfile module.
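All of the examples below follow the same basic pattern: open a file with soundfile.SoundFile(), query its metadata, and read the samples into a NumPy array. A minimal sketch of that pattern (the file path is a placeholder):

import soundfile

# Open for reading; the context manager closes the file automatically.
# 'example.wav' is a placeholder path.
with soundfile.SoundFile('example.wav', 'r') as f:
    sample_rate = f.samplerate      # sampling rate in Hz
    channels = f.channels           # number of audio channels
    num_frames = len(f)             # total number of frames (same as f.frames)
    data = f.read(dtype='float32')  # ndarray of shape (frames,) or (frames, channels)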
Example #1
Source File: zip_io.py    From pykaldi2 with MIT License
def read_wav(self, wavfilename):
        if '@/' in wavfilename:
            zip_obj, file_inzip = self.get_zip_obj_and_filename(wavfilename, zip_mode='r')
            byte_chunk = zip_obj.read(file_inzip)
            byte_stream = io.BytesIO(byte_chunk)

            with soundfile.SoundFile(byte_stream, 'r') as f:
                fs_read = f.samplerate
                x = f.read()
        else:
            with soundfile.SoundFile(wavfilename, 'r') as f:
                fs_read = f.samplerate
                x = f.read()
        if fs_read != self.fs:
            x = resampy.resample(x, fs_read, self.fs)
            fs_read = self.fs
        return fs_read, x.astype(self.dtype) 
Example #2
Source File: audioread.py    From pb_chime5 with MIT License
def audio_shape(path):
    """

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_shape(path)
    122111
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_shape(path)  # correct for multichannel
    (8, 960000)
    >>> audioread(path)[0].shape
    (8, 960000)
    """
    with soundfile.SoundFile(str(path)) as f:
        channels = f.channels
        if channels == 1:
            return len(f)
        else:
            return channels, len(f) 
Example #3
Source File: preprocess_wham.py    From asteroid with MIT License
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """ Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4) 
Example #4
Source File: preprocess_wsj0mix.py    From asteroid with MIT License
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """ Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4) 
Example #5
Source File: preprocess_kinect_wsj.py    From asteroid with MIT License
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """ Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4) 
Example #6
Source File: segment.py    From training with Apache License 2.0
def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
              duration=0, trim=False):
        """
        Load a file supported by librosa and return as an AudioSegment.
        :param filename: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(filename, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)
        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #7
Source File: segment.py    From NeMo with Apache License 2.0
def segment_from_file(cls, audio_file, target_sr=None, n_segments=0, trim=False):
        """Grabs n_segments number of samples from audio_file randomly from the
        file as opposed to at a specified offset.

        Note that audio_file can be either the file path, or a file-like object.
        """
        with sf.SoundFile(audio_file, 'r') as f:
            sample_rate = f.samplerate
            if n_segments > 0 and len(f) > n_segments:
                max_audio_start = len(f) - n_segments
                audio_start = random.randint(0, max_audio_start)
                f.seek(audio_start)
                samples = f.read(n_segments, dtype='float32')
            else:
                samples = f.read(dtype='float32')

        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #8
Source File: segment.py    From NeMo with Apache License 2.0
def from_file(
        cls, audio_file, target_sr=None, int_values=False, offset=0, duration=0, trim=False,
    ):
        """
        Load a file supported by librosa and return as an AudioSegment.
        :param audio_file: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(audio_file, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)

        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #9
Source File: write_files.py    From sms_wsj with MIT License
def audio_read(example):
    """
    :param example: example dict
    :return: example dict with audio_data added
    """
    audio_keys = ['rir', 'speech_source']
    keys = list(example['audio_path'].keys())
    example['audio_data'] = dict()
    for audio_key in audio_keys:
        assert audio_key in keys, (
            f'Trying to read {audio_key} but only {keys} are available'
        )
        audio_data = list()
        for wav_file in example['audio_path'][audio_key]:

            with soundfile.SoundFile(wav_file, mode='r') as f:
                audio_data.append(f.read().T)
        example['audio_data'][audio_key] = np.array(audio_data)
    return example 
Example #10
Source File: Metadata.py    From AdversarialAudioSeparation with MIT License
def get_audio_metadata(audioPath, sphereType=False):
    '''
    Returns the sampling rate, number of channels, and duration of an audio file
    :param audioPath: path to the audio file
    :param sphereType: whether to read the file as a SPHERE file (e.g. TIMIT)
    :return: (sample_rate, channels, duration)
    '''
    ext = os.path.splitext(audioPath)[1][1:].lower()
    if ext=="aiff" or sphereType:  # SPHERE headers for the TIMIT dataset
        audio = scikits.audiolab.Sndfile(audioPath)
        sr = audio.samplerate
        channels = audio.channels
        duration = float(audio.nframes) / float(audio.samplerate)
    elif ext=="mp3": # Use ffmpeg/ffprobe
        sr, channels, duration = get_mp3_metadata(audioPath)
    else:
        with SoundFile(audioPath, mode='r') as snd_file:
            sr = snd_file.samplerate
            channels = snd_file.channels
            duration = float(snd_file.frames) / float(snd_file.samplerate)
    return int(sr), int(channels), float(duration) 
Example #11
Source File: Input.py    From AdversarialAudioSeparation with MIT License
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    with SoundFile(audio_path, mode='r') as snd_file:
        audio_sr = snd_file.samplerate
        snd_file.seek(start_frame)
        audio = snd_file.read(end_frame - start_frame, dtype='float32')
    audio = audio.T  # Transpose axes to (channels, frames)

    # Convert to mono if desired
    if mono and len(audio.shape) > 1 and audio.shape[0] > 1:
        audio = np.mean(audio, axis=0)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        audio = Utils.resample(audio, audio_sr, sample_rate)
        audio_sr = sample_rate

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr 
Example #12
Source File: segment.py    From inference with Apache License 2.0
def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
                  duration=0, trim=False):
        """
        Load a file supported by librosa and return as an AudioSegment.
        :param filename: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(filename, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)
        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #13
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithPause(self, image_file, audio_file, pause):
        audio_file = audio_file.replace("\\", "/")
        f = sf.SoundFile(audio_file)
        audio_clip = AudioSegment.from_wav(audio_file)
        duration = (len(f) / f.samplerate) + pause / 1000
        audio_clip_with_pause = audio_clip + AudioSegment.silent(duration=pause)
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration) 
Example #14
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithTransition(self, image_file, audio_file, transition_file):
        media_info = MediaInfo.parse(transition_file)
        duration_in_ms = media_info.tracks[0].duration
        audio_file = audio_file.replace("\\", "/")
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
            f = sf.SoundFile(audio_file)
        except Exception as e:
            print(e)
            audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
            f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
        duration = (len(f) / f.samplerate)
        audio_clip_with_pause = audio_clip
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration)
        self.transitions.append((transition_file, len(self.imageframes) - 1, duration_in_ms / 1000)) 
Example #15
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
        media_info = MediaInfo.parse(transition_file)
        duration_in_ms = media_info.tracks[0].duration
        f = sf.SoundFile(audio_file)
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
        except Exception:
            print("error with frame audio transition pause for %s" % audio_file)
            audio_clip = AudioSegment.silent(duration=pause)
        duration = (len(f) / f.samplerate)
        audio_clip_with_pause = audio_clip
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration + (pause/1000))
        self.transitions.append((transition_file, len(self.imageframes) - 1, (duration_in_ms / 1000) + (pause/1000))) 
Example #16
Source File: Input.py    From vimss with GNU General Public License v3.0
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    snd_file = SoundFile(audio_path, mode='r')
    audio_sr = snd_file.samplerate
    num_frames = snd_file.frames

    start_read = max(start_frame, 0)
    pad_front = -min(start_frame, 0)
    end_read = min(end_frame, num_frames)
    pad_back = max(end_frame - num_frames, 0)

    snd_file.seek(start_read)
    audio = snd_file.read(end_read - start_read, dtype='float32', always_2d=True) # (num_frames, channels)
    snd_file.close()

    # Pad if necessary (start_frame or end_frame out of bounds)
    audio = np.pad(audio, [(pad_front, pad_back), (0, 0)], mode="constant", constant_values=0.0)

    # Convert to mono if desired
    if mono:
        audio = np.mean(audio, axis=1, keepdims=True)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        res_length = int(np.ceil(float(audio.shape[0]) * float(sample_rate) / float(audio_sr)))
        audio = np.pad(audio, [(1, 1), (0,0)], mode="reflect")  # Pad audio first
        audio = librosa.resample(audio.T, audio_sr, sample_rate, res_type="kaiser_fast").T
        skip = (audio.shape[0] - res_length) // 2
        audio = audio[skip:skip+res_length,:]

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr 
Example #17
Source File: stt_utils.py    From training_results_v0.6 with Apache License 2.0
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14, overwrite=False, save_feature_as_csvfile=False):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """

    csvfilename = filename.replace(".wav", ".csv")
    if (os.path.isfile(csvfilename) is False) or overwrite:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
            if audio.ndim >= 2:
                audio = np.mean(audio, 1)
            if max_freq is None:
                max_freq = sample_rate / 2
            if max_freq > sample_rate / 2:
                raise ValueError("max_freq must not be greater than half of "
                                 " sample rate")
            if step > window:
                raise ValueError("step size must not be greater than window size")
            hop_length = int(0.001 * step * sample_rate)
            fft_length = int(0.001 * window * sample_rate)

            pxx, freqs = spectrogram(
                audio, fft_length=fft_length, sample_rate=sample_rate,
                hop_length=hop_length)

            ind = np.where(freqs <= max_freq)[0][-1] + 1
            res = np.transpose(np.log(pxx[:ind, :] + eps))
            if save_feature_as_csvfile:
                np.savetxt(csvfilename, res)
            return res
    else:
        return np.loadtxt(csvfilename) 
Example #18
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrame(self, image_file, audio_file):
        audio_file = audio_file.replace("\\", "/")
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
            f = sf.SoundFile(audio_file)
        except Exception as e:
            print(e)
            audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
            f = sf.SoundFile("%s/pause.wav" % settings.assetPath)

        duration = len(f) / f.samplerate
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip)
        self.durations.append(duration) 
Example #19
Source File: audioread.py    From pb_chime5 with MIT License
def audio_channels(path):
    """

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_channels(path)
    1
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_channels(path)  # correct for multichannel
    8
    """
    with soundfile.SoundFile(str(path)) as f:
        return f.channels 
Example #20
Source File: audioread.py    From pb_chime5 with MIT License
def audio_length(path, unit='samples'):
    """

    Args:
        path:
        unit:

    Returns:

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_length(path)
    122111
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_length(path)  # correct for multichannel
    960000
    >>> with soundfile.SoundFile(str(path)) as f:
    ...     print(f.read().shape)
    (960000, 8)
    """

    # params = soundfile.info(str(path))
    # return int(params.samplerate * params.duration)

    if unit == 'samples':
        with soundfile.SoundFile(str(path)) as f:
            return len(f)
    elif unit == 'seconds':
        with soundfile.SoundFile(str(path)) as f:
            return len(f) / f.samplerate
    else:
        raise ValueError(unit)
Example #21
Source File: stt_utils.py    From SNIPER-mxnet with Apache License 2.0
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14, overwrite=False, save_feature_as_csvfile=False):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """

    csvfilename = filename.replace(".wav", ".csv")
    if (os.path.isfile(csvfilename) is False) or overwrite:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
            if audio.ndim >= 2:
                audio = np.mean(audio, 1)
            if max_freq is None:
                max_freq = sample_rate / 2
            if max_freq > sample_rate / 2:
                raise ValueError("max_freq must not be greater than half of "
                                 " sample rate")
            if step > window:
                raise ValueError("step size must not be greater than window size")
            hop_length = int(0.001 * step * sample_rate)
            fft_length = int(0.001 * window * sample_rate)

            pxx, freqs = spectrogram(
                audio, fft_length=fft_length, sample_rate=sample_rate,
                hop_length=hop_length)

            ind = np.where(freqs <= max_freq)[0][-1] + 1
            res = np.transpose(np.log(pxx[:ind, :] + eps))
            if save_feature_as_csvfile:
                np.savetxt(csvfilename, res)
            return res
    else:
        return np.loadtxt(csvfilename) 
Example #22
Source File: preprocess_dns.py    From asteroid with MIT License
def preprocess_dns(in_dir, out_dir='./data'):
    """ Create json file from dataset folder.

    Args:
        in_dir (str): Location of the DNS data
        out_dir (str): Where to save the json files.
    """
    # Get all file ids
    clean_wavs = glob.glob(os.path.join(in_dir, 'clean/*.wav'))
    clean_dic = make_wav_id_dict(clean_wavs)

    mix_wavs = glob.glob(os.path.join(in_dir, 'noisy/*.wav'))
    mix_dic = make_wav_id_dict(mix_wavs)

    noise_wavs = glob.glob(os.path.join(in_dir, 'noise/*.wav'))
    noise_dic = make_wav_id_dict(noise_wavs)
    assert clean_dic.keys() == mix_dic.keys() == noise_dic.keys()
    file_infos = {k: dict(
        mix=mix_dic[k],
        clean=clean_dic[k],
        noise=noise_dic[k],
        snr=get_snr_from_mix_path(mix_dic[k]),
        file_len=len(sf.SoundFile(mix_dic[k]))
    ) for k in clean_dic.keys()}

    # Save to JSON
    with open(os.path.join(out_dir, 'file_infos.json'), 'w') as f:
        json.dump(file_infos, f, indent=2) 
Example #23
Source File: preprocess_whamr.py    From asteroid with MIT License
def preprocess_one_dir(in_dir):
    """ Create list of list for one condition, each list contains
    [path, wav_length]."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    return file_infos 
Example #24
Source File: utils.py    From DeepXi with Mozilla Public License 2.0
def batch_list(file_dir, list_name, data_path='data', make_new=False):
	"""
	Places the file paths and wav lengths of an audio file into a dictionary, which
	is then appended to a list. 'glob' is used to support Unix style pathname
	pattern expansions. Checks if the training list has already been saved, and loads
	it.

	Argument/s:
		file_dir - directory containing the audio files.
		list_name - name for the list.
		data_path - path to store pickle files.
		make_new - re-create list.

	Returns:
		batch_list - list of file paths and wav length.
	"""
	extension = ['*.wav', '*.flac', '*.mp3']
	if not make_new:
		if os.path.exists(data_path + '/' + list_name + '_list_' + platform.node() + '.p'):
			print('Loading ' + list_name + ' list...')
			with open(data_path + '/' + list_name + '_list_' + platform.node() + '.p', 'rb') as f:
				batch_list = pickle.load(f)
			if batch_list[0]['file_path'].find(file_dir) != -1:
				print(list_name + ' list has a total of %i entries.' % (len(batch_list)))
				return batch_list

	print('Creating ' + list_name + ' list...')
	batch_list = []
	for i in extension:
		for j in glob.glob(os.path.join(file_dir, i)):
			f = SoundFile(j)
			wav_len = f.seek(0, SEEK_END)
			if wav_len == -1:
				wav, _ = read_wav(j)
				wav_len = len(wav)
			batch_list.append({'file_path': j, 'wav_len': wav_len}) # append dictionary.
	if not os.path.exists(data_path): os.makedirs(data_path) # make directory.
	with open(data_path + '/' + list_name + '_list_' + platform.node() + '.p', 'wb') as f:
		pickle.dump(batch_list, f)
	print('The ' + list_name + ' list has a total of %i entries.' % (len(batch_list)))
	return batch_list 
Example #25
Source File: utils.py    From emotion-recognition-using-speech with MIT License
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result 
Example #26
Source File: utils.py    From ba-dls-deepspeech with Apache License 2.0
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """
    with soundfile.SoundFile(filename) as sound_file:
        audio = sound_file.read(dtype='float32')
        sample_rate = sound_file.samplerate
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             " sample rate")
        if step > window:
            raise ValueError("step size must not be greater than window size")
        hop_length = int(0.001 * step * sample_rate)
        fft_length = int(0.001 * window * sample_rate)
        pxx, freqs = spectrogram(
            audio, fft_length=fft_length, sample_rate=sample_rate,
            hop_length=hop_length)
        ind = np.where(freqs <= max_freq)[0][-1] + 1
    return np.transpose(np.log(pxx[:ind, :] + eps)) 
Example #27
Source File: test_deformers.py    From muda with ISC License
def __test_shifted_impulse(jam_orig, jam_new, ir_files, orig_duration, n_fft, rolloff_value):

    # delayed impulse
    with psf.SoundFile(str(ir_files), mode='r') as soundf:
        ir_data = soundf.read()
        ir_sr = soundf.samplerate

    # delay the impulse signal by zero-padding with 1 second of zeros
    ir_data_delayed = np.pad(ir_data, (ir_sr, 0), mode='constant')

    # dump the delayed audio to a file
    psf.write('tests/data/ir_file_delayed.wav', ir_data_delayed, ir_sr)

    D_delayed = muda.deformers.IRConvolution(ir_files = 'tests/data/ir_file_delayed.wav',
                                             n_fft=n_fft, rolloff_value = rolloff_value)

    for jam_shifted in D_delayed.transform(jam_orig):

        # Verify that the delayed annotations (chords here) fall within the valid duration range
        # __test_duration(jam_orig, jam_shifted, orig_duration)

        shifted_data = jam_shifted.search(namespace='chord')[0].data
        delayed_data = jam_new.search(namespace='chord')[0].data

        for i in range(len(shifted_data)):
            # For each observation, verify that its onset time has been shifted by 1 s
            isclose_(1.00, shifted_data[i][0] - delayed_data[i][0])
Example #28
Source File: test_deformers.py    From muda with ISC License
def test_background(noise, n_samples, weight_min, weight_max, jam_fixture):

    D = muda.deformers.BackgroundNoise(files=noise,
                                       n_samples=n_samples,
                                       weight_min=weight_min,
                                       weight_max=weight_max)

    jam_orig = deepcopy(jam_fixture)
    orig_duration = librosa.get_duration(**jam_orig.sandbox.muda['_audio'])

    n_out = 0
    for jam_new in D.transform(jam_orig):

        assert jam_new is not jam_fixture
        __test_effect(jam_orig, jam_fixture)

        assert not np.allclose(jam_orig.sandbox.muda['_audio']['y'],
                               jam_new.sandbox.muda['_audio']['y'])

        d_state = jam_new.sandbox.muda.history[-1]['state']
        filename = d_state['filename']
        start = d_state['start']
        stop = d_state['stop']

        with psf.SoundFile(str(filename), mode='r') as soundf:
            max_index = len(soundf)
            noise_sr = soundf.samplerate

        assert 0 <= start < stop
        assert start < stop <= max_index
        assert ((stop - start) / float(noise_sr)) == orig_duration

        __test_effect(jam_orig, jam_new)
        n_out += 1

    assert n_out == n_samples
    # Serialization test
    D2 = muda.deserialize(muda.serialize(D))
    __test_params(D, D2)