Python soundfile.SoundFile() Examples

The following are 28 code examples of soundfile.SoundFile(), collected from open-source projects. Each example is attributed to its original project and source file. You may also want to check out all of the other available functions and classes of the soundfile module.
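All of the examples below follow the same basic pattern: open a file with soundfile.SoundFile(), query its metadata, and read the samples into a NumPy array. A minimal sketch of that pattern (the file path is a placeholder):

import soundfile

# Open for reading; the context manager closes the file automatically.
# 'example.wav' is a placeholder path.
with soundfile.SoundFile('example.wav', 'r') as f:
    sample_rate = f.samplerate      # sampling rate in Hz
    channels = f.channels           # number of audio channels
    num_frames = len(f)             # total number of frames (same as f.frames)
    data = f.read(dtype='float32')  # ndarray of shape (frames,) or (frames, channels)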
Example #1
Source File: zip_io.py    From pykaldi2 with MIT License
def read_wav(self, wavfilename):
        if '@/' in wavfilename:
            zip_obj, file_inzip = self.get_zip_obj_and_filename(wavfilename, zip_mode='r')
            byte_chunk = zip_obj.read(file_inzip)
            byte_stream = io.BytesIO(byte_chunk)

            with soundfile.SoundFile(byte_stream, 'r') as f:
                fs_read = f.samplerate
                x = f.read()
        else:
            with soundfile.SoundFile(wavfilename, 'r') as f:
                fs_read = f.samplerate
                x = f.read()
        if fs_read != self.fs:
            x = resampy.resample(x, fs_read, self.fs)
            fs_read = self.fs
        return fs_read, x.astype(self.dtype) 
Example #2
Source File: audioread.py    From pb_chime5 with MIT License
def audio_shape(path):
    """

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_shape(path)
    122111
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_shape(path)  # correct for multichannel
    (8, 960000)
    >>> audioread(path)[0].shape
    (8, 960000)
    """
    with soundfile.SoundFile(str(path)) as f:
        channels = f.channels
        if channels == 1:
            return len(f)
        else:
            return channels, len(f) 
Example #3
Source File: preprocess_wham.py    From asteroid with MIT License
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """ Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4) 
Example #4
Source File: preprocess_wsj0mix.py    From asteroid with MIT License
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """ Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4) 
Example #5
Source File: preprocess_kinect_wsj.py    From asteroid with MIT License
def preprocess_one_dir(in_dir, out_dir, out_filename):
    """ Create .json file for one condition."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, out_filename + '.json'), 'w') as f:
        json.dump(file_infos, f, indent=4) 
Example #6
Source File: segment.py    From training with Apache License 2.0
def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
              duration=0, trim=False):
        """
        Load a file supported by librosa and return as an AudioSegment.
        :param filename: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(filename, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)
        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #7
Source File: segment.py    From NeMo with Apache License 2.0
def segment_from_file(cls, audio_file, target_sr=None, n_segments=0, trim=False):
        """Grabs n_segments number of samples from audio_file randomly from the
        file as opposed to at a specified offset.

        Note that audio_file can be either the file path, or a file-like object.
        """
        with sf.SoundFile(audio_file, 'r') as f:
            sample_rate = f.samplerate
            if n_segments > 0 and len(f) > n_segments:
                max_audio_start = len(f) - n_segments
                audio_start = random.randint(0, max_audio_start)
                f.seek(audio_start)
                samples = f.read(n_segments, dtype='float32')
            else:
                samples = f.read(dtype='float32')

        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #8
Source File: segment.py    From NeMo with Apache License 2.0
def from_file(
        cls, audio_file, target_sr=None, int_values=False, offset=0, duration=0, trim=False,
    ):
        """
        Load a file supported by librosa and return as an AudioSegment.
        :param audio_file: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(audio_file, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)

        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #9
Source File: write_files.py    From sms_wsj with MIT License
def audio_read(example):
    """
    :param example: example dict
    :return: example dict with audio_data added
    """
    audio_keys = ['rir', 'speech_source']
    keys = list(example['audio_path'].keys())
    example['audio_data'] = dict()
    for audio_key in audio_keys:
        assert audio_key in keys, (
            f'Trying to read {audio_key} but only {keys} are available'
        )
        audio_data = list()
        for wav_file in example['audio_path'][audio_key]:

            with soundfile.SoundFile(wav_file, mode='r') as f:
                audio_data.append(f.read().T)
        example['audio_data'][audio_key] = np.array(audio_data)
    return example 
Example #10
Source File: Metadata.py    From AdversarialAudioSeparation with MIT License
def get_audio_metadata(audioPath, sphereType=False):
    '''
    Returns the sampling rate, number of channels, and duration of an audio file
    :param audioPath: path to the audio file
    :param sphereType: whether to read the file as a SPHERE file (e.g. TIMIT)
    :return: (sample_rate, channels, duration)
    '''
    ext = os.path.splitext(audioPath)[1][1:].lower()
    if ext=="aiff" or sphereType:  # SPHERE headers for the TIMIT dataset
        audio = scikits.audiolab.Sndfile(audioPath)
        sr = audio.samplerate
        channels = audio.channels
        duration = float(audio.nframes) / float(audio.samplerate)
    elif ext=="mp3": # Use ffmpeg/ffprobe
        sr, channels, duration = get_mp3_metadata(audioPath)
    else:
        with SoundFile(audioPath, mode='r') as snd_file:
            sr = snd_file.samplerate
            channels = snd_file.channels
            duration = float(snd_file.frames) / float(snd_file.samplerate)
    return int(sr), int(channels), float(duration) 
Example #11
Source File: Input.py    From AdversarialAudioSeparation with MIT License
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    with SoundFile(audio_path, mode='r') as snd_file:
        audio_sr = snd_file.samplerate
        snd_file.seek(start_frame)
        audio = snd_file.read(end_frame - start_frame, dtype='float32')
    audio = audio.T  # Transpose axes to (channels, frames)

    # Convert to mono if desired
    if mono and len(audio.shape) > 1 and audio.shape[0] > 1:
        audio = np.mean(audio, axis=0)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        audio = Utils.resample(audio, audio_sr, sample_rate)
        audio_sr = sample_rate

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr 
Example #12
Source File: segment.py    From inference with Apache License 2.0
def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
                  duration=0, trim=False):
        """
        Load a file supported by librosa and return as an AudioSegment.
        :param filename: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(filename, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)
        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim) 
Example #13
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithPause(self, image_file, audio_file, pause):
        audio_file = audio_file.replace("\\", "/")
        f = sf.SoundFile(audio_file)
        audio_clip = AudioSegment.from_wav(audio_file)
        duration = (len(f) / f.samplerate) + pause / 1000
        audio_clip_with_pause = audio_clip + AudioSegment.silent(duration=pause)
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration) 
Example #14
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithTransition(self, image_file, audio_file, transition_file):
        media_info = MediaInfo.parse(transition_file)
        duration_in_ms = media_info.tracks[0].duration
        audio_file = audio_file.replace("\\", "/")
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
            f = sf.SoundFile(audio_file)
        except Exception as e:
            print(e)
            audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
            f = sf.SoundFile("%s/pause.wav" % settings.assetPath)
        duration = (len(f) / f.samplerate)
        audio_clip_with_pause = audio_clip
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration)
        self.transitions.append((transition_file, len(self.imageframes) - 1, duration_in_ms / 1000)) 
Example #15
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrameWithTransitionAndPause(self, image_file, audio_file, transition_file, pause):
        media_info = MediaInfo.parse(transition_file)
        duration_in_ms = media_info.tracks[0].duration
        f = sf.SoundFile(audio_file)
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
        except Exception:
            print("error with frame audio transition pause for %s" % audio_file)
            audio_clip = AudioSegment.silent(duration=pause)
        duration = (len(f) / f.samplerate)
        audio_clip_with_pause = audio_clip
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration + (pause/1000))
        self.transitions.append((transition_file, len(self.imageframes) - 1, (duration_in_ms / 1000) + (pause/1000))) 
Example #16
Source File: Input.py    From vimss with GNU General Public License v3.0
def readWave(audio_path, start_frame, end_frame, mono=True, sample_rate=None, clip=True):
    snd_file = SoundFile(audio_path, mode='r')
    audio_sr = snd_file.samplerate
    num_frames = snd_file.frames

    start_read = max(start_frame, 0)
    pad_front = -min(start_frame, 0)
    end_read = min(end_frame, num_frames)
    pad_back = max(end_frame - num_frames, 0)

    snd_file.seek(start_read)
    audio = snd_file.read(end_read - start_read, dtype='float32', always_2d=True) # (num_frames, channels)
    snd_file.close()

    # Pad if necessary (start_frame or end_frame out of bounds)
    audio = np.pad(audio, [(pad_front, pad_back), (0, 0)], mode="constant", constant_values=0.0)

    # Convert to mono if desired
    if mono:
        audio = np.mean(audio, axis=1, keepdims=True)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        res_length = int(np.ceil(float(audio.shape[0]) * float(sample_rate) / float(audio_sr)))
        audio = np.pad(audio, [(1, 1), (0,0)], mode="reflect")  # Pad audio first
        audio = librosa.resample(audio.T, audio_sr, sample_rate, res_type="kaiser_fast").T
        skip = (audio.shape[0] - res_length) // 2
        audio = audio[skip:skip+res_length,:]

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr 
Example #17
Source File: stt_utils.py    From training_results_v0.6 with Apache License 2.0
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14, overwrite=False, save_feature_as_csvfile=False):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """

    csvfilename = filename.replace(".wav", ".csv")
    if (os.path.isfile(csvfilename) is False) or overwrite:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
            if audio.ndim >= 2:
                audio = np.mean(audio, 1)
            if max_freq is None:
                max_freq = sample_rate / 2
            if max_freq > sample_rate / 2:
                raise ValueError("max_freq must not be greater than half of "
                                 " sample rate")
            if step > window:
                raise ValueError("step size must not be greater than window size")
            hop_length = int(0.001 * step * sample_rate)
            fft_length = int(0.001 * window * sample_rate)

            pxx, freqs = spectrogram(
                audio, fft_length=fft_length, sample_rate=sample_rate,
                hop_length=hop_length)

            ind = np.where(freqs <= max_freq)[0][-1] + 1
            res = np.transpose(np.log(pxx[:ind, :] + eps))
            if save_feature_as_csvfile:
                np.savetxt(csvfilename, res)
            return res
    else:
        return np.loadtxt(csvfilename) 
Example #18
Source File: generatemovie.py    From Automatic-Youtube-Reddit-Text-To-Speech-Video-Generator-and-Uploader with MIT License
def addFrame(self, image_file, audio_file):
        audio_file = audio_file.replace("\\", "/")
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
            f = sf.SoundFile(audio_file)
        except Exception as e:
            print(e)
            audio_clip = AudioSegment.from_wav("%s/pause.wav" % settings.assetPath)
            f = sf.SoundFile("%s/pause.wav" % settings.assetPath)

        duration = len(f) / f.samplerate
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip)
        self.durations.append(duration) 
Example #19
Source File: audioread.py    From pb_chime5 with MIT License
def audio_channels(path):
    """

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_channels(path)
    1
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_channels(path)  # correct for multichannel
    8
    """
    with soundfile.SoundFile(str(path)) as f:
        return f.channels 
Example #20
Source File: audioread.py    From pb_chime5 with MIT License
def audio_length(path, unit='samples'):
    """

    Args:
        path:
        unit:

    Returns:

    >>> path = '/net/fastdb/chime3/audio/16kHz/isolated/dt05_caf_real/F01_050C0102_CAF.CH1.wav'
    >>> audio_length(path)
    122111
    >>> path = '/net/db/voiceHome/audio/noises/dev/home3_room2_arrayGeo3_arrayPos2_noiseCond1.wav'
    >>> audio_length(path)  # correct for multichannel
    960000
    >>> with soundfile.SoundFile(str(path)) as f:
    ...     print(f.read().shape)
    (960000, 8)
    """

    # params = soundfile.info(str(path))
    # return int(params.samplerate * params.duration)

    if unit == 'samples':
        with soundfile.SoundFile(str(path)) as f:
            return len(f)
    elif unit == 'seconds':
        with soundfile.SoundFile(str(path)) as f:
            return len(f) / f.samplerate
    else:
        raise ValueError(unit)
Example #21
Source File: stt_utils.py    From SNIPER-mxnet with Apache License 2.0
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14, overwrite=False, save_feature_as_csvfile=False):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """

    csvfilename = filename.replace(".wav", ".csv")
    if (os.path.isfile(csvfilename) is False) or overwrite:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate
            if audio.ndim >= 2:
                audio = np.mean(audio, 1)
            if max_freq is None:
                max_freq = sample_rate / 2
            if max_freq > sample_rate / 2:
                raise ValueError("max_freq must not be greater than half of "
                                 " sample rate")
            if step > window:
                raise ValueError("step size must not be greater than window size")
            hop_length = int(0.001 * step * sample_rate)
            fft_length = int(0.001 * window * sample_rate)

            pxx, freqs = spectrogram(
                audio, fft_length=fft_length, sample_rate=sample_rate,
                hop_length=hop_length)

            ind = np.where(freqs <= max_freq)[0][-1] + 1
            res = np.transpose(np.log(pxx[:ind, :] + eps))
            if save_feature_as_csvfile:
                np.savetxt(csvfilename, res)
            return res
    else:
        return np.loadtxt(csvfilename) 
Example #22
Source File: preprocess_dns.py    From asteroid with MIT License
def preprocess_dns(in_dir, out_dir='./data'):
    """ Create json file from dataset folder.

    Args:
        in_dir (str): Location of the DNS data
        out_dir (str): Where to save the json files.
    """
    # Get all file ids
    clean_wavs = glob.glob(os.path.join(in_dir, 'clean/*.wav'))
    clean_dic = make_wav_id_dict(clean_wavs)

    mix_wavs = glob.glob(os.path.join(in_dir, 'noisy/*.wav'))
    mix_dic = make_wav_id_dict(mix_wavs)

    noise_wavs = glob.glob(os.path.join(in_dir, 'noise/*.wav'))
    noise_dic = make_wav_id_dict(noise_wavs)
    assert clean_dic.keys() == mix_dic.keys() == noise_dic.keys()
    file_infos = {k: dict(
        mix=mix_dic[k],
        clean=clean_dic[k],
        noise=noise_dic[k],
        snr=get_snr_from_mix_path(mix_dic[k]),
        file_len=len(sf.SoundFile(mix_dic[k]))
    ) for k in clean_dic.keys()}

    # Save to JSON
    with open(os.path.join(out_dir, 'file_infos.json'), 'w') as f:
        json.dump(file_infos, f, indent=2) 
Example #23
Source File: preprocess_whamr.py    From asteroid with MIT License
def preprocess_one_dir(in_dir):
    """ Create list of list for one condition, each list contains
    [path, wav_length]."""
    file_infos = []
    in_dir = os.path.abspath(in_dir)
    wav_list = os.listdir(in_dir)
    wav_list.sort()
    for wav_file in wav_list:
        if not wav_file.endswith('.wav'):
            continue
        wav_path = os.path.join(in_dir, wav_file)
        with sf.SoundFile(wav_path) as samples:
            file_infos.append((wav_path, len(samples)))
    return file_infos 
Example #24
Source File: utils.py    From DeepXi with Mozilla Public License 2.0
def batch_list(file_dir, list_name, data_path='data', make_new=False):
	"""
	Places the file paths and wav lengths of an audio file into a dictionary, which
	is then appended to a list. 'glob' is used to support Unix style pathname
	pattern expansions. Checks if the training list has already been saved, and loads
	it.

	Argument/s:
		file_dir - directory containing the audio files.
		list_name - name for the list.
		data_path - path to store pickle files.
		make_new - re-create list.

	Returns:
		batch_list - list of file paths and wav length.
	"""
	extension = ['*.wav', '*.flac', '*.mp3']
	if not make_new:
		if os.path.exists(data_path + '/' + list_name + '_list_' + platform.node() + '.p'):
			print('Loading ' + list_name + ' list...')
			with open(data_path + '/' + list_name + '_list_' + platform.node() + '.p', 'rb') as f:
				batch_list = pickle.load(f)
			if batch_list[0]['file_path'].find(file_dir) != -1:
				print(list_name + ' list has a total of %i entries.' % (len(batch_list)))
				return batch_list

	print('Creating ' + list_name + ' list...')
	batch_list = []
	for i in extension:
		for j in glob.glob(os.path.join(file_dir, i)):
			f = SoundFile(j)
			wav_len = f.seek(0, SEEK_END)
			if wav_len == -1:
				wav, _ = read_wav(j)
				wav_len = len(wav)
			batch_list.append({'file_path': j, 'wav_len': wav_len}) # append dictionary.
	if not os.path.exists(data_path): os.makedirs(data_path) # make directory.
	with open(data_path + '/' + list_name + '_list_' + platform.node() + '.p', 'wb') as f:
		pickle.dump(batch_list, f)
	print('The ' + list_name + ' list has a total of %i entries.' % (len(batch_list)))
	return batch_list 
Example #25
Source File: utils.py    From emotion-recognition-using-speech with MIT License
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result 
Example #26
Source File: utils.py    From ba-dls-deepspeech with Apache License 2.0
def spectrogram_from_file(filename, step=10, window=20, max_freq=None,
                          eps=1e-14):
    """ Calculate the log of linear spectrogram from FFT energy
    Params:
        filename (str): Path to the audio file
        step (int): Step size in milliseconds between windows
        window (int): FFT window size in milliseconds
        max_freq (int): Only FFT bins corresponding to frequencies between
            [0, max_freq] are returned
        eps (float): Small value to ensure numerical stability (for ln(x))
    """
    with soundfile.SoundFile(filename) as sound_file:
        audio = sound_file.read(dtype='float32')
        sample_rate = sound_file.samplerate
        if audio.ndim >= 2:
            audio = np.mean(audio, 1)
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             " sample rate")
        if step > window:
            raise ValueError("step size must not be greater than window size")
        hop_length = int(0.001 * step * sample_rate)
        fft_length = int(0.001 * window * sample_rate)
        pxx, freqs = spectrogram(
            audio, fft_length=fft_length, sample_rate=sample_rate,
            hop_length=hop_length)
        ind = np.where(freqs <= max_freq)[0][-1] + 1
    return np.transpose(np.log(pxx[:ind, :] + eps)) 
Example #27
Source File: test_deformers.py    From muda with ISC License
def __test_shifted_impulse(jam_orig, jam_new, ir_files, orig_duration, n_fft, rolloff_value):

    # delayed impulse
    with psf.SoundFile(str(ir_files), mode='r') as soundf:
        ir_data = soundf.read()
        ir_sr = soundf.samplerate

    # delay the impulse signal by zero-padding with 1 second of zeros
    ir_data_delayed = np.pad(ir_data, (ir_sr, 0), mode='constant')

    # dump the delayed audio to a file
    psf.write('tests/data/ir_file_delayed.wav', ir_data_delayed, ir_sr)

    D_delayed = muda.deformers.IRConvolution(ir_files = 'tests/data/ir_file_delayed.wav',
                                             n_fft=n_fft, rolloff_value = rolloff_value)

    for jam_shifted in D_delayed.transform(jam_orig):

        # Verify that the delayed annotations (chords here) fall within the valid duration range
        # __test_duration(jam_orig, jam_shifted, orig_duration)

        shifted_data = jam_shifted.search(namespace='chord')[0].data
        delayed_data = jam_new.search(namespace='chord')[0].data

        for i in range(len(shifted_data)):
            # For each observation, verify that its onset time has been shifted by 1 s
            isclose_(1.00, shifted_data[i][0] - delayed_data[i][0])
Example #28
Source File: test_deformers.py    From muda with ISC License
def test_background(noise, n_samples, weight_min, weight_max, jam_fixture):

    D = muda.deformers.BackgroundNoise(files=noise,
                                       n_samples=n_samples,
                                       weight_min=weight_min,
                                       weight_max=weight_max)

    jam_orig = deepcopy(jam_fixture)
    orig_duration = librosa.get_duration(**jam_orig.sandbox.muda['_audio'])

    n_out = 0
    for jam_new in D.transform(jam_orig):

        assert jam_new is not jam_fixture
        __test_effect(jam_orig, jam_fixture)

        assert not np.allclose(jam_orig.sandbox.muda['_audio']['y'],
                               jam_new.sandbox.muda['_audio']['y'])

        d_state = jam_new.sandbox.muda.history[-1]['state']
        filename = d_state['filename']
        start = d_state['start']
        stop = d_state['stop']

        with psf.SoundFile(str(filename), mode='r') as soundf:
            max_index = len(soundf)
            noise_sr = soundf.samplerate

        assert 0 <= start < stop
        assert start < stop <= max_index
        assert ((stop - start) / float(noise_sr)) == orig_duration

        __test_effect(jam_orig, jam_new)
        n_out += 1

    assert n_out == n_samples
    # Serialization test
    D2 = muda.deserialize(muda.serialize(D))
    __test_params(D, D2)