Python load audio

Source File: ikala.py From mirdata with BSD 3-Clause "New" or "Revised" License

7 votes

def load_vocal_audio(audio_path):
    """Load the vocal channel of an ikala recording.

    The file is read without resampling and without downmixing; the row at
    channel index 1 of the loaded array is returned as the vocal signal.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal (channel index 1 of the file)
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        stereo, sr = librosa.load(audio_path, sr=None, mono=False)
        # Channel index 1 is the one this loader treats as the vocals.
        return stereo[1, :], sr

    raise IOError("audio_path {} does not exist".format(audio_path))

Source File: data_loader.py From inference with Apache License 2.0

6 votes

def load_audio(path, frame_start=0, frame_end=-1):
    """Load an audio file as a NumPy array, optionally sliced by sample index.

    Args:
        path: path handed to ``torchaudio.load``.
        frame_start (int): index of the first sample to keep.
        frame_end (int): index one past the last sample to keep. A
            non-positive value means "up to the end of the audio". When it is
            larger than the audio length, the audio is repeated back-to-back
            until it is long enough.

    Returns:
        np.ndarray: the samples after channel reduction and optional slicing.
    """
    sound, _ = torchaudio.load(path)
    sound = sound.numpy()
    if len(sound.shape) > 1:
        # NOTE(review): assumes axis 1 is the channel axis -- confirm against
        # the torchaudio version in use.
        if sound.shape[1] == 1:
            sound = sound.squeeze()
        else:
            sound = sound.mean(axis=1)  # multiple channels, average
    if frame_end > 0 or frame_start > 0:
        if frame_end <= 0:
            # Only a start offset was requested: slice through to the end
            # instead of tripping the ordering check below.
            frame_end = sound.shape[0]
        assert frame_start < frame_end, "slicing does not yet support inverting audio"
        if frame_end > sound.shape[0]:
            # Loop the clip: one original copy plus enough repeats to reach
            # frame_end, built with a single concatenation.
            repeats = ceil((frame_end - sound.shape[0])/float(sound.shape[0]))
            sound = np.concatenate([sound] * (1 + int(repeats)))
        sound = sound[frame_start:frame_end]
    return sound

Source File: fastgen.py From magenta with Apache License 2.0

6 votes

def load_batch_audio(files, sample_length=64000):
  """Load a batch of audio from .wav files.

  Each file is read through ``utils.load_audio`` at 16 kHz; clips shorter
  than ``sample_length`` are zero-padded at the end.

  Args:
    files: A list of filepaths to .wav files.
    sample_length: Maximum sample length

  Returns:
    batch: A padded array of audio [n_files, sample_length]
  """
  clips = []
  for wav_path in files:
    samples = utils.load_audio(wav_path, sample_length, sr=16000)
    n_samples = samples.shape[0]
    if n_samples >= sample_length:
      clips.append(samples)
      continue
    # Too short: copy into a zero buffer of the target length.
    buf = np.zeros([sample_length])
    buf[:n_samples] = samples
    clips.append(buf)
  return np.array(clips)

Source File: resnet_v2_predict.py From keras-audio with MIT License

6 votes

def load_audio_path_label_pairs(max_allowed_pairs=None):
    """Return ``(audio_path, label)`` pairs for the GTZAN test list.

    Paths come from ``test_songs_gtzan_list.txt`` and integer labels from
    ``test_gt_gtzan_list.txt``; the i-th label is paired with the i-th path.

    Args:
        max_allowed_pairs: optional cap on the number of pairs returned;
            ``None`` returns all of them.

    Returns:
        list of ``(str, int)`` tuples.
    """
    download_gtzan_genres_if_not_found('./very_large_data/gtzan')
    with open('./data/lists/test_songs_gtzan_list.txt', 'rt') as songs:
        audio_paths = ['./very_large_data/' + entry.strip() for entry in songs]

    pairs = []
    with open('./data/lists/test_gt_gtzan_list.txt', 'rt') as labels:
        for raw in labels:
            label = int(raw)
            if max_allowed_pairs is not None and len(pairs) >= max_allowed_pairs:
                break
            pairs.append((audio_paths[len(pairs)], label))
    return pairs

Source File: ikala.py From mirdata with BSD 3-Clause "New" or "Revised" License

6 votes

def load_instrumental_audio(audio_path):
    """Load the instrumental channel of an ikala recording.

    The file is read without resampling and without downmixing; the row at
    channel index 0 of the loaded array is returned as the instrumental signal.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal (channel index 0 of the file)
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        stereo, sr = librosa.load(audio_path, sr=None, mono=False)
        # Channel index 0 is the one this loader treats as the accompaniment.
        return stereo[0, :], sr

    raise IOError("audio_path {} does not exist".format(audio_path))

Source File: ikala.py From mirdata with BSD 3-Clause "New" or "Revised" License

6 votes

def load_mix_audio(audio_path):
    """Load an ikala mix (both channels combined).

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal, scaled by 2
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        averaged, sr = librosa.load(audio_path, sr=None, mono=True)
        # Multiply by 2 because librosa averages the left and right channel.
        mixed = 2.0 * averaged
        return mixed, sr

    raise IOError("audio_path {} does not exist".format(audio_path))

Source File: groove_midi.py From mirdata with BSD 3-Clause "New" or "Revised" License

6 votes

def load_audio(audio_path):
    """Load a Groove MIDI audio file as mono, resampled to 22050 Hz.

    Args:
        audio_path (str): path to audio file, or None

    Returns:
        y (np.ndarray): the mono audio signal (None when ``audio_path`` is None)
        sr (float): The sample rate of the audio file (None when
            ``audio_path`` is None)

    Raises:
        IOError: if ``audio_path`` is given but does not exist

    """
    # Some tracks have no audio; signal that with a pair of Nones.
    if audio_path is None:
        return None, None

    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=22050, mono=True)

    raise IOError("audio_path {} does not exist".format(audio_path))

Source File: resnet_v2_predict.py From mxnet-audio with MIT License

6 votes

def load_audio_path_label_pairs(max_allowed_pairs=None):
    """Return ``(audio_path, label)`` pairs for the GTZAN test list.

    All locations are resolved through ``patch_path``. The i-th integer label
    in ``test_gt_gtzan_list.txt`` is paired with the i-th path in
    ``test_songs_gtzan_list.txt``.

    Args:
        max_allowed_pairs: optional cap on the number of pairs returned;
            ``None`` returns all of them.

    Returns:
        list of ``(str, int)`` tuples.
    """
    from mxnet_audio.library.utility.gtzan_loader import download_gtzan_genres_if_not_found
    download_gtzan_genres_if_not_found(patch_path('very_large_data/gtzan'))

    with open(patch_path('data/lists/test_songs_gtzan_list.txt'), 'rt') as songs:
        audio_paths = [patch_path('very_large_data/' + entry.strip()) for entry in songs]

    pairs = []
    with open(patch_path('data/lists/test_gt_gtzan_list.txt'), 'rt') as labels:
        for raw in labels:
            label = int(raw)
            if max_allowed_pairs is not None and len(pairs) >= max_allowed_pairs:
                break
            pairs.append((audio_paths[len(pairs)], label))
    return pairs

Source File: transcribe.py From onsets-and-frames with MIT License

6 votes

def load_and_process_audio(flac_path, sequence_length, device):
    """Read an int16 audio file and return it as a float tensor on ``device``.

    Args:
        flac_path: path of the audio file read with ``soundfile``.
        sequence_length: number of samples to keep, or ``None`` for the whole
            file. When given, a window starting on a multiple of
            ``HOP_LENGTH`` is cut out; the start is drawn from a generator
            seeded with 42, so the same window is picked on every call.
        device: target passed to ``Tensor.to``.

    Returns:
        torch.Tensor: samples divided by 32768.0.

    Raises:
        AssertionError: if the file's sample rate differs from ``SAMPLE_RATE``.
    """
    random = np.random.RandomState(seed=42)

    audio, sr = soundfile.read(flac_path, dtype='int16')
    assert sr == SAMPLE_RATE

    audio = torch.ShortTensor(audio)

    if sequence_length is not None:
        audio_length = len(audio)
        max_offset = audio_length - sequence_length
        if max_offset > 0:
            step_begin = random.randint(max_offset) // HOP_LENGTH
        else:
            # The clip is not longer than the requested window; randint would
            # raise on a non-positive bound, so start at the beginning.
            step_begin = 0

        begin = step_begin * HOP_LENGTH
        end = begin + sequence_length

        audio = audio[begin:end].to(device)
    else:
        audio = audio.to(device)

    audio = audio.float().div_(32768.0)

    return audio

Source File: audio_io.py From synvae with MIT License

6 votes

def load_audio(audio_filename, sample_rate):
  """Read an audio file as mono samples at the requested rate.

  Args:
    audio_filename: File path to load.
    sample_rate: The number of samples per second at which the audio will be
        returned. Resampling will be performed if necessary.

  Returns:
    A numpy array of audio samples, single-channel (mono) and sampled at the
    specified rate, in float32 format.

  Raises:
    AudioIOReadError: If librosa is unable to load the audio data.
  """
  try:
    samples, _ = librosa.load(audio_filename, sr=sample_rate, mono=True)
  except Exception as err:  # pylint: disable=broad-except
    # Any loader failure is reported through the module's own error type.
    raise AudioIOReadError(err)
  else:
    return samples

Source File: cfp.py From Melody-extraction-with-melodic-segnet with MIT License

6 votes

def load_audio(filepath, sr=None, mono=True, dtype='float32'):
    """Read an audio file, optionally downmixing and resampling it.

    Paths containing ``.mp3`` are first converted to a temporary WAV file
    with pydub and then read with ``soundfile``; everything else is read
    directly.

    Args:
        filepath (str): path of the audio file.
        sr: target sample rate; falsy values keep the file's own rate.
        mono (bool): average the channels when the data has more than one axis.
        dtype: dtype the returned samples are cast to.

    Returns:
        tuple: ``(x, fs)`` -- the samples and their sample rate.
    """
    if '.mp3' in filepath:
        from pydub import AudioSegment
        import tempfile
        import os
        mp3 = AudioSegment.from_mp3(filepath)
        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS descriptor; close it right
        # away so it is not leaked (pydub reopens the file by name).
        os.close(fd)
        try:
            mp3.export(path, format="wav")
            del mp3
            x, fs = sf.read(path)
        finally:
            # Remove the temporary file even when export or read fails.
            os.remove(path)
    else:
        x, fs = sf.read(filepath)

    if mono and len(x.shape)>1:
        x = np.mean(x, axis = 1)
    if sr:
        x = scipy.signal.resample_poly(x, sr, fs)
        fs = sr 
    x = x.astype(dtype)

    return x, fs

Source File: audio_reader.py From tensorflow-wavenet with MIT License

6 votes

def load_generic_audio(directory, sample_rate):
    '''Yield ``(audio, filename, category_id)`` for each file in the directory.

    Files are visited in the order produced by ``randomize_files``. ``audio``
    is the mono waveform reshaped to a single column. ``category_id`` is the
    integer taken from the first ``FILE_PATTERN`` match in the file name, or
    ``None`` when the name does not match.
    '''
    files = find_files(directory)
    id_reg_exp = re.compile(FILE_PATTERN)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        matches = id_reg_exp.findall(filename)
        # First capture group of the first match carries the id, if any.
        category_id = int(matches[0][0]) if matches else None
        waveform = librosa.load(filename, sr=sample_rate, mono=True)[0]
        yield waveform.reshape(-1, 1), filename, category_id

Source File: core.py From KoSpeech with Apache License 2.0

6 votes

def load_audio(audio_path, del_silence):
    """
    Read a headerless PCM file (native ``short`` samples) as float32 and
    scale it by 1/32767.

    If ``del_silence`` is true, only the segments that ``split`` reports as
    non-silent (``top_db=30``) are kept and joined together.

    Returns None (after a debug log entry) when a ValueError, RuntimeError or
    IOError is raised while reading or trimming.
    """
    try:
        pcm = np.memmap(audio_path, dtype='h', mode='r')
        samples = pcm.astype('float32')

        if del_silence:
            kept = [samples[begin:stop] for begin, stop in split(samples, top_db=30)]
            samples = np.concatenate(kept)

        return samples / 32767  # normalize audio

    except (ValueError, RuntimeError, IOError) as err:
        # Log under the first matching category, in the same precedence order
        # as separate except clauses would apply.
        categories = (
            ('ValueError', ValueError),
            ('RuntimeError', RuntimeError),
            ('IOError', IOError),
        )
        for label, kind in categories:
            if isinstance(err, kind):
                logger.debug('{0} in {1}'.format(label, audio_path))
                break
        return None

Source File: audio_reader.py From SampleRNN with GNU Lesser General Public License v3.0

5 votes

def load_generic_audio(directory, sample_rate):
    '''Yield ``(audio, filename)`` for each file found in the directory.

    Files are visited in the order produced by ``randomize_files``; ``audio``
    is the mono waveform reshaped to a single column.
    '''
    files = find_files(directory)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        waveform = librosa.load(filename, sr=sample_rate, mono=True)[0]
        yield waveform.reshape(-1, 1), filename

Source File: model.py From honk with MIT License

5 votes

def load_audio(self, example, silence=False):
        """Return the (possibly augmented) waveform for one example.

        Args:
            example: key of the example; passed to ``librosa.core.load`` as
                the file to read when it is not already in ``_file_cache``.
            silence: when true the example key is replaced by
                ``"__silence__"`` and an all-zero clip is produced, always
                mixed with background noise.

        Returns:
            Array of at least ``self.input_length`` samples (shorter clips are
            zero-padded at the end). The result is also stored in
            ``self._audio_cache``.
        """
        if silence:
            example = "__silence__"
        # Outside training always try the cache; during training only with
        # probability 0.7, so some calls regenerate the augmentation.
        if random.random() < 0.7 or not self.set_type == DatasetType.TRAIN:
            try:
                return self._audio_cache[example]
            except KeyError:
                pass
        in_len = self.input_length
        if self.bg_noise_audio:
            # Cut a random in_len-sample window out of a random noise clip.
            bg_noise = random.choice(self.bg_noise_audio)
            a = random.randint(0, len(bg_noise) - in_len - 1)
            bg_noise = bg_noise[a:a + in_len]
        else:
            bg_noise = np.zeros(in_len)

        if silence:
            data = np.zeros(in_len, dtype=np.float32)
        else:
            # Decode the file at 16 kHz only once; later calls reuse _file_cache.
            file_data = self._file_cache.get(example)
            data = librosa.core.load(example, sr=16000)[0] if file_data is None else file_data
            self._file_cache[example] = data
        # Zero-pad at the end up to in_len (no-op when already long enough).
        data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
        if self.set_type == DatasetType.TRAIN:
            data = self._timeshift_audio(data)

        if random.random() < self.noise_prob or silence:
            # Mix in the noise at a random gain below 0.1 and keep the result
            # within [-1, 1].
            a = random.random() * 0.1
            data = np.clip(a * bg_noise + data, -1, 1)

        self._audio_cache[example] = data
        return data

Source File: data_loader.py From pytorch-nlp with MIT License

5 votes

def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15),
                                  gain_range=(-6, 8)):
    """
    Draw a tempo and a gain uniformly from the given ranges and apply them to
    the utterance through ``augment_audio_with_sox``.
    Returns the augmented utterance.
    """
    # Tempo is drawn before gain; keep this order so seeded runs reproduce.
    tempo_lo, tempo_hi = tempo_range
    tempo = np.random.uniform(low=tempo_lo, high=tempo_hi)
    gain_lo, gain_hi = gain_range
    gain = np.random.uniform(low=gain_lo, high=gain_hi)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=tempo, gain=gain)

Source File: data_loader.py From pytorch-nlp with MIT License

5 votes

def load_audio(path):
    """Load an audio file with torchaudio and reduce it to a single channel.

    A second axis of length 1 is squeezed away; otherwise the second axis is
    averaged. NOTE(review): this assumes axis 1 is the channel axis --
    confirm against the torchaudio version in use.

    Returns:
        np.ndarray: the reduced samples.
    """
    tensor, _ = torchaudio.load(path)
    samples = tensor.numpy()
    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    return samples.mean(axis=1)  # multiple channels, average

Source File: datautils.py From panotti with MIT License

5 votes

def load_audio(audio_path, mono=None, sr=None, convertOSXaliases=True):  # wrapper for librosa.load
    """Load audio via librosa, retrying through an OS X alias on macOS.

    If librosa reports ``NoBackendError`` and the platform is Darwin, the
    path is resolved with ``resolve_osx_alias`` and loaded once more. When no
    retry is possible, or the retry fails too, an error line is printed and
    the exception is re-raised.

    Returns:
        tuple: ``(signal, sr)`` as returned by ``librosa.load``.
    """
    try:
        signal, sr = librosa.load(audio_path, mono=mono, sr=sr)
    except NoBackendError as first_err:
        if 'Darwin' != platform.system():
            print("\n*** ERROR: Could not open audio file {}".format(audio_path),"\n",flush=True)
            raise first_err
        # handle OS X alias files gracefully (convert to symlinks for next time)
        source = resolve_osx_alias(audio_path, convert=convertOSXaliases, already_checked_os=True)
        try:
            signal, sr = librosa.load(source, mono=mono, sr=sr)
        except NoBackendError as second_err:
            print("\n*** ERROR: Could not open audio file {}".format(audio_path),"\n",flush=True)
            raise second_err
    return signal, sr

Source File: utils.py From Tensorflow-Keyword-Spotting with Apache License 2.0

5 votes

def load_audio_file(file_path,sample_rate):
    """Load a clip and force it to exactly ``sample_rate`` samples.

    Longer clips are truncated; shorter ones are zero-padded at the end.
    """
    input_length = sample_rate
    samples = librosa.core.load(file_path, sr=sample_rate)[0]
    if len(samples) > input_length:
        return samples[:input_length]
    shortfall = max(0, input_length - len(samples))
    return np.pad(samples, (0, shortfall), "constant")

Source File: voyagerimb.py From voyagerimb with MIT License

5 votes

def model_load_audio_data(self, filename):
        """Read a WAV file into ``self.rate`` / ``self.audio_data``.

        The root widget shows the "watch" (busy) cursor while the file is
        read. The cursor is reset afterwards even when reading fails, so the
        window is not left looking busy.

        Args:
            filename: path of the WAV file handed to ``scipy.io.wavfile.read``.
        """
        self.root.config(cursor="watch")
        self.root.update()
        try:
            self.rate, self.audio_data = scipy.io.wavfile.read(filename)
        finally:
            self.root.config(cursor="")

Source File: models.py From openl3 with MIT License

5 votes

def load_audio_embedding_model(input_repr, content_type, embedding_size):
    """
    Build the audio embedding model for the given configuration and load
    its weights.

    Parameters
    ----------
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model.
    content_type : "music" or "env"
        Type of content used to train embedding.
    embedding_size : 6144 or 512
        Embedding dimensionality.

    Returns
    -------
    model : keras.models.Model
        Model object whose output is max-pooled and flattened.
    """

    # Build the base network with warnings silenced, then load its weights.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        base_model = AUDIO_MODELS[input_repr]()

    weights_path = get_audio_embedding_model_path(input_repr, content_type)
    base_model.load_weights(weights_path)

    # The pooling size looked up here determines the final embedding size.
    pool_size = AUDIO_POOLING_SIZES[input_repr][embedding_size]
    pooled = MaxPooling2D(pool_size=pool_size, padding='same')(base_model.output)
    flattened = Flatten()(pooled)
    return Model(inputs=base_model.input, outputs=flattened)

Source File: predict.py From Looking-to-Listen with MIT License

5 votes

def LoadAudio(fname):
    """Load a file and return its peak-normalised magnitude and unit phase.

    The signal is loaded at ``SR`` and transformed with ``stft`` using
    ``FFT_SIZE``, ``HOP_LEN`` and ``WIN_LEN``.

    Returns:
        tuple: ``(mag, phase)`` where ``mag`` is ``|STFT|`` divided by its
        maximum (left as all zeros for a silent signal) and ``phase`` is
        ``exp(1j * angle(STFT))``.
    """
    y, _ = load(fname, sr=SR)
    spec = stft(y, n_fft=FFT_SIZE, hop_length=HOP_LEN, win_length=WIN_LEN)
    mag = np.abs(spec)
    peak = np.max(mag)
    # A silent signal has a zero peak; dividing would fill mag with NaN.
    if peak > 0:
        mag /= peak
    phase = np.exp(1.j*np.angle(spec))
    return mag, phase

Source File: tvnplayer.py From filmkodi with Apache License 2.0

5 votes

def LOAD_AND_PLAY_AUDIO(self, url, title, player=True):
        """Play ``url`` as music, or show an error dialog when there is no link.

        ``url`` equal to ``False`` means no link could be resolved.
        """
        if url != False:
            self.__LOAD_AND_PLAY(url, title, player, "music")
            return
        # No usable link: tell the user.
        dialog = xbmcgui.Dialog()
        dialog.ok('Brak linku!', 'Przepraszamy, chwilowa awaria.', 'Zapraszamy w innym terminie.')

Source File: tvnplayer.py From filmkodi with Apache License 2.0

5 votes

def LOAD_AND_PLAY_AUDIO_WATCHED(self, url):  # NEW: this version uses xbmcplugin.setResolvedUrl and supports the "watched" status
        """Play ``url`` as audio, or show an error dialog when there is no link.

        Returns the result of the playback helper, or ``False`` when ``url``
        equals ``False`` (no link could be resolved).
        """
        if url != False:
            return self.__LOAD_AND_PLAY_WATCHED(url, 'audio')
        # No usable link: tell the user and report failure.
        dialog = xbmcgui.Dialog()
        dialog.ok('Brak linku!', 'Przepraszamy, chwilowa awaria.', 'Zapraszamy w innym terminie.')
        return False

Source File: audio.py From python-dlpy with Apache License 2.0

5 votes

def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
        '''
        Pre-process the metadata file and load it into a CAS table

        The contents are cleaned with ``DataClean``, uploaded as a randomly
        named table, given a computed ``_fName_`` column copied from
        ``_filename_``, and partitioned in place.

        Parameters
        ----------
        conn : CAS
            A connection object to the current session. When None, the
            connection from ``cls.get_connection()`` is used.
        path : string
            Location to the input metadata file.
        audio_path : string
            Passed to ``DataClean.process_contents`` as ``audio_path``.

        Returns
        -------
        :class:`CASTable`

        Raises
        ------
        DLPyError
            If no connection object is available.

        '''

        if conn is None:
            conn = cls.get_connection()
            if conn is None:
                raise DLPyError('cannot get a connection object to the current session.')

        table_name = random_name('AudioTable_Metadata', 6)

        cleaner = DataClean(conn=conn, contents_as_path=path)
        cleaned = cleaner.process_contents(audio_path = audio_path)
        tbl = cleaner.create_castable(cleaned['results'], table_name, replace=True, promote=False,
                                      col_names=cleaned['col_names'])

        # Computed column: a varchar copy of _filename_.
        scode = ('length _fName_ varchar(*); '
                 '_fName_ = _filename_; ')

        with_fname = CASTable(tbl, computedvars=['_fName_'],
                              computedvarsprogram=scode)

        conn.table.partition(table=with_fname, casout=dict(name=tbl, replace=True))

        return CASTable(tbl)

Source File: audio.py From python-dlpy with Apache License 2.0

5 votes

def load_audio_metadata(cls, conn, path, audio_path, task='speech2text'):
        '''
        Pre-process and load the metadata for the given task

        Parameters
        ----------
        conn : CAS
            A connection object to the current session. When None, the
            connection from ``cls.get_connection()`` is used.
        path : string
            Location to the input metadata file.
        audio_path : string
            Location to the audio files.
        task : string, optional
            Specifies the task
            Note: currently only support 'speech2text' (default)

        Returns
        -------
        :class:`CASTable`

        Raises
        ------
        DLPyError
            If no connection object is available or the task is unsupported.

        '''

        if conn is None:
            conn = cls.get_connection()
            if conn is None:
                raise DLPyError('cannot get a connection object to the current session.')

        if task != 'speech2text':
            raise DLPyError("We do not support this task yet!")

        return cls.load_audio_metadata_speechrecognition(conn, path, audio_path)

Source File: wpe.py From fdndlp with MIT License

5 votes

def load_audio(self, filename):
        """Read a file as a channels-first array limited to ``self.channels``.

        The file must have at least ``self.channels`` channels (asserted);
        extra channels are dropped with a printed notice.

        Returns:
            tuple: ``(data, fs)`` -- a copy of the kept channels and the
            sample rate reported by ``soundfile``.
        """
        frames, fs = sf.read(filename, always_2d=True)
        channels_first = frames.T
        available = channels_first.shape[0]
        assert(available >= self.channels)
        if available > self.channels:
            print(
                "The number of the input channels is %d," % available,
                "and only the first %d channels are loaded." % self.channels)
            channels_first = channels_first[0: self.channels]
        return channels_first.copy(), fs

Source File: audio_classes.py From sigsep-mus-db with MIT License

5 votes

def load_audio(self, path, stem_id, chunk_start=0, chunk_duration=None):
        """array_like: [shape=(num_samples, num_channels)]

        Read one stem (non-wav tracks, via stempeg) or one wav file (via
        soundfile), optionally restricted to a chunk given in seconds.
        Stores the sample rate that was read in ``self._rate``.

        Raises ValueError, after resetting ``_rate`` and ``_audio`` to None,
        when ``self.path`` does not exist.
        """
        # NOTE(review): existence is checked on self.path while the read uses
        # the `path` argument -- confirm this is intended.
        if not os.path.exists(self.path):
            self._rate = None
            self._audio = None
            raise ValueError("Oops! %s cannot be loaded" % path)

        if self.is_wav:
            # soundfile addresses frames, so convert seconds to samples.
            first_frame = int(chunk_start * self.rate)
            if chunk_duration:
                last_frame = first_frame + int(chunk_duration * self.rate)
            else:
                last_frame = chunk_duration
            audio, rate = sf.read(
                path,
                always_2d=True,
                start=first_frame,
                stop=last_frame
            )
        else:
            # read using stempeg
            audio, rate = stempeg.read_stems(
                filename=path,
                stem_id=stem_id,
                start=chunk_start,
                duration=chunk_duration,
                info=self.info
            )

        self._rate = rate
        return audio

Source File: wavenet_data.py From HandsOnDeepLearningWithPytorch with MIT License

5 votes

def load_audio(filename, sample_rate=16000, trim=True, trim_frame_length=2048):
    """Load a mono clip as a column vector, optionally trimming silence.

    Args:
        filename: path of the audio file.
        sample_rate: rate the audio is resampled to by librosa.
        trim: when greater than zero (e.g. ``True``), leading and trailing
            silence is removed with ``librosa.effects.trim``.
        trim_frame_length: analysis frame length passed to the trimmer.

    Returns:
        np.ndarray: samples reshaped to a single column, ``(n, 1)``.
    """
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)

    if trim > 0:
        # Trim while the signal is still 1-D: librosa treats the last axis as
        # time, so an (n, 1) array would be read as n one-sample channels.
        audio, _ = librosa.effects.trim(audio, frame_length=trim_frame_length)

    return audio.reshape(-1, 1)

Source File: refi.py From QualCoder with MIT License

5 votes

def load_audio_source(self, element):
        """ Load an audio source into the project.
        Copies the audio file into the project's audio folder and inserts a
        row for it into the sqlite ``source`` table.
        Loading the transcript and codings is not implemented yet (see TODOs).

        path to file can be internal or relative.
        e.g. path="relative:///DF370983‐F009‐4D47‐8615‐711633FA9DE6.m4a"

        :param element: source element handed to the name/user/date/path helper.
        """

        name, creating_user, create_date, source_path = self.name_creating_user_create_date_source_path_helper(element)

        # Copy file into .qda audio folder and rename into original name
        #print(source_path)
        destination = self.app.project_path + "/audio/" + name
        media_path = "/audio/" + name
        #print(destination)
        try:
            shutil.copyfile(source_path, destination)
        except Exception as e:
            # A failed copy is reported in the text edit; the database row is
            # still inserted below.
            self.parent_textEdit.append(_('Cannot copy Audio file from: ') + source_path + "\nto: " + destination + '\n' + str(e))

        cur = self.app.conn.cursor()
        cur.execute("insert into source(name,memo,owner,date, mediapath, fulltext) values(?,?,?,?,?,?)",
            (name, '', creating_user, create_date, media_path, None))
        self.app.conn.commit()
        # Row id of the new source; not used yet in the code visible here.
        cur.execute("select last_insert_rowid()")
        id_ = cur.fetchone()[0]

        #TODO load transcript
        #TODO transcript contains SynchPoints AKA timestamps
        #TODO load codings
        # The string below is a no-op statement kept as a sample of the XML
        # structure that the codings TODO refers to.
        '''
        <PictureSelection guid="04980e59-b290-4481-8cb6-e732824440a1" firstX="783" firstY="1238" secondX="1172" secondY="1788" name="a stylised faced on the lecture slide.
        " creatingUser="70daf61c-b6f0-4b5e-8c2f-548fde3ad3d4" creationDateTime="2019-03-09T23:19:07Z">
        <Coding guid="7a7e80ca-ed8c-4006-86b3-731e36baca19" creatingUser="70daf61c-b6f0-4b5e-8c2f-548fde3ad3d4" ><CodeRef targetGUID="1b594544-2954-4b67-86ff-fb552f090ba8"/>
        </Coding></PictureSelection>'''

Source File: audio.py From PolyglotDB with MIT License

5 votes

def load_audio(self, discourse, file_type):
        """
        Loads the audio file of a discourse for the requested variant
        (``consonant``, ``vowel`` or ``low_freq``); any other value loads the
        discourse's main sound file.
        Consonant files have a sampling rate of 16 kHz, vowel files a sampling rate of 11 kHz, and low frequency files
        a sampling rate of 1.2 kHz.

        Parameters
        ----------
        discourse : str
            Name of the audio file to load
        file_type : str
            One of ``consonant``, ``vowel`` or ``low_freq``

        Returns
        -------
        numpy.array
            Audio signal
        int
            Sampling rate of the file
        """
        sound_file = self.discourse_sound_file(discourse)
        # Map each variant to the attribute holding its path; anything else
        # falls back to the main file.
        path_attribute = {
            'consonant': 'consonant_file_path',
            'vowel': 'vowel_file_path',
            'low_freq': 'low_freq_file_path',
        }.get(file_type, 'file_path')
        path = os.path.expanduser(getattr(sound_file, path_attribute))
        return librosa.load(path, sr=None)

Source File: sound_to_tfrecords.py From kinetics-downloader with MIT License

5 votes

def load_audio(path, sampling_rate):
  """
  Load a mono waveform with librosa, discarding the returned sample rate.
  :param path:            Path to the audio file.
  :param sampling_rate:   Sampling rate to convert all audios to.
  :return:                Audio data.
  """
  return librosa.load(path, sr=sampling_rate, mono=True)[0]

Source File: audio_model.py From Tensorflow-Audio-Classification with Apache License 2.0

5 votes

def load_audio_slim_checkpoint(session, checkpoint_path):
    """Restore the audio-model variables of ``session`` from a checkpoint.

    This function can be used as an initialization function (referred to as
    init_fn in TensorFlow documentation) which is called in a Session after
    initializing all variables. Only variables whose names are defined by
    the inference-mode audio model are restored.

    Args:
        session: an active TensorFlow session.
        checkpoint_path: path to a file containing a checkpoint that is
          compatible with the audio model definition.
    """

    # Build the inference-mode audio model in a scratch graph to learn the
    # variable names it defines.
    with tf.Graph().as_default():
        define_audio_slim(training=False)
        wanted_names = {var.name for var in tf.global_variables()}

    # Variables that exist in the graph owned by the session.
    with session.graph.as_default():
        candidates = tf.global_variables()

    # Keep only the session variables that the audio model defines.
    restorable = [var for var in candidates if var.name in wanted_names]

    # Restore just that subset.
    tf.train.Saver(restorable, name='audio_load_pretrained',
                   write_version=1).restore(session, checkpoint_path)

Source File: data_helpers.py From MELD with GNU General Public License v3.0

5 votes

def load_audio_data(self, ):
        """Load the pickled audio embeddings for ``self.MODE`` and derive the
        dialogue-level structures.

        Sets ``train_audio_emb``, ``val_audio_emb`` and ``test_audio_emb``
        from the three-item pickle, then calls the dialogue embedding,
        length, label and mask helpers in that order.
        """

        AUDIO_PATH = "./data/pickles/audio_embeddings_feature_selection_{}.pkl".format(self.MODE.lower())
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        # The context manager closes the file handle, which was leaked before.
        with open(AUDIO_PATH, "rb") as handle:
            self.train_audio_emb, self.val_audio_emb, self.test_audio_emb = pickle.load(handle)

        self.get_dialogue_audio_embs()
        self.get_dialogue_lengths()
        self.get_dialogue_labels()
        self.get_masks()

Source File: _player.py From ai-makers-kit with MIT License

5 votes

def load_audio(self, wav_path):
        """Read a mono WAV file into the player's buffers.

        Stores the raw frames, sample rate and sample width in
        ``_loaded_bytes``, ``_loaded_samplerate`` and ``_loaded_samplewidth``.

        Args:
            wav_path (str): path of the WAV file.

        Raises:
            ValueError: if the file has more than one channel.
        """
        wav = wave.open(wav_path, 'r')
        try:
            if wav.getnchannels() != 1:
                raise ValueError(wav_path + ' is not a mono file')

            self._loaded_bytes = wav.readframes(wav.getnframes())
            self._loaded_samplerate = wav.getframerate()
            self._loaded_samplewidth = wav.getsampwidth()
        finally:
            # Close the file on the error path as well, not only on success.
            wav.close()

Source File: data_loader.py From LipReading with MIT License

5 votes

def load_audio(path):
    """Load a normalized audio file with torchaudio and reduce it to one channel.

    A second axis of length 1 is squeezed away; otherwise the second axis is
    averaged. NOTE(review): this assumes axis 1 is the channel axis --
    confirm against the torchaudio version in use.

    Returns:
        np.ndarray: the reduced samples.
    """
    tensor, _ = torchaudio.load(path, normalization=True)
    samples = tensor.numpy()
    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    return samples.mean(axis=1)  # multiple channels, average

Source File: audio_signal.py From nussl with MIT License

5 votes

def load_audio_from_array(self, signal, sample_rate=constants.DEFAULT_SAMPLE_RATE):
        """
        Loads an audio signal from a :obj:`np.ndarray`. :param:`sample_rate` is the sample
        rate of the signal.

        Non-floating-point input is converted to float and divided by the
        int16 full-scale value (32768). The input file path is cleared and
        the active region is reset to its default.

        See Also:
            * :func:`load_audio_from_file` to read in an audio file from disc.

        Notes:
            Only accepts float arrays and int arrays of depth 16-bits.

        Parameters:
            signal (:obj:`np.ndarray`): Array containing the audio signal sampled at
                :param:`sample_rate`.
            sample_rate (int): The sample rate of signal. ``None`` falls back to
                :ref:`constants.DEFAULT_SAMPLE_RATE`, which is also the default.

        """
        assert (type(signal) is np.ndarray)

        self.path_to_input_file = None

        # Change from fixed point to floating point
        if not np.issubdtype(signal.dtype, np.floating):
            int16_full_scale = np.iinfo(np.dtype('int16')).max + 1.0
            signal = signal.astype('float') / int16_full_scale

        self.audio_data = signal
        self.original_signal_length = self.signal_length

        if sample_rate is None:
            self._sample_rate = constants.DEFAULT_SAMPLE_RATE
        else:
            self._sample_rate = sample_rate

        self.set_active_region_to_default()

Source File: feat_ext.py From icassp19 with MIT License

5 votes

def load_audio_file(file_path, input_fixed_length=0, params_extract=None):
    """Read, optionally resample, and normalise a clip as a column vector.

    :param file_path: path of the audio file read with ``soundfile``.
    :param input_fixed_length: number of rows of the all-ones placeholder
        returned when the file contains no samples.
    :param params_extract: optional dict; its ``'fs'`` entry is the target
        sample rate. When the dict or the entry is missing, no resampling is
        done.
    :return: array of shape ``(n, 1)``.
    """
    data, source_fs = soundfile.read(file=file_path)
    data = data.T

    # The default params_extract=None used to crash on .get(); treat a missing
    # dict or a missing 'fs' entry as "keep the source rate".
    target_fs = params_extract.get('fs') if params_extract is not None else None

    # Resample if the source_fs is different from expected
    if target_fs is not None and target_fs != source_fs:
        # Keyword arguments work with both older and newer librosa releases.
        data = librosa.core.resample(data, orig_sr=source_fs, target_sr=target_fs)
        print('Resampling to %d: %s' % (target_fs, file_path))

    if len(data) > 0:
        data = get_normalized_audio(data)
    else:
        # 3 files are corrupted in the test set. They belong to the padding group (not used for evaluation)
        data = np.ones((input_fixed_length, 1))
        print('File corrupted. Could not open: %s' % file_path)

    # careful with the shape
    data = np.reshape(data, [-1, 1])
    return data

Source File: train_audio.py From AudioEmotion with MIT License

5 votes

def load_audio_data():
    """Load the feature table and return an 80/20 train/test split.

    The ``filename`` column is dropped, the last remaining column is
    label-encoded as the target, and every other column is used as a float
    feature.

    Returns:
        The four arrays produced by ``train_test_split(x, y, test_size=0.2)``.
    """
    table = pd.read_csv('../Output/data.csv').drop(['filename'], axis=1)

    # Last column holds the emotion label; everything before it is a feature.
    labels = table.iloc[:, -1]
    y = LabelEncoder().fit_transform(labels)
    x = np.array(table.iloc[:, :-1], dtype=float)
    # NOTE: features are returned unscaled (no StandardScaler is applied).

    return train_test_split(x, y, test_size=0.2)

Source File: __init__.py From SimpleAudioIndexer with Apache License 2.0

5 votes

def load_indexed_audio(self, indexed_audio_file_abs_path):
        """
        Replace the stored timestamps with the object pickled in the given file.

        Parameters
        ----------
        indexed_audio_file_abs_path : str
            Path of the pickle file to read.
        """
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(indexed_audio_file_abs_path, "rb") as pickled:
            restored = pickle.load(pickled)
        self.__timestamps = restored

Source File: audio.py From end2end-asr-pytorch with MIT License

5 votes

def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15), gain_range=(-6, 8)):
    """
    Draw a tempo and a gain uniformly from the given ranges and apply them to
    the utterance through ``augment_audio_with_sox``.
    Returns the augmented utterance.
    """
    # Tempo is drawn before gain; keep this order so seeded runs reproduce.
    min_tempo, max_tempo = tempo_range
    chosen_tempo = np.random.uniform(low=min_tempo, high=max_tempo)
    min_gain, max_gain = gain_range
    chosen_gain = np.random.uniform(low=min_gain, high=max_gain)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=chosen_tempo, gain=chosen_gain)

Source File: audio.py From end2end-asr-pytorch with MIT License

5 votes

def load_audio(path):
    """Load a normalized audio file with torchaudio and reduce it to one channel.

    The loaded array is transposed first; a second axis of length 1 is then
    squeezed away, otherwise the second axis is averaged.

    Returns:
        np.ndarray: the reduced samples.
    """
    tensor, _ = torchaudio.load(path, normalization=True)
    samples = tensor.numpy().T
    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    return samples.mean(axis=1)  # multiple channels, average

Source File: soundnet.py From soundnet_keras with MIT License

5 votes

def load_audio(audio_file):
    """Load a file as mono float32 audio at 22050 Hz and run ``preprocess`` on it."""
    # SoundNet works on mono audio files with a sample rate of 22050.
    sample_rate = 22050
    waveform, _ = librosa.load(audio_file, dtype='float32', sr=sample_rate, mono=True)
    return preprocess(waveform)

Source File: util.py From SoundNet-tensorflow with MIT License

5 votes

def load_audio(audio_path, sr=None):
    """
    Load an audio file with librosa without mixing its channels down.

    Args:
        audio_path: file to load.
        sr: target sample rate forwarded to ``librosa.load``. Per librosa's
            documentation, ``None`` (the default here) keeps the file's
            native sample rate rather than resampling.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``.
        Sample values are presumably floats in the range (-1., 1.).
    """
    # mono=False asks librosa to keep channels separate instead of averaging.
    return librosa.load(audio_path, sr=sr, mono=False)

Source File: dataset.py From rafiki with Apache License 2.0

5 votes

def load_dataset_of_audio_files(self, dataset_path, dataset_dir):
        '''
            Loads dataset with type `AUDIO_FILES`.

            :param dataset_path: Passed as the first argument to ``AudioFilesDataset``
            :param dataset_dir: Passed as the second argument to ``AudioFilesDataset``
            :returns: An instance of ``AudioFilesDataset``.
        '''
        return AudioFilesDataset(dataset_path, dataset_dir)

Source File: utils.py From magenta with Apache License 2.0

5 votes

def load_audio(path, sample_length=64000, sr=16000):
  """Load a wave file and keep at most `sample_length` samples.

  Args:
    path: Location of a wave file to load.
    sample_length: Maximum number of leading samples to keep.
    sr: Sample rate (samples per second) passed to `librosa.load`.

  Returns:
    out: The loaded audio truncated to its first `sample_length` samples
      (presumably floats from -1.0 to 1.0).
  """
  waveform, _unused_rate = librosa.load(path, sr=sr)
  return waveform[:sample_length]

Source File: orchset.py From mirdata with BSD 3-Clause "New" or "Revised" License

5 votes

def load_audio_mono(audio_path):
    """Load an Orchset audio file as a mono signal.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if audio_path does not exist

    """
    if os.path.exists(audio_path):
        # sr=None is forwarded so librosa does not pick its own target rate.
        return librosa.load(audio_path, sr=None, mono=True)

    raise IOError("audio_path {} does not exist".format(audio_path))

Source File: guitarset.py From mirdata with BSD 3-Clause "New" or "Revised" License

5 votes

def load_multitrack_audio(audio_path):
    """Load a Guitarset multitrack audio file without mixing it down.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the audio signal as loaded with ``mono=False``
            (channels are not averaged together)
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if audio_path does not exist

    """
    path_is_missing = not os.path.exists(audio_path)
    if path_is_missing:
        raise IOError("audio_path {} does not exist".format(audio_path))
    signal_and_rate = librosa.load(audio_path, sr=None, mono=False)
    return signal_and_rate

Source File: orchset.py From mirdata with BSD 3-Clause "New" or "Revised" License

5 votes

def load_audio_stereo(audio_path):
    """Load an Orchset audio file keeping its channels separate.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the audio signal as loaded with ``mono=False``
            (no downmix to a single channel)
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if audio_path does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=False)

    raise IOError("audio_path {} does not exist".format(audio_path))

Source File: data_loader.py From inference with Apache License 2.0

5 votes

def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15),
                                  gain_range=(-6, 8), frame_start=0, frame_end=-1):
    """
    Augment an utterance with a randomly chosen tempo and gain.

    A tempo and a gain are drawn uniformly from ``tempo_range`` and
    ``gain_range`` (each a ``(low, high)`` pair). They are forwarded with
    ``path``, ``sample_rate``, ``frame_start`` and ``frame_end`` to
    ``augment_audio_with_sox``, and its result is returned as-is.
    """
    tempo_min, tempo_max = tempo_range
    tempo = np.random.uniform(low=tempo_min, high=tempo_max)
    gain_min, gain_max = gain_range
    gain = np.random.uniform(low=gain_min, high=gain_max)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=tempo, gain=gain,
                                  frame_start=frame_start, frame_end=frame_end)

Source File: data_source_loader.py From nnabla with Apache License 2.0

5 votes

def load_audio(file, shape=None, normalize=False):
    """
    Load an audio file with whichever backend is available.

    When the module-level flag ``pydub_available`` is truthy the call is
    delegated to ``load_audio_pydub``; otherwise ``load_wav`` is used. Both
    receive ``file``, ``shape`` and ``normalize`` positionally, and the
    chosen loader's result is returned.
    """
    # The flag is only read here, so no ``global`` declaration is required.
    loader = load_audio_pydub if pydub_available else load_wav
    return loader(file, shape, normalize)

Source File: paradrop_client.py From Paradrop with Apache License 2.0

5 votes

def load_audio_module(self, module_name):
    """
    Load a module into the audio subsystem.

    Sends ``{"name": module_name}`` as the JSON body of a POST request to
    ``<base_url>/audio/modules`` and returns whatever ``self.request``
    returns.
    """
    endpoint = "{}/audio/modules".format(self.base_url)
    return self.request("POST", endpoint, json={"name": module_name})

Source File: gui.py From MIA-Japanese-Add-on with GNU General Public License v3.0

5 votes

def loadAudioGraphFieldsCB(self):
    """
    Fill the audio-field and pitch-graph combo boxes.

    Each box receives a 'Clipboard' entry, then a separator row that is
    disabled and center-aligned, then every entry of ``self.allFields``.
    """
    for comboBox in (self.ui.audioFieldsCB, self.ui.pitchGraphsCB):
        comboBox.addItem('Clipboard')
        comboBox.addItem('──────────────────')
        # The separator was appended last, so it sits at index count() - 1.
        separator = comboBox.model().item(comboBox.count() - 1)
        separator.setEnabled(False)
        separator.setTextAlignment(Qt.AlignCenter)
        comboBox.addItems(self.allFields)

Source File: core.py From muda with ISC License

4 votes

def load_jam_audio(
    jam_in, audio_file, validate=True, strict=True, fmt="auto", **kwargs
):
    """Load audio and attach it to a JAMS object.

    Parameters
    ----------
    jam_in : str, file descriptor, jams.JAMS, or None
        Where the annotations come from. A ``jams.JAMS`` instance is used
        directly, ``None`` yields a freshly constructed empty ``jams.JAMS``,
        and anything else is handed to ``jams.load`` (see that function for
        the accepted filename / file-descriptor formats).

    audio_file : str
        Audio filename to load

    validate : bool
    strict : bool
    fmt : str
        Forwarded to ``jams.load``; only used when `jam_in` has to be loaded.

    kwargs : additional keyword arguments
        Forwarded to ``librosa.load``

    Returns
    -------
    jam : jams.JAMS
        The result of ``jam_pack`` called with the JAMS object and an
        ``_audio`` dict holding the signal ``y`` and sample rate ``sr``
        (presumably stored in the top-level sandbox - see ``jam_pack``).

    Notes
    -----
    When `jam_in` is already a ``jams.JAMS`` object it is not copied here,
    so its `file_metadata.duration` field may be modified: if the field is
    ``None`` it is filled in with the duration of the loaded audio.

    See Also
    --------
    jams.load
    librosa.core.load


    Examples
    --------

    Load a JAMS object and audio from disk

    >>> jam = muda.load_jam_audio('my_file.jams', 'my_file.wav')


    Load an audio file with no jams annotation

    >>> jam = muda.load_jam_audio(None, 'my_file.wav')

    """

    if jam_in is None:
        # No annotations supplied: start from an empty container.
        jam = jams.JAMS()
    elif isinstance(jam_in, jams.JAMS):
        jam = jam_in
    else:
        jam = jams.load(jam_in, validate=validate, strict=strict, fmt=fmt)

    samples, rate = librosa.load(audio_file, **kwargs)

    metadata = jam.file_metadata
    if metadata.duration is None:
        metadata.duration = librosa.get_duration(y=samples, sr=rate)

    return jam_pack(jam, _audio={"y": samples, "sr": rate})

Source File: LoadClipsExt.py From Luminosity with GNU General Public License v3.0

4 votes

def LoadAudio(self, *args, local = True):
		"""
		Set up an audio player plugin for a clip and fill in its settings.

		Positional args, in the order they are unpacked below:
		(name, path, mediaType, bank, channel, clip).

		The method replaces any existing 'plugin' child of the clip component
		with a copy of the audio player plugin, writes the clip's attributes
		and parameter values into the plugin's 'CompAttr' / 'CompPar' storage
		and its 'parameters' table, and points the plugin's 'audiofilein'
		operator at ``path``. ``local`` is accepted but not used in this body.

		NOTE(review): ``op``, ``me`` and ``run`` are not defined in this
		block; they are presumably TouchDesigner globals - confirm.
		"""

		#args(name,path,mediaType,bank,channel,clip)
		#print('Loading Movie')

		name = args[0]
		path = args[1]
		mediaType = args[2]
		bank = args[3]
		channel = args[4]
		clip = args [5]

		# Template plugin to copy, and the clip component that will receive it.
		srcPlugin = op(me.fetch('PLUGINS') + '/players/audioPlayer/plugin')
		dataClipPath = me.fetch('CLIP_DATA') +'/'+ bank +'/'+ channel +'/'+ clip
		clipComp = op(dataClipPath)


		# Sample rate and length are read from the 'audioInfo' operator of
		# self.LoadAudioComp (presumably already describing `path` - confirm).
		audioInfo = self.LoadAudioComp.op('audioInfo')
		sampleRate = audioInfo['sample_rate'].eval()
		length = audioInfo['true_file_length_frames'].eval()
		# Upper bound used for the trim/scrub ranges below.
		end = length + 1

		# Drop any previous plugin so the clip holds exactly one fresh copy.
		if clipComp.op('plugin'):
			clipComp.op('plugin').destroy()
		clipComp.copy(srcPlugin)

		pluginComp = clipComp.op('plugin')
		compAttr = pluginComp.storage['CompAttr']

		# Descriptive attributes of the clip.
		compAttr['attr']['type'] = 'audio'
		compAttr['attr']['name'] = name 
		compAttr['attr']['fileType'] = mediaType
		compAttr['attr']['length'] = length

		# UI defaults and ranges derived from the file.
		compAttr['uiAttr']['file']['default'] = path
		compAttr['uiAttr']['sampleRate']['default'] = sampleRate
		compAttr['uiAttr']['trimStart']['rangeHigh'] = end
		compAttr['uiAttr']['trimEnd']['rangeHigh'] = end
		compAttr['uiAttr']['trimEnd']['default'] = end
		compAttr['uiAttr']['scrub']['rangeHigh'] = end
		compAttr['uiAttr']['scrub']['default'] = 1
		compAttr['uiAttr']['speed']['default'] = 1


		compPar = pluginComp.storage['CompPar']

		# Current parameter values: full-length trim, scrub and speed at 1.
		compPar['values']['file']['value'] = path
		compPar['values']['sampleRate']['value'] = sampleRate
		compPar['values']['trimStart']['value'] = 1
		compPar['values']['trimEnd']['value'] = end
		compPar['values']['scrub']['value'] = 1
		compPar['values']['speed']['value'] = 1

		# Only when the fetched 'NODE' value is 'master' is self.LoadProc called
		# with the clip details and the attribute/parameter dictionaries.
		if me.fetch('NODE') == 'master':
			self.LoadProc(name, path, mediaType, bank, channel, clip, dataClipPath, [compAttr['attr'], compAttr['uiAttr']], compPar['values'])

		parTable = pluginComp.op('parameters')

		# Write the same values into the plugin's 'parameters' table; 'play' is set to 0.
		parTable['file','value'] = path
		parTable['sampleRate','value'] = sampleRate
		parTable['trimStart','value'] = 1
		parTable['trimEnd','value'] = end
		parTable['speed','value'] = 1
		parTable['play','value'] = 0

		audioPlayer = pluginComp.op('audiofilein')
		audioPlayer.par.file = path

		# Script that points 'loadAudio/loader' at a hard-coded sample MP3; it is
		# passed to run() with a 10-frame delay.
		# NOTE(review): the path is an absolute TouchDesigner088 install location - confirm it exists on target machines.
		setDefault = "op('loadAudio/loader').par.file = 'C:/Program Files/Derivative/TouchDesigner088/Samples/Audio/JeremyCaulfield_www.dumb-unit.com.mp3'"

		run(setDefault, delayFrames = 10)

Source File: audio_analysis.py From AudioEmotion with MIT License

4 votes

def load_audio(path, with_path=True, recursive=True, ignore_failure=True, random_order=False):
    """
    Loads WAV file(s) from a path into an SFrame.

    Parameters
    ----------
    path : str
        Either a single file matching ``*.wav`` or a directory that
        contains WAV files.

    with_path : bool, optional
        Keep the 'path' column in the returned SFrame. When False the
        column is deleted before returning.

    recursive : bool, optional
        When ``path`` is a directory, walk it recursively. Otherwise only
        ``*.wav`` files directly under ``path`` are loaded.

    ignore_failure : bool, optional
        If True, a file that cannot be read is reported with ``print`` and
        skipped. If False, the failure is raised as a toolkit error.

    random_order : bool, optional
        Shuffle the list of files before loading.

    Returns
    -------
    out : SFrame
        An SFrame with an 'audio' column and, unless ``with_path`` is
        False, a 'path' column. Each 'audio' entry is a dictionary with
        two items: 'sample_rate', the rate reported by ``librosa.load``,
        and 'data', the loaded samples multiplied by 32768.

        NOTE(review): ``librosa.load`` is called without ``mono=False``,
        so the data is presumably always a one-dimensional, mono array -
        confirm against librosa's defaults.

    Examples
    --------
    >>> audio_path = "~/Documents/myAudioFiles/"
    >>> audio_sframe = tc.audio_analysis.load_audio(audio_path, recursive=True)
    """
    if _fnmatch(path, '*.wav'):    # single file
        all_wav_files = [path]
    elif recursive:
        all_wav_files = [
            dir_path + '/' + file_name
            for (dir_path, _, file_names) in _os.walk(path)
            for file_name in file_names
            if _fnmatch(file_name, '*.wav')
        ]
    else:
        all_wav_files = _glob(path + '/*.wav')

    if random_order:
        _shuffle(all_wav_files)

    result_builder = _tc.SFrameBuilder(column_types=[dict, str], column_names=['audio', 'path'])
    for wav_path in all_wav_files:
        try:
            samples, sample_rate = librosa.load(wav_path, sr=None, res_type='scipy')
            # Presumably rescales librosa's float output to a 16-bit PCM
            # amplitude range - TODO confirm.
            samples = samples * 32768
        except Exception as e:
            error_string = "Could not read {}: {}".format(wav_path, e)
            if ignore_failure:
                print(error_string)
                continue
            raise _ToolkitError(error_string)

        result_builder.append([{'sample_rate': sample_rate, 'data': samples}, wav_path])

    result = result_builder.close()
    if not with_path:
        del result['path']
    return result

Source File: audio_signal.py From nussl with MIT License

4 votes

def load_audio_from_file(self, input_file_path, offset=0, duration=None, new_sample_rate=None):
        # type: (str, float, float, int) -> None
        """
        Loads an audio signal into memory from a file on disc. The audio is stored in
        :attr:`audio_data` as returned by ``librosa.load`` with ``mono=False``. The sample
        rate is read from the file, and this object's sample rate is set from it. If
        :param:`new_sample_rate` is not ``None`` nor the same as the sample rate of the file,
        the audio will be resampled to the sample rate provided in the :param:`new_sample_rate`
        parameter. After reading the audio data into memory, the active region is set to default.

        :param:`offset` and :param:`duration` allow the user to determine how much of the audio is
        read from the file. If those are non-default, then only the values provided will be stored
        in :attr:`audio_data` (unlike with the active region, which has the entire audio data stored
        in memory but only allows access to a subset of the audio).

        See Also:
            * :func:`load_audio_from_array` to read audio data from a :obj:`np.ndarray`.

        Args:
            input_file_path (str): Path to input file.
            offset (float): The starting point of the section to be extracted (seconds).
                Defaults to 0 seconds (i.e., the very beginning of the file).
            duration (float): Length of signal to load in seconds. Defaults to ``None``,
                which is passed through to ``librosa.load`` (presumably meaning "read to
                the end of the file").
            new_sample_rate (int): If this parameter is not ``None`` or the same sample rate as
                provided by the input file, then the audio data will be resampled to the new
                sample rate dictated by this parameter.

        Raises:
            AssertionError: If ``offset`` or ``duration`` is negative.
            AudioSignalException: If ``offset`` is greater than the file's duration.

        Warns:
            UserWarning: If ``offset + duration`` reaches or exceeds the file's duration, and
                when the audio is resampled to ``new_sample_rate``.

        """
        assert offset >= 0, 'Parameter `offset` must be >= 0!'
        if duration is not None:
            assert duration >= 0, 'Parameter `duration` must be >= 0!'

        try:
            # try reading headers with soundfile for speed
            audio_info = sf.info(input_file_path)
            file_length = audio_info.duration
        except Exception:
            # if that doesn't work try audioread. Catching Exception rather than
            # using a bare except lets KeyboardInterrupt/SystemExit propagate
            # instead of silently triggering the fallback.
            with audioread.audio_open(os.path.realpath(input_file_path)) as input_file:
                file_length = input_file.duration

        if offset > file_length:
            raise AudioSignalException('offset is longer than signal!')

        if duration is not None and offset + duration >= file_length:
            warnings.warn('offset + duration are longer than the signal.'
                          ' Reading until end of signal...',
                          UserWarning)

        # sr=None is forwarded so the rate stored below is the one librosa reports
        # for the file; resampling, if requested, happens afterwards.
        audio_input, self._sample_rate = librosa.load(input_file_path,
                                                      sr=None,
                                                      offset=offset,
                                                      duration=duration,
                                                      mono=False)

        self.audio_data = audio_input
        self.original_signal_length = self.signal_length

        if new_sample_rate is not None and new_sample_rate != self._sample_rate:
            warnings.warn('Input sample rate is different than the sample rate'
                          ' read from the file! Resampling...',
                          UserWarning)
            self.resample(new_sample_rate)

        self.path_to_input_file = input_file_path
        self.set_active_region_to_default()