Python load audio

The following are 60 Python code examples related to "load audio". You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: ikala.py    From mirdata with BSD 3-Clause "New" or "Revised" License 7 votes vote down vote up
def load_vocal_audio(audio_path):
    """Load the vocal track of an ikala clip.

    The file is read with all channels kept (``mono=False``) and without
    resampling (``sr=None``); the channel at index 1 is returned.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono vocal signal (channel index 1)
        sr: the sample rate reported by librosa for the file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    path_is_missing = not os.path.exists(audio_path)
    if path_is_missing:
        raise IOError("audio_path {} does not exist".format(audio_path))

    channels, sample_rate = librosa.load(audio_path, sr=None, mono=False)
    return channels[1, :], sample_rate
Example 2
Source File: data_loader.py    From inference with Apache License 2.0 6 votes vote down vote up
def load_audio(path, frame_start=0, frame_end=-1):
    """Load an audio file as an array, optionally cut to a frame window.

    Args:
        path: file handed to ``torchaudio.load``.
        frame_start: first frame of the requested window.
        frame_end: one past the last frame of the requested window. When the
            window ends beyond the available audio, the signal is repeated
            (looped) until it is long enough.

    Returns:
        np.ndarray: the loaded signal. Arrays with more than one dimension
        are reduced along axis 1 (squeezed when that axis has length 1,
        averaged otherwise).

    Raises:
        AssertionError: if a window is requested with
            ``frame_start >= frame_end``.
    """
    waveform, _ = torchaudio.load(path)
    waveform = waveform.numpy()
    if waveform.ndim > 1:
        if waveform.shape[1] == 1:
            waveform = waveform.squeeze()
        else:
            # multiple channels, average
            waveform = waveform.mean(axis=1)

    window_requested = frame_end > 0 or frame_start > 0
    if not window_requested:
        return waveform

    assert frame_start < frame_end, "slicing does not yet support inverting audio"
    if frame_end > waveform.shape[0]:
        # Loop the clip as many whole times as needed to reach frame_end.
        extra_copies = int(ceil((frame_end - waveform.shape[0]) / float(waveform.shape[0])))
        waveform = np.concatenate([waveform] + [waveform] * extra_copies)
    return waveform[frame_start:frame_end]
Example 3
Source File: fastgen.py    From magenta with Apache License 2.0 6 votes vote down vote up
def load_batch_audio(files, sample_length=64000):
  """Load a batch of audio clips from .wav files.

  Each file is loaded through ``utils.load_audio`` at 16 kHz. Clips shorter
  than ``sample_length`` are zero-padded at the end; longer clips are passed
  through as returned by the loader.

  Args:
    files: A list of filepaths to .wav files.
    sample_length: Maximum sample length

  Returns:
    batch: A padded array of audio [n_files, sample_length]
  """
  def _pad_to_length(clip):
    # Zero-pad a clip at the end when it is shorter than sample_length.
    n_samples = clip.shape[0]
    if n_samples >= sample_length:
      return clip
    canvas = np.zeros([sample_length])
    canvas[:n_samples] = clip
    return canvas

  clips = [_pad_to_length(utils.load_audio(f, sample_length, sr=16000))
           for f in files]
  return np.array(clips)
Example 4
Source File: resnet_v2_predict.py    From keras-audio with MIT License 6 votes vote down vote up
def load_audio_path_label_pairs(max_allowed_pairs=None):
    """Build ``(audio_path, label)`` pairs for the GTZAN test split.

    Downloads the dataset if needed, then pairs the i-th line of the song
    list with the i-th line of the ground-truth list.

    Args:
        max_allowed_pairs: optional cap on the number of pairs returned;
            ``None`` means no cap.

    Returns:
        list: tuples of ``(audio_path, int_label)``.
    """
    download_gtzan_genres_if_not_found('./very_large_data/gtzan')

    with open('./data/lists/test_songs_gtzan_list.txt', 'rt') as songs_file:
        audio_paths = ['./very_large_data/' + row.strip() for row in songs_file]

    pairs = []
    with open('./data/lists/test_gt_gtzan_list.txt', 'rt') as labels_file:
        for row in labels_file:
            label = int(row)
            if max_allowed_pairs is not None and len(pairs) >= max_allowed_pairs:
                break
            # The next unpaired song is at index len(pairs).
            pairs.append((audio_paths[len(pairs)], label))
    return pairs
Example 5
Source File: ikala.py    From mirdata with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_instrumental_audio(audio_path):
    """Load the instrumental track of an ikala clip.

    The file is read with all channels kept (``mono=False``) and without
    resampling (``sr=None``); the channel at index 0 is returned.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono instrumental signal (channel index 0)
        sr: the sample rate reported by librosa for the file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    path_is_missing = not os.path.exists(audio_path)
    if path_is_missing:
        raise IOError("audio_path {} does not exist".format(audio_path))

    channels, sample_rate = librosa.load(audio_path, sr=None, mono=False)
    return channels[0, :], sample_rate
Example 6
Source File: ikala.py    From mirdata with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_mix_audio(audio_path):
    """Load the full mix of an ikala clip.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono mix, scaled by 2
        sr: the sample rate reported by librosa for the file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    path_is_missing = not os.path.exists(audio_path)
    if path_is_missing:
        raise IOError("audio_path {} does not exist".format(audio_path))

    downmix, sample_rate = librosa.load(audio_path, sr=None, mono=True)
    # Multiply by 2 because librosa averages the left and right channels
    # when producing the mono downmix.
    doubled = 2.0 * downmix
    return doubled, sample_rate
Example 7
Source File: groove_midi.py    From mirdata with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_audio(audio_path):
    """Load a Groove MIDI audio file as mono audio at 22050 Hz.

    Args:
        audio_path (str): path to audio file, or None

    Returns:
        y (np.ndarray): the mono audio signal (None if ``audio_path`` is None)
        sr: the sample rate of the returned signal (None if ``audio_path``
            is None)

    Raises:
        IOError: if ``audio_path`` is given but does not exist

    """
    if audio_path is None:
        return None, None

    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=22050, mono=True)

    raise IOError("audio_path {} does not exist".format(audio_path))
Example 8
Source File: resnet_v2_predict.py    From mxnet-audio with MIT License 6 votes vote down vote up
def load_audio_path_label_pairs(max_allowed_pairs=None):
    """Build ``(audio_path, label)`` pairs for the GTZAN test split.

    Downloads the dataset if needed, then pairs the i-th line of the song
    list with the i-th line of the ground-truth list. All locations are
    resolved through ``patch_path``.

    Args:
        max_allowed_pairs: optional cap on the number of pairs returned;
            ``None`` means no cap.

    Returns:
        list: tuples of ``(audio_path, int_label)``.
    """
    from mxnet_audio.library.utility.gtzan_loader import download_gtzan_genres_if_not_found
    download_gtzan_genres_if_not_found(patch_path('very_large_data/gtzan'))

    with open(patch_path('data/lists/test_songs_gtzan_list.txt'), 'rt') as songs_file:
        audio_paths = [patch_path('very_large_data/' + row.strip()) for row in songs_file]

    pairs = []
    with open(patch_path('data/lists/test_gt_gtzan_list.txt'), 'rt') as labels_file:
        for row in labels_file:
            label = int(row)
            if max_allowed_pairs is not None and len(pairs) >= max_allowed_pairs:
                break
            # The next unpaired song is at index len(pairs).
            pairs.append((audio_paths[len(pairs)], label))
    return pairs
Example 9
Source File: transcribe.py    From onsets-and-frames with MIT License 6 votes vote down vote up
def load_and_process_audio(flac_path, sequence_length, device):
    """Read an int16 audio file and return it as a scaled float tensor.

    Args:
        flac_path: file handed to ``soundfile.read``.
        sequence_length: number of samples to keep, or None to keep the whole
            file. The excerpt start is drawn from a fixed-seed RNG (seed 42)
            and rounded down to a multiple of ``HOP_LENGTH``.
        device: device the tensor is moved to.

    Returns:
        Float tensor of the (possibly cropped) audio divided by 32768.

    Raises:
        AssertionError: if the file's sample rate differs from ``SAMPLE_RATE``.
    """
    rng = np.random.RandomState(seed=42)

    samples, rate = soundfile.read(flac_path, dtype='int16')
    assert rate == SAMPLE_RATE

    samples = torch.ShortTensor(samples)

    if sequence_length is not None:
        # Align the random start offset to a hop boundary.
        hop_index = rng.randint(len(samples) - sequence_length) // HOP_LENGTH
        start = hop_index * HOP_LENGTH
        samples = samples[start:start + sequence_length]

    return samples.to(device).float().div_(32768.0)
Example 10
Source File: audio_io.py    From synvae with MIT License 6 votes vote down vote up
def load_audio(audio_filename, sample_rate):
  """Loads an audio file as mono samples at the requested rate.

  Args:
    audio_filename: File path to load.
    sample_rate: The number of samples per second at which the audio will be
        returned; passed to librosa as the target rate.

  Returns:
    The audio samples returned by ``librosa.load`` (mono); the sample rate
    librosa reports is discarded.

  Raises:
    AudioIOReadError: If librosa is unable to load the audio data.
  """
  try:
    samples, _ = librosa.load(audio_filename, sr=sample_rate, mono=True)
  except Exception as err:  # pylint: disable=broad-except
    raise AudioIOReadError(err)
  else:
    return samples
Example 11
Source File: cfp.py    From Melody-extraction-with-melodic-segnet with MIT License 6 votes vote down vote up
def load_audio(filepath, sr=None, mono=True, dtype='float32'):
    """Load an audio file, optionally downmixing and resampling it.

    Paths containing ``'.mp3'`` are decoded with pydub into a temporary WAV
    file, which is then read with soundfile; everything else is read with
    soundfile directly.

    Args:
        filepath (str): path of the audio file.
        sr: target sample rate; falsy (default ``None``) keeps the file's rate.
        mono (bool): average across axis 1 when the data has more than one
            dimension.
        dtype: dtype the returned samples are cast to.

    Returns:
        tuple: ``(x, fs)`` -- the samples and their sample rate.
    """
    # NOTE(review): this is a substring test, not an extension test, so any
    # path that merely contains ".mp3" takes this branch -- confirm intent.
    if '.mp3' in filepath:
        from pydub import AudioSegment
        import tempfile
        import os
        mp3 = AudioSegment.from_mp3(filepath)
        fd, path = tempfile.mkstemp()
        # mkstemp hands back an already-open OS-level descriptor; close it
        # immediately so it does not leak (the file is reopened by name).
        os.close(fd)
        try:
            mp3.export(path, format="wav")
            del mp3
            x, fs = sf.read(path)
        finally:
            # Always remove the temporary file, even if export/read failed.
            os.remove(path)
    else:
        x, fs = sf.read(filepath)

    if mono and len(x.shape) > 1:
        x = np.mean(x, axis=1)
    if sr:
        x = scipy.signal.resample_poly(x, sr, fs)
        fs = sr
    x = x.astype(dtype)

    return x, fs
Example 12
Source File: audio_reader.py    From tensorflow-wavenet with MIT License 6 votes vote down vote up
def load_generic_audio(directory, sample_rate):
    '''Generator yielding ``(audio, filename, category_id)`` per audio file.

    Files found under ``directory`` are visited in randomized order. Each is
    loaded as mono at ``sample_rate`` and reshaped to a single column.
    ``category_id`` is parsed from the filename via ``FILE_PATTERN``, or is
    None when the filename does not match.
    '''
    files = find_files(directory)
    id_reg_exp = re.compile(FILE_PATTERN)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        matches = id_reg_exp.findall(filename)
        # The first capture group of the first match carries the id, if any.
        category_id = int(matches[0][0]) if matches else None
        waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
        yield waveform.reshape(-1, 1), filename, category_id
Example 13
Source File: core.py    From KoSpeech with Apache License 2.0 6 votes vote down vote up
def load_audio(audio_path, del_silence):
    """
    Read a raw 16-bit PCM file and return it as float32 scaled by 1/32767.

    When ``del_silence`` is true, only the intervals returned by
    ``split(signal, top_db=30)`` are kept and joined together.
    Returns None (after a debug log entry) if a ValueError, RuntimeError or
    IOError is raised along the way.
    """
    handled = (
        (ValueError, 'ValueError'),
        (RuntimeError, 'RuntimeError'),
        (IOError, 'IOError'),
    )
    try:
        pcm = np.memmap(audio_path, dtype='h', mode='r').astype('float32')

        if del_silence:
            kept = [pcm[lo:hi] for lo, hi in split(pcm, top_db=30)]
            pcm = np.concatenate(kept)

        return pcm / 32767  # normalize audio

    except (ValueError, RuntimeError, IOError) as err:
        # Report under the first matching category, in the listed order.
        for exc_type, label in handled:
            if isinstance(err, exc_type):
                logger.debug('{0} in {1}'.format(label, audio_path))
                return None
Example 14
Source File: audio_reader.py    From SampleRNN with GNU Lesser General Public License v3.0 5 votes vote down vote up
def load_generic_audio(directory, sample_rate):
    '''Generator yielding ``(audio, filename)`` for each audio file found.

    Files found under ``directory`` are visited in randomized order. Each is
    loaded as mono at ``sample_rate`` and reshaped to a single column.
    '''
    files = find_files(directory)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
        yield waveform.reshape(-1, 1), filename
Example 15
Source File: model.py    From honk with MIT License 5 votes vote down vote up
def load_audio(self, example, silence=False):
        """Return the audio array for ``example``, possibly augmented.

        Args:
            example: key/path of the audio file; replaced by the
                ``"__silence__"`` key when ``silence`` is true.
            silence: when true, produce an all-zero clip (plus background
                noise) instead of loading a file.

        Returns:
            The processed array, which is also stored in ``self._audio_cache``.
        """
        if silence:
            example = "__silence__"
        # Reuse a cached result with probability 0.7, or always when this is
        # not the training set; fall through on a cache miss.
        if random.random() < 0.7 or not self.set_type == DatasetType.TRAIN:
            try:
                return self._audio_cache[example]
            except KeyError:
                pass
        in_len = self.input_length
        if self.bg_noise_audio:
            # Take a random in_len-long slice from a random background clip.
            bg_noise = random.choice(self.bg_noise_audio)
            a = random.randint(0, len(bg_noise) - in_len - 1)
            bg_noise = bg_noise[a:a + in_len]
        else:
            bg_noise = np.zeros(in_len)

        if silence:
            data = np.zeros(in_len, dtype=np.float32)
        else:
            # Raw file contents are cached separately from processed results.
            file_data = self._file_cache.get(example)
            data = librosa.core.load(example, sr=16000)[0] if file_data is None else file_data
            self._file_cache[example] = data
        # Zero-pad at the end up to in_len; longer clips are not truncated here.
        data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
        if self.set_type == DatasetType.TRAIN:
            data = self._timeshift_audio(data)

        # Mix in background noise with probability noise_prob (always for
        # silence), scaled by a random factor in [0, 0.1) and clipped to [-1, 1].
        if random.random() < self.noise_prob or silence:
            a = random.random() * 0.1
            data = np.clip(a * bg_noise + data, -1, 1)

        self._audio_cache[example] = data
        return data 
Example 16
Source File: data_loader.py    From pytorch-nlp with MIT License 5 votes vote down vote up
def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15),
                                  gain_range=(-6, 8)):
    """
    Draw a tempo and a gain uniformly from their ranges and return the
    utterance at ``path`` augmented with them via ``augment_audio_with_sox``.
    """
    # Draw tempo first, then gain, so the RNG is consumed in a fixed order.
    tempo_lo, tempo_hi = tempo_range
    tempo = np.random.uniform(low=tempo_lo, high=tempo_hi)
    gain_lo, gain_hi = gain_range
    gain = np.random.uniform(low=gain_lo, high=gain_hi)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=tempo, gain=gain)
Example 17
Source File: data_loader.py    From pytorch-nlp with MIT License 5 votes vote down vote up
def load_audio(path):
    """Load ``path`` with torchaudio and return it as a numpy array.

    Arrays with more than one dimension are reduced along axis 1: squeezed
    when that axis has length 1, averaged otherwise.
    """
    waveform, _ = torchaudio.load(path)
    waveform = waveform.numpy()
    if waveform.ndim <= 1:
        return waveform
    if waveform.shape[1] == 1:
        return waveform.squeeze()
    # multiple channels, average
    return waveform.mean(axis=1)
Example 18
Source File: datautils.py    From panotti with MIT License 5 votes vote down vote up
def load_audio(audio_path, mono=None, sr=None, convertOSXaliases=True):  # wrapper for librosa.load
    """Load audio via ``librosa.load``, retrying through OS X alias resolution.

    If librosa raises ``NoBackendError`` on macOS, the path is resolved with
    ``resolve_osx_alias`` and loading is retried once. A failure that cannot
    be recovered is printed and re-raised.

    Returns:
        tuple: ``(signal, sr)`` as returned by ``librosa.load``.
    """
    try:
        signal, sr = librosa.load(audio_path, mono=mono, sr=sr)
    except NoBackendError as first_error:
        if platform.system() != 'Darwin':
            print("\n*** ERROR: Could not open audio file {}".format(audio_path),"\n",flush=True)
            raise first_error
        # handle OS X alias files gracefully (convert to symlinks for next time)
        source = resolve_osx_alias(audio_path, convert=convertOSXaliases, already_checked_os=True)
        try:
            signal, sr = librosa.load(source, mono=mono, sr=sr)
        except NoBackendError as retry_error:
            print("\n*** ERROR: Could not open audio file {}".format(audio_path),"\n",flush=True)
            raise retry_error
    return signal, sr
Example 19
Source File: utils.py    From Tensorflow-Keyword-Spotting with Apache License 2.0 5 votes vote down vote up
def load_audio_file(file_path,sample_rate):
    """Load ``file_path`` at ``sample_rate`` and force it to ``sample_rate`` samples.

    Longer clips are truncated; shorter ones are zero-padded at the end.
    """
    target_len = sample_rate
    samples = librosa.core.load(file_path, sr=sample_rate)[0]  # , sr=16000
    if len(samples) > target_len:
        return samples[:target_len]
    missing = max(0, target_len - len(samples))
    return np.pad(samples, (0, missing), "constant")
Example 20
Source File: voyagerimb.py    From voyagerimb with MIT License 5 votes vote down vote up
def model_load_audio_data(self, filename):
        """Read a WAV file into ``self.rate`` / ``self.audio_data``.

        The root widget's cursor is switched to "watch" while the file is
        being read and is reset afterwards, including when the read fails.

        Args:
            filename: file handed to ``scipy.io.wavfile.read``.
        """
        self.root.config(cursor="watch")
        self.root.update()
        try:
            self.rate, self.audio_data = scipy.io.wavfile.read(filename)
        finally:
            # Restore the cursor even if the read raised, so the UI is not
            # left showing the busy cursor.
            self.root.config(cursor="")
Example 21
Source File: models.py    From openl3 with MIT License 5 votes vote down vote up
def load_audio_embedding_model(input_repr, content_type, embedding_size):
    """
    Build the audio embedding model for the given configuration.

    The base network for ``input_repr`` is constructed (with warnings
    silenced), its weights are loaded, and a max-pooling + flatten head is
    appended to produce the requested embedding size.

    Parameters
    ----------
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model.
    content_type : "music" or "env"
        Type of content used to train embedding.
    embedding_size : 6144 or 512
        Embedding dimensionality.

    Returns
    -------
    model : keras.models.Model
        Model object.
    """
    # Construct the base network quietly, then load its weights.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        base_model = AUDIO_MODELS[input_repr]()

    weights_path = get_audio_embedding_model_path(input_repr, content_type)
    base_model.load_weights(weights_path)

    # Pooling size determines the final output embedding size.
    pool_size = AUDIO_POOLING_SIZES[input_repr][embedding_size]
    pooled = MaxPooling2D(pool_size=pool_size, padding='same')(base_model.output)
    embedding = Flatten()(pooled)
    return Model(inputs=base_model.input, outputs=embedding)
Example 22
Source File: predict.py    From Looking-to-Listen with MIT License 5 votes vote down vote up
def LoadAudio(fname):
    """Load ``fname`` and return its STFT as (peak-normalized magnitude, unit phase).

    The audio is loaded at ``SR`` and transformed with ``FFT_SIZE``,
    ``HOP_LEN`` and ``WIN_LEN``.
    """
    samples, _ = load(fname, sr=SR)
    spectrum = stft(samples, n_fft=FFT_SIZE, hop_length=HOP_LEN, win_length=WIN_LEN)
    # Unit-modulus complex phase term.
    phase = np.exp(1.j * np.angle(spectrum))
    # Magnitude scaled so that its maximum is 1.
    magnitude = np.abs(spectrum)
    magnitude /= np.max(magnitude)
    return magnitude, phase
Example 23
Source File: tvnplayer.py    From filmkodi with Apache License 2.0 5 votes vote down vote up
def LOAD_AND_PLAY_AUDIO(self, url, title, player=True):
        """Play ``url`` as music, or show an error dialog when there is no link.

        ``url`` compared equal to ``False`` is treated as "no link available".
        """
        if url != False:
            self.__LOAD_AND_PLAY(url, title, player, "music")
            return
        # No link: tell the user (dialog text is user-facing Polish).
        dialog = xbmcgui.Dialog()
        dialog.ok('Brak linku!', 'Przepraszamy, chwilowa awaria.', 'Zapraszamy w innym terminie.')
Example 24
Source File: tvnplayer.py    From filmkodi with Apache License 2.0 5 votes vote down vote up
def LOAD_AND_PLAY_AUDIO_WATCHED(self, url):  # NEW: this version uses xbmcplugin.setResolvedUrl, which supports the "watched" status
        """Play ``url`` as audio with "watched" support, or show an error dialog.

        Returns:
            The result of ``__LOAD_AND_PLAY_WATCHED`` when a link is given,
            otherwise False after showing the dialog.
        """
        if url != False:
            return self.__LOAD_AND_PLAY_WATCHED(url, 'audio')
        # No link: tell the user (dialog text is user-facing Polish).
        dialog = xbmcgui.Dialog()
        dialog.ok('Brak linku!', 'Przepraszamy, chwilowa awaria.', 'Zapraszamy w innym terminie.')
        return False
Example 25
Source File: audio.py    From python-dlpy with Apache License 2.0 5 votes vote down vote up
def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
        '''
        Pre-process the metadata file and load it into a CAS table

        Parameters
        ----------
        conn : CAS
            A connection object to the current session. When None, the
            connection from ``cls.get_connection()`` is used.
        path : string
            Location to the input metadata file.
        audio_path : string
            Value forwarded to ``DataClean.process_contents`` as ``audio_path``.

        Returns
        -------
        :class:`CASTable`

        Raises
        ------
        DLPyError
            If no connection object can be obtained.

        '''
        if conn is None:
            conn = cls.get_connection()
            if conn is None:
                raise DLPyError('cannot get a connection object to the current session.')

        table_name = random_name('AudioTable_Metadata', 6)

        cleaner = DataClean(conn=conn, contents_as_path=path)
        cleaned = cleaner.process_contents(audio_path=audio_path)
        tbl = cleaner.create_castable(cleaned['results'], table_name,
                                      replace=True, promote=False,
                                      col_names=cleaned['col_names'])

        # Computed column program: copy _filename_ into a varchar _fName_.
        scode = ('length _fName_ varchar(*); '
                 '_fName_ = _filename_; ')
        computed = CASTable(tbl, computedvars=['_fName_'],
                            computedvarsprogram=scode)

        # Partition the table with the computed column, replacing it in place.
        conn.table.partition(table=computed, casout=dict(name=tbl, replace=True))

        return CASTable(tbl)
Example 26
Source File: audio.py    From python-dlpy with Apache License 2.0 5 votes vote down vote up
def load_audio_metadata(cls, conn, path, audio_path, task='speech2text'):
        '''
        Pre-process and load the metadata for the given task

        Parameters
        ----------
        conn : CAS
            A connection object to the current session. When None, the
            connection from ``cls.get_connection()`` is used.
        path : string
            Location to the input metadata file.
        audio_path : string
            Location to the audio files.
        task : string, optional
            Specifies the task
            Note: currently only support 'speech2text' (default)

        Returns
        -------
        :class:`CASTable`

        Raises
        ------
        DLPyError
            If no connection object can be obtained, or the task is not
            supported.

        '''
        if conn is None:
            conn = cls.get_connection()
            if conn is None:
                raise DLPyError('cannot get a connection object to the current session.')

        if task != 'speech2text':
            raise DLPyError("We do not support this task yet!")

        return cls.load_audio_metadata_speechrecognition(conn, path, audio_path)
Example 27
Source File: wpe.py    From fdndlp with MIT License 5 votes vote down vote up
def load_audio(self, filename):
        """Read ``filename`` and return ``(data, fs)`` with channels on axis 0.

        At most ``self.channels`` channels are kept (a message is printed when
        extra channels are dropped); fewer channels than that fail the assert.
        """
        frames, fs = sf.read(filename, always_2d=True)
        channels_first = frames.T
        n_available = channels_first.shape[0]
        assert(n_available >= self.channels)
        if n_available > self.channels:
            print(
                "The number of the input channels is %d," % n_available,
                "and only the first %d channels are loaded." % self.channels)
            channels_first = channels_first[0: self.channels]
        return channels_first.copy(), fs
Example 28
Source File: audio_classes.py    From sigsep-mus-db with MIT License 5 votes vote down vote up
def load_audio(self, path, stem_id, chunk_start=0, chunk_duration=None):
        """array_like: [shape=(num_samples, num_channels)]

        Read audio from ``path``, via stempeg for stem files or soundfile for
        WAV files, and record the sample rate in ``self._rate``.

        ``chunk_start`` and ``chunk_duration`` are in seconds; for WAV files
        they are converted to sample indices using ``self.rate``.

        Raises ValueError (after clearing ``self._rate`` and ``self._audio``)
        when ``self.path`` does not exist.
        """
        # NOTE(review): existence is checked on self.path while the read uses
        # the `path` argument -- confirm this is intended.
        if not os.path.exists(self.path):
            self._rate = None
            self._audio = None
            raise ValueError("Oops! %s cannot be loaded" % path)

        if self.is_wav:
            first_sample = int(chunk_start * self.rate)
            if chunk_duration:
                # stop in soundfile is calc in samples, not seconds
                last_sample = first_sample + int(chunk_duration * self.rate)
            else:
                # no duration given: pass the falsy value through unchanged
                last_sample = chunk_duration
            audio, rate = sf.read(
                path,
                always_2d=True,
                start=first_sample,
                stop=last_sample
            )
        else:
            # read using stempeg
            audio, rate = stempeg.read_stems(
                filename=path,
                stem_id=stem_id,
                start=chunk_start,
                duration=chunk_duration,
                info=self.info
            )

        self._rate = rate
        return audio
Example 29
Source File: wavenet_data.py    From HandsOnDeepLearningWithPytorch with MIT License 5 votes vote down vote up
def load_audio(filename, sample_rate=16000, trim=True, trim_frame_length=2048):
    """Load ``filename`` as mono at ``sample_rate`` and return it as a column.

    When ``trim > 0`` the result of ``librosa.effects.trim`` (using
    ``trim_frame_length``) is returned instead of the raw column.
    """
    waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
    column = waveform.reshape(-1, 1)

    if not trim > 0:
        return column

    # NOTE(review): trim receives the (N, 1) column rather than the 1-D
    # signal -- confirm this is the layout librosa.effects.trim expects.
    trimmed, _ = librosa.effects.trim(column, frame_length=trim_frame_length)
    return trimmed
Example 30
Source File: refi.py    From QualCoder with MIT License 5 votes vote down vote up
def load_audio_source(self, element):
        """ Load an audio source into the project.

        Copies the media file into the project's audio folder under its
        original name and inserts a row for it into the ``source`` table.
        A copy failure is reported in ``parent_textEdit`` and does not stop
        the database insert.

        path to file can be internal or relative.
        e.g. path="relative:///DF370983‐F009‐4D47‐8615‐711633FA9DE6.m4a"
        """

        parsed = self.name_creating_user_create_date_source_path_helper(element)
        name, creating_user, create_date, source_path = parsed

        # Copy file into .qda audio folder and rename into original name
        media_path = "/audio/" + name
        destination = self.app.project_path + media_path
        try:
            shutil.copyfile(source_path, destination)
        except Exception as e:
            self.parent_textEdit.append(_('Cannot copy Audio file from: ') + source_path + "\nto: " + destination + '\n' + str(e))

        cursor = self.app.conn.cursor()
        row = (name, '', creating_user, create_date, media_path, None)
        cursor.execute("insert into source(name,memo,owner,date, mediapath, fulltext) values(?,?,?,?,?,?)",
            row)
        self.app.conn.commit()
        # Row id of the new source; not used yet (see the TODOs below).
        cursor.execute("select last_insert_rowid()")
        id_ = cursor.fetchone()[0]

        #TODO load transcript
        #TODO transcript contains SynchPoints AKA timestamps
        #TODO load codings
        # Sample of the coding XML still to be handled:
        # <PictureSelection guid="04980e59-b290-4481-8cb6-e732824440a1" firstX="783" firstY="1238" secondX="1172" secondY="1788" name="a stylised faced on the lecture slide.
        # " creatingUser="70daf61c-b6f0-4b5e-8c2f-548fde3ad3d4" creationDateTime="2019-03-09T23:19:07Z">
        # <Coding guid="7a7e80ca-ed8c-4006-86b3-731e36baca19" creatingUser="70daf61c-b6f0-4b5e-8c2f-548fde3ad3d4" ><CodeRef targetGUID="1b594544-2954-4b67-86ff-fb552f090ba8"/>
        # </Coding></PictureSelection>
Example 31
Source File: audio.py    From PolyglotDB with MIT License 5 votes vote down vote up
def load_audio(self, discourse, file_type):
        """
        Loads the audio for a discourse from the file variant selected by
        ``file_type`` (``consonant``, ``vowel`` or ``low_freq``). Any other
        value selects the discourse's main sound file. The file is loaded
        without resampling.

        Parameters
        ----------
        discourse : str
            Name of the audio file to load
        file_type : str
            One of ``consonant``, ``vowel`` or ``low_freq``

        Returns
        -------
        numpy.array
            Audio signal
        int
            Sampling rate of the file
        """
        # Attribute on the sound-file record holding each variant's path.
        path_attribute = {
            'consonant': 'consonant_file_path',
            'vowel': 'vowel_file_path',
            'low_freq': 'low_freq_file_path',
        }.get(file_type, 'file_path')

        sound_file = self.discourse_sound_file(discourse)
        path = os.path.expanduser(getattr(sound_file, path_attribute))
        return librosa.load(path, sr=None)
Example 32
Source File: sound_to_tfrecords.py    From kinetics-downloader with MIT License 5 votes vote down vote up
def load_audio(path, sampling_rate):
  """
  Load a mono audio signal with librosa.
  :param path:            Path to the audio file.
  :param sampling_rate:   Sampling rate passed to librosa as the target rate.
  :return:                Audio data (the reported sample rate is discarded).
  """
  samples, _ = librosa.load(path, sr=sampling_rate, mono=True)
  return samples
Example 33
Source File: audio_model.py    From Tensorflow-Audio-Classification with Apache License 2.0 5 votes vote down vote up
def load_audio_slim_checkpoint(session, checkpoint_path):
    """Restores the audio-model variables of ``session`` from a checkpoint.

    Suitable as an ``init_fn``: call it in a Session after all variables have
    been initialized. Only variables whose names also appear in a freshly
    defined inference-mode audio model are restored.

    Args:
        session: an active TensorFlow session.
        checkpoint_path: path to a file containing a checkpoint that is
          compatible with the audio model definition.
    """
    # Names of all audio variables, obtained by defining the inference-mode
    # model in a scratch graph.
    with tf.Graph().as_default():
        define_audio_slim(training=False)
        expected_names = [var.name for var in tf.global_variables()]

    # Variables that exist in the graph owned by the given session.
    with session.graph.as_default():
        session_variables = tf.global_variables()

    # Keep only the session variables that belong to the audio model.
    restorable = [var for var in session_variables if var.name in expected_names]

    # Use a Saver to restore just the variables selected above.
    saver = tf.train.Saver(restorable, name='audio_load_pretrained',
                           write_version=1)
    saver.restore(session, checkpoint_path)
Example 34
Source File: data_helpers.py    From MELD with GNU General Public License v3.0 5 votes vote down vote up
def load_audio_data(self, ):
        """Load the pickled audio embeddings for ``self.MODE`` and derive dialogue data.

        Fills ``train_audio_emb``, ``val_audio_emb`` and ``test_audio_emb``
        from the pickle file, then runs the dialogue-level helpers
        (embeddings, lengths, labels, masks) in that order.
        """
        AUDIO_PATH = "./data/pickles/audio_embeddings_feature_selection_{}.pkl".format(self.MODE.lower())
        # Use a context manager so the file handle is closed after loading.
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(AUDIO_PATH, "rb") as handle:
            self.train_audio_emb, self.val_audio_emb, self.test_audio_emb = pickle.load(handle)

        self.get_dialogue_audio_embs()
        self.get_dialogue_lengths()
        self.get_dialogue_labels()
        self.get_masks()
Example 35
Source File: _player.py    From ai-makers-kit with MIT License 5 votes vote down vote up
def load_audio(self, wav_path):
        """Read a mono WAV file into the ``_loaded_*`` attributes.

        Stores the raw frames, frame rate and sample width of ``wav_path`` on
        the instance.

        Raises:
            ValueError: if the file has more than one channel.
        """
        # The context manager closes the file on every path, including the
        # not-mono error below.
        with wave.open(wav_path, 'r') as wav:
            if wav.getnchannels() != 1:
                raise ValueError(wav_path + ' is not a mono file')

            self._loaded_bytes = wav.readframes(wav.getnframes())
            self._loaded_samplerate = wav.getframerate()
            self._loaded_samplewidth = wav.getsampwidth()
Example 36
Source File: data_loader.py    From LipReading with MIT License 5 votes vote down vote up
def load_audio(path):
    """Load ``path`` with torchaudio (normalization on) as a numpy array.

    Arrays with more than one dimension are reduced along axis 1: squeezed
    when that axis has length 1, averaged otherwise.
    """
    waveform, _ = torchaudio.load(path, normalization=True)
    waveform = waveform.numpy()
    if waveform.ndim <= 1:
        return waveform
    if waveform.shape[1] == 1:
        return waveform.squeeze()
    # multiple channels, average
    return waveform.mean(axis=1)
Example 37
Source File: audio_signal.py    From nussl with MIT License 5 votes vote down vote up
def load_audio_from_array(self, signal, sample_rate=constants.DEFAULT_SAMPLE_RATE):
        """
        Loads an audio signal from a :obj:`np.ndarray`. :param:`sample_rate` is the
        sample rate of the signal.

        See Also:
            * :func:`load_audio_from_file` to read in an audio file from disc.

        Notes:
            Only accepts float arrays and int arrays of depth 16-bits.
            Non-float input is converted to float and divided by 32768.

        Parameters:
            signal (:obj:`np.ndarray`): Array containing the audio signal sampled at
                :param:`sample_rate`.
            sample_rate (int): The sample rate of signal. ``None`` falls back to
                :ref:`constants.DEFAULT_SAMPLE_RATE`.

        """
        assert (type(signal) == np.ndarray)

        self.path_to_input_file = None

        # Change from fixed point to floating point
        already_float = np.issubdtype(signal.dtype, np.floating)
        if not already_float:
            int16_full_scale = np.iinfo(np.dtype('int16')).max + 1.0
            signal = signal.astype('float') / int16_full_scale

        self.audio_data = signal
        self.original_signal_length = self.signal_length
        if sample_rate is None:
            self._sample_rate = constants.DEFAULT_SAMPLE_RATE
        else:
            self._sample_rate = sample_rate

        self.set_active_region_to_default()
Example 38
Source File: feat_ext.py    From icassp19 with MIT License 5 votes vote down vote up
def load_audio_file(file_path, input_fixed_length=0, params_extract=None):
    """Read an audio file, resample it if required, and return it as a column.

    :param file_path: path of the audio file to read.
    :param input_fixed_length: length of the placeholder (all ones) returned
        when the file yields no data.
    :param params_extract: optional dict; its ``'fs'`` entry is the target
        sample rate. When the dict or the entry is missing, no resampling is
        done.
    :return: array of shape (-1, 1).
    """
    data, source_fs = soundfile.read(file=file_path)
    data = data.T

    # Guard against the default params_extract=None (and a missing 'fs' key),
    # which previously crashed instead of leaving the audio at its own rate.
    target_fs = params_extract.get('fs') if params_extract is not None else None

    # Resample if the source_fs is different from expected
    if target_fs is not None and target_fs != source_fs:
        data = librosa.core.resample(data, source_fs, target_fs)
        print('Resampling to %d: %s' % (target_fs, file_path))

    if len(data) > 0:
        data = get_normalized_audio(data)
    else:
        # 3 files are corrupted in the test set. They belong to the padding group (not used for evaluation)
        data = np.ones((input_fixed_length, 1))
        print('File corrupted. Could not open: %s' % file_path)

    # careful with the shape
    data = np.reshape(data, [-1, 1])
    return data
Example 39
Source File: train_audio.py    From AudioEmotion with MIT License 5 votes vote down vote up
def load_audio_data():
    """Load the feature table and return an 80/20 train/test split.

    Reads ``../Output/data.csv``, drops the ``filename`` column, uses the last
    column as the (label-encoded) target and the remaining columns as float
    features.

    Returns:
        The result of ``train_test_split(x, y, test_size=0.2)``.
    """
    frame = pd.read_csv('../Output/data.csv').drop(['filename'], axis=1)

    labels = LabelEncoder().fit_transform(frame.iloc[:, -1])
    features = np.array(frame.iloc[:, :-1], dtype=float)
    # Features are deliberately left unscaled here.

    return train_test_split(features, labels, test_size=0.2)
Example 40
Source File: __init__.py    From SimpleAudioIndexer with Apache License 2.0 5 votes vote down vote up
def load_indexed_audio(self, indexed_audio_file_abs_path):
        """
        Replace the stored timestamps with the object unpickled from a file.

        Parameters
        ----------
        indexed_audio_file_abs_path : str
            File to unpickle. pickle can execute arbitrary code, so only
            trusted files should be loaded.
        """
        with open(indexed_audio_file_abs_path, "rb") as handle:
            loaded = pickle.load(handle)
        self.__timestamps = loaded
Example 41
Source File: audio.py    From end2end-asr-pytorch with MIT License 5 votes vote down vote up
def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15), gain_range=(-6, 8)):
    """
    Draw a random tempo and a random gain and hand them to ``augment_audio_with_sox``.

    Both values are sampled uniformly from their ``(low, high)`` range, tempo first and
    gain second. The result of ``augment_audio_with_sox`` is returned unchanged.
    """
    tempo_lo, tempo_hi = tempo_range
    tempo = np.random.uniform(low=tempo_lo, high=tempo_hi)

    gain_lo, gain_hi = gain_range
    gain = np.random.uniform(low=gain_lo, high=gain_hi)

    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=tempo, gain=gain)
Example 42
Source File: audio.py    From end2end-asr-pytorch with MIT License 5 votes vote down vote up
def load_audio(path):
    """Load ``path`` with torchaudio and return a one-dimensional numpy signal.

    The loaded tensor is converted to numpy and transposed. A single-column array is
    squeezed; an array with several columns is averaged across them.
    """
    waveform, _ = torchaudio.load(path, normalization=True)
    samples = waveform.numpy().T

    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    # multiple channels, average
    return samples.mean(axis=1)
Example 43
Source File: soundnet.py    From soundnet_keras with MIT License 5 votes vote down vote up
def load_audio(audio_file):
    """Load ``audio_file`` as mono float32 audio at 22050 Hz and pass it through ``preprocess``.

    Returns whatever ``preprocess`` returns for the loaded signal.
    """
    sample_rate = 22050  # SoundNet works on mono audio files with a sample rate of 22050.
    # The rate reported by librosa is not needed: it is the one requested above.
    audio, _ = librosa.load(audio_file, dtype='float32', sr=sample_rate, mono=True)
    return preprocess(audio)
Example 44
Source File: util.py    From SoundNet-tensorflow with MIT License 5 votes vote down vote up
def load_audio(audio_path, sr=None):
    """Load an audio file with librosa without down-mixing its channels.

    ``sr`` is forwarded to ``librosa.load``: with the default ``None`` the file keeps its
    native sampling rate, otherwise librosa resamples to the requested rate.

    Returns the ``(signal, sampling_rate)`` pair produced by ``librosa.load``.
    """
    return librosa.load(audio_path, sr=sr, mono=False)
Example 45
Source File: dataset.py    From rafiki with Apache License 2.0 5 votes vote down vote up
def load_dataset_of_audio_files(self, dataset_path, dataset_dir):
        '''
            Loads dataset with type `AUDIO_FILES`.

            :param dataset_path: forwarded unchanged as the first argument of ``AudioFilesDataset``
            :param dataset_dir: forwarded unchanged as the second argument of ``AudioFilesDataset``
            :returns: An instance of ``AudioFilesDataset``.
        '''
        audio_dataset = AudioFilesDataset(dataset_path, dataset_dir)
        return audio_dataset
Example 46
Source File: utils.py    From magenta with Apache License 2.0 5 votes vote down vote up
def load_audio(path, sample_length=64000, sr=16000):
  """Load a wave file and keep at most ``sample_length`` samples.

  Args:
    path: Location of the wave file to load.
    sample_length: Maximum number of samples kept from the start of the signal.
    sr: Sampling rate passed to ``librosa.load``.

  Returns:
    The loaded signal truncated to its first ``sample_length`` samples.
  """
  samples, _ = librosa.load(path, sr=sr)
  return samples[:sample_length]
Example 47
Source File: orchset.py    From mirdata with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_audio_mono(audio_path):
    """Load an Orchset audio file down-mixed to mono.

    Args:
        audio_path (str): path to audio file

    Returns:
        The ``(signal, sample_rate)`` pair from ``librosa.load`` called with
        ``sr=None`` and ``mono=True``.

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=True)

    raise IOError("audio_path {} does not exist".format(audio_path))
Example 48
Source File: guitarset.py    From mirdata with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_multitrack_audio(audio_path):
    """Load a Guitarset multitrack audio file without down-mixing.

    Args:
        audio_path (str): path to audio file

    Returns:
        The ``(signal, sample_rate)`` pair from ``librosa.load`` called with
        ``sr=None`` and ``mono=False`` (channels are kept separate).

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=False)
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 49
Source File: orchset.py    From mirdata with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_audio_stereo(audio_path):
    """Load an Orchset audio file keeping its channels separate.

    Args:
        audio_path (str): path to audio file

    Returns:
        The ``(signal, sample_rate)`` pair from ``librosa.load`` called with
        ``sr=None`` and ``mono=False``.

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=False)

    raise IOError("audio_path {} does not exist".format(audio_path))
Example 50
Source File: data_loader.py    From inference with Apache License 2.0 5 votes vote down vote up
def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15),
                                  gain_range=(-6, 8), frame_start=0, frame_end=-1):
    """
    Draw a random tempo and a random gain and hand them to ``augment_audio_with_sox``.

    Both values are sampled uniformly from their ``(low, high)`` range, tempo first and
    gain second. ``frame_start`` and ``frame_end`` are forwarded untouched, and the result
    of ``augment_audio_with_sox`` is returned unchanged.
    """
    tempo_lo, tempo_hi = tempo_range
    tempo = np.random.uniform(low=tempo_lo, high=tempo_hi)

    gain_lo, gain_hi = gain_range
    gain = np.random.uniform(low=gain_lo, high=gain_hi)

    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=tempo, gain=gain,
                                  frame_start=frame_start, frame_end=frame_end)
Example 51
Source File: data_source_loader.py    From nnabla with Apache License 2.0 5 votes vote down vote up
def load_audio(file, shape=None, normalize=False):
    """Load an audio file with the pydub-based loader when available, else the WAV loader.

    The module-level flag ``pydub_available`` selects between ``load_audio_pydub`` and
    ``load_wav``; all three arguments are forwarded positionally to the chosen loader and
    its result is returned.
    """
    # The flag is only read here, so no ``global`` declaration is required.
    loader = load_audio_pydub if pydub_available else load_wav
    return loader(file, shape, normalize)
Example 52
Source File: paradrop_client.py    From Paradrop with Apache License 2.0 5 votes vote down vote up
def load_audio_module(self, module_name):
        """
        Ask the audio subsystem to load a module.

        Sends a POST request to ``<base_url>/audio/modules`` with the JSON body
        ``{"name": module_name}`` and returns the result of ``self.request``.
        """
        endpoint = "{}/audio/modules".format(self.base_url)
        return self.request("POST", endpoint, json={"name": module_name})
Example 53
Source File: gui.py    From MIA-Japanese-Add-on with GNU General Public License v3.0 5 votes vote down vote up
def loadAudioGraphFieldsCB(self):
        """Populate the audio-field and pitch-graph combo boxes with the same entries.

        Each box receives a 'Clipboard' entry, a disabled and centred separator line,
        and then every name in ``self.allFields``.
        """
        for combo in (self.ui.audioFieldsCB, self.ui.pitchGraphsCB):
            combo.addItem('Clipboard')
            combo.addItem('──────────────────')
            # The separator is the entry that was just appended, i.e. the last one.
            separator = combo.model().item(combo.count() - 1)
            separator.setEnabled(False)
            separator.setTextAlignment(Qt.AlignCenter)
            combo.addItems(self.allFields)
Example 54
Source File: core.py    From muda with ISC License 4 votes vote down vote up
def load_jam_audio(
    jam_in, audio_file, validate=True, strict=True, fmt="auto", **kwargs
):
    """Load a JAMS annotation together with its audio and pack them into one object.

    Parameters
    ----------
    jam_in : str, file descriptor, jams.JAMS, or None
        A ``jams.JAMS`` instance is used as is; ``None`` creates an empty
        ``jams.JAMS()``; anything else is handed to ``jams.load``.

    audio_file : str
        Audio filename, loaded with ``librosa.load``.

    validate : bool
    strict : bool
    fmt : str
        Forwarded to ``jams.load`` (only used when ``jam_in`` has to be loaded).

    kwargs : additional keyword arguments
        Forwarded to ``librosa.load``.

    Returns
    -------
    jam : jams.JAMS
        The result of ``jam_pack`` applied to the JAMS object with the audio
        signal and sampling rate stored under ``_audio``.

    Notes
    -----
    If ``file_metadata.duration`` of the JAMS object is ``None`` it is filled
    in with the duration of the loaded audio. When ``jam_in`` is a JAMS
    instance this modifies the caller's object.

    See Also
    --------
    jams.load
    librosa.core.load

    Examples
    --------
    Load a JAMS object and audio from disk

    >>> jam = muda.load_jam_audio('my_file.jams', 'my_file.wav')

    Load an audio file with no jams annotation

    >>> jam = muda.load_jam_audio(None, 'my_file.wav')

    """

    if jam_in is None:
        jam = jams.JAMS()
    elif isinstance(jam_in, jams.JAMS):
        jam = jam_in
    else:
        jam = jams.load(jam_in, validate=validate, strict=strict, fmt=fmt)

    signal, rate = librosa.load(audio_file, **kwargs)

    metadata = jam.file_metadata
    if metadata.duration is None:
        metadata.duration = librosa.get_duration(y=signal, sr=rate)

    return jam_pack(jam, _audio=dict(y=signal, sr=rate))
Example 55
Source File: LoadClipsExt.py    From Luminosity with GNU General Public License v3.0 4 votes vote down vote up
def LoadAudio(self, *args, local = True):
		"""Install a fresh audio-player plugin on a clip component and fill in its settings.

		Positional ``args`` are unpacked in this order: name, path, mediaType, bank,
		channel, clip. The ``local`` keyword is accepted but never read in this method.

		The method copies the audio-player plugin into the clip component (destroying an
		existing ``plugin`` child first), writes the file path, sample rate and trim/scrub
		ranges into the plugin's ``CompAttr`` and ``CompPar`` storage and its
		``parameters`` table, and points the plugin's ``audiofilein`` operator at the file.

		NOTE(review): ``op``, ``me`` and ``run`` are not defined in this block; presumably
		they are the TouchDesigner built-ins. Confirm against the host environment.
		"""

		# NOTE(review): an earlier comment listed the args as (path, name, ...), but the
		# code below reads name from args[0] and path from args[1]; verify against callers.
		#print('Loading Movie')

		name = args[0]
		path = args[1]
		mediaType = args[2]
		bank = args[3]
		channel = args[4]
		clip = args [5]

		# Source plugin to clone and the clip component that receives the copy.
		srcPlugin = op(me.fetch('PLUGINS') + '/players/audioPlayer/plugin')
		dataClipPath = me.fetch('CLIP_DATA') +'/'+ bank +'/'+ channel +'/'+ clip
		clipComp = op(dataClipPath)


		# Sample rate and length are read from the 'audioInfo' operator of LoadAudioComp.
		audioInfo = self.LoadAudioComp.op('audioInfo')
		sampleRate = audioInfo['sample_rate'].eval()
		length = audioInfo['true_file_length_frames'].eval()
		# Upper bound used for the trim/scrub ranges: one past the file length.
		end = length + 1

		# Replace any existing plugin with a fresh copy of the source plugin.
		if clipComp.op('plugin'):
			clipComp.op('plugin').destroy()
		clipComp.copy(srcPlugin)

		pluginComp = clipComp.op('plugin')
		compAttr = pluginComp.storage['CompAttr']

		# Descriptive attributes of the clip.
		compAttr['attr']['type'] = 'audio'
		compAttr['attr']['name'] = name 
		compAttr['attr']['fileType'] = mediaType
		compAttr['attr']['length'] = length

		# Defaults and ranges of the UI attributes.
		compAttr['uiAttr']['file']['default'] = path
		compAttr['uiAttr']['sampleRate']['default'] = sampleRate
		compAttr['uiAttr']['trimStart']['rangeHigh'] = end
		compAttr['uiAttr']['trimEnd']['rangeHigh'] = end
		compAttr['uiAttr']['trimEnd']['default'] = end
		compAttr['uiAttr']['scrub']['rangeHigh'] = end
		compAttr['uiAttr']['scrub']['default'] = 1
		compAttr['uiAttr']['speed']['default'] = 1


		compPar = pluginComp.storage['CompPar']

		# Current parameter values stored on the plugin.
		compPar['values']['file']['value'] = path
		compPar['values']['sampleRate']['value'] = sampleRate
		compPar['values']['trimStart']['value'] = 1
		compPar['values']['trimEnd']['value'] = end
		compPar['values']['scrub']['value'] = 1
		compPar['values']['speed']['value'] = 1

		# Only the node whose 'NODE' value is 'master' forwards the load to LoadProc.
		if me.fetch('NODE') == 'master':
			self.LoadProc(name, path, mediaType, bank, channel, clip, dataClipPath, [compAttr['attr'], compAttr['uiAttr']], compPar['values'])

		parTable = pluginComp.op('parameters')

		# Mirror the values into the plugin's 'parameters' table.
		parTable['file','value'] = path
		parTable['sampleRate','value'] = sampleRate
		parTable['trimStart','value'] = 1
		parTable['trimEnd','value'] = end
		parTable['speed','value'] = 1
		parTable['play','value'] = 0

		audioPlayer = pluginComp.op('audiofilein')
		audioPlayer.par.file = path

		# Script that points the loader operator at a hard-coded sample file.
		setDefault = "op('loadAudio/loader').par.file = 'C:/Program Files/Derivative/TouchDesigner088/Samples/Audio/JeremyCaulfield_www.dumb-unit.com.mp3'"

		# NOTE(review): presumably executes the script after a 10-frame delay; confirm.
		run(setDefault, delayFrames = 10) 
Example 56
Source File: audio_analysis.py    From AudioEmotion with MIT License 4 votes vote down vote up
def load_audio(path, with_path=True, recursive=True, ignore_failure=True, random_order=False):
    """
    Collect WAV file(s) under a path and load them into an SFrame.

    Parameters
    ----------
    path : str
        Either a single file matching ``*.wav`` or a directory to search.

    with_path : bool, optional
        When False, the 'path' column is removed from the returned SFrame.

    recursive : bool, optional
        When ``path`` is a directory: walk it recursively if True, otherwise
        only pick up ``*.wav`` files directly under it.

    ignore_failure : bool, optional
        If True, a file that cannot be read is reported with ``print`` and
        skipped; if False, a ``_ToolkitError`` is raised instead.

    random_order : bool, optional
        Shuffle the list of files before loading.

    Returns
    -------
    out : SFrame
        An SFrame with an 'audio' column and, unless ``with_path`` is False,
        a 'path' column. Each 'audio' entry is a dictionary with the keys
        'sample_rate' (as reported by ``librosa.load`` with ``sr=None``) and
        'data' (the signal returned by ``librosa.load`` multiplied by 32768).

    Examples
    --------
    >>> audio_path = "~/Documents/myAudioFiles/"
    >>> audio_sframe = tc.audio_analysis.load_audio(audio_path, recursive=True)
    """
    if _fnmatch(path, '*.wav'):    # single file
        wav_paths = [path]
    elif recursive:
        wav_paths = [
            dir_path + '/' + file_name
            for (dir_path, _, file_names) in _os.walk(path)
            for file_name in file_names
            if _fnmatch(file_name, '*.wav')
        ]
    else:
        wav_paths = _glob(path + '/*.wav')

    if random_order:
        _shuffle(wav_paths)

    builder = _tc.SFrameBuilder(column_types=[dict, str], column_names=['audio', 'path'])
    for wav_path in wav_paths:
        try:
            samples, rate = librosa.load(wav_path, sr=None, res_type='scipy')
            # NOTE(review): presumably rescales librosa's float output to the 16-bit
            # integer range; confirm with the consumers of this column.
            samples = samples * 32768
        except Exception as e:
            message = "Could not read {}: {}".format(wav_path, e)
            if ignore_failure:
                print(message)
                continue
            raise _ToolkitError(message)

        builder.append([{'sample_rate': rate, 'data': samples}, wav_path])

    frame = builder.close()
    if not with_path:
        del frame['path']
    return frame
Example 57
Source File: audio_signal.py    From nussl with MIT License 4 votes vote down vote up
def load_audio_from_file(self, input_file_path, offset=0, duration=None, new_sample_rate=None):
        # type: (str, float, float, int) -> None
        """
        Loads an audio signal into memory from a file on disk and stores it on this object.

        The audio is read with ``librosa.load`` without down-mixing (``mono=False``) and at
        the file's own sample rate, which becomes this object's sample rate. If
        :param:`new_sample_rate` is given and differs from the file's rate, a warning is
        issued and the audio is resampled via :func:`resample`. Finally the input path is
        recorded and the active region is reset to its default.

        :param:`offset` and :param:`duration` restrict how much of the file is read; only
        that section is stored in :attr:`audio_data`.

        See Also:
            * :func:`load_audio_from_array` to read audio data from a :obj:`np.ndarray`.

        Args:
            input_file_path (str): Path to input file.
            offset (float): The starting point of the section to be extracted (seconds).
                Defaults to 0 seconds (i.e., the very beginning of the file).
            duration (float): Length of signal to load in seconds. ``None`` (the default)
                is passed through to ``librosa.load`` unchanged, i.e. no length limit is
                requested.
            new_sample_rate (int): If this parameter is not ``None`` or the same sample rate as
                provided by the input file, then the audio data will be resampled to the new
                sample rate dictated by this parameter.

        Raises:
            AssertionError: if ``offset`` or ``duration`` is negative.
            AudioSignalException: if ``offset`` is larger than the length of the file.

        Warns:
            UserWarning: if ``offset + duration`` reaches or exceeds the file length, or if
                the audio has to be resampled.

        """
        assert offset >= 0, 'Parameter `offset` must be >= 0!'
        if duration is not None:
            assert duration >= 0, 'Parameter `duration` must be >= 0!'

        try:
            # try reading headers with soundfile for speed
            audio_info = sf.info(input_file_path)
            file_length = audio_info.duration
        except Exception:
            # Only ordinary errors trigger the fallback; a bare ``except`` would also
            # swallow KeyboardInterrupt and SystemExit.
            # if that doesn't work try audioread
            with audioread.audio_open(os.path.realpath(input_file_path)) as input_file:
                file_length = input_file.duration

        if offset > file_length:
            raise AudioSignalException('offset is longer than signal!')

        if duration is not None and offset + duration >= file_length:
            warnings.warn('offset + duration are longer than the signal.'
                          ' Reading until end of signal...',
                          UserWarning)

        audio_input, self._sample_rate = librosa.load(input_file_path,
                                                      sr=None,
                                                      offset=offset,
                                                      duration=duration,
                                                      mono=False)

        self.audio_data = audio_input
        self.original_signal_length = self.signal_length

        if new_sample_rate is not None and new_sample_rate != self._sample_rate:
            warnings.warn('Input sample rate is different than the sample rate'
                          ' read from the file! Resampling...',
                          UserWarning)
            self.resample(new_sample_rate)

        self.path_to_input_file = input_file_path
        self.set_active_region_to_default()