Python load audio
60 Python code examples are found related to "load audio".
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: ikala.py From mirdata with BSD 3-Clause "New" or "Revised" License | 7 votes |
def load_vocal_audio(audio_path):
    """Load the vocal channel of an iKala recording.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the signal stored at channel index 1
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        # Loaded with mono=False so the individual channels can be indexed.
        channels, sample_rate = librosa.load(audio_path, sr=None, mono=False)
        return channels[1, :], sample_rate
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 2
Source File: data_loader.py From inference with Apache License 2.0 | 6 votes |
def load_audio(path, frame_start=0, frame_end=-1):
    """Load an audio file as a NumPy array, optionally sliced to a frame range.

    Args:
        path: file handed to ``torchaudio.load``.
        frame_start: index of the first frame to keep.
        frame_end: index one past the last frame to keep. Slicing is skipped
            when neither ``frame_start`` nor ``frame_end`` is positive.

    Returns:
        The loaded samples. An array with more than one dimension is squeezed
        when its second axis has length 1 and averaged over that axis
        otherwise. If ``frame_end`` is beyond the end of the signal, whole
        copies of the signal are appended until it is long enough.
    """
    waveform, _ = torchaudio.load(path)
    samples = waveform.numpy()
    if samples.ndim > 1:
        # multiple channels, average
        samples = samples.squeeze() if samples.shape[1] == 1 else samples.mean(axis=1)

    if frame_end <= 0 and frame_start <= 0:
        return samples

    assert frame_start < frame_end, "slicing does not yet support inverting audio"
    available = samples.shape[0]
    if frame_end > available:
        # Number of extra whole copies needed to cover frame_end.
        extra_copies = int(ceil((frame_end - available) / float(available)))
        samples = np.concatenate([samples] * (extra_copies + 1))
    return samples[frame_start:frame_end]
Example 3
Source File: fastgen.py From magenta with Apache License 2.0 | 6 votes |
def load_batch_audio(files, sample_length=64000):
    """Load a batch of audio from .wav files.

    Args:
        files: A list of filepaths to .wav files.
        sample_length: Maximum sample length

    Returns:
        batch: An array of audio; clips shorter than ``sample_length`` are
            zero-padded on the right up to ``sample_length``.
    """
    batch = []
    for wav_path in files:
        audio = utils.load_audio(wav_path, sample_length, sr=16000)
        n_samples = audio.shape[0]
        if n_samples >= sample_length:
            batch.append(audio)
            continue
        # Short clip: copy it into the front of a zero buffer.
        padded = np.zeros([sample_length])
        padded[:n_samples] = audio
        batch.append(padded)
    return np.array(batch)
Example 4
Source File: resnet_v2_predict.py From keras-audio with MIT License | 6 votes |
def load_audio_path_label_pairs(max_allowed_pairs=None):
    """Pair each GTZAN test song path with its integer label.

    Paths are read from ``test_songs_gtzan_list.txt`` and labels from
    ``test_gt_gtzan_list.txt``; the i-th label is matched with the i-th path.

    :param max_allowed_pairs: optional cap on the number of pairs returned;
        ``None`` means no cap.
    :return: list of ``(audio_path, label)`` tuples.
    """
    download_gtzan_genres_if_not_found('./very_large_data/gtzan')

    with open('./data/lists/test_songs_gtzan_list.txt', 'rt') as songs_file:
        audio_paths = ['./very_large_data/' + row.strip() for row in songs_file]

    pairs = []
    with open('./data/lists/test_gt_gtzan_list.txt', 'rt') as labels_file:
        for index, row in enumerate(labels_file):
            label = int(row)
            if max_allowed_pairs is not None and index >= max_allowed_pairs:
                break
            pairs.append((audio_paths[index], label))
    return pairs
Example 5
Source File: ikala.py From mirdata with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_instrumental_audio(audio_path):
    """Load the instrumental channel of an iKala recording.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the signal stored at channel index 0
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        # Loaded with mono=False so the individual channels can be indexed.
        channels, sample_rate = librosa.load(audio_path, sr=None, mono=False)
        return channels[0, :], sample_rate
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 6
Source File: ikala.py From mirdata with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_mix_audio(audio_path):
    """Load an iKala mix.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono-loaded signal multiplied by 2
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        averaged, sample_rate = librosa.load(audio_path, sr=None, mono=True)
        # multiply by 2 because librosa averages the left and right channel.
        mix = 2.0 * averaged
        return mix, sample_rate
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 7
Source File: groove_midi.py From mirdata with BSD 3-Clause "New" or "Revised" License | 6 votes |
def load_audio(audio_path):
    """Load a Groove MIDI audio file.

    Args:
        audio_path (str): path to audio file, or None

    Returns:
        y (np.ndarray): the mono audio signal (None when ``audio_path`` is None)
        sr (float): The sample rate of the audio file (None when
            ``audio_path`` is None)

    Raises:
        IOError: if ``audio_path`` is given but does not exist

    """
    if audio_path is None:
        return None, None
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=22050, mono=True)
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 8
Source File: resnet_v2_predict.py From mxnet-audio with MIT License | 6 votes |
def load_audio_path_label_pairs(max_allowed_pairs=None):
    """Pair each GTZAN test song path with its integer label.

    Every path is resolved through ``patch_path``. The i-th label in
    ``test_gt_gtzan_list.txt`` is matched with the i-th path in
    ``test_songs_gtzan_list.txt``.

    :param max_allowed_pairs: optional cap on the number of pairs returned;
        ``None`` means no cap.
    :return: list of ``(audio_path, label)`` tuples.
    """
    from mxnet_audio.library.utility.gtzan_loader import download_gtzan_genres_if_not_found
    download_gtzan_genres_if_not_found(patch_path('very_large_data/gtzan'))

    with open(patch_path('data/lists/test_songs_gtzan_list.txt'), 'rt') as songs_file:
        audio_paths = [patch_path('very_large_data/' + row.strip()) for row in songs_file]

    pairs = []
    with open(patch_path('data/lists/test_gt_gtzan_list.txt'), 'rt') as labels_file:
        for index, row in enumerate(labels_file):
            label = int(row)
            if max_allowed_pairs is not None and index >= max_allowed_pairs:
                break
            pairs.append((audio_paths[index], label))
    return pairs
Example 9
Source File: transcribe.py From onsets-and-frames with MIT License | 6 votes |
def load_and_process_audio(flac_path, sequence_length, device):
    """Read an int16 audio file and return it as a scaled float tensor.

    Args:
        flac_path: file passed to ``soundfile.read``.
        sequence_length: number of samples to crop to, or None to keep the
            whole file. The crop start is drawn from a generator seeded with
            42 and rounded down to a multiple of ``HOP_LENGTH``.
        device: target passed to ``Tensor.to``.

    Returns:
        Float tensor on ``device`` holding the samples divided by 32768.
    """
    rng = np.random.RandomState(seed=42)

    audio, sr = soundfile.read(flac_path, dtype='int16')
    assert sr == SAMPLE_RATE

    audio = torch.ShortTensor(audio)
    if sequence_length is not None:
        # Snap the random offset down to a hop boundary.
        step_begin = rng.randint(len(audio) - sequence_length) // HOP_LENGTH
        begin = step_begin * HOP_LENGTH
        audio = audio[begin:begin + sequence_length]

    return audio.to(device).float().div_(32768.0)
Example 10
Source File: audio_io.py From synvae with MIT License | 6 votes |
def load_audio(audio_filename, sample_rate):
    """Loads an audio file.

    Args:
      audio_filename: File path to load.
      sample_rate: The number of samples per second at which the audio will be
          returned; passed to ``librosa.load`` as ``sr``.

    Returns:
      The samples returned by ``librosa.load`` with ``mono=True``.

    Raises:
      AudioIOReadError: If librosa is unable to load the audio data.
    """
    try:
        samples, _ = librosa.load(audio_filename, sr=sample_rate, mono=True)
    except Exception as err:  # pylint: disable=broad-except
        # Any loader failure is surfaced under a single exception type.
        raise AudioIOReadError(err)
    else:
        return samples
Example 11
Source File: cfp.py From Melody-extraction-with-melodic-segnet with MIT License | 6 votes |
def load_audio(filepath, sr=None, mono=True, dtype='float32'):
    """Read an audio file, optionally down-mixing and resampling it.

    Paths containing ``'.mp3'`` are first converted to a temporary WAV file
    with pydub; everything else is read directly.

    Args:
        filepath: path of the audio file.
        sr: target sample rate; falsy values skip resampling.
        mono: when true, multi-column data is averaged over axis 1.
        dtype: dtype the samples are cast to before returning.

    Returns:
        ``(x, fs)``: the samples and their sample rate (``sr`` if resampled).
    """
    if '.mp3' in filepath:
        from pydub import AudioSegment
        import tempfile
        import os
        mp3 = AudioSegment.from_mp3(filepath)
        fd, path = tempfile.mkstemp()
        # mkstemp hands back an open descriptor; close it so it is not leaked.
        os.close(fd)
        try:
            mp3.export(path, format="wav")
            del mp3
            x, fs = sf.read(path)
        finally:
            # Remove the temporary file even when export/read fails.
            os.remove(path)
    else:
        x, fs = sf.read(filepath)

    if mono and len(x.shape) > 1:
        x = np.mean(x, axis=1)
    if sr:
        x = scipy.signal.resample_poly(x, sr, fs)
        fs = sr
    x = x.astype(dtype)
    return x, fs
Example 12
Source File: audio_reader.py From tensorflow-wavenet with MIT License | 6 votes |
def load_generic_audio(directory, sample_rate):
    '''Generator that yields audio waveforms from the directory.

    Yields ``(audio, filename, category_id)`` tuples, where ``audio`` is the
    mono signal reshaped to a single column and ``category_id`` is the integer
    parsed from the first ``FILE_PATTERN`` match in the file name, or None
    when the name does not match.
    '''
    files = find_files(directory)
    id_pattern = re.compile(FILE_PATTERN)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        matches = id_pattern.findall(filename)
        # No match means the file name carries no id.
        category_id = int(matches[0][0]) if matches else None
        waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
        yield waveform.reshape(-1, 1), filename, category_id
Example 13
Source File: core.py From KoSpeech with Apache License 2.0 | 6 votes |
def load_audio(audio_path, del_silence):
    """
    Load a raw 16-bit PCM file as a float32 signal divided by 32767.

    If del_silence is True, only the intervals returned by
    ``split(signal, top_db=30)`` are kept.
    If a ValueError, RuntimeError or IOError occurs, it is logged at debug
    level and None is returned.
    """
    try:
        pcm = np.memmap(audio_path, dtype='h', mode='r').astype('float32')
        if del_silence:
            kept = [pcm[start:end] for start, end in split(pcm, top_db=30)]
            pcm = np.concatenate(kept)
        return pcm / 32767  # normalize audio
    except (ValueError, RuntimeError, IOError) as error:
        # Label by the first matching type, in the same priority order as
        # separate except clauses would use.
        if isinstance(error, ValueError):
            logger.debug('ValueError in {0}'.format(audio_path))
        elif isinstance(error, RuntimeError):
            logger.debug('RuntimeError in {0}'.format(audio_path))
        else:
            logger.debug('IOError in {0}'.format(audio_path))
        return None
Example 14
Source File: audio_reader.py From SampleRNN with GNU Lesser General Public License v3.0 | 5 votes |
def load_generic_audio(directory, sample_rate):
    '''Generator that yields audio waveforms from the directory.

    Yields ``(audio, filename)`` tuples, where ``audio`` is the mono signal
    reshaped to a single column.
    '''
    files = find_files(directory)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
        yield waveform.reshape(-1, 1), filename
Example 15
Source File: model.py From honk with MIT License | 5 votes |
def load_audio(self, example, silence=False):
    """Return the (possibly augmented) waveform for ``example``.

    Args:
        example: path of the audio file; replaced by the key "__silence__"
            when ``silence`` is true.
        silence: when true, produce a zero signal instead of loading a file
            (background noise is then always mixed in).

    Returns:
        The waveform array, which is also stored in ``self._audio_cache``.
    """
    if silence:
        example = "__silence__"
    # Serve from the cache with probability 0.7, or always outside training;
    # otherwise (or on a cache miss) rebuild the sample below.
    if random.random() < 0.7 or not self.set_type == DatasetType.TRAIN:
        try:
            return self._audio_cache[example]
        except KeyError:
            pass
    in_len = self.input_length
    if self.bg_noise_audio:
        # Pick a random window of in_len samples from a random noise clip.
        bg_noise = random.choice(self.bg_noise_audio)
        a = random.randint(0, len(bg_noise) - in_len - 1)
        bg_noise = bg_noise[a:a + in_len]
    else:
        bg_noise = np.zeros(in_len)

    if silence:
        data = np.zeros(in_len, dtype=np.float32)
    else:
        # Decoded files are kept in _file_cache so each is loaded only once.
        file_data = self._file_cache.get(example)
        data = librosa.core.load(example, sr=16000)[0] if file_data is None else file_data
        self._file_cache[example] = data
    # Right-pad with zeros up to in_len; longer clips are not shortened here.
    data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
    if self.set_type == DatasetType.TRAIN:
        data = self._timeshift_audio(data)

    if random.random() < self.noise_prob or silence:
        # Mix in noise scaled by a factor in [0, 0.1) and clip to [-1, 1].
        a = random.random() * 0.1
        data = np.clip(a * bg_noise + data, -1, 1)

    self._audio_cache[example] = data
    return data
Example 16
Source File: data_loader.py From pytorch-nlp with MIT License | 5 votes |
def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15), gain_range=(-6, 8)):
    """
    Draw a tempo and a gain uniformly from the given ranges and apply them to
    the utterance through ``augment_audio_with_sox``.

    :param path: audio file to augment
    :param sample_rate: forwarded to ``augment_audio_with_sox``
    :param tempo_range: ``(low, high)`` bounds for the tempo draw
    :param gain_range: ``(low, high)`` bounds for the gain draw
    :return: the augmented utterance
    """
    # Tempo is drawn before gain; keep this order so seeded runs reproduce.
    tempo_low, tempo_high = tempo_range
    tempo = np.random.uniform(low=tempo_low, high=tempo_high)
    gain_low, gain_high = gain_range
    gain = np.random.uniform(low=gain_low, high=gain_high)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate, tempo=tempo, gain=gain)
Example 17
Source File: data_loader.py From pytorch-nlp with MIT License | 5 votes |
def load_audio(path):
    """Load an audio file with torchaudio and return it as a NumPy array.

    Arrays with more than one dimension are squeezed when their second axis
    has length 1 and averaged over that axis otherwise.
    """
    waveform, _ = torchaudio.load(path)
    samples = waveform.numpy()
    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    return samples.mean(axis=1)  # multiple channels, average
Example 18
Source File: datautils.py From panotti with MIT License | 5 votes |
def load_audio(audio_path, mono=None, sr=None, convertOSXaliases=True):  # wrapper for librosa.load
    """Load audio via ``librosa.load``, retrying through an OS X alias lookup.

    On ``NoBackendError`` the path is resolved with ``resolve_osx_alias`` when
    running on Darwin and loading is retried once; on any other platform, or
    if the retry fails too, an error line is printed and the exception is
    re-raised.

    Returns:
        ``(signal, sr)`` as returned by ``librosa.load``.
    """
    try:
        signal, sr = librosa.load(audio_path, mono=mono, sr=sr)
    except NoBackendError as error:
        if 'Darwin' != platform.system():
            print("\n*** ERROR: Could not open audio file {}".format(audio_path), "\n", flush=True)
            raise error
        # handle OS X alias files gracefully (convert to symlinks for next time)
        source = resolve_osx_alias(audio_path, convert=convertOSXaliases, already_checked_os=True)
        try:
            signal, sr = librosa.load(source, mono=mono, sr=sr)
        except NoBackendError as retry_error:
            print("\n*** ERROR: Could not open audio file {}".format(audio_path), "\n", flush=True)
            raise retry_error
    return signal, sr
Example 19
Source File: utils.py From Tensorflow-Keyword-Spotting with Apache License 2.0 | 5 votes |
def load_audio_file(file_path,sample_rate):
    """Load a clip and force it to exactly ``sample_rate`` samples.

    The file is loaded at ``sample_rate``; longer clips are cut off and
    shorter ones are zero-padded on the right.
    """
    target_len = sample_rate
    samples = librosa.core.load(file_path, sr=sample_rate)[0]
    if len(samples) > target_len:
        return samples[:target_len]
    missing = max(0, target_len - len(samples))
    return np.pad(samples, (0, missing), "constant")
Example 20
Source File: voyagerimb.py From voyagerimb with MIT License | 5 votes |
def model_load_audio_data(self, filename):
    """Read a WAV file into ``self.rate`` / ``self.audio_data``.

    The root widget's cursor is switched to "watch" while the file is being
    read and is reset afterwards, including when reading fails.

    Args:
        filename: path passed to ``scipy.io.wavfile.read``.

    Raises:
        Whatever ``scipy.io.wavfile.read`` raises for an unreadable file.
    """
    self.root.config(cursor="watch")
    self.root.update()
    try:
        self.rate, self.audio_data = scipy.io.wavfile.read(filename)
    finally:
        # Always restore the pointer so a failed load does not leave the UI
        # stuck showing the busy cursor.
        self.root.config(cursor="")
Example 21
Source File: models.py From openl3 with MIT License | 5 votes |
def load_audio_embedding_model(input_repr, content_type, embedding_size): """ Returns a model with the given characteristics. Loads the model if the model has not been loaded yet. Parameters ---------- input_repr : "linear", "mel128", or "mel256" Spectrogram representation used for audio model. content_type : "music" or "env" Type of content used to train embedding. embedding_size : 6144 or 512 Embedding dimensionality. Returns ------- model : keras.models.Model Model object. """ # Construct embedding model and load model weights with warnings.catch_warnings(): warnings.simplefilter("ignore") m = AUDIO_MODELS[input_repr]() m.load_weights(get_audio_embedding_model_path(input_repr, content_type)) # Pooling for final output embedding size pool_size = AUDIO_POOLING_SIZES[input_repr][embedding_size] y_a = MaxPooling2D(pool_size=pool_size, padding='same')(m.output) y_a = Flatten()(y_a) m = Model(inputs=m.input, outputs=y_a) return m
Example 22
Source File: predict.py From Looking-to-Listen with MIT License | 5 votes |
def LoadAudio(fname):
    """Load ``fname`` and return its peak-normalised STFT magnitude and phase.

    Returns:
        ``(mag, phase)``: the magnitude divided by its maximum, and the
        unit-modulus complex phase ``exp(1j * angle(spec))``.
    """
    waveform, _ = load(fname, sr=SR)
    spectrum = stft(waveform, n_fft=FFT_SIZE, hop_length=HOP_LEN, win_length=WIN_LEN)
    magnitude = np.abs(spectrum)
    # NOTE(review): an all-zero spectrum makes this divide by zero.
    magnitude = magnitude / np.max(magnitude)
    phase = np.exp(1.j * np.angle(spectrum))
    return magnitude, phase
Example 23
Source File: tvnplayer.py From filmkodi with Apache License 2.0 | 5 votes |
def LOAD_AND_PLAY_AUDIO(self, url, title, player=True):
    """Play ``url`` as music, or show an error dialog when ``url`` is False."""
    if url != False:
        self.__LOAD_AND_PLAY(url, title, player, "music")
        return
    # No link available: tell the user instead of trying to play.
    dialog = xbmcgui.Dialog()
    dialog.ok('Brak linku!', 'Przepraszamy, chwilowa awaria.', 'Zapraszamy w innym terminie.')
Example 24
Source File: tvnplayer.py From filmkodi with Apache License 2.0 | 5 votes |
def LOAD_AND_PLAY_AUDIO_WATCHED(self, url):
    """Play ``url`` as audio, or show an error dialog and return False.

    NEW: this version uses xbmcplugin.setResolvedUrl and supports the
    "watched" status.
    """
    if url != False:
        return self.__LOAD_AND_PLAY_WATCHED(url, 'audio')
    # No link available: tell the user and report failure.
    dialog = xbmcgui.Dialog()
    dialog.ok('Brak linku!', 'Przepraszamy, chwilowa awaria.', 'Zapraszamy w innym terminie.')
    return False
Example 25
Source File: audio.py From python-dlpy with Apache License 2.0 | 5 votes |
def load_audio_metadata_speechrecognition(cls, conn, path, audio_path):
    '''
    Pre-process and load the metadata for speech recognition

    Parameters
    ----------
    conn : CAS
        A connection object to the current session. When None, the
        connection is taken from ``cls.get_connection()``.
    path : string
        Location to the input metadata file.
    audio_path : string
        Passed to ``DataClean.process_contents`` as ``audio_path``.

    Returns
    -------
    :class:`CASTable`

    Raises
    ------
    DLPyError
        If no connection object can be obtained.

    '''
    if conn is None:
        conn = cls.get_connection()
    if conn is None:
        raise DLPyError('cannot get a connection object to the current session.')

    table_name = random_name('AudioTable_Metadata', 6)

    cleaner = DataClean(conn=conn, contents_as_path=path)
    response = cleaner.process_contents(audio_path=audio_path)
    tbl = cleaner.create_castable(response['results'], table_name, replace=True, promote=False,
                                  col_names=response['col_names'])

    # Computed column _fName_ mirrors _filename_ as a varchar.
    scode = ('length _fName_ varchar(*); '
             '_fName_ = _filename_; ')
    computed = CASTable(tbl, computedvars=['_fName_'], computedvarsprogram=scode)
    conn.table.partition(table=computed, casout={'name': tbl, 'replace': True})

    return CASTable(tbl)
Example 26
Source File: audio.py From python-dlpy with Apache License 2.0 | 5 votes |
def load_audio_metadata(cls, conn, path, audio_path, task='speech2text'):
    '''
    Pre-process and load the metadata

    Parameters
    ----------
    conn : CAS
        A connection object to the current session. When None, the
        connection is taken from ``cls.get_connection()``.
    path : string
        Location to the input metadata file.
    audio_path : string
        Location to the audio files.
    task : string, optional
        Specifies the task
        Note: currently only support 'speech2text' (default)

    Returns
    -------
    :class:`CASTable`

    Raises
    ------
    DLPyError
        If no connection can be obtained or the task is not supported.

    '''
    if conn is None:
        conn = cls.get_connection()
    if conn is None:
        raise DLPyError('cannot get a connection object to the current session.')

    if task != 'speech2text':
        raise DLPyError("We do not support this task yet!")
    return cls.load_audio_metadata_speechrecognition(conn, path, audio_path)
Example 27
Source File: wpe.py From fdndlp with MIT License | 5 votes |
def load_audio(self, filename):
    """Read ``filename`` and return its first ``self.channels`` channels.

    Returns:
        ``(data, fs)``: a copy of the transposed samples limited to
        ``self.channels`` rows, and the sample rate from ``sf.read``.
    """
    samples, fs = sf.read(filename, always_2d=True)
    samples = samples.T
    n_channels = samples.shape[0]
    assert(n_channels >= self.channels)
    if n_channels > self.channels:
        print(
            "The number of the input channels is %d," % n_channels,
            "and only the first %d channels are loaded." % self.channels)
    # Slicing is a no-op when the file has exactly self.channels channels.
    return samples[0: self.channels].copy(), fs
Example 28
Source File: audio_classes.py From sigsep-mus-db with MIT License | 5 votes |
def load_audio(self, path, stem_id, chunk_start=0, chunk_duration=None):
    """array_like: [shape=(num_samples, num_channels)]

    Reads ``path`` with stempeg (stems) or soundfile (wav) and records the
    sample rate in ``self._rate``. Raises ValueError, after resetting
    ``self._rate`` and ``self._audio`` to None, when ``self.path`` does not
    exist.
    """
    # NOTE(review): existence is checked on self.path, not on the `path`
    # argument that is actually read - confirm this is intended.
    if not os.path.exists(self.path):
        self._rate = None
        self._audio = None
        raise ValueError("Oops! %s cannot be loaded" % path)

    if self.is_wav:
        chunk_start = int(chunk_start * self.rate)
        # stop in soundfile is calc in samples, not seconds; a falsy duration
        # is passed through unchanged.
        stop = chunk_start + int(chunk_duration * self.rate) if chunk_duration else chunk_duration
        audio, rate = sf.read(path, always_2d=True, start=chunk_start, stop=stop)
    else:
        # read using stempeg
        audio, rate = stempeg.read_stems(
            filename=path,
            stem_id=stem_id,
            start=chunk_start,
            duration=chunk_duration,
            info=self.info,
        )
    self._rate = rate
    return audio
Example 29
Source File: wavenet_data.py From HandsOnDeepLearningWithPytorch with MIT License | 5 votes |
def load_audio(filename, sample_rate=16000, trim=True, trim_frame_length=2048):
    """Load a mono clip as a single-column array, optionally trimmed.

    Args:
        filename: audio file to load.
        sample_rate: rate passed to ``librosa.load``.
        trim: when greater than 0 (True counts), the array is passed through
            ``librosa.effects.trim``.
        trim_frame_length: ``frame_length`` used for trimming.
    """
    waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
    column = waveform.reshape(-1, 1)
    if not trim > 0:
        return column
    trimmed, _ = librosa.effects.trim(column, frame_length=trim_frame_length)
    return trimmed
Example 30
Source File: refi.py From QualCoder with MIT License | 5 votes |
def load_audio_source(self, element):
    """ Copy an audio source into the project and register it in sqlite.

    The file is copied to the project's /audio/ folder under its original
    name and a row is inserted into the ``source`` table.
    Path to file can be internal or relative.
    e.g. path="relative:///DF370983‐F009‐4D47‐8615‐711633FA9DE6.m4a"

    Loading of the transcript and codings is not implemented yet (see TODOs).
    """

    name, creating_user, create_date, source_path = self.name_creating_user_create_date_source_path_helper(element)
    # Copy file into .qda audio folder and rename into original name
    #print(source_path)
    destination = self.app.project_path + "/audio/" + name
    media_path = "/audio/" + name
    #print(destination)
    try:
        shutil.copyfile(source_path, destination)
    except Exception as e:
        # A failed copy is only reported to the text widget; the source row
        # below is still inserted.
        self.parent_textEdit.append(_('Cannot copy Audio file from: ') + source_path + "\nto: " + destination + '\n' + str(e))
    cur = self.app.conn.cursor()
    cur.execute("insert into source(name,memo,owner,date, mediapath, fulltext) values(?,?,?,?,?,?)", (name, '', creating_user, create_date, media_path, None))
    self.app.conn.commit()
    # Row id of the new source; not used yet, presumably for the TODOs below.
    cur.execute("select last_insert_rowid()")
    id_ = cur.fetchone()[0]

    #TODO load transcript
    #TODO transcript contains SynchPoints AKA timestamps
    #TODO load codings
    ''' <PictureSelection guid="04980e59-b290-4481-8cb6-e732824440a1" firstX="783" firstY="1238" secondX="1172" secondY="1788" name="a stylised faced on the lecture slide. " creatingUser="70daf61c-b6f0-4b5e-8c2f-548fde3ad3d4" creationDateTime="2019-03-09T23:19:07Z"> <Coding guid="7a7e80ca-ed8c-4006-86b3-731e36baca19" creatingUser="70daf61c-b6f0-4b5e-8c2f-548fde3ad3d4" ><CodeRef targetGUID="1b594544-2954-4b67-86ff-fb552f090ba8"/> </Coding></PictureSelection>'''
Example 31
Source File: audio.py From PolyglotDB with MIT License | 5 votes |
def load_audio(self, discourse, file_type):
    """
    Loads the audio file of a discourse for the requested file type
    (``consonant``, ``vowel`` or ``low_freq``). Any other ``file_type`` value
    loads the sound file's main ``file_path``.

    Parameters
    ----------
    discourse : str
        Name of the audio file to load
    file_type : str
        One of ``consonant``, ``vowel`` or ``low_freq``

    Returns
    -------
    numpy.array
        Audio signal
    int
        Sampling rate of the file
    """
    sound_file = self.discourse_sound_file(discourse)
    # Map the file type to the attribute holding its path; unknown types fall
    # back to the main file.
    path_attribute = {
        'consonant': 'consonant_file_path',
        'vowel': 'vowel_file_path',
        'low_freq': 'low_freq_file_path',
    }.get(file_type, 'file_path')
    path = os.path.expanduser(getattr(sound_file, path_attribute))
    signal, sr = librosa.load(path, sr=None)
    return signal, sr
Example 32
Source File: sound_to_tfrecords.py From kinetics-downloader with MIT License | 5 votes |
def load_audio(path, sampling_rate):
    """
    Load audio using the librosa library.

    :param path: Path to the audio file.
    :param sampling_rate: Sampling rate to convert all audios to.
    :return: Audio data (the sample-rate element of librosa's result is dropped).
    """
    return librosa.load(path, sr=sampling_rate, mono=True)[0]
Example 33
Source File: audio_model.py From Tensorflow-Audio-Classification with Apache License 2.0 | 5 votes |
def load_audio_slim_checkpoint(session, checkpoint_path): """Loads a pre-trained audio-compatible checkpoint. This function can be used as an initialization function (referred to as init_fn in TensorFlow documentation) which is called in a Session after initializating all variables. When used as an init_fn, this will load a pre-trained checkpoint that is compatible with the audio model definition. Only variables defined by audio will be loaded. Args: session: an active TensorFlow session. checkpoint_path: path to a file containing a checkpoint that is compatible with the audio model definition. """ # Get the list of names of all audio variables that exist in # the checkpoint (i.e., all inference-mode audio variables). with tf.Graph().as_default(): define_audio_slim(training=False) audio_var_names = [v.name for v in tf.global_variables()] # Get list of variables from exist graph which passed by session with session.graph.as_default(): global_variables = tf.global_variables() # Get the list of all currently existing variables that match # the list of variable names we just computed. audio_vars = [v for v in global_variables if v.name in audio_var_names] # Use a Saver to restore just the variables selected above. saver = tf.train.Saver(audio_vars, name='audio_load_pretrained', write_version=1) saver.restore(session, checkpoint_path)
Example 34
Source File: data_helpers.py From MELD with GNU General Public License v3.0 | 5 votes |
def load_audio_data(self, ):
    """Load the pickled audio embeddings for ``self.MODE`` and derive data.

    Reads ``(train, val, test)`` embeddings from
    ``./data/pickles/audio_embeddings_feature_selection_<mode>.pkl`` into
    ``self.train_audio_emb``, ``self.val_audio_emb`` and
    ``self.test_audio_emb``, then calls the dialogue embedding, length, label
    and mask helpers in that order.
    """
    AUDIO_PATH = "./data/pickles/audio_embeddings_feature_selection_{}.pkl".format(self.MODE.lower())
    # The file is opened in a context manager so the handle is closed even if
    # unpickling fails.
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(AUDIO_PATH, "rb") as handle:
        self.train_audio_emb, self.val_audio_emb, self.test_audio_emb = pickle.load(handle)

    self.get_dialogue_audio_embs()
    self.get_dialogue_lengths()
    self.get_dialogue_labels()
    self.get_masks()
Example 35
Source File: _player.py From ai-makers-kit with MIT License | 5 votes |
def load_audio(self, wav_path):
    """Read a mono WAV file into the player's loaded-audio fields.

    Sets ``self._loaded_bytes`` (all frames), ``self._loaded_samplerate`` and
    ``self._loaded_samplewidth``.

    Args:
        wav_path: path of the WAV file.

    Raises:
        ValueError: if the file has more than one channel.
    """
    wav = wave.open(wav_path, 'r')
    try:
        if wav.getnchannels() != 1:
            raise ValueError(wav_path + ' is not a mono file')
        self._loaded_bytes = wav.readframes(wav.getnframes())
        self._loaded_samplerate = wav.getframerate()
        self._loaded_samplewidth = wav.getsampwidth()
    finally:
        # Close the file on every path, including the non-mono rejection.
        wav.close()
Example 36
Source File: data_loader.py From LipReading with MIT License | 5 votes |
def load_audio(path):
    """Load an audio file with torchaudio (``normalization=True``) as NumPy.

    Arrays with more than one dimension are squeezed when their second axis
    has length 1 and averaged over that axis otherwise.
    """
    waveform, _ = torchaudio.load(path, normalization=True)
    samples = waveform.numpy()
    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    return samples.mean(axis=1)  # multiple channels, average
Example 37
Source File: audio_signal.py From nussl with MIT License | 5 votes |
def load_audio_from_array(self, signal, sample_rate=constants.DEFAULT_SAMPLE_RATE):
    """
    Loads an audio signal from a :obj:`np.ndarray`. :param:`sample_rate` is the sample rate
    of the signal.

    See Also:
        * :func:`load_audio_from_file` to read in an audio file from disc.

    Notes:
        Non-floating arrays are converted to float and divided by the int16
        full-scale value (32768).

    Parameters:
        signal (:obj:`np.ndarray`): Array containing the audio signal sampled at
            :param:`sample_rate`. Must be exactly of type ``np.ndarray``.
        sample_rate (int): The sample rate of signal; ``None`` falls back to
            :ref:`constants.DEFAULT_SAMPLE_RATE`.

    """
    assert (type(signal) == np.ndarray)

    self.path_to_input_file = None

    # Change from fixed point to floating point
    is_floating = np.issubdtype(signal.dtype, np.floating)
    if not is_floating:
        int16_full_scale = np.iinfo(np.dtype('int16')).max + 1.0
        signal = signal.astype('float') / int16_full_scale

    self.audio_data = signal
    self.original_signal_length = self.signal_length

    if sample_rate is not None:
        self._sample_rate = sample_rate
    else:
        self._sample_rate = constants.DEFAULT_SAMPLE_RATE

    self.set_active_region_to_default()
Example 38
Source File: feat_ext.py From icassp19 with MIT License | 5 votes |
def load_audio_file(file_path, input_fixed_length=0, params_extract=None):
    """Read an audio file, resample it if requested, and return one column.

    :param file_path: path of the audio file to read.
    :param input_fixed_length: number of rows of the all-ones placeholder
        returned when the file contains no samples.
    :param params_extract: optional dict; its ``'fs'`` entry is the target
        sample rate. When the dict or the entry is missing, the file's own
        rate is kept.
    :return: array of shape (-1, 1).
    """
    data, source_fs = soundfile.read(file=file_path)
    data = data.T

    # params_extract defaults to None, so guard the lookup instead of
    # failing on None.get / resampling to a None rate.
    target_fs = params_extract.get('fs') if params_extract is not None else None

    # Resample if the source_fs is different from expected
    if target_fs is not None and target_fs != source_fs:
        # Keyword rates work on both old and new librosa releases.
        data = librosa.core.resample(data, orig_sr=source_fs, target_sr=target_fs)
        print('Resampling to %d: %s' % (target_fs, file_path))

    if len(data) > 0:
        data = get_normalized_audio(data)
    else:
        # 3 files are corrupted in the test set. They belong to the padding group (not used for evaluation)
        data = np.ones((input_fixed_length, 1))
        print('File corrupted. Could not open: %s' % file_path)

    # careful with the shape
    data = np.reshape(data, [-1, 1])
    return data
Example 39
Source File: train_audio.py From AudioEmotion with MIT License | 5 votes |
def load_audio_data():
    """Read ``../Output/data.csv`` and return a train/test split.

    The ``filename`` column is dropped, the last remaining column is
    label-encoded as the target and all other columns become float features.
    No feature scaling is applied here.

    :return: the result of ``train_test_split(x, y, test_size=0.2)``.
    """
    frame = pd.read_csv('../Output/data.csv').drop(['filename'], axis=1)

    labels = frame.iloc[:, -1]
    y = LabelEncoder().fit_transform(labels)

    x = np.array(frame.iloc[:, :-1], dtype=float)

    return train_test_split(x, y, test_size=0.2)
Example 40
Source File: __init__.py From SimpleAudioIndexer with Apache License 2.0 | 5 votes |
def load_indexed_audio(self, indexed_audio_file_abs_path):
    """
    Replace the stored timestamps with the object unpickled from a file.

    Parameters
    ----------
    indexed_audio_file_abs_path : str
        Path of the pickle file to read.
    """
    with open(indexed_audio_file_abs_path, "rb") as pickled:
        timestamps = pickle.load(pickled)
        self.__timestamps = timestamps
Example 41
Source File: audio.py From end2end-asr-pytorch with MIT License | 5 votes |
def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15), gain_range=(-6, 8)):
    """
    Draw a tempo and a gain uniformly from the given ranges and apply them to
    the utterance through ``augment_audio_with_sox``.

    :param path: audio file to augment
    :param sample_rate: forwarded to ``augment_audio_with_sox``
    :param tempo_range: ``(low, high)`` bounds for the tempo draw
    :param gain_range: ``(low, high)`` bounds for the gain draw
    :return: the augmented utterance
    """
    # Tempo is drawn before gain; keep this order so seeded runs reproduce.
    tempo_low, tempo_high = tempo_range
    tempo = np.random.uniform(low=tempo_low, high=tempo_high)
    gain_low, gain_high = gain_range
    gain = np.random.uniform(low=gain_low, high=gain_high)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate, tempo=tempo, gain=gain)
Example 42
Source File: audio.py From end2end-asr-pytorch with MIT License | 5 votes |
def load_audio(path):
    """Load an audio file with torchaudio (``normalization=True``) as NumPy.

    The array is transposed first; if it then has more than one dimension it
    is squeezed when its second axis has length 1 and averaged over that axis
    otherwise.
    """
    waveform, _ = torchaudio.load(path, normalization=True)
    samples = waveform.numpy().T
    if samples.ndim <= 1:
        return samples
    if samples.shape[1] == 1:
        return samples.squeeze()
    return samples.mean(axis=1)  # multiple channels, average
Example 43
Source File: soundnet.py From soundnet_keras with MIT License | 5 votes |
def load_audio(audio_file):
    """Load ``audio_file`` as mono float32 at 22050 Hz and preprocess it."""
    # SoundNet works on mono audio files with a sample rate of 22050.
    sample_rate = 22050
    waveform, _ = librosa.load(audio_file, dtype='float32', sr=sample_rate, mono=True)
    return preprocess(waveform)
Example 44
Source File: util.py From SoundNet-tensorflow with MIT License | 5 votes |
def load_audio(audio_path, sr=None):
    """Load ``audio_path`` with librosa without down-mixing (``mono=False``).

    Returns:
        ``(sound_sample, sr)`` exactly as returned by ``librosa.load``.
    """
    # sr is forwarded unchanged. Per the librosa documentation, sr=None keeps
    # the file's native sampling rate (22050 Hz is librosa's own default when
    # sr is omitted), and samples are returned as floats in (-1., 1.).
    loaded = librosa.load(audio_path, sr=sr, mono=False)
    sound_sample, sr = loaded
    return sound_sample, sr
Example 45
Source File: dataset.py From rafiki with Apache License 2.0 | 5 votes |
def load_dataset_of_audio_files(self, dataset_path, dataset_dir):
    '''
    Loads dataset with type `AUDIO_FILES`.

    :param dataset_path: first argument forwarded to ``AudioFilesDataset``
    :param dataset_dir: second argument forwarded to ``AudioFilesDataset``
    :returns: An instance of ``AudioFilesDataset``.
    '''
    dataset = AudioFilesDataset(dataset_path, dataset_dir)
    return dataset
Example 46
Source File: utils.py From magenta with Apache License 2.0 | 5 votes |
def load_audio(path, sample_length=64000, sr=16000):
    """Load a wave file and truncate it.

    Args:
      path: Location of a wave file to load.
      sample_length: Maximum number of samples kept from the start of the file.
      sr: Samples per second, passed to ``librosa.load``.

    Returns:
      out: The first ``sample_length`` samples of the loaded audio.
    """
    return librosa.load(path, sr=sr)[0][:sample_length]
Example 47
Source File: orchset.py From mirdata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_audio_mono(audio_path):
    """Load an Orchset audio file as mono.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the mono audio signal
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=True)
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 48
Source File: guitarset.py From mirdata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_multitrack_audio(audio_path):
    """Load a Guitarset multitrack audio file.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the audio signal, loaded with ``mono=False`` (channels
            are not averaged)
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=False)
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 49
Source File: orchset.py From mirdata with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_audio_stereo(audio_path):
    """Load an Orchset audio file without down-mixing.

    Args:
        audio_path (str): path to audio file

    Returns:
        y (np.ndarray): the audio signal, loaded with ``mono=False`` (channels
            are not averaged)
        sr (float): The sample rate of the audio file

    Raises:
        IOError: if ``audio_path`` does not exist

    """
    if os.path.exists(audio_path):
        return librosa.load(audio_path, sr=None, mono=False)
    raise IOError("audio_path {} does not exist".format(audio_path))
Example 50
Source File: data_loader.py From inference with Apache License 2.0 | 5 votes |
def load_randomly_augmented_audio(path, sample_rate=16000, tempo_range=(0.85, 1.15), gain_range=(-6, 8), frame_start=0, frame_end=-1):
    """
    Draw a tempo and a gain uniformly from the given ranges and apply them to
    the utterance through ``augment_audio_with_sox``.

    :param path: audio file to augment
    :param sample_rate: forwarded to ``augment_audio_with_sox``
    :param tempo_range: ``(low, high)`` bounds for the tempo draw
    :param gain_range: ``(low, high)`` bounds for the gain draw
    :param frame_start: forwarded to ``augment_audio_with_sox``
    :param frame_end: forwarded to ``augment_audio_with_sox``
    :return: the augmented utterance
    """
    # Tempo is drawn before gain; keep this order so seeded runs reproduce.
    tempo_low, tempo_high = tempo_range
    tempo = np.random.uniform(low=tempo_low, high=tempo_high)
    gain_low, gain_high = gain_range
    gain = np.random.uniform(low=gain_low, high=gain_high)
    return augment_audio_with_sox(path=path, sample_rate=sample_rate,
                                  tempo=tempo, gain=gain,
                                  frame_start=frame_start, frame_end=frame_end)
Example 51
Source File: data_source_loader.py From nnabla with Apache License 2.0 | 5 votes |
def load_audio(file, shape=None, normalize=False):
    """Load an audio file with pydub when available, else with the WAV loader.

    All three arguments are forwarded unchanged to ``load_audio_pydub`` or
    ``load_wav``, selected by the module-level ``pydub_available`` flag.
    """
    global pydub_available
    loader = load_audio_pydub if pydub_available else load_wav
    return loader(file, shape, normalize)
Example 52
Source File: paradrop_client.py From Paradrop with Apache License 2.0 | 5 votes |
def load_audio_module(self, module_name):
    """
    Load a module into the audio subsystem.

    Sends a POST to ``<base_url>/audio/modules`` with ``{"name": module_name}``
    as the JSON body and returns the result of ``self.request``.
    """
    endpoint = "{}/audio/modules".format(self.base_url)
    payload = {"name": module_name}
    return self.request("POST", endpoint, json=payload)
Example 53
Source File: gui.py From MIA-Japanese-Add-on with GNU General Public License v3.0 | 5 votes |
def loadAudioGraphFieldsCB(self):
    """Fill the audio-fields and pitch-graphs combo boxes.

    Each combo box receives, in order: a 'Clipboard' entry, a horizontal-rule
    entry that is disabled and centre-aligned so it acts as a separator, and
    then every entry of ``self.allFields``.
    """
    # audioFieldsCB is populated completely before pitchGraphsCB is touched.
    for combo_name in ('audioFieldsCB', 'pitchGraphsCB'):
        combo = getattr(self.ui, combo_name)
        combo.addItem('Clipboard')
        combo.addItem('──────────────────')
        # The rule was just appended, so it sits at the last index.
        separator = combo.model().item(combo.count() - 1)
        separator.setEnabled(False)
        separator.setTextAlignment(Qt.AlignCenter)
        combo.addItems(self.allFields)
Example 54
Source File: core.py From muda with ISC License | 4 votes |
def load_jam_audio(
    jam_in, audio_file, validate=True, strict=True, fmt="auto", **kwargs
):
    """Load a JAMS annotation and attach the matching audio to it.

    Parameters
    ----------
    jam_in : str, file descriptor, jams.JAMS, or None
        Where the annotation comes from. A ``jams.JAMS`` instance is used
        directly, ``None`` produces a new empty ``jams.JAMS()``, and anything
        else is handed to ``jams.load``.

    audio_file : str
        Audio filename, passed to ``librosa.load``.

    validate : bool
    strict : bool
    fmt : str
        Forwarded to ``jams.load`` (only used when ``jam_in`` has to be loaded).

    kwargs : additional keyword arguments
        Forwarded to ``librosa.load``.

    Returns
    -------
    jam : jams.JAMS
        The result of ``jam_pack`` called with the annotation and
        ``_audio=dict(y=y, sr=sr)``.

    Notes
    -----
    ``file_metadata.duration`` of the annotation is modified in place when it
    is ``None``: it is set to the duration of the loaded audio. When a
    ``jams.JAMS`` object was passed in, that is the caller's object.

    See Also
    --------
    jams.load
    librosa.core.load

    Examples
    --------
    Load a JAMS object and audio from disk

    >>> jam = muda.load_jam_audio('my_file.jams', 'my_file.wav')

    Load an audio file with no jams annotation

    >>> jam = muda.load_jam_audio(None, 'my_file.wav')
    """
    if jam_in is None:
        jam = jams.JAMS()
    elif isinstance(jam_in, jams.JAMS):
        jam = jam_in
    else:
        jam = jams.load(jam_in, validate=validate, strict=strict, fmt=fmt)

    y, sr = librosa.load(audio_file, **kwargs)

    # Only fill in the duration when the annotation does not already carry one.
    metadata = jam.file_metadata
    if metadata.duration is None:
        metadata.duration = librosa.get_duration(y=y, sr=sr)

    return jam_pack(jam, _audio=dict(y=y, sr=sr))
Example 55
Source File: LoadClipsExt.py From Luminosity with GNU General Public License v3.0 | 4 votes |
def LoadAudio(self, *args, local = True):
    """Install an audio-player plugin into a clip slot and configure it.

    The positional ``args`` are read by index as
    ``(name, path, mediaType, bank, channel, clip)``. The method:

    1. looks up the source plugin and the clip component at
       ``CLIP_DATA/<bank>/<channel>/<clip>``;
    2. reads the sample rate and frame count from
       ``self.LoadAudioComp.op('audioInfo')``;
    3. replaces any existing ``plugin`` child of the clip component with a
       copy of the source plugin;
    4. writes the clip attributes and parameter values into the plugin's
       ``CompAttr`` / ``CompPar`` storage;
    5. calls ``self.LoadProc`` when the ``NODE`` value is ``'master'``;
    6. fills the plugin's ``parameters`` table and points its ``audiofilein``
       operator at ``path``;
    7. schedules a script, delayed by 10 frames, that sets the file of
       ``loadAudio/loader`` to a sample mp3.

    ``local`` is not read anywhere in this method.

    ``op``, ``me`` and ``run`` are not defined here — presumably globals
    supplied by the host environment (TouchDesigner); confirm.
    """
    #args(path,name,mediaType,bank,channel,clip)
    # NOTE(review): the comment above lists path before name, but the code
    # below reads name from args[0] and path from args[1] — confirm which
    # order callers actually use.
    #print('Loading Movie')
    name = args[0]
    path = args[1]
    mediaType = args[2]
    bank = args[3]
    channel = args[4]
    clip = args [5]

    srcPlugin = op(me.fetch('PLUGINS') + '/players/audioPlayer/plugin')
    dataClipPath = me.fetch('CLIP_DATA') +'/'+ bank +'/'+ channel +'/'+ clip
    clipComp = op(dataClipPath)

    audioInfo = self.LoadAudioComp.op('audioInfo')
    sampleRate = audioInfo['sample_rate'].eval()
    length = audioInfo['true_file_length_frames'].eval()
    # Upper bound used for the trim/scrub ranges: one past the frame count.
    end = length + 1

    # Drop a previously installed plugin before copying in a fresh one.
    if clipComp.op('plugin'):
        clipComp.op('plugin').destroy()

    clipComp.copy(srcPlugin)
    pluginComp = clipComp.op('plugin')

    # Descriptive attributes and UI defaults/ranges kept in the plugin storage.
    compAttr = pluginComp.storage['CompAttr']
    compAttr['attr']['type'] = 'audio'
    compAttr['attr']['name'] = name
    compAttr['attr']['fileType'] = mediaType
    compAttr['attr']['length'] = length
    compAttr['uiAttr']['file']['default'] = path
    compAttr['uiAttr']['sampleRate']['default'] = sampleRate
    compAttr['uiAttr']['trimStart']['rangeHigh'] = end
    compAttr['uiAttr']['trimEnd']['rangeHigh'] = end
    compAttr['uiAttr']['trimEnd']['default'] = end
    compAttr['uiAttr']['scrub']['rangeHigh'] = end
    compAttr['uiAttr']['scrub']['default'] = 1
    compAttr['uiAttr']['speed']['default'] = 1

    # Current parameter values kept in the plugin storage.
    compPar = pluginComp.storage['CompPar']
    compPar['values']['file']['value'] = path
    compPar['values']['sampleRate']['value'] = sampleRate
    compPar['values']['trimStart']['value'] = 1
    compPar['values']['trimEnd']['value'] = end
    compPar['values']['scrub']['value'] = 1
    compPar['values']['speed']['value'] = 1

    # NOTE(review): only the LoadProc call is assumed to be conditional on
    # the node being 'master' — confirm against the original layout.
    if me.fetch('NODE') == 'master':
        self.LoadProc(name, path, mediaType, bank, channel, clip, dataClipPath, [compAttr['attr'], compAttr['uiAttr']], compPar['values'])

    # Write the same values into the plugin's 'parameters' table.
    parTable = pluginComp.op('parameters')
    parTable['file','value'] = path
    parTable['sampleRate','value'] = sampleRate
    parTable['trimStart','value'] = 1
    parTable['trimEnd','value'] = end
    parTable['speed','value'] = 1
    parTable['play','value'] = 0

    audioPlayer = pluginComp.op('audiofilein')
    audioPlayer.par.file = path

    # Script text executed 10 frames later: points loadAudio/loader at a
    # hard-coded sample file.
    setDefault = "op('loadAudio/loader').par.file = 'C:/Program Files/Derivative/TouchDesigner088/Samples/Audio/JeremyCaulfield_www.dumb-unit.com.mp3'"
    run(setDefault, delayFrames = 10)
Example 56
Source File: audio_analysis.py From AudioEmotion with MIT License | 4 votes |
def load_audio(path, with_path=True, recursive=True, ignore_failure=True, random_order=False):
    """
    Loads WAV file(s) from a path.

    Parameters
    ----------
    path : str
        Path to WAV files to be loaded. A path matching ``*.wav`` is treated
        as a single file; anything else is treated as a directory.

    with_path : bool, optional
        Indicates whether a path column is added to the returned SFrame.

    recursive : bool, optional
        Indicates whether ``load_audio`` should do a recursive directory traversal,
        or only load audio files directly under ``path``.

    ignore_failure : bool, optional
        If True, only print warnings for failed files and keep loading the remaining
        audio files. If False, the first failure raises.

    random_order : bool, optional
        Load audio files in random order.

    Returns
    -------
    out : SFrame
        Returns an SFrame with either an 'audio' column or both an 'audio' and
        a 'path' column. The 'audio' column is a column of dictionaries.

        Each dictionary contains two items. 'sample_rate' is the sample rate
        reported by ``librosa.load`` (called with ``sr=None``, i.e. no
        resampling). 'data' is the array returned by ``librosa.load``
        multiplied by 32768. ``librosa.load`` is called with its default
        ``mono=True``, so multi-channel files are mixed down and 'data' is
        always one-dimensional.

    Raises
    ------
    _ToolkitError
        If a file cannot be read and ``ignore_failure`` is False.

    Examples
    --------
    >>> audio_path = "~/Documents/myAudioFiles/"
    >>> audio_sframe = tc.audio_analysis.load_audio(audio_path, recursive=True)
    """
    all_wav_files = []

    if _fnmatch(path, '*.wav'):    # single file
        all_wav_files.append(path)
    elif recursive:
        for (dir_path, _, file_names) in _os.walk(path):
            for cur_file in file_names:
                if _fnmatch(cur_file, '*.wav'):
                    all_wav_files.append(dir_path + '/' + cur_file)
    else:
        all_wav_files = _glob(path + '/*.wav')

    if random_order:
        _shuffle(all_wav_files)

    result_builder = _tc.SFrameBuilder(column_types=[dict, str], column_names=['audio', 'path'])
    for cur_file_path in all_wav_files:
        try:
            data, sample_rate = librosa.load(cur_file_path, sr=None, res_type='scipy')
            # NOTE(review): 32768 == 2**15; presumably rescales librosa's
            # floats to the int16 amplitude range — confirm with consumers.
            data = data * 32768
        except Exception as e:
            error_string = "Could not read {}: {}".format(cur_file_path, e)
            if not ignore_failure:
                raise _ToolkitError(error_string)
            else:
                print(error_string)
                continue

        result_builder.append([{'sample_rate': sample_rate, 'data': data}, cur_file_path])

    result = result_builder.close()
    if not with_path:
        del result['path']
    return result
Example 57
Source File: audio_signal.py From nussl with MIT License | 4 votes |
def load_audio_from_file(self, input_file_path, offset=0, duration=None, new_sample_rate=None):
    # type: (str, float, float, int) -> None
    """
    Loads an audio signal into memory from a file on disk.

    The audio is read with ``librosa.load`` (native sample rate, channels kept
    separate) and stored in ``self.audio_data``; the sample rate read from the
    file is stored in ``self._sample_rate``. If ``new_sample_rate`` is not
    ``None`` and differs from the file's sample rate, a warning is issued and
    the audio is resampled via ``self.resample``. Afterwards the input path is
    recorded and the active region is reset to its default.

    ``offset`` and ``duration`` select how much of the file is read. Only the
    selected portion is stored in ``self.audio_data``.

    See Also:
        * :func:`load_audio_from_array` to read audio data from a :obj:`np.ndarray`.

    Args:
        input_file_path (str): Path to input file.
        offset (float): The starting point of the section to be extracted (seconds).
            Defaults to 0 seconds (i.e., the very beginning of the file).
        duration (float): Length of signal to load, in seconds. Defaults to
            ``None``, which reads until the end of the file.
        new_sample_rate (int): If this parameter is not ``None`` and differs from the
            sample rate of the input file, the audio data is resampled to this rate.

    Raises:
        AssertionError: If ``offset`` or ``duration`` is negative.
        AudioSignalException: If ``offset`` is greater than the file's duration.
    """
    assert offset >= 0, 'Parameter `offset` must be >= 0!'
    if duration is not None:
        assert duration >= 0, 'Parameter `duration` must be >= 0!'

    try:
        # try reading headers with soundfile for speed
        audio_info = sf.info(input_file_path)
        file_length = audio_info.duration
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must still
        # propagate instead of silently triggering the fallback.
        # if that doesn't work try audioread
        with audioread.audio_open(os.path.realpath(input_file_path)) as input_file:
            file_length = input_file.duration

    if offset > file_length:
        raise AudioSignalException('offset is longer than signal!')

    if duration is not None and offset + duration >= file_length:
        warnings.warn('offset + duration are longer than the signal.'
                      ' Reading until end of signal...',
                      UserWarning)

    audio_input, self._sample_rate = librosa.load(input_file_path,
                                                  sr=None,
                                                  offset=offset,
                                                  duration=duration,
                                                  mono=False)

    self.audio_data = audio_input
    self.original_signal_length = self.signal_length

    if new_sample_rate is not None and new_sample_rate != self._sample_rate:
        warnings.warn('Input sample rate is different than the sample rate'
                      ' read from the file! Resampling...',
                      UserWarning)
        self.resample(new_sample_rate)

    self.path_to_input_file = input_file_path
    self.set_active_region_to_default()