Python hparams.hparams.sample_rate() Examples
The following are 30 code examples of hparams.hparams.sample_rate(), collected from open-source projects. You can go to the original project or source file by following the link above each example, or browse the other available functions and classes of the hparams.hparams module.
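In most of the projects below, hparams is a module-level configuration object (often a tf.contrib.training.HParams instance in Tacotron-style code) whose sample_rate field is read wherever audio is loaded, saved, or converted between sample counts and wall-clock time. As a rough, self-contained sketch of that pattern (the field names and values here are illustrative assumptions, not the actual settings of any project listed below):

# Minimal stand-in for an hparams module (illustrative values only).
from types import SimpleNamespace

hparams = SimpleNamespace(
    sample_rate=22050,    # audio sampling rate in Hz
    frame_shift_ms=12.5,  # hop between analysis frames, in milliseconds
    num_mels=80,          # number of mel filterbank channels
)

# sample_rate is what converts between sample counts and duration,
# as in the write_metadata() examples below.
timesteps = 441000
hours = timesteps / hparams.sample_rate / 3600
print('{:.4f} hours'.format(hours))  # 0.0056 hours (20 seconds of audio)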
Example #1
Source File: audio.py, from Griffin_lim, MIT License (6 votes)
def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
Example #2
Source File: LWS.py, from Griffin_lim, MIT License (6 votes)
def main():
    data_folder = "data"
    wavs = [os.path.join(data_folder, file[:-4]) for file in os.listdir(data_folder) if file.endswith(".wav")]
    outputs_lws = [file + ".lws.gen.wav" for file in wavs]
    wavs = [audio.load_wav(wav_path + ".wav", hparams.sample_rate) for wav_path in wavs]
    lws_processor = lws.lws(512, 128, mode="speech")  # 512: window length; 128: window shift
    i = 0
    for x in wavs:
        X = lws_processor.stft(x)  # where x is a single-channel waveform
        X0 = np.abs(X)             # magnitude spectrogram
        print('{:6}: {:5.2f} dB'.format('Abs(X)', lws_processor.get_consistency(X0)))
        X1 = lws_processor.run_lws(X0)  # reconstruction from magnitude (in general, one can reconstruct from an initial complex spectrogram)
        print(X1.shape)
        print('{:6}: {:5.2f} dB'.format('LWS', lws_processor.get_consistency(X1)))
        print(X1.shape)
        wav = lws_processor.istft(X1).astype(np.float32)
        audio.save_wav(wav, outputs_lws[i])
        i += 1
Example #3
Source File: train_tacotron.py, from Tacotron-Wavenet-Vocoder-Korean, MIT License (6 votes)
def save_and_plot_fn(args, log_dir, step, loss, prefix):
    idx, (seq, spec, align) = args
    audio_path = os.path.join(log_dir, '{}-step-{:09d}-audio{:03d}.wav'.format(prefix, step, idx))
    align_path = os.path.join(log_dir, '{}-step-{:09d}-align{:03d}.png'.format(prefix, step, idx))
    waveform = inv_spectrogram(spec.T, hparams)
    save_wav(waveform, audio_path, hparams.sample_rate)
    info_text = 'step={:d}, loss={:.5f}'.format(step, loss)
    if 'korean_cleaners' in [x.strip() for x in hparams.cleaners.split(',')]:
        log('Training korean : Use jamo')
        plot.plot_alignment(align, align_path, info=info_text,
                            text=sequence_to_text(seq, skip_eos_and_pad=True, combine_jamo=True),
                            isKorean=True)
    else:
        log('Training non-korean : X use jamo')
        plot.plot_alignment(align, align_path, info=info_text,
                            text=sequence_to_text(seq, skip_eos_and_pad=True, combine_jamo=False),
                            isKorean=False)
Example #4
Source File: blizzard.py, from libfaceid, MIT License (6 votes)
def _process_utterance(out_dir, index, wav_path, labels_path, text):
    # Load the wav file and trim silence from the ends:
    wav = audio.load_wav(wav_path)
    start_offset, end_offset = _parse_labels(labels_path)
    start = int(start_offset * hparams.sample_rate)
    end = int(end_offset * hparams.sample_rate) if end_offset is not None else -1
    wav = wav[start:end]
    max_samples = _max_out_length * hparams.frame_shift_ms / 1000 * hparams.sample_rate
    if len(wav) > max_samples:
        return None
    spectrogram = audio.spectrogram(wav).astype(np.float32)
    n_frames = spectrogram.shape[1]
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)
    spectrogram_filename = 'blizzard-spec-%05d.npy' % index
    mel_filename = 'blizzard-mel-%05d.npy' % index
    np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
    np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)
    return (spectrogram_filename, mel_filename, n_frames, text)
Example #5
Source File: train.py, from WaveRNN-Pytorch, MIT License (6 votes)
def evaluate_model(model, data_loader, checkpoint_dir, limit_eval_to=5):
    """Evaluate model and save generated wav and plot."""
    test_path = data_loader.dataset.test_path
    test_files = os.listdir(test_path)
    counter = 0
    output_dir = os.path.join(checkpoint_dir, 'eval')
    for f in test_files:
        if f[-7:] == "mel.npy":
            mel = np.load(os.path.join(test_path, f))
            wav = model.generate(mel)
            # save wav
            wav_path = os.path.join(output_dir, "checkpoint_step{:09d}_wav_{}.wav".format(global_step, counter))
            librosa.output.write_wav(wav_path, wav, sr=hp.sample_rate)
            # save wav plot
            fig_path = os.path.join(output_dir, "checkpoint_step{:09d}_wav_{}.png".format(global_step, counter))
            fig = plt.plot(wav.reshape(-1))
            plt.savefig(fig_path)
            # clear fig to avoid drawing to the same plot
            plt.clf()
            counter += 1
        # stop evaluation early via limit_eval_to
        if counter >= limit_eval_to:
            break
Example #6
Source File: generate.py, from Tacotron-Wavenet-Vocoder-Korean, MIT License (6 votes)
def create_seed(filename, sample_rate, quantization_channels, window_size, scalar_input):
    # Use only the beginning of the seed.
    seed_audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    seed_audio = audio.trim_silence(seed_audio, hparams)
    if scalar_input:
        if len(seed_audio) < window_size:
            return seed_audio
        else:
            return seed_audio[:window_size]
    else:
        quantized = mu_law_encode(seed_audio, quantization_channels)
        # If it is shorter than window_size it is returned as-is; shouldn't it at least be padded?
        cut_index = tf.cond(tf.size(quantized) < tf.constant(window_size),
                            lambda: tf.size(quantized),
                            lambda: tf.constant(window_size))
        return quantized[:cut_index]
Example #7
Source File: synthesis_mel.py, from style-token_tacotron2, MIT License (6 votes)
def mel_synthesis(out_dir='wav_griffi_syn', in_dir='mel'):
    os.makedirs(out_dir, exist_ok=True)
    # mel_file = os.path.join(mel_folder, mel_file)
    mel_filenames = [x.split('.')[0] for x in os.listdir(in_dir)]
    start_time = time.time()
    for mel_file in mel_filenames:
        try:
            print('process {}'.format(mel_file))
            mel_file_path = os.path.join('training_data/mels', 'mel-{}.wav.npy'.format(mel_file))
            mel_spectro = np.load(mel_file_path)
            wav = inv_mel_spectrogram(mel_spectro.T, hparams)
            # save the wav under test_<folder>_<file>
            save_wav(wav,
                     os.path.join(out_dir, 'test_mel_{}.wav'.format(
                         mel_file.replace('/', '_').replace('\\', '_').replace('.npy', ''))),
                     sr=hparams.sample_rate)
        except:
            print('{} error'.format(mel_file))
    print('griffin-lim :{}'.format(time.time() - start_time))
Example #8
Source File: generate.py, from Tacotron2-Wavenet-Korean-TTS, MIT License (6 votes)
def create_seed(filename, sample_rate, quantization_channels, window_size, scalar_input):
    # Use only the beginning of the seed.
    seed_audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    seed_audio = audio.trim_silence(seed_audio, hparams)
    if scalar_input:
        if len(seed_audio) < window_size:
            return seed_audio
        else:
            return seed_audio[:window_size]
    else:
        quantized = mu_law_encode(seed_audio, quantization_channels)
        # If it is shorter than window_size it is returned as-is; shouldn't it at least be padded?
        cut_index = tf.cond(tf.size(quantized) < tf.constant(window_size),
                            lambda: tf.size(quantized),
                            lambda: tf.constant(window_size))
        return quantized[:cut_index]
Example #9
Source File: blizzard.py, from vae_tacotron, MIT License (6 votes)
def _process_utterance(out_dir, index, wav_path, labels_path, text):
    # Load the wav file and trim silence from the ends:
    wav = audio.load_wav(wav_path)
    start_offset, end_offset = _parse_labels(labels_path)
    start = int(start_offset * hparams.sample_rate)
    end = int(end_offset * hparams.sample_rate) if end_offset is not None else -1
    wav = wav[start:end]
    max_samples = _max_out_length * hparams.frame_shift_ms / 1000 * hparams.sample_rate
    if len(wav) > max_samples:
        return None
    spectrogram = audio.spectrogram(wav).astype(np.float32)
    n_frames = spectrogram.shape[1]
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)
    spectrogram_filename = 'blizzard-spec-%05d.npy' % index
    mel_filename = 'blizzard-mel-%05d.npy' % index
    np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
    np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)
    return (spectrogram_filename, mel_filename, n_frames, text)
Example #10
Source File: train_tacotron2.py, from Tacotron2-Wavenet-Korean-TTS, MIT License (6 votes)
def save_and_plot_fn(args, log_dir, step, loss, prefix):
    idx, (seq, spec, align) = args
    audio_path = os.path.join(log_dir, '{}-step-{:09d}-audio{:03d}.wav'.format(prefix, step, idx))
    align_path = os.path.join(log_dir, '{}-step-{:09d}-align{:03d}.png'.format(prefix, step, idx))
    waveform = inv_spectrogram(spec.T, hparams)
    save_wav(waveform, audio_path, hparams.sample_rate)
    info_text = 'step={:d}, loss={:.5f}'.format(step, loss)
    if 'korean_cleaners' in [x.strip() for x in hparams.cleaners.split(',')]:
        log('Training korean : Use jamo')
        plot.plot_alignment(align, align_path, info=info_text,
                            text=sequence_to_text(seq, skip_eos_and_pad=True, combine_jamo=True),
                            isKorean=True)
    else:
        log('Training non-korean : X use jamo')
        plot.plot_alignment(align, align_path, info=info_text,
                            text=sequence_to_text(seq, skip_eos_and_pad=True, combine_jamo=False),
                            isKorean=False)
Example #11
Source File: audio.py, from gmvae_tacotron, MIT License (5 votes)
def save_wav(wav, path):
    wav *= 32767 / max(0.01, np.max(np.abs(wav)))  # proposed by @dsmiller
    wavfile.write(path, hparams.sample_rate, wav.astype(np.int16))
Example #12
Source File: preprocess.py, from Tacotron2-Wavenet-Korean-TTS, MIT License (5 votes)
def write_metadata(metadata, out_dir):
    with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
        for m in metadata:
            f.write('|'.join([str(x) for x in m]) + '\n')
    mel_frames = sum([int(m[4]) for m in metadata])
    timesteps = sum([int(m[3]) for m in metadata])
    sr = hparams.sample_rate
    hours = timesteps / sr / 3600
    print('Write {} utterances, {} mel frames, {} audio timesteps, ({:.2f} hours)'.format(
        len(metadata), mel_frames, timesteps, hours))
    print('Max input length (text chars): {}'.format(max(len(m[5]) for m in metadata)))
    print('Max mel frames length: {}'.format(max(int(m[4]) for m in metadata)))
    print('Max audio timesteps length: {}'.format(max(m[3] for m in metadata)))
Example #13
Source File: audio.py, from gmvae_tacotron, MIT License (5 votes)
def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
Example #14
Source File: audio.py, from gmvae_tacotron, MIT License (5 votes)
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               n_mels=hparams.num_mels,
                               fmin=hparams.fmin,
                               fmax=hparams.fmax)
Example #15
Source File: audio.py, from gmvae_tacotron, MIT License (5 votes)
def load_wav(path):
    return librosa.core.load(path, sr=hparams.sample_rate)[0]
Example #16
Source File: wavenet_preprocess.py, from style-token_tacotron2, MIT License (5 votes)
def write_metadata(metadata, out_dir):
    with open(os.path.join(out_dir, 'map.txt'), 'w', encoding='utf-8') as f:
        for m in metadata:
            f.write('|'.join([str(x) for x in m]) + '\n')
    mel_frames = sum([int(m[5]) for m in metadata])
    timesteps = sum([int(m[4]) for m in metadata])
    sr = hparams.sample_rate
    hours = timesteps / sr / 3600
    print('Write {} utterances, {} audio timesteps, ({:.2f} hours)'.format(
        len(metadata), timesteps, hours))
    print('Max mel frames length: {}'.format(max(int(m[5]) for m in metadata)))
    print('Max audio timesteps length: {}'.format(max(m[4] for m in metadata)))
Example #17
Source File: check_slience_trim.py, from style-token_tacotron2, MIT License (5 votes)
def get_some_inversed_samples(training_data_path='training_data', output_inversed_path='tmp_inverse_wav_out', n_samples=5):
    mel_files = glob.glob(os.path.join(training_data_path, 'mels', '*.npy'))
    assert len(mel_files) >= n_samples, 'no enough .npy to inverse...'
    if os.path.exists(output_inversed_path):
        shutil.rmtree(output_inversed_path)
    os.makedirs(output_inversed_path, exist_ok=False)
    random.seed(2018)
    mel_files = random.sample(mel_files, n_samples)
    for mel_file in mel_files:
        mel_file_basename = os.path.basename(mel_file)
        mel_spectro = np.load(mel_file)
        wav = inv_mel_spectrogram(mel_spectro.T, hparams)
        save_wav(wav,
                 os.path.join(output_inversed_path, '{}.wav'.format(mel_file_basename)),
                 sr=hparams.sample_rate)
Example #18
Source File: test_wavenet_feeder.py, from style-token_tacotron2, MIT License (5 votes)
def _limit_time(hparams):
    '''Limit time resolution to save GPU memory.'''
    if hparams.max_time_sec is not None:
        return int(hparams.max_time_sec * hparams.sample_rate)
    elif hparams.max_time_steps is not None:
        return hparams.max_time_steps
    else:
        return None
Example #19
Source File: preprocess.py, from style-token_tacotron2, MIT License (5 votes)
def write_metadata(metadata, out_dir):
    with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
        for m in metadata:
            f.write('|'.join([str(x) for x in m]) + '\n')
    mel_frames = sum([int(m[4]) for m in metadata])
    timesteps = sum([int(m[3]) for m in metadata])
    sr = hparams.sample_rate
    hours = timesteps / sr / 3600
    print('Write {} utterances, {} mel frames, {} audio timesteps, ({:.2f} hours)'.format(
        len(metadata), mel_frames, timesteps, hours))
    print('Max input length (text chars): {}'.format(max(len(m[5]) for m in metadata)))
    print('Max mel frames length: {}'.format(max(int(m[4]) for m in metadata)))
    print('Max audio timesteps length: {}'.format(max(m[3] for m in metadata)))
Example #20
Source File: audio.py, from tacotron, MIT License (5 votes)
def load_wav(path):
    return librosa.core.load(path, sr=hparams.sample_rate)[0]
Example #21
Source File: audio.py, from gmvae_tacotron, MIT License (5 votes)
def load_wav(path):
    return librosa.core.load(path, sr=hparams.sample_rate)[0]
Example #22
Source File: preprocess.py, from gmvae_tacotron, MIT License (5 votes)
def write_metadata(metadata, out_dir):
    with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
        for m in metadata:
            f.write('|'.join([str(x) for x in m]) + '\n')
    mel_frames = sum([int(m[4]) for m in metadata])
    timesteps = sum([int(m[3]) for m in metadata])
    sr = hparams.sample_rate
    hours = timesteps / sr / 3600
    print('Write {} utterances, {} mel frames, {} audio timesteps, ({:.2f} hours)'.format(
        len(metadata), mel_frames, timesteps, hours))
    print('Max input length (text chars): {}'.format(max(len(m[5]) for m in metadata)))
    print('Max mel frames length: {}'.format(max(int(m[4]) for m in metadata)))
    print('Max audio timesteps length: {}'.format(max(m[3] for m in metadata)))
Example #23
Source File: audio.py, from WaveRNN-Pytorch, MIT License (5 votes)
def _build_mel_basis():
    if hparams.fmax is not None:
        assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #24
Source File: audio.py, from WaveRNN-Pytorch, MIT License (5 votes)
def save_wav(wav, path):
    wav = wav * 32767 / max(0.01, np.max(np.abs(wav)))
    wavfile.write(path, hparams.sample_rate, wav.astype(np.int16))
Example #25
Source File: audio.py, from WaveRNN-Pytorch, MIT License (5 votes)
def load_wav(path):
    return librosa.load(path, sr=hparams.sample_rate)[0]
Example #26
Source File: audio.py, from tacotron, MIT License (5 votes)
def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
Example #27
Source File: audio.py, from tacotron, MIT License (5 votes)
def _stft_parameters():
    n_fft = (hparams.num_freq - 1) * 2
    hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
    return n_fft, hop_length, win_length


# Conversions:
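For a sense of scale, here is a worked version of the conversion above with commonly used Tacotron-style values (these are assumed for illustration, not taken from the file above): sample_rate=22050, num_freq=1025, frame_shift_ms=12.5, frame_length_ms=50 give n_fft=2048, hop_length=275, and win_length=1102.

# Worked example of the millisecond-to-sample conversion (assumed values).
sample_rate = 22050
num_freq = 1025
frame_shift_ms = 12.5
frame_length_ms = 50

n_fft = (num_freq - 1) * 2                               # 2048
hop_length = int(frame_shift_ms / 1000 * sample_rate)    # 275
win_length = int(frame_length_ms / 1000 * sample_rate)   # 1102
print(n_fft, hop_length, win_length)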
Example #28
Source File: audio.py, from tacotron, MIT License (5 votes)
def find_endpoint(wav, threshold_db=-40, min_silence_sec=0.8):
    window_length = int(hparams.sample_rate * min_silence_sec)
    hop_length = int(window_length / 4)
    threshold = _db_to_amp(threshold_db)
    for x in range(hop_length, len(wav) - window_length, hop_length):
        if np.max(wav[x:x + window_length]) < threshold:
            return x + hop_length
    return len(wav)
Example #29
Source File: audio.py, from tacotron, MIT License (5 votes)
def save_wav(wav, path):
    wav *= 32767 / max(0.01, np.max(np.abs(wav)))
    scipy.io.wavfile.write(path, hparams.sample_rate, wav.astype(np.int16))
Example #30
Source File: stft.py, from cnn_vocoder, MIT License (5 votes)
def _build_mel_basis(n_fft, n_mels=80):
    return torch.FloatTensor(librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=n_mels)).transpose(0, 1)
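A plausible way to use the basis returned above (a sketch only; the surrounding cnn_vocoder code is not shown here, and recent librosa versions require sr and n_fft to be passed as keyword arguments) is to right-multiply a frames-by-frequency magnitude spectrogram by it, since the transpose puts frequency bins on the first axis:

# Hypothetical usage of the basis above: project a linear-frequency
# magnitude spectrogram (frames x freq bins) onto the mel scale.
# Assumes _build_mel_basis, librosa, and an hparams with sample_rate are in scope.
import torch

mel_basis = _build_mel_basis(n_fft=1024, n_mels=80)  # shape: (513, 80)
magnitudes = torch.rand(100, 513)                    # 100 frames, n_fft // 2 + 1 bins
mel_spec = magnitudes @ mel_basis                    # shape: (100, 80)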