Python hparams.hparams.fft_size Examples

The following are 10 code examples of hparams.hparams.fft_size.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
hparams.hparams, or try the search function.
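All of the snippets below read fft_size and a handful of related fields as plain attributes of an hparams object. As a reading aid, here is a minimal stand-in for such an object; the attribute names come from the snippets themselves, but the concrete values are assumptions, not any project's defaults.

# Minimal stand-in for the hparams object the snippets assume.
# Values are illustrative only.
class hparams:
    fft_size = 1024       # FFT window size for the STFT helpers
    hop_size = 256        # hop length between successive frames
    sample_rate = 22050   # audio sample rate in Hz
    num_mels = 80         # number of mel filterbank channels
    fmin = 125            # lowest mel filterbank frequency (Hz)
    fmax = 7600           # highest mel filterbank frequency (Hz)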
Example #1
Source File: audio.py From vae_tacotron2 with MIT License | 5 votes |
def _stft(y):
    return librosa.stft(y=y, n_fft=hparams.fft_size, hop_length=get_hop_size())
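For context (assumed usage, not part of the source file): librosa.stft returns a complex matrix of shape (1 + fft_size // 2, frames), so callers typically take its magnitude before further processing.

import numpy as np

D = _stft(wav)   # complex STFT of a loaded waveform `wav`
S = np.abs(D)    # magnitude spectrogram, shape (1 + fft_size // 2, frames)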
Example #2
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
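librosa.filters.mel returns a filterbank matrix of shape (num_mels, 1 + fft_size // 2). A minimal sketch of how such a basis is usually combined with an STFT helper like the one above; the pipeline is assumed, not taken from the project:

import numpy as np

mel_basis = _build_mel_basis()          # (num_mels, 1 + fft_size // 2)
S = np.abs(_stft(wav))                  # linear magnitude spectrogram
mel_spectrogram = np.dot(mel_basis, S)  # (num_mels, frames)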
Example #3
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _lws_processor():
    return lws.lws(hparams.fft_size, get_hop_size(), mode="speech")
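A sketch of how the resulting processor is typically used for phase recovery, based on the lws package's documented interface; the variable names are assumptions:

import numpy as np

processor = _lws_processor()
X = processor.stft(wav)           # complex STFT computed by lws
X_mag = np.abs(X)                 # keep only the magnitude
X_rec = processor.run_lws(X_mag)  # estimate a consistent phase
y = processor.istft(X_rec)        # back to a time-domain signal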
Example #4
Source File: audio.py From gmvae_tacotron with MIT License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               n_mels=hparams.num_mels,
                               fmin=hparams.fmin, fmax=hparams.fmax)
Example #5
Source File: audio.py From gmvae_tacotron with MIT License | 5 votes |
def _stft(y):
    return librosa.stft(y=y, n_fft=hparams.fft_size, hop_length=get_hop_size())
Example #6
Source File: audio.py From WaveRNN-Pytorch with MIT License | 5 votes |
def _build_mel_basis():
    if hparams.fmax is not None:
        assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
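The None check matters because librosa.filters.mel defaults fmax to sample_rate / 2 (the Nyquist frequency) when fmax is None, so the Nyquist assertion is only meaningful when an explicit upper bound is configured.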
Example #7
Source File: audio.py From WaveRNN-Pytorch with MIT License | 5 votes |
def _lws_processor():
    return lws.lws(hparams.fft_size, hparams.hop_size, mode="speech")
Example #8
Source File: audio.py From vae_tacotron2 with MIT License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               n_mels=hparams.num_mels,
                               fmin=hparams.fmin, fmax=hparams.fmax)
Example #9
Source File: audio.py From Griffin_lim with MIT License | 5 votes |
def _stft(y):
    return librosa.stft(y=y, n_fft=hparams.fft_size, hop_length=get_hop_size())
Example #10
Source File: ljspeech.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 4 votes |
def _process_utterance(out_dir, index, wav_path, text):
    # Load the audio to a numpy array:
    wav = audio.load_wav(wav_path)

    if hparams.rescaling:
        wav = wav / np.abs(wav).max() * hparams.rescaling_max

    # Mu-law quantize
    if is_mulaw_quantize(hparams.input_type):
        # [0, quantize_channels)
        out = P.mulaw_quantize(wav, hparams.quantize_channels)

        # Trim silences
        start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
        wav = wav[start:end]
        out = out[start:end]
        constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
        out_dtype = np.int16
    elif is_mulaw(hparams.input_type):
        # [-1, 1]
        out = P.mulaw(wav, hparams.quantize_channels)
        constant_values = P.mulaw(0.0, hparams.quantize_channels)
        out_dtype = np.float32
    else:
        # [-1, 1]
        out = wav
        constant_values = 0.0
        out_dtype = np.float32

    # Compute a mel-scale spectrogram from the trimmed wav:
    # (N, D)
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T

    # lws pads zeros internally before performing stft
    # this is needed to adjust time resolution between audio and mel-spectrogram
    l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())

    # zero pad for quantized signal
    out = np.pad(out, (l, r), mode="constant", constant_values=constant_values)
    N = mel_spectrogram.shape[0]
    assert len(out) >= N * audio.get_hop_size()

    # time resolution adjustment
    # ensure length of raw audio is multiple of hop_size so that we can use
    # transposed convolution to upsample
    out = out[:N * audio.get_hop_size()]
    assert len(out) % audio.get_hop_size() == 0
    timesteps = len(out)

    # Write the spectrograms to disk:
    audio_filename = 'ljspeech-audio-%05d.npy' % index
    mel_filename = 'ljspeech-mel-%05d.npy' % index
    np.save(os.path.join(out_dir, audio_filename),
            out.astype(out_dtype), allow_pickle=False)
    np.save(os.path.join(out_dir, mel_filename),
            mel_spectrogram.astype(np.float32), allow_pickle=False)

    # Return a tuple describing this training example:
    return (audio_filename, mel_filename, timesteps, text)
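To make the length bookkeeping concrete (numbers assumed for illustration): if audio.get_hop_size() returns 256 and the mel spectrogram has N = 500 frames, the padded signal is trimmed to exactly 500 * 256 = 128,000 samples, so every mel frame aligns with one 256-sample hop and a transposed convolution with stride 256 can upsample the N frames back to the waveform length with no remainder.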