Python hparams.hparams.frame_shift_ms Examples
The following are 23 code examples of hparams.hparams.frame_shift_ms, drawn from open-source projects. The original project, source file, and license for each example are noted above it.
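All of these examples rely on the same conversion: frame_shift_ms is a hop duration in milliseconds, and multiplying by sample_rate / 1000 turns it into a hop length in samples. A minimal sketch of that arithmetic, using the common Tacotron-style defaults of 12.5 ms and 22050 Hz as assumed values rather than any particular project's hparams:

# Minimal sketch of the ms-to-samples conversion used throughout these
# examples. The 12.5 ms shift and 22050 Hz sample rate are assumed defaults.
frame_shift_ms = 12.5
sample_rate = 22050

hop_length = int(frame_shift_ms / 1000 * sample_rate)  # samples advanced per frame
print(hop_length)  # 275: each spectrogram frame advances 275 samples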
Example #1
Source File: blizzard.py From vae_tacotron with MIT License

def _process_utterance(out_dir, index, wav_path, labels_path, text):
  # Load the wav file and trim silence from the ends:
  wav = audio.load_wav(wav_path)
  start_offset, end_offset = _parse_labels(labels_path)
  start = int(start_offset * hparams.sample_rate)
  end = int(end_offset * hparams.sample_rate) if end_offset is not None else -1
  wav = wav[start:end]
  max_samples = _max_out_length * hparams.frame_shift_ms / 1000 * hparams.sample_rate
  if len(wav) > max_samples:
    return None
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]
  mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)
  spectrogram_filename = 'blizzard-spec-%05d.npy' % index
  mel_filename = 'blizzard-mel-%05d.npy' % index
  np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
  np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)
  return (spectrogram_filename, mel_filename, n_frames, text)
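The max_samples expression above applies the same conversion to a frame budget: a cap of _max_out_length spectrogram frames, each lasting frame_shift_ms milliseconds, corresponds to that many milliseconds of audio, hence that many seconds times sample_rate samples. A sketch with assumed values (the real _max_out_length is defined elsewhere in the project):

# Assumed illustrative values; the actual hparams differ per project.
_max_out_length = 700   # maximum allowed spectrogram frames
frame_shift_ms = 12.5
sample_rate = 16000

max_samples = _max_out_length * frame_shift_ms / 1000 * sample_rate
print(max_samples)  # 140000.0 samples, i.e. 8.75 seconds at 16 kHz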
Example #2
Source File: blizzard.py From libfaceid with MIT License

def _process_utterance(out_dir, index, wav_path, labels_path, text):
  # Load the wav file and trim silence from the ends:
  wav = audio.load_wav(wav_path)
  start_offset, end_offset = _parse_labels(labels_path)
  start = int(start_offset * hparams.sample_rate)
  end = int(end_offset * hparams.sample_rate) if end_offset is not None else -1
  wav = wav[start:end]
  max_samples = _max_out_length * hparams.frame_shift_ms / 1000 * hparams.sample_rate
  if len(wav) > max_samples:
    return None
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]
  mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)
  spectrogram_filename = 'blizzard-spec-%05d.npy' % index
  mel_filename = 'blizzard-mel-%05d.npy' % index
  np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
  np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)
  return (spectrogram_filename, mel_filename, n_frames, text)
Example #3
Source File: blizzard.py From tacotron with MIT License

def _process_utterance(out_dir, index, wav_path, labels_path, text):
  # Load the wav file and trim silence from the ends:
  wav = audio.load_wav(wav_path)
  start_offset, end_offset = _parse_labels(labels_path)
  start = int(start_offset * hparams.sample_rate)
  end = int(end_offset * hparams.sample_rate) if end_offset is not None else -1
  wav = wav[start:end]
  max_samples = _max_out_length * hparams.frame_shift_ms / 1000 * hparams.sample_rate
  if len(wav) > max_samples:
    return None
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]
  mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)
  spectrogram_filename = 'blizzard-spec-%05d.npy' % index
  mel_filename = 'blizzard-mel-%05d.npy' % index
  np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
  np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)
  return (spectrogram_filename, mel_filename, n_frames, text)
Example #4
Source File: audio.py From Griffin_lim with MIT License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
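get_hop_size treats an explicit hop_size hparam as authoritative and only falls back to deriving one from frame_shift_ms. A hedged sketch of that fallback using a stand-in hparams object (the SimpleNamespace and its values are illustrative, not the project's actual hparams class):

from types import SimpleNamespace

# Stand-in for the project's hparams module; values are assumptions.
hparams = SimpleNamespace(hop_size=None, frame_shift_ms=12.5, sample_rate=22050)

def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size

print(get_hop_size())  # 275, derived from frame_shift_ms
hparams.hop_size = 256
print(get_hop_size())  # 256, the explicit value wins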
Example #5
Source File: audio.py From arabic-tacotron-tts with MIT License

def _stft_parameters():
  n_fft = (hparams.num_freq - 1) * 2
  hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
  return n_fft, hop_length, win_length
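Besides the hop and window lengths, _stft_parameters derives n_fft from num_freq: a real-valued FFT of size n_fft yields n_fft / 2 + 1 frequency bins, so num_freq bins requires n_fft = (num_freq - 1) * 2. A worked example with assumed Tacotron-style defaults:

# Worked example; these hparams values are assumptions, not read from the project.
num_freq = 1025
frame_length_ms = 50.0
frame_shift_ms = 12.5
sample_rate = 20000

n_fft = (num_freq - 1) * 2                              # 2048 FFT points -> 1025 bins
hop_length = int(frame_shift_ms / 1000 * sample_rate)   # 250 samples
win_length = int(frame_length_ms / 1000 * sample_rate)  # 1000 samples
print(n_fft, hop_length, win_length)  # 2048 250 1000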
Example #6
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #7
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #8
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #9
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #10
Source File: audio.py From Tacotron2-PyTorch with MIT License

def _stft_parameters():
  n_fft = (hps.num_freq - 1) * 2
  hop_length = int(hps.frame_shift_ms / 1000 * hps.sample_rate)
  win_length = int(hps.frame_length_ms / 1000 * hps.sample_rate)
  return n_fft, hop_length, win_length
Example #11
Source File: audio.py From gmvae_tacotron with MIT License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #12
Source File: audio.py From gmvae_tacotron with MIT License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #13
Source File: audio.py From tacotron with MIT License

def _stft_parameters():
  n_fft = (hparams.num_freq - 1) * 2
  hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
  return n_fft, hop_length, win_length
Example #14
Source File: preprocess.py From tacotron with MIT License

def write_metadata(metadata, out_dir):
  with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
    for m in metadata:
      f.write('|'.join([str(x) for x in m]) + '\n')
  frames = sum([m[2] for m in metadata])
  hours = frames * hparams.frame_shift_ms / (3600 * 1000)
  print('Wrote %d utterances, %d frames (%.2f hours)' % (len(metadata), frames, hours))
  print('Max input length: %d' % max(len(m[3]) for m in metadata))
  print('Max output length: %d' % max(m[2] for m in metadata))
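The hours computation divides total milliseconds of audio by milliseconds per hour: each frame represents frame_shift_ms milliseconds, and an hour is 3600 * 1000 milliseconds. A quick check with assumed numbers:

# Assumed numbers, purely illustrative.
frames = 1_000_000
frame_shift_ms = 12.5

hours = frames * frame_shift_ms / (3600 * 1000)  # ms of audio / ms per hour
print('%.2f hours' % hours)  # 3.47 hours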
Example #15
Source File: audio.py From cnn_vocoder with MIT License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #16
Source File: preprocess.py From arabic-tacotron-tts with MIT License

def write_metadata(metadata, out_dir):
  with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
    for m in metadata:
      f.write('|'.join([str(x) for x in m]) + '\n')
  frames = sum([m[2] for m in metadata])
  hours = frames * hparams.frame_shift_ms / (3600 * 1000)
  print('Wrote %d utterances, %d frames (%.2f hours)' % (len(metadata), frames, hours))
  print('Max input length: %d' % max(len(m[3]) for m in metadata))
  print('Max output length: %d' % max(m[2] for m in metadata))
Example #17
Source File: preprocess.py From libfaceid with MIT License

def write_metadata(metadata, out_dir):
  with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
    for m in metadata:
      f.write('|'.join([str(x) for x in m]) + '\n')
  frames = sum([m[2] for m in metadata])
  hours = frames * hparams.frame_shift_ms / (3600 * 1000)
  print('Wrote %d utterances, %d frames (%.2f hours)' % (len(metadata), frames, hours))
  print('Max input length: %d' % max(len(m[3]) for m in metadata))
  print('Max output length: %d' % max(m[2] for m in metadata))
Example #18
Source File: audio.py From vae_tacotron2 with MIT License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #19
Source File: audio.py From vae_tacotron2 with MIT License

def get_hop_size():
  hop_size = hparams.hop_size
  if hop_size is None:
    assert hparams.frame_shift_ms is not None
    hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  return hop_size
Example #20
Source File: blizzard2013.py From vae_tacotron with MIT License

def _process_utterance(out_dir, index, wav_path, text):
  '''Preprocesses a single utterance audio/text pair.

  This writes the mel and linear scale spectrograms to disk and returns a tuple to
  write to the train.txt file.

  Args:
    out_dir: The directory to write the spectrograms into
    index: The numeric index to use in the spectrogram filenames.
    wav_path: Path to the audio file containing the speech input
    text: The text spoken in the input audio file

  Returns:
    A (spectrogram_filename, mel_filename, n_frames, text) tuple to write to train.txt
  '''

  # Load the audio to a numpy array:
  wav = audio.load_wav(wav_path)

  # Skip utterances longer than the output cap. (The None check must come
  # first: computing max_samples with a None _max_out_length would raise a
  # TypeError before the original trailing `and` condition was evaluated.)
  if _max_out_length is not None:
    max_samples = _max_out_length * hparams.frame_shift_ms / 1000 * hparams.sample_rate
    if len(wav) > max_samples:
      return None

  # Compute the linear-scale spectrogram from the wav:
  spectrogram = audio.spectrogram(wav).astype(np.float32)
  n_frames = spectrogram.shape[1]

  # Compute a mel-scale spectrogram from the wav:
  mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)

  # Write the spectrograms to disk:
  spectrogram_filename = 'blizzard2013-spec-%05d.npy' % index
  mel_filename = 'blizzard2013-mel-%05d.npy' % index
  np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
  np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)

  # Return a tuple describing this training example:
  return (spectrogram_filename, mel_filename, n_frames, text)
Example #21
Source File: audio.py From vae_tacotron with MIT License

def _stft_parameters():
  n_fft = (hparams.num_freq - 1) * 2
  hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
  win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
  return n_fft, hop_length, win_length
Example #22
Source File: preprocess.py From vae_tacotron with MIT License

def write_metadata(metadata, out_dir):
  with open(os.path.join(out_dir, 'train.txt'), 'w', encoding='utf-8') as f:
    for m in metadata:
      f.write('|'.join([str(x) for x in m]) + '\n')
  frames = sum([m[2] for m in metadata])
  hours = frames * hparams.frame_shift_ms / (3600 * 1000)
  print('Wrote %d utterances, %d frames (%.2f hours)' % (len(metadata), frames, hours))
  print('Max input length: %d' % max(len(m[3]) for m in metadata))
  print('Max output length: %d' % max(m[2] for m in metadata))
Example #23
Source File: griffin_lim.py From Griffin_lim with MIT License

def spectrogram2wav(spectrogram,
                    n_iter=hparams.griffin_lim_iters,
                    n_fft=(hparams.num_freq - 1) * 2,
                    win_length=int(hparams.frame_length_ms / 1000 * hparams.sample_rate),
                    hop_length=int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)):
  '''Converts a spectrogram into a waveform using the Griffin-Lim algorithm.'''

  def invert_spectrogram(spectrogram):
    '''spectrogram: [t, f]'''
    spectrogram = tf.expand_dims(spectrogram, 0)
    inversed = tf.contrib.signal.inverse_stft(spectrogram, win_length, hop_length, n_fft)
    squeezed = tf.squeeze(inversed, 0)
    return squeezed

  spectrogram = tf.transpose(spectrogram)
  spectrogram = tf.cast(spectrogram, dtype=tf.complex64)  # [t, f]

  X_best = tf.identity(spectrogram)
  for i in range(n_iter):
    X_t = invert_spectrogram(X_best)
    est = tf.contrib.signal.stft(X_t, win_length, hop_length, n_fft, pad_end=False)  # (1, T, n_fft/2+1)
    phase = est / tf.cast(tf.maximum(1e-8, tf.abs(est)), tf.complex64)  # [t, f]
    X_best = spectrogram * phase  # [t, f]
  X_t = invert_spectrogram(X_best)
  y = tf.real(X_t)
  return y
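Because spectrogram2wav builds a TensorFlow 1.x graph (tf.contrib is TF1-only), the returned tensor still has to be evaluated in a session. A hedged usage sketch, assuming the definitions above are in scope and standing in random data for a real magnitude spectrogram:

# Hypothetical TF1-style usage; the input array here is an assumption,
# not a real model prediction.
import numpy as np
import tensorflow as tf

# Magnitude spectrogram with hparams.num_freq rows and 120 frames.
mag = np.abs(np.random.randn(hparams.num_freq, 120)).astype(np.float32)
wav_op = spectrogram2wav(mag)
with tf.Session() as sess:
    wav = sess.run(wav_op)  # 1-D float waveform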