Python hparams.hparams.hop_size() Examples
The following are 16 code examples of hparams.hparams.hop_size(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out all available functions/classes of the module hparams.hparams.
Example #1
Source File: audio.py From Griffin_lim with MIT License | 6 votes

def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
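This helper appears, with minor variations, in several of the projects below: it prefers an explicit hop_size hyperparameter and otherwise derives one from a frame shift given in milliseconds. A minimal standalone sketch of that fallback arithmetic (the SimpleNamespace stand-in for the module's hparams object is hypothetical, filled with typical Tacotron-style values):

from types import SimpleNamespace

# Hypothetical hparams stand-in: no explicit hop_size, 12.5 ms shift at 22050 Hz.
hp = SimpleNamespace(hop_size=None, frame_shift_ms=12.5, sample_rate=22050)

hop_size = hp.hop_size
if hop_size is None:
    assert hp.frame_shift_ms is not None
    hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate)

print(hop_size)  # 275 samples per frame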
Example #2
Source File: dataset.py From WaveRNN-Pytorch with MIT License | 5 votes

def raw_collate(batch):
    """Collate function used for raw wav forms, such as beta/gaussian/mixture-of-logistics output."""
    pad = 2
    mel_win = hp.seq_len // hp.hop_size + 2 * pad
    max_offsets = [x[0].shape[-1] - (mel_win + 2 * pad) for x in batch]
    mel_offsets = [np.random.randint(0, offset) for offset in max_offsets]
    sig_offsets = [(offset + pad) * hp.hop_size for offset in mel_offsets]
    mels = [x[0][:, mel_offsets[i]:mel_offsets[i] + mel_win]
            for i, x in enumerate(batch)]
    coarse = [x[1][sig_offsets[i]:sig_offsets[i] + hp.seq_len + 1]
              for i, x in enumerate(batch)]
    mels = np.stack(mels).astype(np.float32)
    coarse = np.stack(coarse).astype(np.float32)
    mels = torch.FloatTensor(mels)
    coarse = torch.FloatTensor(coarse)
    x_input = coarse[:, :hp.seq_len]
    y_coarse = coarse[:, 1:]
    return x_input, mels, y_coarse
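The offset arithmetic keeps the mel window and the waveform slice aligned: each mel frame covers hp.hop_size samples, so a window of seq_len // hop_size frames (plus pad frames of context) corresponds to a seq_len-sample signal segment, with one extra sample kept for the shifted target. A quick check with illustrative values of my own choosing, not the repository's defaults:

# Illustrative values only.
seq_len, hop_size, pad = 1100, 275, 2
mel_win = seq_len // hop_size + 2 * pad      # 4 + 4 = 8 mel frames
mel_offset = 3                               # a sampled frame offset
sig_offset = (mel_offset + pad) * hop_size   # 5 * 275 = 1375 samples
# wav slice: [sig_offset : sig_offset + seq_len + 1], sitting under the
# padded mel window; the +1 provides the next-sample target y_coarse.
print(mel_win, sig_offset)                   # 8 1375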
Example #3
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes

def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
Example #4
Source File: feeder.py From gmvae_tacotron with MIT License | 5 votes

def __init__(self, coordinator, metadata_filename, hparams):
    super(Feeder, self).__init__()
    self._coord = coordinator
    self._hparams = hparams
    self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    self._offset = 0

    # Load metadata
    self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
    self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
    with open(metadata_filename, encoding='utf-8') as f:
        self._metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

    # Create placeholders for inputs and targets. Don't specify batch size because we want
    # to be able to feed different batch sizes at eval time.
    self._placeholders = [
        tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
        tf.placeholder(tf.int32, shape=(None,), name='input_lengths'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
        tf.placeholder(tf.int32, shape=(None,), name='mel_lengths'),
        tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
    ]

    # Create queue for buffering data
    queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32],
                         name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    (self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths,
     self.token_targets, self.linear_targets) = queue.dequeue()

    self.inputs.set_shape(self._placeholders[0].shape)
    self.input_lengths.set_shape(self._placeholders[1].shape)
    self.mel_targets.set_shape(self._placeholders[2].shape)
    self.mel_lengths.set_shape(self._placeholders[3].shape)
    self.token_targets.set_shape(self._placeholders[4].shape)
    self.linear_targets.set_shape(self._placeholders[5].shape)
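Note that, despite its name, frame_shift_ms here holds seconds: hop_size / sample_rate is samples per frame divided by samples per second. The hours estimate then multiplies each utterance's mel-frame count (column 4 of train.txt) by that per-frame duration. A sanity check with made-up numbers:

# Made-up metadata: 3 utterances of 800 mel frames each, hop 275 at 22050 Hz.
hop_size, sample_rate = 275, 22050
seconds_per_frame = hop_size / sample_rate    # ~0.01247 s
hours = 3 * 800 * seconds_per_frame / 3600    # ~0.0083 hours (~30 s of audio)
print('{:.4f} hours'.format(hours))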
Example #5
Source File: audio.py From gmvae_tacotron with MIT License | 5 votes

def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
Example #6
Source File: synthesize.py From gmvae_tacotron with MIT License | 5 votes

def run_synthesis(args, checkpoint_path, output_dir):
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        print('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    if args.GTA == True:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    print('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(text, None, i + 1, synth_dir, None, mel_filename)
            file.write('{}|{}|{}|{}\n'.format(text, mel_filename, mel_output_filename, wav_filename))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
Example #7
Source File: dataset.py From WaveRNN-Pytorch with MIT License | 5 votes

def discrete_collate(batch):
    """Collate function used for discrete wav output, such as 9-bit, mulaw-discrete, etc."""
    pad = 2
    mel_win = hp.seq_len // hp.hop_size + 2 * pad
    max_offsets = [x[0].shape[-1] - (mel_win + 2 * pad) for x in batch]
    mel_offsets = [np.random.randint(0, offset) for offset in max_offsets]
    sig_offsets = [(offset + pad) * hp.hop_size for offset in mel_offsets]
    mels = [x[0][:, mel_offsets[i]:mel_offsets[i] + mel_win]
            for i, x in enumerate(batch)]
    coarse = [x[1][sig_offsets[i]:sig_offsets[i] + hp.seq_len + 1]
              for i, x in enumerate(batch)]
    mels = np.stack(mels).astype(np.float32)
    coarse = np.stack(coarse).astype(np.int64)
    mels = torch.FloatTensor(mels)
    coarse = torch.LongTensor(coarse)
    if hp.input_type == 'bits':
        x_input = 2 * coarse[:, :hp.seq_len].float() / (2 ** hp.bits - 1.) - 1.
    elif hp.input_type == 'mulaw':
        x_input = inv_mulaw_quantize(coarse[:, :hp.seq_len], hp.mulaw_quantize_channels)
    # (Other input_type values would leave x_input undefined in this excerpt.)
    y_coarse = coarse[:, 1:]
    return x_input, mels, y_coarse
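The 'bits' branch rescales integer sample labels in [0, 2**bits - 1] to floats in [-1, 1] for the network input, while the targets y_coarse stay discrete for the classification loss. A small check (values are mine, assuming 9-bit labels):

import torch

bits = 9
coarse = torch.tensor([0, 255, 511])          # 9-bit labels span [0, 511]
x_input = 2 * coarse.float() / (2 ** bits - 1.) - 1.
print(x_input)                                # tensor([-1.0000, -0.0020, 1.0000])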
Example #8
Source File: audio.py From WaveRNN-Pytorch with MIT License | 5 votes

def _lws_processor():
    return lws.lws(hparams.fft_size, hparams.hop_size, mode="speech")
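lws performs STFT analysis/synthesis and phase reconstruction from magnitude spectrograms. As a hedged sketch of the basic round trip (based on my reading of the lws README; verify the calls against your installed version):

import numpy as np
import lws

fft_size, hop_size = 1024, 256                # example values, not the repo's
proc = lws.lws(fft_size, hop_size, mode="speech")

y = np.random.randn(16000)                    # stand-in waveform
X = proc.stft(y)                              # complex spectrogram
X_rec = proc.run_lws(np.abs(X))               # reconstruct phase from magnitude
y_hat = proc.istft(X_rec)                     # back to a waveform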
Example #9
Source File: synthesize.py From tacotron2-mandarin-griffin-lim with MIT License | 5 votes

def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    metadata = [metadata[i: i + hparams.tacotron_synthesis_batch_size]
                for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [os.path.basename(m).replace('.npy', '').replace('mel-', '')
                         for m in mel_filenames]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames, mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
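The list comprehension over range(0, len(metadata), batch_size) is a compact chunking idiom: it splits the metadata into fixed-size batches, with the final batch possibly shorter. In isolation:

# Generic form of the batching idiom above (batch size is illustrative).
items = list(range(10))
batch_size = 4
batches = [items[i: i + batch_size] for i in range(0, len(items), batch_size)]
print(batches)  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]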
Example #10
Source File: audio.py From cnn_vocoder with MIT License | 5 votes

def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
Example #11
Source File: feeder.py From vae_tacotron2 with MIT License | 5 votes

def __init__(self, coordinator, metadata_filename, hparams):
    super(Feeder, self).__init__()
    self._coord = coordinator
    self._hparams = hparams
    self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    self._offset = 0

    # Load metadata
    self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
    self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
    with open(metadata_filename, encoding='utf-8') as f:
        self._metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

    # Create placeholders for inputs and targets. Don't specify batch size because we want
    # to be able to feed different batch sizes at eval time.
    self._placeholders = [
        tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
        tf.placeholder(tf.int32, shape=(None,), name='input_lengths'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
        tf.placeholder(tf.int32, shape=(None,), name='mel_lengths'),
        tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
    ]

    # Create queue for buffering data
    queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32],
                         name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    (self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths,
     self.token_targets, self.linear_targets) = queue.dequeue()

    self.inputs.set_shape(self._placeholders[0].shape)
    self.input_lengths.set_shape(self._placeholders[1].shape)
    self.mel_targets.set_shape(self._placeholders[2].shape)
    self.mel_lengths.set_shape(self._placeholders[3].shape)
    self.token_targets.set_shape(self._placeholders[4].shape)
    self.linear_targets.set_shape(self._placeholders[5].shape)
Example #12
Source File: audio.py From vae_tacotron2 with MIT License | 5 votes

def get_hop_size():
    hop_size = hparams.hop_size
    if hop_size is None:
        assert hparams.frame_shift_ms is not None
        hop_size = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    return hop_size
Example #13
Source File: synthesize.py From vae_tacotron2 with MIT License | 5 votes

def run_synthesis(args, checkpoint_path, output_dir):
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        print('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    if args.GTA == True:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    print('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(text, None, i + 1, synth_dir, None, mel_filename)
            file.write('{}|{}|{}|{}\n'.format(text, mel_filename, mel_output_filename, wav_filename))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
Example #14
Source File: synthesis.py From autovc with MIT License | 5 votes

def wavegen(model, c=None, tqdm=tqdm):
    """Generate waveform samples by WaveNet."""
    model.eval()
    model.make_generation_fast_()

    # Note: despite the None default, c is required -- its shape is read here.
    Tc = c.shape[0]
    upsample_factor = hparams.hop_size
    # Overwrite length according to feature size
    length = Tc * upsample_factor

    # B x C x T
    c = torch.FloatTensor(c.T).unsqueeze(0)

    initial_input = torch.zeros(1, 1, 1).fill_(0.0)

    # Transform data to GPU
    initial_input = initial_input.to(device)
    c = None if c is None else c.to(device)

    with torch.no_grad():
        y_hat = model.incremental_forward(
            initial_input, c=c, g=None, T=length, tqdm=tqdm, softmax=True,
            quantize=True, log_scale_min=hparams.log_scale_min)

    y_hat = y_hat.view(-1).cpu().data.numpy()

    return y_hat
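Because the conditioning spectrogram has one frame per hop_size waveform samples, the generated length is simply Tc * hop_size. With illustrative values:

# 80 mel frames at hop 275 upsample to 22000 samples (~1 s at 22050 Hz).
Tc, hop_size, sample_rate = 80, 275, 22050
length = Tc * hop_size
print(length, length / sample_rate)           # 22000 ~0.998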
Example #15
Source File: synthesize.py From Tacotron-2 with MIT License | 4 votes

def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    # Set inputs batch wise
    metadata = [metadata[i: i + hparams.tacotron_synthesis_batch_size]
                for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [os.path.basename(m).replace('.npy', '').replace('mel-', '')
                         for m in mel_filenames]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames, mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
Example #16
Source File: synthesize.py From style-token_tacotron2 with MIT License | 4 votes

def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    # Set inputs batch wise
    metadata = [metadata[i: i + hparams.tacotron_synthesis_batch_size]
                for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [os.path.basename(m).replace('.npy', '').replace('mel-', '')
                         for m in mel_filenames]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames, mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')