Python hparams.hparams.num_freq() Examples
The following are 14 code examples of hparams.hparams.num_freq(), drawn from several open-source Tacotron-family text-to-speech projects. The project and source file for each example are listed above it. You may also want to check out all other available functions/classes of the hparams.hparams module.
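Despite the call syntax in the page title, num_freq is read as a plain attribute (hparams.num_freq) in every example below: it is the number of frequency bins in the linear (STFT) spectrogram, and the FFT size is recovered from it as n_fft = (num_freq - 1) * 2. A minimal, self-contained sketch of the kind of hparams object these snippets assume (the attribute names match the examples; the default values are illustrative, not taken from any one project):

from types import SimpleNamespace

# Hypothetical hparams object with the attributes the examples below read.
# Values are illustrative defaults, not the settings of any specific project.
hparams = SimpleNamespace(
    num_freq=1025,         # linear-spectrogram bins; implies n_fft = (1025 - 1) * 2 = 2048
    num_mels=80,           # mel-spectrogram bins
    sample_rate=22050,     # audio sample rate in Hz
    frame_length_ms=50,    # STFT window length in milliseconds
    frame_shift_ms=12.5,   # STFT hop in milliseconds
    griffin_lim_iters=60,  # Griffin-Lim iterations
)

print((hparams.num_freq - 1) * 2)  # 2048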
Example #1
Source File: audio.py From Griffin_lim with MIT License

def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
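The (num_freq - 1) * 2 expression inverts the usual STFT bin count: a real FFT of size n_fft produces n_fft // 2 + 1 frequency bins. A quick check with NumPy (num_freq = 1025 here is just an illustrative value):

import numpy as np

num_freq = 1025                     # illustrative value
n_fft = (num_freq - 1) * 2          # 2048
frame = np.zeros(n_fft)
bins = np.fft.rfft(frame).shape[0]  # real FFT yields n_fft // 2 + 1 bins
assert bins == num_freq             # 1025 == 1025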
Example #2
Source File: griffin_lim.py From Griffin_lim with MIT License

def spectrogram2wav(spectrogram, n_iter=hparams.griffin_lim_iters, n_fft=(hparams.num_freq - 1) * 2,
                    win_length=int(hparams.frame_length_ms / 1000 * hparams.sample_rate),
                    hop_length=int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)):
    '''Converts a spectrogram into a waveform using the Griffin-Lim algorithm.'''
    def invert_spectrogram(spectrogram):
        '''spectrogram: [t, f]'''
        spectrogram = tf.expand_dims(spectrogram, 0)
        inversed = tf.contrib.signal.inverse_stft(spectrogram, win_length, hop_length, n_fft)
        squeezed = tf.squeeze(inversed, 0)
        return squeezed

    spectrogram = tf.transpose(spectrogram)
    spectrogram = tf.cast(spectrogram, dtype=tf.complex64)  # [t, f]
    X_best = tf.identity(spectrogram)
    for i in range(n_iter):
        # Alternate between the time domain and the frequency domain, keeping the
        # target magnitudes and the current phase estimate at each iteration.
        X_t = invert_spectrogram(X_best)
        est = tf.contrib.signal.stft(X_t, win_length, hop_length, n_fft, pad_end=False)  # (1, T, n_fft/2+1)
        phase = est / tf.cast(tf.maximum(1e-8, tf.abs(est)), tf.complex64)  # [t, f]
        X_best = spectrogram * phase  # [t, f]
    X_t = invert_spectrogram(X_best)
    y = tf.real(X_t)
    return y
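For reference, the same inversion can be done without TF1's tf.contrib by using librosa's built-in Griffin-Lim. This is a swapped-in alternative, not this project's code; it is a sketch assuming a magnitude spectrogram shaped (num_freq, frames) and the same ms-based parameters as above:

import librosa

def spectrogram_to_wav_librosa(S, sample_rate=22050, frame_length_ms=50,
                               frame_shift_ms=12.5, n_iter=60):
    # S: magnitude spectrogram, shape (num_freq, frames) = (1 + n_fft // 2, T).
    n_fft = (S.shape[0] - 1) * 2
    hop_length = int(frame_shift_ms / 1000 * sample_rate)
    win_length = int(frame_length_ms / 1000 * sample_rate)
    return librosa.griffinlim(S, n_iter=n_iter, hop_length=hop_length,
                              win_length=win_length, n_fft=n_fft)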
Example #3
Source File: audio.py From vae_tacotron with MIT License

def _stft_parameters():
    n_fft = (hparams.num_freq - 1) * 2
    hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
    return n_fft, hop_length, win_length
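To make the unit conversions concrete, with illustrative values (assumptions, not any project's committed settings) of num_freq = 1025, sample_rate = 22050, frame_shift_ms = 12.5, and frame_length_ms = 50, the function returns n_fft = (1025 - 1) * 2 = 2048, hop_length = int(0.0125 * 22050) = 275, and win_length = int(0.05 * 22050) = 1102.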
Example #4
Source File: audio.py From vae_tacotron with MIT License

def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
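In these projects, the (num_mels, num_freq) matrix returned by _build_mel_basis is typically cached and applied to a linear spectrogram by a small helper. A sketch of that pattern (the helper name _linear_to_mel follows the Tacotron-style audio.py files, but treat the exact code as an assumption):

import numpy as np

_mel_basis = None

def _linear_to_mel(spectrogram):
    # spectrogram: (num_freq, frames) linear magnitude spectrogram.
    global _mel_basis
    if _mel_basis is None:
        _mel_basis = _build_mel_basis()       # (num_mels, num_freq) filterbank
    return np.dot(_mel_basis, spectrogram)    # -> (num_mels, frames)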
Example #5
Source File: feeder.py From vae_tacotron2 with MIT License

def __init__(self, coordinator, metadata_filename, hparams):
    super(Feeder, self).__init__()
    self._coord = coordinator
    self._hparams = hparams
    self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    self._offset = 0

    # Load metadata
    self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
    self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
    with open(metadata_filename, encoding='utf-8') as f:
        self._metadata = [line.strip().split('|') for line in f]
        # hop_size / sample_rate is in seconds, despite the _ms name.
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

    # Create placeholders for inputs and targets. Don't specify batch size because we want
    # to be able to feed different batch sizes at eval time.
    self._placeholders = [
        tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
        tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
        tf.placeholder(tf.int32, [None], 'mel_lengths'),
        tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
    ]

    # Create queue for buffering data
    queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32],
                         name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths, \
        self.token_targets, self.linear_targets = queue.dequeue()
    self.inputs.set_shape(self._placeholders[0].shape)
    self.input_lengths.set_shape(self._placeholders[1].shape)
    self.mel_targets.set_shape(self._placeholders[2].shape)
    self.mel_lengths.set_shape(self._placeholders[3].shape)
    self.token_targets.set_shape(self._placeholders[4].shape)
    self.linear_targets.set_shape(self._placeholders[5].shape)
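The hours computation above assumes each metadata line is pipe-delimited with the frame count in field 4 (x[4]). A tiny standalone check with a made-up metadata line (the exact field layout is an assumption inferred from the indexing in the code):

# Hypothetical metadata line: audio | mel | linear | time steps | mel frames | text
line = 'audio-0001.npy|mel-0001.npy|linear-0001.npy|48000|250|hello world'
fields = line.strip().split('|')

hop_size, sample_rate = 275, 22050      # illustrative values
frame_shift_s = hop_size / sample_rate  # seconds of audio per mel frame
hours = int(fields[4]) * frame_shift_s / 3600
print('{:.6f} hours'.format(hours))     # 250 frames -> ~0.000866 hours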
Example #6
Source File: audio.py From arabic-tacotron-tts with MIT License

def _stft_parameters():
    n_fft = (hparams.num_freq - 1) * 2
    hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
    return n_fft, hop_length, win_length
Example #7
Source File: audio.py From arabic-tacotron-tts with MIT License

def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
Example #8
Source File: audio.py From tacotron with MIT License

def _stft_parameters():
    n_fft = (hparams.num_freq - 1) * 2
    hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
    return n_fft, hop_length, win_length
Example #9
Source File: audio.py From tacotron with MIT License

def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
Example #10
Source File: feeder.py From gmvae_tacotron with MIT License

def __init__(self, coordinator, metadata_filename, hparams):
    super(Feeder, self).__init__()
    self._coord = coordinator
    self._hparams = hparams
    self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    self._offset = 0

    # Load metadata
    self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
    self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
    with open(metadata_filename, encoding='utf-8') as f:
        self._metadata = [line.strip().split('|') for line in f]
        # hop_size / sample_rate is in seconds, despite the _ms name.
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

    # Create placeholders for inputs and targets. Don't specify batch size because we want
    # to be able to feed different batch sizes at eval time.
    self._placeholders = [
        tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
        tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
        tf.placeholder(tf.int32, [None], 'mel_lengths'),
        tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
    ]

    # Create queue for buffering data
    queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32],
                         name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths, \
        self.token_targets, self.linear_targets = queue.dequeue()
    self.inputs.set_shape(self._placeholders[0].shape)
    self.input_lengths.set_shape(self._placeholders[1].shape)
    self.mel_targets.set_shape(self._placeholders[2].shape)
    self.mel_lengths.set_shape(self._placeholders[3].shape)
    self.token_targets.set_shape(self._placeholders[4].shape)
    self.linear_targets.set_shape(self._placeholders[5].shape)
Example #11
Source File: audio.py From Tacotron2-PyTorch with MIT License

def _stft_parameters():
    n_fft = (hps.num_freq - 1) * 2
    hop_length = int(hps.frame_shift_ms / 1000 * hps.sample_rate)
    win_length = int(hps.frame_length_ms / 1000 * hps.sample_rate)
    return n_fft, hop_length, win_length
Example #12
Source File: audio.py From Tacotron2-PyTorch with MIT License

def _build_mel_basis():
    n_fft = (hps.num_freq - 1) * 2
    return librosa.filters.mel(hps.sample_rate, n_fft, n_mels=hps.num_mels)
Example #13
Source File: synthesizer.py From vae_tacotron2 with MIT License

def synthesize(self, text, index, out_dir, log_dir, mel_filename, reference_mel):
    cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)
    feed_dict = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32)
    }

    if self.gta:
        feed_dict[self.model.mel_targets] = np.load(mel_filename).reshape(1, -1, 80)
        feed_dict[self.model.reference_mel] = np.load(mel_filename).reshape(1, -1, 80)
    elif hparams.use_vae:
        reference_mel = [np.asarray(reference_mel, dtype=np.float32)]
        feed_dict[self.model.reference_mel] = reference_mel

    if self.gta or not hparams.predict_linear:
        mels, alignment = self.session.run([self.mel_outputs, self.alignment], feed_dict=feed_dict)
    else:
        linear, mels, alignment = self.session.run(
            [self.linear_outputs, self.mel_outputs, self.alignment], feed_dict=feed_dict)
        linear = linear.reshape(-1, hparams.num_freq)

    mels = mels.reshape(-1, hparams.num_mels)  # Thanks to @imdatsolak for pointing this out

    # Write the spectrogram to disk
    # Note: output mel-spectrogram files and target ones have the same names, just different folders
    mel_filename = os.path.join(out_dir, 'speech-mel-{:05d}.npy'.format(index))
    np.save(mel_filename, mels, allow_pickle=False)

    if log_dir is not None:
        # save wav (mel -> wav)
        wav = audio.inv_mel_spectrogram(mels.T)
        audio.save_wav(wav, os.path.join(log_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(index)))

        if hparams.predict_linear:
            # save wav (linear -> wav)
            wav = audio.inv_linear_spectrogram(linear.T)
            audio.save_wav(wav, os.path.join(log_dir, 'wavs/speech-wav-{:05d}-linear.wav'.format(index)))

        # save alignments
        plot.plot_alignment(alignment, os.path.join(log_dir, 'plots/speech-alignment-{:05d}.png'.format(index)),
                            info='{}'.format(text), split_title=True)

        # save mel spectrogram plot
        plot.plot_spectrogram(mels, os.path.join(log_dir, 'plots/speech-mel-{:05d}.png'.format(index)),
                              info='{}'.format(text), split_title=True)

    return mel_filename
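The reshape to (-1, hparams.num_freq) flattens the batch dimension out of the model's (1, T, num_freq) linear output, and the inversion helpers then consume a (num_freq, T) array, hence the .T before inv_linear_spectrogram. A minimal NumPy illustration of that shape flow (sizes are illustrative):

import numpy as np

num_freq, T = 1025, 400  # illustrative spectrogram size
linear = np.random.rand(1, T, num_freq).astype(np.float32)  # model output: (batch, frames, bins)

linear = linear.reshape(-1, num_freq)  # -> (frames, bins), as in synthesize()
print(linear.shape)                    # (400, 1025)
print(linear.T.shape)                  # (1025, 400): what inv_linear_spectrogram consumes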
Example #14
Source File: synthesizer.py From gmvae_tacotron with MIT License

def synthesize(self, text, index, out_dir, log_dir, mel_filename, reference_mel):
    cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)
    feed_dict = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32)
    }

    if self.gta:
        feed_dict[self.model.mel_targets] = np.load(mel_filename).reshape(1, -1, 80)
        feed_dict[self.model.reference_mel] = np.load(mel_filename).reshape(1, -1, 80)
    elif hparams.use_vae:
        reference_mel = [np.asarray(reference_mel, dtype=np.float32)]
        feed_dict[self.model.reference_mel] = reference_mel

    if self.gta or not hparams.predict_linear:
        mels, alignment = self.session.run([self.mel_outputs, self.alignment], feed_dict=feed_dict)
    else:
        linear, mels, alignment = self.session.run(
            [self.linear_outputs, self.mel_outputs, self.alignment], feed_dict=feed_dict)
        linear = linear.reshape(-1, hparams.num_freq)

    mels = mels.reshape(-1, hparams.num_mels)  # Thanks to @imdatsolak for pointing this out

    # Write the spectrogram to disk
    # Note: output mel-spectrogram files and target ones have the same names, just different folders
    mel_filename = os.path.join(out_dir, 'speech-mel-{:05d}.npy'.format(index))
    np.save(mel_filename, mels, allow_pickle=False)

    if log_dir is not None:
        # save wav (mel -> wav)
        wav = audio.inv_mel_spectrogram(mels.T)
        audio.save_wav(wav, os.path.join(log_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(index)))

        if hparams.predict_linear:
            # save wav (linear -> wav)
            wav = audio.inv_linear_spectrogram(linear.T)
            audio.save_wav(wav, os.path.join(log_dir, 'wavs/speech-wav-{:05d}-linear.wav'.format(index)))

        # save alignments
        plot.plot_alignment(alignment, os.path.join(log_dir, 'plots/speech-alignment-{:05d}.png'.format(index)),
                            info='{}'.format(text), split_title=True)

        # save mel spectrogram plot
        plot.plot_spectrogram(mels, os.path.join(log_dir, 'plots/speech-mel-{:05d}.png'.format(index)),
                              info='{}'.format(text), split_title=True)

    return mel_filename