Python hparams.hparams.num_mels() Examples
The following are 30 code examples of hparams.hparams.num_mels().
Each example is listed with its original project, source file, and license.
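For context: in these projects, num_mels is typically a plain attribute on a shared hyperparameter object imported as hparams (or hp/hps). Below is a minimal sketch of such a module; the class name and values are illustrative assumptions, not taken from any of the projects listed here.

# hparams.py -- hypothetical minimal sketch of the shared hyperparameter module.
class HParams:
    num_mels = 80        # number of mel filterbank channels
    num_freq = 1025      # linear-spectrogram bins; n_fft = (num_freq - 1) * 2
    sample_rate = 22050  # audio sampling rate in Hz
    fmin = 0             # lowest mel filter frequency (Hz)
    fmax = 8000          # highest mel filter frequency (Hz)

# Module-level instance, so callers can write `from hparams import hparams`
# and read `hparams.num_mels`.
hparams = HParams()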
Example #1
Source File: model.py From Tacotron2-PyTorch with MIT License | 6 votes |
def get_alignment_energies(self, query, processed_memory, attention_weights_cat):
    '''
    PARAMS
    ------
    query: decoder output (batch, num_mels * n_frames_per_step)
    processed_memory: processed encoder outputs (B, T_in, attention_dim)
    attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)

    RETURNS
    -------
    alignment (batch, max_time)
    '''
    processed_query = self.query_layer(query.unsqueeze(1))
    processed_attention_weights = self.location_layer(attention_weights_cat)
    energies = self.v(torch.tanh(
        processed_query + processed_attention_weights + processed_memory))
    energies = energies.squeeze(-1)
    return energies
Example #2
Source File: train_vocoder.py From Tacotron2-Wavenet-Korean-TTS with MIT License | 6 votes |
def create_network(hp, batch_size, num_speakers, is_training):
    net = WaveNetModel(
        batch_size=batch_size,
        dilations=hp.dilations,
        filter_width=hp.filter_width,
        residual_channels=hp.residual_channels,
        dilation_channels=hp.dilation_channels,
        quantization_channels=hp.quantization_channels,
        out_channels=hp.out_channels,
        skip_channels=hp.skip_channels,
        use_biases=hp.use_biases,  # True
        scalar_input=hp.scalar_input,
        global_condition_channels=hp.gc_channels,
        global_condition_cardinality=num_speakers,
        local_condition_channels=hp.num_mels,
        upsample_factor=hp.upsample_factor,
        legacy=hp.legacy,
        residual_legacy=hp.residual_legacy,
        drop_rate=hp.wavenet_dropout,
        train_mode=is_training)
    return net
Example #3
Source File: model.py From WaveRNN-Pytorch with MIT License | 6 votes |
def build_model():
    """Build the model with hparams settings."""
    if hp.input_type == 'raw':
        print('building model with Beta distribution output')
    elif hp.input_type == 'mixture':
        print("building model with mixture of logistic output")
    elif hp.input_type == 'bits':
        print("building model with quantized bit audio")
    elif hp.input_type == 'mulaw':
        print("building model with quantized mulaw encoding")
    else:
        raise ValueError('input_type provided not supported')
    model = Model(hp.rnn_dims, hp.fc_dims, hp.bits, hp.pad,
                  hp.upsample_factors, hp.num_mels,
                  hp.compute_dims, hp.res_out_dims, hp.res_blocks)
    return model
Example #4
Source File: model.py From Tacotron2-PyTorch with MIT License | 5 votes |
def __init__(self):
    super(Tacotron2, self).__init__()
    self.num_mels = hps.num_mels
    self.mask_padding = hps.mask_padding
    self.n_frames_per_step = hps.n_frames_per_step
    self.embedding = nn.Embedding(
        hps.n_symbols, hps.symbols_embedding_dim)
    std = sqrt(2.0 / (hps.n_symbols + hps.symbols_embedding_dim))
    val = sqrt(3.0) * std  # uniform bounds for std
    self.embedding.weight.data.uniform_(-val, val)
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.postnet = Postnet()
Example #5
Source File: audio.py From Griffin_lim with MIT License | 5 votes |
def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
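The basis returned above has shape (num_mels, num_freq) and is usually applied to a magnitude spectrogram with a matrix product. A hedged sketch follows; the helper name _linear_to_mel is an assumption, not shown on this page.

import numpy as np

def _linear_to_mel(spectrogram, mel_basis):
    # (num_mels, num_freq) @ (num_freq, T) -> (num_mels, T)
    return np.dot(mel_basis, spectrogram)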
Example #6
Source File: audio.py From Tacotron2-PyTorch with MIT License | 5 votes |
def _build_mel_basis():
    n_fft = (hps.num_freq - 1) * 2
    return librosa.filters.mel(hps.sample_rate, n_fft, n_mels=hps.num_mels)
Example #7
Source File: model.py From Tacotron2-PyTorch with MIT License | 5 votes |
def __init__(self):
    super(Postnet, self).__init__()
    self.convolutions = nn.ModuleList()

    self.convolutions.append(
        nn.Sequential(
            ConvNorm(hps.num_mels, hps.postnet_embedding_dim,
                     kernel_size=hps.postnet_kernel_size, stride=1,
                     padding=int((hps.postnet_kernel_size - 1) / 2),
                     dilation=1, w_init_gain='tanh'),
            nn.BatchNorm1d(hps.postnet_embedding_dim))
    )

    for i in range(1, hps.postnet_n_convolutions - 1):
        self.convolutions.append(
            nn.Sequential(
                ConvNorm(hps.postnet_embedding_dim,
                         hps.postnet_embedding_dim,
                         kernel_size=hps.postnet_kernel_size, stride=1,
                         padding=int((hps.postnet_kernel_size - 1) / 2),
                         dilation=1, w_init_gain='tanh'),
                nn.BatchNorm1d(hps.postnet_embedding_dim))
        )

    self.convolutions.append(
        nn.Sequential(
            ConvNorm(hps.postnet_embedding_dim, hps.num_mels,
                     kernel_size=hps.postnet_kernel_size, stride=1,
                     padding=int((hps.postnet_kernel_size - 1) / 2),
                     dilation=1, w_init_gain='linear'),
            nn.BatchNorm1d(hps.num_mels))
    )
Example #8
Source File: model.py From Tacotron2-PyTorch with MIT License | 5 votes |
def __init__(self):
    super(Decoder, self).__init__()
    self.num_mels = hps.num_mels
    self.n_frames_per_step = hps.n_frames_per_step
    self.encoder_embedding_dim = hps.encoder_embedding_dim
    self.attention_rnn_dim = hps.attention_rnn_dim
    self.decoder_rnn_dim = hps.decoder_rnn_dim
    self.prenet_dim = hps.prenet_dim
    self.max_decoder_steps = hps.max_decoder_steps
    self.gate_threshold = hps.gate_threshold
    self.p_attention_dropout = hps.p_attention_dropout
    self.p_decoder_dropout = hps.p_decoder_dropout

    self.prenet = Prenet(
        hps.num_mels * hps.n_frames_per_step,
        [hps.prenet_dim, hps.prenet_dim])

    self.attention_rnn = nn.LSTMCell(
        hps.prenet_dim + hps.encoder_embedding_dim,
        hps.attention_rnn_dim)

    self.attention_layer = Attention(
        hps.attention_rnn_dim, hps.encoder_embedding_dim,
        hps.attention_dim, hps.attention_location_n_filters,
        hps.attention_location_kernel_size)

    self.decoder_rnn = nn.LSTMCell(
        hps.attention_rnn_dim + hps.encoder_embedding_dim,
        hps.decoder_rnn_dim, 1)

    self.linear_projection = LinearNorm(
        hps.decoder_rnn_dim + hps.encoder_embedding_dim,
        hps.num_mels * hps.n_frames_per_step)

    self.gate_layer = LinearNorm(
        hps.decoder_rnn_dim + hps.encoder_embedding_dim, 1,
        bias=True, w_init_gain='sigmoid')
Example #9
Source File: model.py From Tacotron2-PyTorch with MIT License | 5 votes |
def get_go_frame(self, memory):
    '''
    Gets all zeros frames to use as first decoder input

    PARAMS
    ------
    memory: decoder outputs

    RETURNS
    -------
    decoder_input: all zeros frames
    '''
    B = memory.size(0)
    # Variable is a no-op wrapper in modern PyTorch; a plain tensor works too.
    decoder_input = Variable(memory.data.new(
        B, self.num_mels * self.n_frames_per_step).zero_())
    return decoder_input
Example #10
Source File: model.py From Tacotron2-PyTorch with MIT License | 5 votes |
def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
    '''
    Prepares decoder outputs for output

    PARAMS
    ------
    mel_outputs: mel spectrogram frames
    gate_outputs: gate output energies
    alignments: attention weights

    RETURNS
    -------
    mel_outputs:
    gate_outputs: gate output energies
    alignments:
    '''
    # (T_out, B) -> (B, T_out)
    alignments = torch.stack(alignments).transpose(0, 1)
    # (T_out, B) -> (B, T_out)
    gate_outputs = torch.stack(gate_outputs).transpose(0, 1)
    gate_outputs = gate_outputs.contiguous()
    # (T_out, B, num_mels) -> (B, T_out, num_mels)
    mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous()
    # decouple frames per step
    mel_outputs = mel_outputs.view(
        mel_outputs.size(0), -1, self.num_mels)
    # (B, T_out, num_mels) -> (B, num_mels, T_out)
    mel_outputs = mel_outputs.transpose(1, 2)
    return mel_outputs, gate_outputs, alignments
Example #11
Source File: synthesizer.py From gmvae_tacotron with MIT License | 5 votes |
def load(self, checkpoint_path, gta=False, model_name='Tacotron'):
    print('Constructing model: %s' % model_name)
    inputs = tf.placeholder(tf.int32, [1, None], 'inputs')
    input_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')
    with tf.variable_scope('model') as scope:
        self.model = create_model(model_name, hparams)
        if hparams.use_vae:
            ref_targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'ref_targets')
        if gta:
            targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'mel_targets')
            if hparams.use_vae:
                self.model.initialize(inputs, input_lengths, targets, gta=gta, reference_mel=ref_targets)
            else:
                self.model.initialize(inputs, input_lengths, targets, gta=gta)
        else:
            if hparams.use_vae:
                self.model.initialize(inputs, input_lengths, reference_mel=ref_targets)
            else:
                self.model.initialize(inputs, input_lengths)
        self.mel_outputs = self.model.mel_outputs
        self.alignment = self.model.alignments[0]

    self.gta = gta
    print('Loading checkpoint: %s' % checkpoint_path)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(self.session, checkpoint_path)
Example #12
Source File: model.py From Tacotron2-PyTorch with MIT License | 5 votes |
def parse_output(self, outputs, output_lengths=None):
    if self.mask_padding and output_lengths is not None:
        mask = ~get_mask_from_lengths(output_lengths, True)  # (B, T)
        mask = mask.expand(self.num_mels, mask.size(0), mask.size(1))  # (80, B, T)
        mask = mask.permute(1, 0, 2)  # (B, 80, T)

        outputs[0].data.masked_fill_(mask, 0.0)  # (B, 80, T)
        outputs[1].data.masked_fill_(mask, 0.0)  # (B, 80, T)

        slice = torch.arange(0, mask.size(2), self.n_frames_per_step)
        outputs[2].data.masked_fill_(
            mask[:, 0, slice], 1e3)  # gate energies (B, T//n_frames_per_step)
    return outputs
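The get_mask_from_lengths helper used above is not shown on this page. A plausible reconstruction, inferred only from how the mask is used here, is:

import torch

def get_mask_from_lengths(lengths, pad=False):
    # Hypothetical sketch: True for valid (non-padded) time steps, so the
    # caller's `~` flips it to mark padding. The real project's `pad` flag
    # presumably rounds T up for frame grouping; that detail is omitted here.
    max_len = int(torch.max(lengths).item())
    ids = torch.arange(max_len, device=lengths.device)
    return ids.unsqueeze(0) < lengths.unsqueeze(1)  # (B, T) bool mask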
Example #13
Source File: datafeeder_tacotron2.py From Tacotron2-Wavenet-Korean-TTS with MIT License | 5 votes |
def _pad_target(t, length):
    # t: 2-dim array (xx, num_mels) ==> (length, num_mels)
    return np.pad(t, [(0, length - t.shape[0]), (0, 0)],
                  mode='constant', constant_values=_pad)  # (169, 80) ==> (length, 80)
Example #14
Source File: predict_mel.py From self-attention-tacotron with BSD 3-Clause "New" or "Revised" License | 5 votes |
def predict(hparams, model_dir, checkpoint_path, output_dir, test_source_files, test_target_files):
    def predict_input_fn():
        source = tf.data.TFRecordDataset(list(test_source_files))
        target = tf.data.TFRecordDataset(list(test_target_files))
        dataset = dataset_factory(source, target, hparams)
        batched = dataset.prepare_and_zip().group_by_batch(
            batch_size=1).merge_target_to_source()
        return batched.dataset

    estimator = tacotron_model_factory(hparams, model_dir, None)

    predictions = map(
        lambda p: PredictedMel(p["id"], p["key"], p["mel"], p.get("mel_postnet"),
                               p["mel"].shape[1], p["mel"].shape[0],
                               p["ground_truth_mel"], p["alignment"],
                               p.get("alignment2"), p.get("alignment3"),
                               p.get("alignment4"), p.get("alignment5"),
                               p.get("alignment6"), p["source"], p["text"],
                               p.get("accent_type")),
        estimator.predict(predict_input_fn, checkpoint_path=checkpoint_path))

    for v in predictions:
        key = v.key.decode('utf-8')
        mel_filename = f"{key}.{hparams.predicted_mel_extension}"
        mel_filepath = os.path.join(output_dir, mel_filename)
        mel = v.predicted_mel_postnet if hparams.use_postnet_v2 else v.predicted_mel
        assert mel.shape[1] == hparams.num_mels
        mel.tofile(mel_filepath, format='<f4')
        text = v.text.decode("utf-8")
        plot_filename = f"{key}.png"
        plot_filepath = os.path.join(output_dir, plot_filename)
        alignments = list(filter(lambda x: x is not None,
                                 [v.alignment, v.alignment2, v.alignment3,
                                  v.alignment4, v.alignment5, v.alignment6]))
        plot_predictions(alignments, v.ground_truth_mel, v.predicted_mel,
                         v.predicted_mel_postnet, text, v.key, plot_filepath)
        prediction_filename = f"{key}.tfrecord"
        prediction_filepath = os.path.join(output_dir, prediction_filename)
        write_prediction_result(v.id, key, alignments, mel, v.ground_truth_mel,
                                text, v.source, v.accent_type, prediction_filepath)
Example #15
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #16
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #17
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #18
Source File: audio.py From representation_mixing with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #19
Source File: datafeeder_wavenet.py From Tacotron-Wavenet-Vocoder-Korean with MIT License | 5 votes |
def __init__(self, coord, data_dirs, batch_size, receptive_field,
             gc_enable=False, queue_size=8):
    super(DataFeederWavenet, self).__init__()
    self.data_dirs = data_dirs
    self.coord = coord
    self.batch_size = batch_size
    self.receptive_field = receptive_field
    self.hop_size = audio.get_hop_size(hparams)
    self.sample_size = ensure_divisible(hparams.sample_size, self.hop_size, True)
    self.max_frames = self.sample_size // self.hop_size  # to guarantee a full sample_size chunk
    self.queue_size = queue_size
    self.gc_enable = gc_enable
    self.skip_path_filter = hparams.skip_path_filter

    self.rng = np.random.RandomState(123)
    self._offset = defaultdict(lambda: 2)  # missing keys default to 2
    self.data_dir_to_id = {data_dir: idx for idx, data_dir in enumerate(self.data_dirs)}  # maps data_dir <-> speaker_id
    self.path_dict = get_path_dict(self.data_dirs, np.max([self.sample_size, receptive_field]))  # drops clips shorter than receptive_field and returns the rest

    self._placeholders = [
        tf.placeholder(tf.float32, shape=[None, None, 1], name='input_wav'),
        tf.placeholder(tf.float32, shape=[None, None, hparams.num_mels], name='local_condition')
    ]
    dtypes = [tf.float32, tf.float32]

    if self.gc_enable:
        self._placeholders.append(tf.placeholder(tf.int32, shape=[None], name='speaker_id'))
        dtypes.append(tf.int32)

    queue = tf.FIFOQueue(self.queue_size, dtypes, name='input_queue')
    self.enqueue = queue.enqueue(self._placeholders)

    if self.gc_enable:
        self.inputs_wav, self.local_condition, self.speaker_id = queue.dequeue()
    else:
        self.inputs_wav, self.local_condition = queue.dequeue()

    self.inputs_wav.set_shape(self._placeholders[0].shape)
    self.local_condition.set_shape(self._placeholders[1].shape)
    if self.gc_enable:
        self.speaker_id.set_shape(self._placeholders[2].shape)
Example #20
Source File: audio.py From vae_tacotron with MIT License | 5 votes |
def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
Example #21
Source File: audio.py From vae_tacotron2 with MIT License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               n_mels=hparams.num_mels,
                               fmin=hparams.fmin, fmax=hparams.fmax)
Example #22
Source File: audio.py From vae_tacotron2 with MIT License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               n_mels=hparams.num_mels,
                               fmin=hparams.fmin, fmax=hparams.fmax)
Example #23
Source File: feeder.py From vae_tacotron2 with MIT License | 5 votes |
def __init__(self, coordinator, metadata_filename, hparams):
    super(Feeder, self).__init__()
    self._coord = coordinator
    self._hparams = hparams
    self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    self._offset = 0

    # Load metadata
    self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
    self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
    with open(metadata_filename, encoding='utf-8') as f:
        self._metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

    # Create placeholders for inputs and targets. Don't specify batch size because we want
    # to be able to feed different batch sizes at eval time.
    self._placeholders = [
        tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
        tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
        tf.placeholder(tf.int32, [None], 'mel_lengths'),
        tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
    ]

    # Create queue for buffering data
    queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32], name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths, self.token_targets, self.linear_targets = queue.dequeue()

    self.inputs.set_shape(self._placeholders[0].shape)
    self.input_lengths.set_shape(self._placeholders[1].shape)
    self.mel_targets.set_shape(self._placeholders[2].shape)
    self.mel_lengths.set_shape(self._placeholders[3].shape)
    self.token_targets.set_shape(self._placeholders[4].shape)
    self.linear_targets.set_shape(self._placeholders[5].shape)
Example #24
Source File: synthesizer.py From vae_tacotron2 with MIT License | 5 votes |
def load(self, checkpoint_path, gta=False, model_name='Tacotron'):
    print('Constructing model: %s' % model_name)
    inputs = tf.placeholder(tf.int32, [1, None], 'inputs')
    input_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')
    with tf.variable_scope('model') as scope:
        self.model = create_model(model_name, hparams)
        if hparams.use_vae:
            ref_targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'ref_targets')
        if gta:
            targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'mel_targets')
            if hparams.use_vae:
                self.model.initialize(inputs, input_lengths, targets, gta=gta, reference_mel=ref_targets)
            else:
                self.model.initialize(inputs, input_lengths, targets, gta=gta)
        else:
            if hparams.use_vae:
                self.model.initialize(inputs, input_lengths, reference_mel=ref_targets)
            else:
                self.model.initialize(inputs, input_lengths)
        self.mel_outputs = self.model.mel_outputs
        self.alignment = self.model.alignments[0]

    self.gta = gta
    print('Loading checkpoint: %s' % checkpoint_path)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(self.session, checkpoint_path)
Example #25
Source File: audio.py From arabic-tacotron-tts with MIT License | 5 votes |
def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
Example #26
Source File: feeder.py From gmvae_tacotron with MIT License | 5 votes |
def __init__(self, coordinator, metadata_filename, hparams):
    super(Feeder, self).__init__()
    self._coord = coordinator
    self._hparams = hparams
    self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
    self._offset = 0

    # Load metadata
    self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
    self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
    with open(metadata_filename, encoding='utf-8') as f:
        self._metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

    # Create placeholders for inputs and targets. Don't specify batch size because we want
    # to be able to feed different batch sizes at eval time.
    self._placeholders = [
        tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
        tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
        tf.placeholder(tf.int32, [None], 'mel_lengths'),
        tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
        tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
    ]

    # Create queue for buffering data
    queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32], name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths, self.token_targets, self.linear_targets = queue.dequeue()

    self.inputs.set_shape(self._placeholders[0].shape)
    self.input_lengths.set_shape(self._placeholders[1].shape)
    self.mel_targets.set_shape(self._placeholders[2].shape)
    self.mel_lengths.set_shape(self._placeholders[3].shape)
    self.token_targets.set_shape(self._placeholders[4].shape)
    self.linear_targets.set_shape(self._placeholders[5].shape)
Example #27
Source File: audio.py From cnn_vocoder with MIT License | 5 votes |
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    # Note: n_fft is not defined inside this function; it is presumably a
    # module-level constant in the original source file.
    return librosa.filters.mel(hparams.sample_rate, n_fft,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #28
Source File: audio.py From tacotron with MIT License | 5 votes |
def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels)
Example #29
Source File: audio.py From WaveRNN-Pytorch with MIT License | 5 votes |
def _build_mel_basis():
    if hparams.fmax is not None:
        assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels)
Example #30
Source File: model.py From WaveRNN-Pytorch with MIT License | 5 votes |
def no_test_build_model():
    model = Model(hp.rnn_dims, hp.fc_dims, hp.bits, hp.pad,
                  hp.upsample_factors, hp.num_mels,
                  hp.compute_dims, hp.res_out_dims, hp.res_blocks).cuda()
    print(vars(model))