Python hparams.hparams.num_mels Examples

The following are 26 code examples of the hparams.hparams.num_mels attribute, which holds the number of mel filterbank channels used for spectrogram features. You can go to the original project or source file by following the links above each example.
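In these projects, num_mels is typically 80. As a point of reference, here is a minimal stand-in for the kind of hyperparameter object the snippets below read from; the values are assumptions, and real projects usually use tf.contrib.training.HParams or an argparse namespace instead:

from types import SimpleNamespace

# Assumed illustrative values, not taken from any one project below.
hparams = SimpleNamespace(
    num_mels=80,       # number of mel filterbank channels
    num_freq=1025,     # linear-frequency bins; n_fft = (num_freq - 1) * 2
    sample_rate=22050,
    fmin=125,
    fmax=7600,
)

print(hparams.num_mels)  # 80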
Example #1
Source File: model.py    From Tacotron2-PyTorch with MIT License
def get_alignment_energies(self, query, processed_memory,
							   attention_weights_cat):
		'''
		PARAMS
		------
		query: decoder output (batch, num_mels * n_frames_per_step)
		processed_memory: processed encoder outputs (B, T_in, attention_dim)
		attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)

		RETURNS
		-------
		alignment (batch, max_time)
		'''

		processed_query = self.query_layer(query.unsqueeze(1))
		processed_attention_weights = self.location_layer(attention_weights_cat)
		energies = self.v(torch.tanh(
			processed_query + processed_attention_weights + processed_memory))

		energies = energies.squeeze(-1)
		return energies 
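The addition inside torch.tanh relies on broadcasting (B, 1, attention_dim) against (B, T_in, attention_dim). A standalone sketch with plain nn.Linear/nn.Conv1d standing in for the project's LinearNorm/ConvNorm layers (all dimensions are assumptions):

import torch
import torch.nn as nn

B, T_in, attention_dim, attn_rnn_dim = 2, 50, 128, 1024

query_layer = nn.Linear(attn_rnn_dim, attention_dim, bias=False)   # stand-in for self.query_layer
location_conv = nn.Conv1d(2, attention_dim, kernel_size=31,
                          padding=15, bias=False)                  # stand-in for self.location_layer
v = nn.Linear(attention_dim, 1, bias=False)                        # stand-in for self.v

query = torch.randn(B, attn_rnn_dim)                   # decoder output for one step
processed_memory = torch.randn(B, T_in, attention_dim)
attention_weights_cat = torch.randn(B, 2, T_in)        # prev + cumulative attention weights

processed_query = query_layer(query.unsqueeze(1))                         # (B, 1, attention_dim)
processed_weights = location_conv(attention_weights_cat).transpose(1, 2)  # (B, T_in, attention_dim)
energies = v(torch.tanh(processed_query + processed_weights + processed_memory)).squeeze(-1)
print(energies.shape)  # torch.Size([2, 50]) == (B, max_time)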
Example #2
Source File: train_vocoder.py    From Tacotron2-Wavenet-Korean-TTS with MIT License
def create_network(hp,batch_size,num_speakers,is_training):
    net = WaveNetModel(
        batch_size=batch_size,
        dilations=hp.dilations,
        filter_width=hp.filter_width,
        residual_channels=hp.residual_channels,
        dilation_channels=hp.dilation_channels,
        quantization_channels=hp.quantization_channels,
        out_channels =hp.out_channels,
        skip_channels=hp.skip_channels,
        use_biases=hp.use_biases,  #  True
        scalar_input=hp.scalar_input,
        global_condition_channels=hp.gc_channels,
        global_condition_cardinality=num_speakers,
        local_condition_channels=hp.num_mels,
        upsample_factor=hp.upsample_factor,
        legacy = hp.legacy,
        residual_legacy = hp.residual_legacy,
        drop_rate = hp.wavenet_dropout,
        train_mode=is_training)
    
    return net 
Example #3
Source File: model.py    From WaveRNN-Pytorch with MIT License
def build_model():
    """build model with hparams settings

    """
    if hp.input_type == 'raw':
        print('building model with Beta distribution output')
    elif hp.input_type == 'mixture':
        print("building model with mixture of logistic output")
    elif hp.input_type == 'bits':
        print("building model with quantized bit audio")
    elif hp.input_type == 'mulaw':
        print("building model with quantized mulaw encoding")
    else:
        raise ValueError('input_type provided not supported')
    model = Model(hp.rnn_dims, hp.fc_dims, hp.bits,
        hp.pad, hp.upsample_factors, hp.num_mels,
        hp.compute_dims, hp.res_out_dims, hp.res_blocks)

    return model 
Example #4
Source File: model.py    From Tacotron2-PyTorch with MIT License
def __init__(self):
		super(Tacotron2, self).__init__()
		self.num_mels = hps.num_mels
		self.mask_padding = hps.mask_padding
		self.n_frames_per_step = hps.n_frames_per_step
		self.embedding = nn.Embedding(
			hps.n_symbols, hps.symbols_embedding_dim)
		std = sqrt(2.0/(hps.n_symbols+hps.symbols_embedding_dim))
		val = sqrt(3.0)*std  # uniform bounds for std
		self.embedding.weight.data.uniform_(-val, val)
		self.encoder = Encoder()
		self.decoder = Decoder()
		self.postnet = Postnet() 
Example #5
Source File: audio.py    From Griffin_lim with MIT License
def _build_mel_basis():
    n_fft = (hparams.num_freq - 1) * 2
    return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels) 
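The returned basis is a (num_mels, num_freq) matrix that projects a linear-frequency magnitude spectrogram onto the mel scale. A usage sketch with assumed hparams values (the positional sample_rate/n_fft arguments match the older librosa signature used in these snippets; librosa >= 0.10 requires sr= and n_fft= keywords):

import numpy as np
import librosa

sample_rate, num_freq, num_mels = 22050, 1025, 80  # assumed hparams values
n_fft = (num_freq - 1) * 2

mel_basis = librosa.filters.mel(sample_rate, n_fft, n_mels=num_mels)  # (80, 1025)
S = np.abs(np.random.randn(num_freq, 100))  # stand-in magnitude spectrogram, (num_freq, T)
mel = np.dot(mel_basis, S)                  # (num_mels, T)
print(mel_basis.shape, mel.shape)           # (80, 1025) (80, 100)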
Example #6
Source File: audio.py    From Tacotron2-PyTorch with MIT License
def _build_mel_basis():
	n_fft = (hps.num_freq - 1) * 2
	return librosa.filters.mel(hps.sample_rate, n_fft, n_mels=hps.num_mels) 
Example #7
Source File: model.py    From Tacotron2-PyTorch with MIT License
def __init__(self):
		super(Postnet, self).__init__()
		self.convolutions = nn.ModuleList()

		self.convolutions.append(
			nn.Sequential(
				ConvNorm(hps.num_mels, hps.postnet_embedding_dim,
						 kernel_size=hps.postnet_kernel_size, stride=1,
						 padding=int((hps.postnet_kernel_size - 1) / 2),
						 dilation=1, w_init_gain='tanh'),
				nn.BatchNorm1d(hps.postnet_embedding_dim))
		)

		for i in range(1, hps.postnet_n_convolutions - 1):
			self.convolutions.append(
				nn.Sequential(
					ConvNorm(hps.postnet_embedding_dim,
							 hps.postnet_embedding_dim,
							 kernel_size=hps.postnet_kernel_size, stride=1,
							 padding=int((hps.postnet_kernel_size - 1) / 2),
							 dilation=1, w_init_gain='tanh'),
					nn.BatchNorm1d(hps.postnet_embedding_dim))
			)

		self.convolutions.append(
			nn.Sequential(
				ConvNorm(hps.postnet_embedding_dim, hps.num_mels,
						 kernel_size=hps.postnet_kernel_size, stride=1,
						 padding=int((hps.postnet_kernel_size - 1) / 2),
						 dilation=1, w_init_gain='linear'),
				nn.BatchNorm1d(hps.num_mels))
			) 
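The channel flow is num_mels -> postnet_embedding_dim -> ... -> num_mels, with tanh on every layer except the last. An equivalent sketch using plain nn.Conv1d instead of the project's ConvNorm, with the usual Tacotron 2 defaults assumed:

import torch
import torch.nn as nn

num_mels, embed_dim, k, n_convs = 80, 512, 5, 5  # assumed hps values
pad = (k - 1) // 2

layers = [nn.Sequential(nn.Conv1d(num_mels, embed_dim, k, padding=pad),
                        nn.BatchNorm1d(embed_dim), nn.Tanh())]
for _ in range(n_convs - 2):
    layers.append(nn.Sequential(nn.Conv1d(embed_dim, embed_dim, k, padding=pad),
                                nn.BatchNorm1d(embed_dim), nn.Tanh()))
layers.append(nn.Sequential(nn.Conv1d(embed_dim, num_mels, k, padding=pad),
                            nn.BatchNorm1d(num_mels)))  # last layer stays linear
postnet = nn.Sequential(*layers)

x = torch.randn(2, num_mels, 100)  # (B, num_mels, T)
print(postnet(x).shape)            # torch.Size([2, 80, 100]); the residual add happens in the caller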
Example #8
Source File: model.py    From Tacotron2-PyTorch with MIT License
def __init__(self):
		super(Decoder, self).__init__()
		self.num_mels = hps.num_mels
		self.n_frames_per_step = hps.n_frames_per_step
		self.encoder_embedding_dim = hps.encoder_embedding_dim
		self.attention_rnn_dim = hps.attention_rnn_dim
		self.decoder_rnn_dim = hps.decoder_rnn_dim
		self.prenet_dim = hps.prenet_dim
		self.max_decoder_steps = hps.max_decoder_steps
		self.gate_threshold = hps.gate_threshold
		self.p_attention_dropout = hps.p_attention_dropout
		self.p_decoder_dropout = hps.p_decoder_dropout

		self.prenet = Prenet(
			hps.num_mels * hps.n_frames_per_step,
			[hps.prenet_dim, hps.prenet_dim])

		self.attention_rnn = nn.LSTMCell(
			hps.prenet_dim + hps.encoder_embedding_dim,
			hps.attention_rnn_dim)

		self.attention_layer = Attention(
			hps.attention_rnn_dim, hps.encoder_embedding_dim,
			hps.attention_dim, hps.attention_location_n_filters,
			hps.attention_location_kernel_size)

		self.decoder_rnn = nn.LSTMCell(
			hps.attention_rnn_dim + hps.encoder_embedding_dim,
			hps.decoder_rnn_dim, 1)

		self.linear_projection = LinearNorm(
			hps.decoder_rnn_dim + hps.encoder_embedding_dim,
			hps.num_mels * hps.n_frames_per_step)

		self.gate_layer = LinearNorm(
			hps.decoder_rnn_dim + hps.encoder_embedding_dim, 1,
			bias=True, w_init_gain='sigmoid') 
Example #9
Source File: model.py    From Tacotron2-PyTorch with MIT License
def get_go_frame(self, memory):
		''' Gets all zeros frames to use as first decoder input
		PARAMS
		------
		memory: decoder outputs

		RETURNS
		-------
		decoder_input: all zeros frames
		'''
		B = memory.size(0)
		decoder_input = Variable(memory.data.new(
			B, self.num_mels * self.n_frames_per_step).zero_())
		return decoder_input 
Example #10
Source File: model.py    From Tacotron2-PyTorch with MIT License
def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
		''' Prepares decoder outputs for output
		PARAMS
		------
		mel_outputs:
		gate_outputs: gate output energies
		alignments:

		RETURNS
		-------
		mel_outputs:
		gate_outputs: gate output energies
		alignments:
		'''
		# (T_out, B) -> (B, T_out)
		alignments = torch.stack(alignments).transpose(0, 1)
		# (T_out, B) -> (B, T_out)
		gate_outputs = torch.stack(gate_outputs).transpose(0, 1)
		gate_outputs = gate_outputs.contiguous()
		# (T_out, B, num_mels) -> (B, T_out, num_mels)
		mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous()
		# decouple frames per step
		mel_outputs = mel_outputs.view(
			mel_outputs.size(0), -1, self.num_mels)
		# (B, T_out, num_mels) -> (B, num_mels, T_out)
		mel_outputs = mel_outputs.transpose(1, 2)

		return mel_outputs, gate_outputs, alignments 
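A shape walk-through of the mel branch with dummy tensors (r = n_frames_per_step; sizes are assumed):

import torch

B, T_out, num_mels, r = 2, 10, 80, 3

mel_outputs = [torch.randn(B, num_mels * r) for _ in range(T_out)]  # one entry per decoder step

stacked = torch.stack(mel_outputs).transpose(0, 1).contiguous()  # (B, T_out, num_mels * r)
decoupled = stacked.view(B, -1, num_mels)                        # (B, T_out * r, num_mels)
final = decoupled.transpose(1, 2)                                # (B, num_mels, T_out * r)
print(final.shape)  # torch.Size([2, 80, 30])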
Example #11
Source File: synthesizer.py    From gmvae_tacotron with MIT License
def load(self, checkpoint_path, gta=False, model_name='Tacotron'):
		print('Constructing model: %s' % model_name)
		inputs = tf.placeholder(tf.int32, [1, None], 'inputs')
		input_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')

		with tf.variable_scope('model') as scope:
			self.model = create_model(model_name, hparams)
			if hparams.use_vae:
				ref_targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'ref_targets')
			if gta:
				targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'mel_targets')
				
				if hparams.use_vae:
					self.model.initialize(inputs, input_lengths, targets, gta=gta, reference_mel=ref_targets)
				else:
					self.model.initialize(inputs, input_lengths, targets, gta=gta)
			else:
				if hparams.use_vae:
					self.model.initialize(inputs, input_lengths, reference_mel=ref_targets)
				else:
					self.model.initialize(inputs, input_lengths)
			self.mel_outputs = self.model.mel_outputs
			self.alignment = self.model.alignments[0]

		self.gta = gta
		print('Loading checkpoint: %s' % checkpoint_path)
		self.session = tf.Session()
		self.session.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(self.session, checkpoint_path) 
Example #12
Source File: model.py    From Tacotron2-PyTorch with MIT License
def parse_output(self, outputs, output_lengths=None):
		if self.mask_padding and output_lengths is not None:
			mask = ~get_mask_from_lengths(output_lengths, True) # (B, T)
			mask = mask.expand(self.num_mels, mask.size(0), mask.size(1)) # (80, B, T)
			mask = mask.permute(1, 0, 2) # (B, 80, T)
			
			outputs[0].data.masked_fill_(mask, 0.0) # (B, 80, T)
			outputs[1].data.masked_fill_(mask, 0.0) # (B, 80, T)
			slice = torch.arange(0, mask.size(2), self.n_frames_per_step)
			outputs[2].data.masked_fill_(mask[:, 0, slice], 1e3)  # gate energies (B, T//n_frames_per_step)

		return outputs 
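The expand/permute dance produces a (B, num_mels, T) boolean mask that is True on padded frames. An equivalent standalone sketch (get_mask_from_lengths is project code, so a stand-in is built directly; sizes are assumed):

import torch

B, T, num_mels = 2, 6, 80
output_lengths = torch.tensor([6, 4])

ids = torch.arange(T).unsqueeze(0)               # (1, T)
mask = ids >= output_lengths.unsqueeze(1)        # (B, T); True where padded
mask = mask.unsqueeze(1).expand(B, num_mels, T)  # (B, num_mels, T)

mel = torch.randn(B, num_mels, T)
mel = mel.masked_fill(mask, 0.0)
print(mel[1, :, 4:].abs().sum())  # tensor(0.) -- frames past length 4 are zeroed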
Example #13
Source File: datafeeder_tacotron2.py    From Tacotron2-Wavenet-Korean-TTS with MIT License
def _pad_target(t, length):
    # t: 2-dim array of shape (T, num_mels), padded along the time axis to (length, num_mels)
    return np.pad(t, [(0, length - t.shape[0]), (0,0)], mode='constant', constant_values=_pad)  # (169, 80) ==> (length, 80)

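Usage sketch (_pad is a module-level constant in the original file; 0.0 is assumed here):

import numpy as np

_pad = 0.0                       # assumed padding value
mel = np.random.randn(169, 80)   # (T, num_mels)
padded = np.pad(mel, [(0, 200 - mel.shape[0]), (0, 0)],
                mode='constant', constant_values=_pad)
print(padded.shape)              # (200, 80)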
Example #14
Source File: predict_mel.py    From self-attention-tacotron with BSD 3-Clause "New" or "Revised" License
def predict(hparams,
            model_dir, checkpoint_path, output_dir,
            test_source_files, test_target_files):
    def predict_input_fn():
        source = tf.data.TFRecordDataset(list(test_source_files))
        target = tf.data.TFRecordDataset(list(test_target_files))
        dataset = dataset_factory(source, target, hparams)
        batched = dataset.prepare_and_zip().group_by_batch(
            batch_size=1).merge_target_to_source()
        return batched.dataset

    estimator = tacotron_model_factory(hparams, model_dir, None)

    predictions = map(
        lambda p: PredictedMel(p["id"], p["key"], p["mel"], p.get("mel_postnet"), p["mel"].shape[1], p["mel"].shape[0],
                               p["ground_truth_mel"], p["alignment"], p.get("alignment2"), p.get("alignment3"),
                               p.get("alignment4"), p.get("alignment5"), p.get("alignment6"),
                               p["source"], p["text"], p.get("accent_type")),
        estimator.predict(predict_input_fn, checkpoint_path=checkpoint_path))

    for v in predictions:
        key = v.key.decode('utf-8')
        mel_filename = f"{key}.{hparams.predicted_mel_extension}"
        mel_filepath = os.path.join(output_dir, mel_filename)
        mel = v.predicted_mel_postnet if hparams.use_postnet_v2 else v.predicted_mel
        assert mel.shape[1] == hparams.num_mels
        mel.tofile(mel_filepath, format='<f4')
        text = v.text.decode("utf-8")
        plot_filename = f"{key}.png"
        plot_filepath = os.path.join(output_dir, plot_filename)
        alignments = list(filter(lambda x: x is not None,
                                 [v.alignment, v.alignment2, v.alignment3, v.alignment4, v.alignment5, v.alignment6]))

        plot_predictions(alignments, v.ground_truth_mel, v.predicted_mel, v.predicted_mel_postnet,
                         text, v.key, plot_filepath)
        prediction_filename = f"{key}.tfrecord"
        prediction_filepath = os.path.join(output_dir, prediction_filename)
        write_prediction_result(v.id, key, alignments, mel, v.ground_truth_mel, text, v.source,
                                v.accent_type, prediction_filepath) 
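One detail worth knowing: np.ndarray.tofile ignores the format argument for binary output (it only applies when sep is non-empty), so the file simply holds the array's raw bytes, float32 for a typical TF prediction. A read-back sketch (the filename and num_mels value are assumptions):

import numpy as np

num_mels = 80  # must match hparams.num_mels used at prediction time
mel = np.fromfile("sample.mel", dtype="<f4").reshape(-1, num_mels)
print(mel.shape)  # (T, num_mels)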
Example #15
Source File: audio.py    From representation_mixing with BSD 3-Clause "New" or "Revised" License
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels) 
Example #16
Source File: datafeeder_wavenet.py    From Tacotron-Wavenet-Vocoder-Korean with MIT License
def __init__(self,coord,data_dirs,batch_size,receptive_field, gc_enable=False, queue_size=8):
        super(DataFeederWavenet, self).__init__()    
        self.data_dirs = data_dirs
        self.coord = coord
        self.batch_size = batch_size
        self.receptive_field = receptive_field
        self.hop_size = audio.get_hop_size(hparams)
        self.sample_size = ensure_divisible(hparams.sample_size,self.hop_size, True)
        self.max_frames = self.sample_size // self.hop_size  # enough frames to cover sample_size samples
        self.queue_size = queue_size
        self.gc_enable = gc_enable
        self.skip_path_filter = hparams.skip_path_filter
       
        self.rng = np.random.RandomState(123)
        self._offset = defaultdict(lambda: 2)  # keys not seen before default to 2
        
        self.data_dir_to_id = {data_dir: idx for idx, data_dir in enumerate(self.data_dirs)}  # data_dir <---> speaker_id mapping
        self.path_dict = get_path_dict(self.data_dirs,np.max([self.sample_size,receptive_field]))  # drops anything shorter than receptive_field and returns the rest
        
        self._placeholders = [
            tf.placeholder(tf.float32, shape=[None,None,1],name='input_wav'),
            tf.placeholder(tf.float32, shape=[None,None,hparams.num_mels],name='local_condition')
        ]    
        dtypes = [tf.float32, tf.float32]
    
        if self.gc_enable:
            self._placeholders.append(tf.placeholder(tf.int32, shape=[None],name='speaker_id'))
            dtypes.append(tf.int32)
 
        queue = tf.FIFOQueue(self.queue_size, dtypes, name='input_queue')
        self.enqueue = queue.enqueue(self._placeholders)
        
        if self.gc_enable:
            self.inputs_wav, self.local_condition, self.speaker_id = queue.dequeue()
        else:
            self.inputs_wav, self.local_condition = queue.dequeue()

        self.inputs_wav.set_shape(self._placeholders[0].shape)
        self.local_condition.set_shape(self._placeholders[1].shape)
        if self.gc_enable:
            self.speaker_id.set_shape(self._placeholders[2].shape) 
Example #17
Source File: audio.py    From vae_tacotron with MIT License
def _build_mel_basis():
  n_fft = (hparams.num_freq - 1) * 2
  return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels) 
Example #18
Source File: audio.py    From vae_tacotron2 with MIT License
def _build_mel_basis():
	assert hparams.fmax <= hparams.sample_rate // 2
	return librosa.filters.mel(hparams.sample_rate, hparams.fft_size, n_mels=hparams.num_mels,
							   fmin=hparams.fmin, fmax=hparams.fmax) 
Example #19
Source File: feeder.py    From vae_tacotron2 with MIT License
def __init__(self, coordinator, metadata_filename, hparams):
		super(Feeder, self).__init__()
		self._coord = coordinator
		self._hparams = hparams
		self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
		self._offset = 0

		# Load metadata
		self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
		self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
		with open(metadata_filename, encoding='utf-8') as f:
			self._metadata = [line.strip().split('|') for line in f]
			frame_shift_ms = hparams.hop_size / hparams.sample_rate
			hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / (3600)
			log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

		# Create placeholders for inputs and targets. Don't specify batch size because we want
		# to be able to feed different batch sizes at eval time.
		self._placeholders = [
		tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
		tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
		tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
		tf.placeholder(tf.int32,[None],'mel_lengths'),
		tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
		tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
		]

		# Create queue for buffering data
		queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32], name='input_queue')
		self._enqueue_op = queue.enqueue(self._placeholders)
		self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths, self.token_targets, self.linear_targets = queue.dequeue()
		self.inputs.set_shape(self._placeholders[0].shape)
		self.input_lengths.set_shape(self._placeholders[1].shape)
		self.mel_targets.set_shape(self._placeholders[2].shape)
		self.mel_lengths.set_shape(self._placeholders[3].shape)
		self.token_targets.set_shape(self._placeholders[4].shape)
		self.linear_targets.set_shape(self._placeholders[5].shape) 
Example #20
Source File: synthesizer.py    From vae_tacotron2 with MIT License
def load(self, checkpoint_path, gta=False, model_name='Tacotron'):
		print('Constructing model: %s' % model_name)
		inputs = tf.placeholder(tf.int32, [1, None], 'inputs')
		input_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')

		with tf.variable_scope('model') as scope:
			self.model = create_model(model_name, hparams)
			if hparams.use_vae:
				ref_targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'ref_targets')
			if gta:
				targets = tf.placeholder(tf.float32, [1, None, hparams.num_mels], 'mel_targets')
				
				if hparams.use_vae:
					self.model.initialize(inputs, input_lengths, targets, gta=gta, reference_mel=ref_targets)
				else:
					self.model.initialize(inputs, input_lengths, targets, gta=gta)
			else:
				if hparams.use_vae:
					self.model.initialize(inputs, input_lengths, reference_mel=ref_targets)
				else:
					self.model.initialize(inputs, input_lengths)
			self.mel_outputs = self.model.mel_outputs
			self.alignment = self.model.alignments[0]

		self.gta = gta
		print('Loading checkpoint: %s' % checkpoint_path)
		self.session = tf.Session()
		self.session.run(tf.global_variables_initializer())
		saver = tf.train.Saver()
		saver.restore(self.session, checkpoint_path) 
Example #21
Source File: audio.py    From arabic-tacotron-tts with MIT License
def _build_mel_basis():
  n_fft = (hparams.num_freq - 1) * 2
  return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels) 
Example #22
Source File: feeder.py    From gmvae_tacotron with MIT License
def __init__(self, coordinator, metadata_filename, hparams):
		super(Feeder, self).__init__()
		self._coord = coordinator
		self._hparams = hparams
		self._cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
		self._offset = 0

		# Load metadata
		self._mel_dir = os.path.join(os.path.dirname(metadata_filename), 'mels')
		self._linear_dir = os.path.join(os.path.dirname(metadata_filename), 'linear')
		with open(metadata_filename, encoding='utf-8') as f:
			self._metadata = [line.strip().split('|') for line in f]
			frame_shift_ms = hparams.hop_size / hparams.sample_rate
			hours = sum([int(x[4]) for x in self._metadata]) * frame_shift_ms / (3600)
			log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(self._metadata), hours))

		# Create placeholders for inputs and targets. Don't specify batch size because we want
		# to be able to feed different batch sizes at eval time.
		self._placeholders = [
		tf.placeholder(tf.int32, shape=(None, None), name='inputs'),
		tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
		tf.placeholder(tf.float32, shape=(None, None, hparams.num_mels), name='mel_targets'),
		tf.placeholder(tf.int32,[None],'mel_lengths'),
		tf.placeholder(tf.float32, shape=(None, None), name='token_targets'),
		tf.placeholder(tf.float32, shape=(None, None, hparams.num_freq), name='linear_targets'),
		]

		# Create queue for buffering data
		queue = tf.FIFOQueue(8, [tf.int32, tf.int32, tf.float32, tf.int32, tf.float32, tf.float32], name='input_queue')
		self._enqueue_op = queue.enqueue(self._placeholders)
		self.inputs, self.input_lengths, self.mel_targets, self.mel_lengths, self.token_targets, self.linear_targets = queue.dequeue()
		self.inputs.set_shape(self._placeholders[0].shape)
		self.input_lengths.set_shape(self._placeholders[1].shape)
		self.mel_targets.set_shape(self._placeholders[2].shape)
		self.mel_lengths.set_shape(self._placeholders[3].shape)
		self.token_targets.set_shape(self._placeholders[4].shape)
		self.linear_targets.set_shape(self._placeholders[5].shape) 
Example #23
Source File: audio.py    From cnn_vocoder with MIT License
def _build_mel_basis():
    assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, n_fft,  # n_fft is presumably defined at module level in the original file
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels) 
Example #24
Source File: audio.py    From tacotron with MIT License
def _build_mel_basis():
  n_fft = (hparams.num_freq - 1) * 2
  return librosa.filters.mel(hparams.sample_rate, n_fft, n_mels=hparams.num_mels) 
Example #25
Source File: audio.py    From WaveRNN-Pytorch with MIT License
def _build_mel_basis():
    if hparams.fmax is not None:
        assert hparams.fmax <= hparams.sample_rate // 2
    return librosa.filters.mel(hparams.sample_rate, hparams.fft_size,
                               fmin=hparams.fmin, fmax=hparams.fmax,
                               n_mels=hparams.num_mels) 
Example #26
Source File: model.py    From WaveRNN-Pytorch with MIT License
def no_test_build_model():
    model = Model(hp.rnn_dims, hp.fc_dims, hp.bits,
        hp.pad, hp.upsample_factors, hp.num_mels,
        hp.compute_dims, hp.res_out_dims, hp.res_blocks).cuda()
    print(vars(model))