Python text.text_to_sequence() Examples
The following are 28 code examples of text.text_to_sequence().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module text, or try the search function.
Example #1
Source File: synthesis.py From FastSpeech with MIT License | 6 votes |
def synthesis(model, text, alpha=1.0):
    """Run ``model`` on ``text`` and return its mel outputs in two layouts.

    Args:
        model: Wrapper whose ``module.forward(sequence, src_pos, alpha=...)``
            returns a ``(mel, mel_postnet)`` pair.
        text: Input string; converted to symbol IDs with
            ``text_to_sequence(text, hp.text_cleaners)``.
        alpha: Forwarded unchanged to the model.

    Returns:
        A 4-tuple: the first batch item of ``mel`` and of ``mel_postnet``
        (each moved to CPU with dims 0 and 1 swapped), followed by the full
        ``mel`` and ``mel_postnet`` batches with dims 1 and 2 swapped.
    """
    # Leading batch axis of size 1.
    token_ids = np.stack([np.array(text_to_sequence(text, hp.text_cleaners))])
    # 1-based position index for every input symbol.
    positions = np.stack([np.arange(1, token_ids.shape[1] + 1)])

    with torch.no_grad():
        # Plain tensors are used directly; the Variable wrapper is deprecated
        # and adds nothing under no_grad().
        sequence = torch.from_numpy(token_ids).cuda().long()
        src_pos = torch.from_numpy(positions).cuda().long()
        mel, mel_postnet = model.module.forward(sequence, src_pos, alpha=alpha)

        return (
            mel[0].cpu().transpose(0, 1),
            mel_postnet[0].cpu().transpose(0, 1),
            mel.transpose(1, 2),
            mel_postnet.transpose(1, 2),
        )
Example #2
Source File: dataset.py From LightSpeech with MIT License | 6 votes |
def __getitem__(self, idx):
    """Assemble the training sample stored under index ``idx``.

    Returns:
        dict with keys ``"text"`` (symbol-ID array built from
        ``self.text[idx]`` without its last character),
        ``"mel_tac2_target"`` (transposed array from
        ``hparams.mel_tacotron2``), ``"cemb"`` (from ``hparams.cemb_path``)
        and ``"D"`` (from ``hparams.alignment_path``).
    """
    # NOTE: loading the ground-truth mel ("ljspeech-mel-%05d.npy" under
    # hparams.mel_ground_truth) is disabled here; only the Tacotron 2
    # target is used.
    npy_name = str(idx) + ".npy"

    mel_tac2_target = np.load(os.path.join(hparams.mel_tacotron2, npy_name)).T
    cemb = np.load(os.path.join(hparams.cemb_path, npy_name))
    D = np.load(os.path.join(hparams.alignment_path, npy_name))

    # Drop the final character of the stored line before encoding it.
    trimmed = self.text[idx][:-1]
    character = np.array(text_to_sequence(trimmed, hparams.text_cleaners))

    return {
        "text": character,
        "mel_tac2_target": mel_tac2_target,
        "cemb": cemb,
        "D": D,
    }
Example #3
Source File: inference.py From LightSpeech with MIT License | 6 votes |
def synthesis(model, text, alpha=1.0):
    """Run ``model.module.inference`` on ``text`` and return the mel output.

    Args:
        model: Wrapper exposing ``module.inference(sequence, alpha)``.
        text: Input string; converted to symbol IDs with
            ``text_to_sequence(text, hp.text_cleaners)``.
        alpha: Forwarded unchanged to ``inference``.

    Returns:
        A pair: the first batch item of ``mel`` moved to CPU with dims 0 and 1
        swapped, and the full ``mel`` batch with dims 1 and 2 swapped.
    """
    # Leading batch axis of size 1.
    token_ids = np.stack([np.array(text_to_sequence(text, hp.text_cleaners))])

    with torch.no_grad():
        # Plain tensors are used directly; the Variable wrapper is deprecated
        # and adds nothing under no_grad().
        sequence = torch.from_numpy(token_ids).cuda().long()

        # NOTE: an earlier variant also unpacked two post-net outputs from
        # inference(); only the single mel output is consumed here.
        mel = model.module.inference(sequence, alpha)

        return mel[0].cpu().transpose(0, 1), mel.transpose(1, 2)
Example #4
Source File: utils.py From LightSpeech with MIT License | 6 votes |
def load_data_from_tacotron2(txt, model):
    """Run ``model.inference`` on ``txt`` and return NumPy results.

    Args:
        txt: Input string, encoded with
            ``text.text_to_sequence(txt, hparams.text_cleaners)``.
        model: Object whose ``inference(batch)`` returns
            ``([_, mel, _, alignment], cemb)``.

    Returns:
        ``(mel, cemb, D)``: the first batch item of ``mel`` and ``cemb`` as
        NumPy arrays, and ``D`` as ``np.array(get_D(alignment))`` computed
        from the first batch item of the alignment.
    """
    symbol_ids = np.array(text.text_to_sequence(txt, hparams.text_cleaners))
    # Batch of one, as int64 on the GPU.
    batch = torch.from_numpy(np.stack([symbol_ids])).long().cuda()

    with torch.no_grad():
        (_, mel, _, alignment), cemb = model.inference(batch)

    first_alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()
    D = np.array(get_D(first_alignment))
    mel = mel[0].cpu().numpy()

    return mel, cemb, D
Example #5
Source File: utils.py From LightSpeech with MIT License | 6 votes |
def load_data(txt, mel, model):
    """Run ``model.forward`` on a text/mel pair and return NumPy results.

    Args:
        txt: Input string, encoded with
            ``text.text_to_sequence(txt, hparams.text_cleaners)``.
        mel: Array exposing ``.T``; its transpose is batched and sent to the
            GPU as float.
        model: Object whose ``forward(inputs)`` returns
            ``([_, mel_tacotron2, _, alignment], cemb)``.

    Returns:
        ``(mel_tacotron2, cemb, D)``: the first batch item of the model's mel
        output and ``cemb`` as NumPy arrays, and ``D`` as
        ``np.array(get_D(alignment))`` for the first batch item.
    """
    character = text.text_to_sequence(txt, hparams.text_cleaners)
    character = torch.from_numpy(np.stack([np.array(character)])).long().cuda()
    # Build the lengths as int64 directly; torch.Tensor([...]) would create a
    # float32 tensor first and only then cast it to long.
    text_length = torch.tensor([character.size(1)], dtype=torch.long).cuda()

    mel = torch.from_numpy(np.stack([mel.T])).float().cuda()
    max_len = mel.size(2)
    output_length = torch.tensor([max_len], dtype=torch.long).cuda()

    inputs = character, text_length, mel, max_len, output_length

    with torch.no_grad():
        [_, mel_tacotron2, _, alignment], cemb = model.forward(inputs)

    alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()

    D = np.array(get_D(alignment))
    mel_tacotron2 = mel_tacotron2[0].cpu().numpy()

    return mel_tacotron2, cemb, D
Example #6
Source File: datafeeder.py From tacotron with MIT License | 6 votes |
def _get_next_example(self):
    '''Read the next (input, mel_target, linear_target, cost) example from disk.

    The cost is ``len(linear_target)``. When the offset runs past the end of
    the metadata, it is reset to 0 and the metadata is reshuffled in place.
    '''
    if self._offset >= len(self._metadata):
        self._offset = 0
        random.shuffle(self._metadata)

    entry = self._metadata[self._offset]
    self._offset += 1

    text = entry[3]
    # With probability _p_cmudict (and only when a dictionary is loaded),
    # pass every space-separated word through _maybe_get_arpabet.
    if self._cmudict and random.random() < _p_cmudict:
        words = text.split(' ')
        text = ' '.join(map(self._maybe_get_arpabet, words))

    token_ids = text_to_sequence(text, self._cleaner_names)
    input_data = np.asarray(token_ids, dtype=np.int32)
    linear_target = np.load(os.path.join(self._datadir, entry[0]))
    mel_target = np.load(os.path.join(self._datadir, entry[1]))
    return (input_data, mel_target, linear_target, len(linear_target))
Example #7
Source File: datafeeder.py From arabic-tacotron-tts with MIT License | 6 votes |
def _get_next_example(self):
    '''Read the next (input, mel_target, linear_target, cost) example from disk.

    Every non-empty space-separated token of the transcript is passed through
    ``_maybe_get_arpabet``, except the punctuation tokens ",", "." and "-",
    which are kept verbatim. The cost is ``len(linear_target)``.
    '''
    # Start a new shuffled pass once all metadata rows have been consumed.
    if self._offset >= len(self._metadata):
        self._offset = 0
        random.shuffle(self._metadata)

    entry = self._metadata[self._offset]
    self._offset += 1

    tokens = [
        word if word in (",", '.', '-') else self._maybe_get_arpabet(word)
        for word in entry[3].split(' ')
        if word not in (" ", "")  # consecutive spaces yield empty tokens
    ]
    text = ' '.join(tokens)

    input_data = np.asarray(text_to_sequence(text, self._cleaner_names), dtype=np.int32)
    linear_target = np.load(os.path.join(self._datadir, entry[0]))
    mel_target = np.load(os.path.join(self._datadir, entry[1]))
    return (input_data, mel_target, linear_target, len(linear_target))
Example #8
Source File: utils.py From FastSpeech with MIT License | 6 votes |
def load_data(txt, mel, model):
    """Feed a text/mel pair through ``model.forward`` and return NumPy arrays.

    Args:
        txt: Input string, encoded with
            ``text.text_to_sequence(txt, hparams.text_cleaners)``.
        mel: Array exposing ``.T``; its transpose is batched and moved to the
            GPU as float.
        model: Object whose ``forward(inputs)`` returns
            ``([_, mel_tacotron2, _, alignment], cemb)``.

    Returns:
        ``(mel_tacotron2, cemb, D)`` where the first two are the first batch
        item of the corresponding model outputs as NumPy arrays and ``D`` is
        ``np.array(get_D(alignment))`` for the first batch item.
    """
    symbol_ids = np.array(text.text_to_sequence(txt, hparams.text_cleaners))
    character = torch.from_numpy(np.stack([symbol_ids])).long().cuda()
    # Integer lengths are created as int64 directly instead of being routed
    # through a float32 torch.Tensor and cast afterwards.
    text_length = torch.tensor([character.size(1)], dtype=torch.long).cuda()

    mel = torch.from_numpy(np.stack([mel.T])).float().cuda()
    max_len = mel.size(2)
    output_length = torch.tensor([max_len], dtype=torch.long).cuda()

    inputs = character, text_length, mel, max_len, output_length

    with torch.no_grad():
        [_, mel_tacotron2, _, alignment], cemb = model.forward(inputs)

    alignment = alignment[0].cpu().numpy()
    cemb = cemb[0].cpu().numpy()

    D = np.array(get_D(alignment))
    mel_tacotron2 = mel_tacotron2[0].cpu().numpy()

    return mel_tacotron2, cemb, D
Example #9
Source File: datafeeder.py From libfaceid with MIT License | 6 votes |
def _get_next_example(self):
    '''Fetch one (input, mel_target, linear_target, cost) example from disk.

    Metadata is consumed sequentially via ``self._offset``; once exhausted the
    offset is reset and the metadata list is shuffled in place. The returned
    cost is ``len(linear_target)``.
    '''
    if self._offset >= len(self._metadata):
        self._offset = 0
        random.shuffle(self._metadata)

    row = self._metadata[self._offset]
    self._offset += 1

    text = row[3]
    use_arpabet = self._cmudict and random.random() < _p_cmudict
    if use_arpabet:
        # Replace each space-separated word by whatever
        # _maybe_get_arpabet returns for it.
        converted = [self._maybe_get_arpabet(w) for w in text.split(' ')]
        text = ' '.join(converted)

    input_data = np.asarray(
        text_to_sequence(text, self._cleaner_names), dtype=np.int32
    )
    linear_path = os.path.join(self._datadir, row[0])
    linear_target = np.load(linear_path)
    mel_path = os.path.join(self._datadir, row[1])
    mel_target = np.load(mel_path)
    return (input_data, mel_target, linear_target, len(linear_target))
Example #10
Source File: utils.py From FastSpeech with MIT License | 6 votes |
def load_data_from_tacotron2(txt, model):
    """Encode ``txt``, run ``model.inference`` and convert results to NumPy.

    Args:
        txt: Input string, encoded with
            ``text.text_to_sequence(txt, hparams.text_cleaners)``.
        model: Object whose ``inference(batch)`` returns
            ``([_, mel, _, alignment], cemb)``.

    Returns:
        ``(mel, cemb, D)`` taken from the first batch item; ``D`` is
        ``np.array(get_D(alignment))``.
    """
    encoded = text.text_to_sequence(txt, hparams.text_cleaners)
    # Single-item batch, int64, on the GPU.
    character = torch.from_numpy(np.stack([np.array(encoded)])).long().cuda()

    with torch.no_grad():
        decoder_outputs, cemb = model.inference(character)
    _, mel, _, alignment = decoder_outputs

    alignment_np = alignment[0].cpu().numpy()
    cemb_np = cemb[0].cpu().numpy()
    D = np.array(get_D(alignment_np))
    mel_np = mel[0].cpu().numpy()

    return mel_np, cemb_np, D
Example #11
Source File: mkgta.py From Tacotron2-PyTorch with MIT License | 6 votes |
def infer(wav_path, text, model):
    """Run ``model.teacher_infer`` on a (text, wav) pair and return a mel array.

    The mel spectrogram of ``wav_path`` is trimmed along its last axis to a
    multiple of ``hps.n_frames_per_step`` before being fed to the model. The
    text and mel inputs are each concatenated with themselves along dim 0, and
    only the first post-net output is used.

    Returns:
        The first post-net output converted with ``to_arr``. When frames were
        trimmed, that output is written into the leading columns of the
        original mel array and the original array is returned instead.
    """
    token_ids = torch.IntTensor(text_to_sequence(text, hps.text_cleaners))
    sequence = to_var(token_ids[None, :]).long()

    mel = melspectrogram(load_wav(wav_path))
    mel_in = to_var(torch.Tensor([mel]))

    # Number of trailing frames that do not fill a whole decoder step.
    remainder = mel_in.shape[2] % hps.n_frames_per_step
    if remainder != 0:
        mel_in = mel_in[:, :, :-remainder]

    sequence = torch.cat([sequence, sequence], 0)
    mel_in = torch.cat([mel_in, mel_in], 0)
    _, mel_outputs_postnet, _, _ = model.teacher_infer(sequence, mel_in)

    predicted = to_arr(mel_outputs_postnet[0])
    if remainder == 0:
        return predicted

    # Overwrite only the frames that were fed to the model; the trimmed tail
    # keeps the values of the original spectrogram.
    mel[:, :-remainder] = predicted
    return mel
Example #12
Source File: datafeeder.py From vae_tacotron with MIT License | 6 votes |
def _get_next_example(self): '''Loads a single example (input, mel_target, linear_target, cost) from disk''' if self._offset >= len(self._metadata): self._offset = 0 random.shuffle(self._metadata) meta = self._metadata[self._offset] self._offset += 1 text = meta[3] if self._cmudict and random.random() < _p_cmudict: text = ' '.join([self._maybe_get_arpabet(word) for word in text.split(' ')]) input_data = np.asarray(text_to_sequence(text, self._cleaner_names), dtype=np.int32) linear_target = np.load(os.path.join(self._datadir, meta[0])) mel_target = np.load(os.path.join(self._datadir, meta[1])) return (input_data, mel_target, linear_target, len(linear_target))
Example #13
Source File: synthesis.py From Tacotron-pytorch with Apache License 2.0 | 6 votes |
def generate(model, text):
    """Synthesize ``text`` with ``model`` and return the audio as WAV bytes.

    The text is encoded with the cleaners listed (comma-separated) in
    ``hp.cleaners``, the model's linear output is inverted with
    ``inv_spectrogram``, the waveform is cut at ``find_endpoint`` and written
    into an in-memory buffer with ``save_wav``.
    """
    # Text to index sequence, with a leading batch axis
    cleaner_names = [name.strip() for name in hp.cleaners.split(',')]
    token_ids = np.asarray(text_to_sequence(text, cleaner_names), dtype=np.int32)
    seq = np.expand_dims(token_ids, axis=0)

    # All-zero [GO] frame: one time step per batch item
    go_frame = np.zeros([seq.shape[0], hp.num_mels, 1], dtype=np.float32)

    # NOTE(review): `volatile=True` only disables autograd on old PyTorch
    # releases; confirm the targeted version before relying on it.
    characters = Variable(
        torch.from_numpy(seq).type(torch.cuda.LongTensor), volatile=True
    ).cuda()
    mel_input = Variable(
        torch.from_numpy(go_frame).type(torch.cuda.FloatTensor), volatile=True
    ).cuda()

    # Spectrogram to wav
    _, linear_output = model.forward(characters, mel_input)
    wav = inv_spectrogram(linear_output[0].data.cpu().numpy())
    end = find_endpoint(wav)
    wav = wav[:end]

    buffer = io.BytesIO()
    save_wav(wav, buffer)
    return buffer.getvalue()
Example #14
Source File: inference.py From Tacotron2-PyTorch with MIT License | 5 votes |
def infer(text, model):
    """Run ``model.inference`` on ``text``.

    Returns:
        ``(mel_outputs, mel_outputs_postnet, alignments)``: the first, second
        and fourth values returned by ``model.inference``.
    """
    token_ids = torch.IntTensor(text_to_sequence(text, hps.text_cleaners))
    # Add a batch axis, wrap with the project's to_var helper, cast to int64.
    sequence = to_var(token_ids.unsqueeze(0)).long()

    outputs = model.inference(sequence)
    mel_outputs, mel_outputs_postnet, _, alignments = outputs
    return (mel_outputs, mel_outputs_postnet, alignments)
Example #15
Source File: dataset.py From FastSpeech with MIT License | 5 votes |
def __getitem__(self, idx):
    """Assemble the training sample stored under index ``idx``.

    Returns:
        dict with keys ``"text"`` (symbol-ID array built from
        ``self.text[idx]`` without its last character), ``"mel_target"``
        (array loaded from ``hparams.mel_ground_truth``) and ``"D"`` (array
        loaded from ``hparams.alignment_path``).
    """
    # Ground-truth mel files are numbered from 1, hence idx + 1.
    mel_file = "ljspeech-mel-%05d.npy" % (idx+1)
    mel_gt_target = np.load(os.path.join(hparams.mel_ground_truth, mel_file))

    D = np.load(os.path.join(hparams.alignment_path, str(idx)+".npy"))

    # Drop the final character of the stored line before encoding it.
    trimmed = self.text[idx][:-1]
    character = np.array(text_to_sequence(trimmed, hparams.text_cleaners))

    return {"text": character, "mel_target": mel_gt_target, "D": D}
Example #16
Source File: text_test.py From tacotron with MIT License | 5 votes |
def test_text_to_sequence():
    """Check text_to_sequence against fixed (text, cleaners, expected) cases."""
    cases = [
        ('', [], [1]),
        ('Hi!', [], [9, 36, 54, 1]),
        ('"A"_B', [], [2, 3, 1]),
        ('A {AW1 S} B', [], [2, 64, 83, 132, 64, 3, 1]),
        ('Hi', ['lowercase'], [35, 36, 1]),
        ('A {AW1 S} B', ['english_cleaners'], [28, 64, 83, 132, 64, 29, 1]),
    ]
    for text, cleaners, expected in cases:
        assert text_to_sequence(text, cleaners) == expected
Example #17
Source File: synthesizer.py From tacotron with MIT License | 5 votes |
def synthesize(self, text):
    """Synthesize ``text`` and return the resulting audio as WAV bytes.

    The text is encoded with the cleaners listed (comma-separated) in
    ``hparams.cleaners`` and fed to ``self.wav_output`` through
    ``self.session``. The waveform is then passed through
    ``audio.inv_preemphasis`` and cut at ``audio.find_endpoint``.
    """
    cleaner_names = [name.strip() for name in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)

    # Single-item batch: one sequence plus its length.
    inputs = [np.asarray(seq, dtype=np.int32)]
    input_lengths = np.asarray([len(seq)], dtype=np.int32)
    feed_dict = {
        self.model.inputs: inputs,
        self.model.input_lengths: input_lengths,
    }

    wav = self.session.run(self.wav_output, feed_dict=feed_dict)
    wav = audio.inv_preemphasis(wav)
    end = audio.find_endpoint(wav)
    wav = wav[:end]

    buffer = io.BytesIO()
    audio.save_wav(wav, buffer)
    return buffer.getvalue()
Example #18
Source File: dataset.py From Tacotron2-PyTorch with MIT License | 5 votes |
def get_text(self, text):
    """Return ``text`` encoded as a ``torch.IntTensor`` of symbol IDs.

    Encoding is done by ``text_to_sequence`` with ``hps.text_cleaners``.
    """
    return torch.IntTensor(text_to_sequence(text, hps.text_cleaners))
Example #19
Source File: train_tacotron.py From Tacotron-Wavenet-Vocoder-Korean with MIT License | 5 votes |
def create_batch_inputs_from_texts(texts):
    """Encode ``texts`` into model inputs and report their lengths.

    Each text is encoded with ``text_to_sequence`` and the batch is built by
    ``_prepare_inputs``. Every input is decoded back with ``sequence_to_text``;
    when the result differs from ``h2j(text)``, both strings are logged.

    Returns:
        ``(inputs, input_lengths)`` where ``input_lengths`` is an int32 array
        holding ``len(x)`` for each prepared input.
    """
    sequences = list(map(text_to_sequence, texts))
    inputs = _prepare_inputs(sequences)
    input_lengths = np.asarray(list(map(len, inputs)), dtype=np.int32)

    for idx, (seq, text) in enumerate(zip(inputs, texts)):
        recovered_text = sequence_to_text(seq, skip_eos_and_pad=True)
        if recovered_text == h2j(text):
            continue
        # Round trip changed the text: log original and recovered versions.
        log(" [{}] {}".format(idx, text))
        log(" [{}] {}".format(idx, recovered_text))
        log("="*30)

    return inputs, input_lengths
Example #20
Source File: wavloader.py From MelNet with MIT License | 5 votes |
def __getitem__(self, idx):
    """Return ``(seq, source, target)`` for the dataset entry at ``idx``.

    The transcript (``self.dataset[idx][1]``) is encoded with
    ``text_to_sequence`` for the 'KSS' dataset and with ``process_blizzard``
    for 'Blizzard'. The audio file (``self.dataset[idx][0]``) is read at
    ``self.hp.audio.sr``, converted to a normalized mel by ``self.melgen`` and
    split by ``self.tierutil.cut_divide_tiers`` for ``self.tier``.

    Raises:
        ValueError: If ``self.hp.data.name`` is neither 'KSS' nor 'Blizzard'.
    """
    text = self.dataset[idx][1]
    dataset_name = self.hp.data.name
    if dataset_name == 'KSS':
        seq = text_to_sequence(text)
    elif dataset_name == 'Blizzard':
        seq = process_blizzard(text)
    else:
        # Fail early and explicitly; otherwise `seq` would be unbound and the
        # error would only surface at the return, after the wav work is done.
        raise ValueError(
            "Unsupported dataset name: {!r}".format(dataset_name)
        )

    wav = read_wav_np(self.dataset[idx][0], sample_rate=self.hp.audio.sr)
    # wav = cut_wav(self.wavlen, wav)
    mel = self.melgen.get_normalized_mel(wav)
    source, target = self.tierutil.cut_divide_tiers(mel, self.tier)

    return seq, source, target
Example #21
Source File: model.py From MelNet with MIT License | 5 votes |
def sample(self, condition):
    """Generate a tensor tier by tier, optionally conditioned on text.

    Tier 1 is produced one element at a time; tiers 2..N each sample a new
    tensor from the current ``x`` and merge it via ``self.tierutil.interleave``.

    Args:
        condition: Text passed to ``text_to_sequence``; the encoded sequence
            is only fed to tier 1 when ``self.infer_hp.conditional`` is set.

    Returns:
        The tensor ``x`` after the last tier has been interleaved.
    """
    x = None
    # NOTE(review): torch.from_numpy requires an ndarray, so text_to_sequence
    # presumably returns one here; `seq` is also not moved with .cuda() like
    # the length tensors below - confirm that is intended.
    seq = torch.from_numpy(text_to_sequence(condition)).long().unsqueeze(0)
    input_lengths = torch.LongTensor([seq[0].shape[0]]).cuda()
    audio_lengths = torch.LongTensor([0]).cuda()

    ## Tier 1 ##
    tqdm.write('Tier 1')
    for t in tqdm(range(self.args.timestep // self.t_div)):
        # One more time step is being generated.
        audio_lengths += 1
        # Grow x by one zero-filled slot along the last dimension.
        if x is None:
            x = torch.zeros((1, self.n_mels // self.f_div, 1)).cuda()
        else:
            x = torch.cat([x, torch.zeros((1, self.n_mels // self.f_div, 1)).cuda()], dim=-1)
        for m in tqdm(range(self.n_mels // self.f_div)):
            torch.cuda.synchronize()
            # The tier-1 model is re-run for every (m, t) position on the
            # partially filled x.
            if self.infer_hp.conditional:
                mu, std, pi, _ = self.tiers[1](x, seq, input_lengths, audio_lengths)
            else:
                mu, std, pi = self.tiers[1](x, audio_lengths)
            temp = sample_gmm(mu, std, pi)
            # Keep only the sample for the position currently being filled.
            x[:, m, t] = temp[:, m, t]

    ## Tier 2~N ##
    for tier in tqdm(range(2, self.hp.model.tier + 1)):
        tqdm.write('Tier %d' % tier)
        mu, std, pi = self.tiers[tier](x)
        temp = sample_gmm(mu, std, pi)
        # Merge the newly sampled tensor into x.
        x = self.tierutil.interleave(x, temp, tier + 1)

    return x
Example #22
Source File: text_test.py From arabic-tacotron-tts with MIT License | 5 votes |
def test_text_to_sequence():
    """Check text_to_sequence on brace-delimited phoneme input.

    The two-group input is expected to encode identically under the
    'lowercase', 'english_cleaners' and 'arabic_cleaners' cleaners.
    """
    one_group = '{t a s d ii0 d a t i1 n}'
    two_groups = '{t a s d ii0 d a t i1 n} {s t a E S A t}'
    one_group_ids = [49, 29, 48, 32, 38, 32, 29, 49, 37, 44, 1]
    two_group_ids = [49, 29, 48, 32, 38, 32, 29, 49, 37, 44, 11,
                     48, 49, 29, 18, 22, 15, 49, 1]

    assert text_to_sequence('', []) == [1]
    assert text_to_sequence(one_group, []) == one_group_ids
    for cleaner in ('lowercase', 'english_cleaners', 'arabic_cleaners'):
        assert text_to_sequence(two_groups, [cleaner]) == two_group_ids

    # assert text_to_sequence('Hi', ['lowercase']) == [35, 36, 1]
    # assert text_to_sequence('A {AW1 S} B', ['english_cleaners']) == [28, 64, 83, 132, 64, 29, 1]


# def test_sequence_to_text():
#     assert sequence_to_text([]) == ''
#     assert sequence_to_text([1]) == '~'
#     assert sequence_to_text([9, 36, 54, 1]) == 'Hi!~'
#     assert sequence_to_text([2, 64, 83, 132, 64, 3]) == 'A {AW1 S} B'
Example #23
Source File: synthesizer.py From arabic-tacotron-tts with MIT License | 5 votes |
def synthesize(self, text):
    """Synthesize ``text`` and return the resulting audio as WAV bytes.

    The text is first converted with ``arpa.to_arpa``, then encoded with the
    cleaners listed (comma-separated) in ``hparams.cleaners`` and fed to
    ``self.wav_output`` through ``self.session``. The waveform is passed
    through ``audio.inv_preemphasis`` and cut at ``audio.find_endpoint``.
    """
    arpa_text = arpa.to_arpa(text)
    cleaner_names = [name.strip() for name in hparams.cleaners.split(',')]
    seq = text_to_sequence(arpa_text, cleaner_names)

    # Single-item batch: one sequence plus its length.
    feed_dict = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
    }

    raw_wav = self.session.run(self.wav_output, feed_dict=feed_dict)
    wav = audio.inv_preemphasis(raw_wav)
    wav = wav[:audio.find_endpoint(wav)]

    buffer = io.BytesIO()
    audio.save_wav(wav, buffer)
    return buffer.getvalue()
Example #24
Source File: text_test.py From libfaceid with MIT License | 5 votes |
def test_text_to_sequence():
    """Verify text_to_sequence output for a fixed set of inputs and cleaners."""
    expectations = (
        (('', []), [1]),
        (('Hi!', []), [9, 36, 54, 1]),
        (('"A"_B', []), [2, 3, 1]),
        (('A {AW1 S} B', []), [2, 64, 83, 132, 64, 3, 1]),
        (('Hi', ['lowercase']), [35, 36, 1]),
        (('A {AW1 S} B', ['english_cleaners']), [28, 64, 83, 132, 64, 29, 1]),
    )
    for call_args, expected in expectations:
        assert text_to_sequence(*call_args) == expected
Example #25
Source File: preprocess.py From Transformer-TTS with MIT License | 5 votes |
def __getitem__(self, idx):
    """Build the training sample for row ``idx`` of ``self.landmarks_frame``.

    Column 0 of the row names the audio file (relative to ``self.root_dir``,
    without extension) and column 1 holds the transcript. The mel array is
    loaded from the ``.pt.npy`` file next to the wav path.

    Returns:
        dict with keys ``'text'`` (int32 symbol IDs), ``'mel'``,
        ``'text_length'``, ``'mel_input'`` (``mel`` shifted down by one row
        with a zero row of width ``hp.num_mels`` prepended), ``'pos_mel'`` and
        ``'pos_text'`` (1-based position indices).
    """
    # DataFrame.ix has been removed from pandas; rows and columns are
    # addressed by position with .iloc instead.
    # NOTE(review): assumes the frame has a default integer index/columns so
    # positional access matches the old .ix lookup - confirm against the
    # loader.
    row_name = self.landmarks_frame.iloc[idx, 0]
    wav_name = os.path.join(self.root_dir, row_name) + '.wav'
    text = self.landmarks_frame.iloc[idx, 1]

    text = np.asarray(text_to_sequence(text, [hp.cleaners]), dtype=np.int32)
    # Swap the trailing '.wav' for '.pt.npy' to locate the mel file.
    mel = np.load(wav_name[:-4] + '.pt.npy')
    # Prepend a zero row and drop the last row of mel.
    mel_input = np.concatenate(
        [np.zeros([1, hp.num_mels], np.float32), mel[:-1, :]], axis=0
    )
    text_length = len(text)
    pos_text = np.arange(1, text_length + 1)
    pos_mel = np.arange(1, mel.shape[0] + 1)

    sample = {
        'text': text,
        'mel': mel,
        'text_length': text_length,
        'mel_input': mel_input,
        'pos_mel': pos_mel,
        'pos_text': pos_text,
    }

    return sample
Example #26
Source File: synthesis.py From Transformer-TTS with MIT License | 5 votes |
def synthesis(text, args):
    """Synthesize ``text`` and write it to ``hp.sample_path + "/test.wav"``.

    Two checkpoints are restored (``args.restore_step1`` for the
    "transformer" model, ``args.restore_step2`` for the "postnet" model). The
    mel input starts as a single zero frame and grows by the last predicted
    frame on each of ``args.max_len`` iterations. The final prediction is then
    passed through the post-net, converted with ``spectrogram2wav`` and
    written at sample rate ``hp.sr``.
    """
    transformer = Model()
    postnet = ModelPostNet()
    transformer.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    postnet.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    # Encode the text as a single-item batch on the GPU.
    token_ids = np.asarray(text_to_sequence(text, [hp.cleaners]))
    text = t.LongTensor(token_ids).unsqueeze(0).cuda()
    mel_input = t.zeros([1,1, 80]).cuda()
    # 1-based positions for every text symbol.
    pos_text = t.arange(1, text.size(1)+1).unsqueeze(0).cuda()

    transformer = transformer.cuda()
    postnet = postnet.cuda()
    # Switch both networks to evaluation mode.
    transformer.eval()
    postnet.eval()

    pbar = tqdm(range(args.max_len))
    with t.no_grad():
        for _ in pbar:
            pos_mel = t.arange(1,mel_input.size(1)+1).unsqueeze(0).cuda()
            _, postnet_pred, _, _, _, _ = transformer.forward(
                text, mel_input, pos_text, pos_mel
            )
            # Feed the newest predicted frame back in as the next input frame.
            last_frame = postnet_pred[:,-1:,:]
            mel_input = t.cat([mel_input, last_frame], dim=1)

        mag_pred = postnet.forward(postnet_pred)

    wav = spectrogram2wav(mag_pred.squeeze(0).cpu().numpy())
    write(hp.sample_path + "/test.wav", hp.sr, wav)
Example #27
Source File: text_test.py From vae_tacotron with MIT License | 5 votes |
def test_text_to_sequence():
    """Assert the symbol IDs text_to_sequence yields for known inputs."""
    no_cleaners = []
    plain_cases = [
        ('', [1]),
        ('Hi!', [9, 36, 54, 1]),
        ('"A"_B', [2, 3, 1]),
        ('A {AW1 S} B', [2, 64, 83, 132, 64, 3, 1]),
    ]
    for text, expected in plain_cases:
        assert text_to_sequence(text, no_cleaners) == expected

    cleaned_cases = [
        ('Hi', 'lowercase', [35, 36, 1]),
        ('A {AW1 S} B', 'english_cleaners', [28, 64, 83, 132, 64, 29, 1]),
    ]
    for text, cleaner, expected in cleaned_cases:
        assert text_to_sequence(text, [cleaner]) == expected
Example #28
Source File: synthesizer.py From vae_tacotron with MIT License | 5 votes |
def synthesize(self, text, reference_mel):
    """Synthesize ``text`` with a reference mel and return WAV bytes.

    The text is encoded with the cleaners listed (comma-separated) in
    ``hparams.cleaners``; ``reference_mel`` is converted to a float32 array
    and fed to ``self.model.reference_mel`` as a single-item batch. The
    waveform from ``self.wav_output`` is passed through
    ``audio.inv_preemphasis`` and cut at ``audio.find_endpoint``.
    """
    cleaner_names = [name.strip() for name in hparams.cleaners.split(',')]
    seq = text_to_sequence(text, cleaner_names)

    # Single-item batch: sequence, its length, and the reference mel.
    feed_dict = {
        self.model.inputs: [np.asarray(seq, dtype=np.int32)],
        self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
        self.model.reference_mel: [np.asarray(reference_mel, dtype=np.float32)],
    }

    raw_wav = self.session.run(self.wav_output, feed_dict=feed_dict)
    wav = audio.inv_preemphasis(raw_wav)
    end = audio.find_endpoint(wav)
    wav = wav[:end]

    buffer = io.BytesIO()
    audio.save_wav(wav, buffer)
    return buffer.getvalue()