Python utils.spectrogram2wav() Examples
The following are 9 code examples of utils.spectrogram2wav(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
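All of the examples below use spectrogram2wav() to turn a predicted magnitude spectrogram into a waveform, typically via the Griffin-Lim algorithm; note that in the ophelia examples the function additionally takes the hyperparameter object hp as its first argument. The sketch below is not the code of any of these projects: it is a minimal illustration of the usual Griffin-Lim approach, assuming librosa is available, using illustrative parameter names and values (hop_length, win_length, power, n_iter), and omitting the per-project de-normalization and pre-emphasis steps.

import numpy as np
import librosa

def spectrogram2wav_sketch(mag, hop_length=275, win_length=1100, power=1.5, n_iter=50):
    """Minimal Griffin-Lim sketch; parameter names and defaults are illustrative only."""
    mag = np.asarray(mag).T   # the examples store spectrograms as (frames, 1 + n_fft//2)
    mag = mag ** power        # optional sharpening before phase estimation
    wav = librosa.griffinlim(mag, n_iter=n_iter,
                             hop_length=hop_length, win_length=win_length)
    return wav.astype(np.float32)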
Example #1
Source File: synthesize.py From tacotron with Apache License 2.0 | 6 votes |
def synthesize():
    if not os.path.exists(hp.sampledir):
        os.mkdir(hp.sampledir)

    # Load graph
    g = Graph(mode="synthesize"); print("Graph loaded")

    # Load data
    texts = load_data(mode="synthesize")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Restored!")

        # Feed Forward
        ## mel
        y_hat = np.zeros((texts.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]

        ## mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for i, mag in enumerate(mags):
            print("File {}.wav is being generated ...".format(i+1))
            audio = spectrogram2wav(mag)
            write(os.path.join(hp.sampledir, '{}.wav'.format(i+1)), hp.sr, audio)
Example #2
Source File: synthesize.py From tacotron with Apache License 2.0 | 6 votes |
def synthesize():
    if not os.path.exists(hp.sampledir):
        os.mkdir(hp.sampledir)

    # Load graph
    g = Graph(mode="synthesize"); print("Graph loaded")

    # Load data
    texts = load_data(mode="synthesize")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Restored!")

        # Feed Forward
        ## mel
        y_hat = np.zeros((texts.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]

        ## mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for i, mag in enumerate(mags):
            print("File {}.wav is being generated ...".format(i+1))
            audio = spectrogram2wav(mag)
            write(os.path.join(hp.sampledir, '{}.wav'.format(i+1)), hp.sr, audio)
Example #3
Source File: copy_synth_SSRN_GL.py From ophelia with Apache License 2.0 | 6 votes |
def copy_synth_SSRN_GL(hp, outdir):
    safe_makedir(outdir)

    dataset = load_data(hp, mode="synthesis")
    fnames, texts = dataset['fpaths'], dataset['texts']
    bases = [basename(fname) for fname in fnames]
    mels = [np.load(os.path.join(hp.coarse_audio_dir, base + '.npy')) for base in bases]
    lengths = [a.shape[0] for a in mels]
    mels = list2batch(mels, 0)

    g = SSRNGraph(hp, mode="synthesize"); print("Graph (ssrn) loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn')

        print('Run SSRN...')
        Z = synth_mel2mag(hp, mels, g, sess)

        for i, mag in enumerate(Z):
            print("Working on %s"%(bases[i]))
            mag = mag[:lengths[i]*hp.r,:]  ### trim to generated length
            wav = spectrogram2wav(hp, mag)
            # originally written with 'base', which is not defined in this loop;
            # use the per-file basename so each utterance gets its own output file
            soundfile.write(outdir + "/%s.wav"%(bases[i]), wav, hp.sr)
Example #4
Source File: synthesis.py From Transformer-TTS with MIT License | 5 votes |
def synthesis(text, args):
    m = Model()
    m_post = ModelPostNet()

    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m_post.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    text = np.asarray(text_to_sequence(text, [hp.cleaners]))
    text = t.LongTensor(text).unsqueeze(0)
    text = text.cuda()
    mel_input = t.zeros([1, 1, 80]).cuda()
    pos_text = t.arange(1, text.size(1)+1).unsqueeze(0)
    pos_text = pos_text.cuda()

    m = m.cuda()
    m_post = m_post.cuda()
    m.train(False)
    m_post.train(False)

    pbar = tqdm(range(args.max_len))
    with t.no_grad():
        for i in pbar:
            pos_mel = t.arange(1, mel_input.size(1)+1).unsqueeze(0).cuda()
            mel_pred, postnet_pred, attn, stop_token, _, attn_dec = m.forward(text, mel_input, pos_text, pos_mel)
            mel_input = t.cat([mel_input, postnet_pred[:,-1:,:]], dim=1)

        mag_pred = m_post.forward(postnet_pred)

    wav = spectrogram2wav(mag_pred.squeeze(0).cpu().numpy())
    write(hp.sample_path + "/test.wav", hp.sr, wav)
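The function above expects an args namespace carrying restore_step1, restore_step2, and max_len, all of which are used in its body. A hypothetical command-line wrapper is sketched below; the flag names mirror the attributes used above, but the wrapper itself and its defaults are illustrative rather than the repository's actual CLI.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--restore_step1', type=int, required=True,
                        help='checkpoint step for the transformer (acoustic) model')
    parser.add_argument('--restore_step2', type=int, required=True,
                        help='checkpoint step for the postnet (mel-to-magnitude) model')
    parser.add_argument('--max_len', type=int, default=400,
                        help='maximum number of mel frames to generate')
    args = parser.parse_args()
    synthesis("Hello world.", args)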
Example #5
Source File: copy_synth_GL.py From ophelia with Apache License 2.0 | 5 votes |
def copy_synth_GL(hp, outdir):
    safe_makedir(outdir)

    dataset = load_data(hp, mode="synthesis")
    fnames, texts = dataset['fpaths'], dataset['texts']
    bases = [basename(fname) for fname in fnames]

    for base in bases:
        print("Working on file %s"%(base))
        mag = np.load(os.path.join(hp.full_audio_dir, base + '.npy'))
        wav = spectrogram2wav(hp, mag)
        soundfile.write(outdir + "/%s.wav"%(base), wav, hp.sr)
Example #6
Source File: synthesize.py From ophelia with Apache License 2.0 | 5 votes |
def babble(hp, num_sentences=0):
    if num_sentences == 0:
        num_sentences = 4  # default

    g1 = BabblerGraph(hp, mode="synthesize"); print("Babbler graph loaded")
    g2 = SSRNGraph(hp, mode="synthesize"); print("SSRN graph loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        babbler_epoch = restore_latest_model_parameters(sess, hp, 'babbler')
        ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn')

        t = start_clock('Babbling...')
        Y = synth_babble(hp, g1, sess, seed=False, nsamples=num_sentences)
        stop_clock(t)

        t = start_clock('Mel2Mag generating...')
        Z = synth_mel2mag(hp, Y, g2, sess)
        stop_clock(t)

        if (np.isnan(Z).any()):  ### TODO: keep?
            Z = np.nan_to_num(Z)

        # Generate wav files
        outdir = os.path.join(hp.voicedir, 'synth_babble', '%s_%s'%(babbler_epoch, ssrn_epoch))
        safe_makedir(outdir)
        for i, mag in enumerate(Z):
            print("Applying Griffin-Lim to sample number %s"%(i))
            wav = spectrogram2wav(hp, mag)
            write(outdir + "/{:03d}.wav".format(i), hp.sr, wav)
Example #7
Source File: synthesize.py From ophelia with Apache License 2.0 | 5 votes |
def synth_wave(hp, mag, outfile):
    if hp.vocoder == 'griffin_lim':
        wav = spectrogram2wav(hp, mag)
        if hp.store_synth_features:
            np.save(outfile.replace('.wav',''), mag)
        soundfile.write(outfile, wav, hp.sr)
    elif hp.vocoder == 'world':
        world_synthesis(mag, outfile, hp)
Example #8
Source File: synthesise_validation_waveforms.py From ophelia with Apache License 2.0 | 5 votes |
def synth_wave(hp, magfile):
    mag = np.load(magfile)
    #print ('mag shape %s'%(str(mag.shape)))
    wav = spectrogram2wav(hp, mag)
    outfile = magfile.replace('.mag.npy', '.wav')
    outfile = outfile.replace('.npy', '.wav')
    # write(outfile, hp.sr, wav)
    soundfile.write(outfile, wav, hp.sr)
Example #9
Source File: evaluate.py From tacotron with Apache License 2.0 | 4 votes |
def evaluate():
    # Load graph
    g = Graph(mode="evaluate"); print("Graph loaded")

    # Load data
    fpaths, _, texts = load_data(mode="evaluate")
    lengths = [len(t) for t in texts]
    maxlen = sorted(lengths, reverse=True)[0]
    new_texts = np.zeros((len(texts), maxlen), np.int32)
    for i, text in enumerate(texts):
        new_texts[i, :len(text)] = [idx for idx in text]
    #new_texts = np.split(new_texts, 2)
    new_texts = new_texts[:evaluate_wav_num]
    half_size = int(len(fpaths)/2)
    print(half_size)
    #new_fpaths = [fpaths[:half_size], fpaths[half_size:]]
    fpaths = fpaths[:evaluate_wav_num]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Evaluate Model Restored!")
        """
        err = 0.0
        for i, t_split in enumerate(new_texts):
            y_hat = np.zeros((t_split.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
            for j in tqdm.tqdm(range(200)):
                _y_hat = sess.run(g.y_hat, {g.x: t_split, g.y: y_hat})
                y_hat[:, j, :] = _y_hat[:, j, :]
            mags = sess.run(g.z_hat, {g.y_hat: y_hat})
            for k, mag in enumerate(mags):
                fname, mel_ans, mag_ans = load_spectrograms(new_fpaths[i][k])
                print("File {} is being evaluated ...".format(fname))
                audio = spectrogram2wav(mag)
                audio_ans = spectrogram2wav(mag_ans)
                err += calculate_mse(audio, audio_ans)
        err = err/float(len(fpaths))
        print(err)
        """

        # Feed Forward
        ## mel
        y_hat = np.zeros((new_texts.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: new_texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]

        ## mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        err = 0.0
        for i, mag in enumerate(mags):
            fname, mel_ans, mag_ans = load_spectrograms(fpaths[i])
            print("File {} is being evaluated ...".format(fname))
            #audio = spectrogram2wav(mag)
            #audio_ans = spectrogram2wav(mag_ans)
            #err += calculate_mse(audio, audio_ans)
            err += calculate_mse(mag, mag_ans)
        err = err/float(len(fpaths))
        print(err)
        opf.write(hp.logdir + " spectrogram mse: " + str(err) + "\n")
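Example #9 compares predicted and ground-truth magnitude spectrograms with a helper calculate_mse that is not shown (it also relies on module-level names such as evaluate_wav_num and opf, an open results file, defined elsewhere in the script). A minimal sketch of what such a helper might look like, assuming it is a plain mean-squared error over the overlapping portion of the two arrays:

import numpy as np

def calculate_mse(a, b):
    """Hypothetical helper: mean-squared error between two arrays
    (spectrograms or waveforms), trimmed to a common length."""
    n = min(len(a), len(b))
    a = np.asarray(a, dtype=np.float32)[:n]
    b = np.asarray(b, dtype=np.float32)[:n]
    return float(np.mean((a - b) ** 2))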