Python Examples of utils.spectrogram2wav

Source File: synthesize.py From tacotron with Apache License 2.0

6 votes

def synthesize():
    if not os.path.exists(hp.sampledir): os.mkdir(hp.sampledir)

    # Load graph
    g = Graph(mode="synthesize"); print("Graph loaded")

    # Load data
    texts = load_data(mode="synthesize")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Restored!")

        # Feed Forward
        ## mel
        y_hat = np.zeros((texts.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]
        ## mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for i, mag in enumerate(mags):
            print("File {}.wav is being generated ...".format(i+1))
            audio = spectrogram2wav(mag)
            write(os.path.join(hp.sampledir, '{}.wav'.format(i+1)), hp.sr, audio)

Source File: synthesize.py From tacotron with Apache License 2.0

6 votes

def synthesize():
    if not os.path.exists(hp.sampledir): os.mkdir(hp.sampledir)

    # Load graph
    g = Graph(mode="synthesize"); print("Graph loaded")

    # Load data
    texts = load_data(mode="synthesize")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Restored!")

        # Feed Forward
        ## mel
        y_hat = np.zeros((texts.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]
        ## mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for i, mag in enumerate(mags):
            print("File {}.wav is being generated ...".format(i+1))
            audio = spectrogram2wav(mag)
            write(os.path.join(hp.sampledir, '{}.wav'.format(i+1)), hp.sr, audio)

Source File: copy_synth_SSRN_GL.py From ophelia with Apache License 2.0

6 votes

def copy_synth_SSRN_GL(hp, outdir):

    safe_makedir(outdir)

    dataset = load_data(hp, mode="synthesis") 
    fnames, texts = dataset['fpaths'], dataset['texts']
    bases = [basename(fname) for fname in fnames]
    mels = [np.load(os.path.join(hp.coarse_audio_dir, base + '.npy')) for base in bases]
    lengths = [a.shape[0] for a in mels]
    mels = list2batch(mels, 0)

    g = SSRNGraph(hp, mode="synthesize"); print("Graph (ssrn) loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn')

        print('Run SSRN...')
        Z = synth_mel2mag(hp, mels, g, sess)

        for i, mag in enumerate(Z):
            print("Working on %s"%(bases[i]))
            mag = mag[:lengths[i]*hp.r,:]  ### trim to generated length             
            wav = spectrogram2wav(hp, mag)
            soundfile.write(outdir + "/%s.wav"%(base), wav, hp.sr)

Source File: synthesis.py From Transformer-TTS with MIT License

5 votes

def synthesis(text, args):
    m = Model()
    m_post = ModelPostNet()

    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m_post.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    text = np.asarray(text_to_sequence(text, [hp.cleaners]))
    text = t.LongTensor(text).unsqueeze(0)
    text = text.cuda()
    mel_input = t.zeros([1,1, 80]).cuda()
    pos_text = t.arange(1, text.size(1)+1).unsqueeze(0)
    pos_text = pos_text.cuda()

    m=m.cuda()
    m_post = m_post.cuda()
    m.train(False)
    m_post.train(False)
    
    pbar = tqdm(range(args.max_len))
    with t.no_grad():
        for i in pbar:
            pos_mel = t.arange(1,mel_input.size(1)+1).unsqueeze(0).cuda()
            mel_pred, postnet_pred, attn, stop_token, _, attn_dec = m.forward(text, mel_input, pos_text, pos_mel)
            mel_input = t.cat([mel_input, postnet_pred[:,-1:,:]], dim=1)

        mag_pred = m_post.forward(postnet_pred)
        
    wav = spectrogram2wav(mag_pred.squeeze(0).cpu().numpy())
    write(hp.sample_path + "/test.wav", hp.sr, wav)

Source File: copy_synth_GL.py From ophelia with Apache License 2.0

5 votes

def copy_synth_GL(hp, outdir):

    safe_makedir(outdir)

    dataset = load_data(hp, mode="synthesis") 
    fnames, texts = dataset['fpaths'], dataset['texts']
    bases = [basename(fname) for fname in fnames]
    
    for base in bases:
        print("Working on file %s"%(base))
        mag = np.load(os.path.join(hp.full_audio_dir, base + '.npy'))
        wav = spectrogram2wav(hp, mag)
        soundfile.write(outdir + "/%s.wav"%(base), wav, hp.sr)

Source File: synthesize.py From ophelia with Apache License 2.0

5 votes

def babble(hp, num_sentences=0):

    if num_sentences == 0:
        num_sentences = 4 # default
    g1 = BabblerGraph(hp, mode="synthesize"); print("Babbler graph loaded")
    g2 = SSRNGraph(hp, mode="synthesize"); print("SSRN graph loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        babbler_epoch = restore_latest_model_parameters(sess, hp, 'babbler')
        ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn')

        t = start_clock('Babbling...')
        Y = synth_babble(hp, g1, sess, seed=False, nsamples=num_sentences)
        stop_clock(t)

        t = start_clock('Mel2Mag generating...')
        Z = synth_mel2mag(hp, Y, g2, sess)
        stop_clock(t) 

        if (np.isnan(Z).any()):  ### TODO: keep?
            Z = np.nan_to_num(Z)

        # Generate wav files
        outdir = os.path.join(hp.voicedir, 'synth_babble', '%s_%s'%(babbler_epoch, ssrn_epoch))
        safe_makedir(outdir)
        for i, mag in enumerate(Z):
            print("Applying Griffin-Lim to sample number %s"%(i))
            wav = spectrogram2wav(hp, mag)
            write(outdir + "/{:03d}.wav".format(i), hp.sr, wav)

Source File: synthesize.py From ophelia with Apache License 2.0

5 votes

def synth_wave(hp, mag, outfile):
    if hp.vocoder == 'griffin_lim':
        wav = spectrogram2wav(hp, mag)
        if hp.store_synth_features:
            np.save(outfile.replace('.wav',''), mag)   
        soundfile.write(outfile, wav, hp.sr)
    elif hp.vocoder == 'world':
        world_synthesis(mag, outfile, hp)

Source File: synthesise_validation_waveforms.py From ophelia with Apache License 2.0

5 votes

def synth_wave(hp, magfile):
    mag = np.load(magfile)
    #print ('mag shape %s'%(str(mag.shape)))
    wav = spectrogram2wav(hp, mag)
    outfile = magfile.replace('.mag.npy', '.wav')
    outfile = outfile.replace('.npy', '.wav')
    #print magfile
    #print outfile
    #print 
    # write(outfile, hp.sr, wav)
    soundfile.write(outfile, wav, hp.sr)

Source File: evaluate.py From tacotron with Apache License 2.0

4 votes

def evaluate():
    # Load graph
    g = Graph(mode="evaluate"); print("Graph loaded")

    # Load data
    fpaths, _, texts = load_data(mode="evaluate")
    lengths = [len(t) for t in texts]
    maxlen = sorted(lengths, reverse=True)[0]
    new_texts = np.zeros((len(texts), maxlen), np.int32)
    for i, text in enumerate(texts):
        new_texts[i, :len(text)] = [idx for idx in text]
    #new_texts = np.split(new_texts, 2)
    new_texts = new_texts[:evaluate_wav_num]
    half_size = int(len(fpaths)/2)
    print(half_size)
    #new_fpaths = [fpaths[:half_size], fpaths[half_size:]]
    fpaths = fpaths[:evaluate_wav_num]
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Evaluate Model Restored!")
        """
        err = 0.0

        for i, t_split in enumerate(new_texts):
            y_hat = np.zeros((t_split.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
            for j in tqdm.tqdm(range(200)):
                _y_hat = sess.run(g.y_hat, {g.x: t_split, g.y: y_hat})
                y_hat[:, j, :] = _y_hat[:, j, :]

            mags = sess.run(g.z_hat, {g.y_hat: y_hat})
            for k, mag in enumerate(mags):
                fname, mel_ans, mag_ans = load_spectrograms(new_fpaths[i][k])
                print("File {} is being evaluated ...".format(fname))
                audio = spectrogram2wav(mag)
                audio_ans = spectrogram2wav(mag_ans)
                err += calculate_mse(audio, audio_ans)

        err = err/float(len(fpaths))
        print(err)

        """
        # Feed Forward
        ## mel
        y_hat = np.zeros((new_texts.shape[0], 200, hp.n_mels*hp.r), np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: new_texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]
        ## mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        err = 0.0
        for i, mag in enumerate(mags):
            fname, mel_ans, mag_ans = load_spectrograms(fpaths[i])
            print("File {} is being evaluated ...".format(fname))
            #audio = spectrogram2wav(mag)
            #audio_ans = spectrogram2wav(mag_ans)
            #err += calculate_mse(audio, audio_ans)
            err += calculate_mse(mag, mag_ans)
        err = err/float(len(fpaths))
        print(err)
        opf.write(hp.logdir  + " spectrogram mse: " + str(err) + "\n")

Python utils.spectrogram2wav() Examples