Python utils.spectrogram2wav() Examples

The following are 9 code examples of utils.spectrogram2wav(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
Example #1
Source File: synthesize.py    From tacotron with Apache License 2.0 6 votes vote down vote up
def synthesize():
    """Synthesize one wav file per input text from the latest checkpoint.

    Loads the synthesis texts, restores the newest checkpoint found in
    ``hp.logdir``, fills in the mel prediction one time step at a time for
    200 steps, turns the result into magnitude spectrograms and writes
    ``1.wav``, ``2.wav``, ... into ``hp.sampledir`` (created if missing).
    """
    num_steps = 200  # number of time steps filled in by the loop below

    if not os.path.exists(hp.sampledir):
        os.mkdir(hp.sampledir)

    # Load graph
    g = Graph(mode="synthesize")
    print("Graph loaded")

    # Load data
    texts = load_data(mode="synthesize")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint(hp.logdir)
        saver.restore(sess, checkpoint)
        print("Restored!")

        # Mel: every pass feeds back everything predicted so far and keeps
        # only the frame belonging to the current step.
        mel_shape = (texts.shape[0], num_steps, hp.n_mels*hp.r)
        y_hat = np.zeros(mel_shape, np.float32)
        for step in tqdm.tqdm(range(num_steps)):
            predicted = sess.run(g.y_hat, {g.x: texts, g.y: y_hat})
            y_hat[:, step, :] = predicted[:, step, :]

        # Mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for file_no, mag in enumerate(mags, start=1):
            print("File {}.wav is being generated ...".format(file_no))
            audio = spectrogram2wav(mag)
            out_path = os.path.join(hp.sampledir, '{}.wav'.format(file_no))
            write(out_path, hp.sr, audio)
Example #2
Source File: synthesize.py    From tacotron with Apache License 2.0 6 votes vote down vote up
def synthesize():
    """Generate wav files for the synthesis texts with the restored model.

    Steps: make sure ``hp.sampledir`` exists, build the graph in
    "synthesize" mode, restore the latest checkpoint from ``hp.logdir``,
    predict the mel output frame by frame over 200 steps, run the
    magnitude output on the finished mel array, and write each result as
    ``<index>.wav`` (1-based) under ``hp.sampledir``.
    """
    if not os.path.exists(hp.sampledir):
        os.mkdir(hp.sampledir)

    g = Graph(mode="synthesize")
    print("Graph loaded")

    texts = load_data(mode="synthesize")
    batch_size = texts.shape[0]
    total_frames = 200  # fixed number of frames to predict

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")

        # Mel pass: the buffer starts as zeros and one frame index is
        # copied in from the network output per iteration.
        y_hat = np.zeros((batch_size, total_frames, hp.n_mels*hp.r), np.float32)
        for frame in tqdm.tqdm(range(total_frames)):
            feed = {g.x: texts, g.y: y_hat}
            frame_output = sess.run(g.y_hat, feed)
            y_hat[:, frame, :] = frame_output[:, frame, :]

        # Magnitude pass, then one wav file per batch entry.
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for index, mag in enumerate(mags):
            number = index + 1
            print("File {}.wav is being generated ...".format(number))
            audio = spectrogram2wav(mag)
            wav_name = '{}.wav'.format(number)
            write(os.path.join(hp.sampledir, wav_name), hp.sr, audio)
Example #3
Source File: copy_synth_SSRN_GL.py    From ophelia with Apache License 2.0 6 votes vote down vote up
def copy_synth_SSRN_GL(hp, outdir):
    """Run SSRN on stored coarse mel features and write one wav per file.

    For every file path in the "synthesis" dataset, the matching ``.npy``
    array is loaded from ``hp.coarse_audio_dir``, the arrays are batched,
    passed through the SSRN graph via ``synth_mel2mag``, trimmed back to
    their own length, converted with ``spectrogram2wav`` and written to
    ``<outdir>/<base>.wav``.

    Args:
        hp: hyperparameter object; this function reads ``coarse_audio_dir``,
            ``r`` and ``sr`` from it and passes it on to the helpers.
        outdir: directory the wav files are written to (created through
            ``safe_makedir``).
    """
    safe_makedir(outdir)

    dataset = load_data(hp, mode="synthesis")
    fnames = dataset['fpaths']
    bases = [basename(fname) for fname in fnames]
    mels = [np.load(os.path.join(hp.coarse_audio_dir, base + '.npy')) for base in bases]
    # Remember the unpadded length of each item before batching.
    lengths = [a.shape[0] for a in mels]
    mels = list2batch(mels, 0)

    g = SSRNGraph(hp, mode="synthesize"); print("Graph (ssrn) loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        restore_latest_model_parameters(sess, hp, 'ssrn')

        print('Run SSRN...')
        Z = synth_mel2mag(hp, mels, g, sess)

        for i, mag in enumerate(Z):
            # The comprehension variable `base` above is not visible here
            # (Python 3) or is stuck on the last file (Python 2), so the
            # name must be looked up per item.
            base = bases[i]
            print("Working on %s"%(base))
            mag = mag[:lengths[i]*hp.r,:]  ### trim to generated length
            wav = spectrogram2wav(hp, mag)
            soundfile.write(outdir + "/%s.wav"%(base), wav, hp.sr)
Example #4
Source File: synthesis.py    From Transformer-TTS with MIT License 5 votes vote down vote up
def synthesis(text, args):
    """Synthesize ``text`` and write the result to ``test.wav``.

    Restores the transformer and post-net models from the checkpoints
    named by ``args.restore_step1`` / ``args.restore_step2``, extends the
    mel input by one predicted frame per iteration for ``args.max_len``
    iterations, runs the post-net on the last prediction and writes the
    waveform to ``hp.sample_path + "/test.wav"``.

    Args:
        text: input text, passed to ``text_to_sequence``.
        args: object providing ``restore_step1``, ``restore_step2`` and
            ``max_len``.
    """
    m = Model()
    m_post = ModelPostNet()
    m.load_state_dict(load_checkpoint(args.restore_step1, "transformer"))
    m_post.load_state_dict(load_checkpoint(args.restore_step2, "postnet"))

    # Inputs: the encoded text with a leading batch axis, a single all-zero
    # starting mel frame, and 1-based text positions.
    token_ids = np.asarray(text_to_sequence(text, [hp.cleaners]))
    text = t.LongTensor(token_ids).unsqueeze(0).cuda()
    mel_input = t.zeros([1, 1, 80]).cuda()
    pos_text = t.arange(1, text.size(1)+1).unsqueeze(0).cuda()

    m = m.cuda()
    m_post = m_post.cuda()
    for network in (m, m_post):
        network.train(False)

    steps = tqdm(range(args.max_len))
    with t.no_grad():
        for _ in steps:
            mel_len = mel_input.size(1)
            pos_mel = t.arange(1, mel_len+1).unsqueeze(0).cuda()
            _, postnet_pred, _, _, _, _ = m.forward(text, mel_input, pos_text, pos_mel)
            # Append only the newest predicted frame to the running input.
            newest_frame = postnet_pred[:, -1:, :]
            mel_input = t.cat([mel_input, newest_frame], dim=1)

        mag_pred = m_post.forward(postnet_pred)

    magnitude = mag_pred.squeeze(0).cpu().numpy()
    wav = spectrogram2wav(magnitude)
    write(hp.sample_path + "/test.wav", hp.sr, wav)
Example #5
Source File: copy_synth_GL.py    From ophelia with Apache License 2.0 5 votes vote down vote up
def copy_synth_GL(hp, outdir):
    """Convert stored magnitude features to wav files, one per dataset file.

    For every file path in the "synthesis" dataset, loads
    ``<hp.full_audio_dir>/<base>.npy``, converts it with
    ``spectrogram2wav`` and writes ``<outdir>/<base>.wav`` at sample rate
    ``hp.sr``.

    Args:
        hp: hyperparameter object (``full_audio_dir`` and ``sr`` are read
            here; it is also handed to the helpers).
        outdir: output directory, created through ``safe_makedir``.
    """
    safe_makedir(outdir)

    dataset = load_data(hp, mode="synthesis")
    fnames = dataset['fpaths']
    texts = dataset['texts']  # fetched but not used in this function
    bases = list(map(basename, fnames))

    for base in bases:
        print("Working on file %s"%(base))
        mag_path = os.path.join(hp.full_audio_dir, base + '.npy')
        mag = np.load(mag_path)
        wav = spectrogram2wav(hp, mag)
        wav_path = outdir + "/%s.wav"%(base)
        soundfile.write(wav_path, wav, hp.sr)
Example #6
Source File: synthesize.py    From ophelia with Apache License 2.0 5 votes vote down vote up
def babble(hp, num_sentences=0):
    """Generate babbled samples and save them as numbered wav files.

    Builds the babbler and SSRN graphs, restores the latest parameters for
    both, generates ``num_sentences`` samples with ``synth_babble``, maps
    them to magnitudes with ``synth_mel2mag`` and writes ``000.wav``,
    ``001.wav``, ... under
    ``<hp.voicedir>/synth_babble/<babbler_epoch>_<ssrn_epoch>``.

    Args:
        hp: hyperparameter object (``voicedir`` and ``sr`` are read here).
        num_sentences: number of samples to generate; 0 selects the
            default of 4.
    """
    if num_sentences == 0:
        num_sentences = 4  # default

    g1 = BabblerGraph(hp, mode="synthesize")
    print("Babbler graph loaded")
    g2 = SSRNGraph(hp, mode="synthesize")
    print("SSRN graph loaded")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        babbler_epoch = restore_latest_model_parameters(sess, hp, 'babbler')
        ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn')

        timer = start_clock('Babbling...')
        Y = synth_babble(hp, g1, sess, seed=False, nsamples=num_sentences)
        stop_clock(timer)

        timer = start_clock('Mel2Mag generating...')
        Z = synth_mel2mag(hp, Y, g2, sess)
        stop_clock(timer)

        # Replace NaNs only when at least one is present.  ### TODO: keep?
        contains_nan = np.isnan(Z).any()
        if contains_nan:
            Z = np.nan_to_num(Z)

        # Generate wav files
        run_name = '%s_%s'%(babbler_epoch, ssrn_epoch)
        outdir = os.path.join(hp.voicedir, 'synth_babble', run_name)
        safe_makedir(outdir)
        for i, mag in enumerate(Z):
            print("Applying Griffin-Lim to sample number %s"%(i))
            wav = spectrogram2wav(hp, mag)
            wav_path = outdir + "/{:03d}.wav".format(i)
            write(wav_path, hp.sr, wav)
Example #7
Source File: synthesize.py    From ophelia with Apache License 2.0 5 votes vote down vote up
def synth_wave(hp, mag, outfile):
    """Write ``mag`` to ``outfile`` using the vocoder named by ``hp.vocoder``.

    * ``'griffin_lim'``: convert with ``spectrogram2wav`` and write the
      waveform at ``hp.sr``; when ``hp.store_synth_features`` is truthy,
      ``mag`` is also saved with ``np.save`` under ``outfile`` with every
      ``'.wav'`` occurrence removed.
    * ``'world'``: delegate to ``world_synthesis``.

    Any other vocoder value does nothing.
    """
    vocoder = hp.vocoder

    if vocoder == 'world':
        world_synthesis(mag, outfile, hp)
        return

    if vocoder != 'griffin_lim':
        return

    waveform = spectrogram2wav(hp, mag)
    if hp.store_synth_features:
        feature_path = outfile.replace('.wav', '')
        np.save(feature_path, mag)
    soundfile.write(outfile, waveform, hp.sr)
Example #8
Source File: synthesise_validation_waveforms.py    From ophelia with Apache License 2.0 5 votes vote down vote up
def synth_wave(hp, magfile):
    """Convert a saved magnitude array to a wav file next to it.

    Loads ``magfile`` with ``np.load``, converts it with
    ``spectrogram2wav`` and writes the waveform at ``hp.sr``. The output
    path is ``magfile`` with ``'.mag.npy'`` replaced by ``'.wav'`` and
    then any remaining ``'.npy'`` replaced by ``'.wav'``.
    """
    magnitude = np.load(magfile)
    waveform = spectrogram2wav(hp, magnitude)

    # Handle both the '.mag.npy' and the plain '.npy' naming schemes.
    wav_path = magfile.replace('.mag.npy', '.wav').replace('.npy', '.wav')
    soundfile.write(wav_path, waveform, hp.sr)
Example #9
Source File: evaluate.py    From tacotron with Apache License 2.0 4 votes vote down vote up
def evaluate():
    """Report the mean spectrogram MSE of the restored model.

    Pads the evaluation texts into one zero-filled int32 array, keeps the
    first ``evaluate_wav_num`` texts and file paths, predicts mel frames
    for 200 steps and then magnitudes, and compares each predicted
    magnitude with the one returned by ``load_spectrograms`` for the
    matching file path. The summed error divided by the number of kept
    file paths is printed and written to ``opf``.

    NOTE(review): ``evaluate_wav_num`` and ``opf`` are not defined in this
    function; they are expected to exist in the enclosing scope.
    """
    num_steps = 200  # number of time steps filled in by the mel loop

    # Load graph
    g = Graph(mode="evaluate")
    print("Graph loaded")

    # Load data and right-pad every text with zeros to the longest length.
    fpaths, _, texts = load_data(mode="evaluate")
    text_lengths = [len(seq) for seq in texts]
    longest = sorted(text_lengths, reverse=True)[0]
    padded = np.zeros((len(texts), longest), np.int32)
    for row, seq in enumerate(texts):
        padded[row, :len(seq)] = list(seq)
    new_texts = padded[:evaluate_wav_num]

    half_size = int(len(fpaths)/2)
    print(half_size)
    fpaths = fpaths[:evaluate_wav_num]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Evaluate Model Restored!")

        # Mel: feed back everything predicted so far, keep the current frame.
        mel_shape = (new_texts.shape[0], num_steps, hp.n_mels*hp.r)
        y_hat = np.zeros(mel_shape, np.float32)
        for step in tqdm.tqdm(range(num_steps)):
            predicted = sess.run(g.y_hat, {g.x: new_texts, g.y: y_hat})
            y_hat[:, step, :] = predicted[:, step, :]

        # Mag
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})

        # The error is measured on magnitudes, not on waveforms.
        err = 0.0
        for k, mag in enumerate(mags):
            fname, mel_ans, mag_ans = load_spectrograms(fpaths[k])
            print("File {} is being evaluated ...".format(fname))
            err += calculate_mse(mag, mag_ans)
        err = err/float(len(fpaths))
        print(err)

        report = hp.logdir  + " spectrogram mse: " + str(err) + "\n"
        opf.write(report)