Python data_load.load_vocab() Examples

The following are six code examples of data_load.load_vocab(), collected from open-source projects. You can go to the original project or source file by following the links above each example, or browse the other available functions and classes of the data_load module.
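In these projects, load_vocab() typically reads a fixed vocabulary file and returns lookup tables between tokens (characters, phonemes, or words) and integer ids; the exact signature varies from project to project, as the examples below show. As a rough sketch of the pattern, a minimal version might look like this (the file name, default argument, and one-token-per-line format are illustrative assumptions, not taken from any of the projects):

import codecs

def load_vocab(vocab_path="vocab.txt"):
    # Hypothetical minimal version: one token per line, id = line number.
    tokens = [line.strip() for line in codecs.open(vocab_path, 'r', 'utf-8') if line.strip()]
    token2idx = {token: idx for idx, token in enumerate(tokens)}
    idx2token = {idx: token for idx, token in enumerate(tokens)}
    return token2idx, idx2token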
Example #1
Source File: model.py    From transformer with Apache License 2.0
def __init__(self, hp):
    self.hp = hp
    # Build token<->id lookup tables from the vocabulary file.
    self.token2idx, self.idx2token = load_vocab(hp.vocab)
    # Embedding table of shape (vocab_size, d_model); zero_pad zeroes out the padding row.
    self.embeddings = get_token_embeddings(self.hp.vocab_size, self.hp.d_model, zero_pad=True)
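
Here load_vocab(hp.vocab) returns a (token2idx, idx2token) pair of dictionaries. A typical round-trip through such tables looks like the following hypothetical helpers (not from the project; the fallback id of 1 for out-of-vocabulary tokens matches the convention in Example #2 below):

def encode(sentence, token2idx, unk_id=1):
    # Hypothetical helper: map each token to its id, falling back to unk_id.
    return [token2idx.get(token, unk_id) for token in sentence.split()]

def decode(ids, idx2token):
    # Hypothetical helper: map ids back to tokens.
    return " ".join(idx2token[i] for i in ids)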
Example #2
Source File: prepro.py    From neural_japanese_transliterator with Apache License 2.0
import codecs
import pickle

import numpy as np

def create_train_data():
    from data_load import load_vocab
    # `hp` is the project's hyperparameter module (provides hp.max_len).
    roma2idx, idx2roma, surf2idx, idx2surf = load_vocab()
    romaji_sents, surface_sents = [], []
    for line in codecs.open('preprocessed/ja.tsv', 'r', 'utf-8'):
        try:
            idx, romaji_sent, surface_sent = line.strip().split("\t")
        except ValueError:
            continue

        if len(romaji_sent) < hp.max_len:
            # Map each character to its id (1 = out-of-vocabulary); "S" marks sentence end.
            # .tostring() serializes the int32 array (renamed .tobytes() in newer NumPy).
            romaji_sents.append(np.array([roma2idx.get(roma, 1) for roma in romaji_sent+"S"], np.int32).tostring())
            surface_sents.append(np.array([surf2idx.get(surf, 1) for surf in surface_sent+"S"], np.int32).tostring())
    pickle.dump((romaji_sents, surface_sents), open('preprocessed/train.pkl', 'wb'), protocol=2)
Example #3
Source File: qa.py    From CoupletAI with MIT License
def create_qa_context(model_path: str, word_to_ix_path: str,
                      embed_dim: int, hidden_dim: int, device) -> QAContext:
    # `load_vocab`, `TraForEncoder`, and `QAContext` come from the CoupletAI
    # project; `torch` must be imported at module level.
    word_dict = load_vocab(word_to_ix_path)
    vocab_size = len(word_dict)
    model = TraForEncoder(vocab_size, embed_dim, hidden_dim)
    # Map the checkpoint to CPU when no GPU is available.
    if not torch.cuda.is_available():
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
    else:
        model.load_state_dict(torch.load(model_path))
    return QAContext(model, word_dict, device)
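
Since map_location=None is torch.load's default, the two branches above can be collapsed into a single call; a minimal equivalent sketch:

# Fall back to CPU only when no GPU is available.
state = torch.load(model_path, map_location=None if torch.cuda.is_available() else 'cpu')
model.load_state_dict(state)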
Example #4
Source File: eval1.py    From deep-voice-conversion with MIT License
def eval(logdir):
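    # Net1 and Net1DataFlow come from the project; PredictConfig,
    # OfflinePredictor, and SaverRestore are tensorpack APIs.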
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.test1.data_path, hp.test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(
        model=model,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close() 
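
Here load_vocab() returns a pair of phoneme lookup tables; only the id-to-phoneme table is kept, so that the predicted and ground-truth PPG ids can be mapped back to phoneme symbols before plotting the confusion matrix.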
Example #5
Source File: graph.py    From bangla-tts with GNU General Public License v3.0
def __init__(self):
    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Placeholders: character ids, mel spectrograms, and previous attention positions.
    self.L = tf.placeholder(tf.int32, shape=(None, None))
    self.mels = tf.placeholder(tf.float32, shape=(None, None, n_mels))
    self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None,))

    # network 1
    with tf.variable_scope("Text2Mel"):
        # Get S or decoder inputs. (B, T//r, n_mels)
        self.S = tf.concat((tf.zeros_like(self.mels[:, :1, :]), self.mels[:, :-1, :]), 1)

        # Networks
        with tf.variable_scope("TextEnc"):
            self.K, self.V = TextEnc(self.L)  # (N, Tx, e)

        with tf.variable_scope("AudioEnc"):
            self.Q = AudioEnc(self.S)

        with tf.variable_scope("Attention"):
            # R: (B, T/r, 2d)
            # alignments: (B, N, T/r)
            # max_attentions: (B,)
            # NB: "mononotic_attention" is the keyword as spelled in the project's code.
            self.R, self.alignments, self.max_attentions = Attention(self.Q, self.K, self.V,
                                                                     mononotic_attention=True,
                                                                     prev_max_attentions=self.prev_max_attentions)
        with tf.variable_scope("AudioDec"):
            self.Y_logits, self.Y = AudioDec(self.R)  # (B, T/r, n_mels)

    # network 2
    # During inference, the predicted melspectrogram values are fed.
    with tf.variable_scope("SSRN"):
        self.Z_logits, self.Z = SSRN(self.Y)

    with tf.variable_scope("gs"):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
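
The scopes above trace a DC-TTS-style two-network layout: Text2Mel predicts a coarse mel spectrogram from the character ids, and SSRN (a spectrogram super-resolution network) converts that prediction into the final full spectrogram; as the comment notes, SSRN consumes Text2Mel's predicted mels at inference time.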
Example #6
Source File: eval.py    From neural_tokenizer with MIT License
def eval():
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="test")  # texts
    char2idx, idx2char = load_vocab()

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model
            with open(hp.logdir + '/checkpoint', 'r') as f:
                mname = f.read().split('"')[1]  # model name

            # Inference
            if not os.path.exists(hp.savedir):
                os.mkdir(hp.savedir)
            with open("{}/{}".format(hp.savedir, mname), 'w') as fout:
                results = []
                baseline_results = []
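                # Iterate over whole batches; trailing examples that do not fill a batch are dropped.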
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # predict characters
                    preds = sess.run(g.preds, {g.x: x})

                    for xx, yy, pp in zip(x, y, preds):  # sentence-wise
                        expected = ''
                        got = ''
                        for xxx, yyy, ppp in zip(xx, yy, pp):  # character-wise
                            if xxx == 0:
                                break
                            else:
                                got += idx2char.get(xxx, "*")
                                expected += idx2char.get(xxx, "*")
                            if ppp == 1: got += " "
                            if yyy == 1: expected += " "

                            # prediction results
                            if ppp == yyy:
                                results.append(1)
                            else:
                                results.append(0)

                            # baseline results
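                            # (the baseline always predicts "no space", so it is correct when yyy == 0)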
                            if yyy == 0: # no space
                                baseline_results.append(1)
                            else:
                                baseline_results.append(0)

                        fout.write("▌Expected: " + expected + "\n")
                        fout.write("▌Got: " + got + "\n\n")
                fout.write(
                    "Final Accuracy = %d/%d=%.4f\n" % (sum(results), len(results), float(sum(results)) / len(results)))
                fout.write(
                    "Baseline Accuracy = %d/%d=%.4f" % (sum(baseline_results), len(baseline_results), float(sum(baseline_results)) / len(baseline_results)))