Python data_load.load_vocab() Examples
The following are 6 code examples of data_load.load_vocab().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module data_load, or try the search function.
Example #1
Source File: model.py From transformer with Apache License 2.0 | 5 votes |
def __init__(self, hp):
    """Keep the hyperparameters and build the vocabulary tables and embeddings.

    Args:
        hp: Hyperparameter object. This constructor reads ``hp.vocab``,
            ``hp.vocab_size`` and ``hp.d_model`` from it.
    """
    self.hp = hp

    # load_vocab hands back two lookup tables, unpacked in this order.
    vocab_tables = load_vocab(hp.vocab)
    self.token2idx, self.idx2token = vocab_tables

    self.embeddings = get_token_embeddings(
        self.hp.vocab_size,
        self.hp.d_model,
        zero_pad=True,
    )
Example #2
Source File: prepro.py From neural_japanese_transliterator with Apache License 2.0 | 5 votes |
def create_train_data():
    """Encode the romaji/surface sentence pairs and pickle them.

    Reads ``preprocessed/ja.tsv`` as UTF-8, expecting three tab-separated
    fields per line (an index, a romaji sentence and a surface sentence).
    Each sentence gets a trailing ``"S"`` appended and is mapped character
    by character to vocabulary indices, using index 1 for characters that
    are not in the vocabulary. The resulting int32 arrays are serialized to
    bytes and the two lists are pickled to ``preprocessed/train.pkl``.

    Lines that do not split into exactly three fields are skipped, and so
    are pairs whose romaji sentence is not shorter than ``hp.max_len``.
    """
    from data_load import load_vocab

    roma2idx, idx2roma, surf2idx, idx2surf = load_vocab()

    def encode(sentence, table):
        # "S" is appended to every sentence before lookup (presumably the
        # end-of-sentence symbol -- confirm against load_vocab).
        indices = [table.get(char, 1) for char in sentence + "S"]
        # tobytes() replaces the deprecated alias tostring(); the produced
        # bytes are the same.
        return np.array(indices, np.int32).tobytes()

    romaji_sents, surface_sents = [], []

    # Context managers make sure both files are closed (and the pickle is
    # flushed) even when an error interrupts processing.
    with codecs.open('preprocessed/ja.tsv', 'r', 'utf-8') as fin:
        for line in fin:
            try:
                idx, romaji_sent, surface_sent = line.strip().split("\t")
            except ValueError:
                # Not exactly three fields: ignore the line.
                continue

            if len(romaji_sent) < hp.max_len:
                romaji_sents.append(encode(romaji_sent, roma2idx))
                surface_sents.append(encode(surface_sent, surf2idx))

    with open('preprocessed/train.pkl', 'wb') as fout:
        pickle.dump((romaji_sents, surface_sents), fout, protocol=2)
Example #3
Source File: qa.py From CoupletAI with MIT License | 5 votes |
def create_qa_context(model_path: str, word_to_ix_path: str, embed_dim: int, hidden_dim: int, device) -> QAContext:
    """Build a QAContext from a saved model and a vocabulary file.

    Args:
        model_path: Path of the saved state dict passed to ``torch.load``.
        word_to_ix_path: Path handed to ``load_vocab``.
        embed_dim: Second constructor argument of ``TraForEncoder``.
        hidden_dim: Third constructor argument of ``TraForEncoder``.
        device: Passed through unchanged to ``QAContext``.

    Returns:
        A ``QAContext`` wrapping the loaded model, the vocabulary and
        ``device``.
    """
    word_dict = load_vocab(word_to_ix_path)
    model = TraForEncoder(len(word_dict), embed_dim, hidden_dim)

    if torch.cuda.is_available():
        state_dict = torch.load(model_path)
    else:
        # Without CUDA the stored tensors are remapped to the CPU on load.
        state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)

    return QAContext(model, word_dict, device)
Example #4
Source File: eval1.py From deep-voice-conversion with MIT License | 5 votes |
def eval(logdir):
    """Run one evaluation batch through Net1 and write summaries to *logdir*.

    Restores the latest checkpoint found in ``logdir`` (when there is one),
    runs the predictor on the first batch of the test dataflow, and writes
    the loss, accuracy and confusion-matrix summaries with a
    ``tf.summary.FileWriter``.

    Args:
        logdir: Directory searched for the latest checkpoint and used as the
            summary output directory.
    """
    # Load graph
    net = Net1()

    # dataflow
    dataflow = Net1DataFlow(hp.test1.data_path, hp.test1.batch_size)

    checkpoint_path = tf.train.latest_checkpoint(logdir)
    predict_config = PredictConfig(
        model=net,
        input_names=get_eval_input_names(),
        output_names=get_eval_output_names())
    if checkpoint_path:
        predict_config.session_init = SaverRestore(checkpoint_path)
    predictor = OfflinePredictor(predict_config)

    x_mfccs, y_ppgs = next(dataflow().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    true_phns = [idx2phn[index] for index in y_ppg_1d]
    predicted_phns = [idx2phn[index] for index in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(true_phns, predicted_phns, phns)

    writer = tf.summary.FileWriter(logdir)
    # Same write order as before: loss, accuracy, confusion matrix.
    for summary in (summ_loss, summ_acc, summ_cm):
        writer.add_summary(summary)
    writer.close()
Example #5
Source File: graph.py From bangla-tts with GNU General Public License v3.0 | 4 votes |
def __init__(self):
    """Build the graph: placeholders, the Text2Mel network and the SSRN network.

    Creates the input placeholders ``L``, ``mels`` and
    ``prev_max_attentions``, wires the TextEnc / AudioEnc / Attention /
    AudioDec sub-networks under the ``Text2Mel`` scope, feeds the predicted
    mel output ``Y`` into ``SSRN``, and creates a non-trainable
    ``global_step`` variable.
    """
    # Load vocabulary
    vocab_tables = load_vocab()
    self.char2idx, self.idx2char = vocab_tables

    self.L = tf.placeholder(tf.int32, shape=(None, None))
    self.mels = tf.placeholder(tf.float32, shape=(None, None, n_mels))
    self.prev_max_attentions = tf.placeholder(tf.int32, shape=(None,))

    # network 1
    with tf.variable_scope("Text2Mel"):
        # Get S or decoder inputs. (B, T//r, n_mels)
        # A zero frame is prepended and the last frame dropped, i.e. the
        # mel sequence is shifted right by one step.
        go_frame = tf.zeros_like(self.mels[:, :1, :])
        shifted_mels = self.mels[:, :-1, :]
        self.S = tf.concat((go_frame, shifted_mels), 1)

        # Networks
        with tf.variable_scope("TextEnc"):
            self.K, self.V = TextEnc(self.L)  # (N, Tx, e)

        with tf.variable_scope("AudioEnc"):
            self.Q = AudioEnc(self.S)

        with tf.variable_scope("Attention"):
            # R: (B, T/r, 2d)
            # alignments: (B, N, T/r)
            # max_attentions: (B,)
            attention_outputs = Attention(
                self.Q,
                self.K,
                self.V,
                mononotic_attention=True,
                prev_max_attentions=self.prev_max_attentions)
            self.R, self.alignments, self.max_attentions = attention_outputs

        with tf.variable_scope("AudioDec"):
            self.Y_logits, self.Y = AudioDec(self.R)  # (B, T/r, n_mels)

    # network 2
    # During inference, the predicted melspectrogram values are fed.
    with tf.variable_scope("SSRN"):
        self.Z_logits, self.Z = SSRN(self.Y)

    with tf.variable_scope("gs"):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
Example #6
Source File: eval.py From neural_tokenizer with MIT License | 4 votes |
def eval():
    """Evaluate the restored model on the test set and write a report file.

    Restores the latest checkpoint from ``hp.logdir``, predicts in full
    batches of ``hp.batch_size`` (a trailing partial batch is not
    evaluated), and writes to ``hp.savedir/<model name>``:

    * for every sentence, the expected and the predicted text, with a space
      inserted after each character whose label / prediction equals 1;
    * a final accuracy line (prediction equals label, per character);
    * a baseline accuracy line (label equals 0, i.e. "no space").

    Characters with index 0 end a sentence (presumably padding -- confirm
    against ``load_data``). When no character was evaluated the accuracy
    lines report ``0/0=0.0000`` instead of raising ``ZeroDivisionError``.
    """
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, Y = load_data(mode="test")  # texts
    _, idx2char = load_vocab()

    def accuracy_line(label, hits):
        # Format one summary line; guards against an empty result list,
        # which happens when the test set is smaller than one batch.
        correct, total = sum(hits), len(hits)
        ratio = float(correct) / total if total else 0.0
        return "%s Accuracy = %d/%d=%.4f" % (label, correct, total, ratio)

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            # Get model
            # The model name is the first double-quoted token of the
            # checkpoint index file; the file is closed right after reading.
            with open(hp.logdir + '/checkpoint', 'r') as ckpt_index:
                mname = ckpt_index.read().split('"')[1]  # model name

            # Inference
            if not os.path.exists(hp.savedir):
                os.mkdir(hp.savedir)

            with open("{}/{}".format(hp.savedir, mname), 'w') as fout:
                results = []
                baseline_results = []
                for step in range(len(X) // hp.batch_size):
                    start = step * hp.batch_size
                    stop = (step + 1) * hp.batch_size
                    x = X[start:stop]
                    y = Y[start:stop]

                    # predict characters
                    preds = sess.run(g.preds, {g.x: x})

                    for xx, yy, pp in zip(x, y, preds):  # sentence-wise
                        expected_parts = []
                        got_parts = []
                        for xxx, yyy, ppp in zip(xx, yy, pp):  # character-wise
                            if xxx == 0:
                                break

                            char = idx2char.get(xxx, "*")
                            got_parts.append(char)
                            expected_parts.append(char)
                            if ppp == 1:
                                got_parts.append(" ")
                            if yyy == 1:
                                expected_parts.append(" ")

                            # prediction results
                            results.append(1 if ppp == yyy else 0)

                            # baseline results: always predicting "no space"
                            baseline_results.append(1 if yyy == 0 else 0)

                        fout.write("▌Expected: " + "".join(expected_parts) + "\n")
                        fout.write("▌Got: " + "".join(got_parts) + "\n\n")

                fout.write(accuracy_line("Final", results) + "\n")
                fout.write(accuracy_line("Baseline", baseline_results))