Python util.convert_tokens() Examples
The following are 14 code examples of util.convert_tokens(), drawn from open-source projects. Each example lists its original project and source file. You may also want to check out all other available functions and classes of the util module.
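None of the examples below reproduce convert_tokens() itself, but in the R-Net/QANet-style projects listed here it maps predicted start and end token indices back to answer strings, using the per-token character spans stored in the eval file. The sketch below is a minimal, assumed reconstruction of that behavior rather than code copied from any of the listed projects; the field names ("context", "spans", "uuid") and the toy eval_file entry are purely illustrative.

# Minimal sketch (assumed, not from any project above): map predicted
# start/end token indices back to raw answer text via per-token
# character spans stored in the eval file.
def convert_tokens(eval_file, qa_id, pp1, pp2):
    answer_dict = {}
    remapped_dict = {}
    for qid, p1, p2 in zip(qa_id, pp1, pp2):
        entry = eval_file[str(qid)]
        context = entry["context"]
        spans = entry["spans"]          # (char_start, char_end) per token
        uuid = entry["uuid"]
        start_idx = spans[p1][0]        # first char of the start token
        end_idx = spans[p2][1]          # last char (exclusive) of the end token
        answer_dict[str(qid)] = context[start_idx:end_idx]
        remapped_dict[uuid] = context[start_idx:end_idx]
    return answer_dict, remapped_dict

# Hypothetical usage with a single toy entry:
eval_file = {"1": {"context": "Paris is the capital of France.",
                   "spans": [(0, 5), (6, 8), (9, 12), (13, 20), (21, 23), (24, 30)],
                   "uuid": "q-0001"}}
answers, remapped = convert_tokens(eval_file, [1], [3], [5])
print(answers)   # {'1': 'capital of France'}
print(remapped)  # {'q-0001': 'capital of France'}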
Example #1
Source File: main.py From R-Net with MIT License
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"])])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    return metrics, [loss_sum, f1_sum, em_sum]
Example #2
Source File: main.py From Question_Answering_Models with MIT License
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=loss)])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    print("[{}] loss:{}, f1:{}, em:{}".format(
        data_type, loss, metrics["f1"], metrics["exact_match"]))
    return metrics, [loss_sum, f1_sum, em_sum]
Example #3
Source File: main.py From Question_Answering_Models with MIT License
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"])])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    return metrics, [loss_sum, f1_sum, em_sum]
Example #4
Source File: main.py From Question_Answering_Models with MIT License
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=loss)])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    print("[{}] loss:{}, f1:{}, em:{}".format(
        data_type, loss, metrics["f1"], metrics["exact_match"]))
    return metrics, [loss_sum, f1_sum, em_sum]
Example #5
Source File: main.py From Question_Answering_Models with MIT License
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=loss)])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    print("[{}] loss:{}, f1:{}, em:{}".format(
        data_type, loss, metrics["f1"], metrics["exact_match"]))
    return metrics, [loss_sum, f1_sum, em_sum]
Example #6
Source File: main.py From QANet with MIT License
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        answer_dict_, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"])])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    return metrics, [loss_sum, f1_sum, em_sum]
Example #7
Source File: run.py From hotpot with Apache License 2.0
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    answer_dict = {}
    sp_dict = {}
    total_loss, step_cnt = 0, 0
    iter = data_source
    for step, data in enumerate(iter):
        if step >= max_batches and max_batches > 0:
            break
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)

        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
            context_idxs, ques_idxs, context_char_idxs, ques_char_idxs,
            context_lens, start_mapping, end_mapping, all_mapping, return_yp=True)
        loss = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1) +
                nll_sum(logit2, y2)) / context_idxs.size(0) + \
            config.sp_lambda * nll_average(predict_support.view(-1, 2),
                                           is_support.view(-1))

        answer_dict_ = convert_tokens(
            eval_file, data['ids'], yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
Example #8
Source File: run.py From hotpot with Apache License 2.0
def predict(data_source, model, eval_file, config, prediction_file):
    answer_dict = {}
    sp_dict = {}
    sp_th = config.sp_threshold
    for step, data in enumerate(tqdm(data_source)):
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)

        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
            context_idxs, ques_idxs, context_char_idxs, ques_char_idxs,
            context_lens, start_mapping, end_mapping, all_mapping, return_yp=True)
        answer_dict_ = convert_tokens(
            eval_file, data['ids'], yp1.data.cpu().numpy().tolist(),
            yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)

        predict_support_np = torch.sigmoid(predict_support[:, :, 1]).data.cpu().numpy()
        for i in range(predict_support_np.shape[0]):
            cur_sp_pred = []
            cur_id = data['ids'][i]
            for j in range(predict_support_np.shape[1]):
                if j >= len(eval_file[cur_id]['sent2title_ids']):
                    break
                if predict_support_np[i, j] > sp_th:
                    cur_sp_pred.append(eval_file[cur_id]['sent2title_ids'][j])
            sp_dict.update({cur_id: cur_sp_pred})

    prediction = {'answer': answer_dict, 'sp': sp_dict}
    with open(prediction_file, 'w') as f:
        json.dump(prediction, f)
Example #9
Source File: main.py From R-Net with MIT License
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1']))
Example #10
Source File: main.py From Question_Answering_Models with MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1']))
Example #11
Source File: main.py From Question_Answering_Models with MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]
    graph = tf.Graph()

    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = QANet(config, test_batch, word_mat, char_mat, trainable=False, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
Example #12
Source File: main.py From Question_Answering_Models with MIT License
def test(config):
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    try:
        sess_config.gpu_options.per_process_gpu_memory_fraction = config.gpu_memory_fraction
    except:
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1']))
Example #13
Source File: main.py From QANet with MIT License
def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]
    graph = tf.Graph()

    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file, get_record_parser(
            config, is_test=True), config).make_one_shot_iterator()

        model = Model(config, test_batch, word_mat, char_mat, trainable=False, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                sess.run(model.assign_vars)
            losses = []
            answer_dict = {}
            remapped_dict = {}
            for step in tqdm(range(total // config.batch_size + 1)):
                qa_id, loss, yp1, yp2 = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)
            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
Example #14
Source File: train.py From squad with MIT License
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    nll_meter = util.AverageMeter()

    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)

            preds, _ = util.convert_tokens(gold_dict,
                                           ids.tolist(),
                                           starts.tolist(),
                                           ends.tolist(),
                                           use_squad_v2)
            pred_dict.update(preds)

    model.train()

    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    return results, pred_dict