Python utils.Dataset() Examples

The following are 13 code examples of utils.Dataset(), drawn from open-source projects. Follow the links above each example to visit the original project or source file. You may also want to check out all available functions/classes of the module utils, or try the search function.
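Note that utils is not a single library: each project below ships its own utils module, so the Dataset interfaces differ. As a rough orientation, here is a minimal, hypothetical sketch of the two shapes that recur in the examples: a rollout buffer with an add() method (Example #1) and a map-style wrapper over a list of examples (Examples #10 and #11). Neither is the actual source; all names are illustrative.

class RolloutDataset:
    """Accumulates (state, action, next_state, reward, done) transitions."""
    def __init__(self):
        self._transitions = []

    def add(self, state, action, next_state, reward, done):
        self._transitions.append((state, action, next_state, reward, done))

    def __len__(self):
        return len(self._transitions)


class ListDataset:
    """Wraps a list of examples for indexed access (PyTorch map-style)."""
    def __init__(self, examples):
        self._examples = examples

    def __getitem__(self, index):
        return self._examples[index]

    def __len__(self):
        return len(self._examples)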
Example #1
Source File: model_based_rl.py    From cs294-112_hws with MIT License
def _gather_rollouts(self, policy, num_rollouts):
        dataset = utils.Dataset()

        for _ in range(num_rollouts):
            state = self._env.reset()
            done = False
            t = 0
            while not done:
                if self._render:
                    timeit.start('render')
                    self._env.render()
                    timeit.stop('render')
                timeit.start('get action')
                action = policy.get_action(state)
                timeit.stop('get action')
                timeit.start('env step')
                next_state, reward, done, _ = self._env.step(action)
                timeit.stop('env step')
                done = done or (t >= self._max_rollout_length)
                dataset.add(state, action, next_state, reward, done)

                state = next_state
                t += 1

        return dataset 
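Note that timeit here is not the standard-library timeit module (which exposes no start/stop functions) but the course repo's own profiling helper. A minimal sketch of what such a helper might look like; the implementation is an assumption, only the start/stop call shape is taken from the example:

import time
from collections import defaultdict

_starts = {}                   # label -> time the stopwatch was started
_totals = defaultdict(float)   # label -> accumulated seconds

def start(label):
    _starts[label] = time.time()

def stop(label):
    _totals[label] += time.time() - _starts.pop(label)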
Example #2
Source File: train_shapes.py    From images-to-osm with MIT License
def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Change the default size attribute to control the size
    of rendered images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax


# ## Dataset
# 
# Create a synthetic dataset
# 
# Extend the Dataset class and add a method to load the shapes dataset, `load_shapes()`, and override the following methods:
# 
# * load_image()
# * load_mask()
# * image_reference()

# In[4]: 
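A hedged outline of the subclass this notebook goes on to define, following the Mask R-CNN utils.Dataset convention (add_class and add_image come from that API; the shape-drawing bodies are elided here rather than reproduced):

class ShapesDataset(utils.Dataset):

    def load_shapes(self, count, height, width):
        """Register `count` synthetic images of random shapes."""
        self.add_class("shapes", 1, "square")
        self.add_class("shapes", 2, "circle")
        self.add_class("shapes", 3, "triangle")
        for i in range(count):
            self.add_image("shapes", image_id=i, path=None,
                           width=width, height=height)

    def load_image(self, image_id):
        ...  # draw this image's shapes and return an RGB array

    def load_mask(self, image_id):
        ...  # return (masks, class_ids) for this image's shapes

    def image_reference(self, image_id):
        return self.image_info[image_id]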
Example #3
Source File: test.py    From seq2seq-summarizer with MIT License
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
  test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
  n_samples = int(params.test_sample_ratio * len(test_set.pairs))

  if params.test_save_results and params.model_path_prefix:
    result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
  else:
    result_file = None

  model.eval()
  r1, r2, rl, rsu4 = 0, 0, 0, 0
  prog_bar = tqdm(range(1, n_samples + 1))
  for i in prog_bar:
    batch = next(test_gen)
    scores, file_content = eval_bs_batch(batch, model, vocab, pack_seq=params.pack_seq,
                                         beam_size=params.beam_size,
                                         min_out_len=params.min_out_len,
                                         max_out_len=params.max_out_len,
                                         len_in_words=params.out_len_in_words,
                                         details=result_file is not None)
    if file_content:
      file_content = file_content.encode('utf-8')
      file_info = tarfile.TarInfo(name='%06d.txt' % i)
      file_info.size = len(file_content)
      result_file.addfile(file_info, fileobj=BytesIO(file_content))
    if scores:
      r1 += scores[0]['1_f']
      r2 += scores[0]['2_f']
      rl += scores[0]['l_f']
      rsu4 += scores[0]['su4_f']
      prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100), R2='%.4g' % (r2 / i * 100),
                           RL='%.4g' % (rl / i * 100), RSU4='%.4g' % (rsu4 / i * 100)) 
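The tarfile pattern above, packing in-memory strings into a compressed archive via TarInfo and BytesIO, is worth seeing in isolation. A self-contained version with made-up contents:

import tarfile
from io import BytesIO

with tarfile.open("results.tgz", "w:gz") as archive:
    for i, text in enumerate(["first sample", "second sample"], start=1):
        payload = text.encode("utf-8")
        info = tarfile.TarInfo(name="%06d.txt" % i)
        info.size = len(payload)   # size must be set before addfile
        archive.addfile(info, fileobj=BytesIO(payload))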
Example #4
Source File: main.py    From Object_Detection_Tracking with Apache License 2.0
def read_data_coco(datajson, config, add_gt=False, load_coco_class=False):

  with open(datajson, "r") as f:
    dj = json.load(f)

  if load_coco_class:
    add_coco(config, datajson)


  data = {"imgs":[], "ids":[]}
  if add_gt:
    data = {"imgs":[], "ids":[], "gt":[]}

  # read coco annotation file
  for one in dj["images"]:
    imgid = int(one["id"])
    imgfile = os.path.join(config.imgpath, one["file_name"])
    if config.coco2014_to_2017:
      imgfile = os.path.join(config.imgpath, one["file_name"].split("_")[-1])
    data["imgs"].append(imgfile)
    data["ids"].append(imgid)
    if add_gt:
      # load the bounding box and so on
      pass


  return Dataset(data, add_gt=add_gt)


# for testing, dataset -> {"imgs":[],"ids":[]}, imgs is the image file path, 
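A hypothetical invocation: the config fields mirror the ones the function reads (imgpath, coco2014_to_2017), but the Config class and file paths below are assumptions for illustration, not the repo's actual config object.

class Config:
    imgpath = "data/coco/val2017"
    coco2014_to_2017 = False

test_data = read_data_coco("annotations/instances_val2017.json", Config())
# test_data wraps {"imgs": [...], "ids": [...]} as described in the comment above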
Example #5
Source File: main.py    From img2imgGAN with MIT License
def main(_):
   """Begins the execution of the program

   Args:
      _ : Tensorflow flags app instance
   """
   if FLAGS.create != "":
      dataset = utils.Dataset(FLAGS)
      dataset.create_records(FLAGS.create)
      exit()

   if not FLAGS.test:
      priliminary_checks(FLAGS)
      idx = get_runid(FLAGS)
      create_rundirs(FLAGS, idx)
      dump_model_params(FLAGS)
      log_config(idx, FLAGS.__flags)

   if FLAGS.archi:
      net = nnet.Model(FLAGS, is_training=False)
      net.test_graph()
      exit()


   FLAGS.h = 600 if FLAGS.dataset == 'maps' else 256
   FLAGS.w = FLAGS.h

   if FLAGS.train or FLAGS.resume:
      net = nnet.Model(FLAGS, is_training=True)
      net.train()
      print(' - Done training the network...')
   else:
      print(' - Testing the model...')
      net = nnet.Model(FLAGS, is_training=False)
      net.test(FLAGS.test_source) 
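For context, the main(_) signature follows the TensorFlow 1.x tf.app.run() convention: flags are defined at module level, parsed by tf.app.run(), and the parsed FLAGS object is read inside main. A simplified sketch using two of the flags referenced above (the help strings are assumptions):

import tensorflow as tf   # TensorFlow 1.x

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string("create", "", "dataset to convert to records")
tf.app.flags.DEFINE_boolean("train", False, "train the model")

if __name__ == '__main__':
    tf.app.run()   # parses the flags, then calls main(_)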
Example #6
Source File: osmmodelconfig.py    From images-to-osm with MIT License
def __init__(self, rootDir):
        utils.Dataset.__init__(self)
        self.ROOT_DIR = rootDir 
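In Python 3 the explicit parent-class call can be written with super(); an equivalent form (the class name is assumed, only the body comes from the example):

class OsmImagesDataset(utils.Dataset):
    def __init__(self, rootDir):
        super().__init__()
        self.ROOT_DIR = rootDir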
Example #7
Source File: coco.py    From Mask-RCNN-Pedestrian-Detection with MIT License
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################ 
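A hypothetical call, following the Mask R-CNN COCO workflow; CocoDataset, its load_coco signature, and model come from the surrounding repo and are assumed here:

dataset_val = CocoDataset()
coco = dataset_val.load_coco("data/coco", "val", return_coco=True)
dataset_val.prepare()
evaluate_coco(model, dataset_val, coco, "bbox", limit=500)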
Example #8
Source File: coco.py    From decentralized_AI with GNU General Public License v2.0
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image])[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################ 
Example #9
Source File: main.py    From FVTA_MemexQA with MIT License
def read_data(config,datatype,loadExistModelShared=False): 
	data_path = os.path.join(config.prepropath,"%s_data.p"%datatype)
	shared_path = os.path.join(config.prepropath,"%s_shared.p"%datatype)

	with open(data_path, "rb") as f:
		data = pickle.load(f)
	with open(shared_path, "rb") as f:
		shared = pickle.load(f) # the word-id mappings will be added to this later, either newly built or loaded from an existing file

	num_examples = len(data['q'])
	
	valid_idxs = range(num_examples)

	print "loaded %s/%s data points for %s"%(len(valid_idxs),num_examples,datatype)

	# this is the file for the model's training, holding the word-id mappings; if loadExistModelShared is set in config, read from the existing file, otherwise write a new one
	# load the word2idx info into shared[]
	model_shared_path = os.path.join(config.outpath,"shared.p")
	if(loadExistModelShared):
		with open(model_shared_path,"rb") as f:
			model_shared = pickle.load(f)
		for key in model_shared:
			shared[key] = model_shared[key]
	else:
		# no fine tuning of word vector

		# collect words that occur more often than word_count_thres
		# but are not in the glove word2vec vocabulary;
		# these new words get indices starting from 2
		# (0 and 1 are reserved for NULL and UNK below)
		shared['word2idx'] = {word: idx + 2 for idx, word in enumerate([word for word, count in shared['wordCounter'].items() if (count > config.word_count_thres) and word not in shared['word2vec']])}
		shared['char2idx'] = {char:idx+2 for idx,char in enumerate([char for char,count in shared['charCounter'].items() if count > config.char_count_thres])}
		#print "len of shared['word2idx']:%s"%len(shared['word2idx']) 

		NULL = "<NULL>"
		UNK = "<UNK>"
		shared['word2idx'][NULL] = 0
		shared['char2idx'][NULL] = 0
		shared['word2idx'][UNK] = 1
		shared['char2idx'][UNK] = 1

		# existing word in word2vec will be put after len(new word)+2
		pickle.dump({"word2idx":shared['word2idx'],'char2idx':shared['char2idx']},open(model_shared_path,"wb"))

	# load the word embedding for word in word2vec

	shared['existing_word2idx'] = {word: idx for idx, word in enumerate([word for word in sorted(shared['word2vec'].keys()) if word not in shared['word2idx']])}

	# idx -> vector
	idx2vec = {idx:shared['word2vec'][word] for word,idx in shared['existing_word2idx'].items()}
	# load all this vector into a matrix
	# so you can use word -> idx -> vector
	# using range(len) so that the idx is 0,1,2,3...
	# then it can be used with embedding lookup at the correct idx

	shared['existing_emb_mat'] = np.array([idx2vec[idx] for idx in range(len(idx2vec))], dtype="float32")

	assert config.image_feat_dim == shared['pid2feat'][next(iter(shared['pid2feat']))].shape[0], ("image dim is not %s, it is %s" % (config.image_feat_dim, shared['pid2feat'][next(iter(shared['pid2feat']))].shape[0]))

	return Dataset(data,datatype,shared=shared,valid_idxs=valid_idxs) 
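A tiny, self-contained illustration of the index layout built above: <NULL> and <UNK> take slots 0 and 1, and frequent words missing from word2vec fill slots 2, 3, and so on. The counts below are made up.

wordCounter = {"foo": 5, "bar": 3, "baz": 1}
word2vec = {"bar": [0.1, 0.2]}
word_count_thres = 2

word2idx = {word: idx + 2 for idx, word in enumerate(
    w for w, c in wordCounter.items()
    if c > word_count_thres and w not in word2vec)}
word2idx["<NULL>"] = 0
word2idx["<UNK>"] = 1
# -> {'foo': 2, '<NULL>': 0, '<UNK>': 1}
# ("bar" is already in word2vec; "baz" is below the count threshold)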
Example #10
Source File: main.py    From SummaRuNNer with MIT License
def test():
     
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)

    with open(args.test_dir) as f:
        examples = [json.loads(line) for line in f]
    test_dataset = utils.Dataset(examples)

    test_iter = DataLoader(dataset=test_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)
    if use_gpu:
        checkpoint = torch.load(args.load_dir)
    else:
        checkpoint = torch.load(args.load_dir, map_location=lambda storage, loc: storage)

    # checkpoint['args'].device stores the device used at train time;
    # if we are testing on a CPU, we must override it to None
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(models,checkpoint['args'].model)(checkpoint['args'])
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()
    
    doc_num = len(test_dataset)
    time_cost = 0
    file_id = 1
    for batch in tqdm(test_iter):
        features,_,summaries,doc_lens = vocab.make_features(batch)
        t1 = time()
        if use_gpu:
            probs = net(Variable(features).cuda(), doc_lens)
        else:
            probs = net(Variable(features), doc_lens)
        t2 = time()
        time_cost += t2 - t1
        start = 0
        for doc_id,doc_len in enumerate(doc_lens):
            stop = start + doc_len
            prob = probs[start:stop]
            topk = min(args.topk,doc_len)
            topk_indices = prob.topk(topk)[1].cpu().data.numpy()
            topk_indices.sort()
            doc = batch['doc'][doc_id].split('\n')[:doc_len]
            hyp = [doc[index] for index in topk_indices]
            ref = summaries[doc_id]
            with open(os.path.join(args.ref,str(file_id)+'.txt'), 'w') as f:
                f.write(ref)
            with open(os.path.join(args.hyp,str(file_id)+'.txt'), 'w') as f:
                f.write('\n'.join(hyp))
            start = stop
            file_id = file_id + 1
    print('Speed: %.2f docs / s' % (doc_num / time_cost)) 
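The top-k step above is easy to miss: topk returns (values, indices), and sorting the chosen indices afterwards keeps the extracted sentences in document order. A standalone illustration with made-up scores:

import torch

probs = torch.tensor([0.1, 0.9, 0.4, 0.8])
topk_indices = probs.topk(2)[1].numpy()   # indices of the 2 highest scores
topk_indices.sort()                       # restore document order
print(topk_indices)                       # [1 3]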
Example #11
Source File: main.py    From SummaRuNNer with MIT License
def predict(examples):
    embed = torch.Tensor(np.load(args.embedding)['embedding'])
    with open(args.word2id) as f:
        word2id = json.load(f)
    vocab = utils.Vocab(embed, word2id)
    pred_dataset = utils.Dataset(examples)

    pred_iter = DataLoader(dataset=pred_dataset,
                            batch_size=args.batch_size,
                            shuffle=False)
    if use_gpu:
        checkpoint = torch.load(args.load_dir)
    else:
        checkpoint = torch.load(args.load_dir, map_location=lambda storage, loc: storage)

    # checkpoint['args'].device stores the device used at train time;
    # if we are testing on a CPU, we must override it to None
    if not use_gpu:
        checkpoint['args'].device = None
    net = getattr(models,checkpoint['args'].model)(checkpoint['args'])
    net.load_state_dict(checkpoint['model'])
    if use_gpu:
        net.cuda()
    net.eval()
    
    doc_num = len(pred_dataset)
    time_cost = 0
    file_id = 1
    for batch in tqdm(pred_iter):
        features, doc_lens = vocab.make_predict_features(batch)
        t1 = time()
        if use_gpu:
            probs = net(Variable(features).cuda(), doc_lens)
        else:
            probs = net(Variable(features), doc_lens)
        t2 = time()
        time_cost += t2 - t1
        start = 0
        for doc_id,doc_len in enumerate(doc_lens):
            stop = start + doc_len
            prob = probs[start:stop]
            topk = min(args.topk,doc_len)
            topk_indices = prob.topk(topk)[1].cpu().data.numpy()
            topk_indices.sort()
            doc = batch[doc_id].split('. ')[:doc_len]
            hyp = [doc[index] for index in topk_indices]
            with open(os.path.join(args.hyp,str(file_id)+'.txt'), 'w') as f:
                f.write('. '.join(hyp))
            start = stop
            file_id = file_id + 1
    print('Speed: %.2f docs / s' % (doc_num / time_cost)) 
Example #12
Source File: coco.py    From latte with Apache License 2.0
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################ 
Example #13
Source File: adriving.py    From cvpr-2018-autonomous-driving-autopilot-solution with MIT License
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image])[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################