Python utils.load_json() Examples

The following are 14 code examples of utils.load_json(), drawn from open-source projects. The project and source file for each example are noted in the line above it. You may also want to check out the other available functions and classes of the module utils.
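The exact utils module differs from project to project, but in each of these repositories load_json is essentially a thin wrapper around the standard json module. As a reference point, here is a minimal sketch of the load_json/save_json pair the examples assume (illustrative only, not copied from any of the projects; it assumes plain UTF-8 JSON files):

import json

def load_json(file_path):
    """Parse a JSON file and return the resulting object."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def save_json(data, file_path):
    """Serialize a Python object to file_path as JSON."""
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f)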
Example #1
Source File: eval_tvqa_plus.py    From TVQAplus with MIT License
def load_tvqa_plus_annotation(anno_path):
    raw_annotation = load_json(anno_path)
    gt_box_info = defaultdict(list)
    gt_ts_answer = defaultdict(dict)
    for e in raw_annotation:
        qid = e["qid"]
        vid_name = e["vid_name"]
        # {"qid": [(st (float), ed (float)), ans_idx(int)], ...}
        gt_ts_answer[qid] = [e["ts"], int(e["answer_idx"])]
        for frm_id, box_info_list in e["bbox"].items():
            img_name = "{}_{}_{:05d}".format(vid_name, int(qid), int(frm_id))
            for single_box in box_info_list:
                # [label, score=1 (fake score), box_coordinates (xyxy)]
                reformatted_single_box = [clean_label(single_box["label"]), 1,
                                          [single_box["left"], single_box["top"],
                                           single_box["left"]+single_box["width"],
                                           single_box["top"]+single_box["height"]]]
                gt_box_info[img_name].append(reformatted_single_box)
    annotation = dict(
        ts_answer=gt_ts_answer,
        bbox=gt_box_info
    )
    return annotation 
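For clarity, a hedged illustration of the shape the function returns (keys and values below are invented for illustration):

# annotation["ts_answer"]: {qid: [(start_sec, end_sec), answer_idx], ...}
#     e.g. {"123": [(12.4, 18.9), 2]}
# annotation["bbox"]: {"<vid_name>_<qid>_<frm_id>": [[label, 1, [x1, y1, x2, y2]], ...]}
#     e.g. {"some_vid_123_00042": [["shirt", 1, [10, 20, 110, 220]]]}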
Example #2
Source File: train_model.py    From Dense_BiLSTM with MIT License
def main():
    data_folder = os.path.join('.', 'dataset', 'data')
    # set tasks
    source_dir = os.path.join(data_folder, task)
    # create config
    config = Config(task)
    # load datasets
    trainset = load_json(os.path.join(source_dir, 'train.json'))
    devset = load_json(os.path.join(source_dir, 'dev.json'))
    testset = load_json(os.path.join(source_dir, 'test.json'))
    # build model
    model = DenseConnectBiLSTM(config, resume_training=resume_training)
    # training
    batch_size = 200
    epochs = 30
    if has_devset:
        model.train(trainset, devset, testset, batch_size=batch_size, epochs=epochs, shuffle=True)
    else:
        trainset = trainset + devset
        model.train(trainset, None, testset, batch_size=batch_size, epochs=epochs, shuffle=True) 
Example #3
Source File: argument.py    From dreampower with GNU General Public License v3.0
def arg_json_args(parser):
    def check_json_args_file():
        def type_func(a):
            try:
                j = load_json(a)
            except JSONDecodeError:
                raise parser.error(
                    "Arguments json {} is not in valid JSON format.".format(a))
            return j
        return type_func

    parser.add_argument(
        "-j",
        "--json-args",
        type=check_json_args_file(),
        help="Load arguments from json files or json string. "
             "If a command line argument is also provide the json value will be ignore for this argument.",
    ) 
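The --json-args help text says command-line values take precedence over JSON values. That merge happens elsewhere in the project; a hedged sketch of what it might look like (the merge logic below is an assumption, not dreampower's actual code):

# Hypothetical merge step: CLI values win over JSON values, matching
# the precedence described in the --json-args help text above.
args = parser.parse_args()
if args.json_args:
    for key, value in args.json_args.items():
        # only fill in arguments the user did not set on the command line
        if getattr(args, key, None) is None:
            setattr(args, key, value)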
Example #4
Source File: eval_tvqa_plus.py    From TVQAplus with MIT License
def load_predictions(pred_path, gt_path, w2i_path):
    """gt_path stores ground truth data, here used to reformat the predictions"""
    raw_preds = load_json(pred_path)
    gt_data = load_json(gt_path)
    word2idx = load_json(w2i_path)
    idx2word = {i: w for w, i in word2idx.items()}
    qid2ans = {int(e["qid"]): int(e["answer_idx"]) for e in gt_data}
    qid2bbox = {int(e["qid"]): e["bbox"] for e in gt_data}
    bbox_preds = dict()
    for e in raw_preds["raw_bbox"]:
        qid = None
        for i in range(5):  # n_answer == 5
            if len(e[str(i)]) > 0:
                qid = e[str(i)][0]["qid"]
        assert qid is not None
        ans_idx = qid2ans[int(qid)]
        cur_gt_bbox = qid2bbox[int(qid)]
        cur_correct_bbox_preds = e[str(ans_idx)]
        key_template = "{vid_name}_{qid}_{img_idx:05d}"
        for p in cur_correct_bbox_preds:
            annotated_word_ids = [word2idx[clean_label(b["label"])] if clean_label(b["label"]) in word2idx
                                  else word2idx["<unk>"] for b in cur_gt_bbox[str(p["img_idx"])]]
            collected_bbox = []
            for idx, b in enumerate(p["bbox"]):
                if p["word"] in annotated_word_ids:
                    collected_bbox.append([idx2word[p["word"]], float(p["pred"][idx]), b])
            key_str = key_template.format(vid_name=p["vid_name"], qid=qid, img_idx=p["img_idx"])
            if key_str not in bbox_preds:
                bbox_preds[key_str] = []
            bbox_preds[key_str].extend(collected_bbox)

    preds = dict(ts_answer=raw_preds["ts_answer"], bbox=bbox_preds)
    return preds 
Example #5
Source File: eval_tvqa_plus.py    From TVQAplus with MIT License
def main_eval():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--gt_path", type=str, default="data/tvqa_plus_val.json",
                        help="ground-truth json file path")
    parser.add_argument("--pred_path", type=str,
                        help="input prediction json file path, the same format as the results "
                             "returned by load_tvqa_plus_annotation func")
    parser.add_argument("--word2idx_path", type=str, default="data/word2idx.json",
                        help="word2idx json file path, provided with the evaluation code")
    parser.add_argument("--output_path", type=str,
                        help="path to store the calculated metrics")
    parser.add_argument("--no_preproc_pred", action="store_true",)
    args = parser.parse_args()

    # Display settings
    print('------------ Options -------------')
    for k, v in sorted(vars(args).items()):
        print('%s: %s' % (str(k), str(v)))
    print('-------------- End ----------------')

    groundtruth = load_tvqa_plus_annotation(args.gt_path)
    if args.no_preproc_pred:
        prediction = load_json(args.pred_path)
    else:
        prediction = load_predictions(args.pred_path, args.gt_path, args.word2idx_path)
    word2idx = load_json(args.word2idx_path)

    bbox_metrics = compute_att_metrics_using_maskrcnn_voc(prediction["bbox"], groundtruth["bbox"], word2idx)
    temporal_metrics = compute_temporal_metrics(prediction["ts_answer"], groundtruth["ts_answer"])
    all_metrics = merge_dicts([bbox_metrics, temporal_metrics])
    print("QA Acc. {}\nGrd. mAP {}\nTemp. mIoU{}\nASA {}"
          .format(all_metrics["qa_acc"], all_metrics["overall_map"],
                  all_metrics["miou"], all_metrics["ans_span_joint_acc@.5"]))
    if args.output_path:
        save_json_pretty(all_metrics, args.output_path) 
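With the defaults above, the script might be invoked as, e.g., python eval_tvqa_plus.py --pred_path preds.json --output_path metrics.json (file names here are placeholders). Passing --no_preproc_pred skips load_predictions when the prediction file is already in the ground-truth format.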
Example #6
Source File: nasbench_api_v2.py    From eval-nas with MIT License
def _make_rank_hash_perf_file(self, f_name, config='v7_e9_op3'):

        def make_perfs_file(perf_filename):
            """ move to nasbench_v2 """
            hash_perfs = {}
            for hash_key in self.hash_dict.keys():
                perfs = self.query_hash(hash_key)
                hash_perfs[hash_key] = (perfs['validation_accuracy'], perfs['test_accuracy'])
            utils.save_json(hash_perfs, perf_filename)
            return hash_perfs

        filename = f'data/nasbench/nasbench_perfs_{config}.json'
        if not os.path.exists(filename):
            hash_perfs = make_perfs_file(filename)
        else:
            hash_perfs = utils.load_json(filename)
        self.hash_perfs_keys = ('validation_accuracy', 'test_accuracy')

        hashs = list(self.hash_dict.keys())
        f_config = config.replace('_', '-')
        hash_rank_filename = f'nasbench_hash_rank_bytest_{f_config}.json'
        perf_filename = f'nasbench_perfs_bytest_{f_config}.json'

        perfs = [hash_perfs[h] for h in hashs]
        t_ps = [p[1] for p in perfs]
        print("sorting the hashs by testing accuracy.")
        sorted_hash_indices = np.argsort(t_ps)
        s_hashs = [hashs[i] for i in sorted_hash_indices]
        s_perfs = [perfs[i] for i in sorted_hash_indices]
        utils.save_json(s_perfs, 'data/nasbench/' + perf_filename)
        utils.save_json(s_hashs, 'data/nasbench/' + hash_rank_filename) 
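The compute-once-then-load_json caching pattern above recurs in several examples on this page (see also Examples #8 and #9). A minimal generic version, written against the load_json/save_json sketched at the top of the page (an illustrative helper, not part of eval-nas):

import os

def cached_json(path, compute_fn):
    """Return the cached JSON at path if it exists; otherwise compute, save, and return it."""
    if os.path.exists(path):
        return load_json(path)
    result = compute_fn()
    save_json(result, path)
    return result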
Example #7
Source File: config.py    From Dense_BiLSTM with MIT License
def __init__(self, task):
        self.ckpt_path = './ckpt/{}/'.format(task)
        if not os.path.exists(self.ckpt_path):
            os.makedirs(self.ckpt_path)
        source_dir = os.path.join('.', 'dataset', 'data', task)
        self.word_vocab, _ = load_vocab(os.path.join(source_dir, 'words.vocab'))
        self.char_vocab, _ = load_vocab(os.path.join(source_dir, 'chars.vocab'))
        self.vocab_size = len(self.word_vocab)
        self.char_vocab_size = len(self.char_vocab)
        self.label_size = load_json(os.path.join(source_dir, 'label.json'))["label_size"]
        self.word_emb = load_embeddings(os.path.join(source_dir, 'glove.filtered.npz'))

Example #8
Source File: preprocessing.py    From TVQA with MIT License
def get_vidname2cnt_all(frame_root_path, vidname2cnt_cache_path):
    if os.path.exists(vidname2cnt_cache_path):
        print("Found frame cnt cache, loading ...")
        return load_json(vidname2cnt_cache_path)
    show_names = ["bbt", "friends", "grey", "met", "castle", "house"]
    vidname2cnt_list = []
    for sn in show_names:
        print("Count frames in %s" % sn)
        cur_base_path = os.path.join(frame_root_path, "%s_frames" % sn)
        vidname2cnt_list.append(get_vidname2cnt_per_show(cur_base_path))
    vidname2cnt = merge_list_dicts(vidname2cnt_list)    
    save_json(vidname2cnt, vidname2cnt_cache_path)    
    return vidname2cnt
Example #9
Source File: preprocessing.py    From TVQA with MIT License
def load_srt(srt_dir, srt_cache_path):
    """
    return: A python dict, the keys are the video names, the entries are lists,
            each contains all the text from a .srt file
    sub_times are the start time of the sentences.
    """
    if os.path.exists(srt_cache_path):
        print("Found srt data cache, loading ...")
        return load_json(srt_cache_path)

    print("Loading srt files from %s ..." % srt_dir)
    srt_paths = glob.glob(os.path.join(srt_dir, "*.srt"))
    name2sub_text = {}
    name2sub_time = {}
    for i in tqdm(range(len(srt_paths))):
        subs = pysrt.open(srt_paths[i], encoding="iso-8859-1")
        if len(subs) == 0:
            subs = pysrt.open(srt_paths[i])

        text_list = []
        sub_time_list = []
        for j in range(len(subs)):
            cur_sub = subs[j]
            cur_str = cur_sub.text
            cur_str = "(<UNKNAME>:)" + cur_str if cur_str[0] != "(" else cur_str
            cur_str = cur_str.replace("\n", " ")
            text_list.append(cur_str)
            sub_time_list.append(
                60 * cur_sub.start.minutes + cur_sub.start.seconds + 0.001 * cur_sub.start.milliseconds)

        key_str = os.path.splitext(os.path.basename(srt_paths[i]))[0]
        name2sub_text[key_str] = text_list
        name2sub_time[key_str] = sub_time_list
    srt_data = {"sub_text": name2sub_text, "sub_time": name2sub_time}
    save_json(srt_data, srt_cache_path)
    return srt_data 
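A hedged usage sketch of the cached loader above (paths are placeholders):

# Hypothetical paths, for illustration only.
srt_data = load_srt("data/srt", "cache/srt_cache.json")
vid = next(iter(srt_data["sub_text"]))
print(srt_data["sub_text"][vid][0])  # first subtitle line of one video
print(srt_data["sub_time"][vid][0])  # its start time in seconds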
Example #10
Source File: config.py    From TVQA with MIT License
def parse(self):
        """parse cmd line arguments and do some preprocessing"""
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()
        results_dir = opt.results_dir_base + time.strftime("_%Y_%m_%d_%H_%M_%S")

        if isinstance(self, TestOptions):
            options = load_json(os.path.join("results", opt.model_dir, "opt.json"))
            for arg in options:
                setattr(opt, arg, options[arg])
        else:
            os.makedirs(results_dir)
            self.display_save(opt, results_dir)

        opt.normalize_v = not opt.no_normalize_v
        opt.device = torch.device("cuda:%d" % opt.device if opt.device >= 0 else "cpu")
        opt.with_ts = not opt.no_ts
        opt.input_streams = [] if opt.input_streams is None else opt.input_streams
        opt.vid_feat_flag = True if "imagenet" in opt.input_streams else False
        opt.h5driver = None if opt.no_core_driver else "core"
        opt.results_dir = results_dir

        self.opt = opt
        return opt 
Example #11
Source File: config.py    From TVQAplus with MIT License
def parse(self):
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()

        if opt.input_streams is None:
            if isinstance(self, TestOptions):
                opt.input_streams = []
            else:
                raise ValueError("input_streams must be set")

        if opt.debug:
            opt.results_dir_base = opt.results_dir_base.split("/")[0] + "/debug_results"
            opt.no_core_driver = True
            opt.num_workers = 0
        opt.results_dir = opt.results_dir_base + time.strftime("_%Y_%m_%d_%H_%M_%S")

        self.opt = opt

        if isinstance(self, TestOptions):
            options = load_json(os.path.join("results", opt.model_dir, "opt.json"))
            for arg in options:
                if arg not in ["debug"]:
                    setattr(opt, arg, options[arg])
            opt.no_core_driver = True
        else:
            mkdirp(opt.results_dir)
            # save a copy of current code
            code_dir = os.path.dirname(os.path.realpath(__file__))
            code_zip_filename = os.path.join(opt.results_dir, "code.zip")
            make_zipfile(code_dir, code_zip_filename,
                         enclosing_dir="code", exclude_paths=["results"], exclude_extensions=[".pyc", ".ipynb"])
        self.display_save()

        assert opt.num_hard <= opt.num_negatives
        opt.device = torch.device("cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
        if opt.device.type == "cuda":
            opt.bsz = opt.bsz * len(opt.device_ids)
            opt.test_bsz = opt.test_bsz * len(opt.device_ids)
        opt.h5driver = None if opt.no_core_driver else "core"
        opt.vfeat_flag = "vfeat" in opt.input_streams
        opt.vcpt_flag = "vcpt" in opt.input_streams
        opt.sub_flag = "sub" in opt.input_streams
        self.opt = opt
        return opt 
Example #12
Source File: scraper.py    From gasvaktin with MIT License
def get_individual_olis_prices():
    url = 'https://www.olis.is/solustadir/thjonustustodvar/eldsneytisverd/'
    res = requests.get(url, headers=utils.headers())
    html = lxml.etree.fromstring(res.content.decode('utf-8'), lxml.etree.HTMLParser())
    data = {
        'stations': {},
        'highest': {'bensin95': None, 'diesel': None}
    }
    price_table = html.find('.//table')  # there's just one table element, so use it
    for row in price_table.findall('.//tr'):
        if len(row.findall('.//td')) < 3:
            continue
        if row.findall('.//td')[0].text.strip() == '':
            continue
        name = row.findall('.//td')[0].text.strip()
        station_key = globs.OLIS_LOCATION_RELATION[name]
        bensin = None
        if row.findall('.//td')[1].text.strip() != '':
            bensin = float(row.findall('.//td')[1].text.strip().replace(',', '.'))
        diesel = None
        if row.findall('.//td')[2].text.strip() != '':
            diesel = float(row.findall('.//td')[2].text.strip().replace(',', '.'))
        data['stations'][station_key] = {'bensin95': bensin, 'diesel': diesel}
        # guard against None: comparing a float against a missing price would raise TypeError
        if bensin is not None and (data['highest']['bensin95'] is None or data['highest']['bensin95'] < bensin):
            data['highest']['bensin95'] = bensin
        if diesel is not None and (data['highest']['diesel'] is None or data['highest']['diesel'] < diesel):
            data['highest']['diesel'] = diesel
    assert(data['highest']['bensin95'] is not None)
    assert(data['highest']['diesel'] is not None)
    for name in data['stations']:
        # fall back to the highest listed price if a station's price is missing
        if data['stations'][name]['bensin95'] is None:
            data['stations'][name]['bensin95'] = data['highest']['bensin95']
        if data['stations'][name]['diesel'] is None:
            data['stations'][name]['diesel'] = data['highest']['diesel']
    prices = {}
    olis_stations = utils.load_json(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../stations/olis.json'
        )
    )
    for key in olis_stations:
        if key in data['stations']:
            bensin95 = data['stations'][key]['bensin95']
            diesel = data['stations'][key]['diesel']
        else:
            bensin95 = data['highest']['bensin95']
            diesel = data['highest']['diesel']
        bensin95_discount = int((bensin95 - globs.OLIS_MINIMUM_DISCOUNT) * 10) / 10.0
        diesel_discount = int((diesel - globs.OLIS_MINIMUM_DISCOUNT) * 10) / 10.0
        prices[key] = {
            'bensin95': bensin95,
            'diesel': diesel,
            'bensin95_discount': bensin95_discount,
            'diesel_discount': diesel_discount
        }
    return prices 
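Note that the *_discount lines truncate the discounted price down to one decimal place rather than rounding it, since int() truncates toward zero; a quick check (numbers invented):

price, discount = 199.99, 5.0
print(int((price - discount) * 10) / 10.0)  # 194.9, not 195.0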
Example #13
Source File: scraper.py    From gasvaktin with MIT License
def get_individual_ob_prices():
    url = 'https://www.ob.is/eldsneytisverd/'
    res = requests.get(url, headers=utils.headers())
    html = lxml.etree.fromstring(res.content.decode('utf-8'), lxml.etree.HTMLParser())
    data = {
        'stations': {},
        'highest': {'bensin95': None, 'diesel': None}
    }
    price_table = html.find('.//table[@id="gas-prices"]')
    for row in price_table.findall('.//tr'):
        if len(row.findall('.//td')) == 0:
            continue
        if row.findall('.//td')[0].get('style') == 'border:0px;':
            continue
        name = row.findall('.//td')[0].text.strip()
        if name == 'Ketilás í Fljótum':
            continue  # skip this one for now; it only lists diesel and needs investigation
        station_key = globs.OB_LOCATION_RELATION[name]
        bensin = float(row.findall('.//td')[1].text.strip().replace(',', '.'))
        diesel = float(row.findall('.//td')[2].text.strip().replace(',', '.'))
        data['stations'][station_key] = {'bensin95': bensin, 'diesel': diesel}
        if data['highest']['bensin95'] is None or data['highest']['bensin95'] < bensin:
            data['highest']['bensin95'] = bensin
        if data['highest']['diesel'] is None or data['highest']['diesel'] < diesel:
            data['highest']['diesel'] = diesel
    prices = {}
    ob_stations = utils.load_json(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../stations/ob.json'
        )
    )
    now = datetime.datetime.now()
    end = datetime.datetime.strptime(globs.OB_EXTRA_DISCOUNT_UNTIL, '%Y-%m-%dT%H:%M')
    for key in ob_stations:
        if key in data['stations']:
            bensin95 = data['stations'][key]['bensin95']
            diesel = data['stations'][key]['diesel']
        else:
            bensin95 = data['highest']['bensin95']
            diesel = data['highest']['diesel']
        bensin95_discount = int((bensin95 - globs.OB_MINIMUM_DISCOUNT) * 10) / 10.0
        diesel_discount = int((diesel - globs.OB_MINIMUM_DISCOUNT) * 10) / 10.0
        if key in globs.OB_DISCOUNTLESS_STATIONS:
            bensin95_discount = None
            diesel_discount = None
        if key in globs.OB_EXTRA_DISCOUNT_STATIONS and now < end:
            if bensin95_discount is not None:
                bensin95_discount = int((bensin95 - globs.OB_EXTRA_DISCOUNT_AMOUNT) * 10) / 10.0
            if diesel_discount is not None:
                diesel_discount = int((diesel - globs.OB_EXTRA_DISCOUNT_AMOUNT) * 10) / 10.0
        prices[key] = {
            'bensin95': bensin95,
            'diesel': diesel,
            'bensin95_discount': bensin95_discount,
            'diesel_discount': diesel_discount
        }
    return prices 
Example #14
Source File: tvqa_dataset.py    From TVQA with MIT License
def __init__(self, opt, mode="train"):
        self.raw_train = load_json(opt.train_path)
        self.raw_test = load_json(opt.test_path)
        self.raw_valid = load_json(opt.valid_path)
        self.vcpt_dict = load_pickle(opt.vcpt_path)
        self.vfeat_load = opt.vid_feat_flag
        if self.vfeat_load:
            self.vid_h5 = h5py.File(opt.vid_feat_path, "r", driver=opt.h5driver)
        self.glove_embedding_path = opt.glove_path
        self.normalize_v = opt.normalize_v
        self.with_ts = opt.with_ts
        self.mode = mode
        self.cur_data_dict = self.get_cur_dict()

        # set word embedding / vocabulary
        self.word2idx_path = opt.word2idx_path
        self.idx2word_path = opt.idx2word_path
        self.vocab_embedding_path = opt.vocab_embedding_path
        self.embedding_dim = opt.embedding_size
        self.word2idx = {"<pad>": 0, "<unk>": 1, "<eos>": 2}
        self.idx2word = {0: "<pad>", 1: "<unk>", 2: "<eos>"}
        self.offset = len(self.word2idx)

        # set entry keys
        if self.with_ts:
            self.text_keys = ["q", "a0", "a1", "a2", "a3", "a4", "located_sub_text"]
        else:
            self.text_keys = ["q", "a0", "a1", "a2", "a3", "a4", "sub_text"]
        self.vcpt_key = "vcpt"
        self.label_key = "answer_idx"
        self.qid_key = "qid"
        self.vid_name_key = "vid_name"
        self.located_frm_key = "located_frame"
        for k in self.text_keys + [self.vcpt_key, self.qid_key, self.vid_name_key]:
            if k == "vcpt":
                continue
            assert k in self.raw_valid[0].keys()

        # build/load vocabulary
        if not files_exist([self.word2idx_path, self.idx2word_path, self.vocab_embedding_path]):
            print("\nNo cache founded.")
            self.build_word_vocabulary(word_count_threshold=opt.word_count_threshold)
        else:
            print("\nLoading cache ...")
            self.word2idx = load_pickle(self.word2idx_path)
            self.idx2word = load_pickle(self.idx2word_path)
            self.vocab_embedding = load_pickle(self.vocab_embedding_path)
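files_exist is a small helper from the same repository; a plausible equivalent (an assumption, not necessarily TVQA's exact code):

import os

def files_exist(filepath_list):
    """True only if every path in the list exists."""
    return all(os.path.exists(p) for p in filepath_list)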