Python utils.load_json() Examples
The following are 14 code examples of utils.load_json(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
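None of the examples below include the helper itself, since each project ships its own utils module. As a point of reference, here is a minimal sketch of what a load_json/save_json pair like the ones used in these examples typically looks like; the exact signatures, encodings, and options in any given project may differ:

import json


def load_json(file_path):
    # assumed behavior: read a JSON file and return the parsed object (dict or list)
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def save_json(data, file_path):
    # assumed behavior: serialize `data` to a JSON file
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f)

For instance, Example #2 below reads its train/dev/test splits with three such calls, e.g. trainset = load_json(os.path.join(source_dir, 'train.json')).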
Example #1
Source File: eval_tvqa_plus.py From TVQAplus with MIT License | 6 votes |
def load_tvqa_plus_annotation(anno_path):
    raw_annotation = load_json(anno_path)
    gt_box_info = defaultdict(list)
    gt_ts_answer = defaultdict(dict)
    for e in raw_annotation:
        qid = e["qid"]
        vid_name = e["vid_name"]
        # {"qid": [(st (float), ed (float)), ans_idx(int)], ...}
        gt_ts_answer[qid] = [e["ts"], int(e["answer_idx"])]
        for frm_id, box_info_list in e["bbox"].items():
            img_name = "{}_{}_{:05d}".format(vid_name, int(qid), int(frm_id))
            for single_box in box_info_list:
                # [label, score=1 (fake score), box_coordinates (xyxy)]
                reformatted_single_box = [clean_label(single_box["label"]), 1,
                                          [single_box["left"], single_box["top"],
                                           single_box["left"] + single_box["width"],
                                           single_box["top"] + single_box["height"]]]
                gt_box_info[img_name].append(reformatted_single_box)
    annotation = dict(
        ts_answer=gt_ts_answer,
        bbox=gt_box_info
    )
    return annotation
Example #2
Source File: train_model.py From Dense_BiLSTM with MIT License | 6 votes |
def main():
    data_folder = os.path.join('.', 'dataset', 'data')
    # set tasks
    source_dir = os.path.join(data_folder, task)
    # create config
    config = Config(task)
    # load datasets
    trainset = load_json(os.path.join(source_dir, 'train.json'))
    devset = load_json(os.path.join(source_dir, 'dev.json'))
    testset = load_json(os.path.join(source_dir, 'test.json'))
    # build model
    model = DenseConnectBiLSTM(config, resume_training=resume_training)
    # training
    batch_size = 200
    epochs = 30
    if has_devset:
        model.train(trainset, devset, testset, batch_size=batch_size, epochs=epochs, shuffle=True)
    else:
        trainset = trainset + devset
        model.train(trainset, None, testset, batch_size=batch_size, epochs=epochs, shuffle=True)
Example #3
Source File: argument.py From dreampower with GNU General Public License v3.0 | 6 votes |
def arg_json_args(parser):
    def check_json_args_file():
        def type_func(a):
            try:
                j = load_json(a)
            except JSONDecodeError:
                raise parser.error(
                    "Arguments json {} is not in valid JSON format.".format(a))
            return j
        return type_func

    parser.add_argument(
        "-j", "--json-args",
        type=check_json_args_file(),
        help="Load arguments from json files or json string. "
             "If a command line argument is also provided, the json value will be ignored for this argument.",
    )
Example #4
Source File: eval_tvqa_plus.py From TVQAplus with MIT License | 5 votes |
def load_predictions(pred_path, gt_path, w2i_path):
    """gt_path stores ground truth data, here used to reformat the predictions"""
    raw_preds = load_json(pred_path)
    gt_data = load_json(gt_path)
    word2idx = load_json(w2i_path)
    idx2word = {i: w for w, i in word2idx.items()}
    qid2ans = {int(e["qid"]): int(e["answer_idx"]) for e in gt_data}
    qid2bbox = {int(e["qid"]): e["bbox"] for e in gt_data}
    bbox_preds = dict()
    for e in raw_preds["raw_bbox"]:
        qid = None
        for i in range(5):  # n_answer == 5
            if len(e[str(i)]) > 0:
                qid = e[str(i)][0]["qid"]
        assert qid is not None
        ans_idx = qid2ans[int(qid)]
        cur_gt_bbox = qid2bbox[int(qid)]
        cur_correct_bbox_preds = e[str(ans_idx)]
        key_template = "{vid_name}_{qid}_{img_idx:05d}"
        for p in cur_correct_bbox_preds:
            annotated_word_ids = [word2idx[clean_label(b["label"])]
                                  if clean_label(b["label"]) in word2idx else word2idx["<unk>"]
                                  for b in cur_gt_bbox[str(p["img_idx"])]]
            collected_bbox = []
            for idx, b in enumerate(p["bbox"]):
                if p["word"] in annotated_word_ids:
                    collected_bbox.append([idx2word[p["word"]], float(p["pred"][idx]), b])
            key_str = key_template.format(vid_name=p["vid_name"], qid=qid, img_idx=p["img_idx"])
            if key_str not in bbox_preds:
                bbox_preds[key_str] = []
            bbox_preds[key_str].extend(collected_bbox)
    preds = dict(ts_answer=raw_preds["ts_answer"], bbox=bbox_preds)
    return preds
Example #5
Source File: eval_tvqa_plus.py From TVQAplus with MIT License | 5 votes |
def main_eval():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--gt_path", type=str, default="data/tvqa_plus_val.json",
                        help="ground-truth json file path")
    parser.add_argument("--pred_path", type=str,
                        help="input prediction json file path, the same format as the results "
                             "returned by load_tvqa_plus_annotation func")
    parser.add_argument("--word2idx_path", type=str, default="data/word2idx.json",
                        help="word2idx json file path, provided with the evaluation code")
    parser.add_argument("--output_path", type=str, help="path to store the calculated metrics")
    parser.add_argument("--no_preproc_pred", action="store_true")
    args = parser.parse_args()

    # Display settings
    print('------------ Options -------------')
    for k, v in sorted(vars(args).items()):
        print('%s: %s' % (str(k), str(v)))
    print('-------------- End ----------------')

    groundtruth = load_tvqa_plus_annotation(args.gt_path)
    if args.no_preproc_pred:
        prediction = load_json(args.pred_path)
    else:
        prediction = load_predictions(args.pred_path, args.gt_path, args.word2idx_path)
    word2idx = load_json(args.word2idx_path)
    bbox_metrics = compute_att_metrics_using_maskrcnn_voc(prediction["bbox"], groundtruth["bbox"], word2idx)
    temporal_metrics = compute_temporal_metrics(prediction["ts_answer"], groundtruth["ts_answer"])
    all_metrics = merge_dicts([bbox_metrics, temporal_metrics])
    print("QA Acc. {}\nGrd. mAP {}\nTemp. mIoU {}\nASA {}"
          .format(all_metrics["qa_acc"], all_metrics["overall_map"],
                  all_metrics["miou"], all_metrics["ans_span_joint_acc@.5"]))
    if args.output_path:
        save_json_pretty(all_metrics, args.output_path)
Example #6
Source File: nasbench_api_v2.py From eval-nas with MIT License | 5 votes |
def _make_rank_hash_perf_file(self, f_name, config=f'v7_e9_op3'):
    def make_perfs_file(perf_filename):
        """ move to nasbench_v2 """
        hash_perfs = {}
        for ind, hash in enumerate(self.hash_dict.keys()):
            perfs = self.query_hash(hash)
            hash_perfs[hash] = (perfs['validation_accuracy'], perfs['test_accuracy'])
        utils.save_json(hash_perfs, perf_filename)
        return hash_perfs

    filename = f'data/nasbench/nasbench_perfs_{config}.json'
    if not os.path.exists(filename):
        hash_perfs = make_perfs_file(filename)
    else:
        hash_perfs = utils.load_json(filename)
    self.hash_perfs_keys = ('validation_accuracy', 'test_accuracy')
    hashs = list(self.hash_dict.keys())
    f_config = config.replace('_', '-')
    hash_rank_filename = f'nasbench_hash_rank_bytest_{f_config}.json'
    perf_filename = f'nasbench_perfs_bytest_{f_config}.json'
    perfs = [hash_perfs[h] for h in hashs]
    t_ps = [p[1] for p in perfs]
    print("sorting the hashs by testing accuracy.")
    sorted_hash_indices = np.argsort(t_ps)
    s_hashs = [hashs[i] for i in sorted_hash_indices]
    s_perfs = [perfs[i] for i in sorted_hash_indices]
    utils.save_json(s_perfs, 'data/nasbench/' + perf_filename)
    utils.save_json(s_hashs, 'data/nasbench/' + hash_rank_filename)
Example #7
Source File: config.py From Dense_BiLSTM with MIT License | 5 votes |
def __init__(self, task):
    self.ckpt_path = './ckpt/{}/'.format(task)
    if not os.path.exists(self.ckpt_path):
        os.makedirs(self.ckpt_path)
    source_dir = os.path.join('.', 'dataset', 'data', task)
    self.word_vocab, _ = load_vocab(os.path.join(source_dir, 'words.vocab'))
    self.char_vocab, _ = load_vocab(os.path.join(source_dir, 'chars.vocab'))
    self.vocab_size = len(self.word_vocab)
    self.char_vocab_size = len(self.char_vocab)
    self.label_size = load_json(os.path.join(source_dir, 'label.json'))["label_size"]
    self.word_emb = load_embeddings(os.path.join(source_dir, 'glove.filtered.npz'))
    # log and model file paths
Example #8
Source File: preprocessing.py From TVQA with MIT License | 5 votes |
def get_vidname2cnt_all(frame_root_path, vidname2cnt_cache_path):
    if os.path.exists(vidname2cnt_cache_path):
        print("Found frame cnt cache, loading ...")
        return load_json(vidname2cnt_cache_path)
    show_names = ["bbt", "friends", "grey", "met", "castle", "house"]
    vidname2cnt_list = []
    for sn in show_names:
        print("Count frames in %s" % sn)
        cur_base_path = os.path.join(frame_root_path, "%s_frames" % sn)
        vidname2cnt_list.append(get_vidname2cnt_per_show(cur_base_path))
    vidname2cnt = merge_list_dicts(vidname2cnt_list)
    save_json(vidname2cnt, vidname2cnt_cache_path)
    # return the mapping on this path too (the cache branch above already returns it)
    return vidname2cnt
Example #9
Source File: preprocessing.py From TVQA with MIT License | 5 votes |
def load_srt(srt_dir, srt_cache_path):
    """
    return: A python dict, the keys are the video names, the entries are lists,
        each contains all the text from a .srt file.
    sub_times are the start times of the sentences.
    """
    if os.path.exists(srt_cache_path):
        print("Found srt data cache, loading ...")
        return load_json(srt_cache_path)
    print("Loading srt files from %s ..." % srt_dir)
    srt_paths = glob.glob(os.path.join(srt_dir, "*.srt"))
    name2sub_text = {}
    name2sub_time = {}
    for i in tqdm(range(len(srt_paths))):
        subs = pysrt.open(srt_paths[i], encoding="iso-8859-1")
        if len(subs) == 0:
            subs = pysrt.open(srt_paths[i])
        text_list = []
        sub_time_list = []
        for j in range(len(subs)):
            cur_sub = subs[j]
            cur_str = cur_sub.text
            cur_str = "(<UNKNAME>:)" + cur_str if cur_str[0] != "(" else cur_str
            cur_str = cur_str.replace("\n", " ")
            text_list.append(cur_str)
            sub_time_list.append(
                60 * cur_sub.start.minutes + cur_sub.start.seconds + 0.001 * cur_sub.start.milliseconds)
        key_str = os.path.splitext(os.path.basename(srt_paths[i]))[0]
        name2sub_text[key_str] = text_list
        name2sub_time[key_str] = sub_time_list
    srt_data = {"sub_text": name2sub_text, "sub_time": name2sub_time}
    save_json(srt_data, srt_cache_path)
    return srt_data
Example #10
Source File: config.py From TVQA with MIT License | 5 votes |
def parse(self):
    """parse cmd line arguments and do some preprocessing"""
    if not self.initialized:
        self.initialize()
    opt = self.parser.parse_args()
    results_dir = opt.results_dir_base + time.strftime("_%Y_%m_%d_%H_%M_%S")

    if isinstance(self, TestOptions):
        options = load_json(os.path.join("results", opt.model_dir, "opt.json"))
        for arg in options:
            setattr(opt, arg, options[arg])
    else:
        os.makedirs(results_dir)
        self.display_save(opt, results_dir)

    opt.normalize_v = not opt.no_normalize_v
    opt.device = torch.device("cuda:%d" % opt.device if opt.device >= 0 else "cpu")
    opt.with_ts = not opt.no_ts
    opt.input_streams = [] if opt.input_streams is None else opt.input_streams
    opt.vid_feat_flag = True if "imagenet" in opt.input_streams else False
    opt.h5driver = None if opt.no_core_driver else "core"
    opt.results_dir = results_dir

    self.opt = opt
    return opt
Example #11
Source File: config.py From TVQAplus with MIT License | 4 votes |
def parse(self):
    if not self.initialized:
        self.initialize()
    opt = self.parser.parse_args()

    if opt.input_streams is None:
        if isinstance(self, TestOptions):
            opt.input_streams = []
        else:
            raise ValueError("input_streams must be set")

    if opt.debug:
        opt.results_dir_base = opt.results_dir_base.split("/")[0] + "/debug_results"
        opt.no_core_driver = True
        opt.num_workers = 0
    opt.results_dir = opt.results_dir_base + time.strftime("_%Y_%m_%d_%H_%M_%S")
    self.opt = opt

    if isinstance(self, TestOptions):
        options = load_json(os.path.join("results", opt.model_dir, "opt.json"))
        for arg in options:
            if arg not in ["debug"]:
                setattr(opt, arg, options[arg])
        opt.no_core_driver = True
    else:
        mkdirp(opt.results_dir)
        # save a copy of current code
        code_dir = os.path.dirname(os.path.realpath(__file__))
        code_zip_filename = os.path.join(opt.results_dir, "code.zip")
        make_zipfile(code_dir, code_zip_filename,
                     enclosing_dir="code",
                     exclude_paths=["results"],
                     exclude_extensions=[".pyc", ".ipynb"])
        self.display_save()

    assert opt.num_hard <= opt.num_negatives
    opt.device = torch.device("cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
    if opt.device.type == "cuda":
        opt.bsz = opt.bsz * len(opt.device_ids)
        opt.test_bsz = opt.test_bsz * len(opt.device_ids)
    opt.h5driver = None if opt.no_core_driver else "core"
    opt.vfeat_flag = "vfeat" in opt.input_streams
    opt.vcpt_flag = "vcpt" in opt.input_streams
    opt.sub_flag = "sub" in opt.input_streams

    self.opt = opt
    return opt
Example #12
Source File: scraper.py From gasvaktin with MIT License | 4 votes |
def get_individual_olis_prices():
    url = 'https://www.olis.is/solustadir/thjonustustodvar/eldsneytisverd/'
    res = requests.get(url, headers=utils.headers())
    html = lxml.etree.fromstring(res.content.decode('utf-8'), lxml.etree.HTMLParser())
    data = {
        'stations': {},
        'highest': {'bensin95': None, 'diesel': None}
    }
    price_table = html.find('.//table')  # there's just one table element, let's use that ofc
    for row in price_table.findall('.//tr'):
        if len(row.findall('.//td')) < 3:
            continue
        if row.findall('.//td')[0].text.strip() == '':
            continue
        name = row.findall('.//td')[0].text.strip()
        station_key = globs.OLIS_LOCATION_RELATION[name]
        bensin = None
        if row.findall('.//td')[1].text.strip() != '':
            bensin = float(row.findall('.//td')[1].text.strip().replace(',', '.'))
        diesel = None
        if row.findall('.//td')[2].text.strip() != '':
            diesel = float(row.findall('.//td')[2].text.strip().replace(',', '.'))
        data['stations'][station_key] = {'bensin95': bensin, 'diesel': diesel}
        # guard against missing (None) prices when tracking the highest seen
        if bensin is not None and (data['highest']['bensin95'] is None or data['highest']['bensin95'] < bensin):
            data['highest']['bensin95'] = bensin
        if diesel is not None and (data['highest']['diesel'] is None or data['highest']['diesel'] < diesel):
            data['highest']['diesel'] = diesel
    assert(data['highest']['bensin95'] is not None)
    assert(data['highest']['diesel'] is not None)
    for name in data['stations']:
        # fallback to highest provided price if for some reason it's not provided ._.
        if data['stations'][name]['bensin95'] is None:
            data['stations'][name]['bensin95'] = data['highest']['bensin95']
        if data['stations'][name]['diesel'] is None:
            data['stations'][name]['diesel'] = data['highest']['diesel']
    prices = {}
    olis_stations = utils.load_json(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../stations/olis.json'
        )
    )
    for key in olis_stations:
        if key in data['stations']:
            bensin95 = data['stations'][key]['bensin95']
            diesel = data['stations'][key]['diesel']
        else:
            bensin95 = data['highest']['bensin95']
            diesel = data['highest']['diesel']
        # truncate discounted prices to one decimal place
        bensin95_discount = int((bensin95 - globs.OLIS_MINIMUM_DISCOUNT) * 10) / 10.0
        diesel_discount = int((diesel - globs.OLIS_MINIMUM_DISCOUNT) * 10) / 10.0
        prices[key] = {
            'bensin95': bensin95,
            'diesel': diesel,
            'bensin95_discount': bensin95_discount,
            'diesel_discount': diesel_discount
        }
    return prices
Example #13
Source File: scraper.py From gasvaktin with MIT License | 4 votes |
def get_individual_ob_prices():
    url = 'https://www.ob.is/eldsneytisverd/'
    res = requests.get(url, headers=utils.headers())
    html = lxml.etree.fromstring(res.content.decode('utf-8'), lxml.etree.HTMLParser())
    data = {
        'stations': {},
        'highest': {'bensin95': None, 'diesel': None}
    }
    price_table = html.find('.//table[@id="gas-prices"]')
    for row in price_table.findall('.//tr'):
        if len(row.findall('.//td')) == 0:
            continue
        if row.findall('.//td')[0].get('style') == 'border:0px;':
            continue
        name = row.findall('.//td')[0].text.strip()
        if name == 'Ketilás í Fljótum':
            continue  # skip this one for now, only diesel, needs investigation
        station_key = globs.OB_LOCATION_RELATION[name]
        bensin = float(row.findall('.//td')[1].text.strip().replace(',', '.'))
        diesel = float(row.findall('.//td')[2].text.strip().replace(',', '.'))
        data['stations'][station_key] = {'bensin95': bensin, 'diesel': diesel}
        if data['highest']['bensin95'] is None or data['highest']['bensin95'] < bensin:
            data['highest']['bensin95'] = bensin
        if data['highest']['diesel'] is None or data['highest']['diesel'] < diesel:
            data['highest']['diesel'] = diesel
    prices = {}
    ob_stations = utils.load_json(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../stations/ob.json'
        )
    )
    now = datetime.datetime.now()
    end = datetime.datetime.strptime(globs.OB_EXTRA_DISCOUNT_UNTIL, '%Y-%m-%dT%H:%M')
    for key in ob_stations:
        if key in data['stations']:
            bensin95 = data['stations'][key]['bensin95']
            diesel = data['stations'][key]['diesel']
        else:
            bensin95 = data['highest']['bensin95']
            diesel = data['highest']['diesel']
        bensin95_discount = int((bensin95 - globs.OB_MINIMUM_DISCOUNT) * 10) / 10.0
        diesel_discount = int((diesel - globs.OB_MINIMUM_DISCOUNT) * 10) / 10.0
        if key in globs.OB_DISCOUNTLESS_STATIONS:
            bensin95_discount = None
            diesel_discount = None
        if key in globs.OB_EXTRA_DISCOUNT_STATIONS and now < end:
            if bensin95_discount is not None:
                bensin95_discount = int((bensin95 - globs.OB_EXTRA_DISCOUNT_AMOUNT) * 10) / 10.0
            if diesel_discount is not None:
                diesel_discount = int((diesel - globs.OB_EXTRA_DISCOUNT_AMOUNT) * 10) / 10.0
        prices[key] = {
            'bensin95': bensin95,
            'diesel': diesel,
            'bensin95_discount': bensin95_discount,
            'diesel_discount': diesel_discount
        }
    return prices
Example #14
Source File: tvqa_dataset.py From TVQA with MIT License | 4 votes |
def __init__(self, opt, mode="train"):
    self.raw_train = load_json(opt.train_path)
    self.raw_test = load_json(opt.test_path)
    self.raw_valid = load_json(opt.valid_path)
    self.vcpt_dict = load_pickle(opt.vcpt_path)
    self.vfeat_load = opt.vid_feat_flag
    if self.vfeat_load:
        self.vid_h5 = h5py.File(opt.vid_feat_path, "r", driver=opt.h5driver)
    self.glove_embedding_path = opt.glove_path
    self.normalize_v = opt.normalize_v
    self.with_ts = opt.with_ts
    self.mode = mode
    self.cur_data_dict = self.get_cur_dict()

    # set word embedding / vocabulary
    self.word2idx_path = opt.word2idx_path
    self.idx2word_path = opt.idx2word_path
    self.vocab_embedding_path = opt.vocab_embedding_path
    self.embedding_dim = opt.embedding_size
    self.word2idx = {"<pad>": 0, "<unk>": 1, "<eos>": 2}
    self.idx2word = {0: "<pad>", 1: "<unk>", 2: "<eos>"}
    self.offset = len(self.word2idx)

    # set entry keys
    if self.with_ts:
        self.text_keys = ["q", "a0", "a1", "a2", "a3", "a4", "located_sub_text"]
    else:
        self.text_keys = ["q", "a0", "a1", "a2", "a3", "a4", "sub_text"]
    self.vcpt_key = "vcpt"
    self.label_key = "answer_idx"
    self.qid_key = "qid"
    self.vid_name_key = "vid_name"
    self.located_frm_key = "located_frame"
    for k in self.text_keys + [self.vcpt_key, self.qid_key, self.vid_name_key]:
        if k == "vcpt":
            continue
        assert k in self.raw_valid[0].keys()

    # build/load vocabulary
    if not files_exist([self.word2idx_path, self.idx2word_path, self.vocab_embedding_path]):
        print("\nNo cache found.")
        self.build_word_vocabulary(word_count_threshold=opt.word_count_threshold)
    else:
        print("\nLoading cache ...")
        self.word2idx = load_pickle(self.word2idx_path)
        self.idx2word = load_pickle(self.idx2word_path)
        self.vocab_embedding = load_pickle(self.vocab_embedding_path)