Python msgpack.load() Examples

The following are 23 code examples of msgpack.load(), drawn from the open-source projects named above each example. You may also want to check out all available functions/classes of the module msgpack, or try the search function.
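Before the examples, here is a minimal round-trip sketch of the API they all build on: msgpack.dump() packs a Python object into a binary stream and msgpack.load() unpacks it back. The file name is illustrative.

import msgpack

data = {'name': 'example', 'values': [1, 2, 3]}

# Pack to a binary file (msgpack.dump is an alias of msgpack.pack).
with open('data.msgpack', 'wb') as f:
    msgpack.dump(data, f)

# Unpack it again. raw=False decodes msgpack strings to str; it is the
# default since msgpack 1.0, whereas 0.x releases returned bytes.
with open('data.msgpack', 'rb') as f:
    restored = msgpack.load(f, raw=False)

assert restored == data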
Example #1
Source File: interface.py    From simple-effective-text-matching with Apache License 2.0    6 votes
def __init__(self, args, log=None):
        self.args = args
        # build/load vocab and target map
        vocab_file = os.path.join(args.output_dir, 'vocab.txt')
        target_map_file = os.path.join(args.output_dir, 'target_map.txt')
        if not os.path.exists(vocab_file):
            data = load_data(self.args.data_dir)
            self.target_map = Indexer.build((sample['target'] for sample in data), log=log)
            self.target_map.save(target_map_file)
            self.vocab = Vocab.build((word for sample in data
                                      for text in (sample['text1'], sample['text2'])
                                      for word in text.split()[:self.args.max_len]),
                                     lower=args.lower_case, min_df=self.args.min_df, log=log,
                                     pretrained_embeddings=args.pretrained_embeddings,
                                     dump_filtered=os.path.join(args.output_dir, 'filtered_words.txt'))
            self.vocab.save(vocab_file)
        else:
            self.target_map = Indexer.load(target_map_file)
            self.vocab = Vocab.load(vocab_file)
        args.num_classes = len(self.target_map)
        args.num_vocab = len(self.vocab)
        args.padding = Vocab.pad() 
Example #2
Source File: coverage.py    From bncov with MIT License    6 votes
def save_to_file(self, filename):
        """Save only the bare minimum needed to reconstruct this CoverageDB.

        This serializes the data to a single file and can reduce the disk footprint of
        block coverage significantly (depending on overlap and number of files)."""
        if file_backing_disabled:
            raise Exception("[!] Can't save/load coverage db files without msgpack. Try `pip install msgpack`")
        save_dict = dict()
        save_dict["version"] = 1  # serialized covdb version
        save_dict["module_name"] = self.module_name
        save_dict["module_base"] = self.module_base
        save_dict["coverage_files"] = self.coverage_files
        # save a tighter version of the block dict: {int: [int]} instead of {int: [str]}
        block_dict_to_save = {}
        file_index_map = {filepath: self.coverage_files.index(filepath) for filepath in self.coverage_files}
        for block, trace_list in self.block_dict.items():
            trace_id_list = [file_index_map[name] for name in trace_list]
            block_dict_to_save[block] = trace_id_list
        save_dict["block_dict"] = block_dict_to_save
        # write packed version to file
        with open(filename, "wb") as f:
            msgpack.dump(save_dict, f)
            self.filename = filename 
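The disk saving in save_to_file() comes from replacing each repeated file-path string with its integer index into coverage_files before packing. A standalone sketch of that mapping with hypothetical data (enumerate() is the idiomatic way to build the index map that the snippet builds with list.index()):

# Hypothetical block dict: basic-block address -> list of trace file paths.
coverage_files = ['trace_a.cov', 'trace_b.cov']
block_dict = {0x1000: ['trace_a.cov', 'trace_b.cov'], 0x1040: ['trace_b.cov']}

file_index_map = {path: i for i, path in enumerate(coverage_files)}
packed_form = {block: [file_index_map[p] for p in paths]
               for block, paths in block_dict.items()}

assert packed_form == {0x1000: [0, 1], 0x1040: [1]}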
Example #3
Source File: interface.py    From simple-effective-text-matching-pytorch with Apache License 2.0    6 votes
def __init__(self, args, log=None):
        self.args = args
        # build/load vocab and target map
        vocab_file = os.path.join(args.output_dir, 'vocab.txt')
        target_map_file = os.path.join(args.output_dir, 'target_map.txt')
        if not os.path.exists(vocab_file):
            data = load_data(self.args.data_dir)
            self.target_map = Indexer.build((sample['target'] for sample in data), log=log)
            self.target_map.save(target_map_file)
            self.vocab = Vocab.build((word for sample in data
                                      for text in (sample['text1'], sample['text2'])
                                      for word in text.split()[:self.args.max_len]),
                                     lower=args.lower_case, min_df=self.args.min_df, log=log,
                                     pretrained_embeddings=args.pretrained_embeddings,
                                     dump_filtered=os.path.join(args.output_dir, 'filtered_words.txt'))
            self.vocab.save(vocab_file)

        else:
            self.target_map = Indexer.load(target_map_file)
            self.vocab = Vocab.load(vocab_file)
        args.num_classes = len(self.target_map)
        args.num_vocab = len(self.vocab)
        args.padding = Vocab.pad() 
Example #4
Source File: serialization.py    From dcase_util with MIT License    5 votes
def load_json(cls, filename):
        """Load JSON file

        Parameters
        ----------
        filename : str
            Filename path

        Returns
        -------
        data

        """

        cls.file_exists(filename=filename)

        try:
            import ujson as json

        except ImportError:
            try:
                import json

            except ImportError:
                message = '{name}: Unable to import json module.'.format(
                    name=cls.__name__
                )

                cls.logger().exception(message)
                raise ImportError(message)

        return json.load(open(filename, "r")) 
Example #5
Source File: train.py    From sru with MIT License    5 votes
def load_data(opt):
    with open('SQuAD/meta.msgpack', 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['pretrained_words'] = True
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)
    if not opt['fix_embeddings']:
        embedding[1] = torch.normal(means=torch.zeros(opt['embedding_dim']), std=1.)
    with open(args.data_file, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    train_orig = pd.read_csv('SQuAD/train.csv')
    dev_orig = pd.read_csv('SQuAD/dev.csv')
    train = list(zip(
        data['trn_context_ids'],
        data['trn_context_features'],
        data['trn_context_tags'],
        data['trn_context_ents'],
        data['trn_question_ids'],
        train_orig['answer_start_token'].tolist(),
        train_orig['answer_end_token'].tolist(),
        data['trn_context_text'],
        data['trn_context_spans']
    ))
    dev = list(zip(
        data['dev_context_ids'],
        data['dev_context_features'],
        data['dev_context_tags'],
        data['dev_context_ents'],
        data['dev_question_ids'],
        data['dev_context_text'],
        data['dev_context_spans']
    ))
    dev_y = dev_orig['answers'].tolist()[:len(dev)]
    dev_y = [eval(y) for y in dev_y]
    return train, dev, dev_y, embedding, opt 
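A caveat that applies to this and the later FlowDelta/SDNet-style snippets: the encoding argument to msgpack.load() was deprecated and then removed in msgpack 1.0. On current releases the equivalent decode uses raw=False, as in this sketch:

import msgpack

with open('SQuAD/meta.msgpack', 'rb') as f:
    # msgpack >= 1.0: raw=False replaces the removed encoding='utf8' argument
    meta = msgpack.load(f, raw=False)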
Example #6
Source File: interface.py    From simple-effective-text-matching with Apache License 2.0    5 votes
def load_embeddings(self):
        """generate embeddings suited for the current vocab or load previously cached ones."""
        embedding_file = os.path.join(self.args.output_dir, 'embedding.msgpack')
        if not os.path.exists(embedding_file):
            embeddings = load_embeddings(self.args.pretrained_embeddings, self.vocab,
                                         self.args.embedding_dim, mode=self.args.embedding_mode,
                                         lower=self.args.lower_case)
            with open(embedding_file, 'wb') as f:
                msgpack.dump(embeddings, f)
        else:
            with open(embedding_file, 'rb') as f:
                embeddings = msgpack.load(f)
        return embeddings 
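msgpack only packs basic types (dicts, lists, numbers, strings, bytes), so a cache like the one above presumably stores embeddings as a plain nested list rather than, say, a NumPy array. A sketch of converting at the cache boundary under that assumption (the file name is illustrative):

import msgpack
import numpy as np

embeddings = np.random.rand(3, 4)

# msgpack cannot pack an ndarray directly; store a nested list instead.
with open('embedding.msgpack', 'wb') as f:
    msgpack.dump(embeddings.tolist(), f)

with open('embedding.msgpack', 'rb') as f:
    restored = np.array(msgpack.load(f))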
Example #7
Source File: predict_CoQA.py    From FlowDelta with MIT License    5 votes
def load_dev_data(opt): # can be extended to true test set
    with open(os.path.join(args.dev_dir, 'dev_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)

    with open(os.path.join(args.dev_dir, 'dev_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')

    assert opt['num_features'] == len(data['context_features'][0][0]) + opt['explicit_dialog_ctx'] * 3

    dev = {'context': list(zip(
                        data['context_ids'],
                        data['context_tags'],
                        data['context_ents'],
                        data['context'],
                        data['context_span'],
                        data['1st_question'],
                        data['context_tokenized'])),
           'qa': list(zip(
                        data['question_CID'],
                        data['question_ids'],
                        data['context_features'],
                        data['answer_start'],
                        data['answer_end'],
                        data['rationale_start'],
                        data['rationale_end'],
                        data['answer_choice'],
                        data['question'],
                        data['answer'],
                        data['question_tokenized']))
          }

    return dev, embedding 
Example #8
Source File: train_QuAC.py    From FlowDelta with MIT License    5 votes
def load_train_data(opt):
    with open(os.path.join(args.train_dir, 'train_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)

    with open(os.path.join(args.train_dir, 'train_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    #data_orig = pd.read_csv(os.path.join(args.train_dir, 'train.csv'))

    opt['num_features'] = len(data['context_features'][0][0])

    train = {'context': list(zip(
                        data['context_ids'],
                        data['context_tags'],
                        data['context_ents'],
                        data['context'],
                        data['context_span'],
                        data['1st_question'],
                        data['context_tokenized'])),
             'qa': list(zip(
                        data['question_CID'],
                        data['question_ids'],
                        data['context_features'],
                        data['answer_start'],
                        data['answer_end'],
                        data['answer_choice'],
                        data['question'],
                        data['answer'],
                        data['question_tokenized']))
            }
    return train, embedding, opt 
Example #9
Source File: predict_QuAC.py    From FlowDelta with MIT License    5 votes
def load_dev_data(opt): # can be extended to true test set
    with open(os.path.join(args.dev_dir, 'dev_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)

    with open(os.path.join(args.dev_dir, 'dev_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')

    assert opt['num_features'] == len(data['context_features'][0][0]) + opt['explicit_dialog_ctx'] * (opt['use_dialog_act']*3 + 2)
    
    dev = {'context': list(zip(
                        data['context_ids'],
                        data['context_tags'],
                        data['context_ents'],
                        data['context'],
                        data['context_span'],
                        data['1st_question'],
                        data['context_tokenized'])),
           'qa': list(zip(
                        data['question_CID'],
                        data['question_ids'],
                        data['context_features'],
                        data['answer_start'],
                        data['answer_end'],
                        data['answer_choice'],
                        data['question'],
                        data['answer'],
                        data['question_tokenized']))
          }
    
    dev_answer = []
    for i, CID in enumerate(data['question_CID']):
        if len(dev_answer) <= CID:
            dev_answer.append([])
        dev_answer[CID].append(data['all_answer'][i])
    
    return dev, embedding, dev_answer 
Example #10
Source File: train_CoQA.py    From FlowDelta with MIT License    5 votes
def load_dev_data(opt): # can be extended to true test set
    with open(os.path.join(args.dev_dir, 'dev_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)

    with open(os.path.join(args.dev_dir, 'dev_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    #data_orig = pd.read_csv(os.path.join(args.dev_dir, 'dev.csv'))

    assert opt['num_features'] == len(data['context_features'][0][0])

    dev = {'context': list(zip(
                        data['context_ids'],
                        data['context_tags'],
                        data['context_ents'],
                        data['context'],
                        data['context_span'],
                        data['1st_question'],
                        data['context_tokenized'])),
           'qa': list(zip(
                        data['question_CID'],
                        data['question_ids'],
                        data['context_features'],
                        data['answer_start'],
                        data['answer_end'],
                        data['rationale_start'],
                        data['rationale_end'],
                        data['answer_choice'],
                        data['question'],
                        data['answer'],
                        data['question_tokenized']))
          }

    return dev, embedding 
Example #11
Source File: train_CoQA.py    From FlowDelta with MIT License    5 votes
def load_train_data(opt):
    with open(os.path.join(args.train_dir, 'train_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)

    with open(os.path.join(args.train_dir, 'train_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    #data_orig = pd.read_csv(os.path.join(args.train_dir, 'train.csv'))

    opt['num_features'] = len(data['context_features'][0][0])

    train = {'context': list(zip(
                        data['context_ids'],
                        data['context_tags'],
                        data['context_ents'],
                        data['context'],
                        data['context_span'],
                        data['1st_question'],
                        data['context_tokenized'])),
             'qa': list(zip(
                        data['question_CID'],
                        data['question_ids'],
                        data['context_features'],
                        data['answer_start'],
                        data['answer_end'],
                        data['rationale_start'],
                        data['rationale_end'],
                        data['answer_choice'],
                        data['question'],
                        data['answer'],
                        data['question_tokenized']))
            }
    return train, embedding, opt 
Example #12
Source File: serialization.py    From dcase_util with MIT License    5 votes
def load_marshal(cls, filename):
        """Load MARSHAL file

        Parameters
        ----------
        filename : str
            Filename path

        Returns
        -------
        data

        """

        cls.file_exists(filename=filename)

        try:
            import marshal

        except ImportError:
            message = '{name}: Unable to import marshal module.'.format(
                name=cls.__name__
            )

            cls.logger().exception(message)
            raise ImportError(message)

        return marshal.load(open(filename, "rb")) 
Example #13
Source File: serialization.py    From dcase_util with MIT License    5 votes
def load_msgpack(cls, filename):
        """Load MSGPACK file

        Parameters
        ----------
        filename : str
            Filename path

        Returns
        -------
        data

        """

        cls.file_exists(filename=filename)

        try:
            import msgpack

        except ImportError:
            message = '{name}: Unable to import msgpack module. You can install it with `pip install msgpack`.'.format(
                name=cls.__name__
            )

            cls.logger().exception(message)
            raise ImportError(message)

        return msgpack.load(open(filename, "rb"), encoding='utf-8') 
Example #14
Source File: serialization.py    From dcase_util with MIT License    5 votes
def load_cpickle(cls, filename):
        """Load CPICKLE file

        Parameters
        ----------
        filename : str
            Filename path

        Returns
        -------
        data

        """

        cls.file_exists(filename=filename)

        try:
            import cPickle as pickle

        except ImportError:
            try:
                import pickle

            except ImportError:
                message = '{name}: Unable to import pickle module.'.format(
                    name=cls.__name__
                )

                cls.logger().exception(message)
                raise ImportError(message)

        return pickle.load(open(filename, "rb")) 
Example #15
Source File: CoQAPreprocess.py    From SDNet with MIT License    5 votes
def load_data(self):
        print('Load train_meta.msgpack...')
        meta_file_name = os.path.join(self.spacyDir, 'train_meta.msgpack')
        with open(meta_file_name, 'rb') as f:
            meta = msgpack.load(f, encoding='utf8')
        embedding = torch.Tensor(meta['embedding'])
        self.opt['vocab_size'] = embedding.size(0)
        self.opt['vocab_dim'] = embedding.size(1)
        self.opt['char_vocab_size'] = len(meta['char_vocab'])
        return meta['vocab'], meta['char_vocab'], embedding 
Example #16
Source File: reader.py    From transit-python with Apache License 2.0    5 votes
def load(self, stream):
        return self.decoder.decode(msgpack.load(stream,
                                                object_pairs_hook=OrderedDict)) 
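msgpack.load() mirrors json.load() here: both accept an object_pairs_hook callable that receives the decoded key/value pairs, which transit-python uses to keep map entries in order. A minimal sketch:

from collections import OrderedDict
import msgpack

packed = msgpack.dumps({'a': 1, 'b': 2})
# Decode maps into OrderedDict instead of a plain dict.
decoded = msgpack.loads(packed, object_pairs_hook=OrderedDict)
assert isinstance(decoded, OrderedDict)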
Example #17
Source File: reader.py    From transit-python with Apache License 2.0    5 votes
def load(self, stream):
        return self.decoder.decode(json.load(stream,
                                             object_pairs_hook=OrderedDict)) 
Example #18
Source File: reader.py    From transit-python with Apache License 2.0    5 votes
def read(self, stream):
        """Given a readable file descriptor object (something `load`able by
        msgpack or json), read the data, and return the Python representation
        of the contents. One-shot reader.
        """
        return self.reader.load(stream) 
Example #19
Source File: interface.py    From simple-effective-text-matching-pytorch with Apache License 2.0    5 votes
def load_embeddings(self):
        """generate embeddings suited for the current vocab or load previously cached ones."""
        assert self.args.pretrained_embeddings
        embedding_file = os.path.join(self.args.output_dir, 'embedding.msgpack')
        if not os.path.exists(embedding_file):
            embeddings = load_embeddings(self.args.pretrained_embeddings, self.vocab,
                                         self.args.embedding_dim, mode=self.args.embedding_mode,
                                         lower=self.args.lower_case)
            with open(embedding_file, 'wb') as f:
                msgpack.dump(embeddings, f)
        else:
            with open(embedding_file, 'rb') as f:
                embeddings = msgpack.load(f)
        return embeddings 
Example #20
Source File: serialization.py    From dcase_util with MIT License    4 votes
def load_yaml(cls, filename):
        """Load YAML file

        Parameters
        ----------
        filename : str
            Filename path

        Returns
        -------
        data

        """

        cls.file_exists(filename=filename)

        try:
            import yaml

        except ImportError:
            message = '{name}: Unable to import YAML module. You can install it with `pip install pyyaml`.'.format(name=cls.__name__)
            cls.logger().exception(message)
            raise ImportError(message)

        try:
            with open(filename, 'r') as infile:
                return yaml.load(infile, Loader=yaml.FullLoader)

        except yaml.YAMLError as exc:
            cls.logger().error("Error while parsing YAML file [{file}]".format(file=filename))
            if hasattr(exc, 'problem_mark'):
                if exc.context is not None:
                    cls.logger().error(str(exc.problem_mark) + '\n  ' + str(exc.problem) + ' ' + str(exc.context))
                    cls.logger().error('  Please correct data and retry.')

                else:
                    cls.logger().error(str(exc.problem_mark) + '\n  ' + str(exc.problem))
                    cls.logger().error('  Please correct data and retry.')

            else:
                cls.logger().error("Something went wrong while parsing yaml file [{file}]".format(file=filename))

            return 
Example #21
Source File: utils.py    From libnacl with Apache License 2.0    4 votes
def load_key(path_or_file, serial='json'):
    '''
    Read in a key from a file and return the applicable key object based on
    the contents of the file
    '''
    if hasattr(path_or_file, 'read'):
        stream = path_or_file
    else:
        if serial == 'json':
            stream = open(path_or_file, 'r')
        else:
            stream = open(path_or_file, 'rb')

    try:
        if serial == 'msgpack':
            import msgpack
            key_data = msgpack.load(stream)
        elif serial == 'json':
            import json
            if sys.version_info[0] >= 3:
                key_data = json.loads(stream.read())
            else:
                key_data = json.loads(stream.read(), encoding='UTF-8')
    finally:
        if stream != path_or_file:
            stream.close()

    if 'priv' in key_data and 'sign' in key_data and 'pub' in key_data:
        return libnacl.dual.DualSecret(
                libnacl.encode.hex_decode(key_data['priv']),
                libnacl.encode.hex_decode(key_data['sign']))
    elif 'priv' in key_data and 'pub' in key_data:
        return libnacl.public.SecretKey(
                libnacl.encode.hex_decode(key_data['priv']))
    elif 'sign' in key_data:
        return libnacl.sign.Signer(
                libnacl.encode.hex_decode(key_data['sign']))
    elif 'pub' in key_data:
        return libnacl.public.PublicKey(
                libnacl.encode.hex_decode(key_data['pub']))
    elif 'verify' in key_data:
        return libnacl.sign.Verifier(key_data['verify'])
    elif 'priv' in key_data:
        return libnacl.secret.SecretBox(
                libnacl.encode.hex_decode(key_data['priv']))
    raise ValueError('Found no key data') 
Example #22
Source File: predict_QuAC.py    From FlowDelta with MIT License    4 votes
def main():
    log.info('[program starts.]')
    checkpoint = torch.load(args.model)
    opt = checkpoint['config']
    opt['task_name'] = 'QuAC'
    opt['cuda'] = args.cuda
    opt['seed'] = args.seed
    if opt.get('disperse_flow') is None:
        opt['disperse_flow'] = False
    if opt.get('rationale_lambda') is None:
        opt['rationale_lambda'] = 0.0
    if opt.get('no_dialog_flow') is None:
        opt['no_dialog_flow'] = False
    if opt.get('do_hierarchical_query') is None:
        opt['do_hierarchical_query'] = False
    state_dict = checkpoint['state_dict']
    log.info('[model loaded.]')

    test, test_embedding, test_answer = load_dev_data(opt)
    model = QAModel(opt, state_dict = state_dict)
    log.info('[Data loaded.]')

    model.setup_eval_embed(test_embedding)

    if args.cuda:
        model.cuda()

    batches = BatchGen_QuAC(test, batch_size=args.batch_size, evaluation=True, gpu=args.cuda, dialog_ctx=opt['explicit_dialog_ctx'], use_dialog_act=opt['use_dialog_act'], precompute_elmo=opt['elmo_batch_size'] // args.batch_size)
    sample_idx = random.sample(range(len(batches)), args.show)

    predictions = []
    no_ans_scores = []
    for i, batch in enumerate(batches):
        prediction, noans = model.predict(batch, No_Ans_Threshold=args.no_ans)
        predictions.extend(prediction)
        no_ans_scores.extend(noans)

        if not (i in sample_idx):
            continue
        
        print("Context: ", batch[-4][0])
        for j in range(len(batch[-2][0])):
            print("Q: ", batch[-2][0][j])
            print("A: ", prediction[0][j])
            print("     True A: ", batch[-1][0][j], "| Follow up" if batch[-6][0][j].item() // 10 else "| Don't follow up")
            print("     Val. A: ", test_answer[args.batch_size * i][j])
        print("")


    pred_out = {'predictions': predictions, 'no_ans_scores': no_ans_scores}
    with open(args.output, 'wb') as f:
        pickle.dump(pred_out, f)

    f1, h_f1, HEQ_Q, HEQ_D = score(predictions, test_answer, min_F1=args.min_f1)
    log.warning("Test F1: {:.2f}, HEQ_Q: {:.2f}, HEQ_D: {:.2f}".format(f1, HEQ_Q, HEQ_D)) 
Example #23
Source File: coverage.py    From bncov with MIT License    4 votes
def load_from_file(self, filename):
        """Reconstruct a CoverageDB using the current BinaryView and a CoverageDB saved to disk using .save_to_file()"""
        if file_backing_disabled:
            raise Exception("[!] Can't save/load coverage db files without msgpack. Try `pip install msgpack`")
        self.filename = filename
        with open(filename, "rb") as f:
            loaded_dict = msgpack.load(f, raw=False)
        if "version" not in loaded_dict:
            self._old_load_from_file(loaded_dict)
        # Do sanity checks
        loaded_version = int(loaded_dict["version"])
        if loaded_version != 1:
            raise Exception("[!] Unsupported version number: %d" % loaded_version)

        loaded_module_name = loaded_dict["module_name"]
        if loaded_module_name != self.module_name:
            raise Exception("[!] ERROR: Module name from covdb (%s) doesn't match BinaryView (%s)" %
                            (loaded_module_name, self.module_name))

        loaded_module_base = loaded_dict["module_base"]
        if loaded_module_base != self.module_base:
            raise Exception("[!] ERROR: Module base from covdb (0x%x) doesn't match BinaryView (0x%x)" %
                            (loaded_module_base, self.module_base))

        # Parse the saved members
        coverage_files = loaded_dict["coverage_files"]
        self.coverage_files = coverage_files

        block_dict = dict()
        loaded_block_dict = loaded_dict["block_dict"]
        file_index_map = {self.coverage_files.index(filepath): filepath for filepath in self.coverage_files}
        for block, trace_id_list in loaded_block_dict.items():
            trace_list = [file_index_map[i] for i in trace_id_list]
            block_dict[block] = trace_list
        self.block_dict = block_dict

        # Regen other members from saved members
        bv = self.bv
        self.module_blocks = {bb.start: bb.length for bb in bv.basic_blocks}
        trace_dict = {}
        for block, trace_list in block_dict.items():
            for name in trace_list:
                trace_dict.setdefault(name, set()).add(block)
        self.trace_dict = trace_dict
        self.total_coverage = set(block_dict.keys())

        # Other members are blank/empty
        self.function_stats = {}
        self.frontier = set()
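The explicit raw=False above is what keeps the loaded keys as str, so lookups like loaded_dict["version"] work. A small sketch of the difference (raw=False has been the default since msgpack 1.0, so passing it explicitly mainly pins the behaviour on older releases):

import msgpack

packed = msgpack.dumps({'module_name': 'demo'})
# raw=False -> str keys/values; raw=True -> bytes keys/values.
assert msgpack.loads(packed, raw=False) == {'module_name': 'demo'}
assert msgpack.loads(packed, raw=True) == {b'module_name': b'demo'}

One more version caveat: msgpack 1.0 also defaults to strict_map_key=True, which rejects non-str/bytes map keys, so unpacking a block dict keyed by integer addresses like the one above would additionally need strict_map_key=False on that version.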