Python msgpack.load() Examples
The following are 23 code examples of msgpack.load(), drawn from open-source projects. Each example lists its source file, originating project, and license.
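Before the project-specific examples, a minimal sketch of the call itself may help; the filename and payload here are hypothetical, and the sketch assumes msgpack >= 1.0, where strings decode to str by default (older releases used the encoding='utf8' argument that appears in several examples below).

import msgpack

# Round trip: dump() packs to a writable binary file, load() unpacks from a readable one.
with open('example.msgpack', 'wb') as f:
    msgpack.dump({'name': 'demo', 'values': [1, 2, 3]}, f)

with open('example.msgpack', 'rb') as f:
    data = msgpack.load(f)

print(data)  # {'name': 'demo', 'values': [1, 2, 3]}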
Example #1
Source File: interface.py From simple-effective-text-matching with Apache License 2.0
def __init__(self, args, log=None):
    self.args = args
    # build/load vocab and target map
    vocab_file = os.path.join(args.output_dir, 'vocab.txt')
    target_map_file = os.path.join(args.output_dir, 'target_map.txt')
    if not os.path.exists(vocab_file):
        data = load_data(self.args.data_dir)
        self.target_map = Indexer.build((sample['target'] for sample in data), log=log)
        self.target_map.save(target_map_file)
        self.vocab = Vocab.build((word for sample in data
                                  for text in (sample['text1'], sample['text2'])
                                  for word in text.split()[:self.args.max_len]),
                                 lower=args.lower_case, min_df=self.args.min_df, log=log,
                                 pretrained_embeddings=args.pretrained_embeddings,
                                 dump_filtered=os.path.join(args.output_dir, 'filtered_words.txt'))
        self.vocab.save(vocab_file)
    else:
        self.target_map = Indexer.load(target_map_file)
        self.vocab = Vocab.load(vocab_file)
    args.num_classes = len(self.target_map)
    args.num_vocab = len(self.vocab)
    args.padding = Vocab.pad()
Example #2
Source File: coverage.py From bncov with MIT License
def save_to_file(self, filename):
    """Save only the bare minimum needed to reconstruct this CoverageDB.

    This serializes the data to a single file and can reduce the disk footprint
    of block coverage significantly (depending on overlap and number of files)."""
    if file_backing_disabled:
        raise Exception("[!] Can't save/load coverage db files without msgpack. Try `pip install msgpack`")
    save_dict = dict()
    save_dict["version"] = 1  # serialized covdb version
    save_dict["module_name"] = self.module_name
    save_dict["module_base"] = self.module_base
    save_dict["coverage_files"] = self.coverage_files
    # save tighter version of block dict {int: int} vice {int: str}
    block_dict_to_save = {}
    file_index_map = {filepath: self.coverage_files.index(filepath) for filepath in self.coverage_files}
    for block, trace_list in self.block_dict.items():
        trace_id_list = [file_index_map[name] for name in trace_list]
        block_dict_to_save[block] = trace_id_list
    save_dict["block_dict"] = block_dict_to_save
    # write packed version to file
    with open(filename, "wb") as f:
        msgpack.dump(save_dict, f)
    self.filename = filename
Example #3
Source File: interface.py From simple-effective-text-matching-pytorch with Apache License 2.0
def __init__(self, args, log=None):
    self.args = args
    # build/load vocab and target map
    vocab_file = os.path.join(args.output_dir, 'vocab.txt')
    target_map_file = os.path.join(args.output_dir, 'target_map.txt')
    if not os.path.exists(vocab_file):
        data = load_data(self.args.data_dir)
        self.target_map = Indexer.build((sample['target'] for sample in data), log=log)
        self.target_map.save(target_map_file)
        self.vocab = Vocab.build((word for sample in data
                                  for text in (sample['text1'], sample['text2'])
                                  for word in text.split()[:self.args.max_len]),
                                 lower=args.lower_case, min_df=self.args.min_df, log=log,
                                 pretrained_embeddings=args.pretrained_embeddings,
                                 dump_filtered=os.path.join(args.output_dir, 'filtered_words.txt'))
        self.vocab.save(vocab_file)
    else:
        self.target_map = Indexer.load(target_map_file)
        self.vocab = Vocab.load(vocab_file)
    args.num_classes = len(self.target_map)
    args.num_vocab = len(self.vocab)
    args.padding = Vocab.pad()
Example #4
Source File: serialization.py From dcase_util with MIT License
def load_json(cls, filename):
    """Load JSON file

    Parameters
    ----------
    filename : str
        Filename path

    Returns
    -------
    data

    """
    cls.file_exists(filename=filename)
    try:
        import ujson as json
    except ImportError:
        try:
            import json
        except ImportError:
            message = '{name}: Unable to import json module. You can install it with `pip install ujson`.'.format(
                name=cls.__class__.__name__
            )
            cls.logger().exception(message)
            raise ImportError(message)
    return json.load(open(filename, "r"))
Example #5
Source File: train.py From sru with MIT License
def load_data(opt):
    with open('SQuAD/meta.msgpack', 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['pretrained_words'] = True
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)
    if not opt['fix_embeddings']:
        embedding[1] = torch.normal(means=torch.zeros(opt['embedding_dim']), std=1.)
    with open(args.data_file, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    train_orig = pd.read_csv('SQuAD/train.csv')
    dev_orig = pd.read_csv('SQuAD/dev.csv')
    train = list(zip(
        data['trn_context_ids'],
        data['trn_context_features'],
        data['trn_context_tags'],
        data['trn_context_ents'],
        data['trn_question_ids'],
        train_orig['answer_start_token'].tolist(),
        train_orig['answer_end_token'].tolist(),
        data['trn_context_text'],
        data['trn_context_spans']
    ))
    dev = list(zip(
        data['dev_context_ids'],
        data['dev_context_features'],
        data['dev_context_tags'],
        data['dev_context_ents'],
        data['dev_question_ids'],
        data['dev_context_text'],
        data['dev_context_spans']
    ))
    dev_y = dev_orig['answers'].tolist()[:len(dev)]
    dev_y = [eval(y) for y in dev_y]
    return train, dev, dev_y, embedding, opt
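One editorial note on the example above: the dev answers are parsed with eval(), which will execute arbitrary expressions. For list-of-strings columns like this one, ast.literal_eval is a safer stand-in; a minimal sketch with a hypothetical column value:

import ast

# literal_eval only accepts Python literals (strings, numbers, lists, dicts, ...)
# and raises ValueError on anything else, unlike eval().
y = "['answer one', 'answer two']"
answers = ast.literal_eval(y)
print(answers[0])  # answer one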
Example #6
Source File: interface.py From simple-effective-text-matching with Apache License 2.0
def load_embeddings(self):
    """generate embeddings suited for the current vocab or load previously cached ones."""
    embedding_file = os.path.join(self.args.output_dir, 'embedding.msgpack')
    if not os.path.exists(embedding_file):
        embeddings = load_embeddings(self.args.pretrained_embeddings, self.vocab,
                                     self.args.embedding_dim, mode=self.args.embedding_mode,
                                     lower=self.args.lower_case)
        with open(embedding_file, 'wb') as f:
            msgpack.dump(embeddings, f)
    else:
        with open(embedding_file, 'rb') as f:
            embeddings = msgpack.load(f)
    return embeddings
Example #7
Source File: predict_CoQA.py From FlowDelta with MIT License
def load_dev_data(opt):  # can be extended to true test set
    with open(os.path.join(args.dev_dir, 'dev_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)

    with open(os.path.join(args.dev_dir, 'dev_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')

    assert opt['num_features'] == len(data['context_features'][0][0]) + opt['explicit_dialog_ctx'] * 3

    dev = {'context': list(zip(
                          data['context_ids'],
                          data['context_tags'],
                          data['context_ents'],
                          data['context'],
                          data['context_span'],
                          data['1st_question'],
                          data['context_tokenized'])),
           'qa': list(zip(
                          data['question_CID'],
                          data['question_ids'],
                          data['context_features'],
                          data['answer_start'],
                          data['answer_end'],
                          data['rationale_start'],
                          data['rationale_end'],
                          data['answer_choice'],
                          data['question'],
                          data['answer'],
                          data['question_tokenized']))
           }
    return dev, embedding
Example #8
Source File: train_QuAC.py From FlowDelta with MIT License
def load_train_data(opt):
    with open(os.path.join(args.train_dir, 'train_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)

    with open(os.path.join(args.train_dir, 'train_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    # data_orig = pd.read_csv(os.path.join(args.train_dir, 'train.csv'))

    opt['num_features'] = len(data['context_features'][0][0])

    train = {'context': list(zip(
                            data['context_ids'],
                            data['context_tags'],
                            data['context_ents'],
                            data['context'],
                            data['context_span'],
                            data['1st_question'],
                            data['context_tokenized'])),
             'qa': list(zip(
                            data['question_CID'],
                            data['question_ids'],
                            data['context_features'],
                            data['answer_start'],
                            data['answer_end'],
                            data['answer_choice'],
                            data['question'],
                            data['answer'],
                            data['question_tokenized']))
             }
    return train, embedding, opt
Example #9
Source File: predict_QuAC.py From FlowDelta with MIT License
def load_dev_data(opt):  # can be extended to true test set
    with open(os.path.join(args.dev_dir, 'dev_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)

    with open(os.path.join(args.dev_dir, 'dev_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')

    assert opt['num_features'] == len(data['context_features'][0][0]) + opt['explicit_dialog_ctx'] * (opt['use_dialog_act']*3 + 2)

    dev = {'context': list(zip(
                          data['context_ids'],
                          data['context_tags'],
                          data['context_ents'],
                          data['context'],
                          data['context_span'],
                          data['1st_question'],
                          data['context_tokenized'])),
           'qa': list(zip(
                          data['question_CID'],
                          data['question_ids'],
                          data['context_features'],
                          data['answer_start'],
                          data['answer_end'],
                          data['answer_choice'],
                          data['question'],
                          data['answer'],
                          data['question_tokenized']))
           }

    dev_answer = []
    for i, CID in enumerate(data['question_CID']):
        if len(dev_answer) <= CID:
            dev_answer.append([])
        dev_answer[CID].append(data['all_answer'][i])

    return dev, embedding, dev_answer
Example #10
Source File: train_CoQA.py From FlowDelta with MIT License
def load_dev_data(opt):  # can be extended to true test set
    with open(os.path.join(args.dev_dir, 'dev_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)

    with open(os.path.join(args.dev_dir, 'dev_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    # data_orig = pd.read_csv(os.path.join(args.dev_dir, 'dev.csv'))

    assert opt['num_features'] == len(data['context_features'][0][0])

    dev = {'context': list(zip(
                          data['context_ids'],
                          data['context_tags'],
                          data['context_ents'],
                          data['context'],
                          data['context_span'],
                          data['1st_question'],
                          data['context_tokenized'])),
           'qa': list(zip(
                          data['question_CID'],
                          data['question_ids'],
                          data['context_features'],
                          data['answer_start'],
                          data['answer_end'],
                          data['rationale_start'],
                          data['rationale_end'],
                          data['answer_choice'],
                          data['question'],
                          data['answer'],
                          data['question_tokenized']))
           }
    return dev, embedding
Example #11
Source File: train_CoQA.py From FlowDelta with MIT License
def load_train_data(opt):
    with open(os.path.join(args.train_dir, 'train_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)

    with open(os.path.join(args.train_dir, 'train_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    # data_orig = pd.read_csv(os.path.join(args.train_dir, 'train.csv'))

    opt['num_features'] = len(data['context_features'][0][0])

    train = {'context': list(zip(
                            data['context_ids'],
                            data['context_tags'],
                            data['context_ents'],
                            data['context'],
                            data['context_span'],
                            data['1st_question'],
                            data['context_tokenized'])),
             'qa': list(zip(
                            data['question_CID'],
                            data['question_ids'],
                            data['context_features'],
                            data['answer_start'],
                            data['answer_end'],
                            data['rationale_start'],
                            data['rationale_end'],
                            data['answer_choice'],
                            data['question'],
                            data['answer'],
                            data['question_tokenized']))
             }
    return train, embedding, opt
Example #12
Source File: serialization.py From dcase_util with MIT License
def load_marshal(cls, filename):
    """Load MARSHAL file

    Parameters
    ----------
    filename : str
        Filename path

    Returns
    -------
    data

    """
    cls.file_exists(filename=filename)
    try:
        import marshal
    except ImportError:
        message = '{name}: Unable to import marshal module. You can install it with `pip install pymarshal`.'.format(
            name=cls.__class__.__name__
        )
        cls.logger().exception(message)
        raise ImportError(message)
    return marshal.load(open(filename, "rb"))
Example #13
Source File: serialization.py From dcase_util with MIT License
def load_msgpack(cls, filename):
    """Load MSGPACK file

    Parameters
    ----------
    filename : str
        Filename path

    Returns
    -------
    data

    """
    cls.file_exists(filename=filename)
    try:
        import msgpack
    except ImportError:
        message = '{name}: Unable to import msgpack module. You can install it with `pip install msgpack-python`.'.format(
            name=cls.__class__.__name__
        )
        cls.logger().exception(message)
        raise ImportError(message)
    return msgpack.load(open(filename, "rb"), encoding='utf-8')
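A compatibility caveat for this example and the others that pass encoding=: that keyword was deprecated and then removed in msgpack 1.0, where UTF-8 decoding of strings is the default. A sketch of the equivalent call on a current msgpack (the function name here is ours, not dcase_util's):

import msgpack

def load_msgpack_modern(filename):
    # msgpack >= 1.0 decodes strings to str by default; pass raw=True to get bytes.
    with open(filename, 'rb') as f:
        return msgpack.load(f)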
Example #14
Source File: serialization.py From dcase_util with MIT License
def load_cpickle(cls, filename):
    """Load CPICKLE file

    Parameters
    ----------
    filename : str
        Filename path

    Returns
    -------
    data

    """
    cls.file_exists(filename=filename)
    try:
        import cPickle as pickle
    except ImportError:
        try:
            import pickle
        except ImportError:
            message = '{name}: Unable to import pickle module.'.format(
                name=cls.__class__.__name__
            )
            cls.logger().exception(message)
            raise ImportError(message)
    return pickle.load(open(filename, "rb"))
Example #15
Source File: CoQAPreprocess.py From SDNet with MIT License
def load_data(self):
    print('Load train_meta.msgpack...')
    meta_file_name = os.path.join(self.spacyDir, 'train_meta.msgpack')
    with open(meta_file_name, 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    self.opt['vocab_size'] = embedding.size(0)
    self.opt['vocab_dim'] = embedding.size(1)
    self.opt['char_vocab_size'] = len(meta['char_vocab'])
    return meta['vocab'], meta['char_vocab'], embedding
Example #16
Source File: reader.py From transit-python with Apache License 2.0
def load(self, stream):
    return self.decoder.decode(msgpack.load(stream, object_pairs_hook=OrderedDict))
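The object_pairs_hook keyword used here is worth isolating: msgpack calls the hook with each decoded map's (key, value) pairs, so the reader receives OrderedDicts rather than plain dicts. A standalone sketch with a hypothetical payload:

import io
from collections import OrderedDict
import msgpack

buf = io.BytesIO()
msgpack.pack({'a': 1, 'b': 2}, buf)  # pack() is the stream-writing twin of load()
buf.seek(0)

decoded = msgpack.load(buf, object_pairs_hook=OrderedDict)
print(type(decoded))  # <class 'collections.OrderedDict'>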
Example #17
Source File: reader.py From transit-python with Apache License 2.0
def load(self, stream):
    return self.decoder.decode(json.load(stream, object_pairs_hook=OrderedDict))
Example #18
Source File: reader.py From transit-python with Apache License 2.0
def read(self, stream):
    """Given a readable file descriptor object (something `load`able by
    msgpack or json), read the data, and return the Python representation
    of the contents. One-shot reader.
    """
    return self.reader.load(stream)
Example #19
Source File: interface.py From simple-effective-text-matching-pytorch with Apache License 2.0
def load_embeddings(self):
    """generate embeddings suited for the current vocab or load previously cached ones."""
    assert self.args.pretrained_embeddings
    embedding_file = os.path.join(self.args.output_dir, 'embedding.msgpack')
    if not os.path.exists(embedding_file):
        embeddings = load_embeddings(self.args.pretrained_embeddings, self.vocab,
                                     self.args.embedding_dim, mode=self.args.embedding_mode,
                                     lower=self.args.lower_case)
        with open(embedding_file, 'wb') as f:
            msgpack.dump(embeddings, f)
    else:
        with open(embedding_file, 'rb') as f:
            embeddings = msgpack.load(f)
    return embeddings
Example #20
Source File: serialization.py From dcase_util with MIT License
def load_yaml(cls, filename):
    """Load YAML file

    Parameters
    ----------
    filename : str
        Filename path

    Returns
    -------
    data

    """
    cls.file_exists(filename=filename)
    try:
        import yaml
    except ImportError:
        message = '{name}: Unable to import YAML module. You can install it with `pip install pyyaml`.'.format(
            name=cls.__class__.__name__
        )
        cls.logger().exception(message)
        raise ImportError(message)

    try:
        with open(filename, 'r') as infile:
            return yaml.load(infile, Loader=yaml.FullLoader)
    except yaml.YAMLError as exc:
        cls.logger().error("Error while parsing YAML file [{file}]".format(file=filename))
        if hasattr(exc, 'problem_mark'):
            if exc.context is not None:
                cls.logger().error(str(exc.problem_mark) + '\n  ' + str(exc.problem) + ' ' + str(exc.context))
                cls.logger().error('  Please correct data and retry.')
            else:
                cls.logger().error(str(exc.problem_mark) + '\n  ' + str(exc.problem))
                cls.logger().error('  Please correct data and retry.')
        else:
            cls.logger().error("Something went wrong while parsing yaml file [{file}]".format(file=filename))
        return
Example #21
Source File: utils.py From libnacl with Apache License 2.0
def load_key(path_or_file, serial='json'):
    '''
    Read in a key from a file and return the applicable key object based on
    the contents of the file
    '''
    if hasattr(path_or_file, 'read'):
        stream = path_or_file
    else:
        if serial == 'json':
            stream = open(path_or_file, 'r')
        else:
            stream = open(path_or_file, 'rb')

    try:
        if serial == 'msgpack':
            import msgpack
            key_data = msgpack.load(stream)
        elif serial == 'json':
            import json
            if sys.version_info[0] >= 3:
                key_data = json.loads(stream.read())
            else:
                key_data = json.loads(stream.read(), encoding='UTF-8')
    finally:
        if stream != path_or_file:
            stream.close()

    if 'priv' in key_data and 'sign' in key_data and 'pub' in key_data:
        return libnacl.dual.DualSecret(
            libnacl.encode.hex_decode(key_data['priv']),
            libnacl.encode.hex_decode(key_data['sign']))
    elif 'priv' in key_data and 'pub' in key_data:
        return libnacl.public.SecretKey(
            libnacl.encode.hex_decode(key_data['priv']))
    elif 'sign' in key_data:
        return libnacl.sign.Signer(
            libnacl.encode.hex_decode(key_data['sign']))
    elif 'pub' in key_data:
        return libnacl.public.PublicKey(
            libnacl.encode.hex_decode(key_data['pub']))
    elif 'verify' in key_data:
        return libnacl.sign.Verifier(key_data['verify'])
    elif 'priv' in key_data:
        return libnacl.secret.SecretBox(
            libnacl.encode.hex_decode(key_data['priv']))
    raise ValueError('Found no key data')
Example #22
Source File: predict_QuAC.py From FlowDelta with MIT License
def main():
    log.info('[program starts.]')
    checkpoint = torch.load(args.model)
    opt = checkpoint['config']
    opt['task_name'] = 'QuAC'
    opt['cuda'] = args.cuda
    opt['seed'] = args.seed
    if opt.get('disperse_flow') is None:
        opt['disperse_flow'] = False
    if opt.get('rationale_lambda') is None:
        opt['rationale_lambda'] = 0.0
    if opt.get('no_dialog_flow') is None:
        opt['no_dialog_flow'] = False
    if opt.get('do_hierarchical_query') is None:
        opt['do_hierarchical_query'] = False
    state_dict = checkpoint['state_dict']
    log.info('[model loaded.]')

    test, test_embedding, test_answer = load_dev_data(opt)
    model = QAModel(opt, state_dict=state_dict)
    log.info('[Data loaded.]')

    model.setup_eval_embed(test_embedding)

    if args.cuda:
        model.cuda()

    batches = BatchGen_QuAC(test, batch_size=args.batch_size, evaluation=True, gpu=args.cuda,
                            dialog_ctx=opt['explicit_dialog_ctx'],
                            use_dialog_act=opt['use_dialog_act'],
                            precompute_elmo=opt['elmo_batch_size'] // args.batch_size)
    sample_idx = random.sample(range(len(batches)), args.show)

    predictions = []
    no_ans_scores = []
    for i, batch in enumerate(batches):
        prediction, noans = model.predict(batch, No_Ans_Threshold=args.no_ans)
        predictions.extend(prediction)
        no_ans_scores.extend(noans)

        if not (i in sample_idx):
            continue

        print("Context: ", batch[-4][0])
        for j in range(len(batch[-2][0])):
            print("Q: ", batch[-2][0][j])
            print("A: ", prediction[0][j])
            print("  True A: ", batch[-1][0][j],
                  "| Follow up" if batch[-6][0][j].item() // 10 else "| Don't follow up")
            print("  Val. A: ", test_answer[args.batch_size * i][j])
        print("")

    pred_out = {'predictions': predictions, 'no_ans_scores': no_ans_scores}
    with open(args.output, 'wb') as f:
        pickle.dump(pred_out, f)

    f1, h_f1, HEQ_Q, HEQ_D = score(predictions, test_answer, min_F1=args.min_f1)
    log.warning("Test F1: {:.2f}, HEQ_Q: {:.2f}, HEQ_D: {:.2f}".format(f1, HEQ_Q, HEQ_D))
Example #23
Source File: coverage.py From bncov with MIT License
def load_from_file(self, filename):
    """Reconstruct a CoverageDB using the current BinaryView and a CoverageDB
    saved to disk using .save_to_file()"""
    if file_backing_disabled:
        raise Exception("[!] Can't save/load coverage db files without msgpack. Try `pip install msgpack`")
    self.filename = filename
    with open(filename, "rb") as f:
        loaded_dict = msgpack.load(f, raw=False)
    if "version" not in loaded_dict:
        self._old_load_from_file(loaded_dict)
    # Do sanity checks
    loaded_version = int(loaded_dict["version"])
    if loaded_version != 1:
        raise Exception("[!] Unsupported version number: %d" % loaded_version)
    loaded_module_name = loaded_dict["module_name"]
    if loaded_module_name != self.module_name:
        raise Exception("[!] ERROR: Module name from covdb (%s) doesn't match BinaryView (%s)" %
                        (loaded_module_name, self.module_name))
    loaded_module_base = loaded_dict["module_base"]
    if loaded_module_base != self.module_base:
        raise Exception("[!] ERROR: Module base from covdb (0x%x) doesn't match BinaryView (0x%x)" %
                        (loaded_module_base, self.module_base))
    # Parse the saved members
    coverage_files = loaded_dict["coverage_files"]
    self.coverage_files = coverage_files
    block_dict = dict()
    loaded_block_dict = loaded_dict["block_dict"]
    file_index_map = {self.coverage_files.index(filepath): filepath for filepath in self.coverage_files}
    for block, trace_id_list in loaded_block_dict.items():
        trace_list = [file_index_map[i] for i in trace_id_list]
        block_dict[block] = trace_list
    self.block_dict = block_dict
    # Regen other members from saved members
    bv = self.bv
    self.module_blocks = {bb.start: bb.length for bb in bv.basic_blocks}
    trace_dict = {}
    for block, trace_list in block_dict.items():
        for name in trace_list:
            trace_dict.setdefault(name, set()).add(block)
    self.trace_dict = trace_dict
    self.total_coverage = set(block_dict.keys())
    # Other members are blank/empty
    self.function_stats = {}
    self.frontier = set()