Python _collections.defaultdict() Examples
The following are 19 code examples of _collections.defaultdict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module _collections, or try the search function.
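Before the examples, a minimal sketch of the behavior every snippet below relies on: indexing a defaultdict with a missing key calls the default factory instead of raising KeyError. The names here are illustrative, not taken from any of the examples:

from collections import defaultdict

# group words by their first letter; a missing key gets a fresh list
groups = defaultdict(list)
for word in ['ant', 'bee', 'bat', 'ape']:
    groups[word[0]].append(word)

print(dict(groups))   # {'a': ['ant', 'ape'], 'b': ['bee', 'bat']}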
Example #1
Source File: IDADebugger.py From VMAttack with MIT License | 6 votes |
def __init__(self, *args):
    super(IDADebugger, self).__init__(*args)
    self.hooked = False
    self.trace = Trace()
    self._module_name = 'IDADbg'
    self.arch = get_arch_dynamic()
    # init the cpu context with 0
    if self.arch == 32:
        self.ctx = {c: '0' for c in ['eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip', 'edi', 'esi',
                                     'cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']}
    elif self.arch == 64:
        self.ctx = {c: '0' for c in ['rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip', 'edi', 'rsi',
                                     'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
                                     'cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']}
    self.IAT = []
    self.func_args = defaultdict(lambda: set())
Example #2
Source File: goldReader.py From supervised-oie with MIT License | 6 votes |
def read(self, fn):
    d = defaultdict(lambda: [])
    with open(fn) as fin:
        for line_ind, line in enumerate(fin):
            data = line.strip().split('\t')
            text, rel = data[:2]
            args = data[2:]
            confidence = 1
            curExtraction = Extraction(pred=rel,
                                       head_pred_index=None,
                                       sent=text,
                                       confidence=float(confidence),
                                       index=line_ind)
            for arg in args:
                curExtraction.addArg(arg)
            d[text].append(curExtraction)
    self.oie = d
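A side note on the pattern above: defaultdict(lambda: []) behaves the same as the more idiomatic defaultdict(list), since any zero-argument callable can serve as the default factory; the lambda only adds an extra function call. A quick equivalence check:

from collections import defaultdict

a = defaultdict(lambda: [])   # factory wrapped in a lambda
b = defaultdict(list)         # the list type itself as factory
a['x'].append(1)
b['x'].append(1)
print(a['x'] == b['x'])       # True: both produce a fresh empty list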
Example #3
Source File: TraceAnalysis.py From VMAttack with MIT License | 5 votes |
def find_vm_addr(trace):
    """
    Find the virtual machine addr
    :param trace: instruction trace
    :return: virtual function start addr
    """
    push_dict = defaultdict(lambda: 0)
    vm_func_dict = defaultdict(lambda: 0)
    # try to find the vm Segment via series of push commands, which identify the vm_addr also
    for line in trace:
        try:
            if line.disasm[0] == 'push':
                push_dict[GetFunctionAttr(line.addr, FUNCATTR_START)] += 1
        except:
            pass
    vm_func = max(push_dict, key=push_dict.get)
    vm_seg_start = SegStart(vm_func)
    vm_seg_end = SegEnd(vm_func)
    # test whether the vm_func is the biggest func in the Segment
    vm_funcs = Functions(vm_seg_start, vm_seg_end)
    for f in vm_funcs:
        vm_func_dict[f] = GetFunctionAttr(f, FUNCATTR_END) - GetFunctionAttr(f, FUNCATTR_START)
    if max(vm_func_dict, key=vm_func_dict.get) != vm_func:
        return AskAddr(vm_func, "Found two possible addresses for the VM function start address: %s and %s. Choose one!" %
                       (vm_func, max(vm_func_dict, key=vm_func_dict.get)))
    else:
        return vm_func
Example #4
Source File: TraceAnalysis.py From VMAttack with MIT License | 5 votes |
def find_virtual_regs(trace, manual=False, update=None):
    """
    Maps the virtual registers on the stack to the actual registers after the vm exit.
    :param trace: instruction trace
    :return: virtual registers dict which maps the real regs onto virtual ones via stack addresses
    """
    vmr = get_vmr()
    assert isinstance(trace, Trace)
    virt_regs = defaultdict(lambda: False)
    # trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)
    while trace:
        try:
            elem = trace.pop(len(trace) - 1)
            if len(elem.disasm) > 0 and elem.disasm[0] == 'pop':
                opnd = elem.disasm[1]
                if get_reg_class(opnd) is None:
                    # if not a register it is a mem_loc
                    pass
                elif virt_regs[opnd]:
                    pass
                else:
                    # the context always shows the registers after the execution,
                    # so we need the SP from the instruction before
                    stack_addr = trace[len(trace) - 1].ctx[get_reg('rsp', trace.ctx_reg_size)]
                    virt_regs[opnd] = stack_addr
        except:
            pass
    if update is not None:
        update.pbar_update(60)
    vmr.vm_stack_reg_mapping = virt_regs
    if manual:
        print ''.join('%s:%s\n' % (c, virt_regs[c]) for c in virt_regs.keys())
    return virt_regs
Example #5
Source File: __init__.py From android_universal with MIT License | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
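This lookup, repeated verbatim in several of the vendored copies below, appears to be ChainMap.__getitem__ from the standard library's collections package. The inline comment is the interesting part: a 'key in mapping' membership test reports a missing key as absent, while indexing a defaultdict triggers its factory, so only the try/except form honors a child defaultdict's default. A minimal sketch of the difference:

from collections import ChainMap, defaultdict

dd = defaultdict(int)            # missing keys yield 0 instead of KeyError
cm = ChainMap({'a': 1}, dd)

print(cm['a'])   # 1, found in the first map
print(cm['b'])   # 0, dd['b'] triggers the factory; a 'key in mapping'
                 # membership check would have skipped this default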
Example #6
Source File: __init__.py From jawfish with MIT License | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #7
Source File: __init__.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #8
Source File: goldReader.py From oie-benchmark with MIT License | 5 votes |
def read(self, fn):
    d = defaultdict(lambda: [])
    with open(fn) as fin:
        for line in fin:
            data = line.strip().split('\t')
            text, base_rel, rel = data[:3]
            args = data[3:]
            confidence = 1
            curExtraction = Extraction(pred=rel, sent=text, confidence=float(confidence))
            for arg in args:
                curExtraction.addArg(arg)
            d[text].append(curExtraction)
    self.oie = d
Example #9
Source File: __init__.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #10
Source File: __init__.py From Fluid-Designer with GNU General Public License v3.0 | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #11
Source File: __init__.py From Imogen with MIT License | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #12
Source File: __init__.py From scylla with Apache License 2.0 | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #13
Source File: __init__.py From ironpython3 with Apache License 2.0 | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #14
Source File: __init__.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def __getitem__(self, key):
    for mapping in self.maps:
        try:
            return mapping[key]             # can't use 'key in mapping' with defaultdict
        except KeyError:
            pass
    return self.__missing__(key)            # support subclasses that define __missing__
Example #15
Source File: model.py From Mimick with GNU General Public License v3.0 | 4 votes |
def __init__(self, tagset_sizes, num_lstm_layers, hidden_dim, word_embeddings, no_we_update, use_char_rnn,
             charset_size, char_embedding_dim, att_props=None, vocab_size=None, word_embedding_dim=None):
    '''
    :param tagset_sizes: dictionary of attribute_name:number_of_possible_tags
    :param num_lstm_layers: number of desired LSTM layers
    :param hidden_dim: size of hidden dimension (same for all LSTM layers, including character-level)
    :param word_embeddings: pre-trained list of embeddings, assumes order by word ID (optional)
    :param no_we_update: if toggled, don't update embeddings
    :param use_char_rnn: use "char->tag" option, i.e. concatenate character-level LSTM outputs to word representations (and train underlying LSTM). Only 1-layer is supported.
    :param charset_size: number of characters expected in dataset (needed for character embedding initialization)
    :param char_embedding_dim: desired character embedding dimension
    :param att_props: proportion of loss to assign each attribute for back-propagation weighting (optional)
    :param vocab_size: number of words in model (ignored if pre-trained embeddings are given)
    :param word_embedding_dim: desired word embedding dimension (ignored if pre-trained embeddings are given)
    '''
    self.model = dy.Model()
    self.tagset_sizes = tagset_sizes
    self.attributes = list(tagset_sizes.keys())
    self.we_update = not no_we_update
    if att_props is not None:
        self.att_props = defaultdict(float, {att: (1.0 - p) for att, p in att_props.items()})
    else:
        self.att_props = None
    if word_embeddings is not None:  # use pretrained embeddings
        vocab_size = word_embeddings.shape[0]
        word_embedding_dim = word_embeddings.shape[1]
    self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim), name="we")
    if word_embeddings is not None:
        self.words_lookup.init_from_array(word_embeddings)

    # Char LSTM Parameters
    self.use_char_rnn = use_char_rnn
    self.char_hidden_dim = hidden_dim
    if use_char_rnn:
        self.char_lookup = self.model.add_lookup_parameters((charset_size, char_embedding_dim), name="ce")
        self.char_bi_lstm = dy.BiRNNBuilder(1, char_embedding_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Word LSTM parameters
    if use_char_rnn:
        input_dim = word_embedding_dim + hidden_dim
    else:
        input_dim = word_embedding_dim
    self.word_bi_lstm = dy.BiRNNBuilder(num_lstm_layers, input_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Matrix that maps from Bi-LSTM output to num tags
    self.lstm_to_tags_params = {}
    self.lstm_to_tags_bias = {}
    self.mlp_out = {}
    self.mlp_out_bias = {}
    for att, set_size in list(tagset_sizes.items()):
        self.lstm_to_tags_params[att] = self.model.add_parameters((set_size, hidden_dim), name=att + "H")
        self.lstm_to_tags_bias[att] = self.model.add_parameters(set_size, name=att + "Hb")
        self.mlp_out[att] = self.model.add_parameters((set_size, set_size), name=att + "O")
        self.mlp_out_bias[att] = self.model.add_parameters(set_size, name=att + "Ob")
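A note on the form used for self.att_props above: defaultdict accepts an initial mapping after the factory, exactly like the dict constructor, so seeded keys keep their given values while unseen keys fall back to the factory. A minimal sketch with made-up attribute names:

from collections import defaultdict

# factory first, then an ordinary mapping of initial items
weights = defaultdict(float, {'pos': 0.7, 'ner': 0.3})
print(weights['pos'])    # 0.7, from the initial mapping
print(weights['case'])   # 0.0, produced by the float() factory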
Example #16
Source File: test_demo.py From gtfslib-python with GNU General Public License v3.0 | 4 votes |
def test_demo(self):
    dao = Dao(DAO_URL, sql_logging=False)
    dao.load_gtfs(DUMMY_GTFS)

    print("List of stops named '...Bordeaux...':")
    stops_bordeaux = list(dao.stops(fltr=(Stop.stop_name.ilike('%Bordeaux%')) & (Stop.location_type == Stop.TYPE_STOP)))
    for stop in stops_bordeaux:
        print(stop.stop_name)

    print("List of routes passing by those stops:")
    routes_bordeaux = dao.routes(fltr=or_(StopTime.stop == stop for stop in stops_bordeaux))
    for route in routes_bordeaux:
        print("%s - %s" % (route.route_short_name, route.route_long_name))

    july4 = CalendarDate.ymd(2016, 7, 4)
    print("All departures from those stops on %s:" % (july4.as_date()))
    departures = list(dao.stoptimes(fltr=(or_(StopTime.stop == stop for stop in stops_bordeaux))
                                    & (StopTime.departure_time != None)
                                    & (func.date(CalendarDate.date) == july4.date)))
    print("There are %d departures" % (len(departures)))
    for departure in departures:
        print("%30.30s %10.10s %-20.20s > %s" % (departure.stop.stop_name, fmttime(departure.departure_time),
                                                 departure.trip.route.route_long_name, departure.trip.trip_headsign))

    print("Number of departures and time range per stop on %s:" % (july4.as_date()))
    departure_by_stop = defaultdict(list)
    for departure in departures:
        departure_by_stop[departure.stop].append(departure)
    for stop, deps in departure_by_stop.items():
        min_dep = min(d.departure_time for d in deps)
        max_dep = max(d.departure_time for d in deps)
        print("%30.30s %3d departures (from %s to %s)" % (stop.stop_name, len(deps), fmttime(min_dep), fmttime(max_dep)))

    # Compute the average distance and time to next stop by route type
    ntd = [[0, 0, 0.0] for type in range(0, Route.TYPE_FUNICULAR + 1)]
    for departure in departures:
        # The following is guaranteed to succeed as we have departure_time == Null for last stop time in trip
        next_arrival = departure.trip.stop_times[departure.stop_sequence + 1]
        hop_dist = next_arrival.shape_dist_traveled - departure.shape_dist_traveled
        hop_time = next_arrival.arrival_time - departure.departure_time
        route_type = departure.trip.route.route_type
        ntd[route_type][0] += 1
        ntd[route_type][1] += hop_time
        ntd[route_type][2] += hop_dist
    for route_type in range(0, len(ntd)):
        n, t, d = ntd[route_type]
        if n > 0:
            print("The average distance to the next stop on those departures for route type %d is %.2f meters" % (route_type, d / n))
            print("The average time in sec to the next stop on those departures for route type %d is %s" % (route_type, fmttime(t / n)))
Example #17
Source File: __init__.py From simhashpy with Apache License 2.0 | 4 votes |
def find(self, value, k=2, exclude_obj_ids=set(), exclude_obj_id_contain=None):
    """
    Find the ids of texts similar to value; the logic has several steps:
    1. Split the value of the origin_simhash being searched into several keys.
    2. Look up each key in the inverted index to collect potentially similar related_simhash entries.
    3. Compute the edit distance d between origin_simhash and each related_simhash.
    4. Record every related_simhash together with its edit distance d.
    5. For entries that occur several times, derive an additional average.
    6. Sort the related_simhash entries by d in ascending order.
    """
    assert value != None
    if isinstance(value, (str, unicode)):
        simhash = Simhash(value=value, f=self.f)
    elif isinstance(value, Simhash):
        simhash = value
    else:
        raise ValueError('value not text or simhash')
    assert simhash.f == self.f
    sim_hash_dict = defaultdict(list)
    ans = set()
    for key in self.get_keys(simhash):
        with Timer(msg='==query: %s' % key):
            simhash_invertindex = SimhashInvertedIndex.objects.filter(key=key)
            if simhash_invertindex:
                simhash_caches_index = [sim_index.simhash_value_obj_id
                                        for sim_index in simhash_invertindex]
            else:
                # logging.warning('SimhashInvertedIndex not exists key %s: %s' % (key, e))
                continue
        with Timer(msg='find d < k %d' % (k)):
            if len(simhash_caches_index) > 200:
                logging.warning('Big bucket found. key:%s, len:%s', key, len(simhash_caches_index))
            for simhash_cache in simhash_caches_index:
                try:
                    sim2, obj_id = simhash_cache.split(',', 1)
                    if obj_id in exclude_obj_ids or \
                            (exclude_obj_id_contain and exclude_obj_id_contain in simhash_cache):
                        continue
                    sim2 = Simhash(long(sim2, 16), self.f)
                    d = simhash.distance(sim2)
                    # print '**' * 50
                    # print "d:%d obj_id:%s key:%s " % (d, obj_id, key)
                    sim_hash_dict[obj_id].append(d)
                    if d < k:
                        ans.add(obj_id)
                except Exception, e:
                    logging.warning('not exists %s' % (e))
Example #18
Source File: IDADebugger.py From VMAttack with MIT License | 4 votes |
def gen_trace(self, trace_start=BeginEA(), trace_end=BADADDR):
    """
    Generate trace for the loaded binary.
    :param trace_start:
    :param trace_end:
    :return:
    """
    vmr = get_vmr()
    self.trace_init()
    # reset color
    heads = Heads(SegStart(ScreenEA()), SegEnd(ScreenEA()))
    for i in heads:
        SetColor(i, CIC_ITEM, 0xFFFFFF)
    # start exec
    RunTo(BeginEA())
    event = GetDebuggerEvent(WFNE_SUSP, -1)
    # enable tracing
    EnableTracing(TRACE_STEP, 1)
    if vmr.sys_libs:
        pass
    event = GetDebuggerEvent(WFNE_ANY | WFNE_CONT, -1)
    while True:
        event = GetDebuggerEvent(WFNE_ANY, -1)
        addr = GetEventEa()
        # change color of executed line
        current_color = GetColor(addr, CIC_ITEM)
        new_color = self.get_new_color(current_color)
        SetColor(addr, CIC_ITEM, new_color)
        # break by exception
        if event <= 1:
            break
    # standardize the difference between ida_trace.txt files and generated trace files by debugger hook:
    # since dbg_trace returns the cpu context before the instruction execution and trace files the ctx after
    for line in self.trace:
        try:
            line.ctx = self.trace[self.trace.index(line) + 1].ctx
        except IndexError:
            line.ctx = defaultdict(lambda: '0')
    # return the trace, for population see dbg_trace() below
    msg('[*] Trace generated!\n')
    if vmr.extract_param:
        vmr.func_args = self.func_args
        for key in self.func_args.keys():
            print 'Function %s call args:' % key, ''.join('%s, ' % arg for arg in self.func_args[key]).rstrip(', ')
    return self.trace
Example #19
Source File: TraceAnalysis.py From VMAttack with MIT License | 4 votes |
def dynamic_vm_values(trace, code_start=BADADDR, code_end=BADADDR, silent=False):
    """
    Find the virtual machine context necessary for an automated static analysis.

    code_start = the bytecode start -> often the param for vm_func and usually starts right after vm_func
    code_end = the bytecode end -> bytecode is usually a big chunk, so if we identify several x86/x64 inst in a row we reached the end
    base_addr = startaddr of the jmp table -> most often used offset in the vm_trace
    vm_addr = startaddr of the vm function -> biggest function in .vmp segment

    :param trace: instruction trace
    :return: vm_ctx -> [code_start, code_end, base_addr, vm_func_addr, vm_funcs]
    """
    base_addr = defaultdict(lambda: 0)
    vm_addr = find_vm_addr(deepcopy(trace))
    trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)
    code_addrs = []
    # try finding code_start
    if code_start == BADADDR:
        code_start = GetFunctionAttr(vm_addr, FUNCATTR_END)  # NextHead(GetFunctionAttr(vm_addr, FUNCATTR_END), vm_seg_end)
        code_start = NextHead(code_start, BADADDR)
        while isCode(code_start):
            code_start = NextHead(code_start, BADADDR)
    for line in trace:
        # construct base addr dict of offsets -> jmp table should be the one most used
        if len(line.disasm) == 2:
            try:
                offset = re.findall(r'.*:off_([0123456789abcdefABCDEF]*)\[.*\]', line.disasm[1])[0]
                base_addr[offset] += 1
            except:
                pass
        # code_start additional search of vm_func params
        if line.addr == vm_addr:
            for l in trace[:trace.index(line)]:
                if l.disasm[0] == 'push':
                    try:
                        arg = re.findall(r'.*_([0123456789ABCDEFabcdef]*)', l.disasm[1])
                        if len(arg) == 1:
                            code_addrs.append(int(arg[0], 16))
                    except Exception, e:
                        print e.message
    # finalize base_addr