Python Examples of _collections.defaultdict

Source File: IDADebugger.py From VMAttack with MIT License

6 votes

def __init__(self, *args):
        """
        Initialise the debugger hook state.

        Forwards ``*args`` to the parent constructor, creates an empty trace,
        queries the architecture via ``get_arch_dynamic()`` and, for 32- and
        64-bit targets, seeds ``self.ctx`` with every tracked register and flag
        mapped to the string ``'0'``.

        :param args: positional arguments passed through to the base class
        """
        super(IDADebugger, self).__init__(*args)
        self.hooked = False
        self.trace = Trace()
        self._module_name = 'IDADbg'
        self.arch = get_arch_dynamic()
        # init the cpu context with 0
        flags = ['cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df']
        if self.arch == 32:
            regs = ['eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip', 'edi', 'esi']
            self.ctx = dict.fromkeys(regs + flags, '0')
        elif self.arch == 64:
            # 'rdi' rather than the 32-bit alias 'edi', so that every general
            # purpose register of the 64-bit context uses its full-width name
            regs = ['rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip', 'rdi', 'rsi',
                    'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15']
            self.ctx = dict.fromkeys(regs + flags, '0')
        # NOTE: for any other arch value self.ctx is left unset

        self.IAT = []
        # function -> set of call arguments; filled elsewhere
        self.func_args = defaultdict(set)

Source File: goldReader.py From supervised-oie with MIT License

6 votes

def read(self, fn):
        """
        Load extractions from the tab-separated file ``fn``.

        Every line holds the sentence, the relation and then any number of
        arguments. One ``Extraction`` (confidence 1.0, indexed by its line
        number) is built per line and the result, grouped by sentence text,
        is stored in ``self.oie``.

        :param fn: path of the file to read
        """
        by_sentence = defaultdict(list)
        with open(fn) as gold_file:
            for line_ind, raw_line in enumerate(gold_file):
                fields = raw_line.strip().split('\t')
                # exactly two leading fields are required; the rest are arguments
                text, rel = fields[:2]
                extraction = Extraction(pred = rel,
                                        head_pred_index = None,
                                        sent = text,
                                        confidence = float(1),
                                        index = line_ind)
                for argument in fields[2:]:
                    extraction.addArg(argument)

                by_sentence[text].append(extraction)
        self.oie = by_sentence

Source File: TraceAnalysis.py From VMAttack with MIT License

5 votes

def find_vm_addr(trace):
    """
    Find the virtual machine addr.

    Counts, per containing function, the ``push`` instructions in the trace
    and takes the function with the most pushes as the candidate. If that
    candidate is not also the largest function of its segment, the user is
    asked to pick between the two.

    :param trace: instruction trace
    :return: virtual function start addr
    :raises ValueError: from ``max`` if no push instruction could be counted
    """
    push_dict = defaultdict(int)
    # try to find the vm Segment via series of push commands, which identify the vm_addr also
    for line in trace:
        try:
            if line.disasm[0] == 'push':
                push_dict[GetFunctionAttr(line.addr, FUNCATTR_START)] += 1
        except Exception:
            # best effort: skip lines that cannot be inspected, but let
            # KeyboardInterrupt / SystemExit propagate
            continue

    vm_func = max(push_dict, key=push_dict.get)
    vm_seg_start = SegStart(vm_func)
    vm_seg_end = SegEnd(vm_func)
    # test whether the vm_func is the biggest func in the Segment
    vm_func_dict = {}
    for f in Functions(vm_seg_start, vm_seg_end):
        vm_func_dict[f] = GetFunctionAttr(f, FUNCATTR_END) - GetFunctionAttr(f, FUNCATTR_START)
    biggest_func = max(vm_func_dict, key=vm_func_dict.get)
    if biggest_func == vm_func:
        return vm_func
    return AskAddr(vm_func,
            "Found two possible addresses for the VM function start address: %s and %s. Choose one!" %
            (vm_func, biggest_func))

Source File: TraceAnalysis.py From VMAttack with MIT License

5 votes

def find_virtual_regs(trace, manual=False, update=None):
    """
    Maps the virtual registers on the stack to the actual registers after the vm exit.

    The trace is walked from its end towards its start and is emptied in the
    process (elements are popped off the passed object). For each ``pop`` whose
    operand is a register, the first stack address seen for that register is
    recorded. The mapping is also stored in ``vmr.vm_stack_reg_mapping``.

    :param trace: instruction trace (must be a ``Trace``); consumed by this call
    :param manual: if true, print the resulting mapping
    :param update: optional progress object; ``pbar_update(60)`` is called on it
    :return: virtual registers dict which maps the real regs onto virtual ones via stack addresses
    """
    vmr = get_vmr()
    assert isinstance(trace, Trace)
    # unmapped registers read as False
    virt_regs = defaultdict(bool)

    while trace:
        # pop outside the try block so the loop always makes progress
        elem = trace.pop(len(trace) - 1)
        try:
            if len(elem.disasm) > 0 and elem.disasm[0] == 'pop':
                opnd = elem.disasm[1]
                # memory operands (no register class) and already mapped registers are skipped
                if get_reg_class(opnd) is not None and not virt_regs[opnd]:
                    # the context always shows the registers after the execution, so we need the SP from the instruction before
                    stack_addr = trace[len(trace) - 1].ctx[get_reg('rsp', trace.ctx_reg_size)]
                    virt_regs[opnd] = stack_addr
        except Exception:
            # best effort: ignore lines that cannot be evaluated, but do not
            # swallow KeyboardInterrupt / SystemExit
            pass

    if update is not None:
        update.pbar_update(60)

    vmr.vm_stack_reg_mapping = virt_regs
    if manual:
        # single-argument print() gives identical output on Python 2 and 3
        print(''.join('%s:%s\n' % (c, virt_regs[c]) for c in virt_regs))
    return virt_regs

Source File: __init__.py From android_universal with MIT License

5 votes

def __getitem__(self, key):
        """Return the value for ``key`` from the first map that holds it, else defer to ``__missing__``."""
        for layer in self.maps:
            # index directly: a membership test would not trigger a defaultdict's factory
            try:
                value = layer[key]
            except KeyError:
                continue
            return value
        # lets subclasses supply a fallback through __missing__
        return self.__missing__(key)

Source File: __init__.py From jawfish with MIT License

5 votes

def __getitem__(self, key):
        """Search the maps in order and return the first value stored under ``key``."""
        for candidate in self.maps:
            try:
                found = candidate[key]      # plain indexing keeps defaultdict semantics intact
            except KeyError:
                pass
            else:
                return found
        # no map had the key: hand over to __missing__ (overridable in subclasses)
        return self.__missing__(key)

Source File: __init__.py From kobo-predict with BSD 2-Clause "Simplified" License

5 votes

def __getitem__(self, key):
        """Return the value for ``key`` from the first map that holds it, else defer to ``__missing__``."""
        for layer in self.maps:
            # index directly: a membership test would not trigger a defaultdict's factory
            try:
                value = layer[key]
            except KeyError:
                continue
            return value
        # lets subclasses supply a fallback through __missing__
        return self.__missing__(key)

Source File: goldReader.py From oie-benchmark with MIT License

5 votes

def read(self, fn):
        """
        Load extractions from the tab-separated file ``fn``.

        Every line holds the sentence, a base relation (read but not used),
        the relation and then any number of arguments. One ``Extraction`` with
        confidence 1.0 is built per line and the result, grouped by sentence
        text, is stored in ``self.oie``.

        :param fn: path of the file to read
        """
        by_sentence = defaultdict(list)
        with open(fn) as gold_file:
            for raw_line in gold_file:
                fields = raw_line.strip().split('\t')
                # exactly three leading fields are required; the rest are arguments
                text, _base_rel, rel = fields[:3]
                extraction = Extraction(pred = rel, sent = text, confidence = float(1))
                for argument in fields[3:]:
                    extraction.addArg(argument)

                by_sentence[text].append(extraction)
        self.oie = by_sentence

Source File: __init__.py From GraphicDesignPatternByPython with MIT License

5 votes

def __getitem__(self, key):
        """Search the maps in order and return the first value stored under ``key``."""
        for candidate in self.maps:
            try:
                found = candidate[key]      # plain indexing keeps defaultdict semantics intact
            except KeyError:
                pass
            else:
                return found
        # no map had the key: hand over to __missing__ (overridable in subclasses)
        return self.__missing__(key)

Source File: __init__.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def __getitem__(self, key):
        """Return the value for ``key`` from the first map that holds it, else defer to ``__missing__``."""
        for layer in self.maps:
            # index directly: a membership test would not trigger a defaultdict's factory
            try:
                value = layer[key]
            except KeyError:
                continue
            return value
        # lets subclasses supply a fallback through __missing__
        return self.__missing__(key)

Source File: __init__.py From Imogen with MIT License

5 votes

def __getitem__(self, key):
        """Search the maps in order and return the first value stored under ``key``."""
        for candidate in self.maps:
            try:
                found = candidate[key]      # plain indexing keeps defaultdict semantics intact
            except KeyError:
                pass
            else:
                return found
        # no map had the key: hand over to __missing__ (overridable in subclasses)
        return self.__missing__(key)

Source File: __init__.py From scylla with Apache License 2.0

5 votes

def __getitem__(self, key):
        """Return the value for ``key`` from the first map that holds it, else defer to ``__missing__``."""
        for layer in self.maps:
            # index directly: a membership test would not trigger a defaultdict's factory
            try:
                value = layer[key]
            except KeyError:
                continue
            return value
        # lets subclasses supply a fallback through __missing__
        return self.__missing__(key)

Source File: __init__.py From ironpython3 with Apache License 2.0

5 votes

def __getitem__(self, key):
        """Search the maps in order and return the first value stored under ``key``."""
        for candidate in self.maps:
            try:
                found = candidate[key]      # plain indexing keeps defaultdict semantics intact
            except KeyError:
                pass
            else:
                return found
        # no map had the key: hand over to __missing__ (overridable in subclasses)
        return self.__missing__(key)

Source File: __init__.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0

5 votes

def __getitem__(self, key):
        """Return the value for ``key`` from the first map that holds it, else defer to ``__missing__``."""
        for layer in self.maps:
            # index directly: a membership test would not trigger a defaultdict's factory
            try:
                value = layer[key]
            except KeyError:
                continue
            return value
        # lets subclasses supply a fallback through __missing__
        return self.__missing__(key)

Source File: model.py From Mimick with GNU General Public License v3.0

4 votes

def __init__(self, tagset_sizes, num_lstm_layers, hidden_dim, word_embeddings, no_we_update, use_char_rnn, charset_size, char_embedding_dim, att_props=None, vocab_size=None, word_embedding_dim=None):
        '''
        Build the tagger's parameter collection: word lookup, optional character
        bi-LSTM, word bi-LSTM and, per attribute, the output layers.

        :param tagset_sizes: dictionary of attribute_name:number_of_possible_tags
        :param num_lstm_layers: number of layers of the word-level bi-LSTM
        :param hidden_dim: hidden size shared by the word-level and character-level LSTMs
        :param word_embeddings: pre-trained embeddings array or None; when given, its shape overrides vocab_size and word_embedding_dim
        :param no_we_update: stored negated as self.we_update
        :param use_char_rnn: if true, add a 1-layer character bi-LSTM and widen the word LSTM input by hidden_dim
        :param charset_size: number of rows of the character lookup table
        :param char_embedding_dim: character embedding dimension
        :param att_props: optional attribute->proportion mapping; stored as 1.0 - proportion, unknown attributes read as 0.0
        :param vocab_size: number of rows of the word lookup table (ignored if pre-trained embeddings are given)
        :param word_embedding_dim: word embedding dimension (ignored if pre-trained embeddings are given)
        '''
        self.model = dy.Model()
        self.tagset_sizes = tagset_sizes
        self.attributes = list(tagset_sizes.keys())
        self.we_update = not no_we_update
        self.att_props = None if att_props is None else defaultdict(
            float, {att: (1.0 - p) for att, p in att_props.items()})

        # Pre-trained embeddings dictate the lookup table's shape
        has_pretrained = word_embeddings is not None
        if has_pretrained:
            vocab_size, word_embedding_dim = word_embeddings.shape[0], word_embeddings.shape[1]

        self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim), name="we")
        if has_pretrained:
            self.words_lookup.init_from_array(word_embeddings)

        # Char LSTM parameters; the word LSTM input widens when they are used
        self.use_char_rnn = use_char_rnn
        self.char_hidden_dim = hidden_dim
        input_dim = word_embedding_dim
        if use_char_rnn:
            self.char_lookup = self.model.add_lookup_parameters((charset_size, char_embedding_dim), name="ce")
            self.char_bi_lstm = dy.BiRNNBuilder(1, char_embedding_dim, hidden_dim, self.model, dy.LSTMBuilder)
            input_dim = word_embedding_dim + hidden_dim

        # Word LSTM parameters
        self.word_bi_lstm = dy.BiRNNBuilder(num_lstm_layers, input_dim, hidden_dim, self.model, dy.LSTMBuilder)

        # Per attribute: projection from Bi-LSTM output to tag scores plus an output layer
        self.lstm_to_tags_params = {}
        self.lstm_to_tags_bias = {}
        self.mlp_out = {}
        self.mlp_out_bias = {}
        add_parameters = self.model.add_parameters
        for att, set_size in tagset_sizes.items():
            # registration order (H, Hb, O, Ob) is kept exactly as before
            self.lstm_to_tags_params[att] = add_parameters((set_size, hidden_dim), name=att+"H")
            self.lstm_to_tags_bias[att] = add_parameters(set_size, name=att+"Hb")
            self.mlp_out[att] = add_parameters((set_size, set_size), name=att+"O")
            self.mlp_out_bias[att] = add_parameters(set_size, name=att+"Ob")

Source File: test_demo.py From gtfslib-python with GNU General Public License v3.0

4 votes

def test_demo(self):
        """Load the dummy GTFS feed and print stops, routes and departure statistics for the Bordeaux stops."""
        dao = Dao(DAO_URL, sql_logging=False)
        dao.load_gtfs(DUMMY_GTFS)

        print("List of stops named '...Bordeaux...':")
        bordeaux_filter = (Stop.stop_name.ilike('%Bordeaux%')) & (Stop.location_type == Stop.TYPE_STOP)
        stops_bordeaux = list(dao.stops(fltr=bordeaux_filter))
        for stop in stops_bordeaux:
            print(stop.stop_name)

        print("List of routes passing by those stops:")
        routes_bordeaux = dao.routes(fltr=or_(StopTime.stop == stop for stop in stops_bordeaux))
        for route in routes_bordeaux:
            print("%s - %s" % (route.route_short_name, route.route_long_name))

        july4 = CalendarDate.ymd(2016, 7, 4)
        print("All departures from those stops on %s:" % (july4.as_date()))
        at_those_stops = or_(StopTime.stop == stop for stop in stops_bordeaux)
        # '!= None' is deliberate: it builds a query expression rather than comparing in Python
        has_departure = (StopTime.departure_time != None)
        on_july4 = (func.date(CalendarDate.date) == july4.date)
        departures = list(dao.stoptimes(fltr=at_those_stops & has_departure & on_july4))
        print("There is %d departures" % (len(departures)))
        for departure in departures:
            print("%30.30s %10.10s %-20.20s > %s" % (departure.stop.stop_name, fmttime(departure.departure_time), departure.trip.route.route_long_name, departure.trip.trip_headsign))

        print("Number of departures and time range per stop on %s:" % (july4.as_date()))
        departure_by_stop = defaultdict(list)
        for departure in departures:
            departure_by_stop[departure.stop].append(departure)
        for stop, deps in departure_by_stop.items():
            dep_times = [d.departure_time for d in deps]
            print("%30.30s %3d departures (from %s to %s)" % (stop.stop_name, len(deps), fmttime(min(dep_times)), fmttime(max(dep_times))))

        # Compute the average distance and time to next stop by route type
        # one [count, total time, total distance] accumulator per route type
        ntd = [ [0, 0, 0.0] for _ in range(0, Route.TYPE_FUNICULAR + 1) ]
        for departure in departures:
            # The following is guaranteed to succeed as we have departure_time == Null for last stop time in trip
            next_arrival = departure.trip.stop_times[departure.stop_sequence + 1]
            hop_dist = next_arrival.shape_dist_traveled - departure.shape_dist_traveled
            hop_time = next_arrival.arrival_time - departure.departure_time
            totals = ntd[departure.trip.route.route_type]
            totals[0] += 1
            totals[1] += hop_time
            totals[2] += hop_dist
        for route_type, (n, t, d) in enumerate(ntd):
            if n <= 0:
                continue
            print("The average distance to the next stop on those departures for route type %d is %.2f meters" % (route_type, d / n))
            print("The average time in sec to the next stop on those departures for route type %d is %s" % (route_type, fmttime(t / n)))

Source File: __init__.py From simhashpy with Apache License 2.0

4 votes

def find(self, value, k=2, exclude_obj_ids=set(), exclude_obj_id_contain=None):
        """
        Look up the ids of texts whose simhash is close to ``value``.

        Steps as described by the original author:
        1. split the value of the simhash being searched into several keys
        2. query the inverted index with every key to get the possibly
           similar related simhashes
        3. compute the distance d between the searched simhash and each
           related simhash
        4. record every related simhash together with its distance d
        5. for those that occur several times, derive extra averaged information
        6. sort the related simhashes by d in ascending order

        NOTE(review): the code visible here covers steps 1-4 (it fills
        ``sim_hash_dict`` and ``ans``) and ends without a return statement;
        steps 5-6 presumably live in code not shown - confirm.

        :param value: text (``str``/``unicode``) or a ``Simhash`` built with the same ``f``
        :param k: ids whose distance is strictly below ``k`` are collected in ``ans``
        :param exclude_obj_ids: ids to skip; the default set is shared between
            calls and is only read here
        :param exclude_obj_id_contain: if truthy, cache entries containing this
            substring are skipped
        :raises TypeError: if ``value`` is neither text nor a ``Simhash``
        """
        assert value is not None

        if isinstance(value, (str, unicode)):
            simhash = Simhash(value=value, f=self.f)
        elif isinstance(value, Simhash):
            simhash = value
        else:
            # a bare string cannot be raised; raise a real exception carrying the message
            raise TypeError('value not text or simhash')
        assert simhash.f == self.f
        sim_hash_dict = defaultdict(list)
        ans = set()
        for key in self.get_keys(simhash):
            with Timer(msg='==query: %s' % key):
                simhash_invertindex = SimhashInvertedIndex.objects.filter(key=key)
                if not simhash_invertindex:
                    # nothing indexed under this key
                    continue
                simhash_caches_index = [sim_index.simhash_value_obj_id
                                        for sim_index in simhash_invertindex]
            with Timer(msg='find d < k %d' % (k)):
                if len(simhash_caches_index) > 200:
                    logging.warning('Big bucket found. key:%s, len:%s', key, len(simhash_caches_index))
                for simhash_cache in simhash_caches_index:
                    try:
                        # cache entries have the form "<hex simhash>,<obj_id>"
                        sim2, obj_id = simhash_cache.split(',', 1)
                        if obj_id in exclude_obj_ids or \
                        (exclude_obj_id_contain and exclude_obj_id_contain in simhash_cache):
                            continue

                        sim2 = Simhash(long(sim2, 16), self.f)
                        d = simhash.distance(sim2)
                        sim_hash_dict[obj_id].append(d)
                        if d < k:
                            ans.add(obj_id)
                    except Exception as e:
                        logging.warning('not exists %s' % (e))

Source File: IDADebugger.py From VMAttack with MIT License

4 votes

def gen_trace(self, trace_start=BeginEA(), trace_end=BADADDR):
        """
        Generate trace for the loaded binary.

        Resets the item colours of the segment under the cursor, runs to the
        entry point, enables step tracing and colours every executed address
        until a debugger event with code <= 1 arrives. Afterwards each trace
        line receives the cpu context of its successor.

        :param trace_start: currently unused; execution always starts at ``BeginEA()``
        :param trace_end: currently unused
        :return: the populated ``self.trace``
        """
        vmr = get_vmr()
        self.trace_init()
        # reset color
        heads = Heads(SegStart(ScreenEA()), SegEnd(ScreenEA()))
        for i in heads:
            SetColor(i, CIC_ITEM, 0xFFFFFF)
        # start exec
        RunTo(BeginEA())
        event = GetDebuggerEvent(WFNE_SUSP, -1)
        # enable tracing
        EnableTracing(TRACE_STEP, 1)
        if vmr.sys_libs:
            # placeholder: no special handling implemented
            pass
        event = GetDebuggerEvent(WFNE_ANY | WFNE_CONT, -1)
        while True:
            event = GetDebuggerEvent(WFNE_ANY, -1)
            addr = GetEventEa()

            # change color of executed line
            current_color = GetColor(addr, CIC_ITEM)
            new_color = self.get_new_color(current_color)
            SetColor(addr, CIC_ITEM, new_color)
            # break by exception
            if event <= 1:
                break

        # standardize the difference between ida_trace.txt files and generated trace files by debugger hook:
        # since dbg_trace returns the cpu context before the instruction execution and trace files the ctx after
        for position, line in enumerate(self.trace):
            # use the running position instead of self.trace.index(line): that
            # lookup is linear per line and returns the first equal element
            try:
                line.ctx = self.trace[position + 1].ctx
            except IndexError:
                # the last line has no successor: every register reads as '0'
                line.ctx = defaultdict(lambda: '0')
        # return the trace, for population see dbg_trace() below
        msg('[*] Trace generated!\n')
        if vmr.extract_param:
            vmr.func_args = self.func_args
            for key in self.func_args.keys():
                call_args = ''.join('%s, ' % arg for arg in self.func_args[key]).rstrip(', ')
                # single-argument print() gives identical output on Python 2 and 3
                print('Function %s call args: %s' % (key, call_args))
        return self.trace

Source File: TraceAnalysis.py From VMAttack with MIT License

4 votes

def dynamic_vm_values(trace, code_start=BADADDR, code_end=BADADDR, silent=False):
    """
    Find the virtual machine context necessary for an automated static analysis.
    code_start = the bytecode start -> often the param for vm_func and usually starts right after vm_func
    code_end = the bytecode end -> bytecode usually a big chunk, so if we identify several  x86/x64 inst in a row we reached the end
    base_addr = startaddr of the jmp table -> most often used offset in the vm_trace
    vm_addr = startaddr of the vm function -> biggest function in .vmp segment,

    NOTE(review): this listing stops after the trace loop, before base_addr is
    finalized and before anything is returned; code_end and silent are not
    used in the visible part - confirm against the full source.

    :param trace: instruction trace
    :param code_start: bytecode start; searched for when left at BADADDR
    :param code_end: bytecode end
    :param silent: not used in the visible part of the function
    :return: vm_ctx -> [code_start, code_end, base_addr, vm_func_addr, vm_funcs]
    """
    # offset string -> number of times it was referenced in the trace
    base_addr = defaultdict(lambda: 0)
    # find_vm_addr is handed its own deep copy of the trace
    vm_addr = find_vm_addr(deepcopy(trace))
    # from here on 'trace' is whatever extract_vm_segment returned as its first element
    trace, vm_seg_start, vm_seg_end = extract_vm_segment(trace)

    # addresses parsed from push operands that precede the vm function in the trace
    code_addrs = []

    # try finding code_start
    if code_start == BADADDR:
        # start at the end of the vm function ...
        code_start = GetFunctionAttr(vm_addr, FUNCATTR_END)#NextHead(GetFunctionAttr(vm_addr, FUNCATTR_END), vm_seg_end)
        code_start = NextHead(code_start, BADADDR)
        # ... and advance head by head for as long as isCode() holds
        while isCode(code_start):
            code_start = NextHead(code_start, BADADDR)

    for line in trace:
        # construct base addr dict of offsets -> jmp table should be the one most used
        if len(line.disasm) == 2:
            try:
                # hex digits following ':off_' in an operand of the form '...:off_<hex>[...]'
                offset = re.findall(r'.*:off_([0123456789abcdefABCDEF]*)\[.*\]', line.disasm[1])[0]
                base_addr[offset] += 1
            except:
                # no match (IndexError on [0]) or any other failure: skip this line
                pass
        # code_start additional search of vm_func params
        if line.addr == vm_addr:
            # look at every trace line before the first element equal to this one
            for l in trace[:trace.index(line)]:
                if l.disasm[0] == 'push':
                    try:
                        # hex digits after the last '_' of the push operand
                        arg = re.findall(r'.*_([0123456789ABCDEFabcdef]*)', l.disasm[1])
                        if len(arg) == 1:
                            code_addrs.append(int(arg[0], 16))
                    except Exception, e:
                        print e.message

    # finalize base_addr

Python _collections.defaultdict() Examples