Python Examples of _sre.CODESIZE

Source File: sre_compile.py From android_universal with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From ironpython3 with Apache License 2.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From telegram-robot-rss with Mozilla Public License 2.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From scylla with Apache License 2.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From Imogen with MIT License

5 votes

def _hex_code(code):
    return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)

Source File: sre_compile.py From Imogen with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From Fluid-Designer with GNU General Public License v3.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From GraphicDesignPatternByPython with MIT License

5 votes

def _hex_code(code):
    return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)

Source File: sre_compile.py From GraphicDesignPatternByPython with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From ImageFusion with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From PokemonGo-DesktopMap with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From Safejumper-for-Desktop with GNU General Public License v2.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From oss-ftp with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From android_universal with MIT License

5 votes

def _hex_code(code):
    return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)

Source File: sre_compile.py From PhonePi_SampleServer with MIT License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From kobo-predict with BSD 2-Clause "Simplified" License

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From ironpython2 with Apache License 2.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From syntheticmass with Apache License 2.0

5 votes

def _bytes_to_codes(b):
    # Convert block indices to word array
    import array
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    a = array.array(code, bytes(b))
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()

Source File: sre_compile.py From CTFCrackTools-V2 with GNU General Public License v3.0

4 votes

def _optimize_unicode(charset, fixup):
    # problems with optimization in Jython, forget about it for now
    return charset

    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op == NEGATE:
                negate = 1
            elif op == LITERAL:
                charmap[fixup(av)] = 1
            elif op == RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op == CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        # change this for Jython from 'I', since that will expand to
        # long, and cause needless complexity (or so it seems)
        code = 'i'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]

Source File: sre_compile.py From CTFCrackTools-V2 with GNU General Public License v3.0

4 votes

def _optimize_unicode(charset, fixup):
    # problems with optimization in Jython, forget about it for now
    return charset

    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op == NEGATE:
                negate = 1
            elif op == LITERAL:
                charmap[fixup(av)] = 1
            elif op == RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op == CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        # change this for Jython from 'I', since that will expand to
        # long, and cause needless complexity (or so it seems)
        code = 'i'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]

Source File: sre_compile.py From CTFCrackTools-V2 with GNU General Public License v3.0

4 votes

def _mk_bitmap(bits):
    data = []
    dataappend = data.append
    if _sre.CODESIZE == 2:
        start = (1, 0)
    else:
        start = (1L, 0L)
    m, v = start
    for c in bits:
        if c:
            v = v + m
        m = m + m
        if m > MAXCODE:
            dataappend(v)
            m, v = start
    return data

# To represent a big charset, first a bitmap of all characters in the
# set is constructed. Then, this bitmap == sliced into chunks of 256
# characters, duplicate chunks are eliminated, and each chunk is
# given a number. In the compiled expression, the charset is
# represented by a 16-bit word sequence, consisting of one word for
# the number of different chunks, a sequence of 256 bytes (128 words)
# of chunk numbers indexed by their original chunk position, and a
# sequence of chunks (16 words each).

# Compression is normally good: in a typical charset, large ranges of
# Unicode will be either completely excluded (e.g. if only cyrillic
# letters are to be matched), or completely included (e.g. if large
# subranges of Kanji match). These ranges will be represented by
# chunks of all one-bits or all zero-bits.

# Matching can be also done efficiently: the more significant byte of
# the Unicode character is an index into the chunk number, and the
# less significant byte is a bit index in the chunk (just like the
# CHARSET matching).

# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
# of the basic multilingual plane; an efficient representation
# for all of UTF-16 has not yet been developed. This means,
# in particular, that negated charsets cannot be represented as
# bigcharsets.

Source File: sre_compile.py From CTFCrackTools with GNU General Public License v3.0

4 votes

def _optimize_unicode(charset, fixup):
    # problems with optimization in Jython, forget about it for now
    return charset

    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op == NEGATE:
                negate = 1
            elif op == LITERAL:
                charmap[fixup(av)] = 1
            elif op == RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op == CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        # change this for Jython from 'I', since that will expand to
        # long, and cause needless complexity (or so it seems)
        code = 'i'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]

Source File: sre_compile.py From RevitBatchProcessor with GNU General Public License v3.0

4 votes

def _mk_bitmap(bits):
    data = []
    dataappend = data.append
    if _sre.CODESIZE == 2:
        start = (1, 0)
    else:
        start = (1L, 0L)
    m, v = start
    for c in bits:
        if c:
            v = v + m
        m = m + m
        if m > MAXCODE:
            dataappend(v)
            m, v = start
    return data

# To represent a big charset, first a bitmap of all characters in the
# set is constructed. Then, this bitmap is sliced into chunks of 256
# characters, duplicate chunks are eliminated, and each chunk is
# given a number. In the compiled expression, the charset is
# represented by a 16-bit word sequence, consisting of one word for
# the number of different chunks, a sequence of 256 bytes (128 words)
# of chunk numbers indexed by their original chunk position, and a
# sequence of chunks (16 words each).

# Compression is normally good: in a typical charset, large ranges of
# Unicode will be either completely excluded (e.g. if only cyrillic
# letters are to be matched), or completely included (e.g. if large
# subranges of Kanji match). These ranges will be represented by
# chunks of all one-bits or all zero-bits.

# Matching can be also done efficiently: the more significant byte of
# the Unicode character is an index into the chunk number, and the
# less significant byte is a bit index in the chunk (just like the
# CHARSET matching).

# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
# of the basic multilingual plane; an efficient representation
# for all of UTF-16 has not yet been developed. This means,
# in particular, that negated charsets cannot be represented as
# bigcharsets.

Source File: sre_compile.py From RevitBatchProcessor with GNU General Public License v3.0

4 votes

def _optimize_unicode(charset, fixup):
    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op is NEGATE:
                negate = 1
            elif op is LITERAL:
                charmap[fixup(av)] = 1
            elif op is RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op is CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]

Source File: sre_compile.py From canape with GNU General Public License v3.0

4 votes

def _mk_bitmap(bits):
    data = []
    dataappend = data.append
    if _sre.CODESIZE == 2:
        start = (1, 0)
    else:
        start = (1L, 0L)
    m, v = start
    for c in bits:
        if c:
            v = v + m
        m = m + m
        if m > MAXCODE:
            dataappend(v)
            m, v = start
    return data

# To represent a big charset, first a bitmap of all characters in the
# set is constructed. Then, this bitmap is sliced into chunks of 256
# characters, duplicate chunks are eliminated, and each chunk is
# given a number. In the compiled expression, the charset is
# represented by a 16-bit word sequence, consisting of one word for
# the number of different chunks, a sequence of 256 bytes (128 words)
# of chunk numbers indexed by their original chunk position, and a
# sequence of chunks (16 words each).

# Compression is normally good: in a typical charset, large ranges of
# Unicode will be either completely excluded (e.g. if only cyrillic
# letters are to be matched), or completely included (e.g. if large
# subranges of Kanji match). These ranges will be represented by
# chunks of all one-bits or all zero-bits.

# Matching can be also done efficiently: the more significant byte of
# the Unicode character is an index into the chunk number, and the
# less significant byte is a bit index in the chunk (just like the
# CHARSET matching).

# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
# of the basic multilingual plane; an efficient representation
# for all of UTF-16 has not yet been developed. This means,
# in particular, that negated charsets cannot be represented as
# bigcharsets.

Source File: sre_compile.py From canape with GNU General Public License v3.0

4 votes

def _optimize_unicode(charset, fixup):
    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op is NEGATE:
                negate = 1
            elif op is LITERAL:
                charmap[fixup(av)] = 1
            elif op is RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op is CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]

Source File: sre_compile.py From CTFCrackTools with GNU General Public License v3.0

4 votes

def _mk_bitmap(bits):
    data = []
    dataappend = data.append
    if _sre.CODESIZE == 2:
        start = (1, 0)
    else:
        start = (1L, 0L)
    m, v = start
    for c in bits:
        if c:
            v = v + m
        m = m + m
        if m > MAXCODE:
            dataappend(v)
            m, v = start
    return data

# To represent a big charset, first a bitmap of all characters in the
# set is constructed. Then, this bitmap == sliced into chunks of 256
# characters, duplicate chunks are eliminated, and each chunk is
# given a number. In the compiled expression, the charset is
# represented by a 16-bit word sequence, consisting of one word for
# the number of different chunks, a sequence of 256 bytes (128 words)
# of chunk numbers indexed by their original chunk position, and a
# sequence of chunks (16 words each).

# Compression is normally good: in a typical charset, large ranges of
# Unicode will be either completely excluded (e.g. if only cyrillic
# letters are to be matched), or completely included (e.g. if large
# subranges of Kanji match). These ranges will be represented by
# chunks of all one-bits or all zero-bits.

# Matching can be also done efficiently: the more significant byte of
# the Unicode character is an index into the chunk number, and the
# less significant byte is a bit index in the chunk (just like the
# CHARSET matching).

# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
# of the basic multilingual plane; an efficient representation
# for all of UTF-16 has not yet been developed. This means,
# in particular, that negated charsets cannot be represented as
# bigcharsets.

Source File: sre_compile.py From CTFCrackTools with GNU General Public License v3.0

4 votes

def _optimize_unicode(charset, fixup):
    # problems with optimization in Jython, forget about it for now
    return charset

    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op == NEGATE:
                negate = 1
            elif op == LITERAL:
                charmap[fixup(av)] = 1
            elif op == RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op == CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        # change this for Jython from 'I', since that will expand to
        # long, and cause needless complexity (or so it seems)
        code = 'i'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]

Python _sre.CODESIZE Examples