Python zlib.compress() Examples

The following are 30 code examples of zlib.compress(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module zlib, or try the search function.
Example #1
Source File: jwe.py    From jwcrypto with GNU Lesser General Public License v3.0
def _encrypt(self, alg, enc, jh):
        aad = base64url_encode(self.objects.get('protected', ''))
        if 'aad' in self.objects:
            aad += '.' + base64url_encode(self.objects['aad'])
        aad = aad.encode('utf-8')

        compress = jh.get('zip', None)
        if compress == 'DEF':
            data = zlib.compress(self.plaintext)[2:-4]
        elif compress is None:
            data = self.plaintext
        else:
            raise ValueError('Unknown compression')

        iv, ciphertext, tag = enc.encrypt(self.cek, aad, data)
        self.objects['iv'] = iv
        self.objects['ciphertext'] = ciphertext
        self.objects['tag'] = tag 
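The [2:-4] slice above strips the 2-byte zlib header and the 4-byte Adler-32 trailer, leaving the raw DEFLATE stream that JWE's 'DEF' compression requires; the decryption side (Example #28 below) inflates it with a negative wbits value. A minimal round-trip sketch:

import zlib

payload = b'{"claim": "example"}'
raw_deflate = zlib.compress(payload)[2:-4]  # drop zlib header + Adler-32 trailer
assert zlib.decompress(raw_deflate, -zlib.MAX_WBITS) == payload  # negative wbits = raw DEFLATE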
Example #2
Source File: scaffold.py    From CAMISIM with Apache License 2.0
def __init__(self, id, name, contig, scaffoldSeq):
        self.id = id
        self.name = name
        self._taxPathDict = None
        self.contigs = []
        self._removeNonDna = False
        if (contig != None):
            self.contigs.append(contig)
        if (scaffoldSeq != None):
            seq = noNewLine(scaffoldSeq)
            self.seqBp = len(removeNonDna(seq))
            self._scaffCompressed = zlib.compress(seq)
            self._hash = hash(seq.upper())
            self._scaffDef = True
        else:
            self._scaffDef = False
            self._hash = None
            self.seqBp = 0 
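This is Python 2 code: zlib.compress() is called on a plain str. On Python 3 it accepts only bytes-like objects, so the sequence would need encoding first. A minimal sketch of the same idea:

import zlib

seq = "ACGTACGTACGT" * 64
compressed = zlib.compress(seq.encode("ascii"))  # DNA sequences are plain ASCII
assert zlib.decompress(compressed).decode("ascii") == seq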
Example #3
Source File: soapdenovo.py    From CAMISIM with Apache License 2.0
def sortReads(inReadsFile, outReadsFile, headerToNum=lambda x: int(x.split('_', 2)[1].strip('nr'))):
    i = 0
    seqName = None
    tupleList = []
    for line in csv.getColumnAsList(inReadsFile, sep='\n'):
        if i % 2 == 0:
            seqName = line
        else:
            seq = line
            assert seqName is not None
            tupleList.append((seqName, zlib.compress(seq), headerToNum(seqName)))
            seqName = None
        i += 1
    tupleList.sort(key=lambda x: x[2])

    out = csv.OutFileBuffer(outReadsFile)
    for t in tupleList:
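        # Python 2 idiom below: on Python 3, decompress() returns bytes, so str()
        # would yield a "b'...'" literal; .decode() would be needed instead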
        out.writeText(str(t[0]) + '\n' + str(zlib.decompress(t[1])) + '\n')
    out.close() 
Example #4
Source File: adventure.py    From Dumb-Cogs with MIT License
def t_suspend(self, verb, obj):
        if isinstance(obj, str):
            if os.path.exists(obj):  # pragma: no cover
                self.write('I refuse to overwrite an existing file.')
                return
            savefile = open(obj, 'wb')
        else:
            savefile = obj
        r = self.random_generator  # must replace live object with static state
        self.random_state = r.getstate()
        try:
            del self.random_generator
            savefile.write(zlib.compress(pickle.dumps(self), 9))
        finally:
            self.random_generator = r
            if savefile is not obj:
                savefile.close()
        self.write('Game saved') 
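Restoring the game reverses both layers: read the file, decompress, unpickle. A hedged sketch (the resume helper below is illustrative, not the project's actual code):

import pickle
import zlib

def t_resume(path):
    with open(path, 'rb') as savefile:
        return pickle.loads(zlib.decompress(savefile.read()))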
Example #5
Source File: cache.py    From a4kScrapers with MIT License
def __set_cache_core(query, cached_results):
    try:
        item = {}
        item['q'] = sha256(query)
        item['t'] = now()

        data = json.dumps(cached_results).replace('"', "'")
        data = zlib.compress(data.encode('utf-8'))
        item['d'] = base64.b64encode(data).decode('ascii')

        if CACHE_LOG:
            tools.log('set_cache_request: %s' % query, 'notice')

        response = __dynamo_put(__map_in_cache(item))
        if response.status_code >= 400:
            if CACHE_LOG:
                tools.log('set_cache_request_err: %s, status_code=%s, text=%s' % (query, response.status_code, response.text), 'notice')
    except:
        traceback.print_exc() 
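Reading a cache entry back reverses each step: base64-decode the 'd' field, decompress, swap the quotes back and parse the JSON. A sketch, assuming the same item layout:

import base64, json, zlib

def __get_cache_core(item):
    data = zlib.decompress(base64.b64decode(item['d'])).decode('utf-8')
    return json.loads(data.replace("'", '"'))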
Example #6
Source File: gifti.py    From me-ica with GNU Lesser General Public License v2.1
def data_tag(dataarray, encoding, datatype, ordering):
    """ Creates the data tag depending on the required encoding """
    import base64
    import zlib
    ord = array_index_order_codes.npcode[ordering]
    enclabel = gifti_encoding_codes.label[encoding]
    if enclabel == 'ASCII':
        c = BytesIO()
        # np.savetxt(c, dataarray, format, delimiter for columns)
        np.savetxt(c, dataarray, datatype, ' ')
        c.seek(0)
        da = c.read()
    elif enclabel == 'B64BIN':
        da = base64.encodestring(dataarray.tostring(ord))
    elif enclabel == 'B64GZ':
        # first compress
        comp = zlib.compress(dataarray.tostring(ord))
        da = base64.encodestring(comp)
        da = da.decode()
    elif enclabel == 'External':
        raise NotImplementedError("In what format are the external files?")
    else:
        da = ''
    return "<Data>"+da+"</Data>\n" 
Example #7
Source File: filebased.py    From GTDWeb with GNU General Public License v2.0
def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
        self._createdir()  # Cache dir can be deleted at any time.
        fname = self._key_to_file(key, version)
        self._cull()  # make some room if necessary
        fd, tmp_path = tempfile.mkstemp(dir=self._dir)
        renamed = False
        try:
            with io.open(fd, 'wb') as f:
                expiry = self.get_backend_timeout(timeout)
                f.write(pickle.dumps(expiry, -1))
                f.write(zlib.compress(pickle.dumps(value), -1))
            file_move_safe(tmp_path, fname, allow_overwrite=True)
            renamed = True
        finally:
            if not renamed:
                os.remove(tmp_path) 
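The matching read path unpickles the expiry stamp first, then decompresses and unpickles the payload that follows it in the same file. In sketch form:

with io.open(fname, 'rb') as f:
    expiry = pickle.load(f)  # consumes only the first pickle frame
    value = pickle.loads(zlib.decompress(f.read()))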
Example #8
Source File: json_base.py    From tomodachi with MIT License
def build_message(cls, service: Any, topic: str, data: Any, **kwargs: Any) -> str:
        data_encoding = 'raw'
        if len(json.dumps(data)) >= 60000:
            data = base64.b64encode(zlib.compress(json.dumps(data).encode('utf-8'))).decode('utf-8')
            data_encoding = 'base64_gzip_json'

        message = {
            'service': {
                'name': getattr(service, 'name', None),
                'uuid': getattr(service, 'uuid', None)
            },
            'metadata': {
                'message_uuid': '{}.{}'.format(getattr(service, 'uuid', ''), str(uuid.uuid4())),
                'protocol_version': PROTOCOL_VERSION,
                'compatible_protocol_versions': ['json_base-wip'],  # deprecated
                'timestamp': time.time(),
                'topic': topic,
                'data_encoding': data_encoding
            },
            'data': data
        }
        return json.dumps(message) 
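Despite the 'base64_gzip_json' label, the payload is a zlib stream (zlib.compress), not a gzip file. A receiver would branch on data_encoding and peel the three layers back off; a hedged sketch:

if message['metadata']['data_encoding'] == 'base64_gzip_json':
    data = json.loads(zlib.decompress(base64.b64decode(message['data'])).decode('utf-8'))
else:
    data = message['data']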
Example #9
Source File: undo.py    From trelby with GNU General Public License v2.0
def lines2storage(lines):
    if not lines:
        return (0,)

    lines = [str(ln) for ln in lines]
    linesStr = "\n".join(lines)

    # instead of having an arbitrary cutoff figure ("compress if < X
    # bytes"), always compress, but only use the compressed version if
    # it's shorter than the non-compressed one.

    linesStrCompressed = zlib.compress(linesStr, 6)

    if len(linesStrCompressed) < len(linesStr):
        return (len(lines), True, linesStrCompressed)
    else:
        return (len(lines), False, linesStr)

# see lines2storage. 
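The inverse (storage2lines in Trelby's undo module, which the comment above points to) checks the flag in slot 1 and decompresses only when compression won. Roughly, with the same Python 2 string semantics as the snippet above:

def storage2lines(storage):
    if storage[0] == 0:
        return []
    if storage[1]:  # stored compressed
        return zlib.decompress(storage[2]).split("\n")
    return storage[2].split("\n")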
Example #10
Source File: test_buffers.py    From purerpc with Apache License 2.0
def test_message_read_buffer(byte_array):
    for i in range(100):
        data = bytes(range(i))
        compress_flag = False
        if i % 2:
            data = zlib.compress(data)
            compress_flag = True
        byte_array.extend(struct.pack('>?I', compress_flag, len(data)))
        byte_array.extend(data)

    read_buffer = MessageReadBuffer(message_encoding="gzip")
    messages = []
    while byte_array:
        if random.choice([True, False]):
            num_bytes = random.randint(0, 50)
            read_buffer.data_received(bytes(byte_array[:num_bytes]))
            byte_array = byte_array[num_bytes:]
        else:
            messages.extend(read_buffer.read_all_complete_messages())
    messages.extend(read_buffer.read_all_complete_messages())

    assert len(messages) == 100
    for idx, message in enumerate(messages):
        assert message == bytes(range(idx)) 
Example #11
Source File: protobuf_base.py    From tomodachi with MIT License
def build_message(cls, service: Any, topic: str, data: Any, **kwargs: Any) -> str:
        message_data = data.SerializeToString()

        data_encoding = 'proto'
        if len(message_data) > 60000:
            message_data = zlib.compress(data.SerializeToString())
            data_encoding = 'gzip_proto'

        message = SNSSQSMessage()
        message.service.name = getattr(service, 'name', None)
        message.service.uuid = getattr(service, 'uuid', None)
        message.metadata.message_uuid = '{}.{}'.format(getattr(service, 'uuid', ''), str(uuid.uuid4()))
        message.metadata.protocol_version = PROTOCOL_VERSION
        message.metadata.timestamp = time.time()
        message.metadata.topic = topic
        message.metadata.data_encoding = data_encoding
        message.data = message_data

        return base64.b64encode(message.SerializeToString()).decode('ascii') 
Example #12
Source File: pipeline_util.py    From sparkflow with MIT License
def _to_java(self):
        """
        Convert this instance to a dill dump, then to a list of strings with the unicode integer values of each character.
        Use this list as a set of dumby stopwords and store in a StopWordsRemover instance
        :return: Java object equivalent to this instance.
        """
        dmp = dill.dumps(self)
        dmp = zlib.compress(dmp)
        sc = SparkContext._active_spark_context
        pylist = [str(i) + ',' for i in bytearray(dmp)]
        # convert bytes to string integer list
        pylist = [''.join(pylist)]
        pylist.append(PysparkObjId._getPyObjId()) # add our id so PysparkPipelineWrapper can id us.
        java_class = sc._gateway.jvm.java.lang.String
        java_array = sc._gateway.new_array(java_class, len(pylist))
        java_array[0:2] = pylist[0:2]
        _java_obj = JavaParams._new_java_obj(PysparkObjId._getCarrierClass(javaName=True), self.uid)
        _java_obj.setStopWords(java_array)
        return _java_obj 
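The decode side has to undo each layer: fetch the stop words, drop the id marker, parse the comma-separated byte values back into bytes, then decompress and dill.loads. A hedged sketch (the helper name is illustrative, not sparkflow's actual code):

def _from_java_sketch(java_stage):
    words = java_stage.getStopWords()  # [byte-value string, PysparkObjId marker]
    data = bytes(bytearray(int(b) for b in words[0].split(',') if b))
    return dill.loads(zlib.decompress(data))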
Example #13
Source File: types.py    From ironpython2 with Apache License 2.0
def CheckBinaryInputForConverter(self):
        testdata = "abcdefg" * 10
        with test_support.check_py3k_warnings():
            result = self.con.execute('select ? as "x [bin]"', (buffer(zlib.compress(testdata)),)).fetchone()[0]
        self.assertEqual(testdata, result) 
Example #14
Source File: macros.py    From pyth with MIT License
def compress(a):
    if isinstance(a, str):
        a = a.encode('iso-8859-1')
        try:
            a = zlib.decompress(a)
        except:
            a = zlib.compress(a, 9)
        return a.decode('iso-8859-1')

    return unknown_types(compress, ".Z", a) 
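Because decompression is attempted first and compression is the fallback, the function toggles: applied twice to ordinary text it returns the original, since the first call compresses and the second finds a valid zlib stream and decompresses:

s = compress("hello world hello world")  # not a valid zlib stream -> compresses
assert compress(s) == "hello world hello world"  # valid stream -> decompresses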
Example #15
Source File: parse_query.py    From wttr.in with Apache License 2.0
def serialize(parsed_query):
    return base64.b64encode(
        zlib.compress(
            json.dumps(parsed_query).encode("utf-8")),
        altchars=b"-_").decode("utf-8") 
Example #16
Source File: test_json_data_converter.py    From botoflow with Apache License 2.0
def test_zlib(serde):
    # This test is really about ensuring that binary data isn't corrupted
    data = six.b('compress me')
    compressed = zlib.compress(data)
    assert zlib.decompress(dumps_loads(serde, compressed)) == data 
Example #17
Source File: types.py    From vsphere-storage-for-docker with Apache License 2.0
def CheckBinaryInputForConverter(self):
        testdata = "abcdefg" * 10
        result = self.con.execute('select ? as "x [bin]"', (buffer(zlib.compress(testdata)),)).fetchone()[0]
        self.assertEqual(testdata, result) 
Example #18
Source File: png.py    From Talking-Face-Generation-DAVS with MIT License
def encode(buf, width, height):
  """ buf: must be bytes or a bytearray in py3, a regular string in py2. formatted RGBRGB... """
  assert (width * height * 3 == len(buf))
  bpp = 3

  def raw_data():
    # reverse the vertical line order and add null bytes at the start
    row_bytes = width * bpp
    for row_start in range((height - 1) * width * bpp, -1, -row_bytes):
      yield b'\x00'
      yield buf[row_start:row_start + row_bytes]

  def chunk(tag, data):
    return [
        struct.pack("!I", len(data)),
        tag,
        data,
        struct.pack("!I", 0xFFFFFFFF & zlib.crc32(data, zlib.crc32(tag)))
      ]

  SIGNATURE = b'\x89PNG\r\n\x1a\n'
  COLOR_TYPE_RGB = 2
  COLOR_TYPE_RGBA = 6
  bit_depth = 8
  return b''.join(
      [ SIGNATURE ] +
      chunk(b'IHDR', struct.pack("!2I5B", width, height, bit_depth, COLOR_TYPE_RGB, 0, 0, 0)) +
      chunk(b'IDAT', zlib.compress(b''.join(raw_data()), 9)) +
      chunk(b'IEND', b'')
    ) 
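A quick usage sketch: a solid-red 2x2 image is 12 raw RGB bytes, and the encoded result can be written straight to disk (the file name is arbitrary):

buf = b'\xff\x00\x00' * 4  # 2x2 pixels, all red
with open('red.png', 'wb') as f:
    f.write(encode(buf, 2, 2))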
Example #19
Source File: test_make_text_chunk.py    From pyAPNG with MIT License
def test_itxt_compressed():
	chunk_type, data = make_text_chunk(type="iTXt", value=u"SOME TEXT", compression_flag=1)
	assert chunk_type == "iTXt"
	assert data == make_chunk(
		"iTXt",
		b"Comment\0\1\0\0\0" +
			zlib.compress(u"SOME TEXT".encode("utf-8"))) 
Example #20
Source File: test_make_text_chunk.py    From pyAPNG with MIT License
def test_ztxt():
	chunk_type, data = make_text_chunk(type="zTXt", value="some text")
	assert chunk_type == "zTXt"
	assert data == make_chunk("zTXt", b"Comment\0\0" + zlib.compress(b"some text")) 
Example #21
Source File: utils.py    From mars with Apache License 2.0
def serialize_graph(graph, compress=False, data_serial_type=None, pickle_protocol=None):
    ser_graph = graph.to_pb(data_serial_type=data_serial_type,
                            pickle_protocol=pickle_protocol).SerializeToString()
    if compress:
        ser_graph = zlib.compress(ser_graph)
    return ser_graph 
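The serialized bytes carry no flag recording whether compression was applied, so the reader must know out of band or probe for it. A hedged sketch of a tolerant reader (hypothetical helper, not part of mars' API):

def graph_bytes(ser_graph):
    try:
        return zlib.decompress(ser_graph)  # was compressed on the way in
    except zlib.error:
        return ser_graph  # plain protobuf bytes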
Example #22
Source File: aff4_image.py    From pyaff4 with Apache License 2.0
def FlushChunk(self, chunk):
        if len(chunk) == 0:
            return

        bevy_offset = self.bevy_length

        if self.compression == lexicon.AFF4_IMAGE_COMPRESSION_ZLIB:
            compressed_chunk = zlib.compress(chunk)
        elif (snappy and self.compression ==
              lexicon.AFF4_IMAGE_COMPRESSION_SNAPPY):
            compressed_chunk = snappy.compress(chunk)
        elif self.compression == lexicon.AFF4_IMAGE_COMPRESSION_STORED:
            compressed_chunk = chunk

        compressedLen = len(compressed_chunk)

        if compressedLen < self.chunk_size - 16:
            self.bevy_index.append((bevy_offset, compressedLen))
            self.bevy.append(compressed_chunk)
            self.bevy_length += compressedLen
        else:
            self.bevy_index.append((bevy_offset, self.chunk_size))
            self.bevy.append(chunk)
            self.bevy_length += self.chunk_size

        #self.bevy_index.append((bevy_offset, len(compressed_chunk)))
        #self.bevy.append(compressed_chunk)
        #self.bevy_length += len(compressed_chunk)
        self.chunk_count_in_bevy += 1

        #self.buffer = chunk[self.chunk_size:]
        if self.chunk_count_in_bevy >= self.chunks_per_segment:
            self._FlushBevy() 
Example #23
Source File: ewf.py    From rekall with GNU General Public License v2.0
def write(self, data):
        """Writes the data into the file.

        This method allows the writer to be used as a file-like object.
        """
        self.buffer += data
        buffer_offset = 0
        while len(self.buffer) - buffer_offset >= self.chunk_size:
            data = self.buffer[buffer_offset:buffer_offset+self.chunk_size]
            cdata = zlib.compress(data)
            chunk_offset = self.current_offset - self.base_offset

            if len(cdata) > len(data):
                self.table.append(chunk_offset)
                cdata = data
            else:
                self.table.append(0x80000000 | chunk_offset)

            self.out_as.write(self.current_offset, cdata)
            self.current_offset += len(cdata)
            buffer_offset += self.chunk_size
            self.chunk_id += 1

            # Flush the table when it gets too large. Tables can only store a
            # 31-bit offset and so can only address roughly 2GB. We choose to
            # stay under 1GB: 30000 * 32KB = 0.91GB.
            if len(self.table) > 30000:
                self.session.report_progress(
                    "Flushing EWF Table %s.", self.table_count)
                self.FlushTable()
                self.StartNewTable()

        self.buffer = self.buffer[buffer_offset:] 
Example #24
Source File: collector.py    From stem with GNU Lesser General Public License v3.0
def test_index_lzma(self, urlopen_mock):
    if not Compression.LZMA.available:
      self.skipTest('(lzma compression unavailable)')

    import lzma
    urlopen_mock.return_value = io.BytesIO(lzma.compress(EXAMPLE_INDEX_JSON))

    collector = CollecTor()
    self.assertEqual(EXAMPLE_INDEX, collector.index(Compression.LZMA))
    urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.xz', timeout = None) 
Example #25
Source File: collector.py    From stem with GNU Lesser General Public License v3.0
def test_index_bz2(self, urlopen_mock):
    if not Compression.BZ2.available:
      self.skipTest('(bz2 compression unavailable)')

    import bz2
    urlopen_mock.return_value = io.BytesIO(bz2.compress(EXAMPLE_INDEX_JSON))

    collector = CollecTor()
    self.assertEqual(EXAMPLE_INDEX, collector.index(Compression.BZ2))
    urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.bz2', timeout = None) 
Example #26
Source File: collector.py    From stem with GNU Lesser General Public License v3.0
def test_index_gzip(self, urlopen_mock):
    if not Compression.GZIP.available:
      self.skipTest('(gzip compression unavailable)')

    import zlib
    urlopen_mock.return_value = io.BytesIO(zlib.compress(EXAMPLE_INDEX_JSON))

    collector = CollecTor()
    self.assertEqual(EXAMPLE_INDEX, collector.index(Compression.GZIP))
    urlopen_mock.assert_called_with('https://collector.torproject.org/index/index.json.gz', timeout = None) 
Example #27
Source File: exit_policy.py    From stem with GNU Lesser General Public License v3.0
def __init__(self, *rules: Union[str, 'stem.exit_policy.ExitPolicyRule']) -> None:
    # sanity check the types

    self._input_rules = None  # type: Optional[Union[bytes, Sequence[Union[str, bytes, stem.exit_policy.ExitPolicyRule]]]]

    for rule in rules:
      if not isinstance(rule, (bytes, str)) and not isinstance(rule, ExitPolicyRule):
        raise TypeError('Exit policy rules can only contain strings or ExitPolicyRules, got a %s (%s)' % (type(rule), rules))

    # Unparsed representation of the rules we were constructed with. Our
    # _get_rules() method consumes this to provide ExitPolicyRule instances.
    # This is lazily evaluated so we don't need to actually parse the exit
    # policy if it's never used.

    is_all_str = True

    for rule in rules:
      if not isinstance(rule, (bytes, str)):
        is_all_str = False

    if rules and is_all_str:
      byte_rules = [stem.util.str_tools._to_bytes(r) for r in rules]  # type: ignore
      self._input_rules = zlib.compress(b','.join(byte_rules))
    else:
      self._input_rules = rules

    self._policy_str = None  # type: Optional[str]
    self._rules = None  # type: List[stem.exit_policy.ExitPolicyRule]
    self._hash = None  # type: Optional[int]

    # Result when no rules apply. According to the spec policies default to 'is
    # allowed', but our microdescriptor policy subclass might want to change
    # this.

    self._is_allowed_default = True 
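_get_rules() later undoes this: when _input_rules holds compressed bytes it inflates them and splits on the comma separator before parsing each rule. Roughly:

if isinstance(self._input_rules, bytes):
    rule_strs = zlib.decompress(self._input_rules).split(b',')
else:
    rule_strs = self._input_rules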
Example #28
Source File: jwe.py    From jwcrypto with GNU Lesser General Public License v3.0
def _decrypt(self, key, ppe):

        jh = self._get_jose_header(ppe.get('header', None))

        # TODO: allow caller to specify list of headers it understands
        self._check_crit(jh.get('crit', dict()))

        for hdr in jh:
            if hdr in self.header_registry:
                if not self.header_registry.check_header(hdr, self):
                    raise InvalidJWEData('Failed header check')

        alg = self._jwa_keymgmt(jh.get('alg', None))
        enc = self._jwa_enc(jh.get('enc', None))

        aad = base64url_encode(self.objects.get('protected', ''))
        if 'aad' in self.objects:
            aad += '.' + base64url_encode(self.objects['aad'])

        cek = alg.unwrap(key, enc.wrap_key_size,
                         ppe.get('encrypted_key', b''), jh)
        data = enc.decrypt(cek, aad.encode('utf-8'),
                           self.objects['iv'],
                           self.objects['ciphertext'],
                           self.objects['tag'])

        self.decryptlog.append('Success')
        self.cek = cek

        compress = jh.get('zip', None)
        if compress == 'DEF':
            self.plaintext = zlib.decompress(data, -zlib.MAX_WBITS)
        elif compress is None:
            self.plaintext = data
        else:
            raise ValueError('Unknown compression') 
Example #29
Source File: memoization.py    From rasa_wechat with Apache License 2.0
def _feature_vector_to_str(self, x, domain):
        decoded_features = self.featurizer.decode(x,
                                                  domain.input_features,
                                                  ndigits=8)
        feature_str = json.dumps(decoded_features).replace("\"", "")
        if ENABLE_FEATURE_STRING_COMPRESSION:
            compressed = zlib.compress(bytes(feature_str, "utf-8"))
            return base64.b64encode(compressed).decode("utf-8")
        else:
            return feature_str 
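Recovering the feature string inverts both steps under the same flag. A minimal sketch (the helper name is illustrative):

def _str_to_feature_vector_str(encoded):
    if ENABLE_FEATURE_STRING_COMPRESSION:
        return zlib.decompress(base64.b64decode(encoded)).decode("utf-8")
    return encoded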
Example #30
Source File: utils.py    From zmirror with MIT License
def embed_real_url_to_embedded_url(real_url_raw, url_mime, escape_slash=False):
    """
    将url的参数(?q=some&foo=bar)编码到url路径中, 并在url末添加一个文件扩展名
    在某些对url参数支持不好的CDN中, 可以减少错误
    `cdn_redirect_encode_query_str_into_url`设置依赖于本函数, 详细说明可以看配置文件中的对应部分
    解码由 extract_real_url_from_embedded_url() 函数进行, 对应的例子也请看这个函数
    :rtype: str
    """
    # dbgprint(real_url_raw, url_mime, escape_slash)
    if escape_slash:
        real_url = real_url_raw.replace(r'\/', '/')
    else:
        real_url = real_url_raw
    url_sp = urlsplit(real_url)
    if not url_sp.query:  # no query, needn't rewrite
        return real_url_raw

    byte_query = url_sp.query.encode()
    if len(byte_query) > 128:  # compress the query string when it is too long
        gzip_label = 'z'  # compressed queries get a 'z' added to the label section
        byte_query = zlib.compress(byte_query)
    else:
        gzip_label = ''

    b64_query = base64.urlsafe_b64encode(byte_query).decode()
    # dbgprint(url_mime)
    mixed_path = url_sp.path + '_' + _url_salt + gzip_label + '_.' \
                 + b64_query \
                 + '._' + _url_salt + '_.' + mime_to_use_cdn[url_mime]
    result = urlunsplit((url_sp.scheme, url_sp.netloc, mixed_path, '', ''))

    if escape_slash:
        result = s_esc(result)
        # dbgprint('embed:', real_url_raw, 'to:', result)
    return result