Python Examples of gzip.compress

Source File: dataserializer.py From mars with Apache License 2.0

6 votes

def loads(buf):
    """Deserialize an object from a buffer that begins with a file header.

    The header (parsed by ``read_file_header``) supplies the compression
    type and the serialization type of the payload that follows it at
    offset ``HEADER_LENGTH``.

    :param buf: bytes-like object holding header + payload
    :return: the deserialized object
    """
    view = memoryview(buf)
    header = read_file_header(view)
    codec = header.compress

    # Uncompressed payloads are sliced straight from ``buf``; anything else
    # goes through the decompressor registered for that compression type.
    if codec == CompressType.NONE:
        payload = buf[HEADER_LENGTH:]
    else:
        payload = decompressors[codec](view[HEADER_LENGTH:])

    if header.type != SerialType.ARROW:
        return pickle.loads(payload)

    try:
        return pyarrow.deserialize(memoryview(payload), mars_serialize_context())
    except pyarrow.lib.ArrowInvalid:  # pragma: no cover
        # Fallback layout: a 4-byte int32 giving the size of a pickled meta
        # dict, the meta dict itself, then the raw buffers back to back.
        payload_view = memoryview(payload)
        meta_size = np.frombuffer(payload_view[0:4], dtype='int32').item()
        meta = pickle.loads(payload_view[4:4 + meta_size])  # nosec
        sizes = meta.pop('buffer_sizes')
        # Cumulative offsets: element i is the start of buffer i, element
        # i + 1 is its end.
        offsets = np.cumsum([4 + meta_size] + sizes)
        meta['data'] = [pyarrow.py_buffer(payload_view[start:stop])
                        for start, stop in zip(offsets[:-1], offsets[1:])]
        return pyarrow.deserialize_components(meta, mars_serialize_context())

Source File: mmtfWriter.py From mmtf-pyspark with Apache License 2.0

6 votes

def _to_byte_array(structure, compressed):
    '''Pack ``structure.input_data`` with msgpack, optionally gzipping it

    Parameters
    ----------
    structure : object
       object exposing an ``input_data`` attribute to be packed
    compressed : bool
       if true, gzip the packed bytes

    Returns
    -------
    bytes or bytearray
       ``bytes`` (gzip output) when ``compressed`` is true, otherwise the
       packed data as a ``bytearray``
    '''
    # Alternative kept from the original author:
    #   msgpack.packb(MMTFEncoder.encode_data(structure), use_bin_type = True)
    packed = bytearray(msgpack.packb(structure.input_data, use_bin_type=True))
    return gzip.compress(packed) if compressed else packed

Source File: test_loadurl.py From cjworkbench with GNU Affero General Public License v3.0

6 votes

def test_fetch_deflate_encoded_csv(self):
        """A response sent with ``Content-Encoding: deflate`` is stored decoded."""
        expected = b"A,B\nx,y\nz,a"
        # Negative wbits produces a raw deflate stream (no zlib header/trailer).
        deflater = zlib.compressobj(wbits=-zlib.MAX_WBITS)
        deflated = deflater.compress(expected) + deflater.flush()
        url = self.build_url("/path/to.csv.gz")
        response_headers = [
            ("Content-Type", "text/csv; charset=utf-8"),
            ("Content-Encoding", "deflate"),
        ]
        self.mock_http_response = MockHttpResponse.ok(deflated, response_headers)
        with call_fetch(url) as result:
            self.assertEqual(result.errors, [])
            with httpfile.read(result.path) as (_, __, headers, body_path):
                self.assertEqual(body_path.read_bytes(), expected)

Source File: routeconfig.py From metrics-mvp with MIT License

6 votes

def save_routes(agency_id, routes, save_to_s3=False):
    """Write the routes of an agency to the local cache and optionally to S3.

    The routes are serialized as compact JSON (no whitespace after
    separators). The S3 copy is gzip-compressed and uploaded with a
    public-read ACL.
    """
    payload = {
        'version': DefaultVersion,
        'routes': [route.data for route in routes],
    }
    data_str = json.dumps(payload, separators=(',', ':'))

    cache_path = get_cache_path(agency_id)
    with open(cache_path, "w") as cache_file:
        cache_file.write(data_str)

    if not save_to_s3:
        return

    s3 = boto3.resource('s3')
    s3_path = get_s3_path(agency_id)
    s3_bucket = config.s3_bucket
    print(f'saving to s3://{s3_bucket}/{s3_path}')
    s3_object = s3.Object(s3_bucket, s3_path)
    s3_object.put(
        Body=gzip.compress(bytes(data_str, 'utf-8')),
        CacheControl='max-age=86400',
        ContentType='application/json',
        ContentEncoding='gzip',
        ACL='public-read',
    )

Source File: utils.py From esi-knife with MIT License

6 votes

def write_data(uuid, data):
    """Store ``data`` in the cache under the completed-key for ``uuid``.

    The value is JSON-dumped, compressed, base64-encoded and stored as text.
    Any failure is logged as a warning instead of being raised.
    """

    try:
        cache_key = "{}{}".format(Keys.complete.value, uuid)
        raw = codecs.encode(ujson.dumps(data), "utf-8")
        packed = base64.b64encode(compress(raw))
        CACHE.set(cache_key, codecs.decode(packed, "utf-8"), timeout=EXPIRY)
    except Exception as error:
        LOG.warning("Failed to save data: %r", error)

Source File: userdata.py From kOVHernetes with Apache License 2.0

6 votes

def gen_kubeconfig(self, component, server='localhost'):
        """Register a compressed kubeconfig file for ``component``.

        The kubeconfig template is loaded from ``files``, pointed at the
        component's client certificate and at ``server`` on port 6443, then
        compressed and embedded as a ``data:`` URL.
        """

        config = loads(files['kubeconfig'].decode(), object_pairs_hook=OrderedDict)
        config['users'][0]['user']['client-certificate'] = 'tls/client/{}.crt'.format(component)
        config['clusters'][0]['cluster']['server'] = 'https://' + server + ':6443'

        rendered = (dumps(config, indent=2) + '\n').encode()
        packed = compress(rendered)

        entry = {
            'filesystem': 'root',
            'path': '/etc/kubernetes/kubeconfig-' + component + '.gz',
            'mode': 0o640,  # same value as decimal 416
            'contents': {
                'source': 'data:,' + quote(packed)
            }
        }
        self.add_files([entry])

Source File: data.py From pyAFQ with BSD 2-Clause "Simplified" License

6 votes

def s3fs_nifti_write(img, fname, fs=None):
    """
    Write a nifti file straight to S3

    Parameters
    ----------
    img : nib.Nifti1Image class instance
        The image containing data to be written into S3
    fname : string
        Full path (including bucket name and extension) to the S3 location
        where the file is to be saved.
    fs : an s3fs.S3FileSystem class instance, optional
        A file-system to refer to. Default to create a new file-system
    """
    if fs is None:
        fs = s3fs.S3FileSystem()

    # Header and image are serialized into the same in-memory buffer, which
    # is then gzipped as a whole before upload.
    buffer = BytesIO()
    img.to_file_map(img.make_file_map({'image': buffer, 'header': buffer}))
    compressed = gzip.compress(buffer.getvalue())
    with fs.open(fname, 'wb') as remote:
        remote.write(compressed)

Source File: datauri.py From igv-reports with MIT License

6 votes

def get_data_uri(data):

    """
    Return a base64 data uri for the input, which can be either a string or
    byte array.

    Strings are encoded and compressed, and labelled ``application/gzip``.
    Byte input is labelled ``application/gzip`` when it starts with the gzip
    magic number (0x1f 0x8b) and ``application/octet-stream`` otherwise;
    it is embedded as-is in both cases.
    """

    if isinstance(data, str):
        data = compress(data.encode())
        mediatype = "data:application/gzip"
    elif bytes(data[:2]) == b"\x1f\x8b":
        # Slicing (instead of indexing) keeps inputs shorter than two bytes
        # from raising IndexError.
        mediatype = "data:application/gzip"
    else:
        mediatype = "data:application/octet-stream"

    # base64 output is pure ASCII, so decoding is safe and avoids slicing
    # the repr of a bytes object.
    return mediatype + ";base64," + b64encode(data).decode("ascii")

Source File: userdata.py From kOVHernetes with Apache License 2.0

6 votes

def gen_kubemanifest(self, component, tag):
        """Register a compressed Kubernetes Pod manifest for ``component``.

        The manifest template is loaded from ``files``, its first container
        image is set to the hyperkube image for ``self.k8s_ver``, and the
        result is compressed and embedded as a ``data:`` URL.
        """
        # NOTE(review): ``tag`` is accepted but never used here; the image
        # version comes from ``self.k8s_ver`` — confirm this is intended.

        pod = loads(files[component].decode(), object_pairs_hook=OrderedDict)
        pod['spec']['containers'][0]['image'] = 'k8s.gcr.io/hyperkube:v{}'.format(self.k8s_ver)

        rendered = (dumps(pod, indent=2) + '\n').encode()
        packed = compress(rendered)

        entry = {
            'filesystem': 'root',
            'path': '/etc/kubernetes/manifests/kube-{}.json'.format(component) + '.gz',
            'mode': 0o640,  # same value as decimal 416
            'contents': {
                'source': 'data:,' + quote(packed)
            }
        }
        self.add_files([entry])

Source File: cli.py From esi-knife with MIT License

6 votes

def write_results(results, character_id):
    """Write the results to a compressed, base64-encoded .knife file.

    The file is named ``<character_id>.knife``; if that name is taken, the
    first free ``<character_id>-<n>.knife`` (n = 1, 2, ...) is used.
    """

    fname = "{}.knife".format(character_id)
    suffix = 1
    while os.path.isfile(fname):
        fname = "{}-{}.knife".format(character_id, suffix)
        suffix += 1

    with open(fname, "w") as openout:
        raw = codecs.encode(json.dumps(results), "utf-8")
        packed = base64.b64encode(compress(raw))
        openout.write(codecs.decode(packed, "utf-8"))

    print("created {}".format(fname))

Source File: shp2json.py From handson-labs-2018 with MIT License

6 votes

def upload_s3(bucket, json_file, metadata):
    """
    Gzip a file and upload it to S3
    :param bucket: name of the destination bucket
    :param json_file: name of the json file to upload
    :param metadata: passed through as the object's ``Metadata``
    :return:
    """
    compressed_name = f"{json_file}.gz"
    key = f"json/{path.basename(compressed_name)}"
    print("업로드", compressed_name, key)

    with open(json_file, 'rb') as source:
        body = gzip.compress(source.read())
        s3.put_object(
            Body=body,
            Bucket=bucket,
            ContentEncoding='gzip',
            # NOTE(review): 'string' looks like a placeholder value — confirm.
            ContentLanguage='string',
            ContentType='application/json',
            Key=key,
            # TODO: add metadata - 2018-07-28
            Metadata=metadata,
        )

Source File: dataserializer.py From mars with Apache License 2.0

6 votes

def dump(obj, file, *, serial_type=None, compress=None, pickle_protocol=None):
    """Serialize ``obj`` into ``file`` as a file header followed by the payload.

    :param obj: object to serialize
    :param file: writable file object; the header is written to it directly
    :param serial_type: ``SerialType`` to use; defaults to ARROW when pyarrow
        is available, otherwise PICKLE
    :param compress: ``CompressType`` for the payload; defaults to NONE
    :param pickle_protocol: pickle protocol for the PICKLE path; defaults to
        ``pickle.HIGHEST_PROTOCOL``
    """
    if serial_type is None:
        serial_type = SerialType.ARROW if pyarrow is not None else SerialType.PICKLE
    if compress is None:
        compress = CompressType.NONE

    # Stays None until open_compression_file() has returned, so that a failure
    # while serializing or writing the header never closes the caller's file.
    payload_file = None
    try:
        if serial_type == SerialType.ARROW:
            serialized = pyarrow.serialize(obj, mars_serialize_context())
            data_size = serialized.total_bytes
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            payload_file = open_compression_file(file, compress)
            serialized.write_to(payload_file)
        else:
            pickle_protocol = pickle_protocol or pickle.HIGHEST_PROTOCOL
            serialized = pickle.dumps(obj, protocol=pickle_protocol)
            data_size = len(serialized)
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            payload_file = open_compression_file(file, compress)
            payload_file.write(serialized)
    finally:
        # Close only the object returned by open_compression_file(), and only
        # when compression is enabled (as the original did on success).
        if payload_file is not None and compress != CompressType.NONE:
            payload_file.close()

Source File: mmtfWriter.py From mmtf-pyspark with Apache License 2.0

6 votes

def write_sequence_file(path, structure, compressed=True):
    '''Encodes and writes MMTF encoded structure data to a Hadoop Sequence File

    Parameters
    ----------
    path : str
       Path to Hadoop file directory
    structure : object
       collection of (key, structure) pairs exposing ``map`` and
       ``saveAsHadoopFile`` (presumably a Spark RDD -- confirm with callers)
    compressed : bool
       if true, apply gzip compression
    '''
    # Can't apply first() function on list

    encoded = structure.map(lambda t: (t[0], _to_byte_array(t[1], compressed)))
    encoded.saveAsHadoopFile(
        path,
        "org.apache.hadoop.mapred.SequenceFileOutputFormat",
        "org.apache.hadoop.io.Text",
        "org.apache.hadoop.io.BytesWritable",
    )

Source File: test_persistence.py From palladium with Apache License 2.0

5 votes

def test_download(self, mocked_requests, persister):
        """ test download and activation of a model """
        expected = Dummy(name='mymodel', __metadata__={})
        model_payload = gzip.compress(pickle.dumps(expected))

        # Metadata endpoint: announces version 1 and marks it active.
        metadata_url = "%s/mymodel-metadata.json" % (self.base_url,)
        mocked_requests.head(metadata_url, status_code=200)
        metadata_body = {"models": [{"version": 1}],
                         "properties": {'active-model': 1}}
        get_md = mocked_requests.get(
            metadata_url,
            json=metadata_body,
            status_code=200,
            )

        # Model endpoint: serves the gzipped pickle of the expected model.
        model_url = "%s/mymodel-1.pkl.gz" % (self.base_url,)
        mocked_requests.head(model_url, status_code=200)
        get_model = mocked_requests.get(
            model_url,
            content=model_payload,
            status_code=200,
            )

        loaded = persister.read()
        assert get_md.called
        assert get_model.called
        assert loaded == expected
        self.assert_auth_headers(mocked_requests)

Source File: v21_to_v22.py From anvio with GNU General Public License v3.0

5 votes

def convert_numpy_array_to_binary_blob(array, compress=True):
    """Return the array's buffer, gzipped at level 1 unless ``compress`` is false.

    With ``compress`` false the result is a ``memoryview`` of ``array``;
    otherwise it is the ``bytes`` produced by ``gzip.compress``.
    """
    view = memoryview(array)
    return gzip.compress(view, compresslevel=1) if compress else view

Source File: minio_storage_for_collectstatic.py From cjworkbench with GNU Affero General Public License v3.0

5 votes

def _upload_in_thread(self, name: str, data: bytes) -> None:
        """
        Upload ``data`` to the static-files bucket under key ``name``.

        The content type is guessed from ``name``. Text-like payloads
        (``text/*`` or an xml/json/javascript subtype) are gzipped and sent
        with ``ContentEncoding: gzip``.

        Raise an exception if the file is not certainly uploaded.
        """

        guessed_type, _ = mimetypes.guess_type(name, strict=False)
        content_type = guessed_type or "application/octet-stream"

        compressible = content_type.startswith("text") or content_type.split("/")[1] in (
            "xml",
            "json",
            "javascript",
        )

        extra_args = {}
        if compressible:
            data = gzip.compress(data)
            extra_args["ContentEncoding"] = "gzip"

        client.put_object(
            Body=data,
            Bucket=StaticFilesBucket,
            Key=name,
            ContentLength=len(data),
            ContentType=content_type,
            # These are static files, but only Webpack-generated files have
            # hashed filenames. Logos and whatnot don't. So let's tell the
            # browser to cache for one day, to time-bound the damage when we
            # deploy a new version of our logo and users keep the old one.
            CacheControl="public, max-age=86400",
            **extra_args,
        )
        logger.info("Finished uploading %s (%d bytes)" % (name, len(data)))

Source File: test_persistence.py From palladium with Apache License 2.0

5 votes

def dbmodel(self, database):
        """Store a dummy model in ``database`` as 4-byte chunks and return it."""
        from palladium.util import session_scope

        model = Dummy(
            name='mymodel',
            __metadata__={'some': 'metadata', 'version': 1},
            )

        # compresslevel=0: gzip container without actual compression.
        blob = gzip.compress(pickle.dumps(model), compresslevel=0)
        chunk_size = 4
        db_chunks = [
            database.DBModelChunk(
                model_version=1,
                blob=blob[offset:offset + chunk_size],
                )
            for offset in range(0, len(blob), chunk_size)
            ]

        record = database.DBModel(
            version=1,
            chunks=db_chunks,
            metadata_=json.dumps(model.__metadata__),
            )

        with session_scope(database.session) as session:
            session.add(record)

        return model

Source File: v9_to_v10.py From anvio with GNU General Public License v3.0

5 votes

def convert_numpy_array_to_binary_blob(array, compress=True):
    """Expose ``array`` as a binary blob, gzip-compressed (level 1) by default.

    Returns a ``memoryview`` of ``array`` when ``compress`` is false, and the
    ``bytes`` output of ``gzip.compress`` otherwise.
    """
    if not compress:
        return memoryview(array)
    return gzip.compress(memoryview(array), compresslevel=1)

Source File: huobipro.py From arbcharm with MIT License

5 votes

def compress_msg(msg):
        """Return ``msg`` dumped as JSON, encoded to bytes and gzip-compressed."""
        encoded = json.dumps(msg).encode()
        return gzip.compress(encoded)

Source File: test_format.py From hermit with Apache License 2.0

5 votes

def test_base64(self):
        """Base64-encoded gzipped data is rejected as an invalid signature request."""
        with pytest.raises(hermit.InvalidSignatureRequest):
            gzipped = gzip.compress(_DECODED.encode('utf-8'))
            payload = base64.b64encode(gzipped)
            hermit.decode_qr_code_data(payload)

Source File: test_format.py From hermit with Apache License 2.0

5 votes

def test_not_utf8(self):
        """Base32-encoded gzipped UTF-16 text is rejected as an invalid signature request."""
        with pytest.raises(hermit.InvalidSignatureRequest):
            gzipped = gzip.compress(_DECODED.encode('utf-16'))
            payload = base64.b32encode(gzipped)
            hermit.decode_qr_code_data(payload)

Source File: precomputed_stats.py From metrics-mvp with MIT License

5 votes

def save_stats(agency_id, stat_id, d, start_time_str, end_time_str, scheduled, data, save_to_s3=False):
    """Write precomputed stats to the local cache and optionally to S3.

    The entries of ``data`` are merged into a JSON object next to the
    version, stat id and time range, and serialized as compact JSON. The S3
    copy is gzip-compressed and uploaded with a public-read ACL.
    """
    payload = {
        'version': DefaultVersion,
        'stat_id': stat_id,
        'start_time': start_time_str,
        'end_time': end_time_str,
        **data
    }
    data_str = json.dumps(payload, separators=(',', ':'))

    cache_path = get_cache_path(agency_id, stat_id, d, start_time_str, end_time_str, scheduled)

    # Create the cache directory on first use.
    parent_dir = Path(cache_path).parent
    if not parent_dir.exists():
        parent_dir.mkdir(parents=True, exist_ok=True)

    print(f'saving to {cache_path}')
    with open(cache_path, "w") as cache_file:
        cache_file.write(data_str)

    if not save_to_s3:
        return

    s3 = boto3.resource('s3')
    s3_path = get_s3_path(agency_id, stat_id, d, start_time_str, end_time_str, scheduled)
    s3_bucket = config.s3_bucket
    print(f'saving to s3://{s3_bucket}/{s3_path}')
    s3_object = s3.Object(s3_bucket, s3_path)
    s3_object.put(
        Body=gzip.compress(bytes(data_str, 'utf-8')),
        CacheControl='max-age=86400',
        ContentType='application/json',
        ContentEncoding='gzip',
        ACL='public-read',
    )

Source File: benchmark_zip.py From rssant with BSD 3-Clause "New" or "Revised" License

5 votes

def main():
    """Print payload sizes, then time gzip vs lz4 compression and decompression.

    Each timing is 1000 calls; ten rounds are printed for compression, then
    ten for decompression, as ``gzip_time lz4_time`` pairs.
    """
    print(len(data), len(data_gzip), len(data_lz4))
    for _ in range(10):
        gzip_time = timeit.timeit(lambda: gzip.compress(data), number=1000)
        lz4_time = timeit.timeit(lambda: lz4.compress(data), number=1000)
        print(gzip_time, lz4_time)
    for _ in range(10):
        gzip_time = timeit.timeit(lambda: gzip.decompress(data_gzip), number=1000)
        lz4_time = timeit.timeit(lambda: lz4.decompress(data_lz4), number=1000)
        print(gzip_time, lz4_time)

Source File: story_data.py From rssant with BSD 3-Clause "New" or "Revised" License

5 votes

def encode(self) -> bytes:
        """Return the one-byte version tag followed by the encoded value.

        ``self._version`` selects the encoding of ``self._value``: gzip
        (level 5), lz4 (level 7) or raw bytes.

        Raises:
            AssertionError: if ``self._version`` is not a known version.
        """
        version = struct.pack('>B', self._version)
        if self._version == self.VERSION_GZIP:
            data_bytes = gzip.compress(self._value, compresslevel=5)
        elif self._version == self.VERSION_LZ4:
            data_bytes = lz4.compress(self._value, compression_level=7)
        elif self._version == self.VERSION_RAW:
            data_bytes = self._value
        else:
            # Raised explicitly so the check survives ``python -O``; the
            # message reports the numeric version, not its packed bytes.
            raise AssertionError(f'unknown version {self._version}')
        return version + data_bytes

Source File: feed.py From rssant with BSD 3-Clause "New" or "Revised" License

5 votes

def set_content(self, content):
        """Store ``content``, gzipped at level 9 when it is at least 1024 long.

        ``self.is_gzipped`` records whether the stored value was compressed.
        Empty or ``None`` content is stored unchanged.
        """
        should_gzip = bool(content) and len(content) >= 1024
        stored = gzip.compress(content, compresslevel=9) if should_gzip else content
        self.content = stored
        self.is_gzipped = should_gzip

Source File: story_unique_ids.py From rssant with BSD 3-Clause "New" or "Revised" License

5 votes

def encode(self) -> bytes:
        """Return a ``>BI`` header (version, begin offset) plus the gzipped ids.

        The unique ids are joined with newlines and UTF-8 encoded before
        being compressed.
        """
        header = struct.pack('>BI', self._version, self._begin_offset)
        joined_ids = '\n'.join(self._unique_ids)
        body = gzip.compress(joined_ids.encode('utf-8'))
        return header + body

Source File: message.py From rssant with BSD 3-Clause "New" or "Revised" License

5 votes

def raw_encode(cls, data, content_encoding=None):
        """Serialize ``data`` as JSON or msgpack, gzipping it if the encoding asks.

        ``ValueError`` and ``TypeError`` raised while serializing are
        re-raised as ``ActorMessageEncodeError`` with the original as cause.
        """
        encoding = ContentEncoding.of(content_encoding)
        try:
            if encoding.is_json:
                payload = json.dumps(data, ensure_ascii=False).encode('utf-8')
            else:
                payload = msgpack.packb(data, use_bin_type=True)
        except (ValueError, TypeError) as ex:
            raise ActorMessageEncodeError(str(ex)) from ex
        return gzip.compress(payload) if encoding.is_gzip else payload

Source File: userdata.py From kOVHernetes with Apache License 2.0

5 votes

def res_gzip(resource):
    """Return the output of ``res_plain(resource)`` passed through ``compress``"""
    plain = res_plain(resource)
    return compress(plain)

# Reusable data from static files

Source File: arrival_history.py From metrics-mvp with MIT License

5 votes

def save_for_date(history: ArrivalHistory, d: date, s3=False):
    """Write one day of arrival history to the local cache and optionally to S3.

    The history data is dumped as JSON into the cache path derived from the
    agency, route, date and version. When ``s3`` is truthy a gzip-compressed
    copy is uploaded with a public-read ACL.
    """
    data_str = json.dumps(history.get_data())

    cache_path = get_cache_path(history.agency_id, history.route_id, d, history.version)

    # Create the cache directory on first use.
    parent_dir = Path(cache_path).parent
    if not parent_dir.exists():
        parent_dir.mkdir(parents=True, exist_ok=True)

    with open(cache_path, "w") as cache_file:
        cache_file.write(data_str)

    if not s3:
        return

    s3_resource = boto3.resource('s3')
    s3_path = get_s3_path(history.agency_id, history.route_id, d, history.version)
    s3_bucket = config.s3_bucket
    print(f'saving to s3://{s3_bucket}/{s3_path}')
    s3_object = s3_resource.Object(s3_bucket, s3_path)
    s3_object.put(
        Body=gzip.compress(bytes(data_str, 'utf-8')),
        ContentType='application/json',
        ContentEncoding='gzip',
        ACL='public-read',
    )

Source File: s3.py From S4 with GNU General Public License v3.0

5 votes

def flush_index(self, compressed=True):
        """Upload ``self.index`` as UTF-8 JSON to the index path, gzipped by default."""
        payload = json.dumps(self.index).encode("utf-8")
        if not compressed:
            logger.debug("Using plain text encoding for writing index")
        else:
            logger.debug("Using gzip encoding for writing index")
            payload = gzip.compress(payload)

        self.boto.put_object(Bucket=self.bucket, Key=self.index_path(), Body=payload)

Python gzip.compress() Examples