Python gzip.compress() Examples
The following are 30 code examples of gzip.compress().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module gzip, or try the search function.
Example #1
Source File: dataserializer.py From mars with Apache License 2.0 | 6 votes |
def loads(buf):
    """Deserialize an object from a header-prefixed buffer.

    The header (parsed by ``read_file_header``) tells which compression and
    which serializer were used for the payload that follows it.

    :param buf: buffer holding the header followed by the payload
    :return: the deserialized object
    """
    view = memoryview(buf)
    header = read_file_header(view)
    compress_type = header.compress

    if compress_type == CompressType.NONE:
        payload = buf[HEADER_LENGTH:]
    else:
        payload = decompressors[compress_type](view[HEADER_LENGTH:])

    if header.type != SerialType.ARROW:
        return pickle.loads(payload)

    try:
        return pyarrow.deserialize(memoryview(payload), mars_serialize_context())
    except pyarrow.lib.ArrowInvalid:  # pragma: no cover
        # Fall back to rebuilding the value from its arrow components:
        # a 4-byte int32 length, a pickled meta block of that length,
        # then the component buffers back to back.
        raw = memoryview(payload)
        meta_len = np.frombuffer(raw[0:4], dtype='int32').item()
        meta_end = 4 + meta_len
        meta = pickle.loads(raw[4:meta_end])  # nosec
        sizes = meta.pop('buffer_sizes')
        # Running sum gives the start offset of every buffer plus the final end.
        offsets = np.cumsum([meta_end] + sizes)
        meta['data'] = [
            pyarrow.py_buffer(raw[start:stop])
            for start, stop in zip(offsets[:-1], offsets[1:])
        ]
        return pyarrow.deserialize_components(meta, mars_serialize_context())
Example #2
Source File: mmtfWriter.py From mmtf-pyspark with Apache License 2.0 | 6 votes |
def _to_byte_array(structure, compressed):
    '''Pack a structure's ``input_data`` with msgpack, optionally gzipping it

    Parameters
    ----------
    structure : object
       object exposing the ``input_data`` attribute that gets packed
    compressed : bool
       if true, gzip the packed bytes

    Returns
    -------
    bytes or bytearray
       gzip output (``bytes``) when compressed, otherwise the packed ``bytearray``
    '''
    packed = bytearray(msgpack.packb(structure.input_data, use_bin_type=True))
    if not compressed:
        return packed
    return gzip.compress(packed)
Example #3
Source File: test_loadurl.py From cjworkbench with GNU Affero General Public License v3.0 | 6 votes |
def test_fetch_deflate_encoded_csv(self):
    """Fetching a ``Content-Encoding: deflate`` response yields the original bytes."""
    expected = b"A,B\nx,y\nz,a"
    # Negative wbits makes zlib emit a raw deflate stream (no zlib header).
    deflater = zlib.compressobj(wbits=-zlib.MAX_WBITS)
    deflated = deflater.compress(expected) + deflater.flush()
    url = self.build_url("/path/to.csv.gz")
    response_headers = [
        ("Content-Type", "text/csv; charset=utf-8"),
        ("Content-Encoding", "deflate"),
    ]
    self.mock_http_response = MockHttpResponse.ok(deflated, response_headers)
    with call_fetch(url) as result:
        self.assertEqual(result.errors, [])
        with httpfile.read(result.path) as (_, __, _headers, body_path):
            self.assertEqual(body_path.read_bytes(), expected)
Example #4
Source File: routeconfig.py From metrics-mvp with MIT License | 6 votes |
def save_routes(agency_id, routes, save_to_s3=False):
    """Write the routes of an agency to the local cache and optionally to S3.

    :param agency_id: agency whose cache/S3 paths are looked up
    :param routes: iterable of objects exposing a ``data`` attribute
    :param save_to_s3: when true, also upload a gzipped copy to S3
    """
    payload = {
        'version': DefaultVersion,
        'routes': [route.data for route in routes],
    }
    # Compact separators keep the cached JSON as small as possible.
    serialized = json.dumps(payload, separators=(',', ':'))

    local_path = get_cache_path(agency_id)
    with open(local_path, "w") as cache_file:
        cache_file.write(serialized)

    if not save_to_s3:
        return

    s3 = boto3.resource('s3')
    s3_path = get_s3_path(agency_id)
    s3_bucket = config.s3_bucket
    print(f'saving to s3://{s3_bucket}/{s3_path}')
    s3.Object(s3_bucket, s3_path).put(
        Body=gzip.compress(bytes(serialized, 'utf-8')),
        CacheControl='max-age=86400',
        ContentType='application/json',
        ContentEncoding='gzip',
        ACL='public-read',
    )
Example #5
Source File: utils.py From esi-knife with MIT License | 6 votes |
def write_data(uuid, data):
    """Try to store the data, log errors."""
    try:
        cache_key = "{}{}".format(Keys.complete.value, uuid)
        # JSON text -> UTF-8 bytes -> gzip -> base64 -> text.
        json_bytes = codecs.encode(ujson.dumps(data), "utf-8")
        b64_bytes = base64.b64encode(compress(json_bytes))
        CACHE.set(cache_key, codecs.decode(b64_bytes, "utf-8"), timeout=EXPIRY)
    except Exception as error:
        # Best effort: a failure to store is logged, never raised.
        LOG.warning("Failed to save data: %r", error)
Example #6
Source File: userdata.py From kOVHernetes with Apache License 2.0 | 6 votes |
def gen_kubeconfig(self, component, server='localhost'):
    """Generate a gzipped kubeconfig for `component` and register it via add_files"""
    config = loads(files['kubeconfig'].decode(), object_pairs_hook=OrderedDict)
    config['users'][0]['user']['client-certificate'] = 'tls/client/{}.crt'.format(component)
    config['clusters'][0]['cluster']['server'] = 'https://' + server + ':6443'

    rendered = dumps(config, indent=2) + '\n'
    gzipped = compress(rendered.encode())

    entry = {
        'filesystem': 'root',
        'path': '/etc/kubernetes/kubeconfig-' + component + '.gz',
        'mode': 416,  # 0640
        'contents': {
            'source': 'data:,' + quote(gzipped)
        }
    }
    self.add_files([entry])
Example #7
Source File: data.py From pyAFQ with BSD 2-Clause "Simplified" License | 6 votes |
def s3fs_nifti_write(img, fname, fs=None):
    """
    Gzip a nifti image in memory and write it straight to S3

    Parameters
    ----------
    img : nib.Nifti1Image class instance
        The image containing data to be written into S3
    fname : string
        Full path (including bucket name and extension) to the S3 location
        where the file is to be saved.
    fs : an s3fs.S3FileSystem class instance, optional
        A file-system to refer to. Default to create a new file-system
    """
    if fs is None:
        fs = s3fs.S3FileSystem()

    # Header and image share one in-memory buffer.
    buffer = BytesIO()
    img.to_file_map(img.make_file_map({'image': buffer, 'header': buffer}))

    compressed = gzip.compress(buffer.getvalue())
    with fs.open(fname, 'wb') as remote:
        remote.write(compressed)
Example #8
Source File: datauri.py From igv-reports with MIT License | 6 votes |
def get_data_uri(data):
    """
    Return a base64 data uri for the input, which can be either a string or byte array

    A string is UTF-8 encoded and gzipped, and labelled ``application/gzip``.
    Bytes are embedded as-is: they are labelled ``application/gzip`` when they
    start with the gzip magic number (0x1f 0x8b) and
    ``application/octet-stream`` otherwise.
    """
    if isinstance(data, str):
        data = compress(data.encode())
        mediatype = "data:application/gzip"
    elif bytes(data[:2]) == b"\x1f\x8b":
        # Slicing (rather than indexing) keeps empty / one-byte input from raising.
        mediatype = "data:application/gzip"
    else:
        # MIME types are "type/subtype"; the separator is a slash.
        mediatype = "data:application/octet-stream"

    # b64encode output is pure ASCII, so decode it instead of slicing its repr.
    return mediatype + ";base64," + b64encode(data).decode("ascii")
Example #9
Source File: userdata.py From kOVHernetes with Apache License 2.0 | 6 votes |
def gen_kubemanifest(self, component, tag):
    """Generate a gzipped Kubernetes Pod manifest and register it via add_files"""
    pod = loads(files[component].decode(), object_pairs_hook=OrderedDict)
    pod['spec']['containers'][0]['image'] = 'k8s.gcr.io/hyperkube:v{}'.format(self.k8s_ver)

    rendered = dumps(pod, indent=2) + '\n'
    gzipped = compress(rendered.encode())

    entry = {
        'filesystem': 'root',
        'path': '/etc/kubernetes/manifests/kube-{}.json'.format(component) + '.gz',
        'mode': 416,  # 0640
        'contents': {
            'source': 'data:,' + quote(gzipped)
        }
    }
    self.add_files([entry])
Example #10
Source File: cli.py From esi-knife with MIT License | 6 votes |
def write_results(results, character_id):
    """Write the results to a compressed .knife file.

    The file is named ``<character_id>.knife``; if that name is taken, a
    ``-<n>`` suffix is added with the first free ``n`` starting at 1.
    The content is base64 text of the gzipped UTF-8 JSON of ``results``.
    """
    target = "{}.knife".format(character_id)
    attempt = 0
    while os.path.isfile(target):
        attempt += 1
        target = "{}-{}.knife".format(character_id, attempt)

    with open(target, "w") as out_file:
        json_bytes = codecs.encode(json.dumps(results), "utf-8")
        b64_bytes = base64.b64encode(compress(json_bytes))
        out_file.write(codecs.decode(b64_bytes, "utf-8"))

    print("created {}".format(target))
Example #11
Source File: shp2json.py From handson-labs-2018 with MIT License | 6 votes |
def upload_s3(bucket, json_file, metadata):
    """
    Gzip a file and upload it to S3 under the ``json/`` prefix

    :param bucket: name of the target S3 bucket
    :param json_file: name of the JSON file to upload
    :param metadata: value passed through as the object's ``Metadata``
    :return:
    """
    gz_name = f"{json_file}.gz"
    obj_key = f"json/{path.basename(gz_name)}"
    print("업로드", gz_name, obj_key)

    with open(json_file, 'rb') as source:
        compressed = gzip.compress(source.read())
        s3.put_object(
            Body=compressed,
            Bucket=bucket,
            ContentEncoding='gzip',
            # NOTE(review): 'string' looks like a documentation placeholder
            # rather than a real language tag -- confirm the intended value.
            ContentLanguage='string',
            ContentType='application/json',
            Key=obj_key,
            # TODO: add metadata - 2018-07-28
            Metadata=metadata,
        )
Example #12
Source File: dataserializer.py From mars with Apache License 2.0 | 6 votes |
def dump(obj, file, *, serial_type=None, compress=None, pickle_protocol=None):
    """Serialize ``obj`` into ``file`` as a header followed by the payload.

    :param obj: object to serialize
    :param file: writable file object that receives header and payload
    :param serial_type: serializer to use; defaults to ARROW when pyarrow is
        available, PICKLE otherwise
    :param compress: compression type; defaults to ``CompressType.NONE``
    :param pickle_protocol: protocol for the pickle path; defaults to
        ``pickle.HIGHEST_PROTOCOL``
    """
    if serial_type is None:
        serial_type = SerialType.PICKLE if pyarrow is None else SerialType.ARROW
    if compress is None:
        compress = CompressType.NONE

    try:
        use_arrow = serial_type == SerialType.ARROW
        if use_arrow:
            payload = pyarrow.serialize(obj, mars_serialize_context())
            payload_size = payload.total_bytes
        else:
            pickle_protocol = pickle_protocol or pickle.HIGHEST_PROTOCOL
            payload = pickle.dumps(obj, protocol=pickle_protocol)
            payload_size = len(payload)

        # The header goes to the raw file; only the payload goes through
        # the (possibly compressing) wrapper opened afterwards.
        write_file_header(file, file_header(serial_type, SERIAL_VERSION, payload_size, compress))
        file = open_compression_file(file, compress)
        if use_arrow:
            payload.write_to(file)
        else:
            file.write(payload)
    finally:
        if compress != CompressType.NONE:
            file.close()
Example #13
Source File: mmtfWriter.py From mmtf-pyspark with Apache License 2.0 | 6 votes |
def write_sequence_file(path, structure, compressed=True):
    '''Encodes and writes MMTF encoded structure data to a Hadoop Sequence File

    Parameters
    ----------
    path : str
       Path to Hadoop file directory
    structure : object
       collection providing ``map``; each element is indexed as
       (key, structure data) -- presumably a Spark RDD, verify against callers
    compressed : bool
       if true, apply gzip compression
    '''
    # Keep the key, replace the structure with its (optionally gzipped) bytes.
    encoded = structure.map(lambda pair: (pair[0], _to_byte_array(pair[1], compressed)))
    encoded.saveAsHadoopFile(path,
                             "org.apache.hadoop.mapred.SequenceFileOutputFormat",
                             "org.apache.hadoop.io.Text",
                             "org.apache.hadoop.io.BytesWritable")
Example #14
Source File: test_persistence.py From palladium with Apache License 2.0 | 5 votes |
def test_download(self, mocked_requests, persister):
    """ test download and activation of a model """
    expected = Dummy(name='mymodel', __metadata__={})
    zipped_model = gzip.compress(pickle.dumps(expected))

    # Metadata endpoint: one model version, marked active.
    get_md_url = "%s/mymodel-metadata.json" % (self.base_url,)
    md_payload = {"models": [{"version": 1}],
                  "properties": {'active-model': 1}}
    mocked_requests.head(get_md_url, status_code=200)
    get_md = mocked_requests.get(get_md_url, json=md_payload, status_code=200)

    # Model endpoint: serves the gzipped pickle.
    get_model_url = "%s/mymodel-1.pkl.gz" % (self.base_url,)
    mocked_requests.head(get_model_url, status_code=200)
    get_model = mocked_requests.get(get_model_url, content=zipped_model, status_code=200)

    model = persister.read()

    assert get_md.called
    assert get_model.called
    assert model == expected
    self.assert_auth_headers(mocked_requests)
Example #15
Source File: v21_to_v22.py From anvio with GNU General Public License v3.0 | 5 votes |
def convert_numpy_array_to_binary_blob(array, compress=True):
    """Return the buffer of `array` as a blob.

    With `compress` true the buffer is gzipped at compression level 1 and
    returned as bytes; otherwise a memoryview over `array` is returned.
    """
    view = memoryview(array)
    if not compress:
        return view
    return gzip.compress(view, compresslevel=1)
Example #16
Source File: minio_storage_for_collectstatic.py From cjworkbench with GNU Affero General Public License v3.0 | 5 votes |
def _upload_in_thread(self, name: str, data: bytes) -> None:
    """
    Upload `data` under key `name`, gzipping textual content types.

    Raise an exception if the file is not certainly uploaded.
    """
    guessed_type, _ = mimetypes.guess_type(name, strict=False)
    content_type = guessed_type or "application/octet-stream"

    extra_args = {}
    if content_type.startswith("text") or content_type.split("/")[1] in (
        "xml",
        "json",
        "javascript",
    ):
        data = gzip.compress(data)
        extra_args["ContentEncoding"] = "gzip"

    size = len(data)
    client.put_object(
        Body=data,
        Bucket=StaticFilesBucket,
        Key=name,
        ContentLength=size,
        ContentType=content_type,
        # Only Webpack-generated files carry hashed filenames; logos and the
        # like do not. A one-day browser cache bounds how long users keep a
        # stale copy after we deploy a new version of such a file.
        CacheControl="public, max-age=86400",
        **extra_args,
    )
    logger.info("Finished uploading %s (%d bytes)" % (name, size))
Example #17
Source File: test_persistence.py From palladium with Apache License 2.0 | 5 votes |
def dbmodel(self, database):
    """Store a gzipped, chunked pickle of a Dummy model and return the model."""
    from palladium.util import session_scope

    model = Dummy(
        name='mymodel',
        __metadata__={'some': 'metadata', 'version': 1},
    )
    model_blob = gzip.compress(pickle.dumps(model), compresslevel=0)

    # Split the blob into 4-byte pieces, one DBModelChunk row per piece.
    chunk_size = 4
    chunk_rows = [
        database.DBModelChunk(
            model_version=1,
            blob=model_blob[start:start + chunk_size],
        )
        for start in range(0, len(model_blob), chunk_size)
    ]
    record = database.DBModel(
        version=1,
        chunks=chunk_rows,
        metadata_=json.dumps(model.__metadata__),
    )

    with session_scope(database.session) as session:
        session.add(record)

    return model
Example #18
Source File: v9_to_v10.py From anvio with GNU General Public License v3.0 | 5 votes |
def convert_numpy_array_to_binary_blob(array, compress=True):
    """Expose `array`'s buffer, gzipped (level 1) unless `compress` is false.

    Returns bytes when compressing and a memoryview otherwise.
    """
    buffer = memoryview(array)
    return gzip.compress(buffer, compresslevel=1) if compress else buffer
Example #19
Source File: huobipro.py From arbcharm with MIT License | 5 votes |
def compress_msg(msg):
    """Serialize `msg` to JSON, encode it with the default codec and gzip it."""
    json_text = json.dumps(msg)
    encoded = json_text.encode()
    return gzip.compress(encoded)
Example #20
Source File: test_format.py From hermit with Apache License 2.0 | 5 votes |
def test_base64(self):
    """Base64 (rather than base32) encoded payloads are rejected."""
    gzipped = gzip.compress(_DECODED.encode('utf-8'))
    payload = base64.b64encode(gzipped)
    with pytest.raises(hermit.InvalidSignatureRequest):
        hermit.decode_qr_code_data(payload)
Example #21
Source File: test_format.py From hermit with Apache License 2.0 | 5 votes |
def test_not_utf8(self):
    """Payloads whose text was encoded as UTF-16 are rejected."""
    gzipped = gzip.compress(_DECODED.encode('utf-16'))
    payload = base64.b32encode(gzipped)
    with pytest.raises(hermit.InvalidSignatureRequest):
        hermit.decode_qr_code_data(payload)
Example #22
Source File: precomputed_stats.py From metrics-mvp with MIT License | 5 votes |
def save_stats(agency_id, stat_id, d, start_time_str, end_time_str, scheduled, data, save_to_s3=False):
    """Write precomputed stats to the local cache and optionally to S3.

    The JSON document holds the version, ``stat_id``, the start/end time
    strings and every key of ``data``. The path arguments (``agency_id``,
    ``stat_id``, ``d``, the time strings and ``scheduled``) are passed to
    ``get_cache_path`` / ``get_s3_path`` to locate the output.
    """
    document = {
        'version': DefaultVersion,
        'stat_id': stat_id,
        'start_time': start_time_str,
        'end_time': end_time_str,
        **data
    }
    serialized = json.dumps(document, separators=(',', ':'))

    cache_path = get_cache_path(agency_id, stat_id, d, start_time_str, end_time_str, scheduled)
    parent_dir = Path(cache_path).parent
    if not parent_dir.exists():
        parent_dir.mkdir(parents = True, exist_ok = True)

    print(f'saving to {cache_path}')
    with open(cache_path, "w") as cache_file:
        cache_file.write(serialized)

    if not save_to_s3:
        return

    s3 = boto3.resource('s3')
    s3_path = get_s3_path(agency_id, stat_id, d, start_time_str, end_time_str, scheduled)
    s3_bucket = config.s3_bucket
    print(f'saving to s3://{s3_bucket}/{s3_path}')
    s3.Object(s3_bucket, s3_path).put(
        Body=gzip.compress(bytes(serialized, 'utf-8')),
        CacheControl='max-age=86400',
        ContentType='application/json',
        ContentEncoding='gzip',
        ACL='public-read',
    )
Example #23
Source File: benchmark_zip.py From rssant with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main():
    """Print payload sizes, then 10 rounds each of compress and decompress timings.

    Every printed pair is (gzip seconds, lz4 seconds) for 1000 calls.
    """
    print(len(data), len(data_gzip), len(data_lz4))

    phases = (
        (lambda: gzip.compress(data), lambda: lz4.compress(data)),
        (lambda: gzip.decompress(data_gzip), lambda: lz4.decompress(data_lz4)),
    )
    for gzip_call, lz4_call in phases:
        for _ in range(10):
            gzip_secs = timeit.timeit(gzip_call, number=1000)
            lz4_secs = timeit.timeit(lz4_call, number=1000)
            print(gzip_secs, lz4_secs)
Example #24
Source File: story_data.py From rssant with BSD 3-Clause "New" or "Revised" License | 5 votes |
def encode(self) -> bytes:
    """Encode the stored value as one version byte followed by the payload.

    The payload is ``self._value`` gzipped (level 5), lz4-compressed
    (level 7) or left raw, depending on ``self._version``.

    Raises:
        AssertionError: if ``self._version`` is not one of the known versions.
    """
    version = struct.pack('>B', self._version)
    if self._version == self.VERSION_GZIP:
        data_bytes = gzip.compress(self._value, compresslevel=5)
    elif self._version == self.VERSION_LZ4:
        data_bytes = lz4.compress(self._value, compression_level=7)
    elif self._version == self.VERSION_RAW:
        data_bytes = self._value
    else:
        # Raised explicitly: an ``assert`` statement is stripped under
        # ``python -O``, which would let execution fall through to an
        # unbound ``data_bytes``.
        raise AssertionError(f'unknown version {version}')
    return version + data_bytes
Example #25
Source File: feed.py From rssant with BSD 3-Clause "New" or "Revised" License | 5 votes |
def set_content(self, content):
    """Store `content`, gzipping (level 9) anything of 1024 items or more.

    Sets ``self.content`` and records in ``self.is_gzipped`` whether the
    stored value is compressed. Falsy content is stored unchanged.
    """
    should_gzip = bool(content) and len(content) >= 1024
    stored = gzip.compress(content, compresslevel=9) if should_gzip else content
    self.content = stored
    self.is_gzipped = should_gzip
Example #26
Source File: story_unique_ids.py From rssant with BSD 3-Clause "New" or "Revised" License | 5 votes |
def encode(self) -> bytes:
    """Encode the unique ids as a binary header followed by a gzip body.

    The header is ``self._version`` (unsigned byte) and
    ``self._begin_offset`` (unsigned 32-bit int) packed big-endian; the body
    is the newline-joined ids, UTF-8 encoded and gzipped.
    """
    joined_ids = '\n'.join(self._unique_ids)
    body = gzip.compress(joined_ids.encode('utf-8'))
    header = struct.pack('>BI', self._version, self._begin_offset)
    return header + body
Example #27
Source File: message.py From rssant with BSD 3-Clause "New" or "Revised" License | 5 votes |
def raw_encode(cls, data, content_encoding=None):
    """Serialize `data` as JSON or msgpack, gzipping if the encoding asks for it.

    `content_encoding` is resolved through ``ContentEncoding.of``; its
    ``is_json`` and ``is_gzip`` flags pick the serializer and compression.

    Raises:
        ActorMessageEncodeError: if serialization raises ValueError or TypeError.
    """
    content_encoding = ContentEncoding.of(content_encoding)
    try:
        if content_encoding.is_json:
            encoded = json.dumps(data, ensure_ascii=False).encode('utf-8')
        else:
            encoded = msgpack.packb(data, use_bin_type=True)
    except (ValueError, TypeError) as ex:
        raise ActorMessageEncodeError(str(ex)) from ex
    return gzip.compress(encoded) if content_encoding.is_gzip else encoded
Example #28
Source File: userdata.py From kOVHernetes with Apache License 2.0 | 5 votes |
def res_gzip(resource):
    """Returns package data as gzipped bytes"""
    plain = res_plain(resource)
    return compress(plain)


# Reusable data from static files
Example #29
Source File: arrival_history.py From metrics-mvp with MIT License | 5 votes |
def save_for_date(history: ArrivalHistory, d: date, s3=False):
    """Write one day of arrival history to the local cache and optionally to S3.

    :param history: arrival history; supplies the data, version, agency and route
    :param d: date used to build the cache and S3 paths
    :param s3: when true, also upload a gzipped copy to S3
    """
    serialized = json.dumps(history.get_data())

    version = history.version
    agency_id = history.agency_id
    route_id = history.route_id

    cache_path = get_cache_path(agency_id, route_id, d, version)
    parent_dir = Path(cache_path).parent
    if not parent_dir.exists():
        parent_dir.mkdir(parents = True, exist_ok = True)

    with open(cache_path, "w") as cache_file:
        cache_file.write(serialized)

    if not s3:
        return

    s3_resource = boto3.resource('s3')
    s3_path = get_s3_path(agency_id, route_id, d, version)
    s3_bucket = config.s3_bucket
    print(f'saving to s3://{s3_bucket}/{s3_path}')
    s3_resource.Object(s3_bucket, s3_path).put(
        Body=gzip.compress(bytes(serialized, 'utf-8')),
        ContentType='application/json',
        ContentEncoding='gzip',
        ACL='public-read',
    )
Example #30
Source File: s3.py From S4 with GNU General Public License v3.0 | 5 votes |
def flush_index(self, compressed=True):
    """Upload ``self.index`` as UTF-8 JSON to ``self.index_path()`` in the bucket.

    :param compressed: when true the JSON bytes are gzipped before upload
    """
    raw = json.dumps(self.index).encode("utf-8")
    if not compressed:
        logger.debug("Using plain text encoding for writing index")
        body = raw
    else:
        logger.debug("Using gzip encoding for writing index")
        body = gzip.compress(raw)

    self.boto.put_object(Bucket=self.bucket, Key=self.index_path(), Body=body)