Python pyarrow.serialize() Examples
The following are 30 code examples of pyarrow.serialize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pyarrow, or try the search function.
Example #1
Source File: dataserializer.py From mars with Apache License 2.0 | 6 votes |
def dump(obj, file, *, serial_type=None, compress=None, pickle_protocol=None):
    """Serialize ``obj`` and write it to ``file`` after a file header.

    Args:
        obj: Object to serialize.
        file: Writable file-like object receiving the header and the payload.
        serial_type: ``SerialType`` member. When ``None``, ``SerialType.ARROW``
            is chosen if the module-level ``pyarrow`` name is not ``None``,
            otherwise ``SerialType.PICKLE``.
        compress: ``CompressType`` member. ``None`` means ``CompressType.NONE``.
        pickle_protocol: Protocol used on the pickle path; a falsy value falls
            back to ``pickle.HIGHEST_PROTOCOL``.

    Returns:
        None.
    """
    if serial_type is None:
        serial_type = SerialType.ARROW if pyarrow is not None else SerialType.PICKLE
    if compress is None:
        compress = CompressType.NONE

    # Track the compression wrapper separately from the caller's ``file``: if
    # serialization or the header write fails before the wrapper exists, the
    # cleanup below must not close a file object this function never opened.
    wrapped = None
    try:
        if serial_type == SerialType.ARROW:
            serialized = pyarrow.serialize(obj, mars_serialize_context())
            data_size = serialized.total_bytes
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            wrapped = open_compression_file(file, compress)
            serialized.write_to(wrapped)
        else:
            pickle_protocol = pickle_protocol or pickle.HIGHEST_PROTOCOL
            serialized = pickle.dumps(obj, protocol=pickle_protocol)
            data_size = len(serialized)
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            wrapped = open_compression_file(file, compress)
            wrapped.write(serialized)
    finally:
        # Same condition as before for the success path; the extra ``None``
        # check only changes behavior when the wrapper was never created.
        if wrapped is not None and compress != CompressType.NONE:
            wrapped.close()
Example #2
Source File: serialize.py From PoseFix_RELEASE with MIT License | 5 votes |
def dumps_pyarrow(obj):
    """
    Serialize ``obj`` with pyarrow.

    Returns:
        The buffer produced by ``to_buffer()`` on the serialized object
        (an implementation-dependent bytes-like object).
    """
    serialized = pa.serialize(obj)
    return serialized.to_buffer()
Example #3
Source File: learner.py From rl_algorithms with MIT License | 5 votes |
def send_info_to_logger(
    self, np_state_dict: List[np.ndarray], step_info: list,
):
    """Serialize the current params plus log values and push them out.

    The payload is a dict with two keys: ``log_value`` (holding
    ``update_step`` and ``step_info``) and ``state_dict``.
    """
    payload = {
        "log_value": {"update_step": self.update_step, "step_info": step_info},
        "state_dict": np_state_dict,
    }
    buffer = pa.serialize(payload).to_buffer()
    self.push_socket.send(buffer)
Example #4
Source File: wrapper.py From rl_algorithms with MIT License | 5 votes |
def send_batch_to_learner(self):
    """Sample a batch, send it out, then apply the priorities sent back."""
    # Outgoing half: sample, serialize, send, and count the request.
    sampled = self.buffer.sample(self.per_beta)
    outgoing = pa.serialize(sampled).to_buffer()
    self.req_socket.send(outgoing)
    self.num_sent = self.num_sent + 1

    # Incoming half: the reply deserializes to (indices, priorities).
    reply = self.req_socket.recv()
    idxes, new_priorities = pa.deserialize(reply)
    self.buffer.update_priorities(idxes, new_priorities)
Example #5
Source File: serialize.py From tf-cpn with MIT License | 5 votes |
def dumps_pyarrow(obj):
    """
    Turn ``obj`` into a pyarrow buffer.

    Returns:
        Implementation-dependent bytes-like object, as returned by
        ``to_buffer()``.
    """
    packed = pa.serialize(obj)
    return packed.to_buffer()
Example #6
Source File: serialize.py From petridishnn with MIT License | 5 votes |
def dumps_pyarrow(obj):
    """
    Serialize ``obj`` through pyarrow.

    Returns:
        Implementation-dependent bytes-like object.
        The result may not be compatible across different versions of pyarrow.
    """
    serialized = pa.serialize(obj)
    return serialized.to_buffer()
Example #7
Source File: serialize.py From lighttrack with MIT License | 5 votes |
def dumps_pyarrow(obj):
    """
    Serialize an arbitrary object using pyarrow.

    Returns:
        The ``to_buffer()`` result, an implementation-dependent
        bytes-like object.
    """
    result = pa.serialize(obj)
    return result.to_buffer()
Example #8
Source File: serialize.py From ADL with MIT License | 5 votes |
def dumps(obj):
    """
    Serialize ``obj`` with pyarrow, importing pyarrow only when called.

    Returns:
        Implementation-dependent bytes-like object.
        May not be compatible across different versions of pyarrow.
    """
    import pyarrow as pa

    serialized = pa.serialize(obj)
    return serialized.to_buffer()
Example #9
Source File: serializer.py From cloudburst with Apache License 2.0 | 5 votes |
def dump(self, data, valobj=None, serialize=True):
    """Encode ``data`` into ``valobj`` according to its Python type.

    Args:
        data: Value to encode.
        valobj: Object whose ``body`` and ``type`` are filled in; a new
            ``Value()`` is created when a falsy one is passed.
        serialize: When true, return ``valobj.SerializeToString()``;
            otherwise return ``valobj`` itself.
    """
    if not valobj:
        valobj = Value()

    if type(data) == bytes:
        payload, kind = data, DEFAULT
    elif isinstance(data, future.CloudburstFuture):
        # A future handed to another function is replaced by a reference,
        # because the runtime knows how to resolve it automatically.
        reference = CloudburstReference(data.obj_id, True)
        payload, kind = self._dump_default(reference), DEFAULT
    elif isinstance(data, (np.ndarray, pd.DataFrame)):
        payload, kind = self._dump_numpy(data), NUMPY
    elif isinstance(data, str):
        payload, kind = self._dump_string(data), STRING
    else:
        payload, kind = self._dump_default(data), DEFAULT

    valobj.body = payload
    valobj.type = kind

    return valobj.SerializeToString() if serialize else valobj
Example #10
Source File: serializer.py From cloudburst with Apache License 2.0 | 5 votes |
def _dump_numpy(self, msg):
    """Serialize ``msg`` with pyarrow and return the result as Python bytes."""
    arrow_buffer = pa.serialize(msg).to_buffer()
    return arrow_buffer.to_pybytes()
Example #11
Source File: learner.py From rl_algorithms with MIT License | 5 votes |
def publish_params(self, update_step: int, np_state_dict: List[np.ndarray]):
    """Serialize ``[update_step, np_state_dict]`` and send it on the pub socket."""
    serialized = pa.serialize([update_step, np_state_dict])
    self.pub_socket.send(serialized.to_buffer())
Example #12
Source File: serialization.py From catalyst with Apache License 2.0 | 5 votes |
def pyarrow_serialize(data):
    """Convert a value to bytes via pyarrow.

    Args:
        data: a value

    Returns:
        A bytes object holding the pyarrow-serialized data.
    """
    arrow_buffer = pyarrow.serialize(data).to_buffer()
    return arrow_buffer.to_pybytes()
Example #13
Source File: serialize.py From video-to-pose3D with MIT License | 5 votes |
def dumps_pyarrow(obj):
    """
    Serialize one object with pyarrow.

    Returns:
        Implementation-dependent bytes-like object (the ``to_buffer()``
        result).
    """
    serialized_obj = pa.serialize(obj)
    return serialized_obj.to_buffer()
Example #14
Source File: serializer.py From surreal with MIT License | 5 votes |
def pa_serialize(obj):
    """Serialize ``obj`` with pyarrow and return the resulting buffer."""
    serialized = pa.serialize(obj)
    return serialized.to_buffer()
Example #15
Source File: serializer.py From surreal with MIT License | 5 votes |
def serialize(obj):
    """
    Serialize ``obj`` by calling the module-level ``_SERIALIZER``.

    Memory efficiency has not been tuned here; this can be revisited if it
    is *really* needed.
    """
    encoded = _SERIALIZER(obj)
    return encoded
Example #16
Source File: serializer.py From surreal with MIT License | 5 votes |
def pyobj_hash(obj):
    """Return ``binary_hash`` applied to the serialized form of ``obj``."""
    encoded = serialize(obj)
    return binary_hash(encoded)
Example #17
Source File: serialize.py From tensorpack with Apache License 2.0 | 5 votes |
def dumps(obj):
    """
    Serialize an object using pyarrow (imported lazily inside the call).

    Returns:
        Implementation-dependent bytes-like object.
        May not be compatible across different versions of pyarrow.
    """
    import pyarrow as pa

    packed = pa.serialize(obj)
    return packed.to_buffer()
Example #18
Source File: convert_lmdb.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 5 votes |
def dumps_pyarrow(obj):
    """Serialize ``obj`` with pyarrow and return the ``to_buffer()`` result."""
    serialized = pyarrow.serialize(obj)
    return serialized.to_buffer()
Example #19
Source File: learner.py From rl_algorithms with MIT License | 5 votes |
def send_new_priorities(self, indices: np.ndarray, priorities: np.ndarray):
    """Serialize ``[indices, priorities]`` and send it on the rep socket."""
    serialized = pa.serialize([indices, priorities])
    self.rep_socket.send(serialized.to_buffer())
Example #20
Source File: worker.py From rl_algorithms with MIT License | 5 votes |
def send_data_to_buffer(self, replay_data):
    """Serialize ``replay_data`` with pyarrow and send it on the push socket."""
    serialized = pa.serialize(replay_data)
    self.push_socket.send(serialized.to_buffer())
Example #21
Source File: pyarrow_serializer.py From petastorm with Apache License 2.0 | 5 votes |
def serialize(self, rows):
    """Serialize ``rows`` using this object's serialization context.

    Returns the buffer produced by ``to_buffer()``.
    """
    context = self._get_serialization_context()
    return pyarrow.serialize(rows, context).to_buffer()
Example #22
Source File: local_disk_arrow_table_cache.py From petastorm with Apache License 2.0 | 5 votes |
def get(self, key, fill_cache_func):
    """Return the table for ``key``, filling the cache on a miss.

    On a hit, the cached components are deserialized to a pandas object and
    converted back to a ``pa.Table``. On a miss, ``fill_cache_func()`` is
    called, its result is stored as serialized pandas components, and the
    freshly produced value is returned unchanged.
    """
    cached = self._cache.get(key, default=None)
    if cached is not None:
        restored_df = pa.deserialize_components(cached)
        return pa.Table.from_pandas(restored_df, preserve_index=False)

    table = fill_cache_func()
    components = pa.serialize(table.to_pandas()).to_components()
    self._cache.set(key, components)
    return table
Example #23
Source File: local_disk_arrow_table_cache.py From petastorm with Apache License 2.0 | 5 votes |
def __init__(self, *args, **kwargs):
    """Initialize the base class, then run one throwaway serialization."""
    super(LocalDiskArrowTableCache, self).__init__(*args, **kwargs)

    # Workaround for https://issues.apache.org/jira/browse/ARROW-5260:
    # something must be serialized before deserialize_components is first
    # called, otherwise the process crashes with a sigsegv.
    pa.serialize(0)
Example #24
Source File: ray_util.py From rlgraph with Apache License 2.0 | 5 votes |
def ray_compress(data):
    """Serialize, lz4-compress and base64-encode ``data``.

    Returns:
        The base64 text as a ``str`` (the encoded bytes decoded as ASCII).
    """
    raw = pyarrow.serialize(data).to_buffer().to_pybytes()
    compressed = lz4.frame.compress(raw)
    # NOTE: the reason for decoding to ASCII text rather than returning the
    # base64 bytes directly is unclear.
    return base64.b64encode(compressed).decode("ascii")
Example #25
Source File: sharedstore.py From mars with Apache License 2.0 | 5 votes |
def __init__(self, plasma_client, mapper_ref):
    """Store the plasma client and mapper ref and set up helper state.

    Args:
        plasma_client: Kept as ``self._plasma_client``.
        mapper_ref: Kept as ``self._mapper_ref``; its ``ctx.threadpool(1)``
            result is kept as ``self._pool``.
    """
    # Imported here rather than at module level, as in the original code.
    from ...serialize.dataserializer import mars_serialize_context

    self._plasma_client = plasma_client
    self._mapper_ref = mapper_ref
    self._actual_size = None

    self._serialize_context = mars_serialize_context()
    self._pool = mapper_ref.ctx.threadpool(1)
Example #26
Source File: vineyardhandler.py From mars with Apache License 2.0 | 5 votes |
def load_from_bytes_io(self, session_id, data_keys, src_handler, pin_token=None):
    """Load ``data_keys`` from ``src_handler`` via ``transfer_in_runner``.

    The fallback handed to ``transfer_in_runner`` batch-loads the objects by
    opening a bytes reader per key on ``src_handler`` and reading it fully.
    ``pin_token`` is accepted but not used in this method.
    """
    def _read_serialized(reader):
        # Run the read on the reader's IO pool and wait for its result,
        # with the reader used as a context manager for the duration.
        with reader:
            pending = reader.get_io_pool().submit(reader.read)
            return pending.result()

    def _open_and_read(key):
        opened = src_handler.create_bytes_reader(session_id, key, _promise=True)
        return opened.then(_read_serialized)

    def _fallback(*_):
        return self._batch_load_objects(
            session_id, data_keys, _open_and_read, serialize=True
        )

    return self.transfer_in_runner(session_id, data_keys, src_handler, _fallback)
Example #27
Source File: vineyardhandler.py From mars with Apache License 2.0 | 5 votes |
def get_objects(self, session_id, data_keys, serialize=False, _promise=False):
    """Fetch the objects for ``data_keys`` from the client.

    Each key is mapped to an object id with ``_get_object_id`` and the ids
    are passed to ``self._client.get_object``. ``serialize`` and
    ``_promise`` are accepted but not used in this method.
    """
    object_ids = []
    for data_key in data_keys:
        object_ids.append(self._get_object_id(session_id, data_key))
    return self._client.get_object(object_ids)
Example #28
Source File: vineyardhandler.py From mars with Apache License 2.0 | 5 votes |
def __init__(self, vineyard_client, session_id, data_key, data_id, mode='w',
             nbytes=None, packed=False, compress=None, auto_register=True,
             pin_token=None, handler=None):
    """Set up a bytes IO object over a value fetched from the vineyard client.

    Only readable, packed access is implemented: any other combination
    raises ``NotImplementedError``. In the readable case the value is
    fetched and serialized into components before the ``packed`` check.
    """
    from .objectholder import SharedHolderActor

    logger.debug('create vineyard bytes IO: mode = %s, packed = %s', mode, packed)
    super().__init__(session_id, data_key, mode=mode, handler=handler)

    self._client = vineyard_client
    self._data_id = data_id
    self._components = None
    self._offset = 0
    self._nbytes = nbytes
    self._holder_ref = self._storage_ctx.actor_ctx.actor_ref(SharedHolderActor.default_uid())
    self._compress = compress or dataserializer.CompressType.NONE
    self._packed = packed
    self._auto_register = auto_register
    self._pin_token = pin_token

    block_size = options.worker.copy_block_size

    # Non-readable modes are not implemented.
    if not self.is_readable:
        raise NotImplementedError

    logger.debug('bytes io get: session_id = %s, data_key = %s, data_id = %r, type(data_id) = %r',
                 session_id, data_key, data_id, type(data_id))
    data = self._client.get(data_id)
    self._components = pyarrow.serialize(data, dataserializer.mars_serialize_context()).to_components()

    if not packed:
        raise NotImplementedError('Unknown how to read vineyard values in a unpacked way')

    # Note: the raw ``compress`` argument is forwarded here, not ``self._compress``.
    self._buf = ArrowComponentsIO(
        self._components, 'r', compress_out=compress, block_size=block_size)
Example #29
Source File: dataserializer.py From mars with Apache License 2.0 | 5 votes |
def serialize(data):
    """Serialize ``data`` with pyarrow using ``mars_serialize_context()``.

    Returns the object produced by ``pyarrow.serialize`` without converting
    it to a buffer.
    """
    context = mars_serialize_context()
    return pyarrow.serialize(data, context)
Example #30
Source File: serialize.py From dataflow with Apache License 2.0 | 5 votes |
def dumps(obj):
    """
    Serialize ``obj`` via pyarrow; pyarrow is imported at call time.

    Returns:
        Implementation-dependent bytes-like object.
        May not be compatible across different versions of pyarrow.
    """
    import pyarrow as pa

    result = pa.serialize(obj)
    return result.to_buffer()