Python pyarrow.deserialize() Examples
The following are 26 code examples of pyarrow.deserialize(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the pyarrow module.
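All of these snippets target pyarrow's legacy serialization API, in which pa.serialize() and pa.deserialize() form a round-trip pair. As a quick orientation, here is a minimal sketch of that round trip (note: this API was deprecated in pyarrow 2.0 and removed in later releases, so the examples on this page require an older pyarrow):

import numpy as np
import pyarrow as pa

# Serialize an arbitrary Python object into an Arrow buffer, then
# reconstruct it; NumPy arrays travel without copies where possible.
value = {"weights": np.arange(12, dtype=np.float32).reshape(3, 4)}
buf = pa.serialize(value).to_buffer()   # -> pyarrow.Buffer
restored = pa.deserialize(buf)
assert np.array_equal(value["weights"], restored["weights"])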
Example #1
Source File: dataserializer.py From mars with Apache License 2.0 | 6 votes |
def loads(buf):
    mv = memoryview(buf)
    header = read_file_header(mv)
    compress = header.compress

    if compress == CompressType.NONE:
        data = buf[HEADER_LENGTH:]
    else:
        data = decompressors[compress](mv[HEADER_LENGTH:])

    if header.type == SerialType.ARROW:
        try:
            return pyarrow.deserialize(memoryview(data), mars_serialize_context())
        except pyarrow.lib.ArrowInvalid:  # pragma: no cover
            # reconstruct value from buffers of arrow components
            data_view = memoryview(data)
            meta_block_size = np.frombuffer(data_view[0:4], dtype='int32').item()
            meta = pickle.loads(data_view[4:4 + meta_block_size])  # nosec
            buffer_sizes = meta.pop('buffer_sizes')
            bounds = np.cumsum([4 + meta_block_size] + buffer_sizes)
            meta['data'] = [pyarrow.py_buffer(data_view[bounds[idx]:bounds[idx + 1]])
                            for idx in range(len(buffer_sizes))]
            return pyarrow.deserialize_components(meta, mars_serialize_context())
    else:
        return pickle.loads(data)
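The fallback branch above rebuilds the value from serialized Arrow components. A minimal sketch of the components API it relies on (legacy pyarrow; the sample value is illustrative):

import numpy as np
import pyarrow as pa

value = {"x": np.arange(10)}
# to_components() splits the serialized object into metadata plus a list
# of pyarrow.Buffer objects that can be stored or shipped separately.
components = pa.serialize(value).to_components()
restored = pa.deserialize_components(components)
assert np.array_equal(value["x"], restored["x"])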
Example #2
Source File: worker.py From rl_algorithms with MIT License | 6 votes |
def recv_params_from_learner(self):
    """Get new params from the learner, if any, and synchronize the worker."""
    received = False
    try:
        new_params_id = self.sub_socket.recv(zmq.DONTWAIT)
        received = True
    except zmq.Again:
        # Don't block if the learner hasn't sent new params yet
        pass

    if received:
        new_param_info = pa.deserialize(new_params_id)
        update_step, new_params = new_param_info
        self.update_step = update_step
        self.worker.synchronize(new_params)

        # Add new entry for scores dict
        self.scores[self.update_step] = []
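For context, the learner-side publisher presumably serializes the same (update_step, params) tuple that this worker unpacks; a hypothetical sketch (the function name and socket argument are assumptions, not the rl_algorithms source):

import pyarrow as pa

def send_params_to_workers(pub_socket, update_step, new_params):
    # Serialize the (update_step, params) tuple the worker expects.
    new_params_id = pa.serialize((update_step, new_params)).to_buffer()
    pub_socket.send(new_params_id)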
Example #3
Source File: distributed_logger.py From rl_algorithms with MIT License | 6 votes |
def run(self):
    """Run main logging loop; continuously receive data and log."""
    if self.args.log:
        self.set_wandb()

    while self.update_step < self.args.max_update_step:
        self.recv_log_info()
        if self.log_info_queue:  # if non-empty
            log_info_id = self.log_info_queue.pop()
            log_info = pa.deserialize(log_info_id)

            state_dict = log_info["state_dict"]
            log_value = log_info["log_value"]
            self.update_step = log_value["update_step"]

            self.synchronize(state_dict)
            avg_score = self.test(self.update_step)
            log_value["avg_score"] = avg_score
            self.write_log(log_value)
Example #4
Source File: wrapper.py From rl_algorithms with MIT License | 6 votes |
def recv_worker_data(self):
    """Receive replay data from a worker and incorporate it into the buffer."""
    received = False
    try:
        new_replay_data_id = self.pull_socket.recv(zmq.DONTWAIT)
        received = True
    except zmq.Again:
        pass

    if received:
        new_replay_data = pa.deserialize(new_replay_data_id)
        experience, priorities = new_replay_data
        for idx in range(len(experience["states"])):
            transition = (
                experience["states"][idx],
                experience["actions"][idx],
                experience["rewards"][idx],
                experience["next_states"][idx],
                experience["dones"][idx],
            )
            self.buffer.add(transition)
            self.buffer.update_priorities([len(self.buffer) - 1], priorities[idx])
Example #5
Source File: serializer.py From cloudburst with Apache License 2.0 | 6 votes |
def load(self, data):
    # If the type of the input is bytes, then we need to deserialize the
    # input first.
    if type(data) == bytes:
        val = Value()
        val.ParseFromString(data)
    elif type(data).__name__ == Value.__name__:
        # If it's already deserialized, we can just proceed.
        val = data
    else:
        raise ValueError(f'Input to load was of unsupported type {str(type(data))}.')

    if val.type == DEFAULT:
        try:
            return self._load_default(val.body)
        except:  # Unpickling error.
            return val.body
    elif val.type == STRING:
        return self._load_string(val.body)
    elif val.type == NUMPY:
        return self._load_numpy(val.body)
Example #6
Source File: serialize.py From tf-cpn with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
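The `dumps` this docstring refers to is typically a one-line counterpart like the following (a sketch consistent with the docstring, not necessarily the tf-cpn source):

import pyarrow as pa

def dumps_pyarrow(obj):
    """Serialize an object into a pyarrow buffer; `loads_pyarrow` inverts it."""
    return pa.serialize(obj).to_buffer()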
Example #7
Source File: serialize.py From tensorpack with Apache License 2.0 | 5 votes |
def loads(buf):
    """
    Args:
        buf: the output of `dumps` or `dumps_bytes`.
    """
    import pyarrow as pa
    return pa.deserialize(buf)
Example #8
Source File: serializer.py From surreal with MIT License | 5 votes |
def deserialize(binary):
    """
    We can improve this function if we *really* need more memory efficiency
    """
    return _DESERIALIZER(binary)
Example #9
Source File: serializer.py From surreal with MIT License | 5 votes |
def pa_deserialize(binary):
    return pa.deserialize(binary)
Example #10
Source File: serialize.py From video-to-pose3D with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
Example #11
Source File: serialization.py From catalyst with Apache License 2.0 | 5 votes |
def pyarrow_deserialize(bytes):
    """Deserialize bytes into an object using pyarrow.

    Args:
        bytes: a bytes-like object containing data serialized with pyarrow.

    Returns:
        The value deserialized from the bytes-like object.
    """
    return pyarrow.deserialize(bytes)
Example #12
Source File: utils.py From incubator-superset with Apache License 2.0 | 5 votes |
def _deserialize_results_payload(
    payload: Union[bytes, str], query: Query, use_msgpack: Optional[bool] = False
) -> Dict[str, Any]:
    logger.debug("Deserializing from msgpack: %r", use_msgpack)
    if use_msgpack:
        with stats_timing(
            "sqllab.query.results_backend_msgpack_deserialize", stats_logger
        ):
            ds_payload = msgpack.loads(payload, raw=False)

        with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
            pa_table = pa.deserialize(ds_payload["data"])

        df = result_set.SupersetResultSet.convert_table_to_df(pa_table)
        ds_payload["data"] = dataframe.df_to_records(df) or []

        db_engine_spec = query.database.db_engine_spec
        all_columns, data, expanded_columns = db_engine_spec.expand_data(
            ds_payload["selected_columns"], ds_payload["data"]
        )
        ds_payload.update(
            {"data": data, "columns": all_columns, "expanded_columns": expanded_columns}
        )

        return ds_payload

    with stats_timing("sqllab.query.results_backend_json_deserialize", stats_logger):
        return json.loads(payload)
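A hedged sketch of what the matching msgpack serialization path might look like; the payload keys mirror what the deserializer above reads, but this is an illustration, not Superset's actual code:

import msgpack
import pyarrow as pa

def _serialize_results_payload_sketch(pa_table, selected_columns):
    # Pack the Arrow table bytes plus the metadata keys that
    # _deserialize_results_payload expects ("data", "selected_columns").
    payload = {
        "data": pa.serialize(pa_table).to_buffer().to_pybytes(),
        "selected_columns": selected_columns,
    }
    return msgpack.dumps(payload)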
Example #13
Source File: serialize.py From PoseFix_RELEASE with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
Example #14
Source File: serializer.py From cloudburst with Apache License 2.0 | 5 votes |
def _load_numpy(self, msg):
    if not msg:
        return msg
    return pa.deserialize(msg)
Example #15
Source File: serialize.py From ADL with MIT License | 5 votes |
def loads(buf):
    """
    Args:
        buf: the output of `dumps` or `dumps_bytes`.
    """
    import pyarrow as pa
    return pa.deserialize(buf)
Example #16
Source File: serialize.py From lighttrack with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
Example #17
Source File: serialize.py From petridishnn with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)

# importing pyarrow has a lot of side effects:
# https://github.com/apache/arrow/pull/2329
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
# So we use msgpack as default.
Example #18
Source File: convert_lmdb.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_pyarrow(buf):
    assert buf is not None, 'buf should not be None.'
    return pyarrow.deserialize(buf)
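In an LMDB pipeline like this one, load_pyarrow is typically applied to a raw record fetched from the database; a minimal usage sketch (the path and key layout are hypothetical):

import lmdb

def read_record(db_path, key: bytes):
    env = lmdb.open(db_path, readonly=True, lock=False)
    try:
        with env.begin(write=False) as txn:
            buf = txn.get(key)   # None if the key is missing
        return load_pyarrow(buf)
    finally:
        env.close()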
Example #19
Source File: wrapper.py From rl_algorithms with MIT License | 5 votes |
def send_batch_to_learner(self):
    """Send batch to learner and receive priorities."""
    # Send batch and request priorities (blocking recv)
    batch = self.buffer.sample(self.per_beta)
    batch_id = pa.serialize(batch).to_buffer()
    self.req_socket.send(batch_id)
    self.num_sent = self.num_sent + 1

    # Receive priorities
    new_priors_id = self.req_socket.recv()
    idxes, new_priorities = pa.deserialize(new_priors_id)
    self.buffer.update_priorities(idxes, new_priorities)
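The learner side of this REQ/REP exchange would mirror it: receive the batch, compute new priorities, and reply on the same socket. A hypothetical sketch consistent with the snippet (compute_priorities is an assumed callback, not part of the source):

import pyarrow as pa

def recv_batch_and_send_priorities(rep_socket, compute_priorities):
    # Blocking recv of the serialized batch, then reply with priorities.
    batch = pa.deserialize(rep_socket.recv())
    idxes, new_priorities = compute_priorities(batch)
    rep_socket.send(pa.serialize((idxes, new_priorities)).to_buffer())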
Example #20
Source File: learner.py From rl_algorithms with MIT License | 5 votes |
def recv_replay_data(self):
    """Receive replay data from the global buffer."""
    replay_data_id = self.rep_socket.recv()
    replay_data = pa.deserialize(replay_data_id)
    return replay_data
Example #21
Source File: pyarrow_serializer.py From petastorm with Apache License 2.0 | 5 votes |
def deserialize(self, serialized_rows):
    return pyarrow.deserialize(serialized_rows, self._get_serialization_context())
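The serialization context is what lets the legacy API round-trip custom types. A minimal sketch of registering one (the Point class and callbacks are illustrative, not petastorm's actual context):

import pyarrow as pa

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

# Register how to break the type down into serializable data and back.
context = pa.SerializationContext()
context.register_type(
    Point, 'Point',
    custom_serializer=lambda p: (p.x, p.y),
    custom_deserializer=lambda data: Point(*data))

buf = pa.serialize(Point(1, 2), context=context).to_buffer()
restored = pa.deserialize(buf, context=context)
assert (restored.x, restored.y) == (1, 2)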
Example #22
Source File: ray_util.py From rlgraph with Apache License 2.0 | 5 votes |
def ray_decompress(data):
    if isinstance(data, bytes) or isinstance(data, string_types):
        data = base64.b64decode(data)
    data = lz4.frame.decompress(data)
    data = pyarrow.deserialize(data)
    return data
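The inverse helper would run the same steps in reverse order; a sketch (rlgraph's actual ray_compress may differ):

import base64
import lz4.frame
import pyarrow

def ray_compress(data):
    data = pyarrow.serialize(data).to_buffer().to_pybytes()
    data = lz4.frame.compress(data)
    # base64-encode so the blob can be stored as a string (e.g. in Redis).
    return base64.b64encode(data).decode('ascii')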
Example #23
Source File: dataserializer.py From mars with Apache License 2.0 | 5 votes |
def deserialize(data):
    return pyarrow.deserialize(data, mars_serialize_context())
Example #24
Source File: dataserializer.py From mars with Apache License 2.0 | 5 votes |
def load(file):
    header = read_file_header(file)
    file = open_decompression_file(file, header.compress)
    try:
        buf = file.read()
    finally:
        if header.compress != CompressType.NONE:
            file.close()

    if header.type == SerialType.ARROW:
        return pyarrow.deserialize(memoryview(buf), mars_serialize_context())
    else:
        return pickle.loads(buf)
Example #25
Source File: serialize.py From dataflow with Apache License 2.0 | 5 votes |
def loads(buf):
    """
    Args:
        buf: the output of `dumps` or `dumps_bytes`.
    """
    import pyarrow as pa
    return pa.deserialize(buf)
Example #26
Source File: test_dataio.py From mars with Apache License 2.0 | 4 votes |
def testArrowBufferIO(self):
    if not np:
        return
    from numpy.testing import assert_array_equal

    for compress in [dataserializer.CompressType.LZ4, dataserializer.CompressType.GZIP]:
        if compress not in dataserializer.get_supported_compressions():
            continue

        data = np.random.random((1000, 100))
        serialized = pyarrow.serialize(data).to_buffer()

        # test complete read
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        assert_array_equal(data, dataserializer.loads(reader.read()))

        # test partial read
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        block = reader.read(128)
        data_left = reader.read()
        assert_array_equal(data, dataserializer.loads(block + data_left))

        # test read by chunks
        bio = BytesIO()
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        while True:
            block = reader.read(128)
            if not block:
                break
            bio.write(block)

        compressed = bio.getvalue()
        assert_array_equal(data, dataserializer.loads(compressed))

        # test write by chunks
        data_sink = bytearray(len(serialized))
        compressed_mv = memoryview(compressed)
        writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
        pos = 0
        while pos < len(compressed):
            endpos = min(pos + 128, len(compressed))
            writer.write(compressed_mv[pos:endpos])
            pos = endpos

        assert_array_equal(data, pyarrow.deserialize(data_sink))