Python pyarrow.deserialize() Examples

The following are 26 code examples of pyarrow.deserialize(), collected from open-source projects. The source file, project, and license are noted above each example.
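These APIs predate Arrow's switch to pickle protocol 5: pyarrow.serialize() and pyarrow.deserialize() were deprecated in pyarrow 2.0.0 and removed in later releases, so the examples below assume an older pyarrow. As a minimal sketch of the round trip they all rely on:

import pyarrow as pa
import numpy as np

# Serialize an arbitrary Python object (here a dict holding a numpy array)
# into an Arrow buffer, then reconstruct it.
obj = {"weights": np.arange(10, dtype="float64"), "step": 42}
buf = pa.serialize(obj).to_buffer()  # pyarrow.Buffer
restored = pa.deserialize(buf)       # zero-copy for array payloads where possible
assert restored["step"] == 42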
Example #1
Source File: dataserializer.py    From mars with Apache License 2.0
def loads(buf):
    mv = memoryview(buf)
    header = read_file_header(mv)
    compress = header.compress

    if compress == CompressType.NONE:
        data = buf[HEADER_LENGTH:]
    else:
        data = decompressors[compress](mv[HEADER_LENGTH:])

    if header.type == SerialType.ARROW:
        try:
            return pyarrow.deserialize(memoryview(data), mars_serialize_context())
        except pyarrow.lib.ArrowInvalid:  # pragma: no cover
            # reconstruct value from buffers of arrow components
            data_view = memoryview(data)
            meta_block_size = np.frombuffer(data_view[0:4], dtype='int32').item()
            meta = pickle.loads(data_view[4:4 + meta_block_size])  # nosec
            buffer_sizes = meta.pop('buffer_sizes')
            bounds = np.cumsum([4 + meta_block_size] + buffer_sizes)
            meta['data'] = [pyarrow.py_buffer(data_view[bounds[idx]:bounds[idx + 1]])
                            for idx in range(len(buffer_sizes))]
            return pyarrow.deserialize_components(meta, mars_serialize_context())
    else:
        return pickle.loads(data) 
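The except branch above rebuilds the value from Arrow component buffers. Independent of mars' header layout, the underlying pyarrow pair looks like this minimal sketch:

import pyarrow

# Split a serialized object into picklable metadata plus raw buffers ...
components = pyarrow.serialize([1, 2, 3]).to_components()
# ... ship the pieces separately, then reassemble them on the other side.
value = pyarrow.deserialize_components(components)
assert value == [1, 2, 3]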
Example #2
Source File: worker.py    From rl_algorithms with MIT License
def recv_params_from_learner(self):
        """Get new params and sync. return True if success, False otherwise."""
        received = False
        try:
            new_params_id = self.sub_socket.recv(zmq.DONTWAIT)
            received = True
        except zmq.Again:
            # No new params from the learner yet; don't block
            pass

        if received:
            new_param_info = pa.deserialize(new_params_id)
            update_step, new_params = new_param_info
            self.update_step = update_step
            self.worker.synchronize(new_params)

            # Add new entry for scores dict
            self.scores[self.update_step] = [] 
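The learner side of this pub/sub exchange is not shown. A plausible counterpart (a sketch; pub_socket and the (update_step, params) tuple layout are inferred from the receiver above) would be:

import pyarrow as pa

def send_params_to_workers(pub_socket, update_step, params):
    # Hypothetical learner-side publisher: bundle the step counter with the
    # new params and broadcast them as a single pyarrow-serialized message.
    new_params_id = pa.serialize((update_step, params)).to_buffer()
    pub_socket.send(new_params_id)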
Example #3
Source File: distributed_logger.py    From rl_algorithms with MIT License
def run(self):
        """Run main logging loop; continuously receive data and log."""
        if self.args.log:
            self.set_wandb()

        while self.update_step < self.args.max_update_step:
            self.recv_log_info()
            if self.log_info_queue:  # if non-empty
                log_info_id = self.log_info_queue.pop()
                log_info = pa.deserialize(log_info_id)
                state_dict = log_info["state_dict"]
                log_value = log_info["log_value"]
                self.update_step = log_value["update_step"]

                self.synchronize(state_dict)
                avg_score = self.test(self.update_step)
                log_value["avg_score"] = avg_score
                self.write_log(log_value) 
Example #4
Source File: wrapper.py    From rl_algorithms with MIT License
def recv_worker_data(self):
        """Receive replay data from worker and incorporate to buffer."""
        received = False
        try:
            new_replay_data_id = self.pull_socket.recv(zmq.DONTWAIT)
            received = True
        except zmq.Again:
            pass

        if received:
            new_replay_data = pa.deserialize(new_replay_data_id)
            experience, priorities = new_replay_data
            for idx in range(len(experience["states"])):
                transition = (
                    experience["states"][idx],
                    experience["actions"][idx],
                    experience["rewards"][idx],
                    experience["next_states"][idx],
                    experience["dones"][idx],
                )
                self.buffer.add(transition)
                self.buffer.update_priorities([len(self.buffer) - 1], priorities[idx]) 
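The worker end that produces this message is not included in the snippet. A hedged sketch of the sender (push_socket is an assumption; the (experience, priorities) tuple layout follows from the receiver above):

import pyarrow as pa

def send_replay_data_to_buffer(push_socket, experience, priorities):
    # Hypothetical worker-side sender: pair the experience dict with its
    # per-transition priorities, matching what recv_worker_data unpacks.
    new_replay_data_id = pa.serialize((experience, priorities)).to_buffer()
    push_socket.send(new_replay_data_id)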
Example #5
Source File: serializer.py    From cloudburst with Apache License 2.0
def load(self, data):
        # If the type of the input is bytes, then we need to deserialize the
        # input first.
        if type(data) == bytes:
            val = Value()
            val.ParseFromString(data)
        elif type(data).__name__ == Value.__name__:
            # If it's already deserialized, we can just proceed.
            val = data
        else:
            raise ValueError(f'''Input to load was of unsupported type
                             {str(type(data))}.''')

        if val.type == DEFAULT:
            try:
                return self._load_default(val.body)
            except Exception:  # unpickling error
                return val.body
        elif val.type == STRING:
            return self._load_string(val.body)
        elif val.type == NUMPY:
            return self._load_numpy(val.body) 
Example #6
Source File: serialize.py    From tf-cpn with MIT License
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf) 
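The `dumps` referenced in the docstring is not shown; in these serialize modules it is typically the mirror-image one-liner below (a sketch, not copied from the project):

import pyarrow as pa

def dumps_pyarrow(obj):
    """
    Returns:
        a pyarrow Buffer; feed it back to `loads_pyarrow`.
    """
    return pa.serialize(obj).to_buffer()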
Example #7
Source File: serialize.py    From tensorpack with Apache License 2.0
def loads(buf):
        """
        Args:
            buf: the output of `dumps` or `dumps_bytes`.
        """
        import pyarrow as pa
        return pa.deserialize(buf) 
Example #8
Source File: serializer.py    From surreal with MIT License
def deserialize(binary):
    """
    We can improve this function if we *really* need more memory efficiency
    """
    return _DESERIALIZER(binary) 
Example #9
Source File: serializer.py    From surreal with MIT License
def pa_deserialize(binary):
    return pa.deserialize(binary) 
Example #10
Source File: serialize.py    From video-to-pose3D with MIT License
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf) 
Example #11
Source File: serialization.py    From catalyst with Apache License 2.0
def pyarrow_deserialize(bytes):
    """Deserialize bytes into an object using pyarrow.

    Args:
        bytes: a bytes object containing serialized with pyarrow data.

    Returns:
        Returns a value deserialized from the bytes-like object.
    """
    return pyarrow.deserialize(bytes) 
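A matching serializer would presumably mirror this function; a minimal sketch (the name and docstring are illustrative, not catalyst's actual code):

import pyarrow

def pyarrow_serialize(data):
    """Serialize an object into bytes using pyarrow.

    Args:
        data: a value to serialize.

    Returns:
        Returns a bytes object usable with ``pyarrow_deserialize``.
    """
    return pyarrow.serialize(data).to_buffer().to_pybytes()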
Example #12
Source File: utils.py    From incubator-superset with Apache License 2.0
def _deserialize_results_payload(
    payload: Union[bytes, str], query: Query, use_msgpack: Optional[bool] = False
) -> Dict[str, Any]:
    logger.debug("Deserializing from msgpack: %r", use_msgpack)
    if use_msgpack:
        with stats_timing(
            "sqllab.query.results_backend_msgpack_deserialize", stats_logger
        ):
            ds_payload = msgpack.loads(payload, raw=False)

        with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
            pa_table = pa.deserialize(ds_payload["data"])

        df = result_set.SupersetResultSet.convert_table_to_df(pa_table)
        ds_payload["data"] = dataframe.df_to_records(df) or []

        db_engine_spec = query.database.db_engine_spec
        all_columns, data, expanded_columns = db_engine_spec.expand_data(
            ds_payload["selected_columns"], ds_payload["data"]
        )
        ds_payload.update(
            {"data": data, "columns": all_columns, "expanded_columns": expanded_columns}
        )

        return ds_payload

    with stats_timing("sqllab.query.results_backend_json_deserialize", stats_logger):
        return json.loads(payload) 
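For orientation, the msgpack branch above consumes a payload produced by a mirror-image serializer. A hedged sketch of that producing path (the helper name is illustrative, not Superset's actual code):

import msgpack
import pyarrow as pa

def _serialize_results_payload(payload, pa_table):
    # Hypothetical inverse of the code above: serialize the Arrow table with
    # pyarrow, then pack the full payload dict with msgpack.
    payload["data"] = pa.serialize(pa_table).to_buffer().to_pybytes()
    return msgpack.dumps(payload, use_bin_type=True)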
Example #13
Source File: serialize.py    From PoseFix_RELEASE with MIT License
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf) 
Example #14
Source File: serializer.py    From cloudburst with Apache License 2.0
def _load_numpy(self, msg):
        if not msg:
            return msg

        return pa.deserialize(msg) 
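Its inverse in the same serializer is presumably symmetric; a sketch (the method name _dump_numpy is an assumption):

import pyarrow as pa

def _dump_numpy(self, obj):
    # Hypothetical counterpart to _load_numpy: serialize the numpy payload
    # to bytes so it can be stored in the Value protobuf body.
    return pa.serialize(obj).to_buffer().to_pybytes()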
Example #15
Source File: serialize.py    From ADL with MIT License
def loads(buf):
        """
        Args:
            buf: the output of `dumps` or `dumps_bytes`.
        """
        import pyarrow as pa
        return pa.deserialize(buf) 
Example #16
Source File: serialize.py    From lighttrack with MIT License
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf) 
Example #17
Source File: serialize.py    From petridishnn with MIT License
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)


# Importing pyarrow has a lot of side effects:
# https://github.com/apache/arrow/pull/2329
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
# So we use msgpack by default. 
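The msgpack default mentioned above looks roughly like the sketch below in tensorpack-derived code (hedged; the real module also caps buffer sizes and handles failures):

import msgpack
import msgpack_numpy

msgpack_numpy.patch()  # teach msgpack to round-trip numpy arrays

def dumps_msgpack(obj):
    return msgpack.dumps(obj, use_bin_type=True)

def loads_msgpack(buf):
    return msgpack.loads(buf, raw=False)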
Example #18
Source File: convert_lmdb.py    From torch-toolbox with BSD 3-Clause "New" or "Revised" License
def load_pyarrow(buf):
    assert buf is not None, 'buf should not be None.'
    return pyarrow.deserialize(buf) 
Example #19
Source File: wrapper.py    From rl_algorithms with MIT License
def send_batch_to_learner(self):
        """Send batch to learner and receive priorities."""
        # Send batch and request priorities (blocking recv)
        batch = self.buffer.sample(self.per_beta)
        batch_id = pa.serialize(batch).to_buffer()
        self.req_socket.send(batch_id)
        self.num_sent = self.num_sent + 1

        # Receive priorities
        new_priors_id = self.req_socket.recv()
        idxes, new_priorities = pa.deserialize(new_priors_id)
        self.buffer.update_priorities(idxes, new_priorities) 
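On the other end of this REQ/REP pair, the learner would reply with recomputed priorities. A hedged sketch (the handler name and the priority computation are placeholders):

import pyarrow as pa

def reply_new_priorities(rep_socket, compute_priorities):
    # Hypothetical learner-side handler: receive a sampled batch, recompute
    # per-transition priorities, and send (idxes, new_priorities) back.
    batch = pa.deserialize(rep_socket.recv())
    idxes, new_priorities = compute_priorities(batch)
    rep_socket.send(pa.serialize((idxes, new_priorities)).to_buffer())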
Example #20
Source File: learner.py    From rl_algorithms with MIT License
def recv_replay_data(self):
        """Receive replay data from gloal buffer."""
        replay_data_id = self.rep_socket.recv()
        replay_data = pa.deserialize(replay_data_id)
        return replay_data 
Example #21
Source File: pyarrow_serializer.py    From petastorm with Apache License 2.0
def deserialize(self, serialized_rows):
        return pyarrow.deserialize(serialized_rows, self._get_serialization_context()) 
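The serialize method on the same class would plausibly be the one-liner below (a sketch, sharing the context so custom types round-trip):

def serialize(self, rows):
    # Sketch of the inverse of deserialize above, using the same shared
    # SerializationContext.
    return pyarrow.serialize(rows, self._get_serialization_context()).to_buffer()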
Example #22
Source File: ray_util.py    From rlgraph with Apache License 2.0
def ray_decompress(data):
    if isinstance(data, bytes) or isinstance(data, string_types):
        data = base64.b64decode(data)
        data = lz4.frame.decompress(data)
        data = pyarrow.deserialize(data)
    return data


# Ray's magic constant worker explorations.. 
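The compression direction that pairs with ray_decompress is straightforward to reconstruct; a sketch (not verbatim from rlgraph):

import base64

import lz4.frame
import pyarrow

def ray_compress(data):
    # Inverse of ray_decompress above: pyarrow-serialize, lz4-compress,
    # then base64-encode so the payload is text-safe.
    data = pyarrow.serialize(data).to_buffer().to_pybytes()
    data = lz4.frame.compress(data)
    return base64.b64encode(data).decode("ascii")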
Example #23
Source File: dataserializer.py    From mars with Apache License 2.0
def deserialize(data):
    return pyarrow.deserialize(data, mars_serialize_context()) 
Example #24
Source File: dataserializer.py    From mars with Apache License 2.0
def load(file):
    header = read_file_header(file)
    file = open_decompression_file(file, header.compress)

    try:
        buf = file.read()
    finally:
        if header.compress != CompressType.NONE:
            file.close()

    if header.type == SerialType.ARROW:
        return pyarrow.deserialize(memoryview(buf), mars_serialize_context())
    else:
        return pickle.loads(buf) 
Example #25
Source File: serialize.py    From dataflow with Apache License 2.0
def loads(buf):
        """
        Args:
            buf: the output of `dumps` or `dumps_bytes`.
        """
        import pyarrow as pa
        return pa.deserialize(buf) 
Example #26
Source File: test_dataio.py    From mars with Apache License 2.0
def testArrowBufferIO(self):
        if not np:
            return
        from numpy.testing import assert_array_equal

        for compress in [dataserializer.CompressType.LZ4, dataserializer.CompressType.GZIP]:
            if compress not in dataserializer.get_supported_compressions():
                continue

            data = np.random.random((1000, 100))
            serialized = pyarrow.serialize(data).to_buffer()

            # test complete read
            reader = ArrowBufferIO(
                pyarrow.py_buffer(serialized), 'r', compress_out=compress)
            assert_array_equal(data, dataserializer.loads(reader.read()))

            # test partial read
            reader = ArrowBufferIO(
                pyarrow.py_buffer(serialized), 'r', compress_out=compress)
            block = reader.read(128)
            data_left = reader.read()
            assert_array_equal(data, dataserializer.loads(block + data_left))

            # test read by chunks
            bio = BytesIO()
            reader = ArrowBufferIO(
                pyarrow.py_buffer(serialized), 'r', compress_out=compress)
            while True:
                block = reader.read(128)
                if not block:
                    break
                bio.write(block)

            compressed = bio.getvalue()
            assert_array_equal(data, dataserializer.loads(compressed))

            # test write by chunks
            data_sink = bytearray(len(serialized))
            compressed_mv = memoryview(compressed)
            writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
            pos = 0
            while pos < len(compressed):
                endpos = min(pos + 128, len(compressed))
                writer.write(compressed_mv[pos:endpos])
                pos = endpos

            assert_array_equal(data, pyarrow.deserialize(data_sink))