Python pyarrow.deserialize() Examples
The following are 26 code examples of pyarrow.deserialize(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the pyarrow module.
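All of these snippets target pyarrow's legacy serialization API, in which pa.serialize() and pa.deserialize() form a round-trip pair. As a quick orientation, here is a minimal sketch of that round trip (note: this API was deprecated in pyarrow 2.0 and removed in later releases, so the examples on this page require an older pyarrow):

import numpy as np
import pyarrow as pa

# Serialize an arbitrary Python object into an Arrow buffer, then
# reconstruct it; NumPy arrays travel without copies where possible.
value = {"weights": np.arange(12, dtype=np.float32).reshape(3, 4)}
buf = pa.serialize(value).to_buffer()   # -> pyarrow.Buffer
restored = pa.deserialize(buf)
assert np.array_equal(value["weights"], restored["weights"])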
Example #1
Source File: dataserializer.py From mars with Apache License 2.0 | 6 votes |
def loads(buf):
    mv = memoryview(buf)
    header = read_file_header(mv)
    compress = header.compress

    if compress == CompressType.NONE:
        data = buf[HEADER_LENGTH:]
    else:
        data = decompressors[compress](mv[HEADER_LENGTH:])

    if header.type == SerialType.ARROW:
        try:
            return pyarrow.deserialize(memoryview(data), mars_serialize_context())
        except pyarrow.lib.ArrowInvalid:  # pragma: no cover
            # reconstruct value from buffers of arrow components
            data_view = memoryview(data)
            meta_block_size = np.frombuffer(data_view[0:4], dtype='int32').item()
            meta = pickle.loads(data_view[4:4 + meta_block_size])  # nosec
            buffer_sizes = meta.pop('buffer_sizes')
            bounds = np.cumsum([4 + meta_block_size] + buffer_sizes)
            meta['data'] = [pyarrow.py_buffer(data_view[bounds[idx]:bounds[idx + 1]])
                            for idx in range(len(buffer_sizes))]
            return pyarrow.deserialize_components(meta, mars_serialize_context())
    else:
        return pickle.loads(data)
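The fallback branch above rebuilds the value from serialized Arrow components. A minimal sketch of the components API it relies on (legacy pyarrow; the sample value is illustrative):

import numpy as np
import pyarrow as pa

value = {"x": np.arange(10)}
# to_components() splits the serialized object into metadata plus a list
# of pyarrow.Buffer objects that can be stored or shipped separately.
components = pa.serialize(value).to_components()
restored = pa.deserialize_components(components)
assert np.array_equal(value["x"], restored["x"])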
Example #2
Source File: worker.py From rl_algorithms with MIT License | 6 votes |
def recv_params_from_learner(self):
    """Get new params from the learner, if any, and synchronize the worker."""
    received = False
    try:
        new_params_id = self.sub_socket.recv(zmq.DONTWAIT)
        received = True
    except zmq.Again:
        # Don't block if the learner hasn't sent new params yet
        pass

    if received:
        new_param_info = pa.deserialize(new_params_id)
        update_step, new_params = new_param_info
        self.update_step = update_step
        self.worker.synchronize(new_params)

        # Add new entry for scores dict
        self.scores[self.update_step] = []
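For context, the learner-side publisher presumably serializes the same (update_step, params) tuple that this worker unpacks; a hypothetical sketch (the function name and socket argument are assumptions, not the rl_algorithms source):

import pyarrow as pa

def send_params_to_workers(pub_socket, update_step, new_params):
    # Serialize the (update_step, params) tuple the worker expects.
    new_params_id = pa.serialize((update_step, new_params)).to_buffer()
    pub_socket.send(new_params_id)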
Example #3
Source File: distributed_logger.py From rl_algorithms with MIT License | 6 votes |
def run(self):
    """Run main logging loop; continuously receive data and log."""
    if self.args.log:
        self.set_wandb()

    while self.update_step < self.args.max_update_step:
        self.recv_log_info()
        if self.log_info_queue:  # if non-empty
            log_info_id = self.log_info_queue.pop()
            log_info = pa.deserialize(log_info_id)

            state_dict = log_info["state_dict"]
            log_value = log_info["log_value"]
            self.update_step = log_value["update_step"]

            self.synchronize(state_dict)
            avg_score = self.test(self.update_step)
            log_value["avg_score"] = avg_score
            self.write_log(log_value)
Example #4
Source File: wrapper.py From rl_algorithms with MIT License | 6 votes |
def recv_worker_data(self):
    """Receive replay data from a worker and incorporate it into the buffer."""
    received = False
    try:
        new_replay_data_id = self.pull_socket.recv(zmq.DONTWAIT)
        received = True
    except zmq.Again:
        pass

    if received:
        new_replay_data = pa.deserialize(new_replay_data_id)
        experience, priorities = new_replay_data
        for idx in range(len(experience["states"])):
            transition = (
                experience["states"][idx],
                experience["actions"][idx],
                experience["rewards"][idx],
                experience["next_states"][idx],
                experience["dones"][idx],
            )
            self.buffer.add(transition)
            self.buffer.update_priorities([len(self.buffer) - 1], priorities[idx])
Example #5
Source File: serializer.py From cloudburst with Apache License 2.0 | 6 votes |
def load(self, data):
    # If the type of the input is bytes, then we need to deserialize the
    # input first.
    if type(data) == bytes:
        val = Value()
        val.ParseFromString(data)
    elif type(data).__name__ == Value.__name__:
        # If it's already deserialized, we can just proceed.
        val = data
    else:
        raise ValueError(f'Input to load was of unsupported type {str(type(data))}.')

    if val.type == DEFAULT:
        try:
            return self._load_default(val.body)
        except:  # Unpickling error.
            return val.body
    elif val.type == STRING:
        return self._load_string(val.body)
    elif val.type == NUMPY:
        return self._load_numpy(val.body)
Example #6
Source File: serialize.py From tf-cpn with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
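The `dumps` this docstring refers to is typically a one-line counterpart like the following (a sketch consistent with the docstring, not necessarily the tf-cpn source):

import pyarrow as pa

def dumps_pyarrow(obj):
    """Serialize an object into a pyarrow buffer; `loads_pyarrow` inverts it."""
    return pa.serialize(obj).to_buffer()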
Example #7
Source File: serialize.py From tensorpack with Apache License 2.0 | 5 votes |
def loads(buf):
    """
    Args:
        buf: the output of `dumps` or `dumps_bytes`.
    """
    import pyarrow as pa
    return pa.deserialize(buf)
Example #8
Source File: serializer.py From surreal with MIT License | 5 votes |
def deserialize(binary):
    """
    We can improve this function if we *really* need more memory efficiency
    """
    return _DESERIALIZER(binary)
Example #9
Source File: serializer.py From surreal with MIT License | 5 votes |
def pa_deserialize(binary):
    return pa.deserialize(binary)
Example #10
Source File: serialize.py From video-to-pose3D with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
Example #11
Source File: serialization.py From catalyst with Apache License 2.0 | 5 votes |
def pyarrow_deserialize(bytes):
    """Deserialize bytes into an object using pyarrow.

    Args:
        bytes: a bytes-like object containing data serialized with pyarrow.

    Returns:
        The value deserialized from the bytes-like object.
    """
    return pyarrow.deserialize(bytes)
Example #12
Source File: utils.py From incubator-superset with Apache License 2.0 | 5 votes |
def _deserialize_results_payload(
    payload: Union[bytes, str], query: Query, use_msgpack: Optional[bool] = False
) -> Dict[str, Any]:
    logger.debug("Deserializing from msgpack: %r", use_msgpack)
    if use_msgpack:
        with stats_timing(
            "sqllab.query.results_backend_msgpack_deserialize", stats_logger
        ):
            ds_payload = msgpack.loads(payload, raw=False)

        with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
            pa_table = pa.deserialize(ds_payload["data"])

        df = result_set.SupersetResultSet.convert_table_to_df(pa_table)
        ds_payload["data"] = dataframe.df_to_records(df) or []

        db_engine_spec = query.database.db_engine_spec
        all_columns, data, expanded_columns = db_engine_spec.expand_data(
            ds_payload["selected_columns"], ds_payload["data"]
        )
        ds_payload.update(
            {"data": data, "columns": all_columns, "expanded_columns": expanded_columns}
        )

        return ds_payload

    with stats_timing("sqllab.query.results_backend_json_deserialize", stats_logger):
        return json.loads(payload)
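A hedged sketch of what the matching msgpack serialization path might look like; the payload keys mirror what the deserializer above reads, but this is an illustration, not Superset's actual code:

import msgpack
import pyarrow as pa

def _serialize_results_payload_sketch(pa_table, selected_columns):
    # Pack the Arrow table bytes plus the metadata keys that
    # _deserialize_results_payload expects ("data", "selected_columns").
    payload = {
        "data": pa.serialize(pa_table).to_buffer().to_pybytes(),
        "selected_columns": selected_columns,
    }
    return msgpack.dumps(payload)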
Example #13
Source File: serialize.py From PoseFix_RELEASE with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
Example #14
Source File: serializer.py From cloudburst with Apache License 2.0 | 5 votes |
def _load_numpy(self, msg):
    if not msg:
        return msg
    return pa.deserialize(msg)
Example #15
Source File: serialize.py From ADL with MIT License | 5 votes |
def loads(buf):
    """
    Args:
        buf: the output of `dumps` or `dumps_bytes`.
    """
    import pyarrow as pa
    return pa.deserialize(buf)
Example #16
Source File: serialize.py From lighttrack with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)
Example #17
Source File: serialize.py From petridishnn with MIT License | 5 votes |
def loads_pyarrow(buf):
    """
    Args:
        buf: the output of `dumps`.
    """
    return pa.deserialize(buf)

# importing pyarrow has a lot of side effects:
# https://github.com/apache/arrow/pull/2329
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/TMqRaT-H2bI
# So we use msgpack as default.
Example #18
Source File: convert_lmdb.py From torch-toolbox with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_pyarrow(buf):
    assert buf is not None, 'buf should not be None.'
    return pyarrow.deserialize(buf)
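In an LMDB pipeline like this one, load_pyarrow is typically applied to a raw record fetched from the database; a minimal usage sketch (the path and key layout are hypothetical):

import lmdb

def read_record(db_path, key: bytes):
    env = lmdb.open(db_path, readonly=True, lock=False)
    try:
        with env.begin(write=False) as txn:
            buf = txn.get(key)   # None if the key is missing
        return load_pyarrow(buf)
    finally:
        env.close()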
Example #19
Source File: wrapper.py From rl_algorithms with MIT License | 5 votes |
def send_batch_to_learner(self):
    """Send batch to learner and receive priorities."""
    # Send batch and request priorities (blocking recv)
    batch = self.buffer.sample(self.per_beta)
    batch_id = pa.serialize(batch).to_buffer()
    self.req_socket.send(batch_id)
    self.num_sent = self.num_sent + 1

    # Receive priorities
    new_priors_id = self.req_socket.recv()
    idxes, new_priorities = pa.deserialize(new_priors_id)
    self.buffer.update_priorities(idxes, new_priorities)
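The learner side of this REQ/REP exchange would mirror it: receive the batch, compute new priorities, and reply on the same socket. A hypothetical sketch consistent with the snippet (compute_priorities is an assumed callback, not part of the source):

import pyarrow as pa

def recv_batch_and_send_priorities(rep_socket, compute_priorities):
    # Blocking recv of the serialized batch, then reply with priorities.
    batch = pa.deserialize(rep_socket.recv())
    idxes, new_priorities = compute_priorities(batch)
    rep_socket.send(pa.serialize((idxes, new_priorities)).to_buffer())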
Example #20
Source File: learner.py From rl_algorithms with MIT License | 5 votes |
def recv_replay_data(self):
    """Receive replay data from the global buffer."""
    replay_data_id = self.rep_socket.recv()
    replay_data = pa.deserialize(replay_data_id)
    return replay_data
Example #21
Source File: pyarrow_serializer.py From petastorm with Apache License 2.0 | 5 votes |
def deserialize(self, serialized_rows):
    return pyarrow.deserialize(serialized_rows, self._get_serialization_context())
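The serialization context is what lets the legacy API round-trip custom types. A minimal sketch of registering one (the Point class and callbacks are illustrative, not petastorm's actual context):

import pyarrow as pa

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

# Register how to break the type down into serializable data and back.
context = pa.SerializationContext()
context.register_type(
    Point, 'Point',
    custom_serializer=lambda p: (p.x, p.y),
    custom_deserializer=lambda data: Point(*data))

buf = pa.serialize(Point(1, 2), context=context).to_buffer()
restored = pa.deserialize(buf, context=context)
assert (restored.x, restored.y) == (1, 2)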
Example #22
Source File: ray_util.py From rlgraph with Apache License 2.0 | 5 votes |
def ray_decompress(data):
    if isinstance(data, bytes) or isinstance(data, string_types):
        data = base64.b64decode(data)
    data = lz4.frame.decompress(data)
    data = pyarrow.deserialize(data)
    return data
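The inverse helper would run the same steps in reverse order; a sketch (rlgraph's actual ray_compress may differ):

import base64
import lz4.frame
import pyarrow

def ray_compress(data):
    data = pyarrow.serialize(data).to_buffer().to_pybytes()
    data = lz4.frame.compress(data)
    # base64-encode so the blob can be stored as a string (e.g. in Redis).
    return base64.b64encode(data).decode('ascii')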
Example #23
Source File: dataserializer.py From mars with Apache License 2.0 | 5 votes |
def deserialize(data):
    return pyarrow.deserialize(data, mars_serialize_context())
Example #24
Source File: dataserializer.py From mars with Apache License 2.0 | 5 votes |
def load(file):
    header = read_file_header(file)
    file = open_decompression_file(file, header.compress)
    try:
        buf = file.read()
    finally:
        if header.compress != CompressType.NONE:
            file.close()

    if header.type == SerialType.ARROW:
        return pyarrow.deserialize(memoryview(buf), mars_serialize_context())
    else:
        return pickle.loads(buf)
Example #25
Source File: serialize.py From dataflow with Apache License 2.0 | 5 votes |
def loads(buf):
    """
    Args:
        buf: the output of `dumps` or `dumps_bytes`.
    """
    import pyarrow as pa
    return pa.deserialize(buf)
Example #26
Source File: test_dataio.py From mars with Apache License 2.0 | 4 votes |
def testArrowBufferIO(self):
    if not np:
        return
    from numpy.testing import assert_array_equal

    for compress in [dataserializer.CompressType.LZ4, dataserializer.CompressType.GZIP]:
        if compress not in dataserializer.get_supported_compressions():
            continue

        data = np.random.random((1000, 100))
        serialized = pyarrow.serialize(data).to_buffer()

        # test complete read
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        assert_array_equal(data, dataserializer.loads(reader.read()))

        # test partial read
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        block = reader.read(128)
        data_left = reader.read()
        assert_array_equal(data, dataserializer.loads(block + data_left))

        # test read by chunks
        bio = BytesIO()
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        while True:
            block = reader.read(128)
            if not block:
                break
            bio.write(block)

        compressed = bio.getvalue()
        assert_array_equal(data, dataserializer.loads(compressed))

        # test write by chunks
        data_sink = bytearray(len(serialized))
        compressed_mv = memoryview(compressed)
        writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
        pos = 0
        while pos < len(compressed):
            endpos = min(pos + 128, len(compressed))
            writer.write(compressed_mv[pos:endpos])
            pos = endpos

        assert_array_equal(data, pyarrow.deserialize(data_sink))