Python pyarrow.serialize() Examples

The following are 30 code examples of pyarrow.serialize(), collected from open-source projects. The project, source file, and license for each example are listed above it. You may also want to check out all other available functions and classes of the pyarrow module.
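Note that pyarrow.serialize() and pyarrow.deserialize() have been deprecated since pyarrow 2.0 (pickle protocol 5 is the suggested replacement), so the examples below assume an older pyarrow. As a quick orientation, here is a minimal round-trip sketch; the payload dict is purely illustrative:

import pyarrow as pa

# Serialize to an Arrow buffer, optionally convert to plain bytes, then restore.
payload = {"step": 1, "scores": [0.1, 0.2, 0.3]}
buf = pa.serialize(payload).to_buffer()   # pa.Buffer (zero-copy view of the serialized data)
raw = buf.to_pybytes()                    # plain bytes, e.g. for sockets or files
restored = pa.deserialize(raw)            # accepts bytes or pa.Buffer
assert restored == payload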
Example #1
Source File: dataserializer.py    From mars with Apache License 2.0
def dump(obj, file, *, serial_type=None, compress=None, pickle_protocol=None):
    if serial_type is None:
        serial_type = SerialType.ARROW if pyarrow is not None else SerialType.PICKLE
    if compress is None:
        compress = CompressType.NONE
    try:
        if serial_type == SerialType.ARROW:
            serialized = pyarrow.serialize(obj, mars_serialize_context())
            data_size = serialized.total_bytes
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            file = open_compression_file(file, compress)
            serialized.write_to(file)
        else:
            pickle_protocol = pickle_protocol or pickle.HIGHEST_PROTOCOL
            serialized = pickle.dumps(obj, protocol=pickle_protocol)
            data_size = len(serialized)
            write_file_header(file, file_header(serial_type, SERIAL_VERSION, data_size, compress))
            file = open_compression_file(file, compress)
            file.write(serialized)
    finally:
        if compress != CompressType.NONE:
            file.close()
    return 
Example #2
Source File: serialize.py    From PoseFix_RELEASE with MIT License
def dumps_pyarrow(obj):
    """
    Serialize an object.

    Returns:
        Implementation-dependent bytes-like object
    """
    return pa.serialize(obj).to_buffer() 
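The matching load side is a single pa.deserialize() call. A hedged sketch under the same imports as the snippet above; loads_pyarrow is not part of the original file:

def loads_pyarrow(buf):
    """
    Deserialize a buffer produced by dumps_pyarrow (illustrative counterpart, not in the source).
    """
    return pa.deserialize(buf)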
Example #3
Source File: learner.py    From rl_algorithms with MIT License
def send_info_to_logger(
        self, np_state_dict: List[np.ndarray], step_info: list,
    ):
        """Send new params and log info to logger."""
        log_value = dict(update_step=self.update_step, step_info=step_info)
        log_info = dict(log_value=log_value, state_dict=np_state_dict)
        log_info_id = pa.serialize(log_info).to_buffer()
        self.push_socket.send(log_info_id) 
Example #4
Source File: wrapper.py    From rl_algorithms with MIT License
def send_batch_to_learner(self):
        """Send batch to learner and receive priorities."""
        # Send batch and request priorities (blocking recv)
        batch = self.buffer.sample(self.per_beta)
        batch_id = pa.serialize(batch).to_buffer()
        self.req_socket.send(batch_id)
        self.num_sent = self.num_sent + 1

        # Receive priorities
        new_priors_id = self.req_socket.recv()
        idxes, new_priorities = pa.deserialize(new_priors_id)
        self.buffer.update_priorities(idxes, new_priorities) 
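On the other end of this REQ/REP exchange, the learner would deserialize the batch and reply with new priorities. A rough sketch under the assumption of a matching self.rep_socket; recv_batch_and_reply and compute_priorities are hypothetical names, not taken from the project:

def recv_batch_and_reply(self):
        """Receive a sampled batch and reply with updated priorities (illustrative sketch)."""
        batch = pa.deserialize(self.rep_socket.recv())            # blocking recv of the REQ message
        idxes, new_priorities = self.compute_priorities(batch)    # placeholder for the actual learner update
        self.rep_socket.send(pa.serialize([idxes, new_priorities]).to_buffer())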
Example #5
Source File: serialize.py    From tf-cpn with MIT License
def dumps_pyarrow(obj):
    """
    Serialize an object.

    Returns:
        Implementation-dependent bytes-like object
    """
    return pa.serialize(obj).to_buffer() 
Example #6
Source File: serialize.py    From petridishnn with MIT License
def dumps_pyarrow(obj):
    """
    Serialize an object.

    Returns:
        Implementation-dependent bytes-like object.
        May not be compatible across different versions of pyarrow.
    """
    return pa.serialize(obj).to_buffer() 
Example #7
Source File: serialize.py    From lighttrack with MIT License
def dumps_pyarrow(obj):
    """
    Serialize an object.

    Returns:
        Implementation-dependent bytes-like object
    """
    return pa.serialize(obj).to_buffer() 
Example #8
Source File: serialize.py    From ADL with MIT License
def dumps(obj):
        """
        Serialize an object.

        Returns:
            Implementation-dependent bytes-like object.
            May not be compatible across different versions of pyarrow.
        """
        import pyarrow as pa
        return pa.serialize(obj).to_buffer() 
Example #9
Source File: serializer.py    From cloudburst with Apache License 2.0
def dump(self, data, valobj=None, serialize=True):
        if not valobj:
            valobj = Value()

        # If we are attempting to pass a future into another function, we
        # simply turn it into a reference because the runtime knows how to
        # automatically resolve it.
        if type(data) == bytes:
            valobj.body = data
            valobj.type = DEFAULT
        elif isinstance(data, future.CloudburstFuture):
            valobj.body = self._dump_default(CloudburstReference(data.obj_id,
                                                              True))
            valobj.type = DEFAULT
        elif isinstance(data, np.ndarray) or isinstance(data, pd.DataFrame):
            valobj.body = self._dump_numpy(data)
            valobj.type = NUMPY
        elif isinstance(data, str):
            valobj.body = self._dump_string(data)
            valobj.type = STRING
        else:
            valobj.body = self._dump_default(data)
            valobj.type = DEFAULT

        if not serialize:
            return valobj

        return valobj.SerializeToString() 
Example #10
Source File: serializer.py    From cloudburst with Apache License 2.0
def _dump_numpy(self, msg):
        return pa.serialize(msg).to_buffer().to_pybytes() 
Example #11
Source File: learner.py    From rl_algorithms with MIT License
def publish_params(self, update_step: int, np_state_dict: List[np.ndarray]):
        """Broadcast updated params to all workers."""
        param_info = [update_step, np_state_dict]
        new_params_id = pa.serialize(param_info).to_buffer()
        self.pub_socket.send(new_params_id) 
Example #12
Source File: serialization.py    From catalyst with Apache License 2.0
def pyarrow_serialize(data):
    """Serialize the data into bytes using pyarrow.

    Args:
        data: a value

    Returns:
        Returns a bytes object serialized with pyarrow data.
    """
    return pyarrow.serialize(data).to_buffer().to_pybytes() 
Example #13
Source File: serialize.py    From video-to-pose3D with MIT License
def dumps_pyarrow(obj):
    """
    Serialize an object.

    Returns:
        Implementation-dependent bytes-like object
    """
    return pa.serialize(obj).to_buffer() 
Example #14
Source File: serializer.py    From surreal with MIT License
def pa_serialize(obj):
    return pa.serialize(obj).to_buffer() 
Example #15
Source File: serializer.py    From surreal with MIT License
def serialize(obj):
    """
    We can improve this function if we *really* need more memory efficiency
    """
    return _SERIALIZER(obj) 
Example #16
Source File: serializer.py    From surreal with MIT License
def pyobj_hash(obj):
    return binary_hash(serialize(obj)) 
Example #17
Source File: serialize.py    From tensorpack with Apache License 2.0
def dumps(obj):
        """
        Serialize an object.

        Returns:
            Implementation-dependent bytes-like object.
            May not be compatible across different versions of pyarrow.
        """
        import pyarrow as pa
        return pa.serialize(obj).to_buffer() 
Example #18
Source File: convert_lmdb.py    From torch-toolbox with BSD 3-Clause "New" or "Revised" License
def dumps_pyarrow(obj):
    return pyarrow.serialize(obj).to_buffer() 
Example #19
Source File: learner.py    From rl_algorithms with MIT License
def send_new_priorities(self, indices: np.ndarray, priorities: np.ndarray):
        """Send new priority values and corresponding indices to buffer."""
        new_priors = [indices, priorities]
        new_priors_id = pa.serialize(new_priors).to_buffer()
        self.rep_socket.send(new_priors_id) 
Example #20
Source File: worker.py    From rl_algorithms with MIT License
def send_data_to_buffer(self, replay_data):
        """Send replay data to global buffer."""
        replay_data_id = pa.serialize(replay_data).to_buffer()
        self.push_socket.send(replay_data_id) 
Example #21
Source File: pyarrow_serializer.py    From petastorm with Apache License 2.0
def serialize(self, rows):
        return pyarrow.serialize(rows, self._get_serialization_context()).to_buffer() 
Example #22
Source File: local_disk_arrow_table_cache.py    From petastorm with Apache License 2.0
def get(self, key, fill_cache_func):
        value = self._cache.get(key, default=None)
        if value is None:
            value = fill_cache_func()
            table_pandas = value.to_pandas()
            serialized_df = pa.serialize(table_pandas)
            components = serialized_df.to_components()
            self._cache.set(key, components)
        else:
            original_df = pa.deserialize_components(value)
            value = pa.Table.from_pandas(original_df, preserve_index=False)

        return value 
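The to_components()/deserialize_components() pair used above splits a serialized object into metadata plus a list of zero-copy buffers, which is what gets written to the disk cache. A standalone sketch of that round trip; the DataFrame is illustrative:

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"a": [1, 2, 3]})
components = pa.serialize(df).to_components()     # dict of metadata and pa.Buffer objects
restored = pa.deserialize_components(components)  # rebuilds the original object
assert restored.equals(df)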
Example #23
Source File: local_disk_arrow_table_cache.py    From petastorm with Apache License 2.0
def __init__(self, *args, **kwargs):
        super(LocalDiskArrowTableCache, self).__init__(*args, **kwargs)
        # Workaround for https://issues.apache.org/jira/browse/ARROW-5260:
        # unless something is serialized before deserialize_components is called, the process crashes with a SIGSEGV.
        pa.serialize(0) 
Example #24
Source File: ray_util.py    From rlgraph with Apache License 2.0
def ray_compress(data):
    data = pyarrow.serialize(data).to_buffer().to_pybytes()
    data = lz4.frame.compress(data)
    # Unclear why ascii decoding.
    data = base64.b64encode(data).decode("ascii")
    # data = base64.b64encode(data)
    return data 
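Reading such a payload back would undo the steps in reverse order. A plausible counterpart sketch (ray_decompress here is illustrative, not copied from the original file), assuming the same imports as the snippet above:

def ray_decompress(data):
    """base64-decode, lz4-decompress, then pyarrow-deserialize (illustrative sketch)."""
    data = base64.b64decode(data)       # b64decode accepts both str and bytes
    data = lz4.frame.decompress(data)
    return pyarrow.deserialize(data)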
Example #25
Source File: sharedstore.py    From mars with Apache License 2.0
def __init__(self, plasma_client, mapper_ref):
        from ...serialize.dataserializer import mars_serialize_context

        self._plasma_client = plasma_client
        self._actual_size = None
        self._serialize_context = mars_serialize_context()

        self._mapper_ref = mapper_ref
        self._pool = mapper_ref.ctx.threadpool(1) 
Example #26
Source File: vineyardhandler.py    From mars with Apache License 2.0
def load_from_bytes_io(self, session_id, data_keys, src_handler, pin_token=None):
        def _read_serialized(reader):
            with reader:
                return reader.get_io_pool().submit(reader.read).result()

        def _fallback(*_):
            return self._batch_load_objects(
                session_id, data_keys,
                lambda k: src_handler.create_bytes_reader(session_id, k, _promise=True).then(_read_serialized),
                serialize=True
            )

        return self.transfer_in_runner(session_id, data_keys, src_handler, _fallback) 
Example #27
Source File: vineyardhandler.py    From mars with Apache License 2.0
def get_objects(self, session_id, data_keys, serialize=False, _promise=False):
        data_ids = [self._get_object_id(session_id, data_key) for data_key in data_keys]
        return self._client.get_object(data_ids) 
Example #28
Source File: vineyardhandler.py    From mars with Apache License 2.0
def __init__(self, vineyard_client, session_id, data_key, data_id, mode='w',
                 nbytes=None, packed=False, compress=None, auto_register=True,
                 pin_token=None, handler=None):
        from .objectholder import SharedHolderActor

        logger.debug('create vineyard bytes IO: mode = %s, packed = %s', mode, packed)

        super().__init__(session_id, data_key, mode=mode, handler=handler)
        self._client = vineyard_client
        self._data_id = data_id
        self._components = None
        self._offset = 0
        self._nbytes = nbytes
        self._holder_ref = self._storage_ctx.actor_ctx.actor_ref(SharedHolderActor.default_uid())
        self._compress = compress or dataserializer.CompressType.NONE
        self._packed = packed
        self._auto_register = auto_register
        self._pin_token = pin_token

        block_size = options.worker.copy_block_size

        if self.is_readable:
            logger.debug('bytes io get: session_id = %s, data_key = %s, data_id = %r, type(data_id) = %r',
                         session_id, data_key, data_id, type(data_id))
            data = self._client.get(data_id)

            self._components = pyarrow.serialize(data, dataserializer.mars_serialize_context()).to_components()
            if packed:
                self._buf = ArrowComponentsIO(
                    self._components, 'r', compress_out=compress, block_size=block_size)
            else:
                raise NotImplementedError('Unknown how to read vineyard values in an unpacked way')
        else:
            raise NotImplementedError 
Example #29
Source File: dataserializer.py    From mars with Apache License 2.0
def serialize(data):
    return pyarrow.serialize(data, mars_serialize_context()) 
Example #30
Source File: serialize.py    From dataflow with Apache License 2.0
def dumps(obj):
        """
        Serialize an object.

        Returns:
            Implementation-dependent bytes-like object.
            May not be compatible across different versions of pyarrow.
        """
        import pyarrow as pa
        return pa.serialize(obj).to_buffer()