Python pyarrow.uint32() Examples

The following are 4 code examples of pyarrow.uint32(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pyarrow, or try the search function.

Example #1

Source File: parquet_pio.py From sdc with BSD 2-Clause "Simplified" License

5 votes

def _get_numba_typ_from_pa_typ(pa_typ):
    """Look up the type object registered for a pyarrow data type.

    The lookup table is rebuilt on every call and is keyed by pyarrow data
    type instances; values come from the ``types`` namespace already in
    scope (Numba types, going by this function's name) or ``string_type``.

    Args:
        pa_typ: pyarrow data type to translate.

    Returns:
        The table entry stored for ``pa_typ``.

    Raises:
        ValueError: if ``pa_typ`` is not one of the keys in the table.
    """
    import pyarrow as pa

    # boolean, signed ints, unsigned ints and floats: the pyarrow factory and
    # the attribute on ``types`` carry the same name, so pair them up by name.
    # (TODO: float16?)
    same_named = (
        'bool_',
        'int8', 'int16', 'int32', 'int64',
        'uint8', 'uint16', 'uint32', 'uint64',
        'float32', 'float64',
    )
    lookup = {getattr(pa, name)(): getattr(types, name) for name in same_named}

    # String
    lookup[pa.string()] = string_type

    # Dates and timestamps of every listed unit all map to NPDatetime('ns').
    # (TODO: time32, time64, ...)
    temporal_keys = [pa.date32(), pa.date64()]
    temporal_keys += [pa.timestamp(unit) for unit in ('ns', 'us', 'ms', 's')]
    for arrow_typ in temporal_keys:
        lookup[arrow_typ] = types.NPDatetime('ns')

    if pa_typ in lookup:
        return lookup[pa_typ]
    raise ValueError("Arrow data type {} not supported yet".format(pa_typ))

Example #2

Source File: types.py From cjworkbench with GNU Affero General Public License v3.0

5 votes

def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    """Choose the pyarrow data type that corresponds to a NumPy dtype.

    Args:
        dtype: NumPy dtype to translate.

    Returns:
        The matching pyarrow integer/float type for fixed-width numeric
        dtypes, a timezone-less nanosecond timestamp for datetime dtypes
        (``kind == "M"``), or ``pyarrow.string()`` for ``np.object_``.

    Raises:
        AssertionError: if a datetime dtype is not in ``[ns]`` units.
        RuntimeError: if ``dtype`` matches none of the handled cases.
    """
    # Checked top to bottom with ``==``; the first match wins.
    numeric_factories = (
        (np.int8, pyarrow.int8),
        (np.int16, pyarrow.int16),
        (np.int32, pyarrow.int32),
        (np.int64, pyarrow.int64),
        (np.uint8, pyarrow.uint8),
        (np.uint16, pyarrow.uint16),
        (np.uint32, pyarrow.uint32),
        (np.uint64, pyarrow.uint64),
        (np.float16, pyarrow.float16),
        (np.float32, pyarrow.float32),
        (np.float64, pyarrow.float64),
    )
    for numpy_type, make_arrow_type in numeric_factories:
        if dtype == numpy_type:
            return make_arrow_type()

    if dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        assert dtype.str.endswith("[ns]")
        return pyarrow.timestamp(unit="ns", tz=None)

    if dtype == np.object_:
        return pyarrow.string()

    raise RuntimeError("Unhandled dtype %r" % dtype)

Example #3

Source File: array_util_test.py From tfx-bsl with Apache License 2.0

5 votes

def _get_numeric_byte_size_test_cases():
  """Build one named test-case dict per numeric Arrow type.

  Each case holds a nine-element array built from ``range(9)`` in the given
  type, a slice window (offset 2, length 3), and the expected byte sizes for
  the full and sliced array. Each expected size is a null-bitmap term from
  ``_all_false_null_bitmap_size`` plus the per-element width times the
  element count.

  Returns:
    A list of keyword dicts, one per (type, width) pair, in table order.
  """
  # (Arrow type, bytes per element)
  type_widths = (
      (pa.int8(), 1),
      (pa.uint8(), 1),
      (pa.int16(), 2),
      (pa.uint16(), 2),
      (pa.int32(), 4),
      (pa.uint32(), 4),
      (pa.int64(), 8),
      (pa.uint64(), 8),
      (pa.float32(), 4),
      (pa.float64(), 8),
  )
  return [
      {
          "testcase_name": str(arrow_type),
          "array": pa.array(range(9), type=arrow_type),
          "slice_offset": 2,
          "slice_length": 3,
          "expected_size": _all_false_null_bitmap_size(2) + width * 9,
          "expected_sliced_size": _all_false_null_bitmap_size(1) + width * 3,
      }
      for arrow_type, width in type_widths
  ]

Example #4

Source File: tensor_adapter_test.py From tfx-bsl with Apache License 2.0

5 votes

def test2DSparseTensor(self):
    # Converts a record batch with one value column and two index columns
    # into a sparse tensor described by a sparse_tensor TensorRepresentation
    # with dense shape 10 x 20, then checks the converted output.
    tensor_representation = text_format.Parse(
        """
        sparse_tensor {
          value_column_name: "values"
          index_column_names: ["d0", "d1"]
          dense_shape {
            dim {
              size: 10
            }
            dim {
              size: 20
            }
          }
        }
        """, schema_pb2.TensorRepresentation())

    # Five rows: one value, null, one value, three values, empty list.
    values_column = pa.array(
        [[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64()))
    # Also test that the index column can be of an integral type other
    # than int64.
    d0_column = pa.array(
        [[9], None, [9], [7, 8, 9], []], type=pa.list_(pa.uint32()))
    d1_column = pa.array(
        [[0], None, [0], [0, 1, 2], []], type=pa.list_(pa.int64()))
    record_batch = pa.RecordBatch.from_arrays(
        [values_column, d0_column, d1_column], ["values", "d0", "d1"])

    adapter_config = tensor_adapter.TensorAdapterConfig(
        record_batch.schema, {"output": tensor_representation})
    adapter = tensor_adapter.TensorAdapter(adapter_config)
    converted = adapter.ToBatchTensors(record_batch)

    # Exactly one tensor is produced, under the configured name.
    self.assertLen(converted, 1)
    self.assertIn("output", converted)
    actual_output = converted["output"]
    sparse_kinds = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
    self.assertIsInstance(actual_output, sparse_kinds)

    # Each index is [row, d0, d1]; the leading dense dimension is the
    # number of rows in the batch.
    expected_output = tf.compat.v1.SparseTensorValue(
        dense_shape=[5, 10, 20],
        indices=[[0, 9, 0], [2, 9, 0], [3, 7, 0], [3, 8, 1], [3, 9, 2]],
        values=tf.convert_to_tensor([1, 2, 3, 4, 5], dtype=tf.int64))
    self.assertSparseAllEqual(expected_output, actual_output)

    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)