Python pyarrow.uint64() Examples

The following are 8 code examples of pyarrow.uint64(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pyarrow , or try the search function .
Example #1
Source File: parquet_pio.py    From sdc with BSD 2-Clause "Simplified" License 5 votes vote down vote up
def _get_numba_typ_from_pa_typ(pa_typ):
    import pyarrow as pa
    _typ_map = {
        # boolean
        pa.bool_(): types.bool_,
        # signed int types
        pa.int8(): types.int8,
        pa.int16(): types.int16,
        pa.int32(): types.int32,
        pa.int64(): types.int64,
        # unsigned int types
        pa.uint8(): types.uint8,
        pa.uint16(): types.uint16,
        pa.uint32(): types.uint32,
        pa.uint64(): types.uint64,
        # float types (TODO: float16?)
        pa.float32(): types.float32,
        pa.float64(): types.float64,
        # String
        pa.string(): string_type,
        # date
        pa.date32(): types.NPDatetime('ns'),
        pa.date64(): types.NPDatetime('ns'),
        # time (TODO: time32, time64, ...)
        pa.timestamp('ns'): types.NPDatetime('ns'),
        pa.timestamp('us'): types.NPDatetime('ns'),
        pa.timestamp('ms'): types.NPDatetime('ns'),
        pa.timestamp('s'): types.NPDatetime('ns'),
    }
    if pa_typ not in _typ_map:
        raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
    return _typ_map[pa_typ] 
Example #2
Source File: types.py    From cjworkbench with GNU Affero General Public License v3.0 5 votes vote down vote up
def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    if dtype == np.int8:
        return pyarrow.int8()
    elif dtype == np.int16:
        return pyarrow.int16()
    elif dtype == np.int32:
        return pyarrow.int32()
    elif dtype == np.int64:
        return pyarrow.int64()
    elif dtype == np.uint8:
        return pyarrow.uint8()
    elif dtype == np.uint16:
        return pyarrow.uint16()
    elif dtype == np.uint32:
        return pyarrow.uint32()
    elif dtype == np.uint64:
        return pyarrow.uint64()
    elif dtype == np.float16:
        return pyarrow.float16()
    elif dtype == np.float32:
        return pyarrow.float32()
    elif dtype == np.float64:
        return pyarrow.float64()
    elif dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        assert dtype.str.endswith("[ns]")
        return pyarrow.timestamp(unit="ns", tz=None)
    elif dtype == np.object_:
        return pyarrow.string()
    else:
        raise RuntimeError("Unhandled dtype %r" % dtype) 
Example #3
Source File: test_common_metadata.py    From kartothek with MIT License 5 votes vote down vote up
def test_empty_dataframe_from_schema(df_all_types):
    schema = make_meta(df_all_types, origin="1")
    actual_df = empty_dataframe_from_schema(schema)

    expected_df = df_all_types.loc[[]]
    expected_df["date"] = pd.Series([], dtype="datetime64[ns]")
    for c in expected_df.columns:
        if c.startswith("float"):
            expected_df[c] = pd.Series([], dtype=float)
        if c.startswith("int"):
            expected_df[c] = pd.Series([], dtype=int)
        if c.startswith("uint"):
            expected_df[c] = pd.Series([], dtype=np.uint64)

    pdt.assert_frame_equal(actual_df, expected_df) 
Example #4
Source File: test_common_metadata.py    From kartothek with MIT License 5 votes vote down vote up
def test_empty_dataframe_from_schema_columns(df_all_types):
    schema = make_meta(df_all_types, origin="1")
    actual_df = empty_dataframe_from_schema(schema, ["uint64", "int64"])

    expected_df = df_all_types.loc[[], ["uint64", "int64"]]
    pdt.assert_frame_equal(actual_df, expected_df) 
Example #5
Source File: array_util_test.py    From tfx-bsl with Apache License 2.0 5 votes vote down vote up
def _get_numeric_byte_size_test_cases():
  result = []
  for array_type, sizeof in [
      (pa.int8(), 1),
      (pa.uint8(), 1),
      (pa.int16(), 2),
      (pa.uint16(), 2),
      (pa.int32(), 4),
      (pa.uint32(), 4),
      (pa.int64(), 8),
      (pa.uint64(), 8),
      (pa.float32(), 4),
      (pa.float64(), 8),
  ]:
    result.append(
        dict(
            testcase_name=str(array_type),
            array=pa.array(range(9), type=array_type),
            slice_offset=2,
            slice_length=3,
            expected_size=(_all_false_null_bitmap_size(2) + sizeof * 9),
            expected_sliced_size=(_all_false_null_bitmap_size(1) + sizeof * 3)))
  return result 
Example #6
Source File: common_metadata.py    From kartothek with MIT License 4 votes vote down vote up
def normalize_type(t_pa, t_pd, t_np, metadata):
    """
    This will normalize types as followed:

    - all signed integers (``int8``, ``int16``, ``int32``, ``int64``) will be converted to ``int64``
    - all unsigned integers (``uint8``, ``uint16``, ``uint32``, ``uint64``) will be converted to ``uint64``
    - all floats (``float32``, ``float64``) will be converted to ``float64``
    - all list value types will be normalized (e.g. ``list[int16]`` to ``list[int64]``, ``list[list[uint8]]`` to
      ``list[list[uint64]]``)
    - all dict value types will be normalized (e.g. ``dictionary<values=float32, indices=int16, ordered=0>`` to
      ``float64``)

    Parameters
    ----------
    t_pa: pyarrow.Type
        pyarrow type object, e.g. ``pa.list_(pa.int8())``.
    t_pd: string
        pandas type identifier, e.g. ``"list[int8]"``.
    t_np: string
        numpy type identifier, e.g. ``"object"``.
    metadata: Union[None, Dict[String, Any]]
        metadata associated with the type, e.g. information about categorials.

    Returns
    -------
    type_tuple: Tuple[pyarrow.Type, string, string, Union[None, Dict[String, Any]]]
        tuple of ``t_pa``, ``t_pd``, ``t_np``, ``metadata`` for normalized type
    """
    if pa.types.is_signed_integer(t_pa):
        return pa.int64(), "int64", "int64", None
    elif pa.types.is_unsigned_integer(t_pa):
        return pa.uint64(), "uint64", "uint64", None
    elif pa.types.is_floating(t_pa):
        return pa.float64(), "float64", "float64", None
    elif pa.types.is_list(t_pa):
        t_pa2, t_pd2, t_np2, metadata2 = normalize_type(
            t_pa.value_type, t_pd[len("list[") : -1], None, None
        )
        return pa.list_(t_pa2), "list[{}]".format(t_pd2), "object", None
    elif pa.types.is_dictionary(t_pa):
        # downcast to dictionary content, `t_pd` is useless in that case
        if ARROW_LARGER_EQ_0141:
            return normalize_type(t_pa.value_type, t_np, t_np, None)
        else:
            return normalize_type(t_pa.dictionary.type, t_np, t_np, None)
    else:
        return t_pa, t_pd, t_np, metadata 
Example #7
Source File: test_common_metadata.py    From kartothek with MIT License 4 votes vote down vote up
def test_store_schema_metadata(store, df_all_types):
    store_schema_metadata(
        schema=make_meta(df_all_types, origin="df_all_types"),
        dataset_uuid="some_uuid",
        store=store,
        table="some_table",
    )

    key = "some_uuid/some_table/_common_metadata"
    assert key in store.keys()
    pq_file = pq.ParquetFile(store.open(key))
    actual_schema = pq_file.schema.to_arrow_schema()
    fields = [
        pa.field("array_float32", pa.list_(pa.float64())),
        pa.field("array_float64", pa.list_(pa.float64())),
        pa.field("array_int16", pa.list_(pa.int64())),
        pa.field("array_int32", pa.list_(pa.int64())),
        pa.field("array_int64", pa.list_(pa.int64())),
        pa.field("array_int8", pa.list_(pa.int64())),
        pa.field("array_uint16", pa.list_(pa.uint64())),
        pa.field("array_uint32", pa.list_(pa.uint64())),
        pa.field("array_uint64", pa.list_(pa.uint64())),
        pa.field("array_uint8", pa.list_(pa.uint64())),
        pa.field("array_unicode", pa.list_(pa.string())),
        pa.field("bool", pa.bool_()),
        pa.field("byte", pa.binary()),
        pa.field("date", pa.date32()),
        pa.field("datetime64", pa.timestamp("us")),
        pa.field("float32", pa.float64()),
        pa.field("float64", pa.float64()),
        pa.field("int16", pa.int64()),
        pa.field("int32", pa.int64()),
        pa.field("int64", pa.int64()),
        pa.field("int8", pa.int64()),
        pa.field("null", pa.null()),
        pa.field("uint16", pa.uint64()),
        pa.field("uint32", pa.uint64()),
        pa.field("uint64", pa.uint64()),
        pa.field("uint8", pa.uint64()),
        pa.field("unicode", pa.string()),
    ]
    expected_schema = pa.schema(fields)

    assert actual_schema.remove_metadata() == expected_schema 
Example #8
Source File: test_index.py    From kartothek with MIT License 4 votes vote down vote up
def test_eq_explicit():
    def assert_eq(a, b):
        assert a == b
        assert b == a
        assert not (a != b)
        assert not (b != a)

    def assert_ne(a, b):
        assert a != b
        assert b != a
        assert not (a == b)
        assert not (b == a)

    original_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1"]},
        dtype=pa.int64(),
        index_storage_key="dataset_uuid/some_index.parquet",
    )

    idx1 = original_index.copy()
    assert_eq(idx1, original_index)

    idx2 = original_index.copy()
    idx2.column = "col2"
    assert_ne(idx2, original_index)

    idx3 = original_index.copy()
    idx3.dtype = pa.uint64()
    assert_ne(idx3, original_index)

    idx4 = original_index.copy()
    idx4.index_dct = {1: ["part_1"], 2: ["part_2"]}
    assert_ne(idx4, original_index)

    idx5 = original_index.copy()
    idx5.index_dct = {1: ["part_1", "part_2"]}
    assert_ne(idx5, original_index)

    idx6 = original_index.copy()
    idx6.index_dct = {1: ["part_2"]}
    assert_ne(idx6, original_index)

    idx7 = original_index.copy()
    idx7.index_dct = {2: ["part_1"]}
    assert_ne(idx7, original_index)

    idx8 = original_index.copy()
    idx8.dtype = None
    assert_ne(idx8, original_index)

    idx9a = original_index.copy()
    idx9b = original_index.copy()
    idx9a.dtype = None
    idx9b.dtype = None
    assert_eq(idx9a, idx9b)