Python pyarrow.uint64() Examples
The following are 8 code examples of pyarrow.uint64().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pyarrow, or try the search function.
Example #1
Source File: parquet_pio.py From sdc with BSD 2-Clause "Simplified" License | 5 votes |
def _get_numba_typ_from_pa_typ(pa_typ):
    """Look up the target type registered for a pyarrow data type.

    The target types come from the module-level ``types`` namespace and
    ``string_type`` (presumably Numba types, judging by the function name).

    Parameters
    ----------
    pa_typ
        A pyarrow data type instance, e.g. ``pa.int64()``.

    Returns
    -------
    The entry paired with ``pa_typ`` in the lookup table below.

    Raises
    ------
    ValueError
        If ``pa_typ`` has no entry in the lookup table.
    """
    import pyarrow as pa

    supported_pairs = (
        # boolean
        (pa.bool_(), types.bool_),
        # signed int types
        (pa.int8(), types.int8),
        (pa.int16(), types.int16),
        (pa.int32(), types.int32),
        (pa.int64(), types.int64),
        # unsigned int types
        (pa.uint8(), types.uint8),
        (pa.uint16(), types.uint16),
        (pa.uint32(), types.uint32),
        (pa.uint64(), types.uint64),
        # float types (TODO: float16?)
        (pa.float32(), types.float32),
        (pa.float64(), types.float64),
        # String
        (pa.string(), string_type),
        # date
        (pa.date32(), types.NPDatetime('ns')),
        (pa.date64(), types.NPDatetime('ns')),
        # time (TODO: time32, time64, ...)
        # every timestamp unit is mapped to nanosecond resolution
        (pa.timestamp('ns'), types.NPDatetime('ns')),
        (pa.timestamp('us'), types.NPDatetime('ns')),
        (pa.timestamp('ms'), types.NPDatetime('ns')),
        (pa.timestamp('s'), types.NPDatetime('ns')),
    )
    lookup = dict(supported_pairs)

    if pa_typ in lookup:
        return lookup[pa_typ]

    raise ValueError("Arrow data type {} not supported yet".format(pa_typ))
Example #2
Source File: types.py From cjworkbench with GNU Affero General Public License v3.0 | 5 votes |
def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    """Return the pyarrow data type used for a numpy ``dtype``.

    Integer and float dtypes map to the pyarrow type of the same name,
    datetime dtypes (``kind == "M"``) map to a timezone-less nanosecond
    timestamp, and ``object`` maps to ``pyarrow.string()``.

    Raises
    ------
    RuntimeError
        If ``dtype`` matches none of the handled cases.
    """
    # (numpy scalar type, pyarrow factory) pairs, checked in this order.
    numeric_factories = (
        (np.int8, pyarrow.int8),
        (np.int16, pyarrow.int16),
        (np.int32, pyarrow.int32),
        (np.int64, pyarrow.int64),
        (np.uint8, pyarrow.uint8),
        (np.uint16, pyarrow.uint16),
        (np.uint32, pyarrow.uint32),
        (np.uint64, pyarrow.uint64),
        (np.float16, pyarrow.float16),
        (np.float32, pyarrow.float32),
        (np.float64, pyarrow.float64),
    )
    for numpy_type, make_arrow_type in numeric_factories:
        if dtype == numpy_type:
            return make_arrow_type()

    if dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        assert dtype.str.endswith("[ns]")
        return pyarrow.timestamp(unit="ns", tz=None)

    if dtype == np.object_:
        return pyarrow.string()

    raise RuntimeError("Unhandled dtype %r" % dtype)
Example #3
Source File: test_common_metadata.py From kartothek with MIT License | 5 votes |
def test_empty_dataframe_from_schema(df_all_types):
    """Check the empty frame built from a schema against a hand-built one.

    The expected frame is the zero-row selection of ``df_all_types`` with
    the ``date`` column replaced by an empty ``datetime64[ns]`` series and
    every ``float*`` / ``int*`` / ``uint*`` column replaced by an empty
    series of ``float`` / ``int`` / ``np.uint64`` dtype respectively.
    """
    schema = make_meta(df_all_types, origin="1")
    actual_df = empty_dataframe_from_schema(schema)

    expected_df = df_all_types.loc[[]]
    expected_df["date"] = pd.Series([], dtype="datetime64[ns]")

    # Column-name prefix -> dtype of the empty replacement column.
    dtype_by_prefix = (
        ("float", float),
        ("int", int),
        ("uint", np.uint64),
    )
    for column in expected_df.columns:
        for prefix, replacement_dtype in dtype_by_prefix:
            if column.startswith(prefix):
                expected_df[column] = pd.Series([], dtype=replacement_dtype)

    pdt.assert_frame_equal(actual_df, expected_df)
Example #4
Source File: test_common_metadata.py From kartothek with MIT License | 5 votes |
def test_empty_dataframe_from_schema_columns(df_all_types):
    """Check that a column list restricts the empty frame to those columns.

    The result must equal the zero-row selection of ``df_all_types``
    limited to the same columns, in the same order.
    """
    wanted_columns = ["uint64", "int64"]

    schema = make_meta(df_all_types, origin="1")
    actual_df = empty_dataframe_from_schema(schema, wanted_columns)

    expected_df = df_all_types.loc[[], ["uint64", "int64"]]
    pdt.assert_frame_equal(actual_df, expected_df)
Example #5
Source File: array_util_test.py From tfx-bsl with Apache License 2.0 | 5 votes |
def _get_numeric_byte_size_test_cases():
    """Build one test-case dict per numeric pyarrow type.

    Each case holds a 9-element array of that type (``range(9)``), a slice
    of 3 elements starting at offset 2, and the expected sizes: the value
    returned by ``_all_false_null_bitmap_size`` plus the per-element width
    times the number of elements (9 for the full array, 3 for the slice).

    Returns
    -------
    list of dict
        Keyword dictionaries, one per type, in the order listed below.
    """
    # (pyarrow type, width of one element in bytes)
    type_widths = [
        (pa.int8(), 1),
        (pa.uint8(), 1),
        (pa.int16(), 2),
        (pa.uint16(), 2),
        (pa.int32(), 4),
        (pa.uint32(), 4),
        (pa.int64(), 8),
        (pa.uint64(), 8),
        (pa.float32(), 4),
        (pa.float64(), 8),
    ]
    return [
        {
            "testcase_name": str(arrow_type),
            "array": pa.array(range(9), type=arrow_type),
            "slice_offset": 2,
            "slice_length": 3,
            "expected_size": _all_false_null_bitmap_size(2) + width * 9,
            "expected_sliced_size": _all_false_null_bitmap_size(1) + width * 3,
        }
        for arrow_type, width in type_widths
    ]
Example #6
Source File: common_metadata.py From kartothek with MIT License | 4 votes |
def normalize_type(t_pa, t_pd, t_np, metadata):
    """
    Normalize a type description to its widest equivalent.

    The following rules are applied:

    - signed integer types become ``int64``
    - unsigned integer types become ``uint64``
    - floating point types become ``float64``
    - list types have their value type normalized recursively
      (e.g. ``list[int16]`` to ``list[int64]``, ``list[list[uint8]]`` to
      ``list[list[uint64]]``)
    - dictionary types are replaced by their normalized value type
      (e.g. ``dictionary<values=float32, indices=int16, ordered=0>`` to
      ``float64``)
    - everything else is returned unchanged

    Parameters
    ----------
    t_pa: pyarrow.Type
        pyarrow type object, e.g. ``pa.list_(pa.int8())``.
    t_pd: string
        pandas type identifier, e.g. ``"list[int8]"``.
    t_np: string
        numpy type identifier, e.g. ``"object"``.
    metadata: Union[None, Dict[String, Any]]
        metadata associated with the type, e.g. information about
        categoricals.

    Returns
    -------
    type_tuple: Tuple[pyarrow.Type, string, string, Union[None, Dict[String, Any]]]
        tuple of ``t_pa``, ``t_pd``, ``t_np``, ``metadata`` for the
        normalized type
    """
    if pa.types.is_signed_integer(t_pa):
        return pa.int64(), "int64", "int64", None

    if pa.types.is_unsigned_integer(t_pa):
        return pa.uint64(), "uint64", "uint64", None

    if pa.types.is_floating(t_pa):
        return pa.float64(), "float64", "float64", None

    if pa.types.is_list(t_pa):
        # Strip the surrounding "list[" ... "]" to get the element identifier.
        element_pd = t_pd[len("list[") : -1]
        inner_pa, inner_pd, _, _ = normalize_type(
            t_pa.value_type, element_pd, None, None
        )
        return pa.list_(inner_pa), "list[{}]".format(inner_pd), "object", None

    if pa.types.is_dictionary(t_pa):
        # downcast to dictionary content, `t_pd` is useless in that case
        # The attribute holding the value type depends on the
        # ARROW_LARGER_EQ_0141 flag.
        content_type = (
            t_pa.value_type if ARROW_LARGER_EQ_0141 else t_pa.dictionary.type
        )
        return normalize_type(content_type, t_np, t_np, None)

    return t_pa, t_pd, t_np, metadata
Example #7
Source File: test_common_metadata.py From kartothek with MIT License | 4 votes |
def test_store_schema_metadata(store, df_all_types):
    """Check the schema written by ``store_schema_metadata``.

    The schema derived from ``df_all_types`` is stored, read back from the
    ``_common_metadata`` key as a Parquet file, and compared (with metadata
    removed) against the expected, normalized arrow schema.
    """
    store_schema_metadata(
        schema=make_meta(df_all_types, origin="df_all_types"),
        dataset_uuid="some_uuid",
        store=store,
        table="some_table",
    )

    metadata_key = "some_uuid/some_table/_common_metadata"
    assert metadata_key in store.keys()

    parquet_file = pq.ParquetFile(store.open(metadata_key))
    actual_schema = parquet_file.schema.to_arrow_schema()

    # Expected (column name, arrow type) pairs, in schema order.
    expected_columns = [
        ("array_float32", pa.list_(pa.float64())),
        ("array_float64", pa.list_(pa.float64())),
        ("array_int16", pa.list_(pa.int64())),
        ("array_int32", pa.list_(pa.int64())),
        ("array_int64", pa.list_(pa.int64())),
        ("array_int8", pa.list_(pa.int64())),
        ("array_uint16", pa.list_(pa.uint64())),
        ("array_uint32", pa.list_(pa.uint64())),
        ("array_uint64", pa.list_(pa.uint64())),
        ("array_uint8", pa.list_(pa.uint64())),
        ("array_unicode", pa.list_(pa.string())),
        ("bool", pa.bool_()),
        ("byte", pa.binary()),
        ("date", pa.date32()),
        ("datetime64", pa.timestamp("us")),
        ("float32", pa.float64()),
        ("float64", pa.float64()),
        ("int16", pa.int64()),
        ("int32", pa.int64()),
        ("int64", pa.int64()),
        ("int8", pa.int64()),
        ("null", pa.null()),
        ("uint16", pa.uint64()),
        ("uint32", pa.uint64()),
        ("uint64", pa.uint64()),
        ("uint8", pa.uint64()),
        ("unicode", pa.string()),
    ]
    expected_schema = pa.schema(
        [pa.field(name, arrow_type) for name, arrow_type in expected_columns]
    )

    assert actual_schema.remove_metadata() == expected_schema
Example #8
Source File: test_index.py From kartothek with MIT License | 4 votes |
def test_eq_explicit():
    """Exercise ``==`` and ``!=`` on ``ExplicitSecondaryIndex``.

    A copy of an index must compare equal to the original; changing the
    column, the dtype, or the index dictionary of a copy must make it
    compare unequal. Two copies whose dtype is both set to ``None`` must
    compare equal to each other.
    """

    def assert_eq(lhs, rhs):
        # Equality must hold in both directions and agree with `!=`.
        assert lhs == rhs
        assert rhs == lhs
        assert not (lhs != rhs)
        assert not (rhs != lhs)

    def assert_ne(lhs, rhs):
        # Inequality must hold in both directions and agree with `==`.
        assert lhs != rhs
        assert rhs != lhs
        assert not (lhs == rhs)
        assert not (rhs == lhs)

    original_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={1: ["part_1"]},
        dtype=pa.int64(),
        index_storage_key="dataset_uuid/some_index.parquet",
    )

    plain_copy = original_index.copy()
    assert_eq(plain_copy, original_index)

    other_column = original_index.copy()
    other_column.column = "col2"
    assert_ne(other_column, original_index)

    other_dtype = original_index.copy()
    other_dtype.dtype = pa.uint64()
    assert_ne(other_dtype, original_index)

    extra_key = original_index.copy()
    extra_key.index_dct = {1: ["part_1"], 2: ["part_2"]}
    assert_ne(extra_key, original_index)

    extra_partition = original_index.copy()
    extra_partition.index_dct = {1: ["part_1", "part_2"]}
    assert_ne(extra_partition, original_index)

    other_partition = original_index.copy()
    other_partition.index_dct = {1: ["part_2"]}
    assert_ne(other_partition, original_index)

    other_key = original_index.copy()
    other_key.index_dct = {2: ["part_1"]}
    assert_ne(other_key, original_index)

    missing_dtype = original_index.copy()
    missing_dtype.dtype = None
    assert_ne(missing_dtype, original_index)

    untyped_a = original_index.copy()
    untyped_b = original_index.copy()
    untyped_a.dtype = None
    untyped_b.dtype = None
    assert_eq(untyped_a, untyped_b)