Python pyarrow.timestamp() Examples

The following are 30 code examples of pyarrow.timestamp(), collected from open-source projects. The originating project, source file, and license are noted above each example.
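For orientation: pyarrow.timestamp(unit, tz=None) returns a timestamp DataType. unit is one of "s", "ms", "us", or "ns", and tz is an optional timezone string such as "UTC". A minimal sketch:

import pyarrow as pa

naive = pa.timestamp("ns")            # timezone-naive, nanosecond resolution
aware = pa.timestamp("us", tz="UTC")  # timezone-aware, microsecond resolution

# Integers passed to pa.array are interpreted as epoch values in the type's unit.
arr = pa.array([0, 1_000_000], type=aware)
print(arr.type)  # timestamp[us, tz=UTC]
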
Example #1
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_list_prompting_error_concatenate_different_type_to_text(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": [1], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=frozenset({"text"}))
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
        ) 
Example #2
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_list_prompting_error_concatenate_different_type(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": ["1"], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=frozenset({"number"}))
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
                PromptingError.WrongColumnType(
                    ["B"], "datetime", frozenset({"number"})
                ),
            ],
        ) 
Example #3
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_dict_prompting_error_concatenate_different_types(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": ["1"], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        schema = ParamDType.Dict(
            {
                "x": ParamDType.Column(column_types=frozenset({"number"})),
                "y": ParamDType.Column(column_types=frozenset({"number"})),
            }
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, {"x": "A", "y": "B"}, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
                PromptingError.WrongColumnType(
                    ["B"], "datetime", frozenset({"number"})
                ),
            ],
        ) 
Example #4
Source File: test_index.py    From kartothek with MIT License
def test_serialization_normalization(key):
    """
    Check that index normalizes values consistently after serializing.

    This is helpful to ensure correct behavior for cases such as when
    key=`datetime.datetime(2018, 1, 1, 12, 30)`, as this would be parsed to
    `pa.timestamp("us")` during index creation, but stored as `pa.timestamp("ns")`.
    """
    index = ExplicitSecondaryIndex(
        column="col", index_dct={key: ["part_2", "part_4", "part_1"]}
    )
    index2 = pickle.loads(pickle.dumps(index))

    assert index.normalize_value(index.dtype, key) == index2.normalize_value(
        index2.dtype, key
    ) 
Example #5
Source File: test_io.py    From cjworkbench with GNU Affero General Public License v3.0
def test_metadata_comes_from_db_columns(self):
        columns = [
            Column("A", ColumnType.Number(format="{:,.2f}")),
            Column("B", ColumnType.Datetime()),
            Column("C", ColumnType.Text()),
        ]
        result = RenderResult(
            arrow_table(
                {
                    "A": [1],
                    "B": pa.array([datetime.datetime.now()], pa.timestamp("ns")),
                    "C": ["x"],
                },
                columns=columns,
            )
        )
        cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
        # Delete from disk entirely, to prove we did not read.
        minio.remove(BUCKET, crr_parquet_key(self.wf_module.cached_render_result))

        # Load _new_ CachedRenderResult -- from DB columns, not memory
        fresh_wf_module = WfModule.objects.get(id=self.wf_module.id)
        cached_result = fresh_wf_module.cached_render_result

        self.assertEqual(cached_result.table_metadata, TableMetadata(1, columns)) 
Example #6
Source File: types.py    From LearningApacheSpark with MIT License
def _check_series_localize_timestamps(s, timezone):
    """
    Convert timezone-aware timestamps to timezone-naive, in the specified timezone or the local timezone.

    If the input series is not a timestamp series, then the same series is returned. If the input
    series is a timestamp series, then a converted series is returned.

    :param s: pandas.Series
    :param timezone: the timezone to convert to; if None, use the local timezone
    :return: pandas.Series that has been converted to tz-naive
    """
    from pyspark.sql.utils import require_minimum_pandas_version
    require_minimum_pandas_version()

    from pandas.api.types import is_datetime64tz_dtype
    tz = timezone or _get_local_timezone()
    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
    if is_datetime64tz_dtype(s.dtype):
        return s.dt.tz_convert(tz).dt.tz_localize(None)
    else:
        return s 
Example #7
Source File: index.py    From kartothek with MIT License
def _parquet_bytes_to_dict(column: str, index_buffer: bytes):
    reader = pa.BufferReader(index_buffer)
    # This could be done much more efficiently, but that would take a lot
    # more time to implement, so it will only be done on request.
    table = pq.read_table(reader)
    if ARROW_LARGER_EQ_0150:
        column_type = table.schema.field(column).type
    else:
        column_type = table.schema.field_by_name(column).type

    # `datetime.datetime` objects have a precision of up to microseconds only, so arrow
    # parses the type to `pa.timestamp("us")`. Since the
    # values are normalized to `numpy.datetime64[ns]` anyway, we do not care about this
    # and load the column type as `pa.timestamp("ns")`
    if column_type == pa.timestamp("us"):
        column_type = pa.timestamp("ns")

    df = _fix_pyarrow_07992_table(table).to_pandas()  # Could eventually be phased out

    index_dct = dict(
        zip(df[column].values, (list(x) for x in df[_PARTITION_COLUMN_NAME].values))
    )
    return index_dct, column_type 
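As the comment in this example notes, the unit (and timezone) are part of the Arrow type itself, so pa.timestamp("us") and pa.timestamp("ns") compare as different types; that is why the snippet rewrites the column type explicitly. A quick illustration:

import pyarrow as pa

assert pa.timestamp("us") != pa.timestamp("ns")            # the unit is part of the type
assert pa.timestamp("ns") == pa.timestamp("ns")
assert pa.timestamp("ns", tz="UTC") != pa.timestamp("ns")  # so is the timezone
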
Example #8
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_clean_multichartseries_non_number_is_prompting_error(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": ["a"], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        value = [
            {"column": "A", "color": "#aaaaaa"},
            {"column": "B", "color": "#cccccc"},
        ]
        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), value, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
                PromptingError.WrongColumnType(
                    ["B"], "datetime", frozenset({"number"})
                ),
            ],
        ) 
Example #9
Source File: test_wfmodule.py    From cjworkbench with GNU Affero General Public License v3.0
def test_wf_module_render_null_datetime(self):
        # Ran into problems 2019-09-06, when switching to Arrow
        cache_render_result(
            self.workflow,
            self.wf_module2,
            self.wf_module2.last_relevant_delta_id,
            RenderResult(
                arrow_table(
                    {
                        "A": pa.array(
                            [dt(2019, 1, 2, 3, 4, 5, 6007, None), None],
                            pa.timestamp("ns"),
                        )
                    }
                )
            ),
        )

        response = self.client.get("/api/wfmodules/%d/render" % self.wf_module2.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(
            json.loads(response.content)["rows"],
            [{"A": "2019-01-02T03:04:05.006007Z"}, {"A": None}],
        ) 
Example #10
Source File: test_types.py    From cjworkbench with GNU Affero General Public License v3.0
def test_arrow_datetime_column(self):
        dataframe, columns = arrow_table_to_dataframe(
            arrow_table(
                {
                    "A": pyarrow.array(
                        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
                        type=pyarrow.timestamp(unit="ns", tz=None),
                    )
                },
                [atypes.Column("A", atypes.ColumnType.Datetime())],
            )
        )
        assert_frame_equal(
            dataframe,
            pd.DataFrame(
                {"A": ["2019-09-17T21:21:00.123456Z", None]}, dtype="datetime64[ns]"
            ),
        )
        self.assertEqual(columns, [Column("A", ColumnType.DATETIME())]) 
Example #11
Source File: test_types.py    From cjworkbench with GNU Affero General Public License v3.0
def test_dataframe_datetime_column(self):
        assert_arrow_table_equals(
            dataframe_to_arrow_table(
                pd.DataFrame(
                    {"A": ["2019-09-17T21:21:00.123456Z", None]}, dtype="datetime64[ns]"
                ),
                [Column("A", ColumnType.DATETIME())],
                self.path,
            ),
            arrow_table(
                {
                    "A": pyarrow.array(
                        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
                        type=pyarrow.timestamp(unit="ns", tz=None),
                    )
                },
                [atypes.Column("A", atypes.ColumnType.Datetime())],
            ),
        ) 
Example #12
Source File: test_module.py    From cjworkbench with GNU Affero General Public License v3.0
def test_render_with_input_columns(self):
        def render(table, params, *, input_columns):
            self.assertEqual(
                input_columns,
                {
                    "A": ptypes.RenderColumn("A", "text", None),
                    "B": ptypes.RenderColumn("B", "number", "{:,.3f}"),
                    "C": ptypes.RenderColumn("C", "datetime", None),
                },
            )

        with arrow_table_context(
            {"A": ["x"], "B": [1], "C": pa.array([datetime.now()], pa.timestamp("ns"))},
            columns=[
                Column("A", ColumnType.Text()),
                Column("B", ColumnType.Number("{:,.3f}")),
                Column("C", ColumnType.Datetime()),
            ],
            dir=self.basedir,
        ) as arrow_table:
            self._test_render(render, arrow_table=arrow_table) 
Example #13
Source File: test_validate.py    From cjworkbench with GNU Affero General Public License v3.0
def test_column_datetime_must_be_ns_resolution(self):
        # [2019-09-17] Pandas only supports datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        with self.assertRaises(DatetimeUnitNotAllowed):
            validate_table_metadata(
                pyarrow.table(
                    {
                        "A": pyarrow.array(
                            [5298375234], type=pyarrow.timestamp("us", tz=None)
                        )
                    }
                ),
                TableMetadata(1, [Datetime("A")]),
            ) 
Example #14
Source File: types.py    From cjworkbench with GNU Affero General Public License v3.0
def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    if dtype == np.int8:
        return pyarrow.int8()
    elif dtype == np.int16:
        return pyarrow.int16()
    elif dtype == np.int32:
        return pyarrow.int32()
    elif dtype == np.int64:
        return pyarrow.int64()
    elif dtype == np.uint8:
        return pyarrow.uint8()
    elif dtype == np.uint16:
        return pyarrow.uint16()
    elif dtype == np.uint32:
        return pyarrow.uint32()
    elif dtype == np.uint64:
        return pyarrow.uint64()
    elif dtype == np.float16:
        return pyarrow.float16()
    elif dtype == np.float32:
        return pyarrow.float32()
    elif dtype == np.float64:
        return pyarrow.float64()
    elif dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        assert dtype.str.endswith("[ns]")
        return pyarrow.timestamp(unit="ns", tz=None)
    elif dtype == np.object_:
        return pyarrow.string()
    else:
        raise RuntimeError("Unhandled dtype %r" % dtype) 
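A minimal usage sketch for the helper above, assuming numpy is installed and _dtype_to_arrow_type is importable:

import numpy as np

print(_dtype_to_arrow_type(np.dtype("datetime64[ns]")))  # timestamp[ns] (the kind "M" branch)
print(_dtype_to_arrow_type(np.dtype("float64")))         # double
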
Example #15
Source File: test_client.py    From json2parquet with MIT License
def test_ingest_with_datetime_formatted():
    """
    Test ingesting datetime data with a given schema and custom date format
    """
    schema = pa.schema([
        pa.field("foo", pa.int64()),
        pa.field("bar", pa.int64()),
        pa.field("baz", pa.timestamp("ns"))
    ])

    data = [{"foo": 1, "bar": 2, "baz": "2018/01/01 01:02:03"}, {"foo": 10, "bar": 20, "baz": "2018/01/02 01:02:03"}]

    converted_data = client.ingest_data(data, schema, date_format="%Y/%m/%d %H:%M:%S")
    timestamp_values = [pd.to_datetime("2018-01-01 01:02:03"), pd.to_datetime("2018-01-02 01:02:03")]
    assert converted_data.to_pydict() == {'foo': [1, 10], 'bar': [2, 20], 'baz': timestamp_values} 
Example #16
Source File: index.py    From kartothek with MIT License
def store(self, store: KeyValueStore, dataset_uuid: str) -> str:
        """
        Store the index as a parquet file

        If compatible, the new keyname will be the name stored under the attribute `index_storage_key`.
        If this attribute is None, a new key will be generated of the format

            `{dataset_uuid}/indices/{column}/{timestamp}.by-dataset-index.parquet`

        where the timestamp has nanosecond accuracy and is created upon Index object initialization

        Parameters
        ----------
        store:
        dataset_uuid:
        """
        storage_key = None

        if (
            self.index_storage_key is not None
            and dataset_uuid
            and dataset_uuid in self.index_storage_key
        ):
            storage_key = self.index_storage_key
        if storage_key is None:
            storage_key = "{dataset_uuid}/indices/{column}/{timestamp}{suffix}".format(
                dataset_uuid=dataset_uuid,
                suffix=naming.EXTERNAL_INDEX_SUFFIX,
                column=quote(self.column),
                timestamp=quote(self.creation_time.isoformat()),
            )

        table = _index_dct_to_table(self.index_dct, self.column, self.dtype)
        buf = pa.BufferOutputStream()
        pq.write_table(table, buf)

        store.put(storage_key, buf.getvalue().to_pybytes())
        return storage_key 
Example #17
Source File: test_index.py    From kartothek with MIT License
def test_index_ts_inference(store):
    index = ExplicitSecondaryIndex(
        column="col",
        index_dct={
            pd.Timestamp("2017-01-01"): ["part_1", "part_2"],
            pd.Timestamp("2017-01-02"): ["part_3"],
        },
    )
    assert index.dtype == pa.timestamp("ns") 
Example #18
Source File: array_util_test.py    From tfx-bsl with Apache License 2.0
def testUnsupported(self):
    with self.assertRaisesRegex(RuntimeError, "Unimplemented"):
      array_util.GetByteSize(pa.array([], type=pa.timestamp("s"))) 
Example #19
Source File: test_client.py    From json2parquet with MIT License
def test_ingest_with_datetime():
    """
    Test ingesting datetime data with a given schema
    """
    schema = pa.schema([
        pa.field("foo", pa.int64()),
        pa.field("bar", pa.int64()),
        pa.field("baz", pa.timestamp("ns"))
    ])

    data = [{"foo": 1, "bar": 2, "baz": "2018-01-01 01:02:03"}, {"foo": 10, "bar": 20, "baz": "2018-01-02 01:02:03"}]

    converted_data = client.ingest_data(data, schema)
    timestamp_values = [pd.to_datetime("2018-01-01 01:02:03"), pd.to_datetime("2018-01-02 01:02:03")]
    assert converted_data.to_pydict() == {'foo': [1, 10], 'bar': [2, 20], 'baz': timestamp_values} 
Example #20
Source File: csv2parquet.py    From csv2parquet with Apache License 2.0
def get_pyarrow_types():
    return {
        'bool': PA_BOOL,
        'float32': PA_FLOAT32,
        'float64': PA_FLOAT64,
        'int8': PA_INT8,
        'int16': PA_INT16,
        'int32': PA_INT32,
        'int64': PA_INT64,
        'string': PA_STRING,
        'timestamp': PA_TIMESTAMP,
        'base64': PA_BINARY
    }

Example #21
Source File: types.py    From LearningApacheSpark with MIT License
def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone):
    """
    Convert timestamp to timezone-naive in the specified timezone or local timezone

    :param s: a pandas.Series
    :param from_timezone: the timezone to convert from; if None, use the local timezone
    :param to_timezone: the timezone to convert to; if None, use the local timezone
    :return: pandas.Series that, if it is a timestamp series, has been converted to tz-naive
    """
    from pyspark.sql.utils import require_minimum_pandas_version
    require_minimum_pandas_version()

    import pandas as pd
    from pandas.api.types import is_datetime64tz_dtype, is_datetime64_dtype
    from_tz = from_timezone or _get_local_timezone()
    to_tz = to_timezone or _get_local_timezone()
    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
    if is_datetime64tz_dtype(s.dtype):
        return s.dt.tz_convert(to_tz).dt.tz_localize(None)
    elif is_datetime64_dtype(s.dtype) and from_tz != to_tz:
        # `s.dt.tz_localize('tzlocal()')` doesn't work properly when including NaT.
        return s.apply(
            lambda ts: ts.tz_localize(from_tz, ambiguous=False).tz_convert(to_tz).tz_localize(None)
            if ts is not pd.NaT else pd.NaT)
    else:
        return s 
Example #22
Source File: parquet.py    From spectrify with MIT License
def _pa_timestamp_ns():
    """Wrapper function around Arrow's timestamp type function, which is the
    only type function that requires an argument...
    """
    return pa.timestamp('ns') 
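The docstring's point: unlike zero-argument factories such as pa.string() or pa.int64(), pa.timestamp() requires a unit, so wrapping it lets it sit alongside the others in a name-to-callable mapping. A sketch of that pattern (the mapping name here is hypothetical):

import pyarrow as pa

# Hypothetical lookup table in which every value is a zero-argument callable
SQL_TO_ARROW = {
    'varchar': pa.string,
    'bigint': pa.int64,
    'timestamp': _pa_timestamp_ns,  # pa.timestamp itself would need a unit argument
}
print(SQL_TO_ARROW['timestamp']())  # timestamp[ns]
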
Example #23
Source File: test_parquet.py    From spectrify with MIT License
def setUp(self):
        self.sa_meta = sa.MetaData()
        self.data = [
            [17.124, 1.12, 3.14, 13.37],
            [1, 2, 3, 4],
            [1, 2, 3, 4],
            [1, 2, 3, 4],
            [True, None, False, True],
            ['string 1', 'string 2', None, 'string 3'],
            [datetime(2007, 7, 13, 1, 23, 34, 123456),
             None,
             datetime(2006, 1, 13, 12, 34, 56, 432539),
             datetime(2010, 8, 13, 5, 46, 57, 437699), ],
            ["Test Text", "Some#More#Test#  Text", "!@#$%%^&*&", None],
        ]
        self.table = sa.Table(
            'unit_test_table',
            self.sa_meta,
            sa.Column('real_col', sa.REAL),
            sa.Column('bigint_col', sa.BIGINT),
            sa.Column('int_col', sa.INTEGER),
            sa.Column('smallint_col', sa.SMALLINT),
            sa.Column('bool_col', sa.BOOLEAN),
            sa.Column('str_col', sa.VARCHAR),
            sa.Column('timestamp_col', sa.TIMESTAMP),
            sa.Column('plaintext_col', sa.TEXT),
        )

        self.expected_datatypes = [
            pa.float32(),
            pa.int64(),
            pa.int32(),
            pa.int16(),
            pa.bool_(),
            pa.string(),
            pa.timestamp('ns'),
            pa.string(),
        ] 
Example #24
Source File: test_parquet.py    From spectrify with MIT License
def test_write(self):
        # Write out test file
        with UncloseableBytesIO() as write_buffer:
            with Writer(write_buffer, self.table) as writer:
                writer.write_row_group(self.data)
            file_bytes = write_buffer.getvalue()

        # Read in test file
        read_buffer = BytesIO(file_bytes)
        with pa.PythonFile(read_buffer, mode='r') as infile:

            # Verify data
            parq_table = pq.read_table(infile)
            written_data = list(parq_table.to_pydict().values())

            tuples_by_data_type = zip(self.data, written_data)
            for i in tuples_by_data_type:
                tuples_by_order = zip(i[0], i[1])
                for j in tuples_by_order:
                    self.assertAlmostEqual(j[0], j[1], places=5)

            # Verify parquet file schema
            for i, field in enumerate(parq_table.schema):
                self.assertEqual(field.type.id, self.expected_datatypes[i].id)

            # Ensure timestamp column was written with int96; right now
            # there is no way to see except to check that the unit on
            # the timestamp type is 'ns'
            ts_col = parq_table.schema.field_by_name('timestamp_col')
            self.assertEqual(ts_col.type.unit, 'ns') 
Example #25
Source File: test_unischema.py    From petastorm with Apache License 2.0
def test_arrow_schema_convertion():
    fields = [
        pa.field('string', pa.string()),
        pa.field('int8', pa.int8()),
        pa.field('int16', pa.int16()),
        pa.field('int32', pa.int32()),
        pa.field('int64', pa.int64()),
        pa.field('float', pa.float32()),
        pa.field('double', pa.float64()),
        pa.field('bool', pa.bool_(), False),
        pa.field('fixed_size_binary', pa.binary(10)),
        pa.field('variable_size_binary', pa.binary()),
        pa.field('decimal', pa.decimal128(3, 4)),
        pa.field('timestamp_s', pa.timestamp('s')),
        pa.field('timestamp_ns', pa.timestamp('ns')),
        pa.field('date_32', pa.date32()),
        pa.field('date_64', pa.date64())
    ]
    arrow_schema = pa.schema(fields)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)

    unischema = Unischema.from_arrow_schema(mock_dataset)
    for name in arrow_schema.names:
        assert getattr(unischema, name).name == name
        assert getattr(unischema, name).codec is None

        if name == 'bool':
            assert not getattr(unischema, name).nullable
        else:
            assert getattr(unischema, name).nullable

    # Test schema preserve fields order
    field_name_list = [f.name for f in fields]
    assert list(unischema.fields.keys()) == field_name_list 
Example #26
Source File: types.py    From LearningApacheSpark with MIT License
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif type(dt) == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type 
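A usage sketch, assuming a pyspark installation so the Spark type classes are importable:

from pyspark.sql.types import TimestampType, ArrayType, LongType

print(to_arrow_type(TimestampType()))        # timestamp[us, tz=UTC]
print(to_arrow_type(ArrayType(LongType())))  # list<item: int64>
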
Example #27
Source File: test_io.py    From cjworkbench with GNU Affero General Public License v3.0
def test_read_cached_render_result_slice_as_text_datetime(self):
        result = RenderResult(
            arrow_table(
                {"A": pa.array([2134213412341232967, None], pa.timestamp("ns"))},
                columns=[Column("A", ColumnType.Datetime())],
            )
        )
        cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
        crr = self.wf_module.cached_render_result
        self.assertEqual(
            read_cached_render_result_slice_as_text(crr, "csv", range(2), range(3)),
            "A\n2037-08-18T13:03:32.341232967Z\n",
        ) 
Example #28
Source File: types.py    From LearningApacheSpark with MIT License
def _check_series_convert_timestamps_local_tz(s, timezone):
    """
    Convert timestamp to timezone-naive in the specified timezone or local timezone

    :param s: a pandas.Series
    :param timezone: the timezone to convert to; if None, use the local timezone
    :return: pandas.Series that, if it is a timestamp series, has been converted to tz-naive
    """
    return _check_series_convert_timestamps_localize(s, None, timezone) 
Example #29
Source File: test__pandas_helpers.py    From python-bigquery with Apache License 2.0
def is_timestamp(type_):
    # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type
    return all_(
        pyarrow.types.is_timestamp,
        lambda type_: type_.unit == "us",
        lambda type_: type_.tz == "UTC",
    )(type_) 
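A usage sketch, assuming all_ (defined elsewhere in the same test helpers) composes the predicates with a logical AND, so only microsecond-resolution, UTC-aware timestamp types pass:

import pyarrow

assert is_timestamp(pyarrow.timestamp("us", tz="UTC"))
assert not is_timestamp(pyarrow.timestamp("us", tz=None))   # missing timezone
assert not is_timestamp(pyarrow.timestamp("ns", tz="UTC"))  # wrong unit
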
Example #30
Source File: test__pandas_helpers.py    From python-bigquery with Apache License 2.0
def test_is_datetime():
    assert is_datetime(pyarrow.timestamp("us", tz=None))
    assert not is_datetime(pyarrow.timestamp("ms", tz=None))
    assert not is_datetime(pyarrow.timestamp("us", tz="UTC"))
    assert not is_datetime(pyarrow.timestamp("ns", tz="UTC"))
    assert not is_datetime(pyarrow.string())