Python pyarrow.timestamp() Examples

The following are 30 code examples of pyarrow.timestamp(), collected from open-source projects. The originating project, source file, and license are noted above each example.
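For orientation: pyarrow.timestamp(unit, tz=None) returns a timestamp DataType. unit is one of "s", "ms", "us", or "ns", and tz is an optional timezone string such as "UTC". A minimal sketch:

import pyarrow as pa

naive = pa.timestamp("ns")            # timezone-naive, nanosecond resolution
aware = pa.timestamp("us", tz="UTC")  # timezone-aware, microsecond resolution

# Integers passed to pa.array are interpreted as epoch values in the type's unit.
arr = pa.array([0, 1_000_000], type=aware)
print(arr.type)  # timestamp[us, tz=UTC]
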
Example #1
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_list_prompting_error_concatenate_different_type_to_text(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": [1], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=frozenset({"text"}))
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
        ) 
Example #2
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_list_prompting_error_concatenate_different_type(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": ["1"], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        schema = ParamDType.List(
            inner_dtype=ParamDType.Column(column_types=frozenset({"number"}))
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, ["A", "B"], context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
                PromptingError.WrongColumnType(
                    ["B"], "datetime", frozenset({"number"})
                ),
            ],
        ) 
Example #3
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_dict_prompting_error_concatenate_different_types(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": ["1"], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        schema = ParamDType.Dict(
            {
                "x": ParamDType.Column(column_types=frozenset({"number"})),
                "y": ParamDType.Column(column_types=frozenset({"number"})),
            }
        )
        with self.assertRaises(PromptingError) as cm:
            clean_value(schema, {"x": "A", "y": "B"}, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
                PromptingError.WrongColumnType(
                    ["B"], "datetime", frozenset({"number"})
                ),
            ],
        ) 
Example #4
Source File: test_index.py    From kartothek with MIT License
def test_serialization_normalization(key):
    """
    Check that index normalizes values consistently after serializing.

    This is helpful to ensure correct behavior for cases such as when
    key=`datetime.datetime(2018, 1, 1, 12, 30)`, as this would be parsed to
    `pa.timestamp("us")` during index creation, but stored as `pa.timestamp("ns")`.
    """
    index = ExplicitSecondaryIndex(
        column="col", index_dct={key: ["part_2", "part_4", "part_1"]}
    )
    index2 = pickle.loads(pickle.dumps(index))

    assert index.normalize_value(index.dtype, key) == index2.normalize_value(
        index2.dtype, key
    ) 
Example #5
Source File: test_io.py    From cjworkbench with GNU Affero General Public License v3.0
def test_metadata_comes_from_db_columns(self):
        columns = [
            Column("A", ColumnType.Number(format="{:,.2f}")),
            Column("B", ColumnType.Datetime()),
            Column("C", ColumnType.Text()),
        ]
        result = RenderResult(
            arrow_table(
                {
                    "A": [1],
                    "B": pa.array([datetime.datetime.now()], pa.timestamp("ns")),
                    "C": ["x"],
                },
                columns=columns,
            )
        )
        cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
        # Delete from disk entirely, to prove we did not read.
        minio.remove(BUCKET, crr_parquet_key(self.wf_module.cached_render_result))

        # Load _new_ CachedRenderResult -- from DB columns, not memory
        fresh_wf_module = WfModule.objects.get(id=self.wf_module.id)
        cached_result = fresh_wf_module.cached_render_result

        self.assertEqual(cached_result.table_metadata, TableMetadata(1, columns)) 
Example #6
Source File: types.py    From LearningApacheSpark with MIT License
def _check_series_localize_timestamps(s, timezone):
    """
    Convert timezone-aware timestamps to timezone-naive, in the specified timezone or the local timezone.

    If the input series is not a timestamp series, then the same series is returned. If the input
    series is a timestamp series, then a converted series is returned.

    :param s: pandas.Series
    :param timezone: the timezone to convert to; if None, use the local timezone
    :return: pandas.Series that has been converted to tz-naive
    """
    from pyspark.sql.utils import require_minimum_pandas_version
    require_minimum_pandas_version()

    from pandas.api.types import is_datetime64tz_dtype
    tz = timezone or _get_local_timezone()
    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
    if is_datetime64tz_dtype(s.dtype):
        return s.dt.tz_convert(tz).dt.tz_localize(None)
    else:
        return s 
Example #7
Source File: index.py    From kartothek with MIT License
def _parquet_bytes_to_dict(column: str, index_buffer: bytes):
    reader = pa.BufferReader(index_buffer)
    # This could be done much more efficiently, but that would take a lot
    # more time to implement, so it will only be done on request.
    table = pq.read_table(reader)
    if ARROW_LARGER_EQ_0150:
        column_type = table.schema.field(column).type
    else:
        column_type = table.schema.field_by_name(column).type

    # `datetime.datetime` objects have a precision of up to microseconds only, so arrow
    # parses the type to `pa.timestamp("us")`. Since the
    # values are normalized to `numpy.datetime64[ns]` anyway, we do not care about this
    # and load the column type as `pa.timestamp("ns")`
    if column_type == pa.timestamp("us"):
        column_type = pa.timestamp("ns")

    df = _fix_pyarrow_07992_table(table).to_pandas()  # Could eventually be phased out

    index_dct = dict(
        zip(df[column].values, (list(x) for x in df[_PARTITION_COLUMN_NAME].values))
    )
    return index_dct, column_type 
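As the comment in this example notes, the unit (and timezone) are part of the Arrow type itself, so pa.timestamp("us") and pa.timestamp("ns") compare as different types; that is why the snippet rewrites the column type explicitly. A quick illustration:

import pyarrow as pa

assert pa.timestamp("us") != pa.timestamp("ns")            # the unit is part of the type
assert pa.timestamp("ns") == pa.timestamp("ns")
assert pa.timestamp("ns", tz="UTC") != pa.timestamp("ns")  # so is the timezone
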
Example #8
Source File: test_renderprep.py    From cjworkbench with GNU Affero General Public License v3.0
def test_clean_multichartseries_non_number_is_prompting_error(self):
        context = self._render_context(
            input_table=arrow_table(
                {"A": ["a"], "B": pa.array([datetime.now()], pa.timestamp("ns"))}
            )
        )
        value = [
            {"column": "A", "color": "#aaaaaa"},
            {"column": "B", "color": "#cccccc"},
        ]
        with self.assertRaises(PromptingError) as cm:
            clean_value(ParamDType.Multichartseries(), value, context)

        self.assertEqual(
            cm.exception.errors,
            [
                PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
                PromptingError.WrongColumnType(
                    ["B"], "datetime", frozenset({"number"})
                ),
            ],
        ) 
Example #9
Source File: test_wfmodule.py    From cjworkbench with GNU Affero General Public License v3.0
def test_wf_module_render_null_datetime(self):
        # Ran into problems 2019-09-06, when switching to Arrow
        cache_render_result(
            self.workflow,
            self.wf_module2,
            self.wf_module2.last_relevant_delta_id,
            RenderResult(
                arrow_table(
                    {
                        "A": pa.array(
                            [dt(2019, 1, 2, 3, 4, 5, 6007, None), None],
                            pa.timestamp("ns"),
                        )
                    }
                )
            ),
        )

        response = self.client.get("/api/wfmodules/%d/render" % self.wf_module2.id)
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        self.assertEqual(
            json.loads(response.content)["rows"],
            [{"A": "2019-01-02T03:04:05.006007Z"}, {"A": None}],
        ) 
Example #10
Source File: test_types.py    From cjworkbench with GNU Affero General Public License v3.0
def test_arrow_datetime_column(self):
        dataframe, columns = arrow_table_to_dataframe(
            arrow_table(
                {
                    "A": pyarrow.array(
                        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
                        type=pyarrow.timestamp(unit="ns", tz=None),
                    )
                },
                [atypes.Column("A", atypes.ColumnType.Datetime())],
            )
        )
        assert_frame_equal(
            dataframe,
            pd.DataFrame(
                {"A": ["2019-09-17T21:21:00.123456Z", None]}, dtype="datetime64[ns]"
            ),
        )
        self.assertEqual(columns, [Column("A", ColumnType.DATETIME())]) 
Example #11
Source File: test_types.py    From cjworkbench with GNU Affero General Public License v3.0
def test_dataframe_datetime_column(self):
        assert_arrow_table_equals(
            dataframe_to_arrow_table(
                pd.DataFrame(
                    {"A": ["2019-09-17T21:21:00.123456Z", None]}, dtype="datetime64[ns]"
                ),
                [Column("A", ColumnType.DATETIME())],
                self.path,
            ),
            arrow_table(
                {
                    "A": pyarrow.array(
                        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
                        type=pyarrow.timestamp(unit="ns", tz=None),
                    )
                },
                [atypes.Column("A", atypes.ColumnType.Datetime())],
            ),
        ) 
Example #12
Source File: test_module.py    From cjworkbench with GNU Affero General Public License v3.0
def test_render_with_input_columns(self):
        def render(table, params, *, input_columns):
            self.assertEqual(
                input_columns,
                {
                    "A": ptypes.RenderColumn("A", "text", None),
                    "B": ptypes.RenderColumn("B", "number", "{:,.3f}"),
                    "C": ptypes.RenderColumn("C", "datetime", None),
                },
            )

        with arrow_table_context(
            {"A": ["x"], "B": [1], "C": pa.array([datetime.now()], pa.timestamp("ns"))},
            columns=[
                Column("A", ColumnType.Text()),
                Column("B", ColumnType.Number("{:,.3f}")),
                Column("C", ColumnType.Datetime()),
            ],
            dir=self.basedir,
        ) as arrow_table:
            self._test_render(render, arrow_table=arrow_table) 
Example #13
Source File: test_validate.py    From cjworkbench with GNU Affero General Public License v3.0
def test_column_datetime_must_be_ns_resolution(self):
        # [2019-09-17] Pandas only supports datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        with self.assertRaises(DatetimeUnitNotAllowed):
            validate_table_metadata(
                pyarrow.table(
                    {
                        "A": pyarrow.array(
                            [5298375234], type=pyarrow.timestamp("us", tz=None)
                        )
                    }
                ),
                TableMetadata(1, [Datetime("A")]),
            ) 
Example #14
Source File: types.py    From cjworkbench with GNU Affero General Public License v3.0
def _dtype_to_arrow_type(dtype: np.dtype) -> pyarrow.DataType:
    if dtype == np.int8:
        return pyarrow.int8()
    elif dtype == np.int16:
        return pyarrow.int16()
    elif dtype == np.int32:
        return pyarrow.int32()
    elif dtype == np.int64:
        return pyarrow.int64()
    elif dtype == np.uint8:
        return pyarrow.uint8()
    elif dtype == np.uint16:
        return pyarrow.uint16()
    elif dtype == np.uint32:
        return pyarrow.uint32()
    elif dtype == np.uint64:
        return pyarrow.uint64()
    elif dtype == np.float16:
        return pyarrow.float16()
    elif dtype == np.float32:
        return pyarrow.float32()
    elif dtype == np.float64:
        return pyarrow.float64()
    elif dtype.kind == "M":
        # [2019-09-17] Pandas only allows "ns" unit -- as in, datetime64[ns]
        # https://github.com/pandas-dev/pandas/issues/7307#issuecomment-224180563
        assert dtype.str.endswith("[ns]")
        return pyarrow.timestamp(unit="ns", tz=None)
    elif dtype == np.object_:
        return pyarrow.string()
    else:
        raise RuntimeError("Unhandled dtype %r" % dtype) 
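A minimal usage sketch for the helper above, assuming numpy is installed and _dtype_to_arrow_type is importable:

import numpy as np

print(_dtype_to_arrow_type(np.dtype("datetime64[ns]")))  # timestamp[ns] (the kind "M" branch)
print(_dtype_to_arrow_type(np.dtype("float64")))         # double
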
Example #15
Source File: test_client.py    From json2parquet with MIT License
def test_ingest_with_datetime_formatted():
    """
    Test ingesting datetime data with a given schema and custom date format
    """
    schema = pa.schema([
        pa.field("foo", pa.int64()),
        pa.field("bar", pa.int64()),
        pa.field("baz", pa.timestamp("ns"))
    ])

    data = [{"foo": 1, "bar": 2, "baz": "2018/01/01 01:02:03"}, {"foo": 10, "bar": 20, "baz": "2018/01/02 01:02:03"}]

    converted_data = client.ingest_data(data, schema, date_format="%Y/%m/%d %H:%M:%S")
    timestamp_values = [pd.to_datetime("2018-01-01 01:02:03"), pd.to_datetime("2018-01-02 01:02:03")]
    assert converted_data.to_pydict() == {'foo': [1, 10], 'bar': [2, 20], 'baz': timestamp_values} 
Example #16
Source File: index.py    From kartothek with MIT License
def store(self, store: KeyValueStore, dataset_uuid: str) -> str:
        """
        Store the index as a parquet file

        If compatible, the new keyname will be the name stored under the attribute `index_storage_key`.
        If this attribute is None, a new key will be generated of the format

            `{dataset_uuid}/indices/{column}/{timestamp}.by-dataset-index.parquet`

        where the timestamp has nanosecond accuracy and is created upon Index object initialization

        Parameters
        ----------
        store:
        dataset_uuid:
        """
        storage_key = None

        if (
            self.index_storage_key is not None
            and dataset_uuid
            and dataset_uuid in self.index_storage_key
        ):
            storage_key = self.index_storage_key
        if storage_key is None:
            storage_key = "{dataset_uuid}/indices/{column}/{timestamp}{suffix}".format(
                dataset_uuid=dataset_uuid,
                suffix=naming.EXTERNAL_INDEX_SUFFIX,
                column=quote(self.column),
                timestamp=quote(self.creation_time.isoformat()),
            )

        table = _index_dct_to_table(self.index_dct, self.column, self.dtype)
        buf = pa.BufferOutputStream()
        pq.write_table(table, buf)

        store.put(storage_key, buf.getvalue().to_pybytes())
        return storage_key 
Example #17
Source File: test_index.py    From kartothek with MIT License
def test_index_ts_inference(store):
    index = ExplicitSecondaryIndex(
        column="col",
        index_dct={
            pd.Timestamp("2017-01-01"): ["part_1", "part_2"],
            pd.Timestamp("2017-01-02"): ["part_3"],
        },
    )
    assert index.dtype == pa.timestamp("ns") 
Example #18
Source File: array_util_test.py    From tfx-bsl with Apache License 2.0
def testUnsupported(self):
    with self.assertRaisesRegex(RuntimeError, "Unimplemented"):
      array_util.GetByteSize(pa.array([], type=pa.timestamp("s"))) 
Example #19
Source File: test_client.py    From json2parquet with MIT License
def test_ingest_with_datetime():
    """
    Test ingesting datetime data with a given schema
    """
    schema = pa.schema([
        pa.field("foo", pa.int64()),
        pa.field("bar", pa.int64()),
        pa.field("baz", pa.timestamp("ns"))
    ])

    data = [{"foo": 1, "bar": 2, "baz": "2018-01-01 01:02:03"}, {"foo": 10, "bar": 20, "baz": "2018-01-02 01:02:03"}]

    converted_data = client.ingest_data(data, schema)
    timestamp_values = [pd.to_datetime("2018-01-01 01:02:03"), pd.to_datetime("2018-01-02 01:02:03")]
    assert converted_data.to_pydict() == {'foo': [1, 10], 'bar': [2, 20], 'baz': timestamp_values} 
Example #20
Source File: csv2parquet.py    From csv2parquet with Apache License 2.0
def get_pyarrow_types():
    return {
        'bool': PA_BOOL,
        'float32': PA_FLOAT32,
        'float64': PA_FLOAT64,
        'int8': PA_INT8,
        'int16': PA_INT16,
        'int32': PA_INT32,
        'int64': PA_INT64,
        'string': PA_STRING,
        'timestamp': PA_TIMESTAMP,
        'base64': PA_BINARY
    }

Example #21
Source File: types.py    From LearningApacheSpark with MIT License
def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone):
    """
    Convert timestamp to timezone-naive in the specified timezone or local timezone

    :param s: a pandas.Series
    :param from_timezone: the timezone to convert from; if None, use the local timezone
    :param to_timezone: the timezone to convert to; if None, use the local timezone
    :return: pandas.Series that, if it is a timestamp series, has been converted to tz-naive
    """
    from pyspark.sql.utils import require_minimum_pandas_version
    require_minimum_pandas_version()

    import pandas as pd
    from pandas.api.types import is_datetime64tz_dtype, is_datetime64_dtype
    from_tz = from_timezone or _get_local_timezone()
    to_tz = to_timezone or _get_local_timezone()
    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
    if is_datetime64tz_dtype(s.dtype):
        return s.dt.tz_convert(to_tz).dt.tz_localize(None)
    elif is_datetime64_dtype(s.dtype) and from_tz != to_tz:
        # `s.dt.tz_localize('tzlocal()')` doesn't work properly when including NaT.
        return s.apply(
            lambda ts: ts.tz_localize(from_tz, ambiguous=False).tz_convert(to_tz).tz_localize(None)
            if ts is not pd.NaT else pd.NaT)
    else:
        return s 
Example #22
Source File: parquet.py    From spectrify with MIT License
def _pa_timestamp_ns():
    """Wrapper function around Arrow's timestamp type function, which is the
    only type function that requires an argument...
    """
    return pa.timestamp('ns') 
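The docstring's point: unlike zero-argument factories such as pa.string() or pa.int64(), pa.timestamp() requires a unit, so wrapping it lets it sit alongside the others in a name-to-callable mapping. A sketch of that pattern (the mapping name here is hypothetical):

import pyarrow as pa

# Hypothetical lookup table in which every value is a zero-argument callable
SQL_TO_ARROW = {
    'varchar': pa.string,
    'bigint': pa.int64,
    'timestamp': _pa_timestamp_ns,  # pa.timestamp itself would need a unit argument
}
print(SQL_TO_ARROW['timestamp']())  # timestamp[ns]
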
Example #23
Source File: test_parquet.py    From spectrify with MIT License
def setUp(self):
        self.sa_meta = sa.MetaData()
        self.data = [
            [17.124, 1.12, 3.14, 13.37],
            [1, 2, 3, 4],
            [1, 2, 3, 4],
            [1, 2, 3, 4],
            [True, None, False, True],
            ['string 1', 'string 2', None, 'string 3'],
            [datetime(2007, 7, 13, 1, 23, 34, 123456),
             None,
             datetime(2006, 1, 13, 12, 34, 56, 432539),
             datetime(2010, 8, 13, 5, 46, 57, 437699), ],
            ["Test Text", "Some#More#Test#  Text", "!@#$%%^&*&", None],
        ]
        self.table = sa.Table(
            'unit_test_table',
            self.sa_meta,
            sa.Column('real_col', sa.REAL),
            sa.Column('bigint_col', sa.BIGINT),
            sa.Column('int_col', sa.INTEGER),
            sa.Column('smallint_col', sa.SMALLINT),
            sa.Column('bool_col', sa.BOOLEAN),
            sa.Column('str_col', sa.VARCHAR),
            sa.Column('timestamp_col', sa.TIMESTAMP),
            sa.Column('plaintext_col', sa.TEXT),
        )

        self.expected_datatypes = [
            pa.float32(),
            pa.int64(),
            pa.int32(),
            pa.int16(),
            pa.bool_(),
            pa.string(),
            pa.timestamp('ns'),
            pa.string(),
        ] 
Example #24
Source File: test_parquet.py    From spectrify with MIT License
def test_write(self):
        # Write out test file
        with UncloseableBytesIO() as write_buffer:
            with Writer(write_buffer, self.table) as writer:
                writer.write_row_group(self.data)
            file_bytes = write_buffer.getvalue()

        # Read in test file
        read_buffer = BytesIO(file_bytes)
        with pa.PythonFile(read_buffer, mode='r') as infile:

            # Verify data
            parq_table = pq.read_table(infile)
            written_data = list(parq_table.to_pydict().values())

            tuples_by_data_type = zip(self.data, written_data)
            for i in tuples_by_data_type:
                tuples_by_order = zip(i[0], i[1])
                for j in tuples_by_order:
                    self.assertAlmostEqual(j[0], j[1], places=5)

            # Verify parquet file schema
            for i, field in enumerate(parq_table.schema):
                self.assertEqual(field.type.id, self.expected_datatypes[i].id)

            # Ensure timestamp column was written with int96; right now
            # there is no way to see except to check that the unit on
            # the timestamp type is 'ns'
            ts_col = parq_table.schema.field_by_name('timestamp_col')
            self.assertEqual(ts_col.type.unit, 'ns') 
Example #25
Source File: test_unischema.py    From petastorm with Apache License 2.0
def test_arrow_schema_convertion():
    fields = [
        pa.field('string', pa.string()),
        pa.field('int8', pa.int8()),
        pa.field('int16', pa.int16()),
        pa.field('int32', pa.int32()),
        pa.field('int64', pa.int64()),
        pa.field('float', pa.float32()),
        pa.field('double', pa.float64()),
        pa.field('bool', pa.bool_(), False),
        pa.field('fixed_size_binary', pa.binary(10)),
        pa.field('variable_size_binary', pa.binary()),
        pa.field('decimal', pa.decimal128(3, 4)),
        pa.field('timestamp_s', pa.timestamp('s')),
        pa.field('timestamp_ns', pa.timestamp('ns')),
        pa.field('date_32', pa.date32()),
        pa.field('date_64', pa.date64())
    ]
    arrow_schema = pa.schema(fields)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)

    unischema = Unischema.from_arrow_schema(mock_dataset)
    for name in arrow_schema.names:
        assert getattr(unischema, name).name == name
        assert getattr(unischema, name).codec is None

        if name == 'bool':
            assert not getattr(unischema, name).nullable
        else:
            assert getattr(unischema, name).nullable

    # Test schema preserve fields order
    field_name_list = [f.name for f in fields]
    assert list(unischema.fields.keys()) == field_name_list 
Example #26
Source File: types.py    From LearningApacheSpark with MIT License
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif type(dt) == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type 
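A usage sketch, assuming a pyspark installation so the Spark type classes are importable:

from pyspark.sql.types import TimestampType, ArrayType, LongType

print(to_arrow_type(TimestampType()))        # timestamp[us, tz=UTC]
print(to_arrow_type(ArrayType(LongType())))  # list<item: int64>
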
Example #27
Source File: test_io.py    From cjworkbench with GNU Affero General Public License v3.0
def test_read_cached_render_result_slice_as_text_datetime(self):
        result = RenderResult(
            arrow_table(
                {"A": pa.array([2134213412341232967, None], pa.timestamp("ns"))},
                columns=[Column("A", ColumnType.Datetime())],
            )
        )
        cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
        crr = self.wf_module.cached_render_result
        self.assertEqual(
            read_cached_render_result_slice_as_text(crr, "csv", range(2), range(3)),
            "A\n2037-08-18T13:03:32.341232967Z\n",
        ) 
Example #28
Source File: types.py    From LearningApacheSpark with MIT License
def _check_series_convert_timestamps_local_tz(s, timezone):
    """
    Convert timestamp to timezone-naive in the specified timezone or local timezone

    :param s: a pandas.Series
    :param timezone: the timezone to convert to; if None, use the local timezone
    :return: pandas.Series that, if it is a timestamp series, has been converted to tz-naive
    """
    return _check_series_convert_timestamps_localize(s, None, timezone) 
Example #29
Source File: test__pandas_helpers.py    From python-bigquery with Apache License 2.0
def is_timestamp(type_):
    # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type
    return all_(
        pyarrow.types.is_timestamp,
        lambda type_: type_.unit == "us",
        lambda type_: type_.tz == "UTC",
    )(type_) 
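A usage sketch, assuming all_ (defined elsewhere in the same test helpers) composes the predicates with a logical AND, so only microsecond-resolution, UTC-aware timestamp types pass:

import pyarrow

assert is_timestamp(pyarrow.timestamp("us", tz="UTC"))
assert not is_timestamp(pyarrow.timestamp("us", tz=None))   # missing timezone
assert not is_timestamp(pyarrow.timestamp("ns", tz="UTC"))  # wrong unit
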
Example #30
Source File: test__pandas_helpers.py    From python-bigquery with Apache License 2.0
def test_is_datetime():
    assert is_datetime(pyarrow.timestamp("us", tz=None))
    assert not is_datetime(pyarrow.timestamp("ms", tz=None))
    assert not is_datetime(pyarrow.timestamp("us", tz="UTC"))
    assert not is_datetime(pyarrow.timestamp("ns", tz="UTC"))
    assert not is_datetime(pyarrow.string())