Python pyarrow.decimal128() Examples

The following are 4 code examples of pyarrow.decimal128(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pyarrow, or try the search function.
Example #1
Source File: parquet.py    From spectrify with MIT License 6 votes vote down vote up
def determine_pyarrow_types(self, cols):
        """Return one pyarrow type entry per column, in the order of ``cols``.

        Columns whose ``type`` is an instance of ``sa.types.NUMERIC`` or
        ``sa.types.DECIMAL`` get a ``functools.partial`` of ``pa.decimal128``
        bound to that type's ``precision`` and ``scale``.  Every other column
        gets whatever ``self.pyarrow_type_map`` holds for the class of its
        ``type``.
        """
        def entry_for(column_type):
            # Decimal-like columns carry their own precision/scale, so they
            # cannot be served from the static per-class map.
            if not isinstance(column_type, (sa.types.NUMERIC, sa.types.DECIMAL)):
                return self.pyarrow_type_map[column_type.__class__]
            return functools.partial(
                pa.decimal128, column_type.precision, column_type.scale
            )

        return [entry_for(column.type) for column in cols]
Example #2
Source File: test_unischema.py    From petastorm with Apache License 2.0 5 votes vote down vote up
def test_arrow_schema_convertion():
    """Check that ``Unischema.from_arrow_schema`` mirrors an arrow schema.

    An arrow schema covering a range of pyarrow types is wrapped in a mocked
    parquet dataset and converted.  Each resulting field must keep its name,
    have no codec, and carry the expected nullability; the field order must
    match the order in which the arrow fields were declared.
    """
    # Positional arguments for each pa.field(...) call, in schema order.
    field_args = [
        ('string', pa.string()),
        ('int8', pa.int8()),
        ('int16', pa.int16()),
        ('int32', pa.int32()),
        ('int64', pa.int64()),
        ('float', pa.float32()),
        ('double', pa.float64()),
        ('bool', pa.bool_(), False),
        ('fixed_size_binary', pa.binary(10)),
        ('variable_size_binary', pa.binary()),
        ('decimal', pa.decimal128(3, 4)),
        ('timestamp_s', pa.timestamp('s')),
        ('timestamp_ns', pa.timestamp('ns')),
        ('date_32', pa.date32()),
        ('date_64', pa.date64()),
    ]
    fields = [pa.field(*args) for args in field_args]
    arrow_schema = pa.schema(fields)

    unischema = Unischema.from_arrow_schema(
        _mock_parquet_dataset([], arrow_schema)
    )

    for name in arrow_schema.names:
        converted = getattr(unischema, name)
        assert converted.name == name
        assert converted.codec is None

        # 'bool' is the only field created with a third argument of False,
        # so it is the only one expected to be non-nullable.
        expected_nullable = name != 'bool'
        assert bool(converted.nullable) == expected_nullable

    # Test schema preserve fields order
    assert list(unischema.fields.keys()) == [field.name for field in fields]
Example #3
Source File: types.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def _numeric_version_prefix(version):
    """Return the leading dotted-integer part of *version* as a tuple of ints.

    Trailing qualifiers are ignored, so ``"0.10.0rc1"`` and ``"0.10.0.dev3"``
    both give ``(0, 10, 0)``.  A string that does not start with a digit gives
    the empty tuple, which compares lower than any real version.
    """
    import re

    match = re.match(r"\d+(?:\.\d+)*", version)
    if match is None:
        return ()
    return tuple(int(part) for part in match.group().split("."))


def to_arrow_type(dt):
    """Convert a Spark data type to the corresponding pyarrow type.

    :param dt: an instance of one of the Spark data type classes handled
        below (boolean, integral, floating point, decimal, string, binary,
        date, timestamp, or an array of a supported element type).
    :return: the matching pyarrow data type object.
    :raises TypeError: if ``dt`` is not one of the handled types, if it is an
        array of timestamps, or if it is ``BinaryType`` and the installed
        pyarrow is older than 0.10.0.
    """
    # pyarrow is imported lazily so the module can be loaded without it.
    import pyarrow as pa

    # NOTE: the comparisons are intentionally on the exact type, as in the
    # original code, so subclasses of these types are not matched.
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        # The version is compared as a tuple of ints rather than with
        # distutils' LooseVersion: distutils was removed in Python 3.12, and
        # importing it unconditionally broke every call to this function.
        if _numeric_version_prefix(pa.__version__) < (0, 10, 0):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif type(dt) == ArrayType:
        # Arrays of timestamps are rejected explicitly; any other element
        # type is converted recursively.
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type
Example #4
Source File: _pandas_helpers.py    From python-bigquery with Apache License 2.0 5 votes vote down vote up
def pyarrow_numeric():
    """Return the pyarrow ``decimal128`` type with precision 38 and scale 9."""
    precision, scale = 38, 9
    return pyarrow.decimal128(precision, scale)