Python Examples of pyarrow.__version__

Source File: parquet.py From recruit with Apache License 2.0

6 votes

def __init__(self):
        """Import fastparquet lazily and keep the module on ``self.api``.

        Raises ImportError when fastparquet is missing or older than 0.2.1.
        """
        # fastparquet itself depends on pandas, so the import is deferred
        # until an engine instance is actually created
        install_hint = (
            "you can install via conda\n"
            "conda install fastparquet -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U fastparquet"
        )
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                + install_hint
            )

        installed = LooseVersion(fastparquet.__version__)
        if installed < '0.2.1':
            raise ImportError(
                "fastparquet >= 0.2.1 is required for parquet support\n\n"
                + install_hint
            )
        self.api = fastparquet

Source File: parquet.py From elasticintel with GNU General Public License v3.0

6 votes

def __init__(self):
        """Load fastparquet on first use and expose it as ``self.api``.

        Raises ImportError if fastparquet cannot be imported or is older
        than 0.1.0.
        """
        # pandas is a dependency of fastparquet, hence the deferred import.
        # The single placeholder receives the optional version requirement.
        template = ("fastparquet {}is required for parquet support\n\n"
                    "you can install via conda\n"
                    "conda install fastparquet -c conda-forge\n"
                    "\nor via pip\n"
                    "pip install -U fastparquet")

        try:
            import fastparquet
        except ImportError:
            raise ImportError(template.format(""))

        too_old = LooseVersion(fastparquet.__version__) < '0.1.0'
        if too_old:
            raise ImportError(template.format(">= 0.1.0 "))

        self.api = fastparquet

Source File: parquet.py From elasticintel with GNU General Public License v3.0

6 votes

def __init__(self):
        """Import pyarrow lazily and record which old-version shims apply.

        Sets ``self.api`` to the pyarrow module and the boolean flags
        ``self._pyarrow_lt_050`` / ``self._pyarrow_lt_060``.

        Raises
        ------
        ImportError
            If pyarrow (or ``pyarrow.parquet``) cannot be imported, or the
            installed pyarrow is older than 0.4.1.
        """
        # since pandas is a dependency of pyarrow
        # we need to import on first use

        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError("pyarrow is required for parquet support\n\n"
                              "you can install via conda\n"
                              "conda install pyarrow -c conda-forge\n"
                              "\nor via pip\n"
                              "pip install -U pyarrow\n")

        installed = LooseVersion(pyarrow.__version__)
        if installed < '0.4.1':
            # The trailing space after "parquet" matters: the adjacent
            # literals are concatenated, and without it the message read
            # "...required for parquetsupport".
            raise ImportError("pyarrow >= 0.4.1 is required for parquet "
                              "support\n\n"
                              "you can install via conda\n"
                              "conda install pyarrow -c conda-forge\n"
                              "\nor via pip\n"
                              "pip install -U pyarrow\n")

        self._pyarrow_lt_050 = installed < '0.5.0'
        self._pyarrow_lt_060 = installed < '0.6.0'
        self.api = pyarrow

Source File: parquet.py From Splunking-Crime with GNU Affero General Public License v3.0

6 votes

def __init__(self):
        """Import fastparquet lazily and keep the module on ``self.api``.

        Raises ImportError when fastparquet is missing or older than 0.1.0.
        """
        # deferred import: fastparquet depends on pandas
        how_to_install = "".join([
            "you can install via conda\n",
            "conda install fastparquet -c conda-forge\n",
            "\nor via pip\n",
            "pip install -U fastparquet",
        ])
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                + how_to_install
            )

        if LooseVersion(fastparquet.__version__) < '0.1.0':
            raise ImportError(
                "fastparquet >= 0.1.0 is required for parquet support\n\n"
                + how_to_install
            )
        self.api = fastparquet

Source File: __init__.py From koalas with Apache License 2.0

6 votes

def assert_pyspark_version():
    """Warn when the installed pyspark is older than the recommended 2.4.

    A too-old, unversioned or unparseable pyspark only triggers a
    ``logging.warning``; the function never raises for those cases.

    Raises
    ------
    ImportError
        If pyspark cannot be imported at all.
    """
    import logging
    import re

    try:
        import pyspark
    except ImportError:
        raise ImportError(
            "Unable to import pyspark - consider doing a pip install with [spark] "
            "extra to install pyspark with pip"
        )

    # The default keeps a pyspark build without ``__version__`` on the
    # warning path instead of failing with AttributeError.
    pyspark_ver = getattr(pyspark, "__version__", None)

    # Compare (major, minor) numerically: as plain strings "2.10" would sort
    # before "2.4" and trigger a bogus warning.
    meets_recommendation = False
    if pyspark_ver is not None:
        parsed = re.match(r"(\d+)(?:\.(\d+))?", str(pyspark_ver))
        if parsed is not None:
            major_minor = (int(parsed.group(1)), int(parsed.group(2) or 0))
            meets_recommendation = major_minor >= (2, 4)

    if not meets_recommendation:
        logging.warning(
            'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'.format(
                pyspark_ver if pyspark_ver is not None else "<unknown version>"
            )
        )

Source File: utils.py From LearningApacheSpark with MIT License

6 votes

def require_minimum_pyarrow_version():
    """ Raise ImportError unless pyarrow is importable and at least the
    minimum version required here (0.8.0).
    """
    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
    minimum_pyarrow_version = "0.8.0"

    from distutils.version import LooseVersion
    try:
        import pyarrow
    except ImportError:
        pyarrow = None

    # Raised outside the ``except`` block so the error carries no context
    # from the failed import.
    if pyarrow is None:
        raise ImportError("PyArrow >= %s must be installed; however, "
                          "it was not found." % minimum_pyarrow_version)

    installed = pyarrow.__version__
    if LooseVersion(installed) < LooseVersion(minimum_pyarrow_version):
        raise ImportError("PyArrow >= %s must be installed; however, "
                          "your version was %s." % (minimum_pyarrow_version, installed))

Source File: utils.py From LearningApacheSpark with MIT License

6 votes

def require_minimum_pandas_version():
    """ Raise ImportError unless pandas is importable and at least the
    minimum version required here (0.19.2).
    """
    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
    minimum_pandas_version = "0.19.2"

    from distutils.version import LooseVersion
    try:
        import pandas
    except ImportError:
        pandas = None

    # Raised outside the ``except`` block so the error carries no context
    # from the failed import.
    if pandas is None:
        raise ImportError("Pandas >= %s must be installed; however, "
                          "it was not found." % minimum_pandas_version)

    installed = pandas.__version__
    if LooseVersion(installed) < LooseVersion(minimum_pandas_version):
        raise ImportError("Pandas >= %s must be installed; however, "
                          "your version was %s." % (minimum_pandas_version, installed))

Source File: feather_format.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def _try_import():
    """Import pyarrow and its feather module on first use.

    Returns
    -------
    tuple
        ``(feather, pyarrow)`` - the ``pyarrow.feather`` module and the
        ``pyarrow`` package.

    Raises
    ------
    ImportError
        If pyarrow is not installed or is older than 0.9.0.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        from pyarrow import feather
    except ImportError:
        # give a nice error message
        raise ImportError("pyarrow is not installed\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'):
        # The newline after the conda command was missing, which glued the
        # two instructions together as "...conda-forgeor via pip".
        raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    return feather, pyarrow

Source File: parquet.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def __init__(self):
        """Load fastparquet on first use and expose it as ``self.api``.

        Raises ImportError if fastparquet cannot be imported or is older
        than 0.2.1.
        """
        # pandas is a dependency of fastparquet, hence the deferred import.
        # The single placeholder receives the optional version requirement.
        template = (
            "fastparquet {}is required for parquet support\n\n"
            "you can install via conda\n"
            "conda install fastparquet -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U fastparquet"
        )
        try:
            import fastparquet
        except ImportError:
            raise ImportError(template.format(""))

        too_old = LooseVersion(fastparquet.__version__) < '0.2.1'
        if too_old:
            raise ImportError(template.format(">= 0.2.1 "))
        self.api = fastparquet

Source File: parquet.py From predictive-maintenance-using-machine-learning with Apache License 2.0

6 votes

def __init__(self):
        """Import pyarrow lazily and keep the package on ``self.api``.

        Raises ImportError when pyarrow (or ``pyarrow.parquet``) is missing
        or the installed pyarrow is older than 0.9.0.
        """
        # pyarrow depends on pandas, so it is only imported once an engine
        # instance is created
        install_hint = (
            "you can install via conda\n"
            "conda install pyarrow -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U pyarrow\n"
        )
        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError(
                "pyarrow is required for parquet support\n\n" + install_hint
            )

        installed = LooseVersion(pyarrow.__version__)
        if installed < '0.9.0':
            raise ImportError(
                "pyarrow >= 0.9.0 is required for parquet support\n\n"
                + install_hint
            )

        self.api = pyarrow

Source File: parquet.py From vnpy_crypto with MIT License

6 votes

def __init__(self):
        """Import fastparquet lazily and keep the module on ``self.api``.

        Raises ImportError when fastparquet is missing or older than 0.1.0.
        """
        # fastparquet itself depends on pandas, so the import is deferred
        # until an engine instance is actually created
        install_hint = (
            "you can install via conda\n"
            "conda install fastparquet -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U fastparquet"
        )
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                + install_hint
            )

        installed = LooseVersion(fastparquet.__version__)
        if installed < '0.1.0':
            raise ImportError(
                "fastparquet >= 0.1.0 is required for parquet support\n\n"
                + install_hint
            )
        self.api = fastparquet

Source File: parquet.py From recruit with Apache License 2.0

6 votes

def __init__(self):
        """Load pyarrow on first use and expose it as ``self.api``.

        Raises ImportError if pyarrow (or ``pyarrow.parquet``) cannot be
        imported, or if the installed pyarrow is older than 0.9.0.
        """
        # pandas is a dependency of pyarrow, hence the deferred import.
        # The single placeholder receives the optional version requirement.
        template = (
            "pyarrow {}is required for parquet support\n\n"
            "you can install via conda\n"
            "conda install pyarrow -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U pyarrow\n"
        )
        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError(template.format(""))

        too_old = LooseVersion(pyarrow.__version__) < '0.9.0'
        if too_old:
            raise ImportError(template.format(">= 0.9.0 "))

        self.api = pyarrow

Source File: feather_format.py From recruit with Apache License 2.0

6 votes

def _try_import():
    """Import pyarrow and its feather module on first use.

    Returns
    -------
    tuple
        ``(feather, pyarrow)`` - the ``pyarrow.feather`` module and the
        ``pyarrow`` package.

    Raises
    ------
    ImportError
        If pyarrow is not installed or is older than 0.9.0.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        from pyarrow import feather
    except ImportError:
        # give a nice error message
        raise ImportError("pyarrow is not installed\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'):
        # The newline after the conda command was missing, which glued the
        # two instructions together as "...conda-forgeor via pip".
        raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    return feather, pyarrow

Source File: session.py From mars with Apache License 2.0

5 votes

def _main(self):
        """Create a session on the endpoint, or check the existing one.

        Posts to ``/api/session`` when no session id is known, otherwise
        gets ``/api/session/<id>``. The JSON reply then supplies the session
        id and pickle protocol, and may force pickle serialization.

        Raises ValueError on a 404 for an existing session id and
        SystemError on any other status code >= 400.
        """
        # Pick the serialization type; without pyarrow only pickle is used.
        try:
            import pyarrow
            self._serial_type = dataserializer.SerialType(options.client.serial_type.lower())
        except ImportError:
            pyarrow = None
            self._serial_type = dataserializer.SerialType.PICKLE

        payload = self._args.copy()
        payload['pyver'] = '.'.join(str(part) for part in sys.version_info[:3])
        payload['pickle_protocol'] = self._pickle_protocol
        if pyarrow is not None:
            payload['arrow_version'] = pyarrow.__version__

        if self._session_id is not None:
            session_url = self._endpoint + '/api/session/' + self._session_id
            resp = self._req_session.get(session_url, params=payload)
            if resp.status_code == 404:
                raise ValueError('The session with id = %s doesn\'t exist' % self._session_id)
            if resp.status_code >= 400:
                raise SystemError('Failed to check mars session.')
        else:
            resp = self._req_session.post(self._endpoint + '/api/session', data=payload)
            if resp.status_code >= 400:
                raise SystemError('Failed to create mars session: ' + resp.reason)

        reply = json.loads(resp.text)
        self._session_id = reply['session_id']
        self._pickle_protocol = reply.get('pickle_protocol', pickle.HIGHEST_PROTOCOL)
        arrow_ok = reply.get('arrow_compatible')
        if not arrow_ok:
            # fall back to pickle unless the reply marks arrow as compatible
            self._serial_type = dataserializer.SerialType.PICKLE

Source File: array_util_test.py From tfx-bsl with Apache License 2.0

5 votes

def _all_false_null_bitmap_size(size):
  """Return the expected null-bitmap size for an array without nulls.

  Args:
    size: the value returned unchanged when the installed arrow is older
      than 0.17.

  Returns:
    `size` for arrow < 0.17, otherwise 0.
  """
  import re

  # Compare (major, minor) numerically; as plain strings "0.9" sorts after
  # "0.17", so old releases would be misclassified.
  major_minor = tuple(int(n) for n in re.findall(r"\d+", pa.__version__)[:2])
  if major_minor < (0, 17):
    return size
  # starting from arrow 0.17, the array factory won't create a null bitmap if
  # no element is null.
  # TODO(zhuo): clean up this shim once tfx_bsl supports arrow 0.17+
  # exclusively.
  return 0

Source File: feather_format.py From recruit with Apache License 2.0

5 votes

def read_feather(path, columns=None, use_threads=True):
    """
    Load a feather-format object from the file path

    .. versionadded:: 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    columns : sequence, default None
        If not provided, all columns are read

        .. versionadded:: 0.24.0
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame.
        NOTE(review): not part of the signature shown here; presumably
        accepted through a decorator outside this snippet - confirm.

        .. versionadded:: 0.21.0
        .. deprecated:: 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads

        .. versionadded:: 0.24.0

    Returns
    -------
    type of object stored in file

    """

    feather, pyarrow = _try_import()
    path = _stringify_path(path)

    modern_pyarrow = not (
        LooseVersion(pyarrow.__version__) < LooseVersion('0.11.0'))
    if modern_pyarrow:
        return feather.read_feather(path, columns=columns,
                                    use_threads=bool(use_threads))

    # pyarrow < 0.11.0 takes an integer thread count, never below 1
    thread_count = max(int(use_threads), 1)
    return feather.read_feather(path, columns=columns,
                                nthreads=thread_count)

Source File: parquet.py From Splunking-Crime with GNU Affero General Public License v3.0

5 votes

def __init__(self):
        """Import pyarrow lazily and record which old-version shims apply.

        Sets ``self.api`` plus the boolean flags ``self._pyarrow_lt_060``
        and ``self._pyarrow_lt_070``. Raises ImportError when pyarrow (or
        ``pyarrow.parquet``) is missing or older than 0.4.1.
        """
        # pyarrow depends on pandas, so it is only imported once an engine
        # instance is created
        install_hint = (
            "you can install via conda\n"
            "conda install pyarrow -c conda-forge\n"
            "\nor via pip\n"
            "pip install -U pyarrow\n"
        )
        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError(
                "pyarrow is required for parquet support\n\n" + install_hint
            )

        if LooseVersion(pyarrow.__version__) < '0.4.1':
            raise ImportError(
                "pyarrow >= 0.4.1 is required for parquet support\n\n"
                + install_hint
            )

        installed = LooseVersion(pyarrow.__version__)
        self._pyarrow_lt_060 = installed < LooseVersion('0.6.0')
        self._pyarrow_lt_070 = installed < LooseVersion('0.7.0')

        self.api = pyarrow

Source File: conftest.py From koalas with Apache License 2.0

5 votes

def add_pa(doctest_namespace):
    """Expose pyarrow to doctests as ``pa``.

    When the PYARROW_VERSION environment variable is set, the imported
    pyarrow must report exactly that version.
    """
    pinned_version = os.getenv("PYARROW_VERSION", None)
    if pinned_version is not None:
        assert pa.__version__ == pinned_version
    doctest_namespace["pa"] = pa

Source File: conftest.py From koalas with Apache License 2.0

5 votes

def add_pd(doctest_namespace):
    """Expose pandas to doctests as ``pd``.

    When the PANDAS_VERSION environment variable is set, the imported
    pandas must report exactly that version.
    """
    pinned_version = os.getenv("PANDAS_VERSION", None)
    if pinned_version is not None:
        assert pd.__version__ == pinned_version
    doctest_namespace["pd"] = pd

Source File: utils.py From koalas with Apache License 2.0

5 votes

def default_session(conf=None):
    """Build (or fetch) the Spark session named "Koalas".

    Parameters
    ----------
    conf : dict, optional
        Extra Spark configuration entries applied to the session builder.
        The mapping is copied, so the caller's object is never modified.

    Returns
    -------
    The session returned by ``builder.getOrCreate()``.

    Raises
    ------
    RuntimeError
        If any ``ARROW_PRE_0_15_IPC_FORMAT`` setting of the session is "1"
        although this function did not request the legacy IPC format.
    """
    # Work on a copy: the legacy-IPC keys added below must not leak into the
    # dictionary owned by the caller.
    conf = dict(conf) if conf is not None else {}

    legacy_ipc_keys = (
        "spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT",
        "spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT",
        "spark.mesos.driverEnv.ARROW_PRE_0_15_IPC_FORMAT",
        "spark.kubernetes.driverEnv.ARROW_PRE_0_15_IPC_FORMAT",
    )

    # The legacy format is requested only for pyarrow>=0.15 with pyspark<3.0.
    should_use_legacy_ipc = LooseVersion(pyarrow.__version__) >= LooseVersion(
        "0.15"
    ) and LooseVersion(pyspark.__version__) < LooseVersion("3.0")
    if should_use_legacy_ipc:
        for key in legacy_ipc_keys:
            conf[key] = "1"

    builder = spark.SparkSession.builder.appName("Koalas")
    for key, value in conf.items():
        builder = builder.config(key, value)
    # Currently, Koalas is dependent on such join due to 'compute.ops_on_diff_frames'
    # configuration. This is needed with Spark 3.0+.
    # Keep the returned builder, consistent with the loop above.
    builder = builder.config("spark.sql.analyzer.failAmbiguousSelfJoin", False)
    session = builder.getOrCreate()

    if not should_use_legacy_ipc:
        is_legacy_ipc_set = any(
            session.conf.get(key, None) == "1" for key in legacy_ipc_keys
        )
        if is_legacy_ipc_set:
            raise RuntimeError(
                "Please explicitly unset 'ARROW_PRE_0_15_IPC_FORMAT' environment variable in "
                "both driver and executor sides. Check your spark.executorEnv.*, "
                "spark.yarn.appMasterEnv.*, spark.mesos.driverEnv.* and "
                "spark.kubernetes.driverEnv.* configurations. It is required to set this "
                "environment variable only when you use pyarrow>=0.15 and pyspark<3.0."
            )
    return session

Source File: apihandlers.py From mars with Apache License 2.0

5 votes

def _check_arrow_compatibility(client_version):
        """Tell whether client and local pyarrow share the same first two
        version components.

        A falsy ``client_version`` is replaced by the local pyarrow version,
        so the check then succeeds.
        """
        import pyarrow

        reported = client_version or pyarrow.__version__
        client_prefix = tuple(LooseVersion(reported).version[:2])
        server_prefix = tuple(LooseVersion(pyarrow.__version__).version[:2])
        return client_prefix == server_prefix

Source File: types.py From LearningApacheSpark with MIT License

5 votes

def from_arrow_type(at):
    """ Map a pyarrow data type onto the corresponding Spark data type.

    Raises TypeError for Arrow types without a mapping, for lists of
    timestamps, and for binary when pyarrow is older than 0.10.0.
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    import pyarrow.types as types

    def unsupported(extra=""):
        return TypeError("Unsupported type in conversion from Arrow: " + str(at) + extra)

    # Arrow predicates whose Spark counterpart takes no constructor arguments.
    argless = (
        (types.is_boolean, BooleanType),
        (types.is_int8, ByteType),
        (types.is_int16, ShortType),
        (types.is_int32, IntegerType),
        (types.is_int64, LongType),
        (types.is_float32, FloatType),
        (types.is_float64, DoubleType),
    )
    for matches, spark_cls in argless:
        if matches(at):
            return spark_cls()

    if types.is_decimal(at):
        return DecimalType(precision=at.precision, scale=at.scale)
    if types.is_string(at):
        return StringType()
    if types.is_binary(at):
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise unsupported("\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        return BinaryType()
    if types.is_date32(at):
        return DateType()
    if types.is_timestamp(at):
        return TimestampType()
    if types.is_list(at):
        if types.is_timestamp(at.value_type):
            raise unsupported()
        # element type is converted recursively
        return ArrayType(from_arrow_type(at.value_type))
    raise unsupported()

Source File: types.py From LearningApacheSpark with MIT License

5 votes

def to_arrow_type(dt):
    """ Map a Spark data type onto the corresponding pyarrow type.

    Matching is on the exact class of ``dt``. Raises TypeError for Spark
    types without a mapping, for arrays of timestamps, and for BinaryType
    when pyarrow is older than 0.10.0.
    """
    from distutils.version import LooseVersion
    import pyarrow as pa

    def unsupported(extra=""):
        return TypeError("Unsupported type in conversion to Arrow: " + str(dt) + extra)

    spark_cls = type(dt)

    # Spark classes whose Arrow counterpart is built without arguments.
    argless = {
        BooleanType: pa.bool_,
        ByteType: pa.int8,
        ShortType: pa.int16,
        IntegerType: pa.int32,
        LongType: pa.int64,
        FloatType: pa.float32,
        DoubleType: pa.float64,
        StringType: pa.string,
        DateType: pa.date32,
    }
    if spark_cls in argless:
        return argless[spark_cls]()

    if spark_cls == DecimalType:
        return pa.decimal128(dt.precision, dt.scale)
    if spark_cls == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise unsupported("\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        return pa.binary()
    if spark_cls == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        return pa.timestamp('us', tz='UTC')
    if spark_cls == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise unsupported()
        # element type is converted recursively
        return pa.list_(to_arrow_type(dt.elementType))
    raise unsupported()

Source File: test_parquet.py From vnpy_crypto with MIT License

5 votes

def test_categorical(self, fp):
        """Round-trip a categorical column; skipped for fastparquet < 0.1.3."""
        minimum = LooseVersion("0.1.3")
        if LooseVersion(fastparquet.__version__) < minimum:
            pytest.skip("CategoricalDtype not supported for older fp")

        categories = pd.Categorical(list('abc'))
        frame = pd.DataFrame({'a': categories})
        check_round_trip(frame, fp)

Source File: test_parquet.py From vnpy_crypto with MIT License

5 votes

def pa_lt_070():
    """Return 'pyarrow', skipping unless pyarrow older than 0.7.0 is installed."""
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")

    installed = LooseVersion(pyarrow.__version__)
    if installed >= LooseVersion('0.7.0'):
        pytest.skip("pyarrow is >= 0.7.0")
    return 'pyarrow'

Source File: serializers.py From LearningApacheSpark with MIT License

5 votes

def _create_batch(series, timezone):
    """
    Create an Arrow record batch from the given pandas.Series or list of Series, with optional type.

    Each input series becomes one column; the columns are named "_0", "_1", ...
    in input order.

    :param series: A single pandas.Series, list of Series, or list of (series, arrow_type)
    :param timezone: A timezone to respect when handling timestamp values
    :return: Arrow RecordBatch
    """
    # NOTE(review): `sys` and `xrange` are used below but not imported in this
    # function; presumably both are provided at module level - confirm.
    import decimal
    from distutils.version import LooseVersion
    import pyarrow as pa
    from pyspark.sql.types import _check_series_convert_timestamps_internal
    # Make input conform to [(series1, type1), (series2, type2), ...]
    # A bare series, or a single (series, arrow_type) pair, is wrapped in a list first.
    if not isinstance(series, (list, tuple)) or \
            (len(series) == 2 and isinstance(series[1], pa.DataType)):
        series = [series]
    # Lazy generator: entries without an explicit type are paired with None.
    series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series)

    def create_array(s, t):
        # Convert one series `s` to an Arrow array of type `t` (None: no type given).
        # The null mask is taken before any fill/convert step below.
        mask = s.isnull()
        # Ensure timestamp series are in expected form for Spark internal representation
        # TODO: maybe don't need None check anymore as of Arrow 0.9.1
        if t is not None and pa.types.is_timestamp(t):
            # Nulls are filled with 0 for the conversion; `mask` still marks them.
            s = _check_series_convert_timestamps_internal(s.fillna(0), timezone)
            # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2
            return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
        elif t is not None and pa.types.is_string(t) and sys.version < '3':
            # TODO: need decode before converting to Arrow in Python 2
            # TODO: don't need as of Arrow 0.9.1
            return pa.Array.from_pandas(s.apply(
                lambda v: v.decode("utf-8") if isinstance(v, str) else v), mask=mask, type=t)
        elif t is not None and pa.types.is_decimal(t) and \
                LooseVersion("0.9.0") <= LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            # TODO: see ARROW-2432. Remove when the minimum PyArrow version becomes 0.10.0.
            # None values are swapped for Decimal('NaN') before conversion.
            return pa.Array.from_pandas(s.apply(
                lambda v: decimal.Decimal('NaN') if v is None else v), mask=mask, type=t)
        # Default path: direct conversion with the requested type (possibly None).
        return pa.Array.from_pandas(s, mask=mask, type=t)

    arrs = [create_array(s, t) for s, t in series]
    return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))])

Source File: feather_format.py From predictive-maintenance-using-machine-learning with Apache License 2.0

5 votes

def read_feather(path, columns=None, use_threads=True):
    """
    Load a feather-format object from the file path

    .. versionadded:: 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    columns : sequence, default None
        If not provided, all columns are read

        .. versionadded:: 0.24.0
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame.
        NOTE(review): not part of the signature shown here; presumably
        accepted through a decorator outside this snippet - confirm.

        .. versionadded:: 0.21.0
        .. deprecated:: 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads

        .. versionadded:: 0.24.0

    Returns
    -------
    type of object stored in file

    """

    feather, pyarrow = _try_import()
    path = _stringify_path(path)

    modern_pyarrow = not (
        LooseVersion(pyarrow.__version__) < LooseVersion('0.11.0'))
    if modern_pyarrow:
        return feather.read_feather(path, columns=columns,
                                    use_threads=bool(use_threads))

    # pyarrow < 0.11.0 takes an integer thread count, never below 1
    thread_count = max(int(use_threads), 1)
    return feather.read_feather(path, columns=columns,
                                nthreads=thread_count)

Source File: test_parquet.py From vnpy_crypto with MIT License

5 votes

def pa_ge_070():
    """Return 'pyarrow', skipping unless pyarrow 0.7.0 or newer is installed."""
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")

    installed = LooseVersion(pyarrow.__version__)
    if installed < LooseVersion('0.7.0'):
        pytest.skip("pyarrow is < 0.7.0")
    return 'pyarrow'

Source File: test_parquet.py From vnpy_crypto with MIT License

5 votes

def fp_lt_014():
    """Return 'fastparquet', skipping unless fastparquet older than 0.1.4 is installed."""
    if not _HAVE_FASTPARQUET:
        pytest.skip("fastparquet is not installed")

    installed = LooseVersion(fastparquet.__version__)
    if installed >= LooseVersion('0.1.4'):
        pytest.skip("fastparquet is >= 0.1.4")
    return 'fastparquet'

Source File: test_parquet.py From vnpy_crypto with MIT License

5 votes

def test_write_index(self, engine):
        """Round-trip frames with default, custom and named indexes.

        Skipped for the pyarrow engine when pyarrow is older than 0.7.0.
        """
        if engine == 'pyarrow':
            import pyarrow
            if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'):
                pytest.skip("pyarrow is < 0.7.0")

        # names are compared for every engine except fastparquet
        check_names = engine != 'fastparquet'

        df = pd.DataFrame({'A': [1, 2, 3]})
        check_round_trip(df, engine)

        # non-default index
        custom_indexes = (
            [2, 3, 4],
            pd.date_range('20130101', periods=3),
            list('abc'),
            [1, 3, 4],
        )
        for custom_index in custom_indexes:
            df.index = custom_index
            check_round_trip(df, engine, check_names=check_names)

        # index with meta-data
        df.index = [0, 1, 2]
        df.index.name = 'foo'
        check_round_trip(df, engine)

Python pyarrow.__version__ Examples

Python pyarrow.version() Examples