Python pyarrow.__version__() Examples

The following are 30 code examples of pyarrow.__version__(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pyarrow , or try the search function .
Example #1
Source File: parquet.py    From recruit with Apache License 2.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        if LooseVersion(fastparquet.__version__) < '0.2.1':
            raise ImportError(
                "fastparquet >= 0.2.1 is required for parquet "
                "support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        self.api = fastparquet 
Example #2
Source File: parquet.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use

        try:
            import fastparquet
        except ImportError:
            raise ImportError("fastparquet is required for parquet support\n\n"
                              "you can install via conda\n"
                              "conda install fastparquet -c conda-forge\n"
                              "\nor via pip\n"
                              "pip install -U fastparquet")

        if LooseVersion(fastparquet.__version__) < '0.1.0':
            raise ImportError("fastparquet >= 0.1.0 is required for parquet "
                              "support\n\n"
                              "you can install via conda\n"
                              "conda install fastparquet -c conda-forge\n"
                              "\nor via pip\n"
                              "pip install -U fastparquet")

        self.api = fastparquet 
Example #3
Source File: parquet.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of pyarrow
        # we need to import on first use

        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError("pyarrow is required for parquet support\n\n"
                              "you can install via conda\n"
                              "conda install pyarrow -c conda-forge\n"
                              "\nor via pip\n"
                              "pip install -U pyarrow\n")

        if LooseVersion(pyarrow.__version__) < '0.4.1':
            raise ImportError("pyarrow >= 0.4.1 is required for parquet"
                              "support\n\n"
                              "you can install via conda\n"
                              "conda install pyarrow -c conda-forge\n"
                              "\nor via pip\n"
                              "pip install -U pyarrow\n")

        self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0'
        self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0'
        self.api = pyarrow 
Example #4
Source File: parquet.py    From Splunking-Crime with GNU Affero General Public License v3.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        if LooseVersion(fastparquet.__version__) < '0.1.0':
            raise ImportError(
                "fastparquet >= 0.1.0 is required for parquet "
                "support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        self.api = fastparquet 
Example #5
Source File: __init__.py    From koalas with Apache License 2.0 6 votes vote down vote up
def assert_pyspark_version():
    import logging

    pyspark_ver = None
    try:
        import pyspark
    except ImportError:
        raise ImportError(
            "Unable to import pyspark - consider doing a pip install with [spark] "
            "extra to install pyspark with pip"
        )
    else:
        pyspark_ver = getattr(pyspark, "__version__")
        if pyspark_ver is None or pyspark_ver < "2.4":
            logging.warning(
                'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'.format(
                    pyspark_ver if pyspark_ver is not None else "<unknown version>"
                )
            ) 
Example #6
Source File: utils.py    From LearningApacheSpark with MIT License 6 votes vote down vote up
def require_minimum_pyarrow_version():
    """ Raise ImportError if minimum version of pyarrow is not installed
    """
    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
    minimum_pyarrow_version = "0.8.0"

    from distutils.version import LooseVersion
    try:
        import pyarrow
        have_arrow = True
    except ImportError:
        have_arrow = False
    if not have_arrow:
        raise ImportError("PyArrow >= %s must be installed; however, "
                          "it was not found." % minimum_pyarrow_version)
    if LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version):
        raise ImportError("PyArrow >= %s must be installed; however, "
                          "your version was %s." % (minimum_pyarrow_version, pyarrow.__version__)) 
Example #7
Source File: utils.py    From LearningApacheSpark with MIT License 6 votes vote down vote up
def require_minimum_pandas_version():
    """ Raise ImportError if minimum version of Pandas is not installed
    """
    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
    minimum_pandas_version = "0.19.2"

    from distutils.version import LooseVersion
    try:
        import pandas
        have_pandas = True
    except ImportError:
        have_pandas = False
    if not have_pandas:
        raise ImportError("Pandas >= %s must be installed; however, "
                          "it was not found." % minimum_pandas_version)
    if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
        raise ImportError("Pandas >= %s must be installed; however, "
                          "your version was %s." % (minimum_pandas_version, pandas.__version__)) 
Example #8
Source File: feather_format.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def _try_import():
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        from pyarrow import feather
    except ImportError:
        # give a nice error message
        raise ImportError("pyarrow is not installed\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'):
        raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    return feather, pyarrow 
Example #9
Source File: parquet.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        if LooseVersion(fastparquet.__version__) < '0.2.1':
            raise ImportError(
                "fastparquet >= 0.2.1 is required for parquet "
                "support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        self.api = fastparquet 
Example #10
Source File: parquet.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of pyarrow
        # we need to import on first use
        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError(
                "pyarrow is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install pyarrow -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U pyarrow\n"
            )
        if LooseVersion(pyarrow.__version__) < '0.9.0':
            raise ImportError(
                "pyarrow >= 0.9.0 is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install pyarrow -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U pyarrow\n"
            )

        self.api = pyarrow 
Example #11
Source File: parquet.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of fastparquet
        # we need to import on first use
        try:
            import fastparquet
        except ImportError:
            raise ImportError(
                "fastparquet is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        if LooseVersion(fastparquet.__version__) < '0.1.0':
            raise ImportError(
                "fastparquet >= 0.1.0 is required for parquet "
                "support\n\n"
                "you can install via conda\n"
                "conda install fastparquet -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U fastparquet"
            )
        self.api = fastparquet 
Example #12
Source File: parquet.py    From recruit with Apache License 2.0 6 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of pyarrow
        # we need to import on first use
        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError(
                "pyarrow is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install pyarrow -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U pyarrow\n"
            )
        if LooseVersion(pyarrow.__version__) < '0.9.0':
            raise ImportError(
                "pyarrow >= 0.9.0 is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install pyarrow -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U pyarrow\n"
            )

        self.api = pyarrow 
Example #13
Source File: feather_format.py    From recruit with Apache License 2.0 6 votes vote down vote up
def _try_import():
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        from pyarrow import feather
    except ImportError:
        # give a nice error message
        raise ImportError("pyarrow is not installed\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'):
        raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    return feather, pyarrow 
Example #14
Source File: session.py    From mars with Apache License 2.0 5 votes vote down vote up
def _main(self):
        try:
            import pyarrow
            self._serial_type = dataserializer.SerialType(options.client.serial_type.lower())
        except ImportError:
            pyarrow = None
            self._serial_type = dataserializer.SerialType.PICKLE

        args = self._args.copy()
        args['pyver'] = '.'.join(str(v) for v in sys.version_info[:3])
        args['pickle_protocol'] = self._pickle_protocol
        if pyarrow is not None:
            args['arrow_version'] = pyarrow.__version__

        if self._session_id is None:
            resp = self._req_session.post(self._endpoint + '/api/session', data=args)

            if resp.status_code >= 400:
                raise SystemError('Failed to create mars session: ' + resp.reason)
        else:
            resp = self._req_session.get(self._endpoint + '/api/session/' + self._session_id, params=args)
            if resp.status_code == 404:
                raise ValueError('The session with id = %s doesn\'t exist' % self._session_id)
            if resp.status_code >= 400:
                raise SystemError('Failed to check mars session.')

        content = json.loads(resp.text)
        self._session_id = content['session_id']
        self._pickle_protocol = content.get('pickle_protocol', pickle.HIGHEST_PROTOCOL)
        if not content.get('arrow_compatible'):
            self._serial_type = dataserializer.SerialType.PICKLE 
Example #15
Source File: array_util_test.py    From tfx-bsl with Apache License 2.0 5 votes vote down vote up
def _all_false_null_bitmap_size(size):
  if pa.__version__ < "0.17":
    return size
  # starting from arrow 0.17, the array factory won't create a null bitmap if
  # no element is null.
  # TODO(zhuo): clean up this shim once tfx_bsl supports arrow 0.17+
  # exclusively.
  return 0 
Example #16
Source File: feather_format.py    From recruit with Apache License 2.0 5 votes vote down vote up
def read_feather(path, columns=None, use_threads=True):
    """
    Load a feather-format object from the file path

    .. versionadded 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    columns : sequence, default None
        If not provided, all columns are read

        .. versionadded 0.24.0
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame

       .. versionadded 0.21.0
       .. deprecated 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads

       .. versionadded 0.24.0

    Returns
    -------
    type of object stored in file

    """

    feather, pyarrow = _try_import()
    path = _stringify_path(path)

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.11.0'):
        int_use_threads = int(use_threads)
        if int_use_threads < 1:
            int_use_threads = 1
        return feather.read_feather(path, columns=columns,
                                    nthreads=int_use_threads)

    return feather.read_feather(path, columns=columns,
                                use_threads=bool(use_threads)) 
Example #17
Source File: parquet.py    From Splunking-Crime with GNU Affero General Public License v3.0 5 votes vote down vote up
def __init__(self):
        # since pandas is a dependency of pyarrow
        # we need to import on first use
        try:
            import pyarrow
            import pyarrow.parquet
        except ImportError:
            raise ImportError(
                "pyarrow is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install pyarrow -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U pyarrow\n"
            )
        if LooseVersion(pyarrow.__version__) < '0.4.1':
            raise ImportError(
                "pyarrow >= 0.4.1 is required for parquet support\n\n"
                "you can install via conda\n"
                "conda install pyarrow -c conda-forge\n"
                "\nor via pip\n"
                "pip install -U pyarrow\n"
            )

        self._pyarrow_lt_060 = (
            LooseVersion(pyarrow.__version__) < LooseVersion('0.6.0'))
        self._pyarrow_lt_070 = (
            LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'))

        self.api = pyarrow 
Example #18
Source File: conftest.py    From koalas with Apache License 2.0 5 votes vote down vote up
def add_pa(doctest_namespace):
    if os.getenv("PYARROW_VERSION", None) is not None:
        assert pa.__version__ == os.getenv("PYARROW_VERSION")
    doctest_namespace["pa"] = pa 
Example #19
Source File: conftest.py    From koalas with Apache License 2.0 5 votes vote down vote up
def add_pd(doctest_namespace):
    if os.getenv("PANDAS_VERSION", None) is not None:
        assert pd.__version__ == os.getenv("PANDAS_VERSION")
    doctest_namespace["pd"] = pd 
Example #20
Source File: utils.py    From koalas with Apache License 2.0 5 votes vote down vote up
def default_session(conf=None):
    if conf is None:
        conf = dict()
    should_use_legacy_ipc = False
    if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") and LooseVersion(
        pyspark.__version__
    ) < LooseVersion("3.0"):
        conf["spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        conf["spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        conf["spark.mesos.driverEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        conf["spark.kubernetes.driverEnv.ARROW_PRE_0_15_IPC_FORMAT"] = "1"
        should_use_legacy_ipc = True

    builder = spark.SparkSession.builder.appName("Koalas")
    for key, value in conf.items():
        builder = builder.config(key, value)
    # Currently, Koalas is dependent on such join due to 'compute.ops_on_diff_frames'
    # configuration. This is needed with Spark 3.0+.
    builder.config("spark.sql.analyzer.failAmbiguousSelfJoin", False)
    session = builder.getOrCreate()

    if not should_use_legacy_ipc:
        is_legacy_ipc_set = any(
            v == "1"
            for v in [
                session.conf.get("spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
                session.conf.get("spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
                session.conf.get("spark.mesos.driverEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
                session.conf.get("spark.kubernetes.driverEnv.ARROW_PRE_0_15_IPC_FORMAT", None),
            ]
        )
        if is_legacy_ipc_set:
            raise RuntimeError(
                "Please explicitly unset 'ARROW_PRE_0_15_IPC_FORMAT' environment variable in "
                "both driver and executor sides. Check your spark.executorEnv.*, "
                "spark.yarn.appMasterEnv.*, spark.mesos.driverEnv.* and "
                "spark.kubernetes.driverEnv.* configurations. It is required to set this "
                "environment variable only when you use pyarrow>=0.15 and pyspark<3.0."
            )
    return session 
Example #21
Source File: apihandlers.py    From mars with Apache License 2.0 5 votes vote down vote up
def _check_arrow_compatibility(client_version):
        import pyarrow
        client_version = tuple(LooseVersion(client_version or pyarrow.__version__).version[:2])
        server_version = tuple(LooseVersion(pyarrow.__version__).version[:2])
        return client_version == server_version 
Example #22
Source File: types.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def from_arrow_type(at):
    """ Convert pyarrow type to Spark data type.
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    import pyarrow.types as types
    if types.is_boolean(at):
        spark_type = BooleanType()
    elif types.is_int8(at):
        spark_type = ByteType()
    elif types.is_int16(at):
        spark_type = ShortType()
    elif types.is_int32(at):
        spark_type = IntegerType()
    elif types.is_int64(at):
        spark_type = LongType()
    elif types.is_float32(at):
        spark_type = FloatType()
    elif types.is_float64(at):
        spark_type = DoubleType()
    elif types.is_decimal(at):
        spark_type = DecimalType(precision=at.precision, scale=at.scale)
    elif types.is_string(at):
        spark_type = StringType()
    elif types.is_binary(at):
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion from Arrow: " + str(at) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        spark_type = BinaryType()
    elif types.is_date32(at):
        spark_type = DateType()
    elif types.is_timestamp(at):
        spark_type = TimestampType()
    elif types.is_list(at):
        if types.is_timestamp(at.value_type):
            raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
        spark_type = ArrayType(from_arrow_type(at.value_type))
    else:
        raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
    return spark_type 
Example #23
Source File: types.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif type(dt) == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type 
Example #24
Source File: test_parquet.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_categorical(self, fp):
        if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"):
            pytest.skip("CategoricalDtype not supported for older fp")
        df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
        check_round_trip(df, fp) 
Example #25
Source File: test_parquet.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def pa_lt_070():
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")
    if LooseVersion(pyarrow.__version__) >= LooseVersion('0.7.0'):
        pytest.skip("pyarrow is >= 0.7.0")
    return 'pyarrow' 
Example #26
Source File: serializers.py    From LearningApacheSpark with MIT License 5 votes vote down vote up
def _create_batch(series, timezone):
    """
    Create an Arrow record batch from the given pandas.Series or list of Series, with optional type.

    :param series: A single pandas.Series, list of Series, or list of (series, arrow_type)
    :param timezone: A timezone to respect when handling timestamp values
    :return: Arrow RecordBatch
    """
    import decimal
    from distutils.version import LooseVersion
    import pyarrow as pa
    from pyspark.sql.types import _check_series_convert_timestamps_internal
    # Make input conform to [(series1, type1), (series2, type2), ...]
    if not isinstance(series, (list, tuple)) or \
            (len(series) == 2 and isinstance(series[1], pa.DataType)):
        series = [series]
    series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series)

    def create_array(s, t):
        mask = s.isnull()
        # Ensure timestamp series are in expected form for Spark internal representation
        # TODO: maybe don't need None check anymore as of Arrow 0.9.1
        if t is not None and pa.types.is_timestamp(t):
            s = _check_series_convert_timestamps_internal(s.fillna(0), timezone)
            # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2
            return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
        elif t is not None and pa.types.is_string(t) and sys.version < '3':
            # TODO: need decode before converting to Arrow in Python 2
            # TODO: don't need as of Arrow 0.9.1
            return pa.Array.from_pandas(s.apply(
                lambda v: v.decode("utf-8") if isinstance(v, str) else v), mask=mask, type=t)
        elif t is not None and pa.types.is_decimal(t) and \
                LooseVersion("0.9.0") <= LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            # TODO: see ARROW-2432. Remove when the minimum PyArrow version becomes 0.10.0.
            return pa.Array.from_pandas(s.apply(
                lambda v: decimal.Decimal('NaN') if v is None else v), mask=mask, type=t)
        return pa.Array.from_pandas(s, mask=mask, type=t)

    arrs = [create_array(s, t) for s, t in series]
    return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))]) 
Example #27
Source File: feather_format.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def read_feather(path, columns=None, use_threads=True):
    """
    Load a feather-format object from the file path

    .. versionadded 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    columns : sequence, default None
        If not provided, all columns are read

        .. versionadded 0.24.0
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame

       .. versionadded 0.21.0
       .. deprecated 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads

       .. versionadded 0.24.0

    Returns
    -------
    type of object stored in file

    """

    feather, pyarrow = _try_import()
    path = _stringify_path(path)

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.11.0'):
        int_use_threads = int(use_threads)
        if int_use_threads < 1:
            int_use_threads = 1
        return feather.read_feather(path, columns=columns,
                                    nthreads=int_use_threads)

    return feather.read_feather(path, columns=columns,
                                use_threads=bool(use_threads)) 
Example #28
Source File: test_parquet.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def pa_ge_070():
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")
    if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'):
        pytest.skip("pyarrow is < 0.7.0")
    return 'pyarrow' 
Example #29
Source File: test_parquet.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def fp_lt_014():
    if not _HAVE_FASTPARQUET:
        pytest.skip("fastparquet is not installed")
    if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'):
        pytest.skip("fastparquet is >= 0.1.4")
    return 'fastparquet' 
Example #30
Source File: test_parquet.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_write_index(self, engine):
        check_names = engine != 'fastparquet'

        if engine == 'pyarrow':
            import pyarrow
            if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'):
                pytest.skip("pyarrow is < 0.7.0")

        df = pd.DataFrame({'A': [1, 2, 3]})
        check_round_trip(df, engine)

        indexes = [
            [2, 3, 4],
            pd.date_range('20130101', periods=3),
            list('abc'),
            [1, 3, 4],
        ]
        # non-default index
        for index in indexes:
            df.index = index
            check_round_trip(df, engine, check_names=check_names)

        # index with meta-data
        df.index = [0, 1, 2]
        df.index.name = 'foo'
        check_round_trip(df, engine)