Python pyarrow.__version__ Examples
The following are 30 code examples of pyarrow.__version__.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pyarrow, or try the search function.
Example #1
Source File: parquet.py From recruit with Apache License 2.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``fastparquet`` module.

    Raises
    ------
    ImportError
        If ``fastparquet`` cannot be imported, or if the installed
        version compares lower than 0.2.1.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install fastparquet -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U fastparquet"
    )

    # pandas is a dependency of fastparquet, so the import has to be
    # deferred until first use.
    try:
        import fastparquet
    except ImportError:
        raise ImportError(
            "fastparquet is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(fastparquet.__version__)
    if installed < '0.2.1':
        raise ImportError(
            "fastparquet >= 0.2.1 is required for parquet "
            "support\n\n" + install_hint
        )

    self.api = fastparquet
Example #2
Source File: parquet.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``fastparquet`` module.

    Raises
    ------
    ImportError
        If ``fastparquet`` cannot be imported, or if the installed
        version compares lower than 0.1.0.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install fastparquet -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U fastparquet"
    )

    # pandas is a dependency of fastparquet, so the import has to be
    # deferred until first use.
    try:
        import fastparquet
    except ImportError:
        raise ImportError(
            "fastparquet is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(fastparquet.__version__)
    if installed < '0.1.0':
        raise ImportError(
            "fastparquet >= 0.1.0 is required for parquet "
            "support\n\n" + install_hint
        )

    self.api = fastparquet
Example #3
Source File: parquet.py From elasticintel with GNU General Public License v3.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``pyarrow`` module.

    Also records two version flags on the instance:
    ``_pyarrow_lt_050`` and ``_pyarrow_lt_060`` are true when the installed
    pyarrow compares lower than 0.5.0 / 0.6.0 respectively.

    Raises
    ------
    ImportError
        If ``pyarrow`` (or ``pyarrow.parquet``) cannot be imported, or if
        the installed version compares lower than 0.4.1.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError("pyarrow is required for parquet support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "\nor via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < '0.4.1':
        # The first literal must end with a space: adjacent string literals
        # are joined with no separator, which previously produced
        # "parquetsupport" in the message.
        raise ImportError("pyarrow >= 0.4.1 is required for parquet "
                          "support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "\nor via pip\n"
                          "pip install -U pyarrow\n")

    self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0'
    self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0'
    self.api = pyarrow
Example #4
Source File: parquet.py From Splunking-Crime with GNU Affero General Public License v3.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``fastparquet`` module.

    Raises
    ------
    ImportError
        If ``fastparquet`` cannot be imported, or if the installed
        version compares lower than 0.1.0.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install fastparquet -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U fastparquet"
    )

    # pandas is a dependency of fastparquet, so the import has to be
    # deferred until first use.
    try:
        import fastparquet
    except ImportError:
        raise ImportError(
            "fastparquet is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(fastparquet.__version__)
    if installed < '0.1.0':
        raise ImportError(
            "fastparquet >= 0.1.0 is required for parquet "
            "support\n\n" + install_hint
        )

    self.api = fastparquet
Example #5
Source File: __init__.py From koalas with Apache License 2.0 | 6 votes |
def _leading_version_numbers(version):
    """Return the leading numeric release components of *version* as a tuple of ints.

    ``"2.4.5"`` gives ``(2, 4, 5)`` and ``"3.0.0rc1"`` gives ``(3, 0, 0)``.
    Parsing stops at the first component carrying a non-digit suffix.
    """
    numbers = []
    for component in version.split("."):
        digits = ""
        for char in component:
            if char not in "0123456789":
                break
            digits += char
        if digits:
            numbers.append(int(digits))
        if digits != component:
            # Non-numeric suffix (e.g. "0rc1", "dev0"): nothing after it
            # belongs to the release number.
            break
    return tuple(numbers)


def assert_pyspark_version():
    """Warn when the installed pyspark is older than 2.4 or its version is unknown.

    The version is compared numerically, component by component, so that
    e.g. ``"10.0"`` is not mistaken for something older than ``"2.4"`` as a
    plain string comparison would do.

    Raises
    ------
    ImportError
        If pyspark cannot be imported.
    """
    import logging

    try:
        import pyspark
    except ImportError:
        raise ImportError(
            "Unable to import pyspark - consider doing a pip install with [spark] "
            "extra to install pyspark with pip"
        )

    # Default to None so a module without ``__version__`` reaches the
    # "<unknown version>" warning instead of raising AttributeError.
    pyspark_ver = getattr(pyspark, "__version__", None)

    if pyspark_ver is None or _leading_version_numbers(str(pyspark_ver)) < (2, 4):
        logging.warning(
            'Found pyspark version "{}" installed. pyspark>=2.4.0 is recommended.'.format(
                pyspark_ver if pyspark_ver is not None else "<unknown version>"
            )
        )
Example #6
Source File: utils.py From LearningApacheSpark with MIT License | 6 votes |
def require_minimum_pyarrow_version():
    """ Raise ImportError if minimum version of pyarrow is not installed
    """
    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
    from distutils.version import LooseVersion

    required = "0.8.0"

    # Use None as the "not importable" marker; the error is raised outside
    # the except block, as in the flag-based form.
    try:
        import pyarrow
    except ImportError:
        pyarrow = None

    if pyarrow is None:
        raise ImportError("PyArrow >= %s must be installed; however, "
                          "it was not found." % required)

    installed = pyarrow.__version__
    if LooseVersion(installed) < LooseVersion(required):
        raise ImportError("PyArrow >= %s must be installed; however, "
                          "your version was %s." % (required, installed))
Example #7
Source File: utils.py From LearningApacheSpark with MIT License | 6 votes |
def require_minimum_pandas_version():
    """ Raise ImportError if minimum version of Pandas is not installed
    """
    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
    from distutils.version import LooseVersion

    required = "0.19.2"

    # Use None as the "not importable" marker; the error is raised outside
    # the except block, as in the flag-based form.
    try:
        import pandas
    except ImportError:
        pandas = None

    if pandas is None:
        raise ImportError("Pandas >= %s must be installed; however, "
                          "it was not found." % required)

    installed = pandas.__version__
    if LooseVersion(installed) < LooseVersion(required):
        raise ImportError("Pandas >= %s must be installed; however, "
                          "your version was %s." % (required, installed))
Example #8
Source File: feather_format.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def _try_import():
    """Import pyarrow and its feather submodule on first use.

    Returns
    -------
    tuple
        ``(feather, pyarrow)``: the ``pyarrow.feather`` module and the
        ``pyarrow`` package.

    Raises
    ------
    ImportError
        If pyarrow cannot be imported, or if the installed version
        compares lower than 0.9.0.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        from pyarrow import feather
    except ImportError:
        # give a nice error message
        raise ImportError("pyarrow is not installed\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'):
        # The conda line needs its own trailing newline: adjacent literals
        # are joined with no separator, which previously produced
        # "conda-forgeor via pip" in the message.
        raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    return feather, pyarrow
Example #9
Source File: parquet.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``fastparquet`` module.

    Raises
    ------
    ImportError
        If ``fastparquet`` cannot be imported, or if the installed
        version compares lower than 0.2.1.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install fastparquet -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U fastparquet"
    )

    # pandas is a dependency of fastparquet, so the import has to be
    # deferred until first use.
    try:
        import fastparquet
    except ImportError:
        raise ImportError(
            "fastparquet is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(fastparquet.__version__)
    if installed < '0.2.1':
        raise ImportError(
            "fastparquet >= 0.2.1 is required for parquet "
            "support\n\n" + install_hint
        )

    self.api = fastparquet
Example #10
Source File: parquet.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``pyarrow`` module.

    Raises
    ------
    ImportError
        If ``pyarrow`` (or ``pyarrow.parquet``) cannot be imported, or if
        the installed version compares lower than 0.9.0.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install pyarrow -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U pyarrow\n"
    )

    # pandas is a dependency of pyarrow, so the import has to be deferred
    # until first use.
    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError(
            "pyarrow is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(pyarrow.__version__)
    if installed < '0.9.0':
        raise ImportError(
            "pyarrow >= 0.9.0 is required for parquet support\n\n" + install_hint
        )

    self.api = pyarrow
Example #11
Source File: parquet.py From vnpy_crypto with MIT License | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``fastparquet`` module.

    Raises
    ------
    ImportError
        If ``fastparquet`` cannot be imported, or if the installed
        version compares lower than 0.1.0.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install fastparquet -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U fastparquet"
    )

    # pandas is a dependency of fastparquet, so the import has to be
    # deferred until first use.
    try:
        import fastparquet
    except ImportError:
        raise ImportError(
            "fastparquet is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(fastparquet.__version__)
    if installed < '0.1.0':
        raise ImportError(
            "fastparquet >= 0.1.0 is required for parquet "
            "support\n\n" + install_hint
        )

    self.api = fastparquet
Example #12
Source File: parquet.py From recruit with Apache License 2.0 | 6 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``pyarrow`` module.

    Raises
    ------
    ImportError
        If ``pyarrow`` (or ``pyarrow.parquet``) cannot be imported, or if
        the installed version compares lower than 0.9.0.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install pyarrow -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U pyarrow\n"
    )

    # pandas is a dependency of pyarrow, so the import has to be deferred
    # until first use.
    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError(
            "pyarrow is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(pyarrow.__version__)
    if installed < '0.9.0':
        raise ImportError(
            "pyarrow >= 0.9.0 is required for parquet support\n\n" + install_hint
        )

    self.api = pyarrow
Example #13
Source File: feather_format.py From recruit with Apache License 2.0 | 6 votes |
def _try_import():
    """Import pyarrow and its feather submodule on first use.

    Returns
    -------
    tuple
        ``(feather, pyarrow)``: the ``pyarrow.feather`` module and the
        ``pyarrow`` package.

    Raises
    ------
    ImportError
        If pyarrow cannot be imported, or if the installed version
        compares lower than 0.9.0.
    """
    # since pandas is a dependency of pyarrow
    # we need to import on first use
    try:
        import pyarrow
        from pyarrow import feather
    except ImportError:
        # give a nice error message
        raise ImportError("pyarrow is not installed\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    if LooseVersion(pyarrow.__version__) < LooseVersion('0.9.0'):
        # The conda line needs its own trailing newline: adjacent literals
        # are joined with no separator, which previously produced
        # "conda-forgeor via pip" in the message.
        raise ImportError("pyarrow >= 0.9.0 required for feather support\n\n"
                          "you can install via conda\n"
                          "conda install pyarrow -c conda-forge\n"
                          "or via pip\n"
                          "pip install -U pyarrow\n")

    return feather, pyarrow
Example #14
Source File: session.py From mars with Apache License 2.0 | 5 votes |
def _main(self):
    """Create a new session on the endpoint, or validate the existing one.

    Picks a serial type (falling back to PICKLE when pyarrow cannot be
    imported), sends the client's Python version, pickle protocol and,
    when available, pyarrow version to ``/api/session``, then stores the
    session id and pickle protocol from the JSON response. If the response
    does not report ``arrow_compatible``, the serial type is reset to PICKLE.

    Raises
    ------
    SystemError
        If the create or check request returns a status code >= 400
        (other than 404 on the check request).
    ValueError
        If the check request for an existing session id returns 404.
    """
    try:
        import pyarrow
        self._serial_type = dataserializer.SerialType(options.client.serial_type.lower())
    except ImportError:
        pyarrow = None
        self._serial_type = dataserializer.SerialType.PICKLE

    request_args = self._args.copy()
    request_args['pyver'] = '.'.join(map(str, sys.version_info[:3]))
    request_args['pickle_protocol'] = self._pickle_protocol
    if pyarrow is not None:
        request_args['arrow_version'] = pyarrow.__version__

    session_url = self._endpoint + '/api/session'
    if self._session_id is not None:
        # An id is already known: check that the session still exists.
        response = self._req_session.get(session_url + '/' + self._session_id,
                                         params=request_args)
        if response.status_code == 404:
            raise ValueError("The session with id = %s doesn't exist" % self._session_id)
        if response.status_code >= 400:
            raise SystemError('Failed to check mars session.')
    else:
        # No id yet: ask the endpoint to create a session.
        response = self._req_session.post(session_url, data=request_args)
        if response.status_code >= 400:
            raise SystemError('Failed to create mars session: ' + response.reason)

    payload = json.loads(response.text)
    self._session_id = payload['session_id']
    self._pickle_protocol = payload.get('pickle_protocol', pickle.HIGHEST_PROTOCOL)

    if not payload.get('arrow_compatible'):
        self._serial_type = dataserializer.SerialType.PICKLE
Example #15
Source File: array_util_test.py From tfx-bsl with Apache License 2.0 | 5 votes |
def _arrow_major_minor(version):
    """Return ``(major, minor)`` of a dotted version string as a tuple of ints.

    Only the leading digits of each of the first two components are used;
    a missing or non-numeric component counts as 0.
    """
    numbers = []
    for component in version.split(".")[:2]:
        digits = ""
        for char in component:
            if char not in "0123456789":
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    while len(numbers) < 2:
        numbers.append(0)
    return tuple(numbers)


def _all_false_null_bitmap_size(size):
    """Return the expected null-bitmap size for an array without nulls.

    Returns *size* for arrow versions before 0.17 and 0 otherwise. The
    version is compared numerically rather than as a string, because a
    string comparison ranks e.g. "0.9" after "0.17".
    """
    if _arrow_major_minor(pa.__version__) < (0, 17):
        return size
    # starting from arrow 0.17, the array factory won't create a null bitmap if
    # no element is null.
    # TODO(zhuo): clean up this shim once tfx_bsl supports arrow 0.17+
    # exclusively.
    return 0
Example #16
Source File: feather_format.py From recruit with Apache License 2.0 | 5 votes |
def read_feather(path, columns=None, use_threads=True):
    """
    Load a feather-format object from the file path.

    .. versionadded:: 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    columns : sequence, default None
        If not provided, all columns are read

        .. versionadded:: 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads

        .. versionadded:: 0.24.0

    Returns
    -------
    type of object stored in file

    Notes
    -----
    With pyarrow older than 0.11.0 the flag is forwarded as an integer
    ``nthreads`` (at least 1); newer versions receive ``use_threads`` as
    a bool.
    """
    feather, pyarrow = _try_import()
    path = _stringify_path(path)

    if LooseVersion(pyarrow.__version__) >= LooseVersion('0.11.0'):
        return feather.read_feather(path, columns=columns,
                                    use_threads=bool(use_threads))

    # Older pyarrow takes a thread count instead of a flag; never pass
    # fewer than one thread.
    thread_count = max(int(use_threads), 1)
    return feather.read_feather(path, columns=columns,
                                nthreads=thread_count)
Example #17
Source File: parquet.py From Splunking-Crime with GNU Affero General Public License v3.0 | 5 votes |
def __init__(self):
    """Bind ``self.api`` to the lazily imported ``pyarrow`` module.

    Also records two version flags on the instance:
    ``_pyarrow_lt_060`` and ``_pyarrow_lt_070`` are true when the installed
    pyarrow compares lower than 0.6.0 / 0.7.0 respectively.

    Raises
    ------
    ImportError
        If ``pyarrow`` (or ``pyarrow.parquet``) cannot be imported, or if
        the installed version compares lower than 0.4.1.
    """
    # Installation instructions shared by both error messages.
    install_hint = (
        "you can install via conda\n"
        "conda install pyarrow -c conda-forge\n"
        "\nor via pip\n"
        "pip install -U pyarrow\n"
    )

    # pandas is a dependency of pyarrow, so the import has to be deferred
    # until first use.
    try:
        import pyarrow
        import pyarrow.parquet
    except ImportError:
        raise ImportError(
            "pyarrow is required for parquet support\n\n" + install_hint
        )

    installed = LooseVersion(pyarrow.__version__)
    if installed < '0.4.1':
        raise ImportError(
            "pyarrow >= 0.4.1 is required for parquet support\n\n" + install_hint
        )

    self._pyarrow_lt_060 = installed < LooseVersion('0.6.0')
    self._pyarrow_lt_070 = installed < LooseVersion('0.7.0')
    self.api = pyarrow
Example #18
Source File: conftest.py From koalas with Apache License 2.0 | 5 votes |
def add_pa(doctest_namespace):
    """Expose ``pa`` in the doctest namespace.

    When the ``PYARROW_VERSION`` environment variable is set, first assert
    that it equals ``pa.__version__``.
    """
    expected_version = os.getenv("PYARROW_VERSION", None)
    if expected_version is not None:
        assert pa.__version__ == expected_version
    doctest_namespace["pa"] = pa
Example #19
Source File: conftest.py From koalas with Apache License 2.0 | 5 votes |
def add_pd(doctest_namespace):
    """Expose ``pd`` in the doctest namespace.

    When the ``PANDAS_VERSION`` environment variable is set, first assert
    that it equals ``pd.__version__``.
    """
    expected_version = os.getenv("PANDAS_VERSION", None)
    if expected_version is not None:
        assert pd.__version__ == expected_version
    doctest_namespace["pd"] = pd
Example #20
Source File: utils.py From koalas with Apache License 2.0 | 5 votes |
def default_session(conf=None):
    """Build the "Koalas" Spark session via ``builder.getOrCreate()``.

    Every ``conf`` entry is applied to the builder. With pyarrow >= 0.15
    and pyspark < 3.0, the four ``ARROW_PRE_0_15_IPC_FORMAT`` environment
    settings are added to ``conf`` (the dict passed in is modified) before
    the session is built.

    Raises
    ------
    RuntimeError
        If the legacy IPC settings were not added here but the resulting
        session still reports any of them as "1".
    """
    legacy_ipc_keys = (
        "spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT",
        "spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT",
        "spark.mesos.driverEnv.ARROW_PRE_0_15_IPC_FORMAT",
        "spark.kubernetes.driverEnv.ARROW_PRE_0_15_IPC_FORMAT",
    )

    if conf is None:
        conf = dict()

    should_use_legacy_ipc = (
        LooseVersion(pyarrow.__version__) >= LooseVersion("0.15")
        and LooseVersion(pyspark.__version__) < LooseVersion("3.0")
    )
    if should_use_legacy_ipc:
        for key in legacy_ipc_keys:
            conf[key] = "1"

    builder = spark.SparkSession.builder.appName("Koalas")
    for option, setting in conf.items():
        builder = builder.config(option, setting)
    # Currently, Koalas is dependent on such join due to 'compute.ops_on_diff_frames'
    # configuration. This is needed with Spark 3.0+.
    builder.config("spark.sql.analyzer.failAmbiguousSelfJoin", False)

    session = builder.getOrCreate()
    if should_use_legacy_ipc:
        return session

    # Read all four settings up front, then check whether any is enabled.
    configured = [session.conf.get(key, None) for key in legacy_ipc_keys]
    if any(value == "1" for value in configured):
        raise RuntimeError(
            "Please explicitly unset 'ARROW_PRE_0_15_IPC_FORMAT' environment variable in "
            "both driver and executor sides. Check your spark.executorEnv.*, "
            "spark.yarn.appMasterEnv.*, spark.mesos.driverEnv.* and "
            "spark.kubernetes.driverEnv.* configurations. It is required to set this "
            "environment variable only when you use pyarrow>=0.15 and pyspark<3.0."
        )
    return session
Example #21
Source File: apihandlers.py From mars with Apache License 2.0 | 5 votes |
def _check_arrow_compatibility(client_version):
    """Return True when client and server pyarrow share the first two version components.

    A falsy *client_version* is replaced by the locally installed pyarrow
    version, which makes the check succeed.
    """
    import pyarrow

    def leading_pair(version):
        # First two parsed components of the version, as a tuple.
        return tuple(LooseVersion(version).version[:2])

    client_pair = leading_pair(client_version or pyarrow.__version__)
    server_pair = leading_pair(pyarrow.__version__)
    return client_pair == server_pair
Example #22
Source File: types.py From LearningApacheSpark with MIT License | 5 votes |
def from_arrow_type(at):
    """ Convert pyarrow type to Spark data type.

    Raises TypeError for Arrow types without a mapping here, for lists of
    timestamps, and for binary when pyarrow is older than 0.10.0.
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    import pyarrow.types as types

    if types.is_boolean(at):
        return BooleanType()
    if types.is_int8(at):
        return ByteType()
    if types.is_int16(at):
        return ShortType()
    if types.is_int32(at):
        return IntegerType()
    if types.is_int64(at):
        return LongType()
    if types.is_float32(at):
        return FloatType()
    if types.is_float64(at):
        return DoubleType()
    if types.is_decimal(at):
        return DecimalType(precision=at.precision, scale=at.scale)
    if types.is_string(at):
        return StringType()
    if types.is_binary(at):
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion from Arrow: " + str(at) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        return BinaryType()
    if types.is_date32(at):
        return DateType()
    if types.is_timestamp(at):
        return TimestampType()
    if types.is_list(at):
        element_type = at.value_type
        if types.is_timestamp(element_type):
            raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
        return ArrayType(from_arrow_type(element_type))

    raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
Example #23
Source File: types.py From LearningApacheSpark with MIT License | 5 votes |
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type

    Matching is on the exact class of *dt*. Raises TypeError for Spark
    types without a mapping here, for arrays of timestamps, and for
    BinaryType when pyarrow is older than 0.10.0.
    """
    from distutils.version import LooseVersion
    import pyarrow as pa

    spark_cls = type(dt)

    if spark_cls == BooleanType:
        return pa.bool_()
    if spark_cls == ByteType:
        return pa.int8()
    if spark_cls == ShortType:
        return pa.int16()
    if spark_cls == IntegerType:
        return pa.int32()
    if spark_cls == LongType:
        return pa.int64()
    if spark_cls == FloatType:
        return pa.float32()
    if spark_cls == DoubleType:
        return pa.float64()
    if spark_cls == DecimalType:
        return pa.decimal128(dt.precision, dt.scale)
    if spark_cls == StringType:
        return pa.string()
    if spark_cls == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        return pa.binary()
    if spark_cls == DateType:
        return pa.date32()
    if spark_cls == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        return pa.timestamp('us', tz='UTC')
    if spark_cls == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        return pa.list_(to_arrow_type(dt.elementType))

    raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
Example #24
Source File: test_parquet.py From vnpy_crypto with MIT License | 5 votes |
def test_categorical(self, fp):
    """Round-trip a frame with one categorical column through *fp*.

    Skipped when the installed fastparquet is older than 0.1.3.
    """
    minimum = LooseVersion("0.1.3")
    if LooseVersion(fastparquet.__version__) < minimum:
        pytest.skip("CategoricalDtype not supported for older fp")

    categorical_column = pd.Categorical(list('abc'))
    frame = pd.DataFrame({'a': categorical_column})
    check_round_trip(frame, fp)
Example #25
Source File: test_parquet.py From vnpy_crypto with MIT License | 5 votes |
def pa_lt_070():
    """Return ``'pyarrow'`` when pyarrow is installed and older than 0.7.0; otherwise skip."""
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")

    installed = LooseVersion(pyarrow.__version__)
    threshold = LooseVersion('0.7.0')
    if installed >= threshold:
        pytest.skip("pyarrow is >= 0.7.0")

    return 'pyarrow'
Example #26
Source File: serializers.py From LearningApacheSpark with MIT License | 5 votes |
def _create_batch(series, timezone):
    """
    Create an Arrow record batch from the given pandas.Series or list of Series, with optional type.

    Each input series becomes one column, named ``_0``, ``_1``, ... by position.

    :param series: A single pandas.Series, list of Series, or list of (series, arrow_type)
    :param timezone: A timezone to respect when handling timestamp values
    :return: Arrow RecordBatch
    """
    import decimal
    from distutils.version import LooseVersion
    import pyarrow as pa
    from pyspark.sql.types import _check_series_convert_timestamps_internal
    # Make input conform to [(series1, type1), (series2, type2), ...]
    # A bare Series, or a single (series, arrow_type) pair, is wrapped in a list first.
    if not isinstance(series, (list, tuple)) or \
            (len(series) == 2 and isinstance(series[1], pa.DataType)):
        series = [series]
    # Lazy generator: entries without an explicit type are paired with None.
    # It is consumed exactly once, by the list comprehension at the bottom.
    series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series)

    def create_array(s, t):
        # Convert one series `s` to an Arrow array of type `t` (None = no explicit type).
        # Null positions are captured before any value rewriting below.
        mask = s.isnull()
        # Ensure timestamp series are in expected form for Spark internal representation
        # TODO: maybe don't need None check anymore as of Arrow 0.9.1
        if t is not None and pa.types.is_timestamp(t):
            # Nulls are filled with 0 for the conversion; `mask` still marks them as null.
            s = _check_series_convert_timestamps_internal(s.fillna(0), timezone)
            # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2
            return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
        elif t is not None and pa.types.is_string(t) and sys.version < '3':
            # NOTE(review): `sys` is not imported in this block; presumably a
            # module-level import - confirm.
            # TODO: need decode before converting to Arrow in Python 2
            # TODO: don't need as of Arrow 0.9.1
            return pa.Array.from_pandas(s.apply(
                lambda v: v.decode("utf-8") if isinstance(v, str) else v), mask=mask, type=t)
        elif t is not None and pa.types.is_decimal(t) and \
                LooseVersion("0.9.0") <= LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            # TODO: see ARROW-2432. Remove when the minimum PyArrow version becomes 0.10.0.
            # None values are replaced by Decimal('NaN') before conversion.
            return pa.Array.from_pandas(s.apply(
                lambda v: decimal.Decimal('NaN') if v is None else v), mask=mask, type=t)
        # Default path: direct conversion with the requested (possibly None) type.
        return pa.Array.from_pandas(s, mask=mask, type=t)

    arrs = [create_array(s, t) for s, t in series]
    # NOTE(review): `xrange` is not defined in this block; presumably aliased at
    # module level for Python 3 - confirm.
    return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))])
Example #27
Source File: feather_format.py From predictive-maintenance-using-machine-learning with Apache License 2.0 | 5 votes |
def read_feather(path, columns=None, use_threads=True):
    """
    Load a feather-format object from the file path.

    .. versionadded:: 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    columns : sequence, default None
        If not provided, all columns are read

        .. versionadded:: 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads

        .. versionadded:: 0.24.0

    Returns
    -------
    type of object stored in file

    Notes
    -----
    With pyarrow older than 0.11.0 the flag is forwarded as an integer
    ``nthreads`` (at least 1); newer versions receive ``use_threads`` as
    a bool.
    """
    feather, pyarrow = _try_import()
    path = _stringify_path(path)

    if LooseVersion(pyarrow.__version__) >= LooseVersion('0.11.0'):
        return feather.read_feather(path, columns=columns,
                                    use_threads=bool(use_threads))

    # Older pyarrow takes a thread count instead of a flag; never pass
    # fewer than one thread.
    thread_count = max(int(use_threads), 1)
    return feather.read_feather(path, columns=columns,
                                nthreads=thread_count)
Example #28
Source File: test_parquet.py From vnpy_crypto with MIT License | 5 votes |
def pa_ge_070():
    """Return ``'pyarrow'`` when pyarrow is installed and at least 0.7.0; otherwise skip."""
    if not _HAVE_PYARROW:
        pytest.skip("pyarrow is not installed")

    installed = LooseVersion(pyarrow.__version__)
    threshold = LooseVersion('0.7.0')
    if installed < threshold:
        pytest.skip("pyarrow is < 0.7.0")

    return 'pyarrow'
Example #29
Source File: test_parquet.py From vnpy_crypto with MIT License | 5 votes |
def fp_lt_014():
    """Return ``'fastparquet'`` when fastparquet is installed and older than 0.1.4; otherwise skip."""
    if not _HAVE_FASTPARQUET:
        pytest.skip("fastparquet is not installed")

    installed = LooseVersion(fastparquet.__version__)
    threshold = LooseVersion('0.1.4')
    if installed >= threshold:
        pytest.skip("fastparquet is >= 0.1.4")

    return 'fastparquet'
Example #30
Source File: test_parquet.py From vnpy_crypto with MIT License | 5 votes |
def test_write_index(self, engine):
    """Round-trip frames with default, custom and named indexes through *engine*.

    Index names are only checked when the engine is not fastparquet.
    Skipped for the pyarrow engine when pyarrow is older than 0.7.0.
    """
    check_names = engine != 'fastparquet'

    if engine == 'pyarrow':
        import pyarrow
        if LooseVersion(pyarrow.__version__) < LooseVersion('0.7.0'):
            pytest.skip("pyarrow is < 0.7.0")

    # default index
    df = pd.DataFrame({'A': [1, 2, 3]})
    check_round_trip(df, engine)

    # non-default index
    candidate_indexes = [
        [2, 3, 4],
        pd.date_range('20130101', periods=3),
        list('abc'),
        [1, 3, 4],
    ]
    for candidate in candidate_indexes:
        df.index = candidate
        check_round_trip(df, engine, check_names=check_names)

    # index with meta-data
    df.index = [0, 1, 2]
    df.index.name = 'foo'
    check_round_trip(df, engine)