Python pyarrow.list_() Examples
The following are 30 code examples of pyarrow.list_(), collected from open-source projects. The source file, project, and license for each example are noted in the header above it. You may also want to check out the other available functions and classes of the pyarrow module.
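Before diving in, a quick orientation: pa.list_(value_type) constructs a variable-length list data type, most often passed as the type= argument of pa.array(). A minimal sketch of the basic usage (assuming only that pyarrow is importable as pa):

import pyarrow as pa

# A list<int64> type: each array element is itself a list of int64 values.
list_of_ints = pa.list_(pa.int64())

# Build an array of that type; None yields a null list element.
arr = pa.array([[1, 2], [], None], type=list_of_ints)
assert arr.type == list_of_ints
assert arr.to_pylist() == [[1, 2], [], None]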
Example #1
Source File: tf_sequence_example_record_test.py, from tfx-bsl (Apache License 2.0)

def _GetExpectedColumnValues(tfxio):
  if tfxio._can_produce_large_types:
    list_factory = pa.large_list
    bytes_type = pa.large_binary()
  else:
    list_factory = pa.list_
    bytes_type = pa.binary()
  return {
      path.ColumnPath(["int_feature"]):
          pa.array([[1], [2], [3]], type=list_factory(pa.int64())),
      path.ColumnPath(["float_feature"]):
          pa.array([[1, 2, 3, 4], [2, 3, 4, 5], None],
                   type=list_factory(pa.float32())),
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]):
          pa.array([[[1, 2], [3]], None, [[4]]],
                   list_factory(list_factory(pa.int64()))),
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]):
          pa.array([None, [[b"foo", b"bar"], []], [[b"baz"]]],
                   list_factory(list_factory(bytes_type)))
  }
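Note the pattern above, which recurs in several examples below: the expected types switch between pa.list_ and pa.large_list depending on whether the TFXIO implementation produces large types. The two type families hold the same logical values but differ in offset width. A small illustration of that distinction (my own sketch, not part of the test):

import pyarrow as pa

# list_ uses 32-bit offsets; large_list uses 64-bit offsets, which lets
# the combined length of all child values exceed 2**31 - 1.
small = pa.array([[1, 2], [3]], type=pa.list_(pa.int64()))
large = pa.array([[1, 2], [3]], type=pa.large_list(pa.int64()))
assert small.type != large.type                 # distinct Arrow types
assert small.to_pylist() == large.to_pylist()   # same logical values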
Example #2
Source File: basic_stats_generator_test.py, from data-validation (Apache License 2.0)

def test_basic_stats_generator_empty_batch(self):
  batches = [
      pa.RecordBatch.from_arrays(
          [pa.array([], type=pa.list_(pa.binary()))], ['a'])
  ]
  expected_result = {
      types.FeaturePath(['a']): text_format.Parse(
          """
          path { step: 'a' }
          type: STRING
          string_stats {
            common_stats {
              num_non_missing: 0
              tot_num_values: 0
            }
          }
          """, statistics_pb2.FeatureNameStatistics())
  }
  generator = basic_stats_generator.BasicStatsGenerator()
  self.assertCombinerOutputEqual(batches, generator, expected_result)
Example #3
Source File: example_coder_test.py, from tfx-bsl (Apache License 2.0)

def test_decode(self, schema_text_proto, examples_text_proto,
                create_expected):
  serialized_examples = [
      text_format.Parse(pbtxt, tf.train.Example()).SerializeToString()
      for pbtxt in examples_text_proto
  ]
  serialized_schema = None
  if schema_text_proto is not None:
    serialized_schema = text_format.Parse(
        schema_text_proto, schema_pb2.Schema()).SerializeToString()

  if serialized_schema:
    coder = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
  else:
    coder = example_coder.ExamplesToRecordBatchDecoder()

  result = coder.DecodeBatch(serialized_examples)
  self.assertIsInstance(result, pa.RecordBatch)
  expected = create_expected(pa.list_, pa.binary())
  self.assertTrue(
      result.equals(expected),
      "actual: {}\n expected:{}".format(result, expected))
  if serialized_schema:
    self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
Example #4
Source File: tf_example_record_test.py, from tfx-bsl (Apache License 2.0)

def GetExpectedColumnValues(tfxio):
  if tfxio._can_produce_large_types:
    int_type = pa.large_list(pa.int64())
    float_type = pa.large_list(pa.float32())
    bytes_type = pa.large_list(pa.large_binary())
  else:
    int_type = pa.list_(pa.int64())
    float_type = pa.list_(pa.float32())
    bytes_type = pa.list_(pa.binary())
  return {
      "int_feature": pa.array([[1], [2], [3]], type=int_type),
      "float_feature": pa.array([[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7]],
                                type=float_type),
      "string_feature": pa.array([None, ["foo", "bar"], None],
                                 type=bytes_type),
  }
Example #5
Source File: table_util_test.py, from tfx-bsl (Apache License 2.0)

def test_simple(self, factory):
  # 3 int64 values
  # 5 int32 offsets
  # 1 null bitmap byte for outer ListArray
  # 1 null bitmap byte for inner Int64Array
  # 46 bytes in total.
  list_array = pa.array([[1, 2], [None], None, None],
                        type=pa.list_(pa.int64()))

  # 1 null bitmap byte for outer StructArray.
  # 1 null bitmap byte for inner Int64Array.
  # 4 int64 values.
  # 34 bytes in total.
  struct_array = pa.array([{"a": 1}, {"a": 2}, {"a": None}, None],
                          type=pa.struct([pa.field("a", pa.int64())]))

  entity = factory([list_array, struct_array], ["a1", "a2"])
  self.assertEqual(46 + 34, table_util.TotalByteSize(entity))
Example #6
Source File: csv_tfxio_test.py, from tfx-bsl (Apache License 2.0)

def _GetExpectedArrowSchema(tfxio, raw_record_column_name=None):
  if tfxio._can_produce_large_types:
    int_type = pa.large_list(pa.int64())
    float_type = pa.large_list(pa.float32())
    bytes_type = pa.large_list(pa.large_binary())
  else:
    int_type = pa.list_(pa.int64())
    float_type = pa.list_(pa.float32())
    bytes_type = pa.list_(pa.binary())
  fields = [
      pa.field("int_feature", int_type),
      pa.field("float_feature", float_type),
      pa.field("string_feature", bytes_type)
  ]
  if raw_record_column_name is not None:
    fields.append(pa.field(raw_record_column_name, bytes_type))
  return pa.schema(fields)
Example #7
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def testRaggedTensorStructTypeInvalidSteps(self):
  tensor_representation = text_format.Parse(
      """
      ragged_tensor {
        feature_path {
          step: "ragged_feature"
          step: "wrong_step"
        }
      }
      """, schema_pb2.TensorRepresentation())
  record_batch = pa.RecordBatch.from_arrays([
      pa.StructArray.from_arrays([
          pa.array([[1, 2, 3]], pa.list_(pa.int64())),
          pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
      ], ["inner_feature", "x2"])
  ], ["ragged_feature"])
  with self.assertRaisesRegex(ValueError,
                              ".*Unable to handle tensor output.*"):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            record_batch.schema, {"output": tensor_representation}))
Example #8
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def testRaggedTensorStructTypeTooManySteps(self):
  tensor_representation = text_format.Parse(
      """
      ragged_tensor {
        feature_path {
          step: "ragged_feature"
          step: "inner_feature"
          step: "non_existant_feature"
        }
      }
      """, schema_pb2.TensorRepresentation())
  record_batch = pa.RecordBatch.from_arrays([
      pa.StructArray.from_arrays([
          pa.array([[1, 2, 3]], pa.list_(pa.int64())),
          pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
      ], ["inner_feature", "x2"])
  ], ["ragged_feature"])
  with self.assertRaisesRegex(ValueError,
                              ".*Unable to handle tensor output.*"):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            record_batch.schema, {"output": tensor_representation}))
Example #9
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def testRaiseOnNoMatchingHandler(self):
  with self.assertRaisesRegexp(ValueError, "Unable to handle tensor"):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            # Nested lists are not supported now.
            pa.schema([
                pa.field("unsupported_column",
                         pa.list_(pa.list_(pa.int64())))
            ]),
            {
                "tensor": text_format.Parse(
                    """
                    dense_tensor {
                      column_name: "unsupported_column"
                      shape: {}
                    }
                    """, schema_pb2.TensorRepresentation())
            }))
Example #10
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def testRaggedTensorStructTypeNonLeaf(self):
  tensor_representation = text_format.Parse(
      """
      ragged_tensor {
        feature_path {
          step: "ragged_feature"
        }
      }
      """, schema_pb2.TensorRepresentation())
  record_batch = pa.RecordBatch.from_arrays([
      pa.StructArray.from_arrays([
          pa.array([[1, 2, 3]], pa.list_(pa.int64())),
          pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
      ], ["inner_feature", "x2"])
  ], ["ragged_feature"])
  with self.assertRaisesRegex(ValueError,
                              ".*Unable to handle tensor output.*"):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            record_batch.schema, {"output": tensor_representation}))
Example #11
Source File: _pandas_helpers.py, from python-bigquery (Apache License 2.0)

def bq_to_arrow_data_type(field):
    """Return the Arrow data type, corresponding to a given BigQuery column.

    Returns:
        None: if default Arrow type inspection should be used.
    """
    if field.mode is not None and field.mode.upper() == "REPEATED":
        inner_type = bq_to_arrow_data_type(
            schema.SchemaField(field.name, field.field_type, fields=field.fields)
        )
        if inner_type:
            return pyarrow.list_(inner_type)
        return None

    field_type_upper = field.field_type.upper() if field.field_type else ""
    if field_type_upper in schema._STRUCT_TYPES:
        return bq_to_arrow_struct_data_type(field)

    data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper)
    if data_type_constructor is None:
        return None
    return data_type_constructor()
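For orientation, here is a hypothetical call into the function above (it assumes google-cloud-bigquery's public SchemaField class; bq_to_arrow_data_type itself is a private helper of _pandas_helpers, so treat this as an illustration rather than a supported API):

from google.cloud.bigquery import SchemaField

# A REPEATED BigQuery column maps to a pyarrow list type whose value type
# is the Arrow equivalent of the column's scalar type.
field = SchemaField("tags", "STRING", mode="REPEATED")
arrow_type = bq_to_arrow_data_type(field)
# Expected result: pyarrow.list_(pyarrow.string())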
Example #12
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def testRaiseOnInvalidDefaultValue(self, value_type, default_value_pbtxt,
                                   exception_regexp):
  tensor_representation = text_format.Parse(
      """
      dense_tensor {
        column_name: "column"
        shape {}
      }
      """, schema_pb2.TensorRepresentation())
  tensor_representation.dense_tensor.default_value.CopyFrom(
      text_format.Parse(default_value_pbtxt,
                        schema_pb2.TensorRepresentation.DefaultValue()))
  with self.assertRaisesRegexp(ValueError, exception_regexp):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            pa.schema([pa.field("column", pa.list_(value_type))]),
            {"tensor": tensor_representation}))
Example #13
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def _MakeFloatingDefaultFilledDenseTensorFromListArrayTestCases():
  tensor_representation_textpb = """
      dense_tensor {
        column_name: "input"
        shape {
          dim { size: 2 }
          dim { size: 1 }
        }
        default_value { float_value: -1 }
      }
      """
  result = []
  for t in _ALL_SUPPORTED_FLOATING_VALUE_TYPES:
    arrow_array = pa.array([None, [1, 2], None], type=pa.list_(t))
    if tf.executing_eagerly():
      expected_output = tf.constant([[-1, -1], [1, 2], [-1, -1]],
                                    dtype=_ARROW_TYPE_TO_TF_TYPE[t],
                                    shape=(3, 2, 1))
    else:
      expected_output = np.array(
          [-1, -1, 1, 2, -1, -1],
          dtype=_ARROW_TYPE_TO_NP_TYPE[t]).reshape((3, 2, 1))
    result.append({
        "testcase_name": "default_filled_dense_from_list_array_{}".format(t),
        "tensor_representation_textpb": tensor_representation_textpb,
        "arrow_array": arrow_array,
        "expected_output": expected_output,
        "expected_type_spec": tf.TensorSpec([None, 2, 1],
                                            dtype=_ARROW_TYPE_TO_TF_TYPE[t])
    })
  return result
Example #14
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def _MakeStringDefaultFilledDenseTensorFromListArrayTestCases():
  tensor_representation_textpb = """
      dense_tensor {
        column_name: "input"
        shape { }
        default_value { bytes_value: "nil" }
      }
      """
  result = []
  for t in _ALL_SUPPORTED_STRING_VALUE_TYPES:
    arrow_array = pa.array([None, ["hello"], None], type=pa.list_(t))
    if tf.executing_eagerly():
      expected_output = tf.constant(["nil", "hello", "nil"],
                                    dtype=_ARROW_TYPE_TO_TF_TYPE[t])
    else:
      expected_output = np.array([b"nil", b"hello", b"nil"],
                                 dtype=_ARROW_TYPE_TO_NP_TYPE[t])
    result.append({
        "testcase_name": "default_filled_dense_from_list_array_{}".format(t),
        "tensor_representation_textpb": tensor_representation_textpb,
        "arrow_array": arrow_array,
        "expected_output": expected_output,
        "expected_type_spec": tf.TensorSpec([None], _ARROW_TYPE_TO_TF_TYPE[t])
    })
  return result
Example #15
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def test2DSparseTensor(self):
  tensor_representation = text_format.Parse(
      """
      sparse_tensor {
        value_column_name: "values"
        index_column_names: ["d0", "d1"]
        dense_shape {
          dim { size: 10 }
          dim { size: 20 }
        }
      }
      """, schema_pb2.TensorRepresentation())
  record_batch = pa.RecordBatch.from_arrays([
      pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64())),
      # Also test that the index column can be of an integral type other
      # than int64.
      pa.array([[9], None, [9], [7, 8, 9], []], type=pa.list_(pa.uint32())),
      pa.array([[0], None, [0], [0, 1, 2], []], type=pa.list_(pa.int64()))
  ], ["values", "d0", "d1"])
  adapter = tensor_adapter.TensorAdapter(
      tensor_adapter.TensorAdapterConfig(record_batch.schema,
                                         {"output": tensor_representation}))
  converted = adapter.ToBatchTensors(record_batch)
  self.assertLen(converted, 1)
  self.assertIn("output", converted)
  actual_output = converted["output"]
  self.assertIsInstance(actual_output,
                        (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
  self.assertSparseAllEqual(
      tf.compat.v1.SparseTensorValue(
          dense_shape=[5, 10, 20],
          indices=[[0, 9, 0], [2, 9, 0], [3, 7, 0], [3, 8, 1], [3, 9, 2]],
          values=tf.convert_to_tensor([1, 2, 3, 4, 5], dtype=tf.int64)),
      actual_output)
  self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
Example #16
Source File: tensor_adapter_test.py, from tfx-bsl (Apache License 2.0)

def testRaiseOnUnsupportedTensorRepresentation(self):
  with self.assertRaisesRegexp(ValueError, "Unable to handle tensor"):
    tensor_adapter.TensorAdapter(
        tensor_adapter.TensorAdapterConfig(
            pa.schema([pa.field("a", pa.list_(pa.int64()))]),
            {"tensor": schema_pb2.TensorRepresentation()}))
Example #17
Source File: tensor_to_arrow_test.py, from tfx-bsl (Apache License 2.0)

def _make_2d_varlen_sparse_tensor_test_cases():
  result = []
  for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
    if tf_type == tf.string:
      values = tf.constant([b"1", b"2", b"3"], dtype=tf.string)
      expected_array = pa.array([[b"1"], [], [b"2", b"3"], []],
                                type=pa.list_(arrow_type))
    else:
      values = tf.constant([1, 2, 3], dtype=tf_type)
      expected_array = pa.array([[1], [], [2, 3], []],
                                type=pa.list_(arrow_type))
    result.append(dict(
        testcase_name="2d_varlen_sparse_tensor_%s" % tf_type.name,
        type_specs={"sp": tf.SparseTensorSpec([None, None], tf_type)},
        expected_schema={"sp": pa.list_(arrow_type)},
        expected_tensor_representations={
            "sp": """varlen_sparse_tensor { column_name: "sp" }""",
        },
        tensor_input={
            "sp": tf.SparseTensor(
                values=values,
                indices=[[0, 0], [2, 0], [2, 1]],
                dense_shape=[4, 2]),
        },
        expected_record_batch={"sp": expected_array}))
  return result
Example #18
Source File: raw_tf_record_test.py, from tfx-bsl (Apache License 2.0)

def testRecordBatchAndTensorAdapter(self):
  column_name = "raw_record"
  telemetry_descriptors = ["some", "component"]
  tfxio = raw_tf_record.RawTfRecordTFXIO(
      self._raw_record_file, column_name,
      telemetry_descriptors=telemetry_descriptors)
  expected_type = (
      pa.large_list(pa.large_binary())
      if _ProducesLargeTypes(tfxio) else pa.list_(pa.binary()))

  got_schema = tfxio.ArrowSchema()
  self.assertTrue(
      got_schema.equals(pa.schema([pa.field(column_name, expected_type)])),
      "got: {}".format(got_schema))

  def _AssertFn(record_batches):
    self.assertLen(record_batches, 1)
    record_batch = record_batches[0]
    self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
    self.assertTrue(record_batch.columns[0].equals(
        pa.array([[r] for r in _RAW_RECORDS], type=expected_type)))
    tensor_adapter = tfxio.TensorAdapter()
    tensors = tensor_adapter.ToBatchTensors(record_batch)
    self.assertLen(tensors, 1)
    self.assertIn(column_name, tensors)

  p = beam.Pipeline()
  record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_RAW_RECORDS))
  beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
  pipeline_result = p.run()
  pipeline_result.wait_until_finish()
  telemetry_test_util.ValidateMetrics(self, pipeline_result,
                                      telemetry_descriptors, "bytes",
                                      "tfrecords_gzip")
Example #19
Source File: tensor_to_arrow.py, from tfx-bsl (Apache License 2.0)

def arrow_fields(self) -> List[pa.Field]:
  return [
      pa.field(self._tensor_name,
               pa.list_(_tf_dtype_to_arrow_type(self._type_spec.dtype)))
  ]
Example #20
Source File: csv_tfxio_test.py, from tfx-bsl (Apache License 2.0)

def _GetExpectedColumnValues(tfxio):
  if tfxio._can_produce_large_types:
    int_type = pa.large_list(pa.int64())
    float_type = pa.large_list(pa.float32())
    bytes_type = pa.large_list(pa.large_binary())
  else:
    int_type = pa.list_(pa.int64())
    float_type = pa.list_(pa.float32())
    bytes_type = pa.list_(pa.binary())
  return {
      "int_feature": pa.array([[1], [2]], type=int_type),
      "float_feature": pa.array([[2.0], [3.0]], type=float_type),
      "string_feature": pa.array([[b"abc"], [b"xyz"]], type=bytes_type),
  }
Example #21
Source File: record_based_tfxio.py, from tfx-bsl (Apache License 2.0)

def ArrowSchema(self) -> pa.Schema:
  schema = self._ArrowSchemaNoRawRecordColumn()
  if self._raw_record_column_name is not None:
    column_type = (
        pa.large_list(pa.large_binary())
        if self._can_produce_large_types else pa.list_(pa.binary()))
    if schema.get_field_index(self._raw_record_column_name) != -1:
      raise ValueError(
          "Raw record column name {} collided with a column in the schema."
          .format(self._raw_record_column_name))
    schema = schema.append(
        pa.field(self._raw_record_column_name, column_type))
  return schema
Example #22
Source File: sequence_example_coder_test.py, from tfx-bsl (Apache License 2.0)

def _test_decode(self, schema_text_proto, sequence_examples_text_proto,
                 create_expected, use_large_types):
  serialized_sequence_examples = [
      text_format.Parse(pbtxt,
                        tf.train.SequenceExample()).SerializeToString()
      for pbtxt in sequence_examples_text_proto
  ]
  serialized_schema = None
  if schema_text_proto is not None:
    serialized_schema = text_format.Parse(
        schema_text_proto, schema_pb2.Schema()).SerializeToString()

  if serialized_schema:
    coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
        _TEST_SEQUENCE_COLUMN_NAME,
        serialized_schema,
        use_large_types=use_large_types)
  else:
    coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
        _TEST_SEQUENCE_COLUMN_NAME, use_large_types=use_large_types)

  result = coder.DecodeBatch(serialized_sequence_examples)
  self.assertIsInstance(result, pa.RecordBatch)
  if use_large_types:
    expected = create_expected(pa.large_list, pa.large_binary())
  else:
    expected = create_expected(pa.list_, pa.binary())
  self.assertTrue(
      result.equals(expected),
      "actual: {}\n expected:{}".format(result, expected))
  if serialized_schema is not None:
    self.assertTrue(coder.ArrowSchema().equals(result.schema))
Example #23
Source File: csv_decoder.py, from tfx-bsl (Apache License 2.0)

def _GetFeatureTypeToArrowTypeMapping(
    large_types: bool) -> Dict[int, pa.DataType]:
  if large_types:
    return {
        ColumnType.UNKNOWN: pa.null(),
        ColumnType.INT: pa.large_list(pa.int64()),
        ColumnType.FLOAT: pa.large_list(pa.float32()),
        ColumnType.STRING: pa.large_list(pa.large_binary())
    }
  return {
      ColumnType.UNKNOWN: pa.null(),
      ColumnType.INT: pa.list_(pa.int64()),
      ColumnType.FLOAT: pa.list_(pa.float32()),
      ColumnType.STRING: pa.list_(pa.binary())
  }
Example #24
Source File: impl_use_tfxio_test.py, from transform (Apache License 2.0)

def testPassthroughKeys(self):
  passthrough_key = '__passthrough__'

  def preprocessing_fn(inputs):
    self.assertNotIn(passthrough_key, inputs)
    return {'x_scaled': tft.scale_to_0_1(inputs['x'])}

  x_data = [0., 1., 2.]
  passthrough_data = [1, None, 3]
  input_record_batch = pa.RecordBatch.from_arrays([
      pa.array([[x] for x in x_data], type=pa.list_(pa.float32())),
      pa.array([None if p is None else [p] for p in passthrough_data],
               type=pa.list_(pa.int64())),
  ], ['x', passthrough_key])
  tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
      input_record_batch.schema,
      {'x': text_format.Parse(
          'dense_tensor { column_name: "x" shape {} }',
          schema_pb2.TensorRepresentation())})
  expected_data = [{'x_scaled': x / 2.0, passthrough_key: p}
                   for x, p in zip(x_data, passthrough_data)]

  with self._makeTestPipeline() as pipeline:
    input_data = (pipeline | beam.Create([input_record_batch]))
    with beam_impl.Context(
        temp_dir=self.get_temp_dir(),
        passthrough_keys=set([passthrough_key])):
      (transformed_data, _), _ = (
          (input_data, tensor_adapter_config)
          | beam_impl.AnalyzeAndTransformDataset(preprocessing_fn))

      def _assert_fn(output_data):
        self.assertCountEqual(expected_data, output_data)

      beam_test_util.assert_that(transformed_data, _assert_fn)
Example #25
Source File: arrow_util_test.py, from data-validation (Apache License 2.0)

def testIsListLike(self):
  for t in (pa.list_(pa.int64()), pa.large_list(pa.int64())):
    self.assertTrue(arrow_util.is_list_like(t))
  for t in (pa.binary(), pa.int64(), pa.large_string()):
    self.assertFalse(arrow_util.is_list_like(t))
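The helper under test is simple enough to sketch. A plausible implementation in terms of pyarrow's type predicates (an assumption on my part; the actual data-validation source may differ):

import pyarrow as pa

def is_list_like(data_type: pa.DataType) -> bool:
  # True for both the 32-bit-offset list type and the 64-bit-offset
  # large_list type, matching the cases asserted in the test above.
  return pa.types.is_list(data_type) or pa.types.is_large_list(data_type)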
Example #26
Source File: arrow_util_test.py, from data-validation (Apache License 2.0)

def testIsBinaryLike(self):
  for t in (pa.binary(), pa.large_binary(), pa.string(), pa.large_string()):
    self.assertTrue(arrow_util.is_binary_like(t))
  for t in (pa.list_(pa.binary()), pa.large_list(pa.string())):
    self.assertFalse(arrow_util.is_binary_like(t))
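Similarly, a plausible sketch of is_binary_like, again assuming it is built on pyarrow's type predicates rather than copied from the actual source:

import pyarrow as pa

def is_binary_like(data_type: pa.DataType) -> bool:
  # True for binary/string types of either offset width, but not for
  # list types that merely contain binary-like values.
  return (pa.types.is_binary(data_type) or
          pa.types.is_large_binary(data_type) or
          pa.types.is_string(data_type) or
          pa.types.is_large_string(data_type))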
Example #27
Source File: basic_stats_generator_test.py, from data-validation (Apache License 2.0)

def test_basic_stats_generator_invalid_value_numpy_dtype(self):
  batches = [
      pa.RecordBatch.from_arrays(
          [pa.array([[]], type=pa.list_(pa.date32()))], ['a'])
  ]
  generator = basic_stats_generator.BasicStatsGenerator()
  with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
      TypeError, 'Feature a has unsupported arrow type'):
    self.assertCombinerOutputEqual(batches, generator, None)
Example #28
Source File: top_k_uniques_combiner_stats_generator_test.py, from data-validation (Apache License 2.0)

def test_topk_uniques_combiner_zero_row(self):
  batches = [
      pa.RecordBatch.from_arrays(
          [pa.array([], type=pa.list_(pa.binary()))], ['f1'])
  ]
  expected_result = {}
  generator = (
      top_k_uniques_combiner_stats_generator
      .TopKUniquesCombinerStatsGenerator(
          num_top_values=4, num_rank_histogram_buckets=3))
  self.assertCombinerOutputEqual(batches, generator, expected_result)
Example #29
Source File: lift_stats_generator_test.py, from data-validation (Apache License 2.0)

def test_lift_missing_x_and_y(self):
  examples = [
      pa.RecordBatch.from_arrays([
          # Explicitly construct type to avoid treating as null type.
          pa.array([], type=pa.list_(pa.binary())),
          pa.array([], type=pa.list_(pa.binary())),
      ], ['categorical_x', 'string_y']),
  ]
  schema = text_format.Parse(
      """
      feature {
        name: 'categorical_x'
        type: BYTES
      }
      feature {
        name: 'string_y'
        type: BYTES
      }
      """, schema_pb2.Schema())
  expected_result = []
  generator = lift_stats_generator.LiftStatsGenerator(
      schema=schema, y_path=types.FeaturePath(['string_y']))
  self.assertSlicingAwareTransformOutputEqual(
      examples,
      generator,
      expected_result,
      add_default_slice_key_to_input=True,
      add_default_slice_key_to_output=True)
Example #30
Source File: stats_impl_test.py, from data-validation (Apache License 2.0)

def test_filter_features(self):
  input_record_batch = pa.RecordBatch.from_arrays([
      pa.array([[]], type=pa.list_(pa.int64())),
      pa.array([[]], type=pa.list_(pa.int64())),
      pa.array([[]], type=pa.list_(pa.int64())),
  ], ['a', 'b', 'c'])
  actual = stats_impl._filter_features(input_record_batch, ['a', 'c'])
  expected = pa.RecordBatch.from_arrays([
      pa.array([[]], type=pa.list_(pa.int64())),
      pa.array([[]], type=pa.list_(pa.int64())),
  ], ['a', 'c'])
  self.assertEqual(set(actual.schema.names), set(expected.schema.names))
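To round out the example, _filter_features can be understood as a column projection over the RecordBatch. A hedged reconstruction (not the actual stats_impl source; the real helper may handle missing columns differently):

import pyarrow as pa

def filter_features(record_batch: pa.RecordBatch, feature_names):
  # Keep only the named columns, preserving the requested order and
  # silently dropping names absent from the batch.
  columns, names = [], []
  for name in feature_names:
    index = record_batch.schema.get_field_index(name)
    if index >= 0:
      columns.append(record_batch.column(index))
      names.append(name)
  return pa.RecordBatch.from_arrays(columns, names)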