Python pyarrow.binary() Examples

The following are 22 code examples of pyarrow.binary(), collected from open source projects. Each example notes its source file, the project it comes from, and that project's license. You may also want to check out the other available functions and classes of the pyarrow module.
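
pyarrow.binary() returns Arrow's variable-length binary data type; called with a byte length, e.g. pyarrow.binary(10), it returns a fixed-size binary type instead. A minimal usage sketch (the values below are illustrative, not taken from the examples):

import pyarrow as pa

# pa.binary() is the variable-length binary type; each value may hold any number of bytes.
binary_type = pa.binary()
arr = pa.array([b"foo", b"barbaz", None], type=binary_type)
assert pa.types.is_binary(arr.type)

# Passing a length yields a fixed-size binary type instead.
assert pa.types.is_fixed_size_binary(pa.binary(10))
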
Example #1
Source File: test_unit_arrow_chunk_iterator.py    From snowflake-connector-python with Apache License 2.0
def test_iterate_over_binary_chunk():
    random.seed(datetime.datetime.now())
    column_meta = {
        "byteLength": "100",
        "logicalType": "BINARY",
        "precision": "0",
        "scale": "0",
        "charLength": "0"
    }

    def byte_array_generator():
        return bytearray(os.urandom(1000))

    iterate_over_test_chunk([pyarrow.binary(), pyarrow.binary()],
                            [column_meta, column_meta],
                            byte_array_generator) 
Example #2
Source File: tensor_adapter_test.py    From tfx-bsl with Apache License 2.0
def testRaggedTensorStructTypeNonLeaf(self):
    tensor_representation = text_format.Parse(
        """
        ragged_tensor {
          feature_path {
            step: "ragged_feature"
          }
        }
        """, schema_pb2.TensorRepresentation())
    record_batch = pa.RecordBatch.from_arrays([
        pa.StructArray.from_arrays([
            pa.array([[1, 2, 3]], pa.list_(pa.int64())),
            pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
        ], ["inner_feature", "x2"])
    ], ["ragged_feature"])
    with self.assertRaisesRegex(ValueError,
                                ".*Unable to handle tensor output.*"):
      tensor_adapter.TensorAdapter(
          tensor_adapter.TensorAdapterConfig(record_batch.schema,
                                             {"output": tensor_representation})) 
Example #3
Source File: tensor_adapter_test.py    From tfx-bsl with Apache License 2.0
def testRaggedTensorStructTypeInvalidSteps(self):
    tensor_representation = text_format.Parse(
        """
        ragged_tensor {
          feature_path {
            step: "ragged_feature"
            step: "wrong_step"
          }
        }
        """, schema_pb2.TensorRepresentation())
    record_batch = pa.RecordBatch.from_arrays([
        pa.StructArray.from_arrays([
            pa.array([[1, 2, 3]], pa.list_(pa.int64())),
            pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
        ], ["inner_feature", "x2"])
    ], ["ragged_feature"])
    with self.assertRaisesRegex(ValueError,
                                ".*Unable to handle tensor output.*"):
      tensor_adapter.TensorAdapter(
          tensor_adapter.TensorAdapterConfig(record_batch.schema,
                                             {"output": tensor_representation})) 
Example #4
Source File: tf_sequence_example_record_test.py    From tfx-bsl with Apache License 2.0
def _GetExpectedColumnValues(tfxio):
  if tfxio._can_produce_large_types:
    list_factory = pa.large_list
    bytes_type = pa.large_binary()
  else:
    list_factory = pa.list_
    bytes_type = pa.binary()

  return {
      path.ColumnPath(["int_feature"]):
          pa.array([[1], [2], [3]], type=list_factory(pa.int64())),
      path.ColumnPath(["float_feature"]):
          pa.array([[1, 2, 3, 4], [2, 3, 4, 5], None],
                   type=list_factory(pa.float32())),
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]):
          pa.array([[[1, 2], [3]], None, [[4]]],
                   list_factory(list_factory(pa.int64()))),
      path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]):
          pa.array([None, [[b"foo", b"bar"], []], [[b"baz"]]],
                   list_factory(list_factory(bytes_type)))
  } 
Example #5
Source File: sequence_example_coder_test.py    From tfx-bsl with Apache License 2.0
def _test_decode(self, schema_text_proto, sequence_examples_text_proto,
                   create_expected, use_large_types):
    serialized_sequence_examples = [
        text_format.Parse(pbtxt,
                          tf.train.SequenceExample()).SerializeToString()
        for pbtxt in sequence_examples_text_proto
    ]
    serialized_schema = None
    if schema_text_proto is not None:
      serialized_schema = text_format.Parse(
          schema_text_proto, schema_pb2.Schema()).SerializeToString()

    if serialized_schema:
      coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
          _TEST_SEQUENCE_COLUMN_NAME,
          serialized_schema,
          use_large_types=use_large_types)
    else:
      coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
          _TEST_SEQUENCE_COLUMN_NAME, use_large_types=use_large_types)

    result = coder.DecodeBatch(serialized_sequence_examples)
    self.assertIsInstance(result, pa.RecordBatch)
    if use_large_types:
      expected = create_expected(pa.large_list, pa.large_binary())
    else:
      expected = create_expected(pa.list_, pa.binary())
    self.assertTrue(
        result.equals(expected),
        "actual: {}\n expected:{}".format(result, expected))

    if serialized_schema is not None:
      self.assertTrue(coder.ArrowSchema().equals(result.schema)) 
Example #6
Source File: record_based_tfxio.py    From tfx-bsl with Apache License 2.0
def CreateRawRecordColumn(
    raw_records: List[bytes], produce_large_types: bool) -> pa.Array:
  """Returns an Array that satisfies the requirement of a raw record column."""
  list_array_factory = (
      pa.LargeListArray.from_arrays
      if produce_large_types else pa.ListArray.from_arrays)
  binary_type = pa.large_binary() if produce_large_types else pa.binary()
  return list_array_factory(
      np.arange(0, len(raw_records) + 1, dtype=np.int64),
      pa.array(raw_records, type=binary_type)) 
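
A hypothetical call to CreateRawRecordColumn, assuming the module's own imports (numpy as np, pyarrow as pa) are in scope; the record values are illustrative:

raw_records = [b"rec1", b"rec2", b"rec3"]
column = CreateRawRecordColumn(raw_records, produce_large_types=False)
# Each row of the resulting list array wraps exactly one raw record as binary.
assert pa.types.is_list(column.type)
assert pa.types.is_binary(column.type.value_type)
assert column.to_pylist() == [[b"rec1"], [b"rec2"], [b"rec3"]]
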
Example #7
Source File: record_based_tfxio.py    From tfx-bsl with Apache License 2.0
def ArrowSchema(self) -> pa.Schema:
    schema = self._ArrowSchemaNoRawRecordColumn()
    if self._raw_record_column_name is not None:
      column_type = (pa.large_list(pa.large_binary()) if
                     self._can_produce_large_types else pa.list_(pa.binary()))
      if schema.get_field_index(self._raw_record_column_name) != -1:
        raise ValueError(
            "Raw record column name {} collided with a column in the schema."
            .format(self._raw_record_column_name))
      schema = schema.append(
          pa.field(self._raw_record_column_name, column_type))
    return schema 
Example #8
Source File: tensor_adapter.py    From tfx-bsl with Apache License 2.0
def _GetConvertToBinaryFn(
    array_type: pa.DataType) -> Optional[Callable[[pa.Array], pa.Array]]:
  """Returns a function that converts a StringArray to BinaryArray."""

  if pa.types.is_string(array_type):
    return lambda array: array.view(pa.binary())
  if pa.types.is_large_string(array_type):
    return lambda array: array.view(pa.large_binary())
  return None 
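
A short usage sketch of the helper above; the input array is illustrative:

import pyarrow as pa

string_array = pa.array(["a", "bb"], type=pa.string())
convert_fn = _GetConvertToBinaryFn(string_array.type)
if convert_fn is not None:
    binary_array = convert_fn(string_array)
    # view() reinterprets the underlying buffers without copying the data.
    assert pa.types.is_binary(binary_array.type)
    assert binary_array.to_pylist() == [b"a", b"bb"]
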
Example #9
Source File: tensor_to_arrow.py    From tfx-bsl with Apache License 2.0
def _tf_dtype_to_arrow_type(dtype: tf.DType):
  """Maps a tf Dtype to an Arrow type."""
  if dtype == tf.string:
    return pa.binary()
  elif dtype == tf.bool:
    raise TypeError("Unable to handle bool tensors -- consider casting it to a "
                    "tf.uint8")
  return pa.from_numpy_dtype(dtype.as_numpy_dtype) 
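
Assuming TensorFlow is available, the mapping above should behave as follows (a sketch, not part of the original test suite):

import tensorflow as tf
import pyarrow as pa

# tf.string maps to Arrow binary because TF strings are arbitrary byte strings.
assert _tf_dtype_to_arrow_type(tf.string) == pa.binary()
assert _tf_dtype_to_arrow_type(tf.int32) == pa.int32()
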
Example #10
Source File: tf_sequence_example_record_test.py    From tfx-bsl with Apache License 2.0
def _ValidateRecordBatch(
      self, tfxio, record_batch, raw_record_column_name=None):
    self.assertIsInstance(record_batch, pa.RecordBatch)
    self.assertEqual(record_batch.num_rows, 3)
    expected_column_values = _GetExpectedColumnValues(tfxio)
    for i, field in enumerate(record_batch.schema):
      if field.name == raw_record_column_name:
        continue
      if field.name == _SEQUENCE_COLUMN_NAME:
        self.assertTrue(pa.types.is_struct(field.type))
        for seq_column, seq_field in zip(
            record_batch.column(i).flatten(), list(field.type)):
          expected_array = expected_column_values[path.ColumnPath(
              [_SEQUENCE_COLUMN_NAME, seq_field.name])]
          self.assertTrue(
              seq_column.equals(expected_array),
              "Sequence column {} did not match ({} vs {})".format(
                  seq_field.name, seq_column, expected_array))
        continue
      self.assertTrue(
          record_batch.column(i).equals(expected_column_values[path.ColumnPath(
              [field.name])]), "Column {} did not match ({} vs {}).".format(
                  field.name, record_batch.column(i),
                  expected_column_values[path.ColumnPath([field.name])]))

    if raw_record_column_name is not None:
      if tfxio._can_produce_large_types:
        raw_record_column_type = pa.large_list(pa.large_binary())
      else:
        raw_record_column_type = pa.list_(pa.binary())
      self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
      self.assertTrue(
          record_batch.columns[-1].type.equals(raw_record_column_type))
      self.assertEqual(record_batch.columns[-1].flatten().to_pylist(),
                       _SERIALIZED_EXAMPLES) 
Example #11
Source File: table_util_test.py    From tfx-bsl with Apache License 2.0
def test_success(self, row_indices, expected_output):
    record_batch = pa.RecordBatch.from_arrays([
        pa.array([[1, 2, 3], None, [4], [], [5, 6], [7], [8, 9], [10], []],
                 type=pa.list_(pa.int32())),
        pa.array(
            [["a"], ["b", "c"], None, [], None, ["d", "e"], ["f"], None, ["g"]],
            type=pa.list_(pa.binary())),
    ], ["f1", "f2"])

    for row_indices_type in (pa.int32(), pa.int64()):
      sliced = table_util.RecordBatchTake(
          record_batch, pa.array(row_indices, type=row_indices_type))
      self.assertTrue(
          sliced.equals(expected_output),
          "Expected {}, got {}".format(expected_output, sliced)) 
Example #12
Source File: array_util_test.py    From tfx-bsl with Apache License 2.0
def _get_binary_like_byte_size_test_cases():
  result = []
  for array_type, sizeof_offsets in [
      (pa.binary(), 4),
      (pa.string(), 4),
      (pa.large_binary(), 8),
      (pa.large_string(), 8),
  ]:
    result.append(
        dict(
            testcase_name=str(array_type),
            array=pa.array([
                "a", "bb", "ccc", "dddd", "eeeee", "ffffff", "ggggggg",
                "hhhhhhhh", "iiiiiiiii"
            ],
                           type=array_type),
            slice_offset=1,
            slice_length=3,
            # contents: 45
            # offsets: 10 * sizeof_offsets
            # null bitmap: 2
            expected_size=(45 + sizeof_offsets * 10 +
                           _all_false_null_bitmap_size(2)),
            # contents: 9
            # offsets: 4 * sizeof_offsets
            # null bitmap: 1
            expected_sliced_size=(9 + sizeof_offsets * 4 +
                                  _all_false_null_bitmap_size(1))))
  return result 
Example #13
Source File: test__pandas_helpers.py    From python-bigquery with Apache License 2.0
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)
    actual = module_under_test.bq_to_arrow_data_type(field)
    expected_value_type = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_list(actual)
    assert pyarrow.types.is_struct(actual.value_type)
    assert actual.value_type.num_children == len(fields)
    assert actual.value_type.equals(expected_value_type) 
Example #14
Source File: test__pandas_helpers.py    From python-bigquery with Apache License 2.0
def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
    fields = (
        schema.SchemaField("field01", "STRING"),
        schema.SchemaField("field02", "BYTES"),
        schema.SchemaField("field03", "INTEGER"),
        schema.SchemaField("field04", "INT64"),
        schema.SchemaField("field05", "FLOAT"),
        schema.SchemaField("field06", "FLOAT64"),
        schema.SchemaField("field07", "NUMERIC"),
        schema.SchemaField("field08", "BOOLEAN"),
        schema.SchemaField("field09", "BOOL"),
        schema.SchemaField("field10", "TIMESTAMP"),
        schema.SchemaField("field11", "DATE"),
        schema.SchemaField("field12", "TIME"),
        schema.SchemaField("field13", "DATETIME"),
        schema.SchemaField("field14", "GEOGRAPHY"),
    )
    field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields)
    actual = module_under_test.bq_to_arrow_data_type(field)
    expected = pyarrow.struct(
        (
            pyarrow.field("field01", pyarrow.string()),
            pyarrow.field("field02", pyarrow.binary()),
            pyarrow.field("field03", pyarrow.int64()),
            pyarrow.field("field04", pyarrow.int64()),
            pyarrow.field("field05", pyarrow.float64()),
            pyarrow.field("field06", pyarrow.float64()),
            pyarrow.field("field07", module_under_test.pyarrow_numeric()),
            pyarrow.field("field08", pyarrow.bool_()),
            pyarrow.field("field09", pyarrow.bool_()),
            pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
            pyarrow.field("field11", pyarrow.date32()),
            pyarrow.field("field12", module_under_test.pyarrow_time()),
            pyarrow.field("field13", module_under_test.pyarrow_datetime()),
            pyarrow.field("field14", pyarrow.string()),
        )
    )
    assert pyarrow.types.is_struct(actual)
    assert actual.num_children == len(fields)
    assert actual.equals(expected) 
Example #15
Source File: types.py    From LearningApacheSpark with MIT License
def to_arrow_type(dt):
    """ Convert Spark data type to pyarrow type
    """
    from distutils.version import LooseVersion
    import pyarrow as pa
    if type(dt) == BooleanType:
        arrow_type = pa.bool_()
    elif type(dt) == ByteType:
        arrow_type = pa.int8()
    elif type(dt) == ShortType:
        arrow_type = pa.int16()
    elif type(dt) == IntegerType:
        arrow_type = pa.int32()
    elif type(dt) == LongType:
        arrow_type = pa.int64()
    elif type(dt) == FloatType:
        arrow_type = pa.float32()
    elif type(dt) == DoubleType:
        arrow_type = pa.float64()
    elif type(dt) == DecimalType:
        arrow_type = pa.decimal128(dt.precision, dt.scale)
    elif type(dt) == StringType:
        arrow_type = pa.string()
    elif type(dt) == BinaryType:
        # TODO: remove version check once minimum pyarrow version is 0.10.0
        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
        arrow_type = pa.binary()
    elif type(dt) == DateType:
        arrow_type = pa.date32()
    elif type(dt) == TimestampType:
        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
        arrow_type = pa.timestamp('us', tz='UTC')
    elif type(dt) == ArrayType:
        if type(dt.elementType) == TimestampType:
            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
        arrow_type = pa.list_(to_arrow_type(dt.elementType))
    else:
        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
    return arrow_type 
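
Assuming a PySpark environment where the Spark SQL types are importable, the BinaryType branch above should resolve to pyarrow.binary(); a hedged usage sketch:

from pyspark.sql.types import ArrayType, BinaryType, StringType
import pyarrow as pa

assert to_arrow_type(BinaryType()) == pa.binary()
assert to_arrow_type(ArrayType(StringType())) == pa.list_(pa.string())
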
Example #16
Source File: test_unischema.py    From petastorm with Apache License 2.0
def test_arrow_schema_convertion():
    fields = [
        pa.field('string', pa.string()),
        pa.field('int8', pa.int8()),
        pa.field('int16', pa.int16()),
        pa.field('int32', pa.int32()),
        pa.field('int64', pa.int64()),
        pa.field('float', pa.float32()),
        pa.field('double', pa.float64()),
        pa.field('bool', pa.bool_(), False),
        pa.field('fixed_size_binary', pa.binary(10)),
        pa.field('variable_size_binary', pa.binary()),
        pa.field('decimal', pa.decimal128(3, 4)),
        pa.field('timestamp_s', pa.timestamp('s')),
        pa.field('timestamp_ns', pa.timestamp('ns')),
        pa.field('date_32', pa.date32()),
        pa.field('date_64', pa.date64())
    ]
    arrow_schema = pa.schema(fields)

    mock_dataset = _mock_parquet_dataset([], arrow_schema)

    unischema = Unischema.from_arrow_schema(mock_dataset)
    for name in arrow_schema.names:
        assert getattr(unischema, name).name == name
        assert getattr(unischema, name).codec is None

        if name == 'bool':
            assert not getattr(unischema, name).nullable
        else:
            assert getattr(unischema, name).nullable

    # Test schema preserve fields order
    field_name_list = [f.name for f in fields]
    assert list(unischema.fields.keys()) == field_name_list 
Example #17
Source File: test_db.py    From aws-data-wrangler with Apache License 2.0
def test_redshift_category(bucket, databases_parameters):
    path = f"s3://{bucket}/test_redshift_category/"
    df = get_df_category().drop(["binary"], axis=1, inplace=False)
    engine = wr.catalog.get_engine(connection="aws-data-wrangler-redshift")
    wr.db.copy_to_redshift(
        df=df,
        path=path,
        con=engine,
        schema="public",
        table="test_redshift_category",
        mode="overwrite",
        iam_role=databases_parameters["redshift"]["role"],
    )
    df2 = wr.db.unload_redshift(
        sql="SELECT * FROM public.test_redshift_category",
        con=engine,
        iam_role=databases_parameters["redshift"]["role"],
        path=path,
        keep_files=False,
        categories=df.columns,
    )
    ensure_data_types_category(df2)
    dfs = wr.db.unload_redshift(
        sql="SELECT * FROM public.test_redshift_category",
        con=engine,
        iam_role=databases_parameters["redshift"]["role"],
        path=path,
        keep_files=False,
        categories=df.columns,
        chunked=True,
    )
    for df2 in dfs:
        ensure_data_types_category(df2)
    wr.s3.delete_objects(path=path) 
Example #18
Source File: arrow_util_test.py    From data-validation with Apache License 2.0
def testEnumerateArraysStringWeight(self):
    # The arrow type of a string changes between py2 and py3 so we accept either
    with self.assertRaisesRegex(
        ValueError,
        r'Weight column "w" must be of numeric type. Found (string|binary).*'):
      for _ in arrow_util.enumerate_arrays(
          pa.RecordBatch.from_arrays(
              [pa.array([[1], [2, 3]]),
               pa.array([["a"], ["b"]])], ["v", "w"]),
          weight_column="w",
          enumerate_leaves_only=True):
        pass 
Example #19
Source File: arrow_util_test.py    From data-validation with Apache License 2.0
def testIsBinaryLike(self):
    for t in (pa.binary(), pa.large_binary(), pa.string(), pa.large_string()):
      self.assertTrue(arrow_util.is_binary_like(t))

    for t in (pa.list_(pa.binary()), pa.large_list(pa.string())):
      self.assertFalse(arrow_util.is_binary_like(t)) 
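
For reference, an equivalent check can be written directly against the pyarrow.types predicates; this is a sketch of the behaviour the test exercises, not data-validation's actual implementation:

import pyarrow as pa

def is_binary_like_sketch(data_type: pa.DataType) -> bool:
    # binary, large_binary, string, and large_string all carry raw byte payloads.
    return (pa.types.is_binary(data_type) or pa.types.is_large_binary(data_type) or
            pa.types.is_string(data_type) or pa.types.is_large_string(data_type))

assert is_binary_like_sketch(pa.binary())
assert not is_binary_like_sketch(pa.list_(pa.binary()))
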
Example #20
Source File: arrow_util_test.py    From data-validation with Apache License 2.0
def testIsListLike(self):
    for t in (pa.list_(pa.int64()), pa.large_list(pa.int64())):
      self.assertTrue(arrow_util.is_list_like(t))

    for t in (pa.binary(), pa.int64(), pa.large_string()):
      self.assertFalse(arrow_util.is_list_like(t)) 
Example #21
Source File: test_common_metadata.py    From kartothek with MIT License
def test_store_schema_metadata(store, df_all_types):
    store_schema_metadata(
        schema=make_meta(df_all_types, origin="df_all_types"),
        dataset_uuid="some_uuid",
        store=store,
        table="some_table",
    )

    key = "some_uuid/some_table/_common_metadata"
    assert key in store.keys()
    pq_file = pq.ParquetFile(store.open(key))
    actual_schema = pq_file.schema.to_arrow_schema()
    fields = [
        pa.field("array_float32", pa.list_(pa.float64())),
        pa.field("array_float64", pa.list_(pa.float64())),
        pa.field("array_int16", pa.list_(pa.int64())),
        pa.field("array_int32", pa.list_(pa.int64())),
        pa.field("array_int64", pa.list_(pa.int64())),
        pa.field("array_int8", pa.list_(pa.int64())),
        pa.field("array_uint16", pa.list_(pa.uint64())),
        pa.field("array_uint32", pa.list_(pa.uint64())),
        pa.field("array_uint64", pa.list_(pa.uint64())),
        pa.field("array_uint8", pa.list_(pa.uint64())),
        pa.field("array_unicode", pa.list_(pa.string())),
        pa.field("bool", pa.bool_()),
        pa.field("byte", pa.binary()),
        pa.field("date", pa.date32()),
        pa.field("datetime64", pa.timestamp("us")),
        pa.field("float32", pa.float64()),
        pa.field("float64", pa.float64()),
        pa.field("int16", pa.int64()),
        pa.field("int32", pa.int64()),
        pa.field("int64", pa.int64()),
        pa.field("int8", pa.int64()),
        pa.field("null", pa.null()),
        pa.field("uint16", pa.uint64()),
        pa.field("uint32", pa.uint64()),
        pa.field("uint64", pa.uint64()),
        pa.field("uint8", pa.uint64()),
        pa.field("unicode", pa.string()),
    ]
    expected_schema = pa.schema(fields)

    assert actual_schema.remove_metadata() == expected_schema 
Example #22
Source File: test_db.py    From aws-data-wrangler with Apache License 2.0
def test_redshift_copy_unload(bucket, databases_parameters):
    path = f"s3://{bucket}/test_redshift_copy/"
    df = get_df().drop(["iint8", "binary"], axis=1, inplace=False)
    engine = wr.catalog.get_engine(connection="aws-data-wrangler-redshift")
    wr.db.copy_to_redshift(
        df=df,
        path=path,
        con=engine,
        schema="public",
        table="__test_redshift_copy",
        mode="overwrite",
        iam_role=databases_parameters["redshift"]["role"],
    )
    df2 = wr.db.unload_redshift(
        sql="SELECT * FROM public.__test_redshift_copy",
        con=engine,
        iam_role=databases_parameters["redshift"]["role"],
        path=path,
        keep_files=False,
    )
    assert len(df2.index) == 3
    ensure_data_types(df=df2, has_list=False)
    wr.db.copy_to_redshift(
        df=df,
        path=path,
        con=engine,
        schema="public",
        table="__test_redshift_copy",
        mode="append",
        iam_role=databases_parameters["redshift"]["role"],
    )
    df2 = wr.db.unload_redshift(
        sql="SELECT * FROM public.__test_redshift_copy",
        con=engine,
        iam_role=databases_parameters["redshift"]["role"],
        path=path,
        keep_files=False,
    )
    assert len(df2.index) == 6
    ensure_data_types(df=df2, has_list=False)
    dfs = wr.db.unload_redshift(
        sql="SELECT * FROM public.__test_redshift_copy",
        con=engine,
        iam_role=databases_parameters["redshift"]["role"],
        path=path,
        keep_files=False,
        chunked=True,
    )
    for chunk in dfs:
        ensure_data_types(df=chunk, has_list=False)