Java Code Examples for org.apache.avro.generic.GenericData#setStringType()
The following examples show how to use
org.apache.avro.generic.GenericData#setStringType().
You can vote up the examples you like or vote down the ones you don't,
and you can follow the links above each example to go to the original project or source file.
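Before the examples, here is a minimal, self-contained sketch of what the call does. GenericData.setStringType() stamps the "avro.java.string" property onto a STRING schema, which tells GenericDatumReader to materialize values as java.lang.String instead of org.apache.avro.util.Utf8. This demo class (StringTypeDemo) and its binary round-trip are our own illustration, not code from any of the projects below, and it assumes Avro 1.7 or later on the classpath.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class StringTypeDemo {
    public static void main(String[] args) throws Exception {
        // A plain STRING schema decodes to org.apache.avro.util.Utf8 by default.
        Schema stringSchema = Schema.create(Schema.Type.STRING);

        // setStringType sets the "avro.java.string" property on the schema, so the
        // generic reader will produce java.lang.String values instead of Utf8.
        GenericData.setStringType(stringSchema, GenericData.StringType.String);

        // Round-trip a single string value through Avro binary encoding.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<CharSequence>(stringSchema).write("hello", encoder);
        encoder.flush();

        BinaryDecoder decoder = DecoderFactory.get()
                .binaryDecoder(new ByteArrayInputStream(out.toByteArray()), null);
        Object result = new GenericDatumReader<>(stringSchema).read(null, decoder);

        // Prints "java.lang.String" rather than "org.apache.avro.util.Utf8".
        System.out.println(result.getClass().getName());
    }
}

Every example that follows uses setStringType() for this same reason: so that map keys, array elements, or record fields come back as plain Java strings that compare equal to ordinary String literals.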
Example 1
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test
public void shouldReadArrayOfJavaStrings() {
    // given
    Schema javaStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaStringSchema, GenericData.StringType.String);
    Schema javaStringArraySchema = Schema.createArray(javaStringSchema);
    GenericData.Array<String> javaStringArray = new GenericData.Array<>(0, javaStringArraySchema);
    javaStringArray.add("aaa");
    javaStringArray.add("abc");

    GenericData.Array<String> resultJavaStringArray = deserializeGenericFast(javaStringArraySchema,
            javaStringArraySchema, serializeGeneric(javaStringArray));

    // then
    Assert.assertEquals(2, resultJavaStringArray.size());
    Assert.assertEquals("aaa", resultJavaStringArray.get(0));
    Assert.assertEquals("abc", resultJavaStringArray.get(1));
}
Example 2
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test
public void shouldReadMapOfJavaStrings() {
    // given
    Schema stringMapSchema = Schema.createMap(Schema.create(Schema.Type.STRING));
    Schema javaStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaStringSchema, GenericData.StringType.String);
    Schema javaStringMapSchema = Schema.createMap(javaStringSchema);
    Map<String, String> stringMap = new HashMap<>(0);
    stringMap.put("1", "abc");
    stringMap.put("2", "aaa");

    // when
    Map<Utf8, String> resultJavaStringMap = deserializeGenericFast(stringMapSchema, javaStringMapSchema,
            serializeGeneric(stringMap, javaStringMapSchema));

    // then
    Assert.assertEquals(2, resultJavaStringMap.size());
    Assert.assertEquals("abc", resultJavaStringMap.get(new Utf8("1")));
    Assert.assertEquals("aaa", resultJavaStringMap.get(new Utf8("2")));
}
Example 3
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 6 votes |
@Test
public void shouldReadJavaStringKeyedMapOfRecords() {
    // given
    Schema recordSchema = createRecord("record",
            createPrimitiveUnionFieldSchema("field", Schema.Type.STRING));
    Schema mapRecordSchema = Schema.createMap(recordSchema);
    GenericData.setStringType(mapRecordSchema, GenericData.StringType.String);
    GenericRecordBuilder subRecordBuilder = new GenericRecordBuilder(recordSchema);
    subRecordBuilder.set("field", "abc");
    Map<String, GenericData.Record> recordsMap = new HashMap<>();
    recordsMap.put("1", subRecordBuilder.build());
    recordsMap.put("2", subRecordBuilder.build());

    // when
    Map<String, GenericRecord> mapWithStringKeys = deserializeGenericFast(mapRecordSchema, mapRecordSchema,
            serializeGeneric(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, mapWithStringKeys.size());
    Assert.assertEquals("abc", mapWithStringKeys.get("1").get("field").toString());
    Assert.assertEquals("abc", mapWithStringKeys.get("2").get("field").toString());
}
Example 4
Source File: TestWriteAvroResultWithSchema.java From nifi with Apache License 2.0 | 5 votes |
@Override
protected GenericRecord readRecord(final InputStream in, final Schema schema) throws IOException {
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, StringType.String);
    final GenericRecord avroRecord = dataFileStream.next();

    return avroRecord;
}
Example 5
Source File: TestWriteAvroResultWithSchema.java From nifi with Apache License 2.0 | 5 votes |
@Override
protected List<GenericRecord> readRecords(final InputStream in, final Schema schema, final int recordCount) throws IOException {
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(in, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, StringType.String);

    List<GenericRecord> records = new ArrayList<>();
    for (int i = 0; i < recordCount; i++) {
        records.add(dataFileStream.next());
    }

    return records;
}
Example 6
Source File: BucketingSinkTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * This tests {@link AvroKeyValueSinkWriter}
 * with non-rolling output and with compression.
 */
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
    final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";
    final int numElements = 20;

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
            .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
            .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
            createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(
                i, "message #" + Integer.toString(i)
        )));
    }

    testHarness.close();

    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
                new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }

    dataFileStream.close();
    inStream.close();
}
Example 7
Source File: BucketingSinkTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * This tests user defined hdfs configuration.
 * @throws Exception
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
    final String outPath = hdfsURI + "/string-non-rolling-with-config";
    final int numElements = 20;

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");

    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
            .setFSConfig(conf)
            .setWriter(new StreamWriterWithConfigCheck<Integer, String>(properties, "io.file.buffer.size", "40960"))
            .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
            createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(
                i, "message #" + Integer.toString(i)
        )));
    }

    testHarness.close();

    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
                new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }

    dataFileStream.close();
    inStream.close();
}
Example 8
Source File: BucketingSinkTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * This tests {@link AvroKeyValueSinkWriter}
 * with non-rolling output and with compression.
 */
@Test
public void testNonRollingAvroKeyValueWithCompressionWriter() throws Exception {
    final String outPath = hdfsURI + "/avro-kv-no-comp-non-rolling-out";
    final int numElements = 20;

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
            .setWriter(new AvroKeyValueSinkWriter<Integer, String>(properties))
            .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
            createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(
                i, "message #" + Integer.toString(i)
        )));
    }

    testHarness.close();

    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
                new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }

    dataFileStream.close();
    inStream.close();
}
Example 9
Source File: BucketingSinkTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * This tests user defined hdfs configuration.
 * @throws Exception
 */
@Test
public void testUserDefinedConfiguration() throws Exception {
    final String outPath = hdfsURI + "/string-non-rolling-with-config";
    final int numElements = 20;

    Map<String, String> properties = new HashMap<>();
    Schema keySchema = Schema.create(Schema.Type.INT);
    Schema valueSchema = Schema.create(Schema.Type.STRING);
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, keySchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, valueSchema.toString());
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, String.valueOf(true));
    properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);

    Configuration conf = new Configuration();
    conf.set("io.file.buffer.size", "40960");

    BucketingSink<Tuple2<Integer, String>> sink = new BucketingSink<Tuple2<Integer, String>>(outPath)
            .setFSConfig(conf)
            .setWriter(new StreamWriterWithConfigCheck<Integer, String>(properties, "io.file.buffer.size", "40960"))
            .setBucketer(new BasePathBucketer<Tuple2<Integer, String>>())
            .setPartPrefix(PART_PREFIX)
            .setPendingPrefix("")
            .setPendingSuffix("");

    OneInputStreamOperatorTestHarness<Tuple2<Integer, String>, Object> testHarness =
            createTestSink(sink, 1, 0);

    testHarness.setProcessingTime(0L);

    testHarness.setup();
    testHarness.open();

    for (int i = 0; i < numElements; i++) {
        testHarness.processElement(new StreamRecord<>(Tuple2.of(
                i, "message #" + Integer.toString(i)
        )));
    }

    testHarness.close();

    GenericData.setStringType(valueSchema, GenericData.StringType.String);
    Schema elementSchema = AvroKeyValueSinkWriter.AvroKeyValue.getSchema(keySchema, valueSchema);

    FSDataInputStream inStream = dfs.open(new Path(outPath + "/" + PART_PREFIX + "-0-0"));

    SpecificDatumReader<GenericRecord> elementReader = new SpecificDatumReader<>(elementSchema);
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(inStream, elementReader);
    for (int i = 0; i < numElements; i++) {
        AvroKeyValueSinkWriter.AvroKeyValue<Integer, String> wrappedEntry =
                new AvroKeyValueSinkWriter.AvroKeyValue<>(dataFileStream.next());
        int key = wrappedEntry.getKey();
        Assert.assertEquals(i, key);
        String value = wrappedEntry.getValue();
        Assert.assertEquals("message #" + i, value);
    }

    dataFileStream.close();
    inStream.close();
}
Example 10
Source File: FastGenericSerializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test
public void shouldWritePrimitives() {
    // given
    Schema javaLangStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaLangStringSchema, GenericData.StringType.String);
    Schema recordSchema = createRecord("testRecord",
            createField("testInt", Schema.create(Schema.Type.INT)),
            createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT),
            createField("testString", Schema.create(Schema.Type.STRING)),
            createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING),
            createField("testJavaString", javaLangStringSchema),
            createUnionField("testJavaStringUnion", javaLangStringSchema),
            createField("testLong", Schema.create(Schema.Type.LONG)),
            createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG),
            createField("testDouble", Schema.create(Schema.Type.DOUBLE)),
            createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE),
            createField("testFloat", Schema.create(Schema.Type.FLOAT)),
            createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT),
            createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)),
            createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN),
            createField("testBytes", Schema.create(Schema.Type.BYTES)),
            createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES));

    GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema);
    builder.set("testInt", 1);
    builder.set("testIntUnion", 1);
    builder.set("testString", "aaa");
    builder.set("testStringUnion", "aaa");
    builder.set("testJavaString", "aaa");
    builder.set("testJavaStringUnion", "aaa");
    builder.set("testLong", 1L);
    builder.set("testLongUnion", 1L);
    builder.set("testDouble", 1.0);
    builder.set("testDoubleUnion", 1.0);
    builder.set("testFloat", 1.0f);
    builder.set("testFloatUnion", 1.0f);
    builder.set("testBoolean", true);
    builder.set("testBooleanUnion", true);
    builder.set("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
    builder.set("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02}));

    // when
    GenericRecord record = deserializeGeneric(recordSchema, serializeGenericFast(builder.build()));

    // then
    Assert.assertEquals(1, record.get("testInt"));
    Assert.assertEquals(1, record.get("testIntUnion"));
    Assert.assertEquals("aaa", record.get("testString").toString());
    Assert.assertEquals("aaa", record.get("testStringUnion").toString());
    Assert.assertEquals("aaa", record.get("testJavaString"));
    Assert.assertEquals("aaa", record.get("testJavaStringUnion"));
    Assert.assertEquals(1L, record.get("testLong"));
    Assert.assertEquals(1L, record.get("testLongUnion"));
    Assert.assertEquals(1.0, record.get("testDouble"));
    Assert.assertEquals(1.0, record.get("testDoubleUnion"));
    Assert.assertEquals(1.0f, record.get("testFloat"));
    Assert.assertEquals(1.0f, record.get("testFloatUnion"));
    Assert.assertEquals(true, record.get("testBoolean"));
    Assert.assertEquals(true, record.get("testBooleanUnion"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytesUnion"));
}
Example 11
Source File: FastGenericDeserializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test
public void shouldReadPrimitives() {
    // given
    Schema javaLangStringSchema = Schema.create(Schema.Type.STRING);
    GenericData.setStringType(javaLangStringSchema, GenericData.StringType.String);
    Schema recordSchema = createRecord("testRecord",
            createField("testInt", Schema.create(Schema.Type.INT)),
            createPrimitiveUnionFieldSchema("testIntUnion", Schema.Type.INT),
            createField("testString", Schema.create(Schema.Type.STRING)),
            createPrimitiveUnionFieldSchema("testStringUnion", Schema.Type.STRING),
            createField("testJavaString", javaLangStringSchema),
            createUnionField("testJavaStringUnion", javaLangStringSchema),
            createField("testLong", Schema.create(Schema.Type.LONG)),
            createPrimitiveUnionFieldSchema("testLongUnion", Schema.Type.LONG),
            createField("testDouble", Schema.create(Schema.Type.DOUBLE)),
            createPrimitiveUnionFieldSchema("testDoubleUnion", Schema.Type.DOUBLE),
            createField("testFloat", Schema.create(Schema.Type.FLOAT)),
            createPrimitiveUnionFieldSchema("testFloatUnion", Schema.Type.FLOAT),
            createField("testBoolean", Schema.create(Schema.Type.BOOLEAN)),
            createPrimitiveUnionFieldSchema("testBooleanUnion", Schema.Type.BOOLEAN),
            createField("testBytes", Schema.create(Schema.Type.BYTES)),
            createPrimitiveUnionFieldSchema("testBytesUnion", Schema.Type.BYTES));

    GenericRecordBuilder builder = new GenericRecordBuilder(recordSchema);
    builder.set("testInt", 1);
    builder.set("testIntUnion", 1);
    builder.set("testString", "aaa");
    builder.set("testStringUnion", "aaa");
    builder.set("testJavaString", "aaa");
    builder.set("testJavaStringUnion", "aaa");
    builder.set("testLong", 1L);
    builder.set("testLongUnion", 1L);
    builder.set("testDouble", 1.0);
    builder.set("testDoubleUnion", 1.0);
    builder.set("testFloat", 1.0f);
    builder.set("testFloatUnion", 1.0f);
    builder.set("testBoolean", true);
    builder.set("testBooleanUnion", true);
    builder.set("testBytes", ByteBuffer.wrap(new byte[]{0x01, 0x02}));
    builder.set("testBytesUnion", ByteBuffer.wrap(new byte[]{0x01, 0x02}));

    // when
    GenericRecord record = deserializeGenericFast(recordSchema, recordSchema, serializeGeneric(builder.build()));

    // then
    Assert.assertEquals(1, record.get("testInt"));
    Assert.assertEquals(1, record.get("testIntUnion"));
    Assert.assertEquals("aaa", record.get("testString").toString());
    Assert.assertEquals("aaa", record.get("testStringUnion").toString());
    Assert.assertEquals("aaa", record.get("testJavaString"));
    Assert.assertEquals("aaa", record.get("testJavaStringUnion"));
    Assert.assertEquals(1L, record.get("testLong"));
    Assert.assertEquals(1L, record.get("testLongUnion"));
    Assert.assertEquals(1.0, record.get("testDouble"));
    Assert.assertEquals(1.0, record.get("testDoubleUnion"));
    Assert.assertEquals(1.0f, record.get("testFloat"));
    Assert.assertEquals(1.0f, record.get("testFloatUnion"));
    Assert.assertEquals(true, record.get("testBoolean"));
    Assert.assertEquals(true, record.get("testBooleanUnion"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytes"));
    Assert.assertEquals(ByteBuffer.wrap(new byte[]{0x01, 0x02}), record.get("testBytesUnion"));
}
Example 12
Source File: FastSpecificSerializerGeneratorTest.java From avro-fastserde with Apache License 2.0 | 4 votes |
@Test
public void shouldWriteMapOfRecords() {
    // given
    Schema mapRecordSchema = Schema.createMap(TestRecord.getClassSchema());
    GenericData.setStringType(mapRecordSchema, GenericData.StringType.String);

    TestRecord testRecord = emptyTestRecord();
    testRecord.put("testString", "abc");

    Map<String, TestRecord> recordsMap = new HashMap<>();
    recordsMap.put("1", testRecord);
    recordsMap.put("2", testRecord);

    // when
    Map<String, TestRecord> map = deserializeSpecific(mapRecordSchema,
            serializeSpecificFast(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, map.size());
    Assert.assertEquals("abc", map.get("1").get("testString"));
    Assert.assertEquals("abc", map.get("2").get("testString"));

    // given
    mapRecordSchema = Schema.createMap(createUnionSchema(TestRecord.getClassSchema()));
    GenericData.setStringType(mapRecordSchema, GenericData.StringType.String);

    testRecord = emptyTestRecord();
    testRecord.put("testString", "abc");

    recordsMap = new HashMap<>();
    recordsMap.put("1", testRecord);
    recordsMap.put("2", testRecord);

    // when
    map = deserializeSpecific(mapRecordSchema, serializeSpecificFast(recordsMap, mapRecordSchema));

    // then
    Assert.assertEquals(2, map.size());
    Assert.assertEquals("abc", map.get("1").get("testString"));
    Assert.assertEquals("abc", map.get("2").get("testString"));
}
Example 13
Source File: TestExecuteSQLRecord.java From nifi with Apache License 2.0 | 4 votes |
@Test
public void testWriteLOBsToAvro() throws Exception {
    final DBCPService dbcp = new DBCPServiceSimpleImpl("h2");
    final Map<String, String> dbcpProperties = new HashMap<>();

    runner = TestRunners.newTestRunner(ExecuteSQLRecord.class);
    runner.addControllerService("dbcp", dbcp, dbcpProperties);
    runner.enableControllerService(dbcp);
    runner.setProperty(AbstractExecuteSQL.DBCP_SERVICE, "dbcp");

    // remove previous test database, if any
    final File dbLocation = new File(DB_LOCATION);
    dbLocation.delete();

    // load test data to database
    final Connection con = ((DBCPService) runner.getControllerService("dbcp")).getConnection();
    Statement stmt = con.createStatement();
    try {
        stmt.execute("drop table TEST_NULL_INT");
    } catch (final SQLException sqle) {
    }

    stmt.execute("create table TEST_NULL_INT (id integer not null, val1 integer, val2 integer, image blob(1K), words clob(1K), "
            + "natwords nclob(1K), constraint my_pk primary key (id))");
    stmt.execute("insert into TEST_NULL_INT (id, val1, val2, image, words, natwords) VALUES (0, NULL, 1, CAST (X'DEADBEEF' AS BLOB), "
            + "CAST ('Hello World' AS CLOB), CAST ('I am an NCLOB' AS NCLOB))");

    runner.setIncomingConnection(false);
    runner.setProperty(AbstractExecuteSQL.SQL_SELECT_QUERY, "select * from TEST_NULL_INT");

    AvroRecordSetWriter recordWriter = new AvroRecordSetWriter();
    runner.addControllerService("writer", recordWriter);
    runner.setProperty(recordWriter, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.INHERIT_RECORD_SCHEMA);
    runner.setProperty(ExecuteSQLRecord.RECORD_WRITER_FACTORY, "writer");
    runner.enableControllerService(recordWriter);

    runner.run();

    runner.assertAllFlowFilesTransferred(AbstractExecuteSQL.REL_SUCCESS, 1);
    MockFlowFile flowFile = runner.getFlowFilesForRelationship(AbstractExecuteSQL.REL_SUCCESS).get(0);
    flowFile.assertAttributeEquals(AbstractExecuteSQL.RESULT_ROW_COUNT, "1");

    ByteArrayInputStream bais = new ByteArrayInputStream(flowFile.toByteArray());
    final DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(bais, new GenericDatumReader<>());
    final Schema avroSchema = dataFileStream.getSchema();
    GenericData.setStringType(avroSchema, GenericData.StringType.String);
    final GenericRecord avroRecord = dataFileStream.next();

    Object imageObj = avroRecord.get("IMAGE");
    assertNotNull(imageObj);
    assertTrue(imageObj instanceof ByteBuffer);
    assertArrayEquals(new byte[]{(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}, ((ByteBuffer) imageObj).array());

    Object wordsObj = avroRecord.get("WORDS");
    assertNotNull(wordsObj);
    assertTrue(wordsObj instanceof Utf8);
    assertEquals("Hello World", wordsObj.toString());

    Object natwordsObj = avroRecord.get("NATWORDS");
    assertNotNull(natwordsObj);
    assertTrue(natwordsObj instanceof Utf8);
    assertEquals("I am an NCLOB", natwordsObj.toString());
}