Java Code Examples for org.apache.beam.sdk.values.PCollectionTuple#of()
The following examples show how to use org.apache.beam.sdk.values.PCollectionTuple#of().
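Before the project examples, here is a minimal standalone sketch of how PCollectionTuple.of() is typically combined with and() and get(). It is not taken from any of the projects below; the class name, tag names, and element values are illustrative assumptions.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;

public class PCollectionTupleOfSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();

    // Anonymous subclasses preserve the element type information of each tag.
    TupleTag<String> wordsTag = new TupleTag<String>() {};
    TupleTag<Integer> countsTag = new TupleTag<Integer>() {};

    PCollection<String> words = pipeline.apply("Words", Create.of("a", "b"));
    PCollection<Integer> counts = pipeline.apply("Counts", Create.of(1, 2));

    // of() starts the tuple with one tagged PCollection; and() adds further branches.
    PCollectionTuple tuple = PCollectionTuple.of(wordsTag, words).and(countsTag, counts);

    // get() retrieves a branch by its tag, keeping the element type.
    PCollection<String> wordsAgain = tuple.get(wordsTag);

    pipeline.run().waitUntilFinish();
  }
}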
Example 1
Source File: CsvConverters.java from DataflowTemplates with Apache License 2.0
@Override
public PCollectionTuple expand(PBegin input) {
  if (hasHeaders()) {
    return input
        .apply("MatchFilePattern", FileIO.match().filepattern(inputFileSpec()))
        .apply("ReadMatches", FileIO.readMatches())
        .apply(
            "ReadCsvWithHeaders",
            ParDo.of(new GetCsvHeadersFn(headerTag(), lineTag(), csvFormat(), delimiter()))
                .withOutputTags(headerTag(), TupleTagList.of(lineTag())));
  }

  return PCollectionTuple.of(
      lineTag(), input.apply("ReadCsvWithoutHeaders", TextIO.read().from(inputFileSpec())));
}
Example 2
Source File: BeamSqlDslArrayTest.java from beam with Apache License 2.0
@Test
public void testUnnestLiteral() {
  PCollection<Row> input =
      pipeline.apply(
          "boundedInput1",
          Create.empty(TypeDescriptor.of(Row.class)).withRowSchema(INPUT_SCHEMA));

  // Because we have a multi-part FROM the DSL considers it multi-input
  TupleTag<Row> mainTag = new TupleTag<Row>("main") {};
  PCollectionTuple inputTuple = PCollectionTuple.of(mainTag, input);

  Schema resultType = Schema.builder().addStringField("f_string").build();

  PCollection<Row> result =
      inputTuple.apply(
          "sqlQuery", SqlTransform.query("SELECT * FROM UNNEST (ARRAY ['a', 'b', 'c'])"));

  PAssert.that(result)
      .containsInAnyOrder(
          Row.withSchema(resultType).addValues("a").build(),
          Row.withSchema(resultType).addValues("b").build(),
          Row.withSchema(resultType).addValues("c").build());

  pipeline.run();
}
Example 3
Source File: BeamSqlDslArrayTest.java from beam with Apache License 2.0
@Test
public void testUnnestNamedLiteral() {
  PCollection<Row> input =
      pipeline.apply(
          "boundedInput1",
          Create.empty(TypeDescriptor.of(Row.class)).withRowSchema(INPUT_SCHEMA));

  // Because we have a multi-part FROM the DSL considers it multi-input
  TupleTag<Row> mainTag = new TupleTag<Row>("main") {};
  PCollectionTuple inputTuple = PCollectionTuple.of(mainTag, input);

  Schema resultType = Schema.builder().addStringField("f_string").build();

  PCollection<Row> result =
      inputTuple.apply(
          "sqlQuery",
          SqlTransform.query("SELECT * FROM UNNEST (ARRAY ['a', 'b', 'c']) AS t(f_string)"));

  PAssert.that(result)
      .containsInAnyOrder(
          Row.withSchema(resultType).addValues("a").build(),
          Row.withSchema(resultType).addValues("b").build(),
          Row.withSchema(resultType).addValues("c").build());

  pipeline.run();
}
Example 4
Source File: CsvConvertersTest.java from DataflowTemplates with Apache License 2.0
/**
 * Tests {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a JSON schema.
 */
@Test
public void testLineToFailsafeJsonNoHeadersJsonSchema() {
  FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  PCollection<String> lines =
      pipeline.apply(Create.of(RECORD_STRING).withCoder(StringUtf8Coder.of()));

  PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);

  PCollectionTuple failsafe =
      linesTuple.apply(
          "TestLineToFailsafeJson",
          CsvConverters.LineToFailsafeJson.newBuilder()
              .setDelimiter(",")
              .setUdfFileSystemPath(null)
              .setUdfFunctionName(null)
              .setJsonSchemaPath(TEST_JSON_SCHEMA__PATH)
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .setUdfOutputTag(PROCESSING_OUT)
              .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
              .build());

  PAssert.that(failsafe.get(PROCESSING_OUT))
      .satisfies(
          collection -> {
            FailsafeElement<String, String> result = collection.iterator().next();
            assertThat(result.getPayload(), is(equalTo(JSON_STRING_RECORD)));
            return null;
          });

  pipeline.run();
}
Example 5
Source File: BeamSqlDslArrayTest.java from beam with Apache License 2.0
@Test
public void testUnnestCrossJoin() {
  Row row1 =
      Row.withSchema(INPUT_SCHEMA)
          .addValues(42)
          .addArray(Arrays.asList("111", "222", "333"))
          .build();
  Row row2 =
      Row.withSchema(INPUT_SCHEMA).addValues(13).addArray(Arrays.asList("444", "555")).build();

  PCollection<Row> input =
      pipeline.apply("boundedInput1", Create.of(row1, row2).withRowSchema(INPUT_SCHEMA));

  // Because we have a multi-part FROM the DSL considers it multi-input
  TupleTag<Row> mainTag = new TupleTag<Row>("main") {};
  PCollectionTuple inputTuple = PCollectionTuple.of(mainTag, input);

  Schema resultType =
      Schema.builder().addInt32Field("f_int").addStringField("f_string").build();

  PCollection<Row> result =
      inputTuple.apply(
          "sqlQuery",
          SqlTransform.query(
              "SELECT f_int, arrElems.f_string FROM main "
                  + " CROSS JOIN UNNEST (main.f_stringArr) AS arrElems(f_string)"));

  PAssert.that(result)
      .containsInAnyOrder(
          Row.withSchema(resultType).addValues(42, "111").build(),
          Row.withSchema(resultType).addValues(42, "222").build(),
          Row.withSchema(resultType).addValues(42, "333").build(),
          Row.withSchema(resultType).addValues(13, "444").build(),
          Row.withSchema(resultType).addValues(13, "555").build());

  pipeline.run();
}
Example 6
Source File: PipelineTest.java from beam with Apache License 2.0
/** Tests that Pipeline supports pulling an element out of a tuple as a transform. */
@Test
@Category(ValidatesRunner.class)
public void testTupleProjectionTransform() throws Exception {
  PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3, 4));

  TupleTag<Integer> tag = new TupleTag<>();
  PCollectionTuple tuple = PCollectionTuple.of(tag, input);

  PCollection<Integer> output = tuple.apply("ProjectTag", new TupleProjectionTransform<>(tag));

  PAssert.that(output).containsInAnyOrder(1, 2, 3, 4);
  pipeline.run();
}
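The TupleProjectionTransform used above is a test-local helper whose body is not shown on this page. A minimal sketch of such a projection transform is below; the field and constructor names are assumptions for illustration and may differ from the class in PipelineTest.

// Hypothetical sketch: pulls one tagged branch back out of a PCollectionTuple.
// Field and constructor names are assumptions, not the test's exact code.
static class TupleProjectionTransform<T> extends PTransform<PCollectionTuple, PCollection<T>> {
  private final TupleTag<T> tag;

  TupleProjectionTransform(TupleTag<T> tag) {
    this.tag = tag;
  }

  @Override
  public PCollection<T> expand(PCollectionTuple input) {
    // get() looks up the branch registered under this tag.
    return input.get(tag);
  }
}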
Example 7
Source File: CsvConvertersTest.java from DataflowTemplates with Apache License 2.0
/**
 * Tests {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a Javascript Udf. Udf processing is handled by {@link
 * JavascriptTextTransformer}.
 */
@Test
public void testLineToFailsafeJsonNoHeadersUdf() {
  FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  PCollection<String> lines =
      pipeline.apply(Create.of(RECORD_STRING).withCoder(StringUtf8Coder.of()));

  PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);

  CsvConverters.CsvPipelineOptions options =
      PipelineOptionsFactory.create().as(CsvConverters.CsvPipelineOptions.class);

  options.setDelimiter(",");
  options.setJavascriptTextTransformGcsPath(TRANSFORM_FILE_PATH);
  options.setJavascriptTextTransformFunctionName("transform");

  PCollectionTuple failsafe =
      linesTuple.apply(
          "TestLineToFailsafeJsonNoHeadersUdf",
          CsvConverters.LineToFailsafeJson.newBuilder()
              .setDelimiter(options.getDelimiter())
              .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
              .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
              .setJsonSchemaPath(options.getJsonSchemaPath())
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .setUdfOutputTag(PROCESSING_OUT)
              .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
              .build());

  PAssert.that(failsafe.get(PROCESSING_OUT))
      .satisfies(
          collection -> {
            FailsafeElement<String, String> result = collection.iterator().next();
            assertThat(result.getPayload(), is(equalTo(JSON_STRING_RECORD)));
            return null;
          });

  PAssert.that(failsafe.get(PROCESSING_DEADLETTER_OUT)).empty();

  pipeline.run();
}
Example 8
Source File: CsvConvertersTest.java from DataflowTemplates with Apache License 2.0
/**
 * Tests {@link CsvConverters.LineToFailsafeJson} converts a line to a {@link FailsafeElement}
 * correctly using a Javascript Udf. Udf processing is handled by {@link
 * JavascriptTextTransformer}. Should output record to deadletter table tag.
 */
@Test
public void testLineToFailsafeJsonNoHeadersUdfDeadletter() {
  FailsafeElementCoder<String, String> coder = FAILSAFE_ELEMENT_CODER;

  CoderRegistry coderRegistry = pipeline.getCoderRegistry();
  coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder);

  PCollection<String> lines =
      pipeline.apply(Create.of(BAD_JSON_STRING_RECORD).withCoder(StringUtf8Coder.of()));

  PCollectionTuple linesTuple = PCollectionTuple.of(CSV_LINES, lines);

  CsvConverters.CsvPipelineOptions options =
      PipelineOptionsFactory.create().as(CsvConverters.CsvPipelineOptions.class);

  options.setDelimiter(",");
  options.setJavascriptTextTransformGcsPath(SCRIPT_PARSE_EXCEPTION_FILE_PATH);
  options.setJavascriptTextTransformFunctionName("transform");

  PCollectionTuple failsafe =
      linesTuple.apply(
          "TestLineToFailsafeJsonNoHeadersUdfBad",
          CsvConverters.LineToFailsafeJson.newBuilder()
              .setDelimiter(options.getDelimiter())
              .setUdfFileSystemPath(options.getJavascriptTextTransformGcsPath())
              .setUdfFunctionName(options.getJavascriptTextTransformFunctionName())
              .setJsonSchemaPath(options.getJsonSchemaPath())
              .setJsonSchemaPath(null)
              .setHeaderTag(CSV_HEADERS)
              .setLineTag(CSV_LINES)
              .setUdfOutputTag(PROCESSING_OUT)
              .setUdfDeadletterTag(PROCESSING_DEADLETTER_OUT)
              .build());

  PAssert.that(failsafe.get(PROCESSING_OUT)).empty();

  PAssert.that(failsafe.get(PROCESSING_DEADLETTER_OUT))
      .satisfies(
          collection -> {
            FailsafeElement result = collection.iterator().next();
            assertThat(result.getPayload(), is(equalTo(BAD_JSON_STRING_RECORD)));
            return null;
          });

  pipeline.run();
}
Example 9
Source File: PipelineTest.java from beam with Apache License 2.0
@Override
public PCollectionTuple expand(PCollection<T> input) {
  return PCollectionTuple.of(tag, input);
}
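This fragment is the expand() method of a small test transform that wraps a single PCollection into a one-entry PCollectionTuple. A sketch of a complete enclosing class is shown below; the class and field names are assumptions for illustration and are not necessarily those used in PipelineTest.

// Hypothetical enclosing class for the expand() fragment above: wraps one PCollection
// into a single-entry PCollectionTuple under a caller-supplied tag. Names are assumptions.
static class TupleInjectionTransform<T> extends PTransform<PCollection<T>, PCollectionTuple> {
  private final TupleTag<T> tag;

  TupleInjectionTransform(TupleTag<T> tag) {
    this.tag = tag;
  }

  @Override
  public PCollectionTuple expand(PCollection<T> input) {
    return PCollectionTuple.of(tag, input);
  }
}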
Example 10
Source File: TestUtils.java from beam with Apache License 2.0
public static <T> PCollectionTuple tuple(String tag, PCollection<T> pCollection) {
  return PCollectionTuple.of(new TupleTag<>(tag), pCollection);
}
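A hypothetical way this helper might be used in a SQL test is sketched below; the schema, values, and query are illustrative and not part of TestUtils. When a tuple built this way is applied to SqlTransform, the tag string becomes the table name the query can reference, which is also why the SQL DSL tests above tag their input as "main".

// Hypothetical usage sketch; the tag string "MAIN" is the table name visible to the query.
Schema schema = Schema.builder().addInt32Field("f_int").build();
PCollection<Row> rows =
    pipeline.apply(
        Create.of(Row.withSchema(schema).addValues(1).build()).withRowSchema(schema));

PCollectionTuple inputs = TestUtils.tuple("MAIN", rows);
PCollection<Row> result = inputs.apply(SqlTransform.query("SELECT f_int FROM MAIN"));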