org.apache.beam.sdk.testing.TestStream Java Examples
The following examples show how to use
org.apache.beam.sdk.testing.TestStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReshuffleTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Streams three timestamped longs through {@link Reshuffle} after keying them with an empty
 * string and placing them in ten-minute fixed windows, then asserts that all three values are
 * still present.
 *
 * <p>The element timestamps are the epoch, the epoch plus 48 days, and {@link
 * BoundedWindow#TIMESTAMP_MAX_VALUE} minus 48 days. The watermark is moved to the epoch plus 48
 * days before the elements are added and to infinity afterwards.
 */
@Test
@Category({ValidatesRunner.class, UsesTestStream.class})
public void testReshuffleWithTimestampsStreaming() {
  Duration offset = Duration.standardDays(48L);
  Instant epoch = new Instant(0L);
  Instant epochPlusOffset = epoch.plus(offset);

  TestStream<Long> stream =
      TestStream.create(VarLongCoder.of())
          .advanceWatermarkTo(epochPlusOffset)
          .addElements(
              TimestampedValue.of(0L, epoch),
              TimestampedValue.of(1L, epochPlusOffset),
              TimestampedValue.of(2L, BoundedWindow.TIMESTAMP_MAX_VALUE.minus(offset)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Long>> input =
      pipeline
          .apply(stream)
          .apply(WithKeys.of(""))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Long>> reshuffled = input.apply(Reshuffle.of());
  PCollection<Long> reshuffledValues = reshuffled.apply(Values.create());

  PAssert.that(reshuffledValues).containsInAnyOrder(0L, 1L, 2L);
  pipeline.run();
}
Example #2
Source File: TestStreamTranslation.java From beam with Apache License 2.0 | 6 votes |
/**
 * Produces a {@link RunnerApi.TestStreamPayload} from a {@link TestStream}.
 *
 * <p>Every event of the transform is converted with {@code eventToProto} using the transform's
 * value coder, and the value coder itself is registered with {@code components} so the payload
 * can refer to it by id.
 *
 * @param transform the test stream whose events and coder are encoded
 * @param components the component registry the value coder is registered in
 * @return the payload holding the coder id and all translated events
 * @throws IOException declared by the signature; note that an {@link IOException} raised while
 *     translating an individual event is rethrown wrapped in a {@link RuntimeException}
 */
static <T> RunnerApi.TestStreamPayload payloadForTestStream(
    final TestStream<T> transform, SdkComponents components) throws IOException {
  List<RunnerApi.TestStreamPayload.Event> translated = new ArrayList<>();
  try {
    for (TestStream.Event<T> sourceEvent : transform.getEvents()) {
      translated.add(eventToProto(sourceEvent, transform.getValueCoder()));
    }
  } catch (IOException translationFailure) {
    // NOTE(review): wrapped rather than propagated even though the method declares IOException;
    // kept as-is because callers may rely on the unchecked exception.
    throw new RuntimeException(translationFailure);
  }

  RunnerApi.TestStreamPayload.Builder payload = RunnerApi.TestStreamPayload.newBuilder();
  payload.setCoderId(components.registerCoder(transform.getValueCoder()));
  payload.addAllEvents(translated);
  return payload.build();
}
Example #3
Source File: SpannerIOWriteTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void streamingWritesNoGrouping() throws Exception { // verify that grouping/sorting does not occur - batches should be created in received order. TestStream<Mutation> testStream = TestStream.create(SerializableCoder.of(Mutation.class)) .addElements(m(1L), m(5L), m(2L), m(4L), m(3L), m(6L)) .advanceWatermarkToInfinity(); // verify that grouping/sorting does not occur when notset. pipeline .apply(testStream) .apply( SpannerIO.write() .withProjectId("test-project") .withInstanceId("test-instance") .withDatabaseId("test-database") .withServiceFactory(serviceFactory) .withMaxNumRows(2)); pipeline.run(); verifyBatches(batch(m(1L), m(5L)), batch(m(2L), m(4L)), batch(m(3L), m(6L))); }
Example #4
Source File: TestStreamTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates an applied {@link TestStream} to its proto form through {@link
 * PTransformTranslation} and checks both the resulting URN and that the payload matches the
 * original test stream.
 */
@Test
public void testRegistrarEncodedProto() throws Exception {
  PCollection<String> output = p.apply(testStream);

  AppliedPTransform<PBegin, PCollection<String>, TestStream<String>> appliedTestStream =
      AppliedPTransform.of("fakeName", PBegin.in(p).expand(), output.expand(), testStream, p);

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.PTransform transformProto =
      PTransformTranslation.toProto(appliedTestStream, components);
  RunnerApi.FunctionSpec spec = transformProto.getSpec();
  assertThat(spec.getUrn(), equalTo(TEST_STREAM_TRANSFORM_URN));

  RunnerApi.TestStreamPayload payload = TestStreamPayload.parseFrom(spec.getPayload());
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(components.toComponents());

  verifyTestStreamEncoding(testStream, payload, rehydrated);
}
Example #5
Source File: StatefulTeamScoreTest.java From deployment-examples with MIT License | 6 votes |
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for one team.
 *
 * <p>Five score events from two users of the same team are fed in after the watermark is moved
 * to {@code baseTime}; with {@code UpdateTeamScoreFn(100)} the expected outputs in the global
 * window are 100, 200 and 401 for that team.
 */
@Test
public void testScoreUpdatesOneTeam() {
  KvCoder<String, GameActionInfo> eventCoder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(eventCoder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String redTeam = TestUser.RED_ONE.getTeam();

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(redTeam, 200), KV.of(redTeam, 401));

  p.run().waitUntilFinish();
}
Example #6
Source File: TestStreamTranslationTest.java From beam with Apache License 2.0 | 6 votes |
private static <T> void verifyTestStreamEncoding( TestStream<T> testStream, RunnerApi.TestStreamPayload payload, RehydratedComponents protoComponents) throws Exception { // This reverse direction is only valid for Java-based coders assertThat(protoComponents.getCoder(payload.getCoderId()), equalTo(testStream.getValueCoder())); assertThat(payload.getEventsList().size(), equalTo(testStream.getEvents().size())); for (int i = 0; i < payload.getEventsList().size(); ++i) { assertThat( TestStreamTranslation.eventFromProto(payload.getEvents(i), testStream.getValueCoder()), equalTo(testStream.getEvents().get(i))); } }
Example #7
Source File: TestStreamTranslation.java From beam with Apache License 2.0 | 6 votes |
/** * Converts an {@link AppliedPTransform}, which may be a rehydrated transform or an original * {@link TestStream}, to a {@link TestStream}. */ public static <T> TestStream<T> getTestStream( AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> application) throws IOException { // For robustness, we don't take this shortcut: // if (application.getTransform() instanceof TestStream) { // return application.getTransform() // } SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions()); RunnerApi.PTransform transformProto = PTransformTranslation.toProto(application, sdkComponents); checkArgument( TEST_STREAM_TRANSFORM_URN.equals(transformProto.getSpec().getUrn()), "Attempt to get %s from a transform with wrong URN %s", TestStream.class.getSimpleName(), transformProto.getSpec().getUrn()); RunnerApi.TestStreamPayload testStreamPayload = RunnerApi.TestStreamPayload.parseFrom(transformProto.getSpec().getPayload()); return (TestStream<T>) testStreamFromProtoPayload( testStreamPayload, RehydratedComponents.forComponents(sdkComponents.toComponents())); }
Example #8
Source File: SpannerIOWriteTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void streamingWritesWithGrouping() throws Exception { // verify that grouping/sorting occurs when set. TestStream<Mutation> testStream = TestStream.create(SerializableCoder.of(Mutation.class)) .addElements(m(1L), m(5L), m(2L), m(4L), m(3L), m(6L)) .advanceWatermarkToInfinity(); pipeline .apply(testStream) .apply( SpannerIO.write() .withProjectId("test-project") .withInstanceId("test-instance") .withDatabaseId("test-database") .withServiceFactory(serviceFactory) .withGroupingFactor(40) .withMaxNumRows(2)); pipeline.run(); // Output should be batches of sorted mutations. verifyBatches(batch(m(1L), m(2L)), batch(m(3L), m(4L)), batch(m(5L), m(6L))); }
Example #9
Source File: StatefulParDoEvaluatorFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Feeds three elements with decreasing timestamps ({@code now + 2}, {@code now + 1}, {@code
 * now}), advancing the watermark to {@code now + 1} before the last one, into a global window
 * with two milliseconds of allowed lateness, and expects the stateful concatenation to emit
 * {@code "3"}, {@code "3:2"} and {@code "3:2:1"}.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant now = Instant.ofEpochMilli(0);

  TestStream<KV<String, Integer>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .addElements(TimestampedValue.of(KV.of("", 1), now.plus(2)))
          .addElements(TimestampedValue.of(KV.of("", 2), now.plus(1)))
          .advanceWatermarkTo(now.plus(1))
          // Arrives behind the watermark, but within the allowed lateness configured below.
          .addElements(TimestampedValue.of(KV.of("", 3), now))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> input =
      pipeline
          .apply(events)
          .apply(
              Window.<KV<String, Integer>>into(new GlobalWindows())
                  .withAllowedLateness(Duration.millis(2)));

  PCollection<String> result = input.apply(ParDo.of(statefulConcat()));

  PAssert.that(result).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
Example #10
Source File: BigQueryChangeApplierTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Feeds two insert records that share {@code RECORD_SCHEMA1} and the key {@code "k1"} and
 * asserts that the table schema collection contains exactly one entry for {@code TABLE_NAME},
 * pairing {@code KEY_SCHEMA} with {@code RECORD_SCHEMA1}.
 */
@Test
void testSchemasEmittedOnlyOnChanges() {
  TestStream<Row> testStream =
      TestStream.create(SerializableCoder.of(Row.class))
          .addElements(
              testInsertRecord(
                  Row.withSchema(RECORD_SCHEMA1)
                      .addValues(
                          "k1",
                          1,
                          DateTime.now(),
                          // Explicit charset: the no-arg getBytes() depends on the platform
                          // default, which would make the fixture vary between machines.
                          "bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8))
                      .build()),
              testInsertRecord(
                  Row.withSchema(RECORD_SCHEMA1)
                      .addValues(
                          "k1",
                          2,
                          DateTime.now(),
                          "bytes".getBytes(java.nio.charset.StandardCharsets.UTF_8))
                      .build()))
          .advanceWatermarkTo(Instant.now())
          .advanceWatermarkToInfinity();

  Pipeline p = Pipeline.create();

  PCollection<Row> input = p.apply(testStream).setRowSchema(UPDATE_RECORD_SCHEMA);

  PCollection<KV<String, KV<Schema, Schema>>> tableSchemaCollection =
      BigQueryChangeApplier.buildTableSchemaCollection(input);

  PAssert.that(tableSchemaCollection)
      .containsInAnyOrder(KV.of(TABLE_NAME, KV.of(KEY_SCHEMA, RECORD_SCHEMA1)));

  p.run().waitUntilFinish();
}
Example #11
Source File: StatefulTeamScoreTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for one team.
 *
 * <p>The input is five score events from two members of one team, added after the watermark is
 * advanced to {@code baseTime}. With {@code UpdateTeamScoreFn(100)} the test expects the values
 * 100, 200 and 401 for that team in the global window.
 */
@Test
public void testScoreUpdatesOneTeam() {
  KvCoder<String, GameActionInfo> coder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class));

  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(coder)
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents).apply(ParDo.of(new UpdateTeamScoreFn(100)));

  String team = TestUser.RED_ONE.getTeam();

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(team, 100), KV.of(team, 200), KV.of(team, 401));

  p.run().waitUntilFinish();
}
Example #12
Source File: ReifyTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Applies {@link Reify#windows()} to a single timestamped string from a {@link TestStream} and
 * expects it to be reified with its timestamp, the global window and {@link
 * PaneInfo#NO_FIRING}.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void globalWindowNoKeys() {
  TestStream<String> source =
      TestStream.create(StringUtf8Coder.of())
          .addElements(TimestampedValue.of("dei", new Instant(123L)))
          .advanceWatermarkToInfinity();

  PCollection<ValueInSingleWindow<String>> result = pipeline.apply(source).apply(Reify.windows());

  PAssert.that(result)
      .containsInAnyOrder(
          ValueInSingleWindow.of(
              "dei", new Instant(123L), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));

  pipeline.run();
}
Example #13
Source File: TestUtils.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds an unbounded {@link PCollection} in {@link Pipeline} set by {@link
 * #inPipeline(Pipeline)}.
 *
 * <p>If timestamp field was set with {@link #withTimestampField(String)} then watermark will be
 * advanced to the values from that field.
 *
 * <p>When no schema type has been set, the schema of the first row is used.
 *
 * @return the rows as an unbounded collection, applied under the name {@code
 *     "unboundedPCollection"}
 * @throws IllegalArgumentException if no pipeline was set or there are no rows
 */
public PCollection<Row> buildUnbounded() {
  checkArgument(pipeline != null);
  checkArgument(rows.size() > 0);

  if (type == null) {
    type = rows.get(0).getSchema();
  }

  TestStream.Builder<Row> builder = TestStream.create(type);
  for (Row current : rows) {
    if (timestampField != null) {
      // Move the watermark to this row's timestamp before emitting the row itself.
      Instant rowTime = new Instant(current.getDateTime(timestampField));
      builder = builder.advanceWatermarkTo(rowTime);
    }
    builder = builder.addElements(current);
  }

  TestStream<Row> stream = builder.advanceWatermarkToInfinity();
  return PBegin.in(pipeline).apply("unboundedPCollection", stream);
}
Example #14
Source File: ParDoTest.java From beam with Apache License 2.0 | 6 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithTestStream() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> stream = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { stream = stream.addElements(stamp); } testTimeSortedInput(numElements, pipeline.apply(stream.advanceWatermarkToInfinity())); }
Example #15
Source File: ParDoTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link TestStream} of {@code numTestElements} keyed strings spaced one second apart
 * starting at {@code now}, advancing the watermark after every tenth element, and passes it to
 * {@code testEventTimeTimerOrderingWithInputPTransform}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStatefulParDo.class,
  UsesStrictTimerOrdering.class
})
public void testEventTimeTimerOrdering() throws Exception {
  final int numTestElements = 100;
  final Instant now = new Instant(1500000000000L);

  TestStream.Builder<KV<String, String>> builder =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(new Instant(0));

  for (int index = 0; index < numTestElements; index++) {
    Instant elementTime = now.plus(index * 1000);
    builder = builder.addElements(TimestampedValue.of(KV.of("dummy", "" + index), elementTime));

    boolean completesGroupOfTen = (index + 1) % 10 == 0;
    if (completesGroupOfTen) {
      // Watermark moves to one second past the element just added.
      builder = builder.advanceWatermarkTo(now.plus((index + 1) * 1000));
    }
  }

  testEventTimeTimerOrderingWithInputPTransform(
      now, numTestElements, builder.advanceWatermarkToInfinity());
}
Example #16
Source File: ParDoTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs {@code TwoTimerTest} over a {@link TestStream} that contains a single {@code (null,
 * null)} element, with a start instant and an end instant 100 milliseconds later.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStrictTimerOrdering.class
})
public void testTwoTimersSettingEachOther() {
  Instant now = new Instant(1500000000000L);
  Instant end = now.plus(100);

  KvCoder<Void, Void> voidPairCoder = KvCoder.of(VoidCoder.of(), VoidCoder.of());
  TestStream<KV<Void, Void>> input =
      TestStream.create(voidPairCoder)
          .addElements(KV.of(null, null))
          .advanceWatermarkToInfinity();

  pipeline.apply(TwoTimerTest.of(now, end, input));
  pipeline.run();
}
Example #17
Source File: FileBasedDeadLetterQueueReconsumerTest.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Creates three JSON files with the same contents, streams their names through {@link
 * FileIO#matchAll()} and {@code moveAndConsumeMatches()}, and expects every line of {@code
 * JSON_FILE_CONTENTS_1} to appear three times in the output.
 */
@Test
public void testAllFilesAreConsumed() throws IOException {
  String firstFile = createJsonFile("dlqFile1.json", JSON_FILE_CONTENTS_1);
  String secondFile = createJsonFile("dlqFile2.json", JSON_FILE_CONTENTS_1);
  String thirdFile = createJsonFile("dlqFile3.json", JSON_FILE_CONTENTS_1);

  TestStream<String> inputFiles =
      TestStream.create(StringUtf8Coder.of())
          .addElements(firstFile, secondFile)
          .addElements(thirdFile)
          .advanceWatermarkToInfinity();

  PCollection<String> jsonData =
      p.apply(inputFiles)
          .apply(FileIO.matchAll())
          .apply(FileBasedDeadLetterQueueReconsumer.moveAndConsumeMatches());

  // One copy of each line per file created above.
  PAssert.that(jsonData)
      .containsInAnyOrder(
          Stream.of(JSON_FILE_CONTENTS_1)
              .flatMap(line -> Stream.of(line, line, line))
              .collect(Collectors.toList()));

  p.run().waitUntilFinish();
}
Example #18
Source File: SpannerIOWriteTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Streams three pairs of mutations separated by one-minute processing-time advances into {@link
 * SpannerIO#write()} and expects one batch per pair.
 */
@Test
public void streamingWrites() throws Exception {
  Duration oneMinute = Duration.standardMinutes(1);

  TestStream<Mutation> testStream =
      TestStream.create(SerializableCoder.of(Mutation.class))
          .addElements(m(1L), m(2L))
          .advanceProcessingTime(oneMinute)
          .addElements(m(3L), m(4L))
          .advanceProcessingTime(oneMinute)
          .addElements(m(5L), m(6L))
          .advanceWatermarkToInfinity();

  SpannerIO.Write write =
      SpannerIO.write()
          .withProjectId("test-project")
          .withInstanceId("test-instance")
          .withDatabaseId("test-database")
          .withServiceFactory(serviceFactory);

  pipeline.apply(testStream).apply(write);
  pipeline.run();

  verifyBatches(batch(m(1L), m(2L)), batch(m(3L), m(4L)), batch(m(5L), m(6L)));
}
Example #19
Source File: StateAndTimersTest.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Creates the test input stream of visits and impressions.
 *
 * <p>The events are declared in event-time order and describe two (logical) attributable
 * impressions and one unattributable impression. They are then added to the stream out of
 * order, interleaved with watermark advances.
 *
 * @return a {@link TestStream} of keyed {@code VisitOrImpression} values that ends with the
 *     watermark at infinity
 */
private static TestStream<KV<String, VisitOrImpression>> createStream() {
  // URLs that appear in more than one event.
  final String searchXyz = "http://search.com?q=xyz";
  final String searchThing = "http://search.com?q=thing";
  final String homePage = "http://xyz.com/";
  final String thingPage = "http://xyz.com/thing";
  final String cartPage = "http://xyz.com/thing/add-to-cart";
  final String purchasePage = "http://xyz.com/thing/purchase";
  final String otherThingPage = "http://xyz.com/other-thing";

  // Impressions and visits, in event-time order, for two (logical) attributable impressions and
  // one unattributable impression.
  Impression signupImpression =
      new Impression(123L, searchXyz, homePage, Utils.parseTime("12:01:00"));
  Visit signupVisit =
      new Visit(homePage, Utils.parseTime("12:01:10"), searchXyz, false /*isGoal*/);
  Visit signupGoal =
      new Visit(
          "http://xyz.com/join-mailing-list",
          Utils.parseTime("12:01:30"),
          homePage,
          true /*isGoal*/);

  Impression shoppingImpression =
      new Impression(456L, searchThing, thingPage, Utils.parseTime("12:02:00"));
  Impression shoppingImpressionDup =
      new Impression(789L, searchThing, thingPage, Utils.parseTime("12:02:10"));
  Visit shoppingVisit1 =
      new Visit(thingPage, Utils.parseTime("12:02:30"), searchThing, false /*isGoal*/);
  Visit shoppingVisit2 =
      new Visit(cartPage, Utils.parseTime("12:03:00"), thingPage, false /*isGoal*/);
  Visit shoppingVisit3 =
      new Visit(purchasePage, Utils.parseTime("12:03:20"), cartPage, false /*isGoal*/);
  Visit shoppingGoal =
      new Visit(
          "http://xyz.com/thing/receipt",
          Utils.parseTime("12:03:45"),
          purchasePage,
          true /*isGoal*/);

  Impression unattributedImpression =
      new Impression(0L, searchThing, otherThingPage, Utils.parseTime("12:04:00"));
  Visit unattributedVisit =
      new Visit(
          otherThingPage,
          Utils.parseTime("12:04:20"),
          "http://search.com?q=other thing",
          false /*isGoal*/);

  KvCoder<String, VisitOrImpression> coder =
      KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(VisitOrImpression.class));

  // Create a stream of visits and impressions, with data arriving out of order.
  return TestStream.create(coder)
      .advanceWatermarkTo(Utils.parseTime("12:00:00"))
      .addElements(visitOrImpression(shoppingVisit2, null))
      .addElements(visitOrImpression(shoppingGoal, null))
      .addElements(visitOrImpression(shoppingVisit3, null))
      .addElements(visitOrImpression(signupGoal, null))
      .advanceWatermarkTo(Utils.parseTime("12:00:30"))
      .addElements(visitOrImpression(null, signupImpression))
      .advanceWatermarkTo(Utils.parseTime("12:01:00"))
      .addElements(visitOrImpression(null, shoppingImpression))
      .addElements(visitOrImpression(signupVisit, null))
      .advanceWatermarkTo(Utils.parseTime("12:01:30"))
      .addElements(visitOrImpression(null, shoppingImpressionDup))
      .addElements(visitOrImpression(shoppingVisit1, null))
      .advanceWatermarkTo(Utils.parseTime("12:03:45"))
      .addElements(visitOrImpression(null, unattributedImpression))
      .advanceWatermarkTo(Utils.parseTime("12:04:00"))
      .addElements(visitOrImpression(unattributedVisit, null))
      .advanceWatermarkToInfinity();
}
Example #20
Source File: TestJetRunner.java From beam with Apache License 2.0 | 5 votes |
@Override public Vertex translate( Pipeline pipeline, AppliedPTransform<?, ?, ?> appliedTransform, TransformHierarchy.Node node, JetTranslationContext context) { String transformName = appliedTransform.getFullName(); DAGBuilder dagBuilder = context.getDagBuilder(); String vertexId = dagBuilder.newVertexId(transformName); TestStream<T> testStream = (TestStream<T>) appliedTransform.getTransform(); // events in the transform are not serializable, we have to translate them. We'll also flatten // the collection. Map.Entry<TupleTag<?>, PValue> output = Utils.getOutput(appliedTransform); Coder outputCoder = Utils.getCoder((PCollection) output.getValue()); TestStream.TestStreamCoder<T> payloadCoder = TestStream.TestStreamCoder.of(testStream.getValueCoder()); byte[] encodedPayload = getEncodedPayload(testStream, payloadCoder); Vertex vertex = dagBuilder.addVertex( vertexId, TestStreamP.supplier(encodedPayload, payloadCoder, outputCoder)); String outputEdgeId = Utils.getTupleTagId(output.getValue()); dagBuilder.registerCollectionOfEdge(outputEdgeId, output.getKey().getId()); dagBuilder.registerEdgeStartPoint(outputEdgeId, vertex, outputCoder); return vertex; }
Example #21
Source File: DeduplicateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Exercises {@link Deduplicate} in the event-time domain.
 *
 * <p>{@code k1}, {@code k2} and {@code k3} are each added twice and must each appear exactly
 * once in the output. {@code "maybedup"} is added once before the watermark passes one minute
 * and again after it has advanced by a further {@link Deduplicate#DEFAULT_DURATION}; the test
 * accepts it appearing either once or twice.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testEventTime() {
  Instant base = new Instant(0);
  Instant oneMinuteIn = base.plus(Duration.standardMinutes(1));
  Instant maybedupTime = base.plus(Duration.standardSeconds(59));

  TestStream<String> values =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of("k1", base),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
              TimestampedValue.of("maybedup", maybedupTime))
          .advanceWatermarkTo(oneMinuteIn)
          .addElements(
              TimestampedValue.of("k1", base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))))
          .advanceWatermarkTo(oneMinuteIn.plus(Deduplicate.DEFAULT_DURATION))
          .addElements(TimestampedValue.of("maybedup", maybedupTime))
          .advanceWatermarkToInfinity();

  PCollection<String> distinctValues =
      p.apply(values).apply(Deduplicate.<String>values().withTimeDomain(TimeDomain.EVENT_TIME));

  PAssert.that(distinctValues)
      .satisfies(
          (Iterable<String> output) -> {
            assertEquals(1, Iterables.frequency(output, "k1"));
            assertEquals(1, Iterables.frequency(output, "k2"));
            assertEquals(1, Iterables.frequency(output, "k3"));
            int maybedupCount = Iterables.frequency(output, "maybedup");
            assertTrue(maybedupCount == 1 || maybedupCount == 2);
            return null;
          });

  p.run();
}
Example #22
Source File: TestStreamTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link RunnerApi.FunctionSpec} for a {@link TestStream}: the spec carries the test
 * stream URN and, as its payload, the serialized result of {@code payloadForTestStream}.
 *
 * @param testStream the transform to translate
 * @param components the component registry passed on to {@code payloadForTestStream}
 * @return the function spec describing {@code testStream}
 * @throws IOException if building the payload fails with an I/O error
 */
private <T> RunnerApi.FunctionSpec translateTyped(
    final TestStream<T> testStream, SdkComponents components) throws IOException {
  RunnerApi.TestStreamPayload payload = payloadForTestStream(testStream, components);

  RunnerApi.FunctionSpec.Builder spec = RunnerApi.FunctionSpec.newBuilder();
  spec.setUrn(TEST_STREAM_TRANSFORM_URN);
  spec.setPayload(payload.toByteString());
  return spec.build();
}
Example #23
Source File: DeduplicateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Exercises {@link Deduplicate#values()} with its default configuration while advancing
 * processing time.
 *
 * <p>{@code k1}, {@code k2} and {@code k3} are each added twice and must each appear exactly
 * once. {@code "maybedup"} is added before the first processing-time advance and again after
 * processing time has moved forward by {@link Deduplicate#DEFAULT_DURATION}; the test accepts it
 * appearing either once or twice.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testProcessingTime() {
  Instant base = new Instant(0);
  Instant maybedupTime = base.plus(Duration.standardSeconds(59));

  TestStream<String> values =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of("k1", base),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
              TimestampedValue.of("maybedup", maybedupTime))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of("k1", base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))))
          .advanceProcessingTime(Deduplicate.DEFAULT_DURATION)
          .addElements(TimestampedValue.of("maybedup", maybedupTime))
          .advanceWatermarkToInfinity();

  PCollection<String> distinctValues = p.apply(values).apply(Deduplicate.values());

  PAssert.that(distinctValues)
      .satisfies(
          (Iterable<String> output) -> {
            assertEquals(1, Iterables.frequency(output, "k1"));
            assertEquals(1, Iterables.frequency(output, "k2"));
            assertEquals(1, Iterables.frequency(output, "k3"));
            int maybedupCount = Iterables.frequency(output, "maybedup");
            assertTrue(maybedupCount == 1 || maybedupCount == 2);
            return null;
          });

  p.run();
}
Example #24
Source File: DeduplicateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Deduplicates key/value pairs by their integer key, supplying the representative-value coder
 * explicitly, and expects one pair per key even though every pair is added twice (before and
 * after a one-minute processing-time advance).
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testRepresentativeValuesWithCoder() {
  Instant base = new Instant(0);
  KvCoder<Integer, String> pairCoder = KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of());

  TestStream<KV<Integer, String>> values =
      TestStream.create(pairCoder)
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> distinctValues =
      p.apply(values)
          .apply(
              Deduplicate.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeCoder(VarIntCoder.of()));

  PAssert.that(distinctValues)
      .containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));

  p.run();
}
Example #25
Source File: DeduplicateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Deduplicates key/value pairs by their integer key and expects one pair per key even though
 * every pair is added twice (before and after a one-minute processing-time advance).
 *
 * <p>NOTE(review): despite the "WithType" in the name, the transform below is configured through
 * {@code withRepresentativeCoder}; confirm whether a type-based configuration was intended.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredRepresentativeValuesWithType() {
  Instant base = new Instant(0);
  KvCoder<Integer, String> pairCoder = KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of());

  TestStream<KV<Integer, String>> values =
      TestStream.create(pairCoder)
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), base.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), base.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> distinctValues =
      p.apply(values)
          .apply(
              Deduplicate.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeCoder(VarIntCoder.of()));

  PAssert.that(distinctValues)
      .containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));

  p.run();
}
Example #26
Source File: TestStreamTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Supplies the parameterized test inputs: an empty stream, a stream with only a watermark
 * advance, a stream with one timestamped element, and a stream with only a processing-time
 * advance. Every stream ends with the watermark at infinity.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<TestStream<?>> data() {
  TestStream<Integer> emptyStream =
      TestStream.create(VarIntCoder.of()).advanceWatermarkToInfinity();

  TestStream<Integer> watermarkOnly =
      TestStream.create(VarIntCoder.of())
          .advanceWatermarkTo(new Instant(42))
          .advanceWatermarkToInfinity();

  TestStream<Integer> singleElement =
      TestStream.create(VarIntCoder.of())
          .addElements(TimestampedValue.of(3, new Instant(17)))
          .advanceWatermarkToInfinity();

  TestStream<String> processingTimeOnly =
      TestStream.create(StringUtf8Coder.of())
          .advanceProcessingTime(Duration.millis(82))
          .advanceWatermarkToInfinity();

  return ImmutableList.of(emptyStream, watermarkOnly, singleElement, processingTimeOnly);
}
Example #27
Source File: ParDoTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp))); if (stamp == 100) { // advance watermark when we have 100 remaining elements // all the rest are going to be late elements input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp)); } } testTimeSortedInput( numElements, pipeline .apply(input.advanceWatermarkToInfinity()) .apply( Window.<Long>into(new GlobalWindows()) .withAllowedLateness(Duration.millis(5000)))); }
Example #28
Source File: ParDoTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithLateData() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp))); if (stamp == 100) { // advance watermark when we have 100 remaining elements // all the rest are going to be late elements input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp)); } } testTimeSortedInput( numElements - 100, numElements - 1, pipeline.apply(input.advanceWatermarkToInfinity()), // cannot validate exactly which data gets dropped, because that is runner dependent false); }
Example #29
Source File: TestStreamTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Converts a proto test stream event back into a {@link TestStream.Event}.
 *
 * <p>Watermark events become {@link TestStream.WatermarkEvent}s, processing-time events become
 * {@link TestStream.ProcessingTimeEvent}s, and element events have every element decoded with
 * {@code coder} and paired with its timestamp.
 *
 * @param protoEvent the proto event to convert
 * @param coder the coder used to decode the elements of an element event
 * @return the converted event
 * @throws IOException if decoding an element fails
 * @throws IllegalArgumentException if the event case is not set or not one of the cases above
 */
static <T> TestStream.Event<T> eventFromProto(
    RunnerApi.TestStreamPayload.Event protoEvent, Coder<T> coder) throws IOException {
  switch (protoEvent.getEventCase()) {
    case WATERMARK_EVENT:
      {
        Instant newWatermark = new Instant(protoEvent.getWatermarkEvent().getNewWatermark());
        return TestStream.WatermarkEvent.advanceTo(newWatermark);
      }
    case PROCESSING_TIME_EVENT:
      {
        Duration advanceBy =
            Duration.millis(protoEvent.getProcessingTimeEvent().getAdvanceDuration());
        return TestStream.ProcessingTimeEvent.advanceBy(advanceBy);
      }
    case ELEMENT_EVENT:
      {
        List<TimestampedValue<T>> decodedElements = new ArrayList<>();
        for (RunnerApi.TestStreamPayload.TimestampedElement encoded :
            protoEvent.getElementEvent().getElementsList()) {
          T value =
              CoderUtils.decodeFromByteArray(coder, encoded.getEncodedElement().toByteArray());
          Instant timestamp = new Instant(encoded.getTimestamp());
          decodedElements.add(TimestampedValue.of(value, timestamp));
        }
        return TestStream.ElementEvent.add(decodedElements);
      }
    case EVENT_NOT_SET:
    default:
      throw new IllegalArgumentException(
          String.format(
              "Unsupported type of %s: %s",
              RunnerApi.TestStreamPayload.Event.class.getCanonicalName(),
              protoEvent.getEventCase()));
  }
}
Example #30
Source File: DeduplicateTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies {@link Deduplicate#values()} after assigning 30-second fixed windows and checks the
 * contents of each of the three windows separately: {@code k1}..{@code k3} appear in both the
 * first and the second window, and {@code k4}..{@code k6} appear in the third.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testInDifferentWindows() {
  Instant base = new Instant(0);
  Instant thirtySeconds = base.plus(Duration.standardSeconds(30));
  Instant sixtySeconds = base.plus(Duration.standardSeconds(60));
  Instant ninetySeconds = base.plus(Duration.standardSeconds(90));

  TestStream<String> values =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(base)
          .addElements(
              TimestampedValue.of("k1", base),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(20))),
              TimestampedValue.of("k1", thirtySeconds),
              TimestampedValue.of("k2", base.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", base.plus(Duration.standardSeconds(50))),
              TimestampedValue.of("k4", sixtySeconds),
              TimestampedValue.of("k5", base.plus(Duration.standardSeconds(70))),
              TimestampedValue.of("k6", base.plus(Duration.standardSeconds(80))))
          .advanceWatermarkToInfinity();

  PCollection<String> distinctValues =
      p.apply(values)
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(30))))
          .apply(Deduplicate.values());

  // First window: [0s, 30s).
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(base, thirtySeconds))
      .containsInAnyOrder("k1", "k2", "k3");

  // Second window: [30s, 60s) - the same keys again, because they sit in a different window.
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(thirtySeconds, sixtySeconds))
      .containsInAnyOrder("k1", "k2", "k3");

  // Third window: [60s, 90s).
  PAssert.that(distinctValues)
      .inWindow(new IntervalWindow(sixtySeconds, ninetySeconds))
      .containsInAnyOrder("k4", "k5", "k6");

  p.run();
}