org.apache.beam.sdk.transforms.windowing.AfterProcessingTime Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.windowing.AfterProcessingTime.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LeaderBoard.java From beam with Apache License 2.0 | 6 votes |
@Override public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) { return infos .apply( "LeaderboardTeamFixedWindows", Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration)) // We will get early (speculative) results as well as cumulative // processing of late data. .triggering( AfterWatermark.pastEndOfWindow() .withEarlyFirings( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(FIVE_MINUTES)) .withLateFirings( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(TEN_MINUTES))) .withAllowedLateness(allowedLateness) .accumulatingFiredPanes()) // Extract and sum teamname/score pairs from the event data. .apply("ExtractTeamScore", new ExtractAndSumScore("team")); }
Example #2
Source File: LeaderBoard.java From deployment-examples with MIT License | 6 votes |
@Override public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) { return infos .apply( "LeaderboardTeamFixedWindows", Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration)) // We will get early (speculative) results as well as cumulative // processing of late data. .triggering( AfterWatermark.pastEndOfWindow() .withEarlyFirings( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(FIVE_MINUTES)) .withLateFirings( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(TEN_MINUTES))) .withAllowedLateness(allowedLateness) .accumulatingFiredPanes()) // Extract and sum teamname/score pairs from the event data. .apply("ExtractTeamScore", new ExtractAndSumScore("team")); }
Example #3
Source File: MyBeamJob.java From hazelcast-jet-demos with Apache License 2.0 | 6 votes |
/**
 * Assembles the demo pipeline: reads from the unbounded {@code "beam-input"} source, enriches and
 * reformats each element, windows the result into {@code ONE_SECOND} fixed windows and writes the
 * panes through a text sink into the current directory with the prefix {@code "beam-output"}.
 *
 * @param pipelineOptions options the pipeline is created with
 * @return the wired pipeline; this method does not run it
 */
public static Pipeline build(PipelineOptions pipelineOptions) {
  // Fire repeatedly on processing time once a pane has its first element, and discard what has
  // already been emitted.
  Window<String> oneSecondWindows =
      Window.<String>into(FixedWindows.of(ONE_SECOND))
          .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
          .discardingFiredPanes()
          .withAllowedLateness(ONE_SECOND);

  Pipeline pipeline = Pipeline.create(pipelineOptions);
  pipeline
      .apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
      .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
      .apply("window", oneSecondWindows)
      .apply(
          "sink",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(".")
              .withPrefix("beam-output")
              .withNumShards(1));
  return pipeline;
}
Example #4
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Groups the input by key inside the global window and emits, for every fired pane, one value per
 * key: the first value returned by the grouped iterable.
 *
 * <p>Panes fire repeatedly on processing time, aligned to {@code intervalDuration} with the
 * offset taken from the wall clock when this transform is expanded; fired panes are discarded.
 *
 * @param input keyed elements to reduce
 * @return one {@code KV} per key and fired pane
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  Repeatedly alignedFiring =
      Repeatedly.forever(
          AfterProcessingTime.pastFirstElementInPane()
              .plusDelayOf(Duration.ZERO)
              .alignedTo(intervalDuration, org.joda.time.Instant.now()));

  Window<KV<K, V>> globalWindowing =
      Window.<KV<K, V>>into(new GlobalWindows())
          .discardingFiredPanes()
          .triggering(alignedFiring);

  PCollection<KV<K, Iterable<V>>> grouped =
      input.apply(globalWindowing).apply(GroupByKey.create());

  DoFn<KV<K, Iterable<V>>, KV<K, V>> pickFirstValue =
      new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
        @ProcessElement
        public void process(ProcessContext c) {
          LOG.debug(
              "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
          Iterator<V> values = c.element().getValue().iterator();
          if (!values.hasNext()) {
            // Nothing was grouped under this key, so nothing is emitted.
            return;
          }
          c.output(KV.of(c.element().getKey(), values.next()));
        }
      };

  return grouped.apply(ParDo.of(pickFirstValue));
}
Example #5
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Groups the input by key inside the global window and emits, for every fired pane, one value per
 * key: the first value returned by the grouped iterable.
 *
 * <p>Panes fire repeatedly on processing time, aligned to {@code intervalDuration} with the
 * offset taken from the wall clock when this transform is expanded; fired panes are discarded.
 *
 * @param input keyed elements to reduce
 * @return one {@code KV} per key and fired pane
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  Repeatedly alignedFiring =
      Repeatedly.forever(
          AfterProcessingTime.pastFirstElementInPane()
              .plusDelayOf(Duration.ZERO)
              .alignedTo(intervalDuration, org.joda.time.Instant.now()));

  Window<KV<K, V>> globalWindowing =
      Window.<KV<K, V>>into(new GlobalWindows())
          .discardingFiredPanes()
          .triggering(alignedFiring);

  PCollection<KV<K, Iterable<V>>> grouped =
      input.apply(globalWindowing).apply(GroupByKey.create());

  DoFn<KV<K, Iterable<V>>, KV<K, V>> pickFirstValue =
      new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
        @ProcessElement
        public void process(ProcessContext c) {
          LOG.debug(
              "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
          Iterator<V> values = c.element().getValue().iterator();
          if (!values.hasNext()) {
            // Nothing was grouped under this key, so nothing is emitted.
            return;
          }
          c.output(KV.of(c.element().getKey(), values.next()));
        }
      };

  return grouped.apply(ParDo.of(pickFirstValue));
}
Example #6
Source File: DistinctTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Feeds the keyed values {@code k1..k3} twice, separated by one minute of processing time, through
 * a repeatedly firing, accumulating window and expects {@code Distinct} with a representative
 * value function to output each of them exactly once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValues() {
  Instant epoch = new Instant(0);

  TestStream<KV<Integer, String>> stream =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(epoch)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), epoch),
              TimestampedValue.of(KV.of(2, "k2"), epoch.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), epoch.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), epoch.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), epoch.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), epoch.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  // One-minute windows firing 30 seconds of processing time after a pane's first element.
  Window<KV<Integer, String>> windowing =
      Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<KV<Integer, String>> distinctValues =
      triggeredDistinctRepresentativePipeline
          .apply(stream)
          .apply(windowing)
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(distinctValues)
      .containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  triggeredDistinctRepresentativePipeline.run();
}
Example #7
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Sums the integers per key in {@code TWO_MINUTES} fixed windows and formats every result as a
 * string. Panes fire repeatedly on processing time, aligned to {@code TWO_MINUTES} periods
 * offset from 12:05:00, and accumulate across firings.
 *
 * @param input keyed integers to aggregate
 * @return the formatted per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  AfterProcessingTime alignedFiring =
      AfterProcessingTime.pastFirstElementInPane()
          .alignedTo(TWO_MINUTES, Utils.parseTime("12:05:00"));

  Window<KV<String, Integer>> windowing =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(Repeatedly.forever(alignedFiring))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes();

  PCollection<KV<String, Integer>> sums = input.apply(windowing).apply(Sum.integersPerKey());
  return sums.apply(ParDo.of(new FormatAsStrings()));
}
Example #8
Source File: DistinctTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Regression test: if the speculative trigger had already emitted every value, the on-time firing
 * used to produce a null {@code KV}. The single input element must come out exactly once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValuesEmpty() {
  Instant epoch = new Instant(0);

  TestStream<KV<Integer, String>> stream =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(epoch)
          .addElements(TimestampedValue.of(KV.of(1, "k1"), epoch))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .advanceWatermarkToInfinity();

  // Early firing 30 seconds after the first element; panes are discarded once fired.
  AfterProcessingTime speculativeFiring =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.standardSeconds(30));
  Window<KV<Integer, String>> windowing =
      Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(speculativeFiring))
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes();

  PCollection<KV<Integer, String>> distinctValues =
      triggeredDistinctRepresentativePipeline
          .apply(stream)
          .apply(windowing)
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(distinctValues).containsInAnyOrder(KV.of(1, "k1"));
  triggeredDistinctRepresentativePipeline.run();
}
Example #9
Source File: GroupByKeyTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests that a processing-time timer arriving after its window has expired does not cause a
 * spurious output: the two elements must be summed into the single value 7.
 */
@Test
@Category({ValidatesRunner.class, UsesTestStreamWithProcessingTime.class})
public void testCombiningAccumulatingProcessingTime() throws Exception {
  // The watermark passes the end of the 100 ms window before processing time advances by 10 ms.
  TestStream<Integer> stream =
      TestStream.create(VarIntCoder.of())
          .advanceWatermarkTo(new Instant(0))
          .addElements(
              TimestampedValue.of(2, new Instant(2)), TimestampedValue.of(5, new Instant(5)))
          .advanceWatermarkTo(new Instant(100))
          .advanceProcessingTime(Duration.millis(10))
          .advanceWatermarkToInfinity();

  Window<Integer> windowing =
      Window.<Integer>into(FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO)
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.millis(10))));

  PCollection<Integer> triggeredSums =
      p.apply(stream).apply(windowing).apply(Sum.integersGlobally().withoutDefaults());

  PAssert.that(triggeredSums).containsInAnyOrder(7);
  p.run();
}
Example #10
Source File: TestStreamTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that early firings driven by processing time are honoured by {@code TestStream}: with a
 * five-minute delay the early panes of the global window must hold the accumulated sums 3 and 6.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testProcessingTimeTrigger() {
  TestStream<Long> stream =
      TestStream.create(VarLongCoder.of())
          .addElements(
              TimestampedValue.of(1L, new Instant(1000L)),
              TimestampedValue.of(2L, new Instant(2000L)))
          .advanceProcessingTime(Duration.standardMinutes(12))
          .addElements(TimestampedValue.of(3L, new Instant(3000L)))
          .advanceProcessingTime(Duration.standardMinutes(6))
          .advanceWatermarkToInfinity();

  AfterProcessingTime fiveMinutesAfterFirstElement =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.standardMinutes(5));
  Window<Long> triggering =
      Window.<Long>configure()
          .triggering(
              AfterWatermark.pastEndOfWindow().withEarlyFirings(fiveMinutesAfterFirstElement))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO);

  PCollection<Long> sum = p.apply(stream).apply(triggering).apply(Sum.longsGlobally());

  PAssert.that(sum).inEarlyGlobalWindowPanes().containsInAnyOrder(3L, 6L);
  p.run();
}
Example #11
Source File: PubsubUnboundedSink.java From beam with Apache License 2.0 | 5 votes |
/**
 * Batches the messages in the global window, shards them, groups each shard and hands the groups
 * to the writer.
 *
 * <p>A batch is released as soon as either {@code publishBatchSize} elements have arrived or
 * {@code maxLatency} of processing time has passed since the first element of the pane; released
 * panes are discarded.
 *
 * @param input messages to publish
 * @return a {@code PDone} bound to the input's pipeline
 */
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  Repeatedly batchTrigger =
      Repeatedly.forever(
          AfterFirst.of(
              AfterPane.elementCountAtLeast(publishBatchSize),
              AfterProcessingTime.pastFirstElementInPane().plusDelayOf(maxLatency)));

  Window<PubsubMessage> batching =
      Window.<PubsubMessage>into(new GlobalWindows())
          .triggering(batchTrigger)
          .discardingFiredPanes();

  input
      .apply("PubsubUnboundedSink.Window", batching)
      .apply("PubsubUnboundedSink.Shard", ParDo.of(new ShardFn(numShards, recordIdMethod)))
      .setCoder(KvCoder.of(VarIntCoder.of(), CODER))
      .apply(GroupByKey.create())
      .apply(
          "PubsubUnboundedSink.Writer",
          ParDo.of(
              new WriterFn(
                  pubsubFactory,
                  topic,
                  timestampAttribute,
                  idAttribute,
                  publishBatchSize,
                  publishBatchBytes)));

  return PDone.in(input.getPipeline());
}
Example #12
Source File: TriggerTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Translates an {@link AfterProcessingTime} trigger into its {@code RunnerApi.Trigger} proto,
 * converting each timestamp transform in the order the trigger reports them.
 *
 * @param v the trigger to translate
 * @return a proto trigger with its {@code afterProcessingTime} field populated
 */
private RunnerApi.Trigger convertSpecific(AfterProcessingTime v) {
  RunnerApi.Trigger.AfterProcessingTime.Builder processingTimeProto =
      RunnerApi.Trigger.AfterProcessingTime.newBuilder();

  for (TimestampTransform timestampTransform : v.getTimestampTransforms()) {
    processingTimeProto.addTimestampTransforms(convertTimestampTransform(timestampTransform));
  }

  RunnerApi.Trigger.Builder triggerProto = RunnerApi.Trigger.newBuilder();
  triggerProto.setAfterProcessingTime(processingTimeProto);
  return triggerProto.build();
}
Example #13
Source File: PCollectionTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the collections this parameterized test runs against: bounded ints, unbounded longs,
 * fixed-windowed longs, longs with a non-default coder and grouped strings.
 *
 * <p>A further collection with a custom coder, custom windows and a composite trigger is applied
 * to the same pipeline but is not part of the returned list.
 *
 * @return the parameter values, one collection per test run
 */
@Parameters(name = "{index}: {0}")
public static Iterable<PCollection<?>> data() {
  Pipeline pipeline = TestPipeline.create();

  PCollection<Integer> ints = pipeline.apply("ints", Create.of(1, 2, 3));
  PCollection<Long> longs = pipeline.apply("unbounded longs", GenerateSequence.from(0));
  PCollection<Long> windowedLongs =
      longs.apply(
          "into fixed windows", Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Iterable<String>>> groupedStrings =
      pipeline
          .apply(
              "kvs", Create.of(KV.of("foo", "spam"), KV.of("bar", "ham"), KV.of("baz", "eggs")))
          .apply("group", GroupByKey.create());

  PCollection<Long> coderLongs =
      pipeline
          .apply("counts with alternative coder", GenerateSequence.from(0).to(10))
          .setCoder(BigEndianLongCoder.of());

  // Early firing on whichever comes first: five elements or 227 ms of processing time.
  AfterFirst speculativeFiring =
      AfterFirst.of(
          AfterPane.elementCountAtLeast(5),
          AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(227L)));
  pipeline
      .apply(
          "intsWithCustomCoder",
          Create.of(1, 2).withCoder(new AutoValue_PCollectionTranslationTest_CustomIntCoder()))
      .apply(
          "into custom windows",
          Window.into(new CustomWindows())
              .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(speculativeFiring))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(12L)));

  return ImmutableList.of(ints, longs, windowedLongs, coderLongs, groupedStrings);
}
Example #14
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** Tests that a processing time timer does not cause window GC. */ @Test public void testProcessingTimeTimerDoesNotGc() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); tester.advanceProcessingTime(new Instant(10000)); tester.assertHasOnlyGlobalAndStateFor(new IntervalWindow(new Instant(0), new Instant(100))); assertThat( tester.extractOutput(), contains( isSingleWindowedValue( equalTo(7), 2, 0, 100, PaneInfo.createPane(true, false, Timing.EARLY, 0, 0)))); }
Example #15
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** * Tests that when a processing time timer comes in after a window is expired it is just ignored. */ @Test public void testLateProcessingTimeTimer() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); // After this advancement, the window is expired and only the GC process // should be allowed to touch it tester.advanceInputWatermarkNoTimers(new Instant(100)); // This should not output tester.advanceProcessingTime(new Instant(6000)); assertThat(tester.extractOutput(), emptyIterable()); }
Example #16
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** * Tests that when a processing time timer comes in after a window is expired but in the same * bundle it does not cause a spurious output. */ @Test public void testCombiningAccumulatingProcessingTime() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); tester.advanceInputWatermarkNoTimers(new Instant(100)); tester.advanceProcessingTimeNoTimers(new Instant(5010)); // Fires the GC/EOW timer at the same time as the processing time timer. tester.fireTimers( new IntervalWindow(new Instant(0), new Instant(100)), TimestampedValue.of(TimeDomain.EVENT_TIME, new Instant(100)), TimestampedValue.of(TimeDomain.PROCESSING_TIME, new Instant(5010))); assertThat( tester.extractOutput(), contains( isSingleWindowedValue( equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0)))); }
Example #17
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** * Tests that when a processing time timers comes in after a window is expired and GC'd it does * not cause a spurious output. */ @Test public void testCombiningAccumulatingProcessingTimeSeparateBundles() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); tester.advanceInputWatermark(new Instant(100)); tester.advanceProcessingTime(new Instant(5011)); assertThat( tester.extractOutput(), contains( isSingleWindowedValue( equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0)))); }
Example #18
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * An empty ON_TIME pane must be fired in the GlobalWindow once the watermark moves to
 * end-of-time, following the EARLY panes produced by the processing-time trigger.
 */
@Test
public void fireEmptyOnDrainInGlobalWindowIfRequested() throws Exception {
  Repeatedly threeMillisAfterFirstElement =
      Repeatedly.forever(
          AfterProcessingTime.pastFirstElementInPane().plusDelayOf(new Duration(3)));
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(threeMillisAfterFirstElement)
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  final int elementCount = 20;
  // The assertions below expect every early pane to hold four elements.
  final int expectedEarlyPanes = (elementCount + 3) / 4;

  for (int value = 0; value < elementCount; value++) {
    tester.advanceProcessingTime(new Instant(value));
    tester.injectElements(TimestampedValue.of(value, new Instant(value)));
  }
  tester.advanceProcessingTime(new Instant(elementCount + 4));

  List<WindowedValue<Iterable<Integer>>> earlyPanes = tester.extractOutput();
  assertEquals(expectedEarlyPanes, earlyPanes.size());
  for (int paneIndex = 0; paneIndex < earlyPanes.size(); paneIndex++) {
    WindowedValue<Iterable<Integer>> pane = earlyPanes.get(paneIndex);
    assertEquals(Timing.EARLY, pane.getPane().getTiming());
    assertEquals(paneIndex, pane.getPane().getIndex());
    assertEquals(4, Iterables.size(pane.getValue()));
  }

  // Draining: the watermark jumps to the end of time.
  tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  List<WindowedValue<Iterable<Integer>>> drainPanes = tester.extractOutput();
  assertEquals(1, drainPanes.size());
  WindowedValue<Iterable<Integer>> onTimePane = drainPanes.get(0);
  assertEquals(Timing.ON_TIME, onTimePane.getPane().getTiming());
  assertEquals(expectedEarlyPanes, onTimePane.getPane().getIndex());
  assertEquals(0, Iterables.size(onTimePane.getValue()));
}
Example #19
Source File: Task.java From beam with Apache License 2.0 | 5 votes |
/**
 * Counts the events per one-day fixed window. In addition to the pane at the end of the window,
 * early panes fire on processing time once a pane has its first element; panes accumulate and
 * no lateness is allowed.
 *
 * @param events the events to count
 * @return the counts, without a default value for empty windows
 */
static PCollection<Long> applyTransform(PCollection<String> events) {
  Window<String> dailyWindows =
      Window.<String>into(FixedWindows.of(Duration.standardDays(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<String> windowedEvents = events.apply(dailyWindows);
  return windowedEvents.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
}
Example #20
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Sums the integers per key in {@code TWO_MINUTES} fixed windows and formats every result as a
 * string. Panes fire repeatedly, {@code TWO_MINUTES} of processing time after the first element
 * of each pane, and accumulate across firings.
 *
 * @param input keyed integers to aggregate
 * @return the formatted per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  AfterProcessingTime delayedFiring =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TWO_MINUTES);

  Window<KV<String, Integer>> windowing =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(Repeatedly.forever(delayedFiring))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes();

  PCollection<KV<String, Integer>> sums = input.apply(windowing).apply(Sum.integersPerKey());
  return sums.apply(ParDo.of(new FormatAsStrings()));
}
Example #21
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Sums the integers per key in {@code TWO_MINUTES} fixed windows and formats every result as a
 * string. Early panes fire {@code ONE_MINUTE} of processing time after a pane's first element,
 * late panes fire on every late element, and panes accumulate. Lateness of up to 1000 days is
 * allowed.
 *
 * @param input keyed integers to aggregate
 * @return the formatted per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  AfterProcessingTime earlyFiring =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE);
  AfterPane lateFiring = AfterPane.elementCountAtLeast(1);

  Window<KV<String, Integer>> windowing =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(earlyFiring)
                  .withLateFirings(lateFiring))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes();

  PCollection<KV<String, Integer>> sums = input.apply(windowing).apply(Sum.integersPerKey());
  return sums.apply(ParDo.of(new FormatAsStrings()));
}
Example #22
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Sums the integers per key in {@code TWO_MINUTES} fixed windows and formats every result as a
 * string. Early panes fire {@code ONE_MINUTE} of processing time after a pane's first element,
 * late panes fire on every late element, and panes accumulate. Allowed lateness is limited to
 * {@code TWO_MINUTES}.
 *
 * @param input keyed integers to aggregate
 * @return the formatted per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  AfterProcessingTime earlyFiring =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE);
  AfterPane lateFiring = AfterPane.elementCountAtLeast(1);

  Window<KV<String, Integer>> windowing =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(earlyFiring)
                  .withLateFirings(lateFiring))
          .withAllowedLateness(TWO_MINUTES)
          .accumulatingFiredPanes();

  PCollection<KV<String, Integer>> sums = input.apply(windowing).apply(Sum.integersPerKey());
  return sums.apply(ParDo.of(new FormatAsStrings()));
}
Example #23
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Sums the integers per key in {@code TWO_MINUTES} fixed windows and formats every result as a
 * string. Early panes fire {@code ONE_MINUTE} of processing time after a pane's first element and
 * late panes fire on every late element. Fired panes are discarded, so each pane only reflects
 * the elements that arrived since the previous firing.
 *
 * @param input keyed integers to aggregate
 * @return the formatted per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  AfterProcessingTime earlyFiring =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE);
  AfterPane lateFiring = AfterPane.elementCountAtLeast(1);

  Window<KV<String, Integer>> windowing =
      Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(earlyFiring)
                  .withLateFirings(lateFiring))
          .withAllowedLateness(Duration.standardDays(1000))
          .discardingFiredPanes();

  PCollection<KV<String, Integer>> sums = input.apply(windowing).apply(Sum.integersPerKey());
  return sums.apply(ParDo.of(new FormatAsStrings()));
}
Example #24
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
/**
 * Sums the integers per key in session windows with a gap of {@code ONE_MINUTE} and formats every
 * result as a string. Early panes fire {@code ONE_MINUTE} of processing time after a pane's first
 * element, late panes fire on every late element, and panes accumulate.
 *
 * @param input keyed integers to aggregate
 * @return the formatted per-key sums
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  AfterProcessingTime earlyFiring =
      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE);
  AfterPane lateFiring = AfterPane.elementCountAtLeast(1);

  Window<KV<String, Integer>> sessions =
      Window.<KV<String, Integer>>into(Sessions.withGapDuration(ONE_MINUTE))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(earlyFiring)
                  .withLateFirings(lateFiring))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes();

  PCollection<KV<String, Integer>> sums = input.apply(sessions).apply(Sum.integersPerKey());
  return sums.apply(ParDo.of(new FormatAsStrings()));
}
Example #25
Source File: LeaderBoard.java From deployment-examples with MIT License | 5 votes |
@Override public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) { return input .apply( "LeaderboardUserGlobalWindow", Window.<GameActionInfo>into(new GlobalWindows()) // Get periodic results every ten minutes. .triggering( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES))) .accumulatingFiredPanes() .withAllowedLateness(allowedLateness)) // Extract and sum username/score pairs from the event data. .apply("ExtractUserScore", new ExtractAndSumScore("user")); }
Example #26
Source File: LeaderBoard.java From beam with Apache License 2.0 | 5 votes |
@Override public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) { return input .apply( "LeaderboardUserGlobalWindow", Window.<GameActionInfo>into(new GlobalWindows()) // Get periodic results every ten minutes. .triggering( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES))) .accumulatingFiredPanes() .withAllowedLateness(allowedLateness)) // Extract and sum username/score pairs from the event data. .apply("ExtractUserScore", new ExtractAndSumScore("user")); }
Example #27
Source File: Task.java From beam with Apache License 2.0 | 5 votes |
/**
 * Counts the events per one-day fixed window. In addition to the pane at the end of the window,
 * early panes fire on processing time once a pane has its first element; fired panes are
 * discarded and no lateness is allowed.
 *
 * @param events the events to count
 * @return the counts, without a default value for empty windows
 */
static PCollection<Long> applyTransform(PCollection<String> events) {
  Window<String> dailyWindows =
      Window.<String>into(FixedWindows.of(Duration.standardDays(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()))
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes();

  PCollection<String> windowedEvents = events.apply(dailyWindows);
  return windowedEvents.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
}
Example #28
Source File: DistinctTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Feeds the strings {@code k1..k3} twice, separated by one minute of processing time, through a
 * repeatedly firing, accumulating window and expects {@code Distinct} to output each of them
 * exactly once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinct() {
  Instant epoch = new Instant(0);

  TestStream<String> stream =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(epoch)
          .addElements(
              TimestampedValue.of("k1", epoch),
              TimestampedValue.of("k2", epoch.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", epoch.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of("k1", epoch.plus(Duration.standardSeconds(30))),
              TimestampedValue.of("k2", epoch.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", epoch.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  // One-minute windows firing 30 seconds of processing time after a pane's first element.
  Window<String> windowing =
      Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<String> distinctValues =
      triggeredDistinctPipeline.apply(stream).apply(windowing).apply(Distinct.create());

  PAssert.that(distinctValues).containsInAnyOrder("k1", "k2", "k3");
  triggeredDistinctPipeline.run();
}
Example #29
Source File: BatchLoads.java From beam with Apache License 2.0 | 4 votes |
private WriteResult expandTriggered(PCollection<KV<DestinationT, ElementT>> input) { checkArgument(numFileShards > 0); Pipeline p = input.getPipeline(); final PCollectionView<String> loadJobIdPrefixView = createLoadJobIdPrefixView(p); final PCollectionView<String> tempFilePrefixView = createTempFilePrefixView(p, loadJobIdPrefixView); // The user-supplied triggeringDuration is often chosen to control how many BigQuery load // jobs are generated, to prevent going over BigQuery's daily quota for load jobs. If this // is set to a large value, currently we have to buffer all the data until the trigger fires. // Instead we ensure that the files are written if a threshold number of records are ready. // We use only the user-supplied trigger on the actual BigQuery load. This allows us to // offload the data to the filesystem. PCollection<KV<DestinationT, ElementT>> inputInGlobalWindow = input.apply( "rewindowIntoGlobal", Window.<KV<DestinationT, ElementT>>into(new GlobalWindows()) .triggering( Repeatedly.forever( AfterFirst.of( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(triggeringFrequency), AfterPane.elementCountAtLeast(FILE_TRIGGERING_RECORD_COUNT)))) .discardingFiredPanes()); PCollection<WriteBundlesToFiles.Result<DestinationT>> results = writeShardedFiles(inputInGlobalWindow, tempFilePrefixView); // Apply the user's trigger before we start generating BigQuery load jobs. 
results = results.apply( "applyUserTrigger", Window.<WriteBundlesToFiles.Result<DestinationT>>into(new GlobalWindows()) .triggering( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(triggeringFrequency))) .discardingFiredPanes()); TupleTag<KV<ShardedKey<DestinationT>, List<String>>> multiPartitionsTag = new TupleTag<>("multiPartitionsTag"); TupleTag<KV<ShardedKey<DestinationT>, List<String>>> singlePartitionTag = new TupleTag<>("singlePartitionTag"); // If we have non-default triggered output, we can't use the side-input technique used in // expandUntriggered . Instead make the result list a main input. Apply a GroupByKey first for // determinism. PCollectionTuple partitions = results .apply("AttachSingletonKey", WithKeys.of((Void) null)) .setCoder( KvCoder.of(VoidCoder.of(), WriteBundlesToFiles.ResultCoder.of(destinationCoder))) .apply("GroupOntoSingleton", GroupByKey.create()) .apply("ExtractResultValues", Values.create()) .apply( "WritePartitionTriggered", ParDo.of( new WritePartition<>( singletonTable, dynamicDestinations, tempFilePrefixView, maxFilesPerPartition, maxBytesPerPartition, multiPartitionsTag, singlePartitionTag, rowWriterFactory)) .withSideInputs(tempFilePrefixView) .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag))); PCollection<KV<TableDestination, String>> tempTables = writeTempTables(partitions.get(multiPartitionsTag), loadJobIdPrefixView); tempTables // Now that the load job has happened, we want the rename to happen immediately. 
.apply( Window.<KV<TableDestination, String>>into(new GlobalWindows()) .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))) .apply(WithKeys.of((Void) null)) .setCoder(KvCoder.of(VoidCoder.of(), tempTables.getCoder())) .apply(GroupByKey.create()) .apply(Values.create()) .apply( "WriteRenameTriggered", ParDo.of( new WriteRename( bigQueryServices, loadJobIdPrefixView, writeDisposition, createDisposition, maxRetryJobs, kmsKey)) .withSideInputs(loadJobIdPrefixView)); writeSinglePartition(partitions.get(singlePartitionTag), loadJobIdPrefixView); return writeResult(p); }
Example #30
Source File: TriggerTranslationTest.java From beam with Apache License 2.0 | 4 votes |
@Parameters(name = "{index}: {0}") public static Iterable<ToProtoAndBackSpec> data() { return ImmutableList.of( // Atomic triggers toProtoAndBackSpec(AfterWatermark.pastEndOfWindow()), toProtoAndBackSpec(AfterPane.elementCountAtLeast(73)), toProtoAndBackSpec(AfterSynchronizedProcessingTime.ofFirstElement()), toProtoAndBackSpec(Never.ever()), toProtoAndBackSpec(DefaultTrigger.of()), toProtoAndBackSpec(AfterProcessingTime.pastFirstElementInPane()), toProtoAndBackSpec( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(23))), toProtoAndBackSpec( AfterProcessingTime.pastFirstElementInPane() .alignedTo(Duration.millis(5), new Instant(27))), toProtoAndBackSpec( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(Duration.standardSeconds(3)) .alignedTo(Duration.millis(5), new Instant(27)) .plusDelayOf(Duration.millis(13))), // Composite triggers toProtoAndBackSpec( AfterAll.of(AfterPane.elementCountAtLeast(79), AfterWatermark.pastEndOfWindow())), toProtoAndBackSpec( AfterEach.inOrder(AfterPane.elementCountAtLeast(79), AfterPane.elementCountAtLeast(3))), toProtoAndBackSpec( AfterFirst.of(AfterWatermark.pastEndOfWindow(), AfterPane.elementCountAtLeast(3))), toProtoAndBackSpec( AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(3))), toProtoAndBackSpec( AfterWatermark.pastEndOfWindow().withLateFirings(AfterPane.elementCountAtLeast(3))), toProtoAndBackSpec( AfterWatermark.pastEndOfWindow() .withEarlyFirings( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(42))) .withLateFirings(AfterPane.elementCountAtLeast(3))), toProtoAndBackSpec(Repeatedly.forever(AfterWatermark.pastEndOfWindow())), toProtoAndBackSpec( Repeatedly.forever(AfterPane.elementCountAtLeast(1)) .orFinally(AfterWatermark.pastEndOfWindow()))); }