org.apache.beam.sdk.transforms.windowing.Repeatedly Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.windowing.Repeatedly.
Each example notes the source file, project, and license it was taken from.
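Before diving in, it helps to recall the basic shape these snippets share: Repeatedly.forever wraps another trigger so that it keeps re-firing instead of firing only once, and the result is passed to Window.triggering together with an accumulation mode and an allowed-lateness setting. The fragment below is a minimal sketch of that pattern, not taken from any of the projects listed here; the input PCollection named events and the 10-second delay are assumptions for illustration, and the snippet presumes the usual windowing imports (Window, GlobalWindows, Repeatedly, AfterProcessingTime) plus org.joda.time.Duration.

// Minimal sketch (assumed input: events): keep emitting a pane roughly every
// 10 seconds of processing time for as long as the pipeline runs, discarding
// each pane's contents once it has fired.
PCollection<String> triggered =
    events.apply(
        Window.<String>into(new GlobalWindows())
            .triggering(
                Repeatedly.forever(
                    AfterProcessingTime.pastFirstElementInPane()
                        .plusDelayOf(Duration.standardSeconds(10))))
            .discardingFiredPanes()
            .withAllowedLateness(Duration.ZERO));

The examples that follow vary only the wrapped trigger (element counts, processing-time delays, or watermark-based late firings), the accumulation mode, and the window function, but they all hang off the same Repeatedly.forever(...) call.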
Example #1
Source File: WriteFeatureSetSpecAck.java From feast with Apache License 2.0 | 6 votes |
@Override
public PCollection<FeatureSetReference> expand(PCollection<FeatureSetReference> input) {
  return input
      .apply(
          "OnEveryElementTrigger",
          Window.<FeatureSetReference>into(new GlobalWindows())
              .accumulatingFiredPanes()
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .withAllowedLateness(Duration.ZERO))
      .apply("CountingReadySinks", Count.perElement())
      .apply(
          "WhenAllReady",
          Filter.by(
              (SerializableFunction<KV<FeatureSetReference, Long>, Boolean>)
                  count -> count.getValue() >= sinksCount))
      .apply(Keys.create());
}
Example #2
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  return input
      .apply(
          Window.<KV<K, V>>into(new GlobalWindows())
              .discardingFiredPanes()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.ZERO)
                          .alignedTo(intervalDuration, org.joda.time.Instant.now()))))
      .apply(GroupByKey.create())
      .apply(
          ParDo.of(
              new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
                @ProcessElement
                public void process(ProcessContext c) {
                  LOG.debug(
                      "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
                  Iterator<V> it = c.element().getValue().iterator();
                  if (it.hasNext()) {
                    c.output(KV.of(c.element().getKey(), it.next()));
                  }
                }
              }));
}
Example #3
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
@Test
@Category({ValidatesRunner.class, UsesSideInputs.class})
public void testGlobalCombineWithDefaultsAndTriggers() {
  PCollection<Integer> input = pipeline.apply(Create.of(1, 1));

  PCollection<String> output =
      input
          .apply(
              Window.<Integer>into(new GlobalWindows())
                  .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
                  .accumulatingFiredPanes()
                  .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS))
          .apply(Sum.integersGlobally())
          .apply(ParDo.of(new FormatPaneInfo()));

  // The actual elements produced are nondeterministic. Could be one, could be two.
  // But it should certainly have a final element with the correct final sum.
  PAssert.that(output)
      .satisfies(
          input1 -> {
            assertThat(input1, hasItem("2: true"));
            return null;
          });
  pipeline.run();
}
Example #4
Source File: MyBeamJob.java From hazelcast-jet-demos with Apache License 2.0 | 6 votes |
public static Pipeline build(PipelineOptions pipelineOptions) {
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  pipeline
      .apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
      .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
      .apply("window",
          Window.<String>into(FixedWindows.of(ONE_SECOND))
              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
              .discardingFiredPanes()
              .withAllowedLateness(ONE_SECOND))
      .apply("sink",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(".")
              .withPrefix("beam-output")
              .withNumShards(1));

  return pipeline;
}
Example #5
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3, 4, 5));

  PCollection<Integer> output =
      input
          .apply(
              Window.<Integer>into(new GlobalWindows())
                  .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
                  .accumulatingFiredPanes()
                  .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS))
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(output)
      .satisfies(
          input1 -> {
            assertThat(input1, hasItem(15));
            return null;
          });
  pipeline.run();
}
Example #6
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testWatermarkHoldForLateNewWindow() throws Exception {
  Duration allowedLateness = Duration.standardMinutes(1);
  Duration gapDuration = Duration.millis(10);
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(Sessions.withGapDuration(gapDuration))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
              .withTrigger(
                  Repeatedly.forever(
                      AfterWatermark.pastEndOfWindow()
                          .withLateFirings(AfterPane.elementCountAtLeast(1))))
              .withAllowedLateness(allowedLateness));
  tester.setAutoAdvanceOutputWatermark(false);

  assertEquals(null, tester.getWatermarkHold());
  assertEquals(null, tester.getOutputWatermark());
  tester.advanceInputWatermark(new Instant(40));
  injectElements(tester, 1);
  assertThat(tester.getWatermarkHold(), nullValue());
  injectElements(tester, 10);
  assertThat(tester.getWatermarkHold(), nullValue());
}
Example #7
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Tests that if end-of-window and GC timers come in together, that the pane is correctly marked
 * as final.
 */
@Test
public void testCombiningAccumulatingEventTime() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.millis(1))
          .withTrigger(Repeatedly.forever(AfterWatermark.pastEndOfWindow()));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  tester.advanceInputWatermark(new Instant(1000));

  assertThat(
      tester.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0))));
}
Example #8
Source File: WriteTables.java From beam with Apache License 2.0 | 5 votes |
@Override
public PCollection<KV<TableDestination, String>> expand(
    PCollection<KV<ShardedKey<DestinationT>, List<String>>> input) {
  PCollectionTuple writeTablesOutputs =
      input.apply(
          ParDo.of(new WriteTablesDoFn())
              .withSideInputs(sideInputs)
              .withOutputTags(mainOutputTag, TupleTagList.of(temporaryFilesTag)));

  // Garbage collect temporary files.
  // We mustn't start garbage collecting files until we are assured that the WriteTablesDoFn has
  // succeeded in loading those files and won't be retried. Otherwise, we might fail part of the
  // way through deleting temporary files, and retry WriteTablesDoFn. This will then fail due
  // to missing files, causing either the entire workflow to fail or get stuck (depending on how
  // the runner handles persistent failures).
  writeTablesOutputs
      .get(temporaryFilesTag)
      .setCoder(StringUtf8Coder.of())
      .apply(WithKeys.of((Void) null))
      .setCoder(KvCoder.of(VoidCoder.of(), StringUtf8Coder.of()))
      .apply(
          Window.<KV<Void, String>>into(new GlobalWindows())
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .discardingFiredPanes())
      .apply(GroupByKey.create())
      .apply(Values.create())
      .apply(ParDo.of(new GarbageCollectTemporaryFiles()));

  return writeTablesOutputs.get(mainOutputTag);
}
Example #9
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  return input
      .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(Repeatedly.forever(AfterProcessingTime
              .pastFirstElementInPane()
              .alignedTo(TWO_MINUTES, Utils.parseTime("12:05:00"))))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes())
      .apply(Sum.integersPerKey())
      .apply(ParDo.of(new FormatAsStrings()));
}
Example #10
Source File: PubsubUnboundedSink.java From beam with Apache License 2.0 | 5 votes |
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  input
      .apply(
          "PubsubUnboundedSink.Window",
          Window.<PubsubMessage>into(new GlobalWindows())
              .triggering(
                  Repeatedly.forever(
                      AfterFirst.of(
                          AfterPane.elementCountAtLeast(publishBatchSize),
                          AfterProcessingTime.pastFirstElementInPane().plusDelayOf(maxLatency))))
              .discardingFiredPanes())
      .apply("PubsubUnboundedSink.Shard", ParDo.of(new ShardFn(numShards, recordIdMethod)))
      .setCoder(KvCoder.of(VarIntCoder.of(), CODER))
      .apply(GroupByKey.create())
      .apply(
          "PubsubUnboundedSink.Writer",
          ParDo.of(
              new WriterFn(
                  pubsubFactory,
                  topic,
                  timestampAttribute,
                  idAttribute,
                  publishBatchSize,
                  publishBatchBytes)));
  return PDone.in(input.getPipeline());
}
Example #11
Source File: KafkaExactlyOnceSink.java From beam with Apache License 2.0 | 5 votes |
@Override
public PCollection<Void> expand(PCollection<ProducerRecord<K, V>> input) {
  int numShards = spec.getNumShards();
  if (numShards <= 0) {
    try (Consumer<?, ?> consumer = openConsumer(spec)) {
      numShards = consumer.partitionsFor(spec.getTopic()).size();
      LOG.info(
          "Using {} shards for exactly-once writer, matching number of partitions "
              + "for topic '{}'",
          numShards,
          spec.getTopic());
    }
  }
  checkState(numShards > 0, "Could not set number of shards");

  return input
      .apply(
          Window.<ProducerRecord<K, V>>into(new GlobalWindows()) // Everything into global window.
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .discardingFiredPanes())
      .apply(
          String.format("Shuffle across %d shards", numShards),
          ParDo.of(new Reshard<>(numShards)))
      .apply("Persist sharding", GroupByKey.create())
      .apply("Assign sequential ids", ParDo.of(new Sequencer<>()))
      .apply("Persist ids", GroupByKey.create())
      .apply(
          String.format("Write to Kafka topic '%s'", spec.getTopic()),
          ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder())));
}
Example #12
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** Tests that a processing time timer does not cause window GC. */
@Test
public void testProcessingTimeTimerDoesNotGc() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  tester.advanceProcessingTime(new Instant(10000));

  tester.assertHasOnlyGlobalAndStateFor(new IntervalWindow(new Instant(0), new Instant(100)));

  assertThat(
      tester.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, false, Timing.EARLY, 0, 0))));
}
Example #13
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests that when a processing time timer comes in after a window is expired it is just ignored.
 */
@Test
public void testLateProcessingTimeTimer() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  // After this advancement, the window is expired and only the GC process
  // should be allowed to touch it
  tester.advanceInputWatermarkNoTimers(new Instant(100));

  // This should not output
  tester.advanceProcessingTime(new Instant(6000));

  assertThat(tester.extractOutput(), emptyIterable());
}
Example #14
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests that when a processing time timer comes in after a window is expired but in the same
 * bundle it does not cause a spurious output.
 */
@Test
public void testCombiningAccumulatingProcessingTime() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  tester.advanceInputWatermarkNoTimers(new Instant(100));
  tester.advanceProcessingTimeNoTimers(new Instant(5010));

  // Fires the GC/EOW timer at the same time as the processing time timer.
  tester.fireTimers(
      new IntervalWindow(new Instant(0), new Instant(100)),
      TimestampedValue.of(TimeDomain.EVENT_TIME, new Instant(100)),
      TimestampedValue.of(TimeDomain.PROCESSING_TIME, new Instant(5010)));

  assertThat(
      tester.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0))));
}
Example #15
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Tests that when a processing time timer comes in after a window is expired and GC'd it does
 * not cause a spurious output.
 */
@Test
public void testCombiningAccumulatingProcessingTimeSeparateBundles() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  tester.advanceInputWatermark(new Instant(100));
  tester.advanceProcessingTime(new Instant(5011));

  assertThat(
      tester.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0))));
}
Example #16
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testMergingLateWatermarkHolds() throws Exception {
  MetricsContainerImpl container = new MetricsContainerImpl("any");
  MetricsEnvironment.setCurrentContainer(container);
  Duration gapDuration = Duration.millis(10);
  Duration allowedLateness = Duration.standardMinutes(100);
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(Sessions.withGapDuration(gapDuration))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
              .withTrigger(
                  Repeatedly.forever(
                      AfterWatermark.pastEndOfWindow()
                          .withLateFirings(AfterPane.elementCountAtLeast(10))))
              .withAllowedLateness(allowedLateness));
  tester.setAutoAdvanceOutputWatermark(false);

  // Input watermark -> null
  assertEquals(null, tester.getWatermarkHold());
  assertEquals(null, tester.getOutputWatermark());

  tester.advanceInputWatermark(new Instant(20));

  // Add two late elements that cause a window to merge.
  injectElements(tester, Arrays.asList(3));
  assertThat(tester.getWatermarkHold(), nullValue());
  injectElements(tester, Arrays.asList(4));
  Instant endOfWindow = new Instant(4).plus(gapDuration);
  // We expect a GC hold to be one less than the end of window plus the allowed lateness.
  Instant expectedGcHold = endOfWindow.plus(allowedLateness).minus(1);
  assertEquals(expectedGcHold, tester.getWatermarkHold());
  tester.advanceInputWatermark(new Instant(1000));
  assertEquals(expectedGcHold, tester.getWatermarkHold());
}
Example #17
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void noEmptyPanesFinalIfNonEmpty() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
              .withTrigger(
                  Repeatedly.forever(
                      AfterFirst.of(
                          AfterPane.elementCountAtLeast(2), AfterWatermark.pastEndOfWindow())))
              .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
              .withAllowedLateness(Duration.millis(100))
              .withTimestampCombiner(TimestampCombiner.EARLIEST)
              .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY));

  tester.advanceInputWatermark(new Instant(0));
  tester.injectElements(
      TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2)));
  tester.advanceInputWatermark(new Instant(20));
  tester.advanceInputWatermark(new Instant(250));

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertThat(
      output,
      contains(
          // Trigger with 2 elements
          isSingleWindowedValue(containsInAnyOrder(1, 2), 1, 0, 10),
          // Trigger for the empty on time pane
          isSingleWindowedValue(containsInAnyOrder(1, 2), 9, 0, 10)));
}
Example #18
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void noEmptyPanesFinalAlways() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
              .withTrigger(
                  Repeatedly.forever(
                      AfterFirst.of(
                          AfterPane.elementCountAtLeast(2), AfterWatermark.pastEndOfWindow())))
              .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
              .withAllowedLateness(Duration.millis(100))
              .withTimestampCombiner(TimestampCombiner.EARLIEST)
              .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS));

  tester.advanceInputWatermark(new Instant(0));
  tester.injectElements(
      TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2)));
  tester.advanceInputWatermark(new Instant(20));
  tester.advanceInputWatermark(new Instant(250));

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertThat(
      output,
      contains(
          // Trigger with 2 elements
          isSingleWindowedValue(containsInAnyOrder(1, 2), 1, 0, 10),
          // Trigger for the empty on time pane
          isSingleWindowedValue(containsInAnyOrder(1, 2), 9, 0, 10),
          // Trigger for the final pane
          isSingleWindowedValue(containsInAnyOrder(1, 2), 9, 0, 10)));
}
Example #19
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testPaneInfoFinalAndOnTime() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, IntervalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(FixedWindows.of(Duration.millis(10)))
              .withTrigger(
                  Repeatedly.forever(AfterPane.elementCountAtLeast(2))
                      .orFinally(AfterWatermark.pastEndOfWindow()))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES)
              .withAllowedLateness(Duration.millis(100))
              .withClosingBehavior(ClosingBehavior.FIRE_ALWAYS));

  tester.advanceInputWatermark(new Instant(0));

  // Should trigger due to element count
  tester.injectElements(
      TimestampedValue.of(1, new Instant(1)), TimestampedValue.of(2, new Instant(2)));

  assertThat(
      tester.extractOutput(),
      contains(
          WindowMatchers.valueWithPaneInfo(
              PaneInfo.createPane(true, false, Timing.EARLY, 0, -1))));

  tester.advanceInputWatermark(new Instant(150));
  assertThat(
      tester.extractOutput(),
      contains(
          WindowMatchers.valueWithPaneInfo(
              PaneInfo.createPane(false, true, Timing.ON_TIME, 1, 0))));
}
Example #20
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * We should fire a non-empty ON_TIME pane in the GlobalWindow when the watermark moves to
 * end-of-time.
 */
@Test
public void fireNonEmptyOnDrainInGlobalWindow() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(Repeatedly.forever(AfterPane.elementCountAtLeast(3)))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  tester.advanceInputWatermark(new Instant(0));

  final int n = 20;
  for (int i = 0; i < n; i++) {
    tester.injectElements(TimestampedValue.of(i, new Instant(i)));
  }

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertEquals(n / 3, output.size());
  for (int i = 0; i < output.size(); i++) {
    assertEquals(Timing.EARLY, output.get(i).getPane().getTiming());
    assertEquals(i, output.get(i).getPane().getIndex());
    assertEquals(3, Iterables.size(output.get(i).getValue()));
  }

  tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  output = tester.extractOutput();
  assertEquals(1, output.size());
  assertEquals(Timing.ON_TIME, output.get(0).getPane().getTiming());
  assertEquals(n / 3, output.get(0).getPane().getIndex());
  assertEquals(n - ((n / 3) * 3), Iterables.size(output.get(0).getValue()));
}
Example #21
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * We should fire an empty ON_TIME pane in the GlobalWindow when the watermark moves to
 * end-of-time.
 */
@Test
public void fireEmptyOnDrainInGlobalWindowIfRequested() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(new Duration(3))))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  final int n = 20;
  for (int i = 0; i < n; i++) {
    tester.advanceProcessingTime(new Instant(i));
    tester.injectElements(TimestampedValue.of(i, new Instant(i)));
  }
  tester.advanceProcessingTime(new Instant(n + 4));

  List<WindowedValue<Iterable<Integer>>> output = tester.extractOutput();
  assertEquals((n + 3) / 4, output.size());
  for (int i = 0; i < output.size(); i++) {
    assertEquals(Timing.EARLY, output.get(i).getPane().getTiming());
    assertEquals(i, output.get(i).getPane().getIndex());
    assertEquals(4, Iterables.size(output.get(i).getValue()));
  }

  tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  output = tester.extractOutput();
  assertEquals(1, output.size());
  assertEquals(Timing.ON_TIME, output.get(0).getPane().getTiming());
  assertEquals((n + 3) / 4, output.get(0).getPane().getIndex());
  assertEquals(0, Iterables.size(output.get(0).getValue()));
}
Example #22
Source File: BeamSqlDslAggregationTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testSupportsGlobalWindowWithCustomTrigger() throws Exception {
  pipeline.enableAbandonedNodeEnforcement(false);

  DateTime startTime = parseTimestampWithoutTimeZone("2017-1-1 0:0:0");

  Schema type =
      Schema.builder()
          .addInt32Field("f_intGroupingKey")
          .addInt32Field("f_intValue")
          .addDateTimeField("f_timestamp")
          .build();

  Object[] rows =
      new Object[] {
        0, 1, startTime.plusSeconds(0),
        0, 2, startTime.plusSeconds(1),
        0, 3, startTime.plusSeconds(2),
        0, 4, startTime.plusSeconds(3),
        0, 5, startTime.plusSeconds(4),
        0, 6, startTime.plusSeconds(6)
      };

  PCollection<Row> input =
      createTestPCollection(type, rows, "f_timestamp")
          .apply(
              Window.<Row>into(new GlobalWindows())
                  .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(2)))
                  .discardingFiredPanes()
                  .withOnTimeBehavior(Window.OnTimeBehavior.FIRE_IF_NON_EMPTY));

  String sql = "SELECT SUM(f_intValue) AS `sum` FROM PCOLLECTION GROUP BY f_intGroupingKey";

  PCollection<Row> result = input.apply("sql", SqlTransform.query(sql));

  assertEquals(new GlobalWindows(), result.getWindowingStrategy().getWindowFn());

  PAssert.that(result).containsInAnyOrder(rowsWithSingleIntField("sum", Arrays.asList(3, 7, 11)));

  pipeline.run();
}
Example #23
Source File: ReadFeatureSetSpecs.java From feast with Apache License 2.0 | 5 votes |
@Override
public PCollection<KV<FeatureSetReference, FeatureSetSpec>> expand(PBegin input) {
  return input
      .apply(
          KafkaIO.readBytes()
              .withBootstrapServers(
                  getSpecsStreamingUpdateConfig().getSource().getBootstrapServers())
              .withTopic(getSpecsStreamingUpdateConfig().getSource().getTopic())
              .withConsumerConfigUpdates(
                  ImmutableMap.of(
                      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest",
                      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false)))
      .apply("ParseFeatureSetSpec", ParDo.of(new KafkaRecordToFeatureSetSpec()))
      .apply("OnlyRelevantSpecs", Filter.by(new FilterRelevantFunction(getSource(), getStores())))
      .apply(
          Window.<KV<String, FeatureSetSpec>>into(new GlobalWindows())
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.ZERO))
      .apply(
          Combine.perKey(
              (SerializableFunction<Iterable<FeatureSetSpec>, FeatureSetSpec>)
                  specs -> {
                    ArrayList<FeatureSetSpec> featureSetSpecs = Lists.newArrayList(specs);
                    featureSetSpecs.sort(
                        Comparator.comparing(FeatureSetSpec::getVersion).reversed());
                    return featureSetSpecs.get(0);
                  }))
      .apply("CreateFeatureSetReferenceKey", ParDo.of(new CreateFeatureSetReference()))
      .setCoder(
          KvCoder.of(
              AvroCoder.of(FeatureSetReference.class), ProtoCoder.of(FeatureSetSpec.class)));
}
Example #24
Source File: LeaderBoard.java From deployment-examples with MIT License | 5 votes |
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) {
  return input
      .apply(
          "LeaderboardUserGlobalWindow",
          Window.<GameActionInfo>into(new GlobalWindows())
              // Get periodic results every ten minutes.
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
              .accumulatingFiredPanes()
              .withAllowedLateness(allowedLateness))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"));
}
Example #25
Source File: KafkaExactlyOnceSink.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Override
public PCollection<Void> expand(PCollection<KV<K, V>> input) {
  int numShards = spec.getNumShards();
  if (numShards <= 0) {
    try (Consumer<?, ?> consumer = openConsumer(spec)) {
      numShards = consumer.partitionsFor(spec.getTopic()).size();
      LOG.info(
          "Using {} shards for exactly-once writer, matching number of partitions "
              + "for topic '{}'",
          numShards,
          spec.getTopic());
    }
  }
  checkState(numShards > 0, "Could not set number of shards");

  return input
      .apply(
          Window.<KV<K, V>>into(new GlobalWindows()) // Everything into global window.
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .discardingFiredPanes())
      .apply(
          String.format("Shuffle across %d shards", numShards),
          ParDo.of(new Reshard<>(numShards)))
      .apply("Persist sharding", GroupByKey.create())
      .apply("Assign sequential ids", ParDo.of(new Sequencer<>()))
      .apply("Persist ids", GroupByKey.create())
      .apply(
          String.format("Write to Kafka topic '%s'", spec.getTopic()),
          ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder())));
}
Example #26
Source File: LeaderBoard.java From beam with Apache License 2.0 | 5 votes |
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) {
  return input
      .apply(
          "LeaderboardUserGlobalWindow",
          Window.<GameActionInfo>into(new GlobalWindows())
              // Get periodic results every ten minutes.
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
              .accumulatingFiredPanes()
              .withAllowedLateness(allowedLateness))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"));
}
Example #27
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  return input
      .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(Repeatedly.forever(
              AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TWO_MINUTES)))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes())
      .apply(Sum.integersPerKey())
      .apply(ParDo.of(new FormatAsStrings()));
}
Example #28
Source File: BeamModel.java From streamingbook with Apache License 2.0 | 5 votes |
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  return input
      .apply(Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.standardDays(1000))
          .accumulatingFiredPanes())
      .apply(Sum.integersPerKey())
      .apply(ParDo.of(new FormatAsStrings()));
}
Example #29
Source File: BeamSqlDslJoinTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testRejectsNonGlobalWindowsWithRepeatingTrigger() throws Exception {
  String sql =
      "SELECT o1.order_id, o1.price, o1.site_id, o2.order_id, o2.price, o2.site_id "
          + "FROM ORDER_DETAILS1 o1"
          + " JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> orders =
      ordersUnbounded()
          .apply(
              "window",
              Window.<Row>into(FixedWindows.of(Duration.standardSeconds(203)))
                  .triggering(Repeatedly.forever(AfterWatermark.pastEndOfWindow()))
                  .withAllowedLateness(Duration.standardMinutes(2))
                  .accumulatingFiredPanes());
  PCollectionTuple inputs = tuple("ORDER_DETAILS1", orders, "ORDER_DETAILS2", orders);

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage(
      stringContainsInOrder(Arrays.asList("once per window", "default trigger")));

  inputs.apply("sql", SqlTransform.query(sql));

  pipeline.run();
}