org.apache.beam.sdk.transforms.windowing.AfterProcessingTime Java Examples

The following examples show how to use org.apache.beam.sdk.transforms.windowing.AfterProcessingTime. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LeaderBoard.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Windows game events into fixed windows of {@code teamWindowDuration} and sums scores per team.
 *
 * <p>Panes accumulate. They fire early on a processing-time delay of {@code FIVE_MINUTES} after
 * the first element in the pane, when the watermark passes the end of the window, and late on a
 * processing-time delay of {@code TEN_MINUTES}, for data within {@code allowedLateness}.
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) {
  // Early (speculative) results plus cumulative processing of late data.
  Window<GameActionInfo> teamWindows =
      Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(FIVE_MINUTES))
                  .withLateFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
          .withAllowedLateness(allowedLateness)
          .accumulatingFiredPanes();

  PCollection<GameActionInfo> windowedInfos =
      infos.apply("LeaderboardTeamFixedWindows", teamWindows);

  // Extract and sum teamname/score pairs from the event data.
  return windowedInfos.apply("ExtractTeamScore", new ExtractAndSumScore("team"));
}
 
Example #2
Source File: LeaderBoard.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Computes per-team score sums over fixed windows of {@code teamWindowDuration}.
 *
 * <p>The windowing fires speculative panes before the watermark passes the end of the window and
 * further panes for late data within {@code allowedLateness}. Each pane accumulates everything
 * fired so far.
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) {
  // We will get early (speculative) results as well as cumulative processing of late data.
  Window<GameActionInfo> fixedTeamWindowing =
      Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(FIVE_MINUTES))
                  .withLateFirings(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
          .withAllowedLateness(allowedLateness)
          .accumulatingFiredPanes();

  PCollection<GameActionInfo> windowed =
      infos.apply("LeaderboardTeamFixedWindows", fixedTeamWindowing);

  // Extract and sum teamname/score pairs from the event data.
  return windowed.apply("ExtractTeamScore", new ExtractAndSumScore("team"));
}
 
Example #3
Source File: MyBeamJob.java    From hazelcast-jet-demos with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles the Beam pipeline for this job.
 *
 * <p>Stages, in order: read from {@code MyUnboundedSource("beam-input")}, apply {@code
 * MyEnrichAndReformatFn}, window into fixed windows of {@code ONE_SECOND} with a repeated
 * processing-time trigger and discarding panes, then write text files with prefix {@code
 * "beam-output"} to {@code "."} using a single shard.
 *
 * @param pipelineOptions options used to create the pipeline
 * @return the constructed (not yet run) pipeline
 */
public static Pipeline build(PipelineOptions pipelineOptions) {
    Pipeline pipeline = Pipeline.create(pipelineOptions);

    // Fire repeatedly in processing time; each fired pane is discarded afterwards.
    Window<String> oneSecondWindows =
        Window.<String>into(FixedWindows.of(ONE_SECOND))
            .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
            .discardingFiredPanes()
            .withAllowedLateness(ONE_SECOND);

    pipeline
        .apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
        .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
        .apply("window", oneSecondWindows)
        .apply(
            "sink",
            FileIO.<String>write()
                .via(TextIO.sink())
                .to(".")
                .withPrefix("beam-output")
                .withNumShards(1));

    return pipeline;
}
 
Example #4
Source File: BigQueryMerger.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Groups the input by key in the global window and emits one value per key for each fired pane.
 *
 * <p>The trigger fires repeatedly in processing time, aligned to {@code intervalDuration}
 * boundaries offset from the instant at which this transform is expanded. Fired panes are
 * discarded. Only the first value returned by the grouped iterable is emitted for each key.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  Window<KV<K, V>> alignedGlobalWindow =
      Window.<KV<K, V>>into(new GlobalWindows())
          .discardingFiredPanes()
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.ZERO)
                      .alignedTo(intervalDuration, org.joda.time.Instant.now())));

  PCollection<KV<K, Iterable<V>>> grouped =
      input.apply(alignedGlobalWindow).apply(GroupByKey.create());

  return grouped.apply(
      ParDo.of(
          new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
            @ProcessElement
            public void process(ProcessContext c) {
              LOG.debug(
                  "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
              Iterator<V> values = c.element().getValue().iterator();
              if (!values.hasNext()) {
                // Nothing grouped under this key in this pane; emit nothing.
                return;
              }
              c.output(KV.of(c.element().getKey(), values.next()));
            }
          }));
}
 
Example #5
Source File: BigQueryMerger.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Reduces the keyed input to a single value per key for every pane fired in the global window.
 *
 * <p>Panes fire repeatedly on a processing-time trigger aligned to {@code intervalDuration}, with
 * the alignment offset taken from {@code Instant.now()} at expansion time, and are discarded once
 * fired. For each key, the first element of the grouped values is output.
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  Window<KV<K, V>> periodicGlobalWindowing =
      Window.<KV<K, V>>into(new GlobalWindows())
          .discardingFiredPanes()
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.ZERO)
                      .alignedTo(intervalDuration, org.joda.time.Instant.now())));

  PCollection<KV<K, Iterable<V>>> valuesByKey =
      input.apply(periodicGlobalWindowing).apply(GroupByKey.create());

  return valuesByKey.apply(
      ParDo.of(
          new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
            @ProcessElement
            public void process(ProcessContext c) {
              LOG.debug(
                  "TS: {} | Element: {} | Pane: {}", c.timestamp(), c.element(), c.pane());
              Iterator<V> cursor = c.element().getValue().iterator();
              if (!cursor.hasNext()) {
                // Empty group: there is no value to forward for this key.
                return;
              }
              c.output(KV.of(c.element().getKey(), cursor.next()));
            }
          }));
}
 
Example #6
Source File: DistinctTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the same three keys twice, one minute of processing time apart, through an accumulating
 * repeated processing-time trigger and expects {@code Distinct.withRepresentativeValueFn} to
 * output each keyed value exactly once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValues() {
  Instant start = new Instant(0);

  TestStream<KV<Integer, String>> stream =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(start)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), start),
              TimestampedValue.of(KV.of(2, "k2"), start.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), start.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), start.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), start.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), start.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  // One-minute fixed windows that re-fire 30s (processing time) after the first element.
  Window<KV<Integer, String>> oneMinuteWindows =
      Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<KV<Integer, String>> deduped =
      triggeredDistinctRepresentativePipeline
          .apply(stream)
          .apply(oneMinuteWindows)
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(deduped).containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));
  triggeredDistinctRepresentativePipeline.run();
}
 
Example #7
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key over fixed windows of {@code TWO_MINUTES} and formats the results with
 * {@code FormatAsStrings}.
 *
 * <p>Panes accumulate and fire repeatedly on a processing-time trigger configured with {@code
 * alignedTo(TWO_MINUTES, Utils.parseTime("12:05:00"))}.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> alignedWindows =
        Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
            .triggering(
                Repeatedly.forever(
                    AfterProcessingTime.pastFirstElementInPane()
                        .alignedTo(TWO_MINUTES, Utils.parseTime("12:05:00"))))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    return input
        .apply(alignedWindows)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #8
Source File: DistinctTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Regression test: when every value had already been emitted by a speculative (early) firing, the
 * subsequent on-time firing used to produce a null KV.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValuesEmpty() {
  Instant start = new Instant(0);

  // A single element, then enough processing time for the early firing, then the final watermark.
  TestStream<KV<Integer, String>> stream =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(start)
          .addElements(TimestampedValue.of(KV.of(1, "k1"), start))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .advanceWatermarkToInfinity();

  // Discarding panes: the on-time pane has nothing left once the early pane has fired.
  Window<KV<Integer, String>> earlyFiringWindows =
      Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes();

  PCollection<KV<Integer, String>> deduped =
      triggeredDistinctRepresentativePipeline
          .apply(stream)
          .apply(earlyFiringWindows)
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(deduped).containsInAnyOrder(KV.of(1, "k1"));
  triggeredDistinctRepresentativePipeline.run();
}
 
Example #9
Source File: GroupByKeyTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a processing-time timer arriving after its window has expired does not cause a
 * spurious output.
 */
@Test
@Category({ValidatesRunner.class, UsesTestStreamWithProcessingTime.class})
public void testCombiningAccumulatingProcessingTime() throws Exception {
  // The watermark reaches the end of the window before processing time advances by the delay.
  TestStream<Integer> events =
      TestStream.create(VarIntCoder.of())
          .advanceWatermarkTo(new Instant(0))
          .addElements(
              TimestampedValue.of(2, new Instant(2)), TimestampedValue.of(5, new Instant(5)))
          .advanceWatermarkTo(new Instant(100))
          .advanceProcessingTime(Duration.millis(10))
          .advanceWatermarkToInfinity();

  Window<Integer> hundredMillisWindows =
      Window.<Integer>into(FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO)
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  PCollection<Integer> triggeredSums =
      p.apply(events)
          .apply(hundredMillisWindows)
          .apply(Sum.integersGlobally().withoutDefaults());

  // Exactly one sum (2 + 5) is expected.
  PAssert.that(triggeredSums).containsInAnyOrder(7);

  p.run();
}
 
Example #10
Source File: TestStreamTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Drives a {@code TestStream} with processing-time advances and checks that the early
 * processing-time firings in the global window produce the running sums 3 and 6.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testProcessingTimeTrigger() {
  TestStream<Long> events =
      TestStream.create(VarLongCoder.of())
          .addElements(
              TimestampedValue.of(1L, new Instant(1000L)),
              TimestampedValue.of(2L, new Instant(2000L)))
          .advanceProcessingTime(Duration.standardMinutes(12))
          .addElements(TimestampedValue.of(3L, new Instant(3000L)))
          .advanceProcessingTime(Duration.standardMinutes(6))
          .advanceWatermarkToInfinity();

  // Early panes fire 5 minutes of processing time after the first element; panes accumulate.
  Window<Long> earlyFiring =
      Window.<Long>configure()
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.standardMinutes(5))))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO);

  PCollection<Long> total = p.apply(events).apply(earlyFiring).apply(Sum.longsGlobally());

  PAssert.that(total).inEarlyGlobalWindowPanes().containsInAnyOrder(3L, 6L);

  p.run();
}
 
Example #11
Source File: PubsubUnboundedSink.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Batches messages in the global window, shards them, groups by shard and hands each group to
 * {@code WriterFn}.
 *
 * <p>The repeated trigger combines an element count of {@code publishBatchSize} with a
 * processing-time delay of {@code maxLatency} after the first element in the pane, using {@code
 * AfterFirst}. Fired panes are discarded.
 *
 * @return {@code PDone} in the input's pipeline
 */
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  Window<PubsubMessage> batchingWindow =
      Window.<PubsubMessage>into(new GlobalWindows())
          .triggering(
              Repeatedly.forever(
                  AfterFirst.of(
                      AfterPane.elementCountAtLeast(publishBatchSize),
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(maxLatency))))
          .discardingFiredPanes();

  input
      .apply("PubsubUnboundedSink.Window", batchingWindow)
      .apply("PubsubUnboundedSink.Shard", ParDo.of(new ShardFn(numShards, recordIdMethod)))
      .setCoder(KvCoder.of(VarIntCoder.of(), CODER))
      .apply(GroupByKey.create())
      .apply(
          "PubsubUnboundedSink.Writer",
          ParDo.of(
              new WriterFn(
                  pubsubFactory,
                  topic,
                  timestampAttribute,
                  idAttribute,
                  publishBatchSize,
                  publishBatchBytes)));

  return PDone.in(input.getPipeline());
}
 
Example #12
Source File: TriggerTranslation.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an {@code AfterProcessingTime} trigger into a {@code RunnerApi.Trigger}, translating
 * each of its timestamp transforms in iteration order.
 *
 * @param v the trigger to convert
 * @return a {@code RunnerApi.Trigger} with its after-processing-time field set
 */
private RunnerApi.Trigger convertSpecific(AfterProcessingTime v) {
  RunnerApi.Trigger.AfterProcessingTime.Builder processingTimeBuilder =
      RunnerApi.Trigger.AfterProcessingTime.newBuilder();

  for (TimestampTransform timestampTransform : v.getTimestampTransforms()) {
    processingTimeBuilder.addTimestampTransforms(convertTimestampTransform(timestampTransform));
  }

  return RunnerApi.Trigger.newBuilder().setAfterProcessingTime(processingTimeBuilder).build();
}
 
Example #13
Source File: PCollectionTranslationTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies the parameterized test cases: a set of {@code PCollection}s built on one test pipeline
 * with differing boundedness, windowing and coders.
 *
 * @return the collections {@code ints}, {@code longs}, {@code windowedLongs}, {@code coderLongs}
 *     and {@code groupedStrings}
 */
@Parameters(name = "{index}: {0}")
public static Iterable<PCollection<?>> data() {
  Pipeline pipeline = TestPipeline.create();
  PCollection<Integer> ints = pipeline.apply("ints", Create.of(1, 2, 3));
  PCollection<Long> longs = pipeline.apply("unbounded longs", GenerateSequence.from(0));
  PCollection<Long> windowedLongs =
      longs.apply(
          "into fixed windows", Window.into(FixedWindows.of(Duration.standardMinutes(10L))));
  PCollection<KV<String, Iterable<String>>> groupedStrings =
      pipeline
          .apply(
              "kvs", Create.of(KV.of("foo", "spam"), KV.of("bar", "ham"), KV.of("baz", "eggs")))
          .apply("group", GroupByKey.create());
  PCollection<Long> coderLongs =
      pipeline
          .apply("counts with alternative coder", GenerateSequence.from(0).to(10))
          .setCoder(BigEndianLongCoder.of());
  // NOTE(review): the result of this chain is neither stored nor included in the returned list;
  // it is only applied to the pipeline. Confirm whether that omission is intentional.
  pipeline
      .apply(
          "intsWithCustomCoder",
          Create.of(1, 2).withCoder(new AutoValue_PCollectionTranslationTest_CustomIntCoder()))
      .apply(
          "into custom windows",
          Window.into(new CustomWindows())
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterFirst.of(
                              AfterPane.elementCountAtLeast(5),
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(Duration.millis(227L)))))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(12L)));
  return ImmutableList.of(ints, longs, windowedLongs, coderLongs, groupedStrings);
}
 
Example #14
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Verifies that firing a processing-time timer does not garbage-collect the window's state. */
@Test
public void testProcessingTimeTimerDoesNotGc() throws Exception {
  WindowingStrategy<?, IntervalWindow> windowing =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> runner =
      ReduceFnTester.combining(windowing, Sum.ofIntegers(), VarIntCoder.of());

  // The only window the injected elements fall into.
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(100));

  runner.advanceProcessingTime(new Instant(5000));
  injectElement(runner, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(runner, 5);

  // Move processing time past the timer; the input watermark is not advanced.
  runner.advanceProcessingTime(new Instant(10000));

  runner.assertHasOnlyGlobalAndStateFor(firstWindow);

  assertThat(
      runner.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, false, Timing.EARLY, 0, 0))));
}
 
Example #15
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that when a processing time timer comes in after a window is expired it is just ignored.
 */
@Test
public void testLateProcessingTimeTimer() throws Exception {
  // 100ms fixed windows, accumulating panes, zero allowed lateness, and a trigger that re-fires
  // 10ms of processing time after the first element in each pane.
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  // After this advancement, the window is expired and only the GC process
  // should be allowed to touch it
  tester.advanceInputWatermarkNoTimers(new Instant(100));

  // This should not output
  tester.advanceProcessingTime(new Instant(6000));

  // The late processing-time timer must have produced nothing.
  assertThat(tester.extractOutput(), emptyIterable());
}
 
Example #16
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that a processing-time timer which arrives after its window has expired, but within the
 * same bundle as the end-of-window timer, does not cause a spurious output.
 */
@Test
public void testCombiningAccumulatingProcessingTime() throws Exception {
  WindowingStrategy<?, IntervalWindow> windowing =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> runner =
      ReduceFnTester.combining(windowing, Sum.ofIntegers(), VarIntCoder.of());

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(100));

  runner.advanceProcessingTime(new Instant(5000));
  injectElement(runner, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(runner, 5);

  // Advance both clocks without firing timers so that they can be delivered together below.
  runner.advanceInputWatermarkNoTimers(new Instant(100));
  runner.advanceProcessingTimeNoTimers(new Instant(5010));

  // Fires the GC/EOW timer at the same time as the processing time timer.
  runner.fireTimers(
      firstWindow,
      TimestampedValue.of(TimeDomain.EVENT_TIME, new Instant(100)),
      TimestampedValue.of(TimeDomain.PROCESSING_TIME, new Instant(5010)));

  // Only the single ON_TIME pane with the combined sum is expected.
  assertThat(
      runner.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0))));
}
 
Example #17
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Tests that when a processing time timer comes in after a window is expired and GC'd it does
 * not cause a spurious output.
 */
@Test
public void testCombiningAccumulatingProcessingTimeSeparateBundles() throws Exception {
  // 100ms fixed windows, accumulating panes, zero allowed lateness, and a trigger that re-fires
  // 10ms of processing time after the first element in each pane.
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  // The watermark is advanced first and processing time second, as two separate steps.
  tester.advanceInputWatermark(new Instant(100));
  tester.advanceProcessingTime(new Instant(5011));

  // Only the single ON_TIME pane with the combined sum is expected.
  assertThat(
      tester.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0))));
}
 
Example #18
Source File: ReduceFnRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * We should fire an empty ON_TIME pane in the GlobalWindow when the watermark moves to
 * end-of-time.
 */
@Test
public void fireEmptyOnDrainInGlobalWindowIfRequested() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> tester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(new Duration(3))))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  final int n = 20;
  // Number of early panes expected from n elements at four elements per pane.
  final int expectedEarlyPanes = (n + 3) / 4;

  // One element per processing-time tick.
  for (int tick = 0; tick < n; tick++) {
    tester.advanceProcessingTime(new Instant(tick));
    tester.injectElements(TimestampedValue.of(tick, new Instant(tick)));
  }
  tester.advanceProcessingTime(new Instant(n + 4));

  List<WindowedValue<Iterable<Integer>>> earlyOutput = tester.extractOutput();
  assertEquals(expectedEarlyPanes, earlyOutput.size());
  for (int i = 0; i < earlyOutput.size(); i++) {
    WindowedValue<Iterable<Integer>> pane = earlyOutput.get(i);
    assertEquals(Timing.EARLY, pane.getPane().getTiming());
    assertEquals(i, pane.getPane().getIndex());
    assertEquals(4, Iterables.size(pane.getValue()));
  }

  // Draining: move the watermark to the end of time.
  tester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  List<WindowedValue<Iterable<Integer>>> drainOutput = tester.extractOutput();
  assertEquals(1, drainOutput.size());
  WindowedValue<Iterable<Integer>> finalPane = drainOutput.get(0);
  assertEquals(Timing.ON_TIME, finalPane.getPane().getTiming());
  assertEquals(expectedEarlyPanes, finalPane.getPane().getIndex());
  assertEquals(0, Iterables.size(finalPane.getValue()));
}
 
Example #19
Source File: Task.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Counts events per one-day fixed window, emitting early results on a processing-time trigger
 * before the watermark passes the end of the window. Fired panes accumulate.
 *
 * @param events the input events
 * @return the element counts, without a default value for empty windows
 */
static PCollection<Long> applyTransform(PCollection<String> events) {
  Window<String> dailyWindows =
      Window.<String>into(FixedWindows.of(Duration.standardDays(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<String> windowedEvents = events.apply(dailyWindows);

  return windowedEvents.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
}
 
Example #20
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key over fixed windows of {@code TWO_MINUTES}, firing repeatedly with a
 * processing-time delay of {@code TWO_MINUTES} after the first element in each pane, and formats
 * the results with {@code FormatAsStrings}. Fired panes accumulate.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> periodicWindows =
        Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
            .triggering(
                Repeatedly.forever(
                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TWO_MINUTES)))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    return input
        .apply(periodicWindows)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #21
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key over fixed windows of {@code TWO_MINUTES} with early, on-time and late
 * firings, then formats the results with {@code FormatAsStrings}.
 *
 * <p>Early panes use a processing-time delay of {@code ONE_MINUTE}. Late panes fire once at least
 * one element has arrived. Allowed lateness is 1000 days and fired panes accumulate.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> earlyOnTimeLate =
        Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    return input
        .apply(earlyOnTimeLate)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #22
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key over fixed windows of {@code TWO_MINUTES} with early, on-time and late
 * firings, then formats the results with {@code FormatAsStrings}.
 *
 * <p>Allowed lateness is {@code TWO_MINUTES} and fired panes accumulate.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> boundedLateness =
        Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(TWO_MINUTES)
            .accumulatingFiredPanes();

    return input
        .apply(boundedLateness)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #23
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key over fixed windows of {@code TWO_MINUTES} with early, on-time and late
 * firings, then formats the results with {@code FormatAsStrings}.
 *
 * <p>Fired panes are discarded, and allowed lateness is 1000 days.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> discardingWindows =
        Window.<KV<String, Integer>>into(FixedWindows.of(TWO_MINUTES))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(Duration.standardDays(1000))
            .discardingFiredPanes();

    return input
        .apply(discardingWindows)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #24
Source File: BeamModel.java    From streamingbook with Apache License 2.0 5 votes vote down vote up
/**
 * Sums integers per key over session windows with a gap duration of {@code ONE_MINUTE}, with
 * early, on-time and late firings, then formats the results with {@code FormatAsStrings}.
 *
 * <p>Fired panes accumulate, and allowed lateness is 1000 days.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
    Window<KV<String, Integer>> sessionWindows =
        Window.<KV<String, Integer>>into(Sessions.withGapDuration(ONE_MINUTE))
            .triggering(
                AfterWatermark.pastEndOfWindow()
                    .withEarlyFirings(
                        AfterProcessingTime.pastFirstElementInPane().plusDelayOf(ONE_MINUTE))
                    .withLateFirings(AfterPane.elementCountAtLeast(1)))
            .withAllowedLateness(Duration.standardDays(1000))
            .accumulatingFiredPanes();

    return input
        .apply(sessionWindows)
        .apply(Sum.integersPerKey())
        .apply(ParDo.of(new FormatAsStrings()));
}
 
Example #25
Source File: LeaderBoard.java    From deployment-examples with MIT License 5 votes vote down vote up
/**
 * Sums scores per user in the global window, firing accumulating panes repeatedly with a
 * processing-time delay of {@code TEN_MINUTES} after the first element in each pane.
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) {
  // Get periodic results every ten minutes.
  Window<GameActionInfo> userGlobalWindow =
      Window.<GameActionInfo>into(new GlobalWindows())
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
          .accumulatingFiredPanes()
          .withAllowedLateness(allowedLateness);

  PCollection<GameActionInfo> windowedInput =
      input.apply("LeaderboardUserGlobalWindow", userGlobalWindow);

  // Extract and sum username/score pairs from the event data.
  return windowedInput.apply("ExtractUserScore", new ExtractAndSumScore("user"));
}
 
Example #26
Source File: LeaderBoard.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Computes running per-user score sums over the global window.
 *
 * <p>Results are re-emitted repeatedly on a processing-time trigger delayed by {@code
 * TEN_MINUTES} from the first element in each pane. Fired panes accumulate.
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> input) {
  // Get periodic results every ten minutes.
  Window<GameActionInfo> periodicGlobalWindowing =
      Window.<GameActionInfo>into(new GlobalWindows())
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(TEN_MINUTES)))
          .accumulatingFiredPanes()
          .withAllowedLateness(allowedLateness);

  PCollection<GameActionInfo> windowed =
      input.apply("LeaderboardUserGlobalWindow", periodicGlobalWindowing);

  // Extract and sum username/score pairs from the event data.
  return windowed.apply("ExtractUserScore", new ExtractAndSumScore("user"));
}
 
Example #27
Source File: Task.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Counts events per one-day fixed window, emitting early results on a processing-time trigger
 * before the watermark passes the end of the window. Fired panes are discarded.
 *
 * @param events the input events
 * @return the element counts, without a default value for empty windows
 */
static PCollection<Long> applyTransform(PCollection<String> events) {
  Window<String> dailyWindows =
      Window.<String>into(FixedWindows.of(Duration.standardDays(1)))
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterProcessingTime.pastFirstElementInPane()))
          .withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes();

  PCollection<String> windowedEvents = events.apply(dailyWindows);

  return windowedEvents.apply(Combine.globally(Count.<String>combineFn()).withoutDefaults());
}
 
Example #28
Source File: DistinctTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds the same three strings twice, one minute of processing time apart, through an
 * accumulating repeated processing-time trigger and expects {@code Distinct} to output each
 * string exactly once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinct() {
  Instant start = new Instant(0);

  TestStream<String> stream =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(start)
          .addElements(
              TimestampedValue.of("k1", start),
              TimestampedValue.of("k2", start.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", start.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of("k1", start.plus(Duration.standardSeconds(30))),
              TimestampedValue.of("k2", start.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", start.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  // One-minute fixed windows that re-fire 30s (processing time) after the first element.
  Window<String> oneMinuteWindows =
      Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardSeconds(30))))
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  PCollection<String> deduped =
      triggeredDistinctPipeline.apply(stream).apply(oneMinuteWindows).apply(Distinct.create());

  PAssert.that(deduped).containsInAnyOrder("k1", "k2", "k3");
  triggeredDistinctPipeline.run();
}
 
Example #29
Source File: BatchLoads.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the write path for triggered input: elements are written to files, the written files
 * are partitioned, and the partitions are handed either to the temp-table/rename path or to the
 * single-partition path.
 *
 * <p>Unlike {@code expandUntriggered} (referenced in a comment below), the list of written files
 * is processed as a main input rather than as a side input.
 *
 * @param input the key/value elements to write
 * @return whatever {@code writeResult(p)} produces for this pipeline
 */
private WriteResult expandTriggered(PCollection<KV<DestinationT, ElementT>> input) {
  // This path requires a positive file shard count.
  // NOTE(review): presumably Guava's Preconditions.checkArgument (IllegalArgumentException on
  // failure) -- confirm against the file's static imports.
  checkArgument(numFileShards > 0);
  Pipeline p = input.getPipeline();
  // Views passed along to the file-writing, partitioning and load/rename steps below.
  final PCollectionView<String> loadJobIdPrefixView = createLoadJobIdPrefixView(p);
  final PCollectionView<String> tempFilePrefixView =
      createTempFilePrefixView(p, loadJobIdPrefixView);
  // The user-supplied triggeringFrequency is often chosen to control how many BigQuery load
  // jobs are generated, to prevent going over BigQuery's daily quota for load jobs. If this
  // is set to a large value, currently we have to buffer all the data until the trigger fires.
  // Instead we ensure that the files are written if a threshold number of records are ready.
  // We use only the user-supplied trigger on the actual BigQuery load. This allows us to
  // offload the data to the filesystem.
  PCollection<KV<DestinationT, ElementT>> inputInGlobalWindow =
      input.apply(
          "rewindowIntoGlobal",
          Window.<KV<DestinationT, ElementT>>into(new GlobalWindows())
              .triggering(
                  // Fire on whichever comes first: the processing-time delay or the
                  // record-count threshold.
                  Repeatedly.forever(
                      AfterFirst.of(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(triggeringFrequency),
                          AfterPane.elementCountAtLeast(FILE_TRIGGERING_RECORD_COUNT))))
              .discardingFiredPanes());
  PCollection<WriteBundlesToFiles.Result<DestinationT>> results =
      writeShardedFiles(inputInGlobalWindow, tempFilePrefixView);
  // Apply the user's trigger before we start generating BigQuery load jobs. Here only the
  // processing-time delay is used; the record-count threshold above is not part of it.
  results =
      results.apply(
          "applyUserTrigger",
          Window.<WriteBundlesToFiles.Result<DestinationT>>into(new GlobalWindows())
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(triggeringFrequency)))
              .discardingFiredPanes());

  // Output tags for the WritePartition step: multiPartitionsTag is passed as the main output
  // tag and singlePartitionTag as the additional output tag.
  TupleTag<KV<ShardedKey<DestinationT>, List<String>>> multiPartitionsTag =
      new TupleTag<>("multiPartitionsTag");
  TupleTag<KV<ShardedKey<DestinationT>, List<String>>> singlePartitionTag =
      new TupleTag<>("singlePartitionTag");

  // If we have non-default triggered output, we can't use the side-input technique used in
  // expandUntriggered. Instead make the result list a main input. Apply a GroupByKey first for
  // determinism.
  PCollectionTuple partitions =
      results
          // Key every file result with the same null key so they group together.
          .apply("AttachSingletonKey", WithKeys.of((Void) null))
          .setCoder(
              KvCoder.of(VoidCoder.of(), WriteBundlesToFiles.ResultCoder.of(destinationCoder)))
          .apply("GroupOntoSingleton", GroupByKey.create())
          .apply("ExtractResultValues", Values.create())
          .apply(
              "WritePartitionTriggered",
              ParDo.of(
                      new WritePartition<>(
                          singletonTable,
                          dynamicDestinations,
                          tempFilePrefixView,
                          maxFilesPerPartition,
                          maxBytesPerPartition,
                          multiPartitionsTag,
                          singlePartitionTag,
                          rowWriterFactory))
                  .withSideInputs(tempFilePrefixView)
                  .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag)));
  // The multi-partition output goes through temp tables, followed by the rename step below.
  PCollection<KV<TableDestination, String>> tempTables =
      writeTempTables(partitions.get(multiPartitionsTag), loadJobIdPrefixView);

  tempTables
      // Now that the load job has happened, we want the rename to happen immediately.
      .apply(
          Window.<KV<TableDestination, String>>into(new GlobalWindows())
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))))
      // Same null-key grouping as above, this time over the temp-table entries.
      .apply(WithKeys.of((Void) null))
      .setCoder(KvCoder.of(VoidCoder.of(), tempTables.getCoder()))
      .apply(GroupByKey.create())
      .apply(Values.create())
      .apply(
          "WriteRenameTriggered",
          ParDo.of(
                  new WriteRename(
                      bigQueryServices,
                      loadJobIdPrefixView,
                      writeDisposition,
                      createDisposition,
                      maxRetryJobs,
                      kmsKey))
              .withSideInputs(loadJobIdPrefixView));
  // The single-partition output is handled separately by writeSinglePartition.
  writeSinglePartition(partitions.get(singlePartitionTag), loadJobIdPrefixView);
  return writeResult(p);
}
 
Example #30
Source File: TriggerTranslationTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Supplies the cases for this parameterized test; each case is produced by {@code
 * toProtoAndBackSpec} from a single trigger expression.
 *
 * @return the specs, atomic triggers first and composite triggers after them
 */
@Parameters(name = "{index}: {0}")
public static Iterable<ToProtoAndBackSpec> data() {
  // Alignment arguments shared by the two alignedTo(...) cases.
  final Duration alignmentPeriod = Duration.millis(5);
  final Instant alignmentOffset = new Instant(27);

  return ImmutableList.<ToProtoAndBackSpec>builder()
      // Atomic triggers
      .add(toProtoAndBackSpec(AfterWatermark.pastEndOfWindow()))
      .add(toProtoAndBackSpec(AfterPane.elementCountAtLeast(73)))
      .add(toProtoAndBackSpec(AfterSynchronizedProcessingTime.ofFirstElement()))
      .add(toProtoAndBackSpec(Never.ever()))
      .add(toProtoAndBackSpec(DefaultTrigger.of()))
      .add(toProtoAndBackSpec(AfterProcessingTime.pastFirstElementInPane()))
      .add(
          toProtoAndBackSpec(
              AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(23))))
      .add(
          toProtoAndBackSpec(
              AfterProcessingTime.pastFirstElementInPane()
                  .alignedTo(alignmentPeriod, alignmentOffset)))
      .add(
          toProtoAndBackSpec(
              AfterProcessingTime.pastFirstElementInPane()
                  .plusDelayOf(Duration.standardSeconds(3))
                  .alignedTo(alignmentPeriod, alignmentOffset)
                  .plusDelayOf(Duration.millis(13))))
      // Composite triggers
      .add(
          toProtoAndBackSpec(
              AfterAll.of(AfterPane.elementCountAtLeast(79), AfterWatermark.pastEndOfWindow())))
      .add(
          toProtoAndBackSpec(
              AfterEach.inOrder(
                  AfterPane.elementCountAtLeast(79), AfterPane.elementCountAtLeast(3))))
      .add(
          toProtoAndBackSpec(
              AfterFirst.of(AfterWatermark.pastEndOfWindow(), AfterPane.elementCountAtLeast(3))))
      .add(
          toProtoAndBackSpec(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(AfterPane.elementCountAtLeast(3))))
      .add(
          toProtoAndBackSpec(
              AfterWatermark.pastEndOfWindow()
                  .withLateFirings(AfterPane.elementCountAtLeast(3))))
      .add(
          toProtoAndBackSpec(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.millis(42)))
                  .withLateFirings(AfterPane.elementCountAtLeast(3))))
      .add(toProtoAndBackSpec(Repeatedly.forever(AfterWatermark.pastEndOfWindow())))
      .add(
          toProtoAndBackSpec(
              Repeatedly.forever(AfterPane.elementCountAtLeast(1))
                  .orFinally(AfterWatermark.pastEndOfWindow())))
      .build();
}