org.apache.beam.sdk.transforms.windowing.GlobalWindows Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.windowing.GlobalWindows.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Sums the integers 1..5 globally (no defaults, fanout of 2) in the global window while a
 * per-element trigger fires accumulating panes, then checks that the full total, 15, is among
 * the values that reach the assertion.
 */
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4, 5));

  // Global window, re-triggered after every element, accumulating fired panes.
  Window<Integer> perElementAccumulating =
      Window.<Integer>into(new GlobalWindows())
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .accumulatingFiredPanes()
          .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS);

  PCollection<Integer> sums =
      numbers
          .apply(perElementAccumulating)
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(sums)
      .satisfies(
          observed -> {
            assertThat(observed, hasItem(15));
            return null;
          });

  pipeline.run();
}
Example #2
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Re-windows the input into the global window with a repeating processing-time trigger aligned
 * to {@code intervalDuration}, groups by key, and emits only the first value of each fired
 * group.
 *
 * @param input keyed elements to reduce
 * @return one {@code KV} per non-empty group, carrying the first value the group's iterable
 *     yields
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  // Discarding panes: each firing only sees what arrived since the previous firing.
  Window<KV<K, V>> alignedFirings =
      Window.<KV<K, V>>into(new GlobalWindows())
          .discardingFiredPanes()
          .triggering(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane()
                      .plusDelayOf(Duration.ZERO)
                      .alignedTo(intervalDuration, org.joda.time.Instant.now())));

  DoFn<KV<K, Iterable<V>>, KV<K, V>> keepFirstValue =
      new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
        @ProcessElement
        public void process(ProcessContext context) {
          LOG.debug(
              "TS: {} | Element: {} | Pane: {}",
              context.timestamp(),
              context.element(),
              context.pane());
          Iterator<V> values = context.element().getValue().iterator();
          if (!values.hasNext()) {
            // Empty group: nothing to emit.
            return;
          }
          context.output(KV.of(context.element().getKey(), values.next()));
        }
      };

  return input.apply(alignedFirings).apply(GroupByKey.create()).apply(ParDo.of(keepFirstValue));
}
Example #3
Source File: RepeatedlyStateMachineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * With a repeated "first of (15 minutes of processing time, 5 elements)" trigger, a single
 * element is not enough to fire; advancing processing time by 15 minutes is. After firing, the
 * trigger must not immediately be ready again.
 */
@Test
public void testRepeatedlyAfterFirstProcessingTime() throws Exception {
  SimpleTriggerStateMachineTester<GlobalWindow> triggerTester =
      TriggerStateMachineTester.forTrigger(
          RepeatedlyStateMachine.forever(
              AfterFirstStateMachine.of(
                  AfterProcessingTimeStateMachine.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardMinutes(15)),
                  AfterPaneStateMachine.elementCountAtLeast(5))),
          new GlobalWindows());
  GlobalWindow globalWindow = GlobalWindow.INSTANCE;

  // One element: neither the count nor the delay condition is met.
  triggerTester.injectElements(1);
  assertFalse(triggerTester.shouldFire(globalWindow));

  // The processing-time branch becomes ready after the 15 minute delay.
  Instant afterDelay = new Instant(0).plus(Duration.standardMinutes(15));
  triggerTester.advanceProcessingTime(afterDelay);
  assertTrue(triggerTester.shouldFire(globalWindow));

  triggerTester.fireIfShouldFire(globalWindow);
  assertFalse(triggerTester.shouldFire(globalWindow));
}
Example #4
Source File: StatefulParDoEvaluatorFactoryTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Streams three values for the same key with timestamps in descending order, the last one
 * arriving after the watermark has moved past its timestamp, through a global window with 2 ms
 * of allowed lateness, and expects the stateful concatenation to produce {@code "3"},
 * {@code "3:2"} and {@code "3:2:1"}.
 */
@Test
public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() {
  Instant epoch = Instant.ofEpochMilli(0);

  TestStream<KV<String, Integer>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))
          .addElements(TimestampedValue.of(KV.of("", 1), epoch.plus(2)))
          .addElements(TimestampedValue.of(KV.of("", 2), epoch.plus(1)))
          .advanceWatermarkTo(epoch.plus(1))
          // Timestamp is behind the watermark set just above.
          .addElements(TimestampedValue.of(KV.of("", 3), epoch))
          .advanceWatermarkToInfinity();

  Window<KV<String, Integer>> globalWithLateness =
      Window.<KV<String, Integer>>into(new GlobalWindows())
          .withAllowedLateness(Duration.millis(2));

  PCollection<KV<String, Integer>> windowed = pipeline.apply(events).apply(globalWithLateness);
  PCollection<String> concatenated = windowed.apply(ParDo.of(statefulConcat()));

  PAssert.that(concatenated).containsInAnyOrder("3", "3:2", "3:2:1");
  pipeline.run();
}
Example #5
Source File: RepeatedlyStateMachineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * A repeated "at least 5 elements" trigger must not be ready after one element, must be ready
 * once five have been injected, and must not be ready again right after firing.
 */
@Test
public void testRepeatedlyElementCount() throws Exception {
  SimpleTriggerStateMachineTester<GlobalWindow> triggerTester =
      TriggerStateMachineTester.forTrigger(
          RepeatedlyStateMachine.forever(AfterPaneStateMachine.elementCountAtLeast(5)),
          new GlobalWindows());
  GlobalWindow globalWindow = GlobalWindow.INSTANCE;

  triggerTester.injectElements(1);
  assertFalse(triggerTester.shouldFire(globalWindow));

  // Four more elements bring the total to the threshold of five.
  triggerTester.injectElements(2, 3, 4, 5);
  assertTrue(triggerTester.shouldFire(globalWindow));

  triggerTester.fireIfShouldFire(globalWindow);
  assertFalse(triggerTester.shouldFire(globalWindow));
}
Example #6
Source File: WriteFeatureSetSpecAck.java From feast with Apache License 2.0 | 6 votes |
/**
 * Counts occurrences of each {@link FeatureSetReference} in the global window, re-firing on
 * every element with accumulating panes, and passes a reference through once its count is at
 * least {@code sinksCount}.
 *
 * @param input feature set references to count
 * @return the references whose count reached {@code sinksCount}
 */
@Override
public PCollection<FeatureSetReference> expand(PCollection<FeatureSetReference> input) {
  Window<FeatureSetReference> fireOnEveryElement =
      Window.<FeatureSetReference>into(new GlobalWindows())
          .accumulatingFiredPanes()
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .withAllowedLateness(Duration.ZERO);

  // Typed local instead of an inline cast; selects the same Filter.by overload.
  SerializableFunction<KV<FeatureSetReference, Long>, Boolean> reachedSinksCount =
      counted -> counted.getValue() >= sinksCount;

  return input
      .apply("OnEveryElementTrigger", fireOnEveryElement)
      .apply("CountingReadySinks", Count.perElement())
      .apply("WhenAllReady", Filter.by(reachedSinksCount))
      .apply(Keys.create());
}
Example #7
Source File: RepeatedlyStateMachineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * A repeated "15 minutes of processing time after the first element" trigger must not be ready
 * right after an element arrives, must be ready once processing time has advanced 15 minutes,
 * and must not be ready again right after firing.
 */
@Test
public void testRepeatedlyProcessingTime() throws Exception {
  SimpleTriggerStateMachineTester<GlobalWindow> triggerTester =
      TriggerStateMachineTester.forTrigger(
          RepeatedlyStateMachine.forever(
              AfterProcessingTimeStateMachine.pastFirstElementInPane()
                  .plusDelayOf(Duration.standardMinutes(15))),
          new GlobalWindows());
  GlobalWindow globalWindow = GlobalWindow.INSTANCE;

  triggerTester.injectElements(1);
  assertFalse(triggerTester.shouldFire(globalWindow));

  Instant afterDelay = new Instant(0).plus(Duration.standardMinutes(15));
  triggerTester.advanceProcessingTime(afterDelay);
  assertTrue(triggerTester.shouldFire(globalWindow));

  triggerTester.fireIfShouldFire(globalWindow);
  assertFalse(triggerTester.shouldFire(globalWindow));
}
Example #8
Source File: RepeatedlyStateMachineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * With a repeated "first of (15 minutes of processing time, 5 elements)" trigger, reaching
 * five elements is enough to fire without advancing processing time. After firing, the trigger
 * must not immediately be ready again.
 */
@Test
public void testRepeatedlyAfterFirstElementCount() throws Exception {
  SimpleTriggerStateMachineTester<GlobalWindow> triggerTester =
      TriggerStateMachineTester.forTrigger(
          RepeatedlyStateMachine.forever(
              AfterFirstStateMachine.of(
                  AfterProcessingTimeStateMachine.pastFirstElementInPane()
                      .plusDelayOf(Duration.standardMinutes(15)),
                  AfterPaneStateMachine.elementCountAtLeast(5))),
          new GlobalWindows());
  GlobalWindow globalWindow = GlobalWindow.INSTANCE;

  triggerTester.injectElements(1);
  assertFalse(triggerTester.shouldFire(globalWindow));

  // The element-count branch is satisfied once five elements are buffered.
  triggerTester.injectElements(2, 3, 4, 5);
  assertTrue(triggerTester.shouldFire(globalWindow));

  triggerTester.fireIfShouldFire(globalWindow);
  assertFalse(triggerTester.shouldFire(globalWindow));
}
Example #9
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects {@code processElement} on a runner wrapping a {@link ThrowingDoFn} to fail with a
 * {@link UserCodeException} whose cause is the exception the DoFn is set up to throw.
 */
@Test
public void testProcessElementExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn throwingFn = new ThrowingDoFn();
  DoFnRunner<String, String> doFnRunner =
      new SimpleDoFnRunner<>(
          null,
          throwingFn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Expectations must be registered before the call that throws.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(throwingFn.exceptionToThrow));

  doFnRunner.processElement(WindowedValue.valueInGlobalWindow("anyValue"));
}
Example #10
Source File: BoundedDataset.java From beam with Apache License 2.0 | 6 votes |
/**
 * Returns the dataset's contents as windowed values, materializing them on first use and
 * caching the result in {@code windowedValues}.
 *
 * <p>Elements are encoded to bytes on the RDD, collected, and decoded locally. A value-only
 * windowed coder is used when the collection's window function is {@link GlobalWindows};
 * otherwise a full windowed-value coder with the window function's coder is used.
 *
 * @param pcollection supplies the element coder and windowing strategy
 * @return the cached or freshly collected windowed values
 */
Iterable<WindowedValue<T>> getValues(PCollection<T> pcollection) {
  if (windowedValues != null) {
    // Already collected by an earlier call.
    return windowedValues;
  }

  WindowFn<?, ?> windowFn = pcollection.getWindowingStrategy().getWindowFn();
  Coder<? extends BoundedWindow> windowCoder = windowFn.windowCoder();

  final WindowedValue.WindowedValueCoder<T> valueCoder;
  if (!(windowFn instanceof GlobalWindows)) {
    valueCoder = WindowedValue.FullWindowedValueCoder.of(pcollection.getCoder(), windowCoder);
  } else {
    valueCoder = WindowedValue.ValueOnlyWindowedValueCoder.of(pcollection.getCoder());
  }

  JavaRDDLike<byte[], ?> encodedRdd = rdd.map(CoderHelpers.toByteFunction(valueCoder));
  List<byte[]> collectedBytes = encodedRdd.collect();
  windowedValues =
      collectedBytes.stream()
          .map(encoded -> CoderHelpers.fromByteArray(encoded, valueCoder))
          .collect(Collectors.toList());
  return windowedValues;
}
Example #11
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Applies a global window with a repeating, interval-aligned processing-time trigger
 * (discarding fired panes), groups the elements by key, and forwards the first value of every
 * non-empty group.
 *
 * @param input keyed elements to reduce
 * @return for each fired group, the key paired with the first value of its iterable
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  PCollection<KV<K, V>> windowed =
      input.apply(
          Window.<KV<K, V>>into(new GlobalWindows())
              .discardingFiredPanes()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.ZERO)
                          .alignedTo(intervalDuration, org.joda.time.Instant.now()))));

  PCollection<KV<K, Iterable<V>>> grouped = windowed.apply(GroupByKey.create());

  return grouped.apply(
      ParDo.of(
          new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
            @ProcessElement
            public void process(ProcessContext ctx) {
              LOG.debug(
                  "TS: {} | Element: {} | Pane: {}", ctx.timestamp(), ctx.element(), ctx.pane());
              Iterator<V> groupValues = ctx.element().getValue().iterator();
              // Only the first value of the group is forwarded; the rest are dropped.
              if (groupValues.hasNext()) {
                ctx.output(KV.of(ctx.element().getKey(), groupValues.next()));
              }
            }
          }));
}
Example #12
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects {@code startBundle} on a runner wrapping a {@link ThrowingDoFn} to fail with a
 * {@link UserCodeException} whose cause is the exception the DoFn is set up to throw.
 */
@Test
public void testStartBundleExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn throwingFn = new ThrowingDoFn();
  DoFnRunner<String, String> doFnRunner =
      new SimpleDoFnRunner<>(
          null,
          throwingFn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Expectations must be registered before the call that throws.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(throwingFn.exceptionToThrow));

  doFnRunner.startBundle();
}
Example #13
Source File: WindowMergingFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds the window-merging map function for {@link GlobalWindows} and checks that, for a
 * single interval window, it returns the input windows unchanged together with an empty list
 * of merges.
 */
@Test
public void testWindowMergingWithNonMergingWindowFn() throws Exception {
  ThrowingFunction<
          KV<Object, Iterable<BoundedWindow>>,
          KV<
              Object,
              KV<Iterable<BoundedWindow>, Iterable<KV<BoundedWindow, Iterable<BoundedWindow>>>>>>
      mergeFn =
          WindowMergingFnRunner.createMapFunctionForPTransform(
              "ptransformId", createMergeTransformForWindowFn(new GlobalWindows()));

  KV<Object, Iterable<BoundedWindow>> keyedWindows =
      KV.of(
          "abc",
          ImmutableList.of(new IntervalWindow(Instant.now(), Duration.standardMinutes(1))));

  // Expected: same key, original windows untouched, no merged groups.
  assertEquals(
      KV.of(keyedWindows.getKey(), KV.of(keyedWindows.getValue(), Collections.emptyList())),
      mergeFn.apply(keyedWindows));
}
Example #14
Source File: WindowMappingFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates the default window mapping function of {@link GlobalWindows} into a PTransform
 * spec, builds the runner-side map function from it, and checks that an interval window is
 * mapped to {@link GlobalWindow#INSTANCE} with the key preserved.
 */
@Test
public void testWindowMapping() throws Exception {
  String pTransformId = "pTransformId";

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec mappingSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(WindowMappingFnRunner.URN)
          .setPayload(
              ParDoTranslation.translateWindowMappingFn(
                      new GlobalWindows().getDefaultWindowMappingFn(), sdkComponents)
                  .toByteString())
          .build();
  RunnerApi.PTransform mappingTransform =
      RunnerApi.PTransform.newBuilder().setSpec(mappingSpec).build();

  ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> mappingFn =
      WindowMappingFnRunner.createMapFunctionForPTransform(pTransformId, mappingTransform);

  KV<Object, BoundedWindow> keyedWindow =
      KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));

  assertEquals(KV.of(keyedWindow.getKey(), GlobalWindow.INSTANCE), mappingFn.apply(keyedWindow));
}
Example #15
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Expects {@code finishBundle} on a runner wrapping a {@link ThrowingDoFn} to fail with a
 * {@link UserCodeException} whose cause is the exception the DoFn is set up to throw.
 */
@Test
public void testFinishBundleExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn throwingFn = new ThrowingDoFn();
  DoFnRunner<String, String> doFnRunner =
      new SimpleDoFnRunner<>(
          null,
          throwingFn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Expectations must be registered before the call that throws.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(throwingFn.exceptionToThrow));

  doFnRunner.finishBundle();
}
Example #16
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
@Test @Category({ValidatesRunner.class, UsesSideInputs.class}) public void testGlobalCombineWithDefaultsAndTriggers() { PCollection<Integer> input = pipeline.apply(Create.of(1, 1)); PCollection<String> output = input .apply( Window.<Integer>into(new GlobalWindows()) .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) .accumulatingFiredPanes() .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS)) .apply(Sum.integersGlobally()) .apply(ParDo.of(new FormatPaneInfo())); // The actual elements produced are nondeterministic. Could be one, could be two. // But it should certainly have a final element with the correct final sum. PAssert.that(output) .satisfies( input1 -> { assertThat(input1, hasItem("2: true")); return null; }); pipeline.run(); }
Example #17
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects {@code onTimer} on a runner wrapping a {@link ThrowingDoFn} to fail with a
 * {@link UserCodeException} whose cause is the exception the DoFn is set up to throw.
 */
@Test
public void testOnTimerExceptionsWrappedAsUserCodeException() {
  ThrowingDoFn throwingFn = new ThrowingDoFn();
  DoFnRunner<String, String> doFnRunner =
      new SimpleDoFnRunner<>(
          null,
          throwingFn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.emptyList(),
          mockStepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(new GlobalWindows()),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Expectations must be registered before the call that throws.
  thrown.expect(UserCodeException.class);
  thrown.expectCause(is(throwingFn.exceptionToThrow));

  // The same epoch instant is passed for both timestamp arguments, as in the inline form.
  Instant epoch = new Instant(0);
  doFnRunner.onTimer(
      TimerDeclaration.PREFIX + ThrowingDoFn.TIMER_ID,
      "",
      null,
      GlobalWindow.INSTANCE,
      epoch,
      epoch,
      TimeDomain.EVENT_TIME);
}
Example #18
Source File: KafkaExactlyOnceSink.java From beam with Apache License 2.0 | 5 votes |
@Override public PCollection<Void> expand(PCollection<ProducerRecord<K, V>> input) { int numShards = spec.getNumShards(); if (numShards <= 0) { try (Consumer<?, ?> consumer = openConsumer(spec)) { numShards = consumer.partitionsFor(spec.getTopic()).size(); LOG.info( "Using {} shards for exactly-once writer, matching number of partitions " + "for topic '{}'", numShards, spec.getTopic()); } } checkState(numShards > 0, "Could not set number of shards"); return input .apply( Window.<ProducerRecord<K, V>>into(new GlobalWindows()) // Everything into global window. .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) .discardingFiredPanes()) .apply( String.format("Shuffle across %d shards", numShards), ParDo.of(new Reshard<>(numShards))) .apply("Persist sharding", GroupByKey.create()) .apply("Assign sequential ids", ParDo.of(new Sequencer<>())) .apply("Persist ids", GroupByKey.create()) .apply( String.format("Write to Kafka topic '%s'", spec.getTopic()), ParDo.of(new ExactlyOnceWriter<>(spec, input.getCoder()))); }
Example #19
Source File: WindowingStrategyTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reconstructs a {@link WindowFn} from its portable {@link FunctionSpec}.
 *
 * <p>The spec's URN selects global, fixed, sliding or session windows, or a Java-serialized
 * window function; parameterized kinds read their settings from the spec's payload.
 *
 * @param windowFnSpec the URN and payload describing the window function
 * @return the corresponding window function
 * @throws IllegalArgumentException if the URN is not one of the supported kinds, or if the
 *     payload cannot be parsed as the proto message expected for that URN
 */
public static WindowFn<?, ?> windowFnFromProto(FunctionSpec windowFnSpec) {
  try {
    String urn = windowFnSpec.getUrn();

    if (urn.equals(getUrn(GlobalWindowsPayload.Enum.PROPERTIES))) {
      return new GlobalWindows();
    }

    if (urn.equals(getUrn(FixedWindowsPayload.Enum.PROPERTIES))) {
      FixedWindowsPayload fixed = FixedWindowsPayload.parseFrom(windowFnSpec.getPayload());
      return FixedWindows.of(Duration.millis(Durations.toMillis(fixed.getSize())))
          .withOffset(Duration.millis(Timestamps.toMillis(fixed.getOffset())));
    }

    if (urn.equals(getUrn(SlidingWindowsPayload.Enum.PROPERTIES))) {
      SlidingWindowsPayload sliding = SlidingWindowsPayload.parseFrom(windowFnSpec.getPayload());
      return SlidingWindows.of(Duration.millis(Durations.toMillis(sliding.getSize())))
          .every(Duration.millis(Durations.toMillis(sliding.getPeriod())))
          .withOffset(Duration.millis(Timestamps.toMillis(sliding.getOffset())));
    }

    if (urn.equals(getUrn(SessionWindowsPayload.Enum.PROPERTIES))) {
      SessionWindowsPayload session = SessionWindowsPayload.parseFrom(windowFnSpec.getPayload());
      return Sessions.withGapDuration(
          Duration.millis(Durations.toMillis(session.getGapSize())));
    }

    if (urn.equals(SERIALIZED_JAVA_WINDOWFN_URN)) {
      // Custom window functions travel as Java-serialized bytes in the payload.
      return (WindowFn<?, ?>)
          SerializableUtils.deserializeFromByteArray(
              windowFnSpec.getPayload().toByteArray(), "WindowFn");
    }

    throw new IllegalArgumentException(
        "Unknown or unsupported WindowFn: " + windowFnSpec.getUrn());
  } catch (InvalidProtocolBufferException e) {
    // Keep the parse failure as the cause so the original error is not lost.
    throw new IllegalArgumentException(
        String.format(
            "%s for %s with URN %s did not contain expected proto message for payload",
            FunctionSpec.class.getSimpleName(),
            WindowFn.class.getSimpleName(),
            windowFnSpec.getUrn()),
        e);
  }
}
Example #20
Source File: TestPubsubSignal.java From beam with Apache License 2.0 | 5 votes |
@Override public POutput expand(PCollection<? extends T> input) { return input // assign a dummy key and global window, // this is needed to accumulate all observed events in the same state cell .apply(Window.into(new GlobalWindows())) .apply(WithKeys.of("dummyKey")) .apply( "checkAllEventsForSuccess", ParDo.of(new StatefulPredicateCheck<>(coder, formatter, successPredicate))) // signal the success/failure to the result topic .apply("publishSuccess", PubsubIO.writeStrings().to(resultTopicPath.getPath())); }
Example #21
Source File: PTransformMatchersTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * A matcher built with {@code classEqualTo(ParDo.SingleOutput.class)} must not match an
 * applied {@code Window.into} transform.
 */
@Test
public void classEqualToDoesNotMatchUnrelatedClass() {
  PTransformMatcher singleOutputMatcher =
      PTransformMatchers.classEqualTo(ParDo.SingleOutput.class);

  AppliedPTransform<?, ?, ?> windowApplication =
      getAppliedTransform(Window.<KV<String, Integer>>into(new GlobalWindows()));

  assertThat(singleOutputMatcher.matches(windowApplication), is(false));
}
Example #22
Source File: WindowIntoTranslationTest.java From beam with Apache License 2.0 | 5 votes |
@Parameters(name = "{index}: {0}") public static Iterable<WindowFn<?, ?>> data() { // This pipeline exists for construction, not to run any test. return ImmutableList.<WindowFn<?, ?>>builder() .add(FixedWindows.of(Duration.standardMinutes(10L))) .add(new GlobalWindows()) .add(Sessions.withGapDuration(Duration.standardMinutes(15L))) .add(SlidingWindows.of(Duration.standardMinutes(5L)).every(Duration.standardMinutes(1L))) .add(new CustomWindows()) .build(); }
Example #23
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * When the watermark advances to end-of-time, the GlobalWindow must emit an ON_TIME pane
 * holding whatever elements the repeated count trigger had not yet fired.
 */
@Test
public void fireNonEmptyOnDrainInGlobalWindow() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> reduceTester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(Repeatedly.forever(AfterPane.elementCountAtLeast(3)))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  reduceTester.advanceInputWatermark(new Instant(0));

  final int elementCount = 20;
  // Number of complete three-element panes the count trigger produces.
  final int fullPanes = elementCount / 3;
  for (int value = 0; value < elementCount; value++) {
    reduceTester.injectElements(TimestampedValue.of(value, new Instant(value)));
  }

  List<WindowedValue<Iterable<Integer>>> panes = reduceTester.extractOutput();
  assertEquals(fullPanes, panes.size());
  for (int index = 0; index < panes.size(); index++) {
    WindowedValue<Iterable<Integer>> pane = panes.get(index);
    assertEquals(Timing.EARLY, pane.getPane().getTiming());
    assertEquals(index, pane.getPane().getIndex());
    assertEquals(3, Iterables.size(pane.getValue()));
  }

  // Draining: move the watermark to the maximum timestamp.
  reduceTester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  panes = reduceTester.extractOutput();
  assertEquals(1, panes.size());
  WindowedValue<Iterable<Integer>> finalPane = panes.get(0);
  assertEquals(Timing.ON_TIME, finalPane.getPane().getTiming());
  assertEquals(fullPanes, finalPane.getPane().getIndex());
  // The leftover elements: elementCount - fullPanes * 3.
  assertEquals(elementCount % 3, Iterables.size(finalPane.getValue()));
}
Example #24
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * When the watermark advances to end-of-time, the GlobalWindow must emit an ON_TIME pane even
 * though every element has already been emitted in earlier panes, so that final pane is empty.
 */
@Test
public void fireEmptyOnDrainInGlobalWindowIfRequested() throws Exception {
  ReduceFnTester<Integer, Iterable<Integer>, GlobalWindow> reduceTester =
      ReduceFnTester.nonCombining(
          WindowingStrategy.of(new GlobalWindows())
              .withTrigger(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(new Duration(3))))
              .withMode(AccumulationMode.DISCARDING_FIRED_PANES));

  final int elementCount = 20;
  // Expected number of early panes, each holding four elements.
  final int earlyPanes = (elementCount + 3) / 4;
  for (int tick = 0; tick < elementCount; tick++) {
    reduceTester.advanceProcessingTime(new Instant(tick));
    reduceTester.injectElements(TimestampedValue.of(tick, new Instant(tick)));
  }
  reduceTester.advanceProcessingTime(new Instant(elementCount + 4));

  List<WindowedValue<Iterable<Integer>>> panes = reduceTester.extractOutput();
  assertEquals(earlyPanes, panes.size());
  for (int index = 0; index < panes.size(); index++) {
    WindowedValue<Iterable<Integer>> pane = panes.get(index);
    assertEquals(Timing.EARLY, pane.getPane().getTiming());
    assertEquals(index, pane.getPane().getIndex());
    assertEquals(4, Iterables.size(pane.getValue()));
  }

  // Draining: move the watermark to the maximum timestamp.
  reduceTester.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  panes = reduceTester.extractOutput();
  assertEquals(1, panes.size());
  WindowedValue<Iterable<Integer>> finalPane = panes.get(0);
  assertEquals(Timing.ON_TIME, finalPane.getPane().getTiming());
  assertEquals(earlyPanes, finalPane.getPane().getIndex());
  assertEquals(0, Iterables.size(finalPane.getValue()));
}
Example #25
Source File: PCollectionViewTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Round-trips the default window mapping function of {@link GlobalWindows} through its proto
 * translation and expects the result to equal a freshly obtained default mapping function.
 */
@Test
public void testWindowMappingFnTranslation() throws Exception {
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  assertEquals(
      // Expected value.
      new GlobalWindows().getDefaultWindowMappingFn(),
      // Actual value: translate to proto, then back.
      PCollectionViewTranslation.windowMappingFnFromProto(
          ParDoTranslation.translateWindowMappingFn(
              new GlobalWindows().getDefaultWindowMappingFn(), components)));
}
Example #26
Source File: PubsubUnboundedSink.java From beam with Apache License 2.0 | 5 votes |
/**
 * Wires the unbounded Pub/Sub sink: global window that fires when either
 * {@code publishBatchSize} elements are buffered or {@code maxLatency} has passed since the
 * pane's first element, followed by sharding, grouping and the writer step.
 *
 * @param input messages to publish
 * @return a {@link PDone} in the input's pipeline
 */
@Override
public PDone expand(PCollection<PubsubMessage> input) {
  // Whichever comes first: a full batch, or the latency bound.
  Window<PubsubMessage> batchOrLatency =
      Window.<PubsubMessage>into(new GlobalWindows())
          .triggering(
              Repeatedly.forever(
                  AfterFirst.of(
                      AfterPane.elementCountAtLeast(publishBatchSize),
                      AfterProcessingTime.pastFirstElementInPane().plusDelayOf(maxLatency))))
          .discardingFiredPanes();

  input
      .apply("PubsubUnboundedSink.Window", batchOrLatency)
      .apply("PubsubUnboundedSink.Shard", ParDo.of(new ShardFn(numShards, recordIdMethod)))
      .setCoder(KvCoder.of(VarIntCoder.of(), CODER))
      .apply(GroupByKey.create())
      .apply(
          "PubsubUnboundedSink.Writer",
          ParDo.of(
              new WriterFn(
                  pubsubFactory,
                  topic,
                  timestampAttribute,
                  idAttribute,
                  publishBatchSize,
                  publishBatchBytes)));

  return PDone.in(input.getPipeline());
}
Example #27
Source File: WriteTables.java From beam with Apache License 2.0 | 5 votes |
@Override public PCollection<KV<TableDestination, String>> expand( PCollection<KV<ShardedKey<DestinationT>, List<String>>> input) { PCollectionTuple writeTablesOutputs = input.apply( ParDo.of(new WriteTablesDoFn()) .withSideInputs(sideInputs) .withOutputTags(mainOutputTag, TupleTagList.of(temporaryFilesTag))); // Garbage collect temporary files. // We mustn't start garbage collecting files until we are assured that the WriteTablesDoFn has // succeeded in loading those files and won't be retried. Otherwise, we might fail part of the // way through deleting temporary files, and retry WriteTablesDoFn. This will then fail due // to missing files, causing either the entire workflow to fail or get stuck (depending on how // the runner handles persistent failures). writeTablesOutputs .get(temporaryFilesTag) .setCoder(StringUtf8Coder.of()) .apply(WithKeys.of((Void) null)) .setCoder(KvCoder.of(VoidCoder.of(), StringUtf8Coder.of())) .apply( Window.<KV<Void, String>>into(new GlobalWindows()) .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1))) .discardingFiredPanes()) .apply(GroupByKey.create()) .apply(Values.create()) .apply(ParDo.of(new GarbageCollectTemporaryFiles())); return writeTablesOutputs.get(mainOutputTag); }
Example #28
Source File: ParDoTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp))); if (stamp == 100) { // advance watermark when we have 100 remaining elements // all the rest are going to be late elements input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp)); } } testTimeSortedInput( numElements, pipeline .apply(input.advanceWatermarkToInfinity()) .apply( Window.<Long>into(new GlobalWindows()) .withAllowedLateness(Duration.millis(5000)))); }
Example #29
Source File: WaitTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs the shared wait scenario with both the main input and the signal in the global window:
 * one minute duration, 15 seconds lateness, and 20 elements on each side.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testWaitInGlobalWindow() {
  Duration duration = Duration.standardMinutes(1);
  Duration lateness = Duration.standardSeconds(15);
  int numMainElements = 20;
  int numSignalElements = 20;

  testWaitWithParameters(
      duration,
      lateness,
      numMainElements,
      new GlobalWindows(),
      numSignalElements,
      new GlobalWindows());
}
Example #30
Source File: SimpleDoFnRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/** * Tests that a users call to set a timer gets properly dispatched to the timer internals. From * there on, it is the duty of the runner & step context to set it in whatever way is right for * that runner. */ @Test public void testTimerSet() { WindowFn<?, ?> windowFn = new GlobalWindows(); DoFnWithTimers<GlobalWindow> fn = new DoFnWithTimers(windowFn.windowCoder()); DoFnRunner<String, String> runner = new SimpleDoFnRunner<>( null, fn, NullSideInputReader.empty(), null, null, Collections.emptyList(), mockStepContext, null, Collections.emptyMap(), WindowingStrategy.of(new GlobalWindows()), DoFnSchemaInformation.create(), Collections.emptyMap()); // Setting the timer needs the current time, as it is set relative Instant currentTime = new Instant(42); when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(currentTime); runner.processElement(WindowedValue.valueInGlobalWindow("anyValue")); verify(mockTimerInternals) .setTimer( StateNamespaces.window(new GlobalWindows().windowCoder(), GlobalWindow.INSTANCE), TimerDeclaration.PREFIX + DoFnWithTimers.TIMER_ID, "", currentTime.plus(DoFnWithTimers.TIMER_OFFSET), currentTime.plus(DoFnWithTimers.TIMER_OFFSET), TimeDomain.EVENT_TIME); }