org.apache.beam.sdk.transforms.windowing.Window Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.windowing.Window.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReshuffleTest.java From beam with Apache License 2.0 | 7 votes |
/**
 * Checks that {@code Reshuffle} returns the same elements and keeps the windowing strategy of
 * its windowed input.
 *
 * <p>NOTE(review): the method name mentions sliding windows, but the input is windowed with
 * {@code FixedWindows} — confirm which one is intended.
 */
@Test
@Category(ValidatesRunner.class)
public void testReshuffleAfterSlidingWindows() {
  FixedWindows tenMinuteWindows = FixedWindows.of(Duration.standardMinutes(10L));

  PCollection<KV<String, Integer>> windowedKvs =
      pipeline
          .apply(
              Create.of(ARBITRARY_KVS)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .apply(Window.into(tenMinuteWindows));
  PCollection<KV<String, Integer>> reshuffled = windowedKvs.apply(Reshuffle.of());

  // Same elements out as in.
  PAssert.that(reshuffled).containsInAnyOrder(ARBITRARY_KVS);
  // Windowing strategy must be unchanged by the reshuffle.
  assertEquals(windowedKvs.getWindowingStrategy(), reshuffled.getWindowingStrategy());

  pipeline.run();
}
Example #2
Source File: SqlQuery3.java From beam with Apache License 2.0 | 6 votes |
/**
 * Puts all events into fixed windows, derives one row collection for auctions and one for
 * persons, and runs {@code QUERY} over both, converting the resulting rows to {@code
 * NameCityStateId}.
 *
 * @param allEvents the events to process
 * @return the query result converted from rows
 */
@Override
public PCollection<NameCityStateId> expand(PCollection<Event> allEvents) {
  Duration windowSize = Duration.standardSeconds(configuration.windowSizeSec);
  PCollection<Event> windowedEvents = allEvents.apply(Window.into(FixedWindows.of(windowSize)));

  // Events carrying a new auction become the "Auction" table.
  String auctionLabel = Auction.class.getSimpleName();
  PCollection<Row> auctionRows =
      windowedEvents
          .apply(getName() + ".Filter." + auctionLabel, Filter.by(e1 -> e1.newAuction != null))
          .apply(getName() + ".ToRecords." + auctionLabel, new SelectEvent(Type.AUCTION));

  // Events carrying a new person become the "Person" table.
  String personLabel = Person.class.getSimpleName();
  PCollection<Row> personRows =
      windowedEvents
          .apply(getName() + ".Filter." + personLabel, Filter.by(e -> e.newPerson != null))
          .apply(getName() + ".ToRecords." + personLabel, new SelectEvent(Type.PERSON));

  PCollectionTuple tables =
      PCollectionTuple.of(new TupleTag<>("Auction"), auctionRows)
          .and(new TupleTag<>("Person"), personRows);

  return tables
      .apply(SqlTransform.query(QUERY).withQueryPlannerClass(plannerClass))
      .apply(Convert.fromRows(NameCityStateId.class));
}
Example #3
Source File: LeaderBoard.java From deployment-examples with MIT License | 6 votes |
@Override public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> infos) { return infos .apply( "LeaderboardTeamFixedWindows", Window.<GameActionInfo>into(FixedWindows.of(teamWindowDuration)) // We will get early (speculative) results as well as cumulative // processing of late data. .triggering( AfterWatermark.pastEndOfWindow() .withEarlyFirings( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(FIVE_MINUTES)) .withLateFirings( AfterProcessingTime.pastFirstElementInPane() .plusDelayOf(TEN_MINUTES))) .withAllowedLateness(allowedLateness) .accumulatingFiredPanes()) // Extract and sum teamname/score pairs from the event data. .apply("ExtractTeamScore", new ExtractAndSumScore("team")); }
Example #4
Source File: ReduceFnRunner.java From beam with Apache License 2.0 | 6 votes |
/** Do we need to emit? */ private boolean needToEmit(boolean isEmpty, boolean isFinished, PaneInfo.Timing timing) { if (!isEmpty) { // The pane has elements. return true; } if (timing == Timing.ON_TIME && windowingStrategy.getOnTimeBehavior() == Window.OnTimeBehavior.FIRE_ALWAYS) { // This is an empty ON_TIME pane. return true; } if (isFinished && windowingStrategy.getClosingBehavior() == ClosingBehavior.FIRE_ALWAYS) { // This is known to be the final pane, and the user has requested it even when empty. return true; } return false; }
Example #5
Source File: DistinctTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that windowing configured inside {@code applyIf(true, ...)} is present on the built
 * {@code Distinct} operator with the expected window function, trigger and accumulation mode.
 */
@Test
public void testWindow_applyIf() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());

  final PCollection<String> deduplicated =
      Distinct.of(input)
          .applyIf(
              true,
              builder ->
                  builder
                      .windowBy(FixedWindows.of(Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();

  final Distinct operator = (Distinct) TestUtils.getProducer(deduplicated);
  assertTrue(operator.getWindow().isPresent());

  @SuppressWarnings("unchecked")
  final WindowDesc<?> actual = WindowDesc.of((Window) operator.getWindow().get());
  assertEquals(FixedWindows.of(org.joda.time.Duration.standardHours(1)), actual.getWindowFn());
  assertEquals(DefaultTrigger.of(), actual.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, actual.getAccumulationMode());
}
Example #6
Source File: GroupByKeyTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that the output of {@code GroupByKey} reports a window function compatible with the
 * one-minute fixed windows applied to its (empty) input.
 */
@Test
@Category(NeedsRunner.class)
public void testIdentityWindowFnPropagation() {
  List<KV<String, Integer>> noPairs = Arrays.asList();
  Duration windowSize = Duration.standardMinutes(1);

  PCollection<KV<String, Integer>> windowedPairs =
      p.apply(
              Create.of(noPairs)
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(FixedWindows.of(windowSize)));
  PCollection<KV<String, Iterable<Integer>>> grouped = windowedPairs.apply(GroupByKey.create());

  p.run();

  // Compared against a freshly built FixedWindows instance of the same size.
  Assert.assertTrue(
      grouped.getWindowingStrategy().getWindowFn().isCompatible(FixedWindows.of(windowSize)));
}
Example #7
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Combines timestamped key/value pairs windowed into sessions with a 5 ms gap, both globally
 * (over the values) and per key, and checks the expected results of each.
 */
@Test
@Category(ValidatesRunner.class)
public void testSessionsCombine() {
  PCollection<KV<String, Integer>> sessioned =
      pipeline
          .apply(
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(0L)),
                      TimestampedValue.of(KV.of("a", 1), new Instant(4L)),
                      TimestampedValue.of(KV.of("a", 4), new Instant(7L)),
                      TimestampedValue.of(KV.of("b", 1), new Instant(10L)),
                      TimestampedValue.of(KV.of("b", 13), new Instant(16L)))
                  .withCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of())))
          .apply(Window.into(Sessions.withGapDuration(Duration.millis(5))));

  PCollection<Integer> globalSums =
      sessioned.apply(Values.create()).apply(Combine.globally(new SumInts()).withoutDefaults());
  PCollection<KV<String, String>> perKey = sessioned.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(globalSums).containsInAnyOrder(7, 13);
  PAssert.that(perKey)
      .containsInAnyOrder(Arrays.asList(KV.of("a", "114"), KV.of("b", "1"), KV.of("b", "13")));

  pipeline.run();
}
Example #8
Source File: BeamSqlDslJoinTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs a SQL self-join over the unbounded orders, windowed into 50-second fixed windows and
 * registered under two table names, and checks the joined rows.
 */
@Test
public void testJoinsUnboundedWithinWindowsWithDefaultTrigger() throws Exception {
  String joinQuery =
      "SELECT o1.order_id, o1.price, o1.site_id, o2.order_id, o2.price, o2.site_id "
          + "FROM ORDER_DETAILS1 o1"
          + " JOIN ORDER_DETAILS2 o2"
          + " on "
          + " o1.order_id=o2.site_id AND o2.price=o1.site_id";

  PCollection<Row> windowedOrders =
      ordersUnbounded()
          .apply("window", Window.into(FixedWindows.of(Duration.standardSeconds(50))));

  // The same windowed collection backs both sides of the join.
  PCollectionTuple tables =
      tuple("ORDER_DETAILS1", windowedOrders, "ORDER_DETAILS2", windowedOrders);

  PAssert.that(tables.apply("sql", SqlTransform.query(joinQuery)))
      .containsInAnyOrder(
          TestUtils.RowsBuilder.of(RESULT_ROW_TYPE)
              .addRows(1, 2, 2, 2, 2, 1, 1, 4, 3, 3, 3, 1)
              .getRows());

  pipeline.run();
}
Example #9
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Counts occurrences per element after assigning timestamped strings to sliding windows of
 * 2 ms that start every 1 ms, and checks the expected counts.
 */
@Test
public void testCountPerElementWithSlidingWindows() {
  SlidingWindows slidingWindows = SlidingWindows.of(Duration.millis(2)).every(Duration.millis(1));

  PCollection<String> windowedLetters =
      pipeline
          .apply(
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(1)),
                  TimestampedValue.of("a", new Instant(2)),
                  TimestampedValue.of("b", new Instant(3)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply(Window.into(slidingWindows));

  PCollection<KV<String, Long>> counts = windowedLetters.apply(Count.perElement());

  PAssert.that(counts)
      .containsInAnyOrder(
          KV.of("a", 1L),
          KV.of("a", 2L),
          KV.of("a", 1L),
          KV.of("b", 1L),
          KV.of("b", 2L),
          KV.of("b", 1L));

  pipeline.run();
}
Example #10
Source File: ReduceWindowTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks that windowing configured inside {@code applyIf(true, ...)} is present on the built
 * {@code ReduceWindow} operator with the expected window function, trigger and accumulation
 * mode.
 */
@Test
public void testWindow_applyIf() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());

  final PCollection<Long> reduced =
      ReduceWindow.of(input)
          .reduceBy(e -> 1L)
          .withSortedValues(String::compareTo)
          .applyIf(
              true,
              builder ->
                  builder
                      .windowBy(FixedWindows.of(org.joda.time.Duration.standardHours(1)))
                      .triggeredBy(DefaultTrigger.of())
                      .discardingFiredPanes())
          .output();

  final ReduceWindow operator = (ReduceWindow) TestUtils.getProducer(reduced);
  assertTrue(operator.getWindow().isPresent());

  @SuppressWarnings("unchecked")
  final WindowDesc<?> actual = WindowDesc.of((Window) operator.getWindow().get());
  assertEquals(FixedWindows.of(org.joda.time.Duration.standardHours(1)), actual.getWindowFn());
  assertEquals(DefaultTrigger.of(), actual.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, actual.getAccumulationMode());
}
Example #11
Source File: BigQueryMerger.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Re-windows the input into the global window with a repeating processing-time trigger aligned
 * to {@code intervalDuration}, groups by key, and emits one value per grouped key: the first
 * one returned by the grouped iterable.
 *
 * @param input the key/value pairs to merge
 * @return one key/value pair per non-empty group
 */
@Override
public PCollection<KV<K, V>> expand(PCollection<KV<K, V>> input) {
  PCollection<KV<K, V>> rewindowed =
      input.apply(
          Window.<KV<K, V>>into(new GlobalWindows())
              .discardingFiredPanes()
              .triggering(
                  Repeatedly.forever(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(Duration.ZERO)
                          .alignedTo(intervalDuration, org.joda.time.Instant.now()))));

  PCollection<KV<K, Iterable<V>>> grouped = rewindowed.apply(GroupByKey.create());

  return grouped.apply(
      ParDo.of(
          new DoFn<KV<K, Iterable<V>>, KV<K, V>>() {
            @ProcessElement
            public void process(ProcessContext ctx) {
              LOG.debug(
                  "TS: {} | Element: {} | Pane: {}",
                  ctx.timestamp(),
                  ctx.element(),
                  ctx.pane());
              Iterator<V> values = ctx.element().getValue().iterator();
              // Groups without any value produce no output.
              if (values.hasNext()) {
                ctx.output(KV.of(ctx.element().getKey(), values.next()));
              }
            }
          }));
}
Example #12
Source File: PeriodicImpulse.java From beam with Apache License 2.0 | 6 votes |
/**
 * Creates a single sequence definition from the configured start, stop and fire interval,
 * expands it with {@code PeriodicSequence}, and — when {@code applyWindowing} is set — puts the
 * result into fixed windows as long as the fire interval.
 *
 * @param input the pipeline start
 * @return the generated instants, windowed only if {@code applyWindowing} is set
 */
@Override
public PCollection<Instant> expand(PBegin input) {
  PeriodicSequence.SequenceDefinition definition =
      new PeriodicSequence.SequenceDefinition(startTimestamp, stopTimestamp, fireInterval);

  PCollection<Instant> impulses =
      input
          .apply(Create.<PeriodicSequence.SequenceDefinition>of(definition))
          .apply(PeriodicSequence.create());

  if (!this.applyWindowing) {
    return impulses;
  }
  return impulses.apply(
      Window.<Instant>into(FixedWindows.of(Duration.millis(fireInterval.getMillis()))));
}
Example #13
Source File: CombineTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Sums the integers 1..5 globally with a fanout of 2 under an accumulating, per-element
 * trigger in the global window, and checks that the total 15 is among the values produced by
 * {@code GetLast}.
 */
@Test
@Category(ValidatesRunner.class)
public void testHotKeyCombiningWithAccumulationMode() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4, 5));

  PCollection<Integer> lastSums =
      numbers
          .apply(
              Window.<Integer>into(new GlobalWindows())
                  .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
                  .accumulatingFiredPanes()
                  .withAllowedLateness(new Duration(0), ClosingBehavior.FIRE_ALWAYS))
          .apply(Sum.integersGlobally().withoutDefaults().withFanout(2))
          .apply(ParDo.of(new GetLast()));

  PAssert.that(lastSums)
      .satisfies(
          emitted -> {
            assertThat(emitted, hasItem(15));
            return null;
          });

  pipeline.run();
}
Example #14
Source File: DistinctTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a named {@code Distinct} operator with windowing, trigger, accumulation mode and
 * allowed lateness, and checks that each setting can be read back from the operator.
 */
@Test
public void testBuild() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();

  final PCollection<String> deduplicated =
      Distinct.named("Distinct1")
          .of(input)
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();

  final Distinct operator = (Distinct) TestUtils.getProducer(deduplicated);
  assertTrue(operator.getName().isPresent());
  assertEquals("Distinct1", operator.getName().get());
  assertTrue(operator.getWindow().isPresent());

  @SuppressWarnings("unchecked")
  final WindowDesc<?> actual = WindowDesc.of((Window) operator.getWindow().get());
  assertEquals(hourlyWindows, actual.getWindowFn());
  assertEquals(defaultTrigger, actual.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, actual.getAccumulationMode());
  assertEquals(Duration.millis(1000), actual.getAllowedLateness());
}
Example #15
Source File: CountByKeyTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a named {@code CountByKey} operator with a key extractor, windowing, trigger,
 * accumulation mode and allowed lateness, and checks that each setting can be read back from
 * the operator.
 */
@Test
public void testBuild() {
  final PCollection<String> input = TestUtils.createMockDataset(TypeDescriptors.strings());
  final FixedWindows hourlyWindows = FixedWindows.of(org.joda.time.Duration.standardHours(1));
  final DefaultTrigger defaultTrigger = DefaultTrigger.of();

  final PCollection<KV<String, Long>> counts =
      CountByKey.named("CountByKey1")
          .of(input)
          .keyBy(s -> s)
          .windowBy(hourlyWindows)
          .triggeredBy(defaultTrigger)
          .discardingFiredPanes()
          .withAllowedLateness(Duration.millis(1000))
          .output();

  final CountByKey operator = (CountByKey) TestUtils.getProducer(counts);
  assertTrue(operator.getName().isPresent());
  assertEquals("CountByKey1", operator.getName().get());
  assertNotNull(operator.getKeyExtractor());
  assertTrue(operator.getWindow().isPresent());

  final WindowDesc<?> actual = WindowDesc.of((Window<?>) operator.getWindow().get());
  assertEquals(hourlyWindows, actual.getWindowFn());
  assertEquals(defaultTrigger, actual.getTrigger());
  assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, actual.getAccumulationMode());
  assertEquals(Duration.millis(1000), actual.getAllowedLateness());
}
Example #16
Source File: MyBeamJob.java From hazelcast-jet-demos with Apache License 2.0 | 6 votes |
/**
 * Assembles the job: read from the unbounded source, reformat and timestamp each element,
 * window into {@code ONE_SECOND} fixed windows with a repeating processing-time trigger, and
 * write the strings as text files with the "beam-output" prefix into the current directory.
 *
 * @param pipelineOptions options used to create the pipeline
 * @return the assembled pipeline (not yet run)
 */
public static Pipeline build(PipelineOptions pipelineOptions) {
  Pipeline job = Pipeline.create(pipelineOptions);

  job.apply("unbounded-source", Read.from(new MyUnboundedSource("beam-input")))
      .apply("reformat-and-timestamp", ParDo.of(new MyEnrichAndReformatFn()))
      .apply(
          "window",
          Window.<String>into(FixedWindows.of(ONE_SECOND))
              .triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane()))
              .discardingFiredPanes()
              .withAllowedLateness(ONE_SECOND))
      .apply(
          "sink",
          FileIO.<String>write()
              .via(TextIO.sink())
              .to(".")
              .withPrefix("beam-output")
              .withNumShards(1));

  return job;
}
Example #17
Source File: StreamWordCount.java From beam-starter with Apache License 2.0 | 6 votes |
/**
 * Entry point: reads string values from the Kafka topic "beam", windows them into fixed
 * windows of {@code options.getWindowSize()} minutes, counts words, formats the counts as text
 * and writes them to {@code options.getOutput()}. The runner is set to {@code FlinkRunner}.
 *
 * @param args command-line arguments parsed into {@code Options}
 */
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setRunner(FlinkRunner.class);

  Pipeline pipeline = Pipeline.create(options);

  KafkaIO.Read<byte[], String> kafkaSource =
      KafkaIO.read()
          .withBootstrapServers("192.168.99.100:32771")
          .withTopics(Arrays.asList("beam".split(",")))
          .updateConsumerProperties(ImmutableMap.of("auto.offset.reset", (Object) "earliest"))
          .withValueCoder(StringUtf8Coder.of());

  pipeline
      .apply(kafkaSource.withoutMetadata())
      .apply(Values.<String>create())
      .apply(
          Window.<String>into(
              FixedWindows.of(Duration.standardMinutes(options.getWindowSize()))))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.Write.to(options.getOutput()));

  pipeline.run();
}
Example #18
Source File: PCollectionTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Supplies the PCollections used as test parameters: plain ints, unbounded longs, longs in
 * fixed windows, longs with an alternative coder, and grouped strings.
 *
 * <p>NOTE(review): the collection built with the custom coder and custom windows is applied to
 * the pipeline but its result is neither kept nor returned — confirm whether it was meant to be
 * part of the returned list.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<PCollection<?>> data() {
  Pipeline testPipeline = TestPipeline.create();

  PCollection<Integer> ints = testPipeline.apply("ints", Create.of(1, 2, 3));
  PCollection<Long> longs = testPipeline.apply("unbounded longs", GenerateSequence.from(0));

  PCollection<Long> windowedLongs =
      longs.apply(
          "into fixed windows", Window.into(FixedWindows.of(Duration.standardMinutes(10L))));

  PCollection<KV<String, Iterable<String>>> groupedStrings =
      testPipeline
          .apply(
              "kvs", Create.of(KV.of("foo", "spam"), KV.of("bar", "ham"), KV.of("baz", "eggs")))
          .apply("group", GroupByKey.create());

  PCollection<Long> coderLongs =
      testPipeline
          .apply("counts with alternative coder", GenerateSequence.from(0).to(10))
          .setCoder(BigEndianLongCoder.of());

  testPipeline
      .apply(
          "intsWithCustomCoder",
          Create.of(1, 2).withCoder(new AutoValue_PCollectionTranslationTest_CustomIntCoder()))
      .apply(
          "into custom windows",
          Window.into(new CustomWindows())
              .triggering(
                  AfterWatermark.pastEndOfWindow()
                      .withEarlyFirings(
                          AfterFirst.of(
                              AfterPane.elementCountAtLeast(5),
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(Duration.millis(227L)))))
              .accumulatingFiredPanes()
              .withAllowedLateness(Duration.standardMinutes(12L)));

  return ImmutableList.of(ints, longs, windowedLongs, coderLongs, groupedStrings);
}
Example #19
Source File: SplittableDoFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs the DoFn returned by {@code sdfWithSideInput} over a main input in 2 ms fixed windows
 * with a singleton side input in 4 ms fixed windows, and checks that each main element is
 * paired with the side value expected for its timestamp.
 *
 * @param bounded boundedness passed through to {@code sdfWithSideInput}
 */
private void testWindowedSideInput(IsBounded bounded) {
  PCollection<Integer> mainElements =
      p.apply(
              "main",
              Create.timestamped(
                  TimestampedValue.of(0, new Instant(0)),
                  TimestampedValue.of(1, new Instant(1)),
                  TimestampedValue.of(2, new Instant(2)),
                  TimestampedValue.of(3, new Instant(3)),
                  TimestampedValue.of(4, new Instant(4)),
                  TimestampedValue.of(5, new Instant(5)),
                  TimestampedValue.of(6, new Instant(6)),
                  TimestampedValue.of(7, new Instant(7))))
          .apply("window 2", Window.into(FixedWindows.of(Duration.millis(2))));

  PCollectionView<String> sideView =
      p.apply(
              "side",
              Create.timestamped(
                  TimestampedValue.of("a", new Instant(0)),
                  TimestampedValue.of("b", new Instant(4))))
          .apply("window 4", Window.into(FixedWindows.of(Duration.millis(4))))
          .apply("singleton", View.asSingleton());

  PCollection<String> paired =
      mainElements.apply(
          ParDo.of(sdfWithSideInput(bounded, sideView)).withSideInputs(sideView));

  PAssert.that(paired)
      .containsInAnyOrder("a:0", "a:1", "a:2", "a:3", "b:4", "b:5", "b:6", "b:7");

  p.run();
}
Example #20
Source File: WindowAssignTranslator.java From beam with Apache License 2.0 | 5 votes |
/**
 * Translates a window-assign transform: takes the window function from the output's windowing
 * strategy, flat-maps the input message stream through a {@code WindowAssignOp} built from it,
 * and registers the resulting stream for the output.
 *
 * @param transform the window-assign transform being translated
 * @param node the transform's node in the hierarchy (not used here)
 * @param ctx the translation context providing inputs, outputs and message streams
 */
@Override
public void translate(
    Window.Assign<T> transform, TransformHierarchy.Node node, TranslationContext ctx) {
  final PCollection<T> windowedOutput = ctx.getOutput(transform);

  @SuppressWarnings("unchecked")
  final WindowFn<T, ?> assigner =
      (WindowFn<T, ?>) windowedOutput.getWindowingStrategy().getWindowFn();

  final MessageStream<OpMessage<T>> upstream = ctx.getMessageStream(ctx.getInput(transform));
  final MessageStream<OpMessage<T>> assigned =
      upstream.flatMap(OpAdapter.adapt(new WindowAssignOp<>(assigner)));

  ctx.registerMessageStream(windowedOutput, assigned);
}
Example #21
Source File: SparkMetricsPusherTest.java From beam with Apache License 2.0 | 5 votes |
@Category(StreamingTest.class) @Test public void testInStreamingMode() throws Exception { Instant instant = new Instant(0); CreateStream<Integer> source = CreateStream.of(VarIntCoder.of(), batchDuration()) .emptyBatch() .advanceWatermarkForNextBatch(instant) .nextBatch( TimestampedValue.of(1, instant), TimestampedValue.of(2, instant), TimestampedValue.of(3, instant)) .advanceWatermarkForNextBatch(instant.plus(Duration.standardSeconds(1L))) .nextBatch( TimestampedValue.of(4, instant.plus(Duration.standardSeconds(1L))), TimestampedValue.of(5, instant.plus(Duration.standardSeconds(1L))), TimestampedValue.of(6, instant.plus(Duration.standardSeconds(1L)))) .advanceNextBatchWatermarkToInfinity(); pipeline .apply(source) .apply( Window.<Integer>into(FixedWindows.of(Duration.standardSeconds(3L))) .withAllowedLateness(Duration.ZERO)) .apply(ParDo.of(new CountingDoFn())); pipeline.run(); // give metrics pusher time to push Thread.sleep( (pipeline.getOptions().as(MetricsOptions.class).getMetricsPushPeriod() + 1L) * 1000); assertThat(TestMetricsSink.getCounterValue(COUNTER_NAME), is(6L)); }
Example #22
Source File: SparkMetricsSinkTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Streams six words in two batches through the word count in 3-second fixed windows and checks
 * both the formatted counts and the "emptyLines" metric, which must be absent before the run
 * and equal to 1 afterwards.
 */
@Category(StreamingTest.class)
@Test
public void testInStreamingMode() throws Exception {
  assertThat(InMemoryMetrics.valueOf("emptyLines"), is(nullValue()));

  Instant instant = new Instant(0);
  Instant oneSecondLater = instant.plus(Duration.standardSeconds(1L));
  Duration batchInterval =
      Duration.millis(
          pipeline.getOptions().as(SparkPipelineOptions.class).getBatchIntervalMillis());

  CreateStream<String> source =
      CreateStream.of(StringUtf8Coder.of(), batchInterval)
          .emptyBatch()
          .advanceWatermarkForNextBatch(instant)
          .nextBatch(
              TimestampedValue.of(WORDS.get(0), instant),
              TimestampedValue.of(WORDS.get(1), instant),
              TimestampedValue.of(WORDS.get(2), instant))
          .advanceWatermarkForNextBatch(instant.plus(Duration.standardSeconds(2L)))
          .nextBatch(
              TimestampedValue.of(WORDS.get(3), oneSecondLater),
              TimestampedValue.of(WORDS.get(4), oneSecondLater),
              TimestampedValue.of(WORDS.get(5), oneSecondLater))
          .advanceNextBatchWatermarkToInfinity();

  PCollection<String> formattedCounts =
      pipeline
          .apply(source)
          .apply(
              Window.<String>into(FixedWindows.of(Duration.standardSeconds(3L)))
                  .withAllowedLateness(Duration.ZERO))
          .apply(new WordCount.CountWords())
          .apply(MapElements.via(new WordCount.FormatAsTextFn()));

  PAssert.that(formattedCounts).containsInAnyOrder(EXPECTED_COUNTS);
  pipeline.run();

  assertThat(InMemoryMetrics.<Double>valueOf("emptyLines"), is(1d));
}
Example #23
Source File: SumByKey.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates the operator. Name, output type, key extractor, key type and window are handed to
 * the superclass; the value extractor is stored on this instance.
 *
 * @param name optional operator name
 * @param keyExtractor function extracting the key from an input element
 * @param keyType optional type descriptor of the key
 * @param valueExtractor function extracting the {@code Long} value from an input element
 * @param window optional windowing
 * @param outputType type descriptor of the key/sum output pairs
 */
private SumByKey(
    @Nullable String name,
    UnaryFunction<InputT, KeyT> keyExtractor,
    @Nullable TypeDescriptor<KeyT> keyType,
    UnaryFunction<InputT, Long> valueExtractor,
    @Nullable Window<InputT> window,
    TypeDescriptor<KV<KeyT, Long>> outputType) {

  super(name, outputType, keyExtractor, keyType, window);

  this.valueExtractor = valueExtractor;
}
Example #24
Source File: DistinctTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Feeds the same three keyed values twice (before and after advancing processing time by one
 * minute) through a repeatedly-triggered, accumulating one-minute window and checks that
 * {@code Distinct} by key yields each pair exactly once.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValues() {
  Instant start = new Instant(0);

  TestStream<KV<Integer, String>> stream =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(start)
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), start),
              TimestampedValue.of(KV.of(2, "k2"), start.plus(Duration.standardSeconds(10))),
              TimestampedValue.of(KV.of(3, "k3"), start.plus(Duration.standardSeconds(20))))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .addElements(
              TimestampedValue.of(KV.of(1, "k1"), start.plus(Duration.standardSeconds(30))),
              TimestampedValue.of(KV.of(2, "k2"), start.plus(Duration.standardSeconds(40))),
              TimestampedValue.of(KV.of(3, "k3"), start.plus(Duration.standardSeconds(50))))
          .advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> distinctPairs =
      triggeredDistinctRepresentativePipeline
          .apply(stream)
          .apply(
              Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
                  .triggering(
                      Repeatedly.forever(
                          AfterProcessingTime.pastFirstElementInPane()
                              .plusDelayOf(Duration.standardSeconds(30))))
                  .withAllowedLateness(Duration.ZERO)
                  .accumulatingFiredPanes())
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(distinctPairs)
      .containsInAnyOrder(KV.of(1, "k1"), KV.of(2, "k2"), KV.of(3, "k3"));

  triggeredDistinctRepresentativePipeline.run();
}
Example #25
Source File: DistinctTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies {@code Distinct} to nine timestamped strings in 30-second fixed windows and checks
 * the distinct values found in each of the three windows.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testWindowedDistinct() {
  Instant start = new Instant(0);

  TestStream<String> stream =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(start)
          .addElements(
              TimestampedValue.of("k1", start),
              TimestampedValue.of("k2", start.plus(Duration.standardSeconds(10))),
              TimestampedValue.of("k3", start.plus(Duration.standardSeconds(20))),
              TimestampedValue.of("k1", start.plus(Duration.standardSeconds(30))),
              TimestampedValue.of("k2", start.plus(Duration.standardSeconds(40))),
              TimestampedValue.of("k3", start.plus(Duration.standardSeconds(50))),
              TimestampedValue.of("k4", start.plus(Duration.standardSeconds(60))),
              TimestampedValue.of("k5", start.plus(Duration.standardSeconds(70))),
              TimestampedValue.of("k6", start.plus(Duration.standardSeconds(80))))
          .advanceWatermarkToInfinity();

  PCollection<String> distinctStrings =
      windowedDistinctPipeline
          .apply(stream)
          .apply(Window.into(FixedWindows.of(Duration.standardSeconds(30))))
          .apply(Distinct.create());

  // The three consecutive 30-second windows covering [0 s, 90 s).
  IntervalWindow firstWindow =
      new IntervalWindow(start, start.plus(Duration.standardSeconds(30)));
  IntervalWindow secondWindow =
      new IntervalWindow(
          start.plus(Duration.standardSeconds(30)), start.plus(Duration.standardSeconds(60)));
  IntervalWindow thirdWindow =
      new IntervalWindow(
          start.plus(Duration.standardSeconds(60)), start.plus(Duration.standardSeconds(90)));

  PAssert.that(distinctStrings).inWindow(firstWindow).containsInAnyOrder("k1", "k2", "k3");
  PAssert.that(distinctStrings).inWindow(secondWindow).containsInAnyOrder("k1", "k2", "k3");
  PAssert.that(distinctStrings).inWindow(thirdWindow).containsInAnyOrder("k4", "k5", "k6");

  windowedDistinctPipeline.run();
}
Example #26
Source File: AutoCompleteTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testWindowedAutoComplete() { List<TimestampedValue<String>> words = Arrays.asList( TimestampedValue.of("xA", new Instant(1)), TimestampedValue.of("xA", new Instant(1)), TimestampedValue.of("xB", new Instant(1)), TimestampedValue.of("xB", new Instant(2)), TimestampedValue.of("xB", new Instant(2))); PCollection<String> input = p.apply(Create.timestamped(words)); PCollection<KV<String, List<CompletionCandidate>>> output = input .apply(Window.into(SlidingWindows.of(new Duration(2)))) .apply(new ComputeTopCompletions(2, recursive)); PAssert.that(output) .containsInAnyOrder( // Window [0, 2) KV.of("x", parseList("xA:2", "xB:1")), KV.of("xA", parseList("xA:2")), KV.of("xB", parseList("xB:1")), // Window [1, 3) KV.of("x", parseList("xB:3", "xA:2")), KV.of("xA", parseList("xA:2")), KV.of("xB", parseList("xB:3")), // Window [2, 3) KV.of("x", parseList("xB:2")), KV.of("xB", parseList("xB:2"))); p.run().waitUntilFinish(); }
Example #27
Source File: PipelineTranslationTest.java From beam with Apache License 2.0 | 5 votes |
@Parameters(name = "{index}") public static Iterable<Pipeline> testPipelines() { Pipeline trivialPipeline = Pipeline.create(); trivialPipeline.apply(Create.of(1, 2, 3)); Pipeline sideInputPipeline = Pipeline.create(); final PCollectionView<String> singletonView = sideInputPipeline.apply(Create.of("foo")).apply(View.asSingleton()); sideInputPipeline .apply(Create.of("main input")) .apply( ParDo.of( new DoFn<String, String>() { @ProcessElement public void process(ProcessContext c) { // actually never executed and no effect on translation c.sideInput(singletonView); } }) .withSideInputs(singletonView)); Pipeline complexPipeline = Pipeline.create(); BigEndianLongCoder customCoder = BigEndianLongCoder.of(); PCollection<Long> elems = complexPipeline.apply(GenerateSequence.from(0L).to(207L)); PCollection<Long> counted = elems.apply(Count.globally()).setCoder(customCoder); PCollection<Long> windowed = counted.apply( Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7))) .triggering( AfterWatermark.pastEndOfWindow() .withLateFirings(AfterPane.elementCountAtLeast(19))) .accumulatingFiredPanes() .withAllowedLateness(Duration.standardMinutes(3L))); final WindowingStrategy<?, ?> windowedStrategy = windowed.getWindowingStrategy(); PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.of("foo")); PCollection<KV<String, Iterable<Long>>> grouped = keyed.apply(GroupByKey.create()); return ImmutableList.of(trivialPipeline, sideInputPipeline, complexPipeline); }
Example #28
Source File: GroupByKeyTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testGroupByKeyEndOfWindowLateFiringsOk() { PCollection<KV<String, String>> input = p.apply(Create.of(KV.of("hello", "goodbye"))) .apply( Window.<KV<String, String>>configure() .discardingFiredPanes() .triggering( AfterWatermark.pastEndOfWindow() .withLateFirings(AfterPane.elementCountAtLeast(1))) .withAllowedLateness(Duration.millis(10))); // OK input.apply(GroupByKey.create()); }
Example #29
Source File: DistinctTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Regression test for the case where every value has already been emitted by a speculative
 * (early) firing: the subsequent on-time firing used to produce a null KV.
 */
@Test
@Category({NeedsRunner.class, UsesTestStreamWithProcessingTime.class})
public void testTriggeredDistinctRepresentativeValuesEmpty() {
  Instant start = new Instant(0);

  TestStream<KV<Integer, String>> stream =
      TestStream.create(KvCoder.of(VarIntCoder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(start)
          .addElements(TimestampedValue.of(KV.of(1, "k1"), start))
          .advanceProcessingTime(Duration.standardMinutes(1))
          .advanceWatermarkToInfinity();

  PCollection<KV<Integer, String>> distinctPairs =
      triggeredDistinctRepresentativePipeline
          .apply(stream)
          .apply(
              Window.<KV<Integer, String>>into(FixedWindows.of(Duration.standardMinutes(1)))
                  .triggering(
                      AfterWatermark.pastEndOfWindow()
                          .withEarlyFirings(
                              AfterProcessingTime.pastFirstElementInPane()
                                  .plusDelayOf(Duration.standardSeconds(30))))
                  .withAllowedLateness(Duration.ZERO)
                  .discardingFiredPanes())
          .apply(
              Distinct.withRepresentativeValueFn(new Keys<Integer>())
                  .withRepresentativeType(TypeDescriptor.of(Integer.class)));

  PAssert.that(distinctPairs).containsInAnyOrder(KV.of(1, "k1"));

  triggeredDistinctRepresentativePipeline.run();
}
Example #30
Source File: SideInputLoadTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Puts the input into fixed windows unless the configured window count is exactly 1, in which
 * case the input is returned unchanged. The window length in milliseconds is the number of
 * source records divided by the window count.
 *
 * @param input the collection to window
 * @return the windowed collection, or {@code input} itself when the window count is 1
 */
private PCollection<KV<byte[], byte[]>> applyWindowingIfPresent(
    PCollection<KV<byte[], byte[]>> input) {
  if (options.getWindowCount() == 1) {
    return input;
  }
  long millisPerWindow = sourceOptions.numRecords / options.getWindowCount();
  return input.apply(Window.into(FixedWindows.of(Duration.millis(millisPerWindow))));
}