Java Code Examples for org.apache.beam.sdk.testing.TestStream#Builder
The following examples show how to use
org.apache.beam.sdk.testing.TestStream#Builder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestUnboundedTable.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds the reader for this table by replaying {@code timestampedRows} through a
 * {@link TestStream}.
 *
 * <p>For every entry, the watermark is first advanced to the entry's key (a duration added to the
 * epoch instant), then each row of the entry is emitted with the event timestamp read from its
 * {@code timestampField}. The watermark is advanced to infinity after the last entry.
 *
 * @param begin the pipeline root the stream is applied to
 * @return the resulting collection, with its row schema set from {@code getSchema()}
 */
@Override
public PCollection<Row> buildIOReader(PBegin begin) {
  TestStream.Builder<Row> streamBuilder = TestStream.create(schema);

  for (Pair<Duration, List<Row>> batch : timestampedRows) {
    // The watermark moves before the batch's rows are added.
    Instant batchWatermark = new Instant(0).plus(batch.getKey());
    streamBuilder = streamBuilder.advanceWatermarkTo(batchWatermark);

    for (Row row : batch.getValue()) {
      Instant eventTime = new Instant(row.getDateTime(timestampField));
      streamBuilder = streamBuilder.addElements(TimestampedValue.of(row, eventTime));
    }
  }

  // The transform name carries a counter-derived suffix.
  String transformName = "MockedUnboundedTable_" + COUNTER.incrementAndGet();
  return begin
      .apply(transformName, streamBuilder.advanceWatermarkToInfinity())
      .setRowSchema(getSchema());
}
Example 2
Source File: TestUtils.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds an unbounded {@link PCollection} in the {@link Pipeline} set by
 * {@link #inPipeline(Pipeline)}.
 *
 * <p>Requires a non-null pipeline and at least one row. When no type has been set, the schema of
 * the first row is used (and stored in {@code type}). If a timestamp field was set with
 * {@link #withTimestampField(String)}, the watermark is advanced to that field's value before
 * each row is added.
 *
 * @return the collection produced by applying the assembled {@link TestStream} to the pipeline
 */
public PCollection<Row> buildUnbounded() {
  checkArgument(pipeline != null);
  checkArgument(rows.size() > 0);

  if (type == null) {
    type = rows.get(0).getSchema();
  }

  final boolean advanceWatermarkPerRow = timestampField != null;
  TestStream.Builder<Row> streamBuilder = TestStream.create(type);
  for (Row row : rows) {
    if (advanceWatermarkPerRow) {
      Instant rowTime = new Instant(row.getDateTime(timestampField));
      streamBuilder = streamBuilder.advanceWatermarkTo(rowTime);
    }
    streamBuilder = streamBuilder.addElements(row);
  }

  return PBegin.in(pipeline)
      .apply("unboundedPCollection", streamBuilder.advanceWatermarkToInfinity());
}
Example 3
Source File: ParDoTest.java From beam with Apache License 2.0 | 6 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithTestStream() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> stream = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { stream = stream.addElements(stamp); } testTimeSortedInput(numElements, pipeline.apply(stream.advanceWatermarkToInfinity())); }
Example 4
Source File: ParDoTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link TestStream} of {@code numTestElements} keyed elements spaced 1000 ms apart
 * starting at {@code now}, advancing the watermark after every tenth element, and passes it to
 * {@code testEventTimeTimerOrderingWithInputPTransform}.
 */
@Test
@Category({
  ValidatesRunner.class,
  UsesTimersInParDo.class,
  UsesTestStream.class,
  UsesStatefulParDo.class,
  UsesStrictTimerOrdering.class
})
public void testEventTimeTimerOrdering() throws Exception {
  final int numTestElements = 100;
  final Instant now = new Instant(1500000000000L);

  KvCoder<String, String> coder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  TestStream.Builder<KV<String, String>> builder =
      TestStream.create(coder).advanceWatermarkTo(new Instant(0));

  // "emitted" counts elements added so far, including the current one (1-based).
  for (int emitted = 1; emitted <= numTestElements; emitted++) {
    int index = emitted - 1;
    KV<String, String> element = KV.of("dummy", String.valueOf(index));
    builder = builder.addElements(TimestampedValue.of(element, now.plus(index * 1000)));

    // After every tenth element, move the watermark just past the elements emitted so far.
    if (emitted % 10 == 0) {
      builder = builder.advanceWatermarkTo(now.plus(emitted * 1000));
    }
  }

  testEventTimeTimerOrderingWithInputPTransform(
      now, numTestElements, builder.advanceWatermarkToInfinity());
}
Example 5
Source File: BeamSqlDslBase.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates an unbounded collection from every row in {@code rowsInTableA}, advancing the watermark
 * to each row's {@code "f_timestamp"} value before adding it, and windows the result into fixed
 * windows of 365 days.
 *
 * @return the windowed collection
 */
private PCollection<Row> prepareUnboundedPCollection1() {
  TestStream.Builder<Row> streamBuilder = TestStream.create(schemaInTableA);

  for (Row row : rowsInTableA) {
    Instant rowTime = new Instant(row.getDateTime("f_timestamp"));
    streamBuilder = streamBuilder.advanceWatermarkTo(rowTime).addElements(row);
  }

  PCollection<Row> unbounded =
      PBegin.in(pipeline).apply("unboundedInput1", streamBuilder.advanceWatermarkToInfinity());

  return unbounded.apply(
      "unboundedInput1.fixedWindow1year",
      Window.into(FixedWindows.of(Duration.standardDays(365))));
}
Example 6
Source File: BeamSqlDslBase.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates an unbounded collection containing only the first row of {@code rowsInTableA}, with the
 * watermark advanced to that row's {@code "f_timestamp"} value beforehand, and windows the result
 * into fixed windows of 365 days.
 *
 * @return the windowed collection
 */
private PCollection<Row> prepareUnboundedPCollection2() {
  Row firstRow = rowsInTableA.get(0);
  Instant firstRowTime = new Instant(firstRow.getDateTime("f_timestamp"));

  TestStream.Builder<Row> streamBuilder =
      TestStream.create(schemaInTableA).advanceWatermarkTo(firstRowTime).addElements(firstRow);

  PCollection<Row> unbounded =
      PBegin.in(pipeline).apply("unboundedInput2", streamBuilder.advanceWatermarkToInfinity());

  return unbounded.apply(
      "unboundedInput2.fixedWindow1year",
      Window.into(FixedWindows.of(Duration.standardDays(365))));
}
Example 7
Source File: ParDoTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithLateDataAndAllowedLateness() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp))); if (stamp == 100) { // advance watermark when we have 100 remaining elements // all the rest are going to be late elements input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp)); } } testTimeSortedInput( numElements, pipeline .apply(input.advanceWatermarkToInfinity()) .apply( Window.<Long>into(new GlobalWindows()) .withAllowedLateness(Duration.millis(5000)))); }
Example 8
Source File: ParDoTest.java From beam with Apache License 2.0 | 5 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testRequiresTimeSortedInputWithLateData() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp))); if (stamp == 100) { // advance watermark when we have 100 remaining elements // all the rest are going to be late elements input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp)); } } testTimeSortedInput( numElements - 100, numElements - 1, pipeline.apply(input.advanceWatermarkToInfinity()), // cannot validate exactly which data gets dropped, because that is runner dependent false); }
Example 9
Source File: BeamModelTest.java From streamingbook with Apache License 2.0 | 4 votes |
TestStream<KV<String, Integer>> createStream(WatermarkType watermark, boolean extraSix) { TestStream.Builder<KV<String, Integer>> stream = TestStream.create( KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())) .advanceWatermarkTo(BASE_TIME) // Test assumes processing time begins there as well. .advanceProcessingTime(to("12:04:59")) .addElements(score("TeamX", 5, "12:00:26")) .advanceProcessingTime(to("12:05:39")) .addElements(score("TeamX", 7, "12:02:24")) // Fudge the event time to 2:24 to make sessions connect .advanceProcessingTime(to("12:06:00")); if (watermark == WatermarkType.HEURISTIC) stream = stream.advanceWatermarkTo(parseTime("12:02:00")); stream = stream .advanceProcessingTime(to("12:06:13")) .addElements(score("TeamX", 3, "12:03:39")) .advanceProcessingTime(to("12:06:39")) .addElements(score("TeamX", 4, "12:04:19")); if (extraSix) stream = stream.addElements(score("TeamX", 6, "12:00:53")); stream = stream .advanceProcessingTime(to("12:07:06")) .addElements(score("TeamX", 8, "12:03:06")) .advanceProcessingTime(to("12:07:19")) .addElements(score("TeamX", 3, "12:06:39")) .advanceProcessingTime(to("12:07:30")); if (watermark == WatermarkType.HEURISTIC) stream = stream .advanceWatermarkTo(parseTime("12:04:00")) .advanceProcessingTime(to("12:07:40")) .advanceWatermarkTo(parseTime("12:06:00")); stream = stream .advanceProcessingTime(to("12:08:19")) .addElements(score("TeamX", 9, "12:01:25")) // Fudge the event time to 1:25 to make sessions connect .advanceProcessingTime(to("12:08:39")); if (watermark == WatermarkType.PERFECT) stream = stream.advanceWatermarkTo(parseTime("12:02:00")); stream = stream .addElements(score("TeamX", 8, "12:07:26")) .advanceProcessingTime(to("12:09:00")) .addElements(score("TeamX", 1, "12:07:46")); if (watermark == WatermarkType.PERFECT) { stream = stream .advanceWatermarkTo(parseTime("12:04:00")) .advanceProcessingTime(to("12:09:10")) .advanceWatermarkTo(parseTime("12:06:00")) .advanceProcessingTime(to("12:09:20")) 
.advanceWatermarkTo(parseTime("12:08:00")); } stream = stream.advanceProcessingTime(to("12:09:30")); if (watermark == WatermarkType.HEURISTIC) stream = stream.advanceWatermarkTo(parseTime("12:08:30")); stream = stream.advanceProcessingTime(to("12:10:00")); return stream.advanceWatermarkToInfinity(); }
Example 10
Source File: WaitTest.java From beam with Apache License 2.0 | 4 votes |
/** * Generates a {@link TestStream} of the given duration containing the values [0, numElements) and * the same number of random but monotonic watermark updates, with each element within * allowedLateness of the respective watermark update. * * <p>TODO: Consider moving this into TestStream if it's useful enough. */ private PCollection<Long> generateStreamWithBoundedDisorder( String name, Instant base, Duration totalDuration, int numElements, Duration allowedLateness) { TestStream.Builder<Long> stream = TestStream.create(VarLongCoder.of()); // Generate numElements random watermark updates. After each one also generate an element within // allowedLateness of it. List<Instant> watermarks = Lists.newArrayList(); for (int i = 0; i < numElements; ++i) { watermarks.add(base.plus(new Duration((long) (totalDuration.getMillis() * Math.random())))); } Collections.sort(watermarks); List<Event<Long>> events = Lists.newArrayList(); for (int i = 0; i < numElements; ++i) { Instant processingTimestamp = base.plus((long) (1.0 * i * totalDuration.getMillis() / (numElements + 1))); Instant watermark = watermarks.get(i); Instant elementTimestamp = watermark.minus((long) (Math.random() * allowedLateness.getMillis())); events.add(new Event<>(processingTimestamp, watermark)); events.add(new Event<>(processingTimestamp, TimestampedValue.of((long) i, elementTimestamp))); } Instant lastProcessingTime = base; for (Event<Long> event : events) { Duration processingTimeDelta = new Duration(lastProcessingTime, event.processingTime); if (processingTimeDelta.getMillis() > 0) { stream = stream.advanceProcessingTime(processingTimeDelta); } lastProcessingTime = event.processingTime; if (event.element != null) { stream = stream.addElements(event.element); } else { stream = stream.advanceWatermarkTo(event.watermarkUpdate); } } return p.apply(name, stream.advanceWatermarkToInfinity()); }
Example 11
Source File: ParDoTest.java From beam with Apache License 2.0 | 4 votes |
@Test @Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesRequiresTimeSortedInput.class, UsesStrictTimerOrdering.class, UsesTestStream.class }) public void testTwoRequiresTimeSortedInputWithLateData() { // generate list long enough to rule out random shuffle in sorted order int numElements = 1000; List<Long> eventStamps = LongStream.range(0, numElements) .mapToObj(i -> numElements - i) .collect(Collectors.toList()); TestStream.Builder<Long> input = TestStream.create(VarLongCoder.of()); for (Long stamp : eventStamps) { input = input.addElements(TimestampedValue.of(stamp, Instant.ofEpochMilli(stamp))); if (stamp == 100) { // advance watermark when we have 100 remaining elements // all the rest are going to be late elements input = input.advanceWatermarkTo(Instant.ofEpochMilli(stamp)); } } // apply the sorted function for the first time PCollection<Long> first = pipeline .apply(input.advanceWatermarkToInfinity()) .apply(WithTimestamps.of(e -> Instant.ofEpochMilli(e))) .apply( "first.MapElements", MapElements.into( TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs())) .via(e -> KV.of("", e))) .apply("first.ParDo", ParDo.of(timeSortedDoFn())) .apply(MapElements.into(TypeDescriptors.longs()).via(e -> (long) e)); // apply the test to the already sorted outcome so that we test that we don't loose any // more data testTimeSortedInputAlreadyHavingStamps( numElements - 100, numElements - 1, first, // cannot validate exactly which data gets dropped, because that is runner dependent false); }