org.apache.beam.sdk.io.range.OffsetRange Java Examples
The following examples show how to use
org.apache.beam.sdk.io.range.OffsetRange.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PCollectionViewsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs {@code computeOverlappingRanges} on four ranges nested inside one another and checks the
 * count reported for every resulting non-overlapping segment.
 */
@Test
public void testNestedOverlaps() {
  Iterable<OffsetRange> nestedRanges =
      Arrays.asList(range(0, 8), range(1, 7), range(2, 6), range(3, 5));
  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(nestedRanges);

  // The expected count rises by one per nesting level up to [3, 5) and then falls back again.
  Map<Object, Object> expectedCounts =
      ImmutableMap.builder()
          .put(range(0, 1), 1)
          .put(range(1, 2), 2)
          .put(range(2, 3), 3)
          .put(range(3, 5), 4)
          .put(range(5, 6), 3)
          .put(range(6, 7), 2)
          .put(range(7, 8), 1)
          .build();

  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(nestedRanges, actualCounts);
}
Example #2
Source File: GrowableOffsetRangeTrackerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks progress reporting and a zero-fraction split for a growable tracker that starts at
 * {@code Long.MIN_VALUE} while the estimator reports {@code Long.MAX_VALUE} as the range end.
 */
@Test
public void testLargeRange() throws Exception {
  SimpleEstimator estimator = new SimpleEstimator();
  GrowableOffsetRangeTracker growableTracker =
      new GrowableOffsetRangeTracker(Long.MIN_VALUE, estimator);

  estimator.setEstimateRangeEnd(Long.MAX_VALUE);
  Progress initialProgress = growableTracker.getProgress();
  assertEquals(0, initialProgress.getWorkCompleted(), 0.001);

  // The span exceeds the long range, so the expected value is computed with BigDecimal.
  BigDecimal estimatedEnd = BigDecimal.valueOf(Long.MAX_VALUE);
  BigDecimal rangeStart = BigDecimal.valueOf(Long.MIN_VALUE);
  double expectedRemaining =
      estimatedEnd.subtract(rangeStart, MathContext.DECIMAL128).doubleValue();
  assertEquals(expectedRemaining, initialProgress.getWorkRemaining(), 0.001);

  estimator.setEstimateRangeEnd(Long.MIN_VALUE);
  SplitResult split = growableTracker.trySplit(0);
  assertEquals(new OffsetRange(Long.MIN_VALUE, Long.MIN_VALUE), split.getPrimary());
  assertEquals(new OffsetRange(Long.MIN_VALUE, Long.MAX_VALUE), split.getResidual());
}
Example #3
Source File: SplittableDoFnTest.java From beam with Apache License 2.0 | 6 votes |
@ProcessElement public ProcessContinuation process( @Element String element, OutputReceiver<String> receiver, RestrictionTracker<OffsetRange, Long> tracker, BundleFinalizer bundleFinalizer) throws InterruptedException { if (wasFinalized.get()) { // Claim beyond the end now that we know we have been finalized. tracker.tryClaim(Long.MAX_VALUE); receiver.output(element); return stop(); } if (tracker.tryClaim(tracker.currentRestriction().getFrom() + 1)) { bundleFinalizer.afterBundleCommit( Instant.now().plus(Duration.standardSeconds(MAX_ATTEMPTS)), () -> wasFinalized.set(true)); // We sleep here instead of setting a resume time since the resume time doesn't need to // be honored. sleep(1000L); // 1 second return resume(); } return stop(); }
Example #4
Source File: ReadAllViaFileBasedSource.java From beam with Apache License 2.0 | 6 votes |
/**
 * Reads the sub-range of a file described by the input element and emits every record.
 *
 * <p>The element's key is the file and its value is the offset range to read. The reader is
 * closed by the try-with-resources statement.
 *
 * @throws IOException declared for failures while reading
 */
@ProcessElement
public void process(ProcessContext c) throws IOException {
  ReadableFile readableFile = c.element().getKey();
  OffsetRange offsets = c.element().getValue();

  String resourcePath = readableFile.getMetadata().resourceId().toString();
  FileBasedSource<T> fileSource =
      CompressedSource.from(createSource.apply(resourcePath))
          .withCompression(readableFile.getCompression());

  try (BoundedSource.BoundedReader<T> reader =
      fileSource
          .createForSubrangeOfFile(
              readableFile.getMetadata(), offsets.getFrom(), offsets.getTo())
          .createReader(c.getPipelineOptions())) {
    boolean hasRecord = reader.start();
    while (hasRecord) {
      c.output(reader.getCurrent());
      hasRecord = reader.advance();
    }
  }
}
Example #5
Source File: OffsetBasedSource.java From beam with Apache License 2.0 | 6 votes |
@Override public List<? extends OffsetBasedSource<T>> split( long desiredBundleSizeBytes, PipelineOptions options) throws Exception { // Split the range into bundles based on the desiredBundleSizeBytes. If the desired bundle // size is smaller than the minBundleSize of the source then minBundleSize will be used instead. long desiredBundleSizeOffsetUnits = Math.max(Math.max(1, desiredBundleSizeBytes / getBytesPerOffset()), minBundleSize); List<OffsetBasedSource<T>> subSources = new ArrayList<>(); for (OffsetRange range : new OffsetRange(startOffset, Math.min(endOffset, getMaxEndOffset(options))) .split(desiredBundleSizeOffsetUnits, minBundleSize)) { subSources.add(createSourceForSubrange(range.getFrom(), range.getTo())); } return subSources; }
Example #6
Source File: PCollectionViewsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs {@code computeOverlappingRanges} on six length-four ranges, each shifted by one from the
 * previous, and checks the count reported for every unit-length segment.
 */
@Test
public void testIncreasingOverlaps() {
  Iterable<OffsetRange> slidingRanges =
      Arrays.asList(
          range(0, 4), range(1, 5), range(2, 6), range(3, 7), range(4, 8), range(5, 9));
  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(slidingRanges);

  // The expected count ramps up to 4, holds there for three segments, then ramps down.
  Map<Object, Object> expectedCounts =
      ImmutableMap.builder()
          .put(range(0, 1), 1)
          .put(range(1, 2), 2)
          .put(range(2, 3), 3)
          .put(range(3, 4), 4)
          .put(range(4, 5), 4)
          .put(range(5, 6), 4)
          .put(range(6, 7), 3)
          .put(range(7, 8), 2)
          .put(range(8, 9), 1)
          .build();

  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(slidingRanges, actualCounts);
}
Example #7
Source File: CSVStreamingPipelineTest.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/**
 * Checks the initial restriction computed by {@code CSVContentProcessorDoFn} with a batch size
 * of 2, for a four-line and a six-line input.
 *
 * <p>Assertions use the (expected, actual) argument order so that a failure message reports
 * the two values under the correct labels.
 */
@Test
public void testCSVStreamingInitialRestriction() {
  CSVContentProcessorDoFn csv =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));
  String[] lines1 = {"line1", "line2", "line3", "line4"};
  String[] lines2 = {"line1", "line2", "line3", "line4", "line5", "line6"};
  KV<String, List<String>> input1 = KV.of("FileName", Arrays.asList(lines1));
  KV<String, List<String>> input2 = KV.of("FileName", Arrays.asList(lines2));

  // Four lines with batch size 2 are expected to give the range [1, 3).
  OffsetRange rangeResult1 = csv.getInitialRestriction(input1);
  assertEquals(1, rangeResult1.getFrom());
  assertEquals(3, rangeResult1.getTo());

  // Six lines with batch size 2 are expected to give the range [1, 4).
  OffsetRange rangeResult2 = csv.getInitialRestriction(input2);
  assertEquals(1, rangeResult2.getFrom());
  assertEquals(4, rangeResult2.getTo());
}
Example #8
Source File: CSVContentProcessorDoFn.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/**
 * Computes the initial offset range for the given file contents.
 *
 * <p>Stores {@code contents.getValue().size() - 1} in {@code numberOfRows}, then divides it by
 * the configured batch size. The range always starts at 1. Its end is the number of full
 * batches plus 1, or plus 2 when a partial batch remains.
 *
 * @param contents key/value pair whose value is the list of lines
 * @return the range {@code new OffsetRange(1, totalSplit)}
 */
@GetInitialRestriction
public OffsetRange getInitialRestriction(KV<String, List<String>> contents) {
  this.numberOfRows = contents.getValue().size() - 1;

  int rowsPerBatch = this.batchSize.get().intValue();
  int fullBatches = this.numberOfRows / rowsPerBatch;
  boolean hasPartialBatch = this.numberOfRows % rowsPerBatch > 0;
  int totalSplit = fullBatches + (hasPartialBatch ? 2 : 1);

  LOG.info("Initial Restriction range from 1 to: {}", totalSplit);
  return new OffsetRange(1, totalSplit);
}
Example #9
Source File: SplittableDoFnTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Emits integers block by block, claiming the start of each block before emitting it.
 *
 * <p>Starting from the block that {@code snapToNextBlock} selects for the restriction start,
 * each successful claim emits every index from that block start up to, but excluding, the
 * next block start. After {@code numClaimsPerCall} claims in one call the method asks to be
 * resumed. When a claim fails it stops.
 */
@ProcessElement
public ProcessContinuation processElement(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  int[] blockStarts = {-1, 0, 12, 123, 1234, 12345, 34567, MAX_INDEX};
  int restrictionStart = (int) tracker.currentRestriction().getFrom();

  int blockIndex = snapToNextBlock(restrictionStart, blockStarts);
  int claimsThisCall = 1;
  while (tracker.tryClaim((long) blockStarts[blockIndex])) {
    int blockEnd = blockStarts[blockIndex + 1];
    for (int value = blockStarts[blockIndex]; value < blockEnd; ++value) {
      c.output(value);
    }
    if (claimsThisCall == numClaimsPerCall) {
      return resume();
    }
    ++blockIndex;
    ++claimsThisCall;
  }
  return stop();
}
Example #10
Source File: PCollectionViewsTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Runs {@code computeOverlappingRanges} on three ranges where only neighbours overlap, and
 * checks the count reported for each resulting segment.
 */
@Test
public void testRangesWithAtMostOneOverlap() {
  Iterable<OffsetRange> chainedRanges = Arrays.asList(range(0, 6), range(4, 10), range(8, 12));
  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(chainedRanges);

  // Segments shared by two neighbouring inputs are expected to count 2, all others 1.
  Map<Object, Object> expectedCounts =
      ImmutableMap.builder()
          .put(range(0, 4), 1)
          .put(range(4, 6), 2)
          .put(range(6, 8), 1)
          .put(range(8, 10), 2)
          .put(range(10, 12), 1)
          .build();

  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(chainedRanges, actualCounts);
}
Example #11
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Checks progress reporting and a zero-fraction split for a tracker covering
 * {@code [Long.MIN_VALUE, Long.MAX_VALUE)}.
 */
@Test
public void testLargeRange() throws Exception {
  OffsetRange entireLongRange = new OffsetRange(Long.MIN_VALUE, Long.MAX_VALUE);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(entireLongRange);

  Progress initialProgress = rangeTracker.getProgress();
  assertEquals(0, initialProgress.getWorkCompleted(), 0.001);

  // The span exceeds the long range, so the expected value is computed with BigDecimal.
  BigDecimal upperBound = BigDecimal.valueOf(Long.MAX_VALUE);
  BigDecimal lowerBound = BigDecimal.valueOf(Long.MIN_VALUE);
  double expectedRemaining =
      upperBound.subtract(lowerBound, MathContext.DECIMAL128).doubleValue();
  assertEquals(expectedRemaining, initialProgress.getWorkRemaining(), 0.001);

  SplitResult split = rangeTracker.trySplit(0);
  assertEquals(new OffsetRange(Long.MIN_VALUE, Long.MIN_VALUE), split.getPrimary());
  assertEquals(new OffsetRange(Long.MIN_VALUE, Long.MAX_VALUE), split.getResidual());
}
Example #12
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Splits two width-two ranges located at very large offsets at fraction 0.5 after claiming
 * the first offset, and checks that each half has width one.
 */
@Test
public void testSmallRangeWithLargeValue() throws Exception {
  // First range: [...677, ...679).
  OffsetRange firstRange = new OffsetRange(123456789012345677L, 123456789012345679L);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(firstRange);
  assertTrue(rangeTracker.tryClaim(123456789012345677L));
  SplitResult split = rangeTracker.trySplit(0.5);
  assertEquals(new OffsetRange(123456789012345677L, 123456789012345678L), split.getPrimary());
  assertEquals(new OffsetRange(123456789012345678L, 123456789012345679L), split.getResidual());

  // Second range: [...681, ...683).
  OffsetRange secondRange = new OffsetRange(123456789012345681L, 123456789012345683L);
  rangeTracker = new OffsetRangeTracker(secondRange);
  assertTrue(rangeTracker.tryClaim(123456789012345681L));
  split = rangeTracker.trySplit(0.5);
  assertEquals(new OffsetRange(123456789012345681L, 123456789012345682L), split.getPrimary());
  assertEquals(new OffsetRange(123456789012345682L, 123456789012345683L), split.getResidual());
}
Example #13
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Claims offsets 150, 175 and 199 in the range {@code [100, 200)} and then calls
 * {@code checkDone()}, which the test expects to complete without throwing.
 */
@Test
public void testCheckDoneAfterTryClaimRightBeforeEndOfRange() {
  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);

  assertTrue(rangeTracker.tryClaim(150L));
  assertTrue(rangeTracker.tryClaim(175L));
  // 199 is the last offset inside [100, 200).
  assertTrue(rangeTracker.tryClaim(199L));

  rangeTracker.checkDone();
}
Example #14
Source File: PCollectionViewsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs {@code computeOverlappingRanges} on three identical ranges and expects a single entry
 * for that range with count 3.
 */
@Test
public void testOverlappingFromsAndTos() {
  Iterable<OffsetRange> identicalRanges = Arrays.asList(range(0, 4), range(0, 4), range(0, 4));
  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(identicalRanges);

  Map<Object, Object> expectedCounts = ImmutableMap.builder().put(range(0, 4), 3).build();

  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(identicalRanges, actualCounts);
}
Example #15
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Convenience overload: builds a {@code SomeFn} from the timing parameters and runs it over
 * the restriction {@code [0, totalNumOutputs)}.
 *
 * @param totalNumOutputs end of the initial restriction
 * @param sleepBeforeFirstClaim forwarded to the {@code SomeFn} constructor
 * @param numOutputsPerProcessCall forwarded to the {@code SomeFn} constructor
 * @param sleepBeforeEachOutput forwarded to the {@code SomeFn} constructor
 * @return the result of the two-argument {@code runTest} overload
 */
private SplittableProcessElementInvoker<Void, String, OffsetRange, Long, Void>.Result runTest(
    int totalNumOutputs,
    Duration sleepBeforeFirstClaim,
    int numOutputsPerProcessCall,
    Duration sleepBeforeEachOutput) {
  return runTest(
      new SomeFn(sleepBeforeFirstClaim, numOutputsPerProcessCall, sleepBeforeEachOutput),
      new OffsetRange(0, totalNumOutputs));
}
Example #16
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Claims 105 and 110 and then attempts to claim 103. The expected-exception rule is primed
 * with the message that the out-of-order claim should produce.
 */
@Test
public void testNonMonotonicClaim() throws Exception {
  expected.expectMessage("Trying to claim offset 103 while last attempted was 110");

  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);
  assertTrue(rangeTracker.tryClaim(105L));
  assertTrue(rangeTracker.tryClaim(110L));

  // Going backwards from 110 to 103 is the claim under test.
  rangeTracker.tryClaim(103L);
}
Example #17
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks the progress of trackers on which nothing has been claimed: completed work is 0 and
 * remaining work equals the width of the range.
 */
@Test
public void testBacklogUnstarted() {
  // Range [0, 200): 200 remaining.
  OffsetRangeTracker fromZero = new OffsetRangeTracker(new OffsetRange(0, 200));
  Progress fromZeroProgress = fromZero.getProgress();
  assertEquals(0, fromZeroProgress.getWorkCompleted(), 0.001);
  assertEquals(200, fromZeroProgress.getWorkRemaining(), 0.001);

  // Range [100, 200): 100 remaining.
  OffsetRangeTracker fromHundred = new OffsetRangeTracker(new OffsetRange(100, 200));
  Progress fromHundredProgress = fromHundred.getProgress();
  assertEquals(0, fromHundredProgress.getWorkCompleted(), 0.001);
  assertEquals(100, fromHundredProgress.getWorkRemaining(), 0.001);
}
Example #18
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Claims 150 and 175 in {@code [100, 200)}, expects a claim of 200 to be rejected, and then
 * calls {@code checkDone()}, which the test expects to complete without throwing.
 */
@Test
public void testCheckDoneAfterTryClaimAtEndOfRange() {
  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);

  assertTrue(rangeTracker.tryClaim(150L));
  assertTrue(rangeTracker.tryClaim(175L));
  // 200 is the end of the range passed to the constructor; the claim must fail.
  assertFalse(rangeTracker.tryClaim(200L));

  rangeTracker.checkDone();
}
Example #19
Source File: SplittableParDoProcessFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Drives a {@code WatermarkUpdateFn} over the restriction {@code [0, 8)} in three steps and
 * checks both the emitted elements and the watermark hold after each step.
 */
@Test
public void testUpdatesWatermark() throws Exception {
  DoFn<Instant, String> fn = new WatermarkUpdateFn();
  Instant base = Instant.now();

  // NOTE(review): the literal 3 is presumably the per-bundle output limit; confirm against
  // the ProcessFnTester constructor.
  ProcessFnTester<Instant, String, OffsetRange, Long, Instant> tester =
      new ProcessFnTester<>(
          base,
          fn,
          InstantCoder.of(),
          SerializableCoder.of(OffsetRange.class),
          InstantCoder.of(),
          3,
          MAX_BUNDLE_DURATION);

  // First step: outputs "0".."2", hold expected at base + 2s.
  tester.startElement(base, new OffsetRange(0, 8));
  assertThat(tester.takeOutputElements(), hasItems("0", "1", "2"));
  assertEquals(base.plus(Duration.standardSeconds(2)), tester.getWatermarkHold());

  // Second step: outputs "3".."5", hold expected at base + 5s.
  assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1)));
  assertThat(tester.takeOutputElements(), hasItems("3", "4", "5"));
  assertEquals(base.plus(Duration.standardSeconds(5)), tester.getWatermarkHold());

  // Final step: the remaining outputs "6" and "7", after which no hold is expected.
  assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1)));
  assertThat(tester.takeOutputElements(), hasItems("6", "7"));
  assertEquals(null, tester.getWatermarkHold());
}
Example #20
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testInvokeProcessElementOutputBounded() throws Exception { SplittableProcessElementInvoker<Void, String, OffsetRange, Long, Void>.Result res = runTest(10000, Duration.ZERO, Integer.MAX_VALUE, Duration.ZERO); assertFalse(res.getContinuation().shouldResume()); OffsetRange residualRange = res.getResidualRestriction(); // Should process the first 100 elements. assertEquals(1000, residualRange.getFrom()); assertEquals(10000, residualRange.getTo()); }
Example #21
Source File: OffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Splits a tracker at fraction 0 before anything has been claimed: the primary is expected to
 * be the empty range {@code [100, 100)} and the residual the whole original range.
 */
@Test
public void testCheckpointUnstarted() throws Exception {
  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);

  SplitResult checkpoint = rangeTracker.trySplit(0);

  assertEquals(new OffsetRange(100, 100), checkpoint.getPrimary());
  assertEquals(new OffsetRange(100, 200), checkpoint.getResidual());
  rangeTracker.checkDone();
}
Example #22
Source File: GrowableOffsetRangeTrackerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Splits a growable tracker at fraction 0 before anything has been claimed and checks the
 * primary, the tracker's current restriction and the residual.
 */
@Test
public void testCheckpointBeforeStart() throws Exception {
  SimpleEstimator estimator = new SimpleEstimator();
  GrowableOffsetRangeTracker growableTracker = new GrowableOffsetRangeTracker(0L, estimator);
  estimator.setEstimateRangeEnd(10);

  SplitResult checkpoint = growableTracker.trySplit(0);
  growableTracker.checkDone();

  OffsetRange emptyAtStart = new OffsetRange(0, 0);
  assertEquals(emptyAtStart, checkpoint.getPrimary());
  assertEquals(new OffsetRange(0, 0), growableTracker.currentRestriction());
  // The residual is expected to end at Long.MAX_VALUE rather than at the estimated end of 10.
  assertEquals(new OffsetRange(0, Long.MAX_VALUE), checkpoint.getResidual());
}
Example #23
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testInvokeProcessElementTimeBoundedWithStartupDelay() throws Exception { SplittableProcessElementInvoker<Void, String, OffsetRange, Long, Void>.Result res = runTest(10000, Duration.standardSeconds(3), Integer.MAX_VALUE, Duration.millis(100)); assertFalse(res.getContinuation().shouldResume()); OffsetRange residualRange = res.getResidualRestriction(); // Same as above, but this time it counts from the time of the first tryClaim() call assertThat(residualRange.getFrom(), greaterThan(10L)); assertThat(residualRange.getFrom(), lessThan(100L)); assertEquals(10000, residualRange.getTo()); }
Example #24
Source File: SplittableParDoProcessFnTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Claims consecutive offsets starting at the restriction start and, for each claimed offset,
 * emits the string form of {@code c.element() + offset}.
 *
 * <p>Asks to be resumed once {@code numOutputsPerCall} outputs have been produced in this
 * call. Stops when a claim fails.
 */
@ProcessElement
public ProcessContinuation process(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  long offset = tracker.currentRestriction().getFrom();
  long outputsBeforeThisOne = 0;
  while (tracker.tryClaim(offset)) {
    c.output(String.valueOf(c.element() + offset));
    if (outputsBeforeThisOne == numOutputsPerCall - 1) {
      return resume();
    }
    ++offset;
    ++outputsBeforeThisOne;
  }
  return stop();
}
Example #25
Source File: PCollectionViews.java From beam with Apache License 2.0 | 5 votes |
/**
 * Sums, over every entry, the width of the range multiplied by the entry's per-position count.
 *
 * <p>Every arithmetic step is overflow-checked, including the running total, so an overflow
 * cannot wrap silently before the final narrowing to {@code int}.
 *
 * @param nonOverlappingRangesToNumElementsPerPosition map from range to its per-position count
 * @return the total, narrowed to {@code int} via {@code Ints.checkedCast}
 * @throws ArithmeticException if a range width, a per-range product or the running total
 *     overflows a {@code long}
 */
@VisibleForTesting
static int computeTotalNumElements(
    Map<OffsetRange, Integer> nonOverlappingRangesToNumElementsPerPosition) {
  long sum = 0;
  for (Map.Entry<OffsetRange, Integer> range :
      nonOverlappingRangesToNumElementsPerPosition.entrySet()) {
    long rangeWidth = Math.subtractExact(range.getKey().getTo(), range.getKey().getFrom());
    long elementsInRange = Math.multiplyExact(rangeWidth, range.getValue());
    // A plain += here could wrap around unnoticed; addExact fails loudly instead.
    sum = Math.addExact(sum, elementsInRange);
  }
  return Ints.checkedCast(sum);
}
Example #26
Source File: PCollectionViewsTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Runs {@code computeOverlappingRanges} on two disjoint ranges and expects each one to come
 * back unchanged with count 1.
 */
@Test
public void testNoOverlapping() {
  Iterable<OffsetRange> disjointRanges = Arrays.asList(range(0, 2), range(4, 6));
  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(disjointRanges);

  Map<OffsetRange, Integer> expectedCounts = ImmutableMap.of(range(0, 2), 1, range(4, 6), 1);

  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(disjointRanges, actualCounts);
}
Example #27
Source File: BundleSplitterTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Configures a constant bundle-size distribution of 2 and checks that every range returned by
 * {@code getBundleSizes} has width 2.
 */
@Test
public void bundlesShouldBeEvenForConstDistribution() {
  long expectedBundleSize = 2;
  options.bundleSizeDistribution = fromRealDistribution(new ConstantRealDistribution(2));
  splitter = new BundleSplitter(options);

  List<OffsetRange> bundles = splitter.getBundleSizes(4, 0, options.numRecords);

  for (OffsetRange bundle : bundles) {
    long width = bundle.getTo() - bundle.getFrom();
    assertEquals(expectedBundleSize, (int) width);
  }
}
Example #28
Source File: PCollectionViews.java From beam with Apache License 2.0 | 5 votes |
/**
 * Returns a {@code PCollectionView<List<T>>} capable of processing elements windowed using the
 * provided {@link WindowingStrategy}.
 *
 * <p>The view is a {@code SimplePCollectionView} built from a {@code ListViewFn2} and the
 * default window mapping function of the strategy's window fn.
 *
 * @param pCollection the collection backing the view
 * @param typeDescriptorSupplier passed to the {@code ListViewFn2} constructor
 * @param windowingStrategy supplies the window mapping function and is also passed to the view
 * @return the newly created view
 */
public static <T, W extends BoundedWindow> PCollectionView<List<T>> listView(
    PCollection<KV<Long, ValueOrMetadata<T, OffsetRange>>> pCollection,
    TypeDescriptorSupplier<T> typeDescriptorSupplier,
    WindowingStrategy<?, W> windowingStrategy) {
  return new SimplePCollectionView<>(
      pCollection,
      new ListViewFn2<>(typeDescriptorSupplier),
      windowingStrategy.getWindowFn().getDefaultWindowMappingFn(),
      windowingStrategy);
}
Example #29
Source File: HL7v2IO.java From beam with Apache License 2.0 | 5 votes |
@GetInitialRestriction public OffsetRange getEarliestToLatestRestriction(@Element String hl7v2Store) throws IOException { Instant from = this.client.getEarliestHL7v2SendTime(hl7v2Store, this.filter.get()); // filters are [from, to) to match logic of OffsetRangeTracker but need latest element to be // included in results set to add an extra ms to the upper bound. Instant to = this.client.getLatestHL7v2SendTime(hl7v2Store, this.filter.get()).plus(1); return new OffsetRange(from.getMillis(), to.getMillis()); }
Example #30
Source File: SplittableParDoProcessFnTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testResumeCarriesOverState() throws Exception { DoFn<Integer, String> fn = new CounterFn(1); Instant base = Instant.now(); dateTimeProvider.setDateTimeFixed(base.getMillis()); ProcessFnTester<Integer, String, OffsetRange, Long, Void> tester = new ProcessFnTester<>( base, fn, BigEndianIntegerCoder.of(), SerializableCoder.of(OffsetRange.class), VoidCoder.of(), MAX_OUTPUTS_PER_BUNDLE, MAX_BUNDLE_DURATION); tester.startElement(42, new OffsetRange(0, 3)); assertThat(tester.takeOutputElements(), contains("42")); assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1))); assertThat(tester.takeOutputElements(), contains("43")); assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1))); assertThat(tester.takeOutputElements(), contains("44")); // Should not resume the null residual. assertFalse(tester.advanceProcessingTimeBy(Duration.standardSeconds(1))); // After outputting all 3 items, should not output anything more. assertEquals(0, tester.takeOutputElements().size()); // Should also not ask to resume. assertFalse(tester.advanceProcessingTimeBy(Duration.standardSeconds(1))); }