org.apache.beam.sdk.io.range.OffsetRange Java Examples

The following examples show how to use org.apache.beam.sdk.io.range.OffsetRange. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PCollectionViewsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Ranges nested strictly inside one another: the expected per-position count climbs to 4 over the
 * innermost range and then falls back to 1.
 */
@Test
public void testNestedOverlaps() {
  Iterable<OffsetRange> nestedRanges =
      Arrays.asList(range(0, 8), range(1, 7), range(2, 6), range(3, 5));

  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(nestedRanges);

  Map<Object, Object> expectedCounts =
      ImmutableMap.builder()
          .put(range(0, 1), 1)
          .put(range(1, 2), 2)
          .put(range(2, 3), 3)
          .put(range(3, 5), 4)
          .put(range(5, 6), 3)
          .put(range(6, 7), 2)
          .put(range(7, 8), 1)
          .build();
  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(nestedRanges, actualCounts);
}
 
Example #2
Source File: GrowableOffsetRangeTrackerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises a growable tracker whose start is {@code Long.MIN_VALUE} and whose estimated end is
 * moved between the two extremes of {@code long}.
 */
@Test
public void testLargeRange() throws Exception {
  SimpleEstimator estimator = new SimpleEstimator();
  GrowableOffsetRangeTracker growableTracker =
      new GrowableOffsetRangeTracker(Long.MIN_VALUE, estimator);

  // With the estimate at Long.MAX_VALUE the remaining work spans the whole long domain; the
  // expected figure is computed with BigDecimal because MAX - MIN does not fit in a long.
  estimator.setEstimateRangeEnd(Long.MAX_VALUE);
  Progress initialProgress = growableTracker.getProgress();
  double fullSpan =
      BigDecimal.valueOf(Long.MAX_VALUE)
          .subtract(BigDecimal.valueOf(Long.MIN_VALUE), MathContext.DECIMAL128)
          .doubleValue();
  assertEquals(0, initialProgress.getWorkCompleted(), 0.001);
  assertEquals(fullSpan, initialProgress.getWorkRemaining(), 0.001);

  // Checkpointing at fraction 0 with the estimate pulled back to the start.
  estimator.setEstimateRangeEnd(Long.MIN_VALUE);
  SplitResult checkpoint = growableTracker.trySplit(0);
  OffsetRange expectedPrimary = new OffsetRange(Long.MIN_VALUE, Long.MIN_VALUE);
  OffsetRange expectedResidual = new OffsetRange(Long.MIN_VALUE, Long.MAX_VALUE);
  assertEquals(expectedPrimary, checkpoint.getPrimary());
  assertEquals(expectedResidual, checkpoint.getResidual());
}
 
Example #3
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Processes one element, holding back its output until a bundle-finalization callback has been
 * observed.
 *
 * <p>While {@code wasFinalized} is unset, each call tries to claim the offset just past the start
 * of the current restriction, registers a callback that sets {@code wasFinalized}, sleeps for one
 * second and asks to be resumed. Once {@code wasFinalized} is set, the call claims
 * {@code Long.MAX_VALUE}, emits the element and stops.
 *
 * @param element the input element, emitted unchanged once finalization has been observed
 * @param receiver receives the single output
 * @param tracker tracker for the current {@link OffsetRange} restriction
 * @param bundleFinalizer used to register the after-commit callback
 * @return {@code resume()} while waiting for finalization, otherwise {@code stop()}
 * @throws InterruptedException if the one-second sleep is interrupted
 */
@ProcessElement
public ProcessContinuation process(
    @Element String element,
    OutputReceiver<String> receiver,
    RestrictionTracker<OffsetRange, Long> tracker,
    BundleFinalizer bundleFinalizer)
    throws InterruptedException {
  if (wasFinalized.get()) {
    // Claim beyond the end now that we know we have been finalized.
    tracker.tryClaim(Long.MAX_VALUE);
    receiver.output(element);
    return stop();
  }
  // Not finalized yet: claim the offset right after the restriction start so there is claimed
  // work for this call before asking to be resumed.
  if (tracker.tryClaim(tracker.currentRestriction().getFrom() + 1)) {
    // NOTE(review): the callback expiry is "now + MAX_ATTEMPTS seconds"; the constant's name
    // suggests a count rather than a duration — confirm this is the intended expiry.
    bundleFinalizer.afterBundleCommit(
        Instant.now().plus(Duration.standardSeconds(MAX_ATTEMPTS)),
        () -> wasFinalized.set(true));
    // We sleep here instead of setting a resume time since the resume time doesn't need to
    // be honored.
    sleep(1000L); // 1 second
    return resume();
  }
  // The claim was rejected, so there is nothing left to do for this restriction.
  return stop();
}
 
Example #4
Source File: ReadAllViaFileBasedSource.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the records of one (file, offset range) pair and emits each of them.
 *
 * <p>The source for the file is built from its resource id, wrapped with the file's compression,
 * narrowed to the given offset range, and read to exhaustion. The reader is closed when reading
 * finishes or fails.
 *
 * @param c context carrying the (file, range) element and receiving the records
 * @throws IOException if creating or advancing the reader fails
 */
@ProcessElement
public void process(ProcessContext c) throws IOException {
  ReadableFile readableFile = c.element().getKey();
  OffsetRange offsets = c.element().getValue();

  String resourceName = readableFile.getMetadata().resourceId().toString();
  FileBasedSource<T> fileSource =
      CompressedSource.from(createSource.apply(resourceName))
          .withCompression(readableFile.getCompression());

  try (BoundedSource.BoundedReader<T> reader =
      fileSource
          .createForSubrangeOfFile(readableFile.getMetadata(), offsets.getFrom(), offsets.getTo())
          .createReader(c.getPipelineOptions())) {
    boolean hasRecord = reader.start();
    while (hasRecord) {
      c.output(reader.getCurrent());
      hasRecord = reader.advance();
    }
  }
}
 
Example #5
Source File: OffsetBasedSource.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Splits this source into sub-sources covering consecutive offset sub-ranges.
 *
 * @param desiredBundleSizeBytes requested bundle size in bytes; converted to offset units via
 *     {@code getBytesPerOffset()}
 * @param options pipeline options, used to determine the maximum end offset
 * @return one sub-source per sub-range produced by {@link OffsetRange#split}
 */
@Override
public List<? extends OffsetBasedSource<T>> split(
    long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
  // Translate the byte-based request into offset units: at least one offset per bundle, and
  // never smaller than the source's minBundleSize.
  long bundleSizeFromBytes = Math.max(1, desiredBundleSizeBytes / getBytesPerOffset());
  long bundleSizeInOffsets = Math.max(bundleSizeFromBytes, minBundleSize);

  List<OffsetBasedSource<T>> result = new ArrayList<>();

  // The range to split ends at whichever comes first: the configured end offset or the maximum
  // end offset reported for these options.
  long effectiveEnd = Math.min(endOffset, getMaxEndOffset(options));
  OffsetRange fullRange = new OffsetRange(startOffset, effectiveEnd);
  for (OffsetRange bundle : fullRange.split(bundleSizeInOffsets, minBundleSize)) {
    result.add(createSourceForSubrange(bundle.getFrom(), bundle.getTo()));
  }
  return result;
}
 
Example #6
Source File: PCollectionViewsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Six width-4 ranges, each shifted right by one: the expected count ramps up to 4, holds, and
 * ramps back down to 1.
 */
@Test
public void testIncreasingOverlaps() {
  Iterable<OffsetRange> slidingRanges =
      Arrays.asList(range(0, 4), range(1, 5), range(2, 6), range(3, 7), range(4, 8), range(5, 9));

  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(slidingRanges);

  Map<Object, Object> expectedCounts =
      ImmutableMap.builder()
          .put(range(0, 1), 1)
          .put(range(1, 2), 2)
          .put(range(2, 3), 3)
          .put(range(3, 4), 4)
          .put(range(4, 5), 4)
          .put(range(5, 6), 4)
          .put(range(6, 7), 3)
          .put(range(7, 8), 2)
          .put(range(8, 9), 1)
          .build();
  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(slidingRanges, actualCounts);
}
 
Example #7
Source File: CSVStreamingPipelineTest.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the initial restriction computed by {@code CSVContentProcessorDoFn} (constructed with the
 * static value 2) for a 4-line and a 6-line input.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that a failure message
 * reports the expected and actual values the right way round.
 */
@Test
public void testCSVStreamingInitialRestriction() {
  CSVContentProcessorDoFn csv =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));
  String[] lines1 = {"line1", "line2", "line3", "line4"};
  String[] lines2 = {"line1", "line2", "line3", "line4", "line5", "line6"};

  KV<String, List<String>> input1 = KV.of("FileName", Arrays.asList(lines1));
  KV<String, List<String>> input2 = KV.of("FileName", Arrays.asList(lines2));

  // Four lines are expected to yield the restriction [1, 3).
  OffsetRange rangeResult1 = csv.getInitialRestriction(input1);
  assertEquals(1, rangeResult1.getFrom());
  assertEquals(3, rangeResult1.getTo());

  // Six lines are expected to yield the restriction [1, 4).
  OffsetRange rangeResult2 = csv.getInitialRestriction(input2);
  assertEquals(1, rangeResult2.getFrom());
  assertEquals(4, rangeResult2.getTo());
}
 
Example #8
Source File: CSVContentProcessorDoFn.java    From dlp-dataflow-deidentification with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the initial restriction for the given file contents.
 *
 * <p>The row count is the number of lines minus one (presumably excluding a header line — confirm
 * against the caller) and is stored in {@code numberOfRows}. The restriction starts at 1 and its
 * exclusive upper bound is {@code ceil(numberOfRows / batchSize) + 1}, i.e. one offset per full
 * or partial batch.
 *
 * @param contents file name paired with the file's lines
 * @return the range {@code [1, totalSplit)}
 */
@GetInitialRestriction
public OffsetRange getInitialRestriction(KV<String, List<String>> contents) {
  this.numberOfRows = contents.getValue().size() - 1;

  // Resolve the configured batch size once and reuse it for both the quotient and remainder.
  int rowsPerBatch = this.batchSize.get().intValue();
  int fullBatches = this.numberOfRows / rowsPerBatch;
  boolean hasPartialBatch = this.numberOfRows % rowsPerBatch > 0;

  // The upper bound is exclusive and offsets start at 1, hence +1; a trailing partial batch
  // needs one more offset.
  int totalSplit = fullBatches + (hasPartialBatch ? 2 : 1);

  LOG.info("Initial Restriction range from 1 to: {}", totalSplit);
  return new OffsetRange(1, totalSplit);
}
 
Example #9
Source File: SplittableDoFnTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Emits integer indices block by block, where each block starts at one of a fixed set of
 * positions and a block is only processed once its start position has been claimed.
 *
 * <p>Starting from the block selected by {@code snapToNextBlock} for the restriction's start
 * offset, each successful claim of a block start emits every index from that start up to (but
 * excluding) the next block start. After {@code numClaimsPerCall} claimed blocks in one call the
 * method asks to be resumed; when a claim is rejected it stops.
 *
 * @param c context receiving the emitted indices
 * @param tracker tracker for the current {@link OffsetRange} restriction
 * @return {@code resume()} after {@code numClaimsPerCall} claimed blocks, otherwise
 *     {@code stop()}
 */
@ProcessElement
public ProcessContinuation processElement(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  // Fixed block boundaries; consecutive entries delimit one block [start, nextStart).
  int[] blockStarts = {-1, 0, 12, 123, 1234, 12345, 34567, MAX_INDEX};
  // presumably the index of the first block starting at or after the restriction start —
  // snapToNextBlock is defined elsewhere; verify.
  int trueStart = snapToNextBlock((int) tracker.currentRestriction().getFrom(), blockStarts);
  // The loop condition performs the claim: iteration continues only while block starts can
  // still be claimed.
  for (int i = trueStart, numIterations = 1;
      tracker.tryClaim((long) blockStarts[i]);
      ++i, ++numIterations) {
    // NOTE(review): blockStarts[i + 1] relies on the claim of the last entry (MAX_INDEX) being
    // rejected, otherwise this would index past the end of the array — confirm.
    for (int index = blockStarts[i]; index < blockStarts[i + 1]; ++index) {
      c.output(index);
    }
    if (numIterations == numClaimsPerCall) {
      return resume();
    }
  }
  return stop();
}
 
Example #10
Source File: PCollectionViewsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Three ranges where each neighbouring pair overlaps but no position is covered by more than two
 * of them.
 */
@Test
public void testRangesWithAtMostOneOverlap() {
  Iterable<OffsetRange> chainedRanges =
      Arrays.asList(range(0, 6), range(4, 10), range(8, 12));

  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(chainedRanges);

  Map<Object, Object> expectedCounts =
      ImmutableMap.builder()
          .put(range(0, 4), 1)
          .put(range(4, 6), 2)
          .put(range(6, 8), 1)
          .put(range(8, 10), 2)
          .put(range(10, 12), 1)
          .build();
  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(chainedRanges, actualCounts);
}
 
Example #11
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises a tracker over the entire {@code long} domain: progress reporting and a checkpoint
 * taken before anything has been claimed.
 */
@Test
public void testLargeRange() throws Exception {
  OffsetRange everything = new OffsetRange(Long.MIN_VALUE, Long.MAX_VALUE);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(everything);

  // MAX - MIN does not fit in a long, so the expected remaining work is computed with
  // BigDecimal before being compared as a double.
  Progress initialProgress = rangeTracker.getProgress();
  double fullSpan =
      BigDecimal.valueOf(Long.MAX_VALUE)
          .subtract(BigDecimal.valueOf(Long.MIN_VALUE), MathContext.DECIMAL128)
          .doubleValue();
  assertEquals(0, initialProgress.getWorkCompleted(), 0.001);
  assertEquals(fullSpan, initialProgress.getWorkRemaining(), 0.001);

  SplitResult checkpoint = rangeTracker.trySplit(0);
  OffsetRange expectedPrimary = new OffsetRange(Long.MIN_VALUE, Long.MIN_VALUE);
  OffsetRange expectedResidual = new OffsetRange(Long.MIN_VALUE, Long.MAX_VALUE);
  assertEquals(expectedPrimary, checkpoint.getPrimary());
  assertEquals(expectedResidual, checkpoint.getResidual());
}
 
Example #12
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Splits two-offset ranges in half when the offsets themselves are very large.
 *
 * <p>Both start offsets exceed 2^53, so adjacent values in these ranges are not all exactly
 * representable as {@code double}; the split point must still land on the middle offset.
 */
@Test
public void testSmallRangeWithLargeValue() throws Exception {
  long firstStart = 123456789012345677L;
  OffsetRangeTracker firstTracker =
      new OffsetRangeTracker(new OffsetRange(firstStart, firstStart + 2));
  assertTrue(firstTracker.tryClaim(firstStart));
  SplitResult firstSplit = firstTracker.trySplit(0.5);
  assertEquals(new OffsetRange(firstStart, firstStart + 1), firstSplit.getPrimary());
  assertEquals(new OffsetRange(firstStart + 1, firstStart + 2), firstSplit.getResidual());

  long secondStart = 123456789012345681L;
  OffsetRangeTracker secondTracker =
      new OffsetRangeTracker(new OffsetRange(secondStart, secondStart + 2));
  assertTrue(secondTracker.tryClaim(secondStart));
  SplitResult secondSplit = secondTracker.trySplit(0.5);
  assertEquals(new OffsetRange(secondStart, secondStart + 1), secondSplit.getPrimary());
  assertEquals(new OffsetRange(secondStart + 1, secondStart + 2), secondSplit.getResidual());
}
 
Example #13
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Claims increasing offsets up to 199, the last offset inside [100, 200), and then expects
 * {@code checkDone()} to pass.
 */
@Test
public void testCheckDoneAfterTryClaimRightBeforeEndOfRange() {
  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);
  for (long offset : new long[] {150L, 175L, 199L}) {
    assertTrue(rangeTracker.tryClaim(offset));
  }
  rangeTracker.checkDone();
}
 
Example #14
Source File: PCollectionViewsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Three identical ranges are expected to collapse into one range counted three times. */
@Test
public void testOverlappingFromsAndTos() {
  Iterable<OffsetRange> identicalRanges =
      Arrays.asList(range(0, 4), range(0, 4), range(0, 4));

  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(identicalRanges);

  Map<OffsetRange, Integer> expectedCounts = ImmutableMap.of(range(0, 4), 3);
  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(identicalRanges, actualCounts);
}
 
Example #15
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience overload: builds a {@code SomeFn} from the timing parameters and runs it over the
 * restriction {@code [0, totalNumOutputs)}.
 *
 * @param totalNumOutputs exclusive upper bound of the initial restriction
 * @param sleepBeforeFirstClaim passed through to {@code SomeFn}
 * @param numOutputsPerProcessCall passed through to {@code SomeFn}
 * @param sleepBeforeEachOutput passed through to {@code SomeFn}
 * @return the result of the two-argument {@code runTest}
 */
private SplittableProcessElementInvoker<Void, String, OffsetRange, Long, Void>.Result runTest(
    int totalNumOutputs,
    Duration sleepBeforeFirstClaim,
    int numOutputsPerProcessCall,
    Duration sleepBeforeEachOutput) {
  return runTest(
      new SomeFn(sleepBeforeFirstClaim, numOutputsPerProcessCall, sleepBeforeEachOutput),
      new OffsetRange(0, totalNumOutputs));
}
 
Example #16
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Claiming an offset lower than the last attempted one is expected to fail with a message naming
 * both offsets.
 */
@Test
public void testNonMonotonicClaim() throws Exception {
  // The expectation has to be registered before the offending call is made.
  expected.expectMessage("Trying to claim offset 103 while last attempted was 110");

  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);
  for (long offset : new long[] {105L, 110L}) {
    assertTrue(rangeTracker.tryClaim(offset));
  }
  // Going backwards from 110 to 103 is the claim under test.
  rangeTracker.tryClaim(103L);
}
 
Example #17
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Before any claim, completed work is expected to be 0 and remaining work to equal the width of
 * the range, whether or not the range starts at 0.
 */
@Test
public void testBacklogUnstarted() {
  OffsetRangeTracker fromZero = new OffsetRangeTracker(new OffsetRange(0, 200));
  Progress fromZeroProgress = fromZero.getProgress();
  assertEquals(0, fromZeroProgress.getWorkCompleted(), 0.001);
  assertEquals(200, fromZeroProgress.getWorkRemaining(), 0.001);

  OffsetRangeTracker fromHundred = new OffsetRangeTracker(new OffsetRange(100, 200));
  Progress fromHundredProgress = fromHundred.getProgress();
  assertEquals(0, fromHundredProgress.getWorkCompleted(), 0.001);
  assertEquals(100, fromHundredProgress.getWorkRemaining(), 0.001);
}
 
Example #18
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * After two successful claims, a claim at the exclusive end (200) is expected to be rejected and
 * {@code checkDone()} is expected to pass afterwards.
 */
@Test
public void testCheckDoneAfterTryClaimAtEndOfRange() {
  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);
  for (long offset : new long[] {150L, 175L}) {
    assertTrue(rangeTracker.tryClaim(offset));
  }
  boolean claimedEnd = rangeTracker.tryClaim(200L);
  assertFalse(claimedEnd);
  rangeTracker.checkDone();
}
 
Example #19
Source File: SplittableParDoProcessFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code WatermarkUpdateFn} over the restriction [0, 8) in three rounds and checks the
 * outputs and the watermark hold after each round.
 */
@Test
public void testUpdatesWatermark() throws Exception {
  DoFn<Instant, String> watermarkFn = new WatermarkUpdateFn();
  Instant startTime = Instant.now();

  ProcessFnTester<Instant, String, OffsetRange, Long, Instant> fnTester =
      new ProcessFnTester<>(
          startTime,
          watermarkFn,
          InstantCoder.of(),
          SerializableCoder.of(OffsetRange.class),
          InstantCoder.of(),
          3,
          MAX_BUNDLE_DURATION);
  Duration oneSecond = Duration.standardSeconds(1);

  // Round 1: outputs 0..2, hold expected at start + 2s.
  fnTester.startElement(startTime, new OffsetRange(0, 8));
  assertThat(fnTester.takeOutputElements(), hasItems("0", "1", "2"));
  assertEquals(startTime.plus(Duration.standardSeconds(2)), fnTester.getWatermarkHold());

  // Round 2: outputs 3..5, hold expected at start + 5s.
  assertTrue(fnTester.advanceProcessingTimeBy(oneSecond));
  assertThat(fnTester.takeOutputElements(), hasItems("3", "4", "5"));
  assertEquals(startTime.plus(Duration.standardSeconds(5)), fnTester.getWatermarkHold());

  // Round 3: the remaining outputs 6..7, after which no hold is expected.
  assertTrue(fnTester.advanceProcessingTimeBy(oneSecond));
  assertThat(fnTester.takeOutputElements(), hasItems("6", "7"));
  assertEquals(null, fnTester.getWatermarkHold());
}
 
Example #20
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the invoker with no sleeps and an effectively unlimited per-call output count over the
 * restriction [0, 10000), and checks where the residual restriction begins.
 */
@Test
public void testInvokeProcessElementOutputBounded() throws Exception {
  SplittableProcessElementInvoker<Void, String, OffsetRange, Long, Void>.Result res =
      runTest(10000, Duration.ZERO, Integer.MAX_VALUE, Duration.ZERO);
  assertFalse(res.getContinuation().shouldResume());
  OffsetRange residualRange = res.getResidualRestriction();
  // Should process the first 1000 elements, leaving [1000, 10000) as the residual.
  assertEquals(1000, residualRange.getFrom());
  assertEquals(10000, residualRange.getTo());
}
 
Example #21
Source File: OffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * A checkpoint taken before any claim is expected to leave an empty primary at the start offset
 * and hand the whole range over as residual; {@code checkDone()} is then expected to pass.
 */
@Test
public void testCheckpointUnstarted() throws Exception {
  OffsetRange restriction = new OffsetRange(100, 200);
  OffsetRangeTracker rangeTracker = new OffsetRangeTracker(restriction);

  SplitResult checkpoint = rangeTracker.trySplit(0);

  OffsetRange expectedPrimary = new OffsetRange(100, 100);
  OffsetRange expectedResidual = new OffsetRange(100, 200);
  assertEquals(expectedPrimary, checkpoint.getPrimary());
  assertEquals(expectedResidual, checkpoint.getResidual());
  rangeTracker.checkDone();
}
 
Example #22
Source File: GrowableOffsetRangeTrackerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checkpoints a growable tracker before any claim: the primary and the tracker's own restriction
 * are expected to be empty at 0, and the residual is expected to run to {@code Long.MAX_VALUE}
 * even though the estimated end is 10.
 */
@Test
public void testCheckpointBeforeStart() throws Exception {
  SimpleEstimator estimator = new SimpleEstimator();
  GrowableOffsetRangeTracker growableTracker = new GrowableOffsetRangeTracker(0L, estimator);
  estimator.setEstimateRangeEnd(10);

  SplitResult checkpoint = growableTracker.trySplit(0);
  growableTracker.checkDone();

  OffsetRange emptyAtStart = new OffsetRange(0, 0);
  assertEquals(emptyAtStart, checkpoint.getPrimary());
  assertEquals(emptyAtStart, growableTracker.currentRestriction());
  assertEquals(new OffsetRange(0, Long.MAX_VALUE), checkpoint.getResidual());
}
 
Example #23
Source File: OutputAndTimeBoundedSplittableProcessElementInvokerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the invoker with a 3-second sleep before the first claim and 100 ms before each output,
 * and checks that the residual restriction starts strictly between offsets 10 and 100.
 */
@Test
public void testInvokeProcessElementTimeBoundedWithStartupDelay() throws Exception {
  Duration startupDelay = Duration.standardSeconds(3);
  Duration perOutputDelay = Duration.millis(100);
  SplittableProcessElementInvoker<Void, String, OffsetRange, Long, Void>.Result result =
      runTest(10000, startupDelay, Integer.MAX_VALUE, perOutputDelay);

  assertFalse(result.getContinuation().shouldResume());

  // The time bound is counted from the first tryClaim() call, so the startup delay should not
  // reduce the number of elements processed.
  OffsetRange residual = result.getResidualRestriction();
  long residualStart = residual.getFrom();
  assertThat(residualStart, greaterThan(10L));
  assertThat(residualStart, lessThan(100L));
  assertEquals(10000, residual.getTo());
}
 
Example #24
Source File: SplittableParDoProcessFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Claims consecutive offsets starting at the restriction's start and, for each claimed offset,
 * emits the string form of {@code element + offset}.
 *
 * @param c context providing the element and receiving the outputs
 * @param tracker tracker for the current {@link OffsetRange} restriction
 * @return {@code resume()} once {@code numOutputsPerCall} outputs were produced in this call,
 *     {@code stop()} when a claim is rejected
 */
@ProcessElement
public ProcessContinuation process(
    ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
  long offset = tracker.currentRestriction().getFrom();
  long outputsBefore = 0;
  while (tracker.tryClaim(offset)) {
    c.output(String.valueOf(c.element() + offset));
    // Hand control back once this call has produced its quota of outputs.
    if (outputsBefore == numOutputsPerCall - 1) {
      return resume();
    }
    ++offset;
    ++outputsBefore;
  }
  return stop();
}
 
Example #25
Source File: PCollectionViews.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the total number of elements described by the given map, i.e. the sum over all entries
 * of {@code (to - from) * numElementsPerPosition}.
 *
 * <p>Every arithmetic step is overflow-checked, including the running sum, so an overflowing
 * intermediate result fails loudly instead of silently wrapping before the final cast.
 *
 * @param nonOverlappingRangesToNumElementsPerPosition ranges mapped to the number of elements at
 *     each position inside the range
 * @return the total element count as an {@code int}
 * @throws ArithmeticException if a difference, product or the running sum overflows a
 *     {@code long}
 * @throws IllegalArgumentException if the total does not fit in an {@code int} (raised by
 *     {@code Ints.checkedCast})
 */
@VisibleForTesting
static int computeTotalNumElements(
    Map<OffsetRange, Integer> nonOverlappingRangesToNumElementsPerPosition) {
  long sum = 0;
  for (Map.Entry<OffsetRange, Integer> range :
      nonOverlappingRangesToNumElementsPerPosition.entrySet()) {
    long numPositions =
        Math.subtractExact(range.getKey().getTo(), range.getKey().getFrom());
    long numElements = Math.multiplyExact(numPositions, range.getValue());
    // A plain += could wrap around and still pass the int range check below.
    sum = Math.addExact(sum, numElements);
  }
  return Ints.checkedCast(sum);
}
 
Example #26
Source File: PCollectionViewsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Two disjoint ranges are expected to come back unchanged, each with a count of 1. */
@Test
public void testNoOverlapping() {
  Iterable<OffsetRange> disjointRanges = Arrays.asList(range(0, 2), range(4, 6));

  Map<OffsetRange, Integer> actualCounts = computeOverlappingRanges(disjointRanges);

  Map<OffsetRange, Integer> expectedCounts = ImmutableMap.of(range(0, 2), 1, range(4, 6), 1);
  assertEquals(expectedCounts, actualCounts);
  assertNonEmptyRangesAndPositions(disjointRanges, actualCounts);
}
 
Example #27
Source File: BundleSplitterTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * With a constant bundle-size distribution of 2, every range returned by
 * {@code getBundleSizes} is expected to have width 2.
 */
@Test
public void bundlesShouldBeEvenForConstDistribution() {
  long expectedBundleSize = 2;
  options.bundleSizeDistribution = fromRealDistribution(new ConstantRealDistribution(2));
  splitter = new BundleSplitter(options);

  List<OffsetRange> bundleSizes = splitter.getBundleSizes(4, 0, options.numRecords);

  for (OffsetRange bundle : bundleSizes) {
    long width = bundle.getTo() - bundle.getFrom();
    assertEquals(expectedBundleSize, (int) width);
  }
}
 
Example #28
Source File: PCollectionViews.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@code PCollectionView<List<T>>} capable of processing elements windowed using the
 * provided {@link WindowingStrategy}.
 *
 * <p>The view is a {@code SimplePCollectionView} over the given collection that uses a
 * {@code ListViewFn2} built from the type descriptor supplier, together with the default window
 * mapping function of the strategy's window function.
 *
 * @param pCollection the keyed collection of values or {@link OffsetRange} metadata backing the
 *     view
 * @param typeDescriptorSupplier supplies the type descriptor handed to {@code ListViewFn2}
 * @param windowingStrategy windowing strategy of the view; also the source of the window mapping
 *     function
 * @return the list view over {@code pCollection}
 */
public static <T, W extends BoundedWindow> PCollectionView<List<T>> listView(
    PCollection<KV<Long, ValueOrMetadata<T, OffsetRange>>> pCollection,
    TypeDescriptorSupplier<T> typeDescriptorSupplier,
    WindowingStrategy<?, W> windowingStrategy) {
  return new SimplePCollectionView<>(
      pCollection,
      new ListViewFn2<>(typeDescriptorSupplier),
      windowingStrategy.getWindowFn().getDefaultWindowMappingFn(),
      windowingStrategy);
}
 
Example #29
Source File: HL7v2IO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the initial restriction for an HL7v2 store from the earliest and latest send times of
 * the messages matching the configured filter, expressed in epoch milliseconds.
 *
 * @param hl7v2Store the HL7v2 store to query
 * @return the range from the earliest send time to one millisecond past the latest send time
 * @throws IOException if querying the send times fails
 */
@GetInitialRestriction
public OffsetRange getEarliestToLatestRestriction(@Element String hl7v2Store)
    throws IOException {
  Instant earliest = this.client.getEarliestHL7v2SendTime(hl7v2Store, this.filter.get());
  Instant latest = this.client.getLatestHL7v2SendTime(hl7v2Store, this.filter.get());
  // Restrictions are half-open, [from, to), matching OffsetRangeTracker. The latest message
  // must still be included in the results, so the upper bound is pushed one millisecond past it.
  Instant exclusiveEnd = latest.plus(1);
  return new OffsetRange(earliest.getMillis(), exclusiveEnd.getMillis());
}
 
Example #30
Source File: SplittableParDoProcessFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code CounterFn(1)} on element 42 over the restriction [0, 3) and checks that each
 * resumption continues where the previous call stopped, producing "42", "43" and "44" in turn.
 */
@Test
public void testResumeCarriesOverState() throws Exception {
  DoFn<Integer, String> counterFn = new CounterFn(1);
  Instant startTime = Instant.now();
  dateTimeProvider.setDateTimeFixed(startTime.getMillis());
  ProcessFnTester<Integer, String, OffsetRange, Long, Void> fnTester =
      new ProcessFnTester<>(
          startTime,
          counterFn,
          BigEndianIntegerCoder.of(),
          SerializableCoder.of(OffsetRange.class),
          VoidCoder.of(),
          MAX_OUTPUTS_PER_BUNDLE,
          MAX_BUNDLE_DURATION);
  Duration oneSecond = Duration.standardSeconds(1);

  // The first call emits the element itself.
  fnTester.startElement(42, new OffsetRange(0, 3));
  assertThat(fnTester.takeOutputElements(), contains("42"));

  // Each of the next two resumptions emits exactly the next value.
  for (String nextOutput : new String[] {"43", "44"}) {
    assertTrue(fnTester.advanceProcessingTimeBy(oneSecond));
    assertThat(fnTester.takeOutputElements(), contains(nextOutput));
  }

  // Should not resume the null residual.
  assertFalse(fnTester.advanceProcessingTimeBy(oneSecond));
  // After outputting all 3 items, should not output anything more.
  assertEquals(0, fnTester.takeOutputElements().size());
  // Should also not ask to resume.
  assertFalse(fnTester.advanceProcessingTimeBy(oneSecond));
}