Java Code Examples for org.apache.beam.sdk.io.UnboundedSource#UnboundedReader
The following examples show how to use
org.apache.beam.sdk.io.UnboundedSource#UnboundedReader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WorkerCustomSources.java From beam with Apache License 2.0 | 6 votes |
@Override @SuppressWarnings("unchecked") public NativeReaderIterator<WindowedValue<ValueWithRecordId<T>>> iterator() throws IOException { UnboundedSource.UnboundedReader<T> reader = (UnboundedSource.UnboundedReader<T>) context.getCachedReader(); final boolean started = reader != null; if (reader == null) { String key = context.getSerializedKey().toStringUtf8(); // Key is expected to be a zero-padded integer representing the split index. int splitIndex = Integer.parseInt(key.substring(0, 16), 16) - 1; UnboundedSource<T, UnboundedSource.CheckpointMark> splitSource = parseSource(splitIndex); UnboundedSource.CheckpointMark checkpoint = null; if (splitSource.getCheckpointMarkCoder() != null) { checkpoint = context.getReaderCheckpoint(splitSource.getCheckpointMarkCoder()); } reader = splitSource.createReader(options, checkpoint); } context.setActiveReader(reader); return new UnboundedReaderIterator<>(reader, context, started); }
Example 2
Source File: ReaderCache.java From beam with Apache License 2.0 | 6 votes |
/** * If there is a cached reader for this split and the cache token matches, the reader is * <i>removed</i> from the cache and returned. Cache the reader using cacheReader() as required. * Note that cache will expire in one minute. If cacheToken does not match the token already * cached, it is assumed that the cached reader (if any) is no longer relevant and will be closed. * Return null in case of a cache miss. */ UnboundedSource.UnboundedReader<?> acquireReader( String computationId, ByteString splitId, long cacheToken) { KV<String, ByteString> key = KV.of(computationId, splitId); CacheEntry entry = cache.asMap().remove(key); cache.cleanUp(); if (entry != null) { if (entry.token == cacheToken) { return entry.reader; } else { // new cacheToken invalidates old one. close the reader. closeReader(key, entry); } } return null; }
Example 3
Source File: UnboundedSourceWrapper.java From beam with Apache License 2.0 | 6 votes |
/** Emit the current element from the given Reader. The reader is guaranteed to have data. */ private void emitElement( SourceContext<WindowedValue<ValueWithRecordId<OutputT>>> ctx, UnboundedSource.UnboundedReader<OutputT> reader) { // make sure that reader state update and element emission are atomic // with respect to snapshots OutputT item = reader.getCurrent(); byte[] recordId = reader.getCurrentRecordId(); Instant timestamp = reader.getCurrentTimestamp(); WindowedValue<ValueWithRecordId<OutputT>> windowedValue = WindowedValue.of( new ValueWithRecordId<>(item, recordId), timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); ctx.collect(windowedValue); }
Example 4
Source File: SyntheticUnboundedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a reader created from a checkpoint starting at position 0 yields, after
 * {@code start()}, the record generated for position 1.
 */
@Test
public void startPositionShouldBeExclusive() throws IOException {
  final int startPosition = 0;
  checkpoint = new SyntheticRecordsCheckpoint(startPosition, sourceOptions.numRecords);

  UnboundedSource.UnboundedReader<KV<byte[], byte[]>> reader =
      source.createReader(pipeline.getOptions(), checkpoint);
  reader.start();

  KV<byte[], byte[]> actual = reader.getCurrent();
  // The first element read is expected to be the one right after the start position.
  KV<byte[], byte[]> expected = sourceOptions.genRecord(startPosition + 1).kv;
  assertEquals(expected, actual);
}
Example 5
Source File: ReaderCache.java From beam with Apache License 2.0 | 5 votes |
/**
 * Caches the reader for a minute under the (computationId, splitId) key. It will be closed if it
 * is not acquired within a minute.
 *
 * <p>Fails the state check when an entry already exists for the key: overwriting a cached reader
 * is not allowed.
 */
void cacheReader(
    String computationId,
    ByteString splitId,
    long cacheToken,
    UnboundedSource.UnboundedReader<?> reader) {
  KV<String, ByteString> key = KV.of(computationId, splitId);
  CacheEntry fresh = new CacheEntry(reader, cacheToken);

  CacheEntry previous = cache.asMap().putIfAbsent(key, fresh);
  Preconditions.checkState(previous == null, "Overwriting existing readers is not allowed");
  cache.cleanUp();
}
Example 6
Source File: UnboundedSourceWrapper.java From beam with Apache License 2.0 | 5 votes |
/**
 * Closes this source: registers metrics for the pipeline result, closes the superclass and every
 * local reader, and finally deletes the static caches.
 *
 * <p>A failure while closing the superclass or one reader does not prevent the remaining readers
 * from being closed. The first failure is rethrown once all close attempts have been made, with
 * any later failures attached to it as suppressed exceptions.
 *
 * @throws Exception the first failure raised while closing the superclass or a reader
 */
@Override
public void close() throws Exception {
  metricContainer.registerMetricsForPipelineResult();
  try {
    Exception firstFailure = null;

    try {
      super.close();
    } catch (Exception e) {
      // Remember the failure but keep going so the readers below are still released.
      firstFailure = e;
    }

    if (localReaders != null) {
      for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) {
        try {
          reader.close();
        } catch (Exception e) {
          if (firstFailure == null) {
            firstFailure = e;
          } else {
            firstFailure.addSuppressed(e);
          }
        }
      }
    }

    if (firstFailure != null) {
      throw firstFailure;
    }
  } finally {
    Workarounds.deleteStaticCaches();
  }
}
Example 7
Source File: MicrobatchSource.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a reader that wraps the given unbounded reader and sets up its backoff configuration.
 *
 * <p>The backoff starts at 10 milliseconds; both the maximum single backoff and the maximum
 * cumulative backoff are set to {@code maxReadTime.minus(1)}.
 *
 * @param unboundedReader the underlying reader to wrap
 */
private Reader(final UnboundedSource.UnboundedReader<T> unboundedReader) {
  // The backoff settings do not depend on the wrapped reader.
  backoffFactory =
      FluentBackoff.DEFAULT
          .withInitialBackoff(Duration.millis(10))
          .withMaxBackoff(maxReadTime.minus(1))
          .withMaxCumulativeBackoff(maxReadTime.minus(1));
  this.unboundedReader = unboundedReader;
}
Example 8
Source File: UnboundedSourceP.java From beam with Apache License 2.0 | 5 votes |
@Override public Object next() { if (minWatermark > lastSentWatermark) { lastSentWatermark = minWatermark; return new Watermark(lastSentWatermark); } try { // trying to fetch a value from the next reader for (int i = 0; i < readers.length; i++) { currentReaderIndex++; if (currentReaderIndex >= readers.length) { currentReaderIndex = 0; } UnboundedSource.UnboundedReader<InputT> currentReader = readers[currentReaderIndex]; if (currentReader.advance()) { long currentWatermark = currentReader.getWatermark().getMillis(); long origWatermark = watermarks[currentReaderIndex]; if (currentWatermark > origWatermark) { watermarks[currentReaderIndex] = currentWatermark; // todo: we should probably do this only on a timer... if (origWatermark == minWatermark) { minWatermark = getMin(watermarks); } } return mapFn.apply(currentReader); } } // all advances have failed return null; } catch (IOException e) { throw ExceptionUtil.rethrow(e); } }
Example 9
Source File: UnboundedSourceP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a traverser over several readers.
 *
 * @param readers the readers to pull from; one watermark slot is initialized per reader
 * @param mapFn function applied to a reader to produce the emitted bytes
 */
CoalescingTraverser(
    UnboundedSource.UnboundedReader<InputT>[] readers,
    Function<UnboundedSource.UnboundedReader<InputT>, byte[]> mapFn) {
  this.readers = readers;
  this.mapFn = mapFn;
  // One watermark entry per reader.
  watermarks = initWatermarks(readers.length);
}
Example 10
Source File: UnboundedReadEvaluatorFactoryTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a test reader over {@code elems}, resuming from the checkpoint's index when a
 * checkpoint is given and from index -1 otherwise. Every call increments
 * {@code readerCreatedCount}.
 *
 * @param options pipeline options (not used by this implementation)
 * @param checkpointMark checkpoint to resume from, or {@code null}; it must have been decoded
 * @return a new {@code TestUnboundedReader}
 */
@Override
public UnboundedSource.UnboundedReader<T> createReader(
    PipelineOptions options, @Nullable TestCheckpointMark checkpointMark) {
  checkState(
      checkpointMark == null || checkpointMark.decoded,
      "Cannot resume from a checkpoint that has not been decoded");
  readerCreatedCount++;

  if (checkpointMark == null) {
    // No checkpoint: start from index -1.
    return new TestUnboundedReader(elems, -1);
  }
  return new TestUnboundedReader(elems, checkpointMark.index);
}
Example 11
Source File: UnboundedSourceP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a reader for the given shard without a checkpoint, rethrowing any {@code IOException}
 * through {@code ExceptionUtil.rethrow}.
 *
 * @param options pipeline options handed to the shard
 * @param shard the source to create a reader for
 * @return the reader created by the shard
 */
private static <T> UnboundedSource.UnboundedReader<T> createReader(
    PipelineOptions options, UnboundedSource<T, ?> shard) {
  try {
    // A null checkpoint mark is passed: the reader does not resume from a checkpoint.
    UnboundedSource.UnboundedReader<T> created = shard.createReader(options, null);
    return created;
  } catch (IOException e) {
    throw ExceptionUtil.rethrow(e);
  }
}
Example 12
Source File: UnboundedSourceP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates one reader per shard, in the order of {@code shards}.
 *
 * @param shards the sources to create readers for
 * @param options pipeline options handed to every shard
 * @return an array holding the reader of each shard at the shard's position
 */
@SuppressWarnings("unchecked")
private static <T, CmT extends UnboundedSource.CheckpointMark>
    UnboundedSource.UnboundedReader<T>[] createReaders(
        List<? extends UnboundedSource<T, CmT>> shards, PipelineOptions options) {
  // Generic arrays cannot be created directly, hence the raw array and the unchecked warning.
  UnboundedSource.UnboundedReader<T>[] created =
      new UnboundedSource.UnboundedReader[shards.size()];
  int slot = 0;
  for (UnboundedSource<T, CmT> shard : shards) {
    created[slot++] = createReader(options, shard);
  }
  return created;
}
Example 13
Source File: SyntheticUnboundedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that, with a checkpoint spanning positions 0 to 2, the element current after
 * {@code start()} followed by one {@code advance()} is the record generated for position 2.
 */
@Test
public void lastElementShouldBeInclusive() throws IOException {
  final int endPosition = 2;
  checkpoint = new SyntheticRecordsCheckpoint(0, endPosition);

  UnboundedSource.UnboundedReader<KV<byte[], byte[]>> reader =
      source.createReader(pipeline.getOptions(), checkpoint);
  // Move to the second element.
  reader.start();
  reader.advance();

  KV<byte[], byte[]> actual = reader.getCurrent();
  KV<byte[], byte[]> expected = sourceOptions.genRecord(endPosition).kv;
  assertEquals(expected, actual);
}
Example 14
Source File: SyntheticUnboundedSourceTest.java From beam with Apache License 2.0 | 5 votes |
/** Checks that {@code start()} on a newly created reader returns {@code true}. */
@Test
public void shouldStartTheReaderSuccessfully() throws IOException {
  UnboundedSource.UnboundedReader<KV<byte[], byte[]>> reader =
      source.createReader(pipeline.getOptions(), checkpoint);

  assertTrue(reader.start());
}
Example 15
Source File: UnboundedSourceP.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates a traverser over a single reader.
 *
 * @param reader the reader to pull from
 * @param mapFn function applied to the reader to produce the emitted bytes
 */
SingleReaderTraverser(
    UnboundedSource.UnboundedReader<InputT> reader,
    Function<UnboundedSource.UnboundedReader<InputT>, byte[]> mapFn) {
  this.mapFn = mapFn;
  this.reader = reader;
}
Example 16
Source File: UnboundedSourceWrapper.java From beam with Apache License 2.0 | 4 votes |
/** Initialize and restore state before starting execution of the source. */ @Override public void open(Configuration parameters) throws Exception { FileSystems.setDefaultPipelineOptions(serializedOptions.get()); runtimeContext = (StreamingRuntimeContext) getRuntimeContext(); metricContainer = new FlinkMetricContainer(runtimeContext); // figure out which split sources we're responsible for int subtaskIndex = runtimeContext.getIndexOfThisSubtask(); int numSubtasks = runtimeContext.getNumberOfParallelSubtasks(); localSplitSources = new ArrayList<>(); localReaders = new ArrayList<>(); pendingCheckpoints = new LinkedHashMap<>(); if (isRestored) { // restore the splitSources from the checkpoint to ensure consistent ordering for (KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> restored : stateForCheckpoint.get()) { localSplitSources.add(restored.getKey()); localReaders.add( restored.getKey().createReader(serializedOptions.get(), restored.getValue())); } } else { // initialize localReaders and localSources from scratch for (int i = 0; i < splitSources.size(); i++) { if (i % numSubtasks == subtaskIndex) { UnboundedSource<OutputT, CheckpointMarkT> source = splitSources.get(i); UnboundedSource.UnboundedReader<OutputT> reader = source.createReader(serializedOptions.get(), null); localSplitSources.add(source); localReaders.add(reader); } } } LOG.info( "Unbounded Flink Source {}/{} is reading from sources: {}", subtaskIndex + 1, numSubtasks, localSplitSources); }
Example 17
Source File: UnboundedSourceWrapper.java From beam with Apache License 2.0 | 4 votes |
@Override public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception { if (!isRunning) { LOG.debug("snapshotState() called on closed source"); } else { if (checkpointCoder == null) { // no checkpoint coder available in this source return; } stateForCheckpoint.clear(); long checkpointId = functionSnapshotContext.getCheckpointId(); // we checkpoint the sources along with the CheckpointMarkT to ensure // than we have a correct mapping of checkpoints to sources when // restoring List<CheckpointMarkT> checkpointMarks = new ArrayList<>(localSplitSources.size()); for (int i = 0; i < localSplitSources.size(); i++) { UnboundedSource<OutputT, CheckpointMarkT> source = localSplitSources.get(i); UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(i); @SuppressWarnings("unchecked") CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark(); checkpointMarks.add(mark); KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> kv = KV.of(source, mark); stateForCheckpoint.add(kv); } // cleanup old pending checkpoints and add new checkpoint int diff = pendingCheckpoints.size() - MAX_NUMBER_PENDING_CHECKPOINTS; if (diff >= 0) { for (Iterator<Long> iterator = pendingCheckpoints.keySet().iterator(); diff >= 0; diff--) { iterator.next(); iterator.remove(); } } pendingCheckpoints.put(checkpointId, checkpointMarks); } }
Example 18
Source File: StreamingModeExecutionContext.java From beam with Apache License 2.0 | 4 votes |
/**
 * Records the given reader as the active reader.
 *
 * <p>Fails the state check if an active reader is already set: it is not expected to be
 * overwritten.
 *
 * @param reader the reader to mark as active
 */
public void setActiveReader(UnboundedSource.UnboundedReader<?> reader) {
  checkState(activeReader == null, "not expected to be overwritten");
  this.activeReader = reader;
}
Example 19
Source File: UnboundedSourceWrapper.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the list of local readers held by this wrapper (the list itself, not a copy).
 *
 * <p>Visible so that tests can inspect it. Must not be used for anything else.
 */
@VisibleForTesting
List<UnboundedSource.UnboundedReader<OutputT>> getLocalReaders() {
  return this.localReaders;
}
Example 20
Source File: ReaderCache.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates a cache entry pairing a reader with its cache token.
 *
 * @param reader the reader being cached
 * @param token the cache token stored alongside the reader
 */
CacheEntry(UnboundedSource.UnboundedReader<?> reader, long token) {
  this.token = token;
  this.reader = reader;
}