org.apache.beam.runners.flink.FlinkPipelineOptions Java Examples
The following examples show how to use org.apache.beam.runners.flink.FlinkPipelineOptions.
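Most of the examples below share one basic pattern: obtain a FlinkPipelineOptions instance from PipelineOptionsFactory (or down-cast an existing PipelineOptions with as()) and configure it through its setters. Here is a minimal, self-contained sketch of that pattern; the concrete values are illustrative assumptions, not defaults, although each setter itself appears somewhere in the examples on this page.

import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class FlinkPipelineOptionsSketch {

  public static void main(String[] args) {
    // Create options typed to the Flink runner's interface.
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

    // Illustrative values only (assumptions, not defaults):
    options.setParallelism(4);                             // see Example #4
    options.setMaxBundleSize(10L);                         // see Example #13
    options.setCheckpointingInterval(1000L);               // enables Flink checkpointing
    options.setShutdownSourcesAfterIdleMs(Long.MAX_VALUE); // see Example #15

    // Any PipelineOptions can be viewed through the Flink interface (see Example #1):
    PipelineOptions generic = options;
    boolean metricsDisabled = generic.as(FlinkPipelineOptions.class).getDisableMetrics();
    System.out.println("disableMetrics=" + metricsDisabled);
  }
}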
Example #1
Source File: ReaderInvocationUtil.java From beam with Apache License 2.0
public ReaderInvocationUtil(
    String stepName, PipelineOptions options, FlinkMetricContainer container) {
  FlinkPipelineOptions flinkPipelineOptions = options.as(FlinkPipelineOptions.class);
  this.stepName = stepName;
  this.enableMetrics = !flinkPipelineOptions.getDisableMetrics();
  this.container = container;
}
Example #2
Source File: UnboundedSourceWrapperTest.java From beam with Apache License 2.0
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  TestCountingSource source = new TestCountingSource(20).withoutSplitting();

  UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> sourceWrapper =
      new UnboundedSourceWrapper<>("noReader", options, source, 2);

  StreamingRuntimeContext mock = Mockito.mock(StreamingRuntimeContext.class);
  Mockito.when(mock.getNumberOfParallelSubtasks()).thenReturn(1);
  Mockito.when(mock.getExecutionConfig()).thenReturn(new ExecutionConfig());
  Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(0);
  sourceWrapper.setRuntimeContext(mock);
  sourceWrapper.open(new Configuration());

  String metricContainerFieldName = "metricContainer";
  FlinkMetricContainer monitoredContainer =
      Mockito.spy(
          (FlinkMetricContainer)
              Whitebox.getInternalState(sourceWrapper, metricContainerFieldName));
  Whitebox.setInternalState(sourceWrapper, metricContainerFieldName, monitoredContainer);

  sourceWrapper.close();
  Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
}
Example #3
Source File: WindowDoFnOperatorTest.java From beam with Apache License 2.0
private WindowDoFnOperator<Long, Long, Long> getWindowDoFnOperator() {
  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(standardMinutes(1)));

  TupleTag<KV<Long, Long>> outputTag = new TupleTag<>("main-output");

  SystemReduceFn<Long, Long, long[], Long, BoundedWindow> reduceFn =
      SystemReduceFn.combining(
          VarLongCoder.of(),
          AppliedCombineFn.withInputCoder(
              Sum.ofLongs(),
              CoderRegistry.createDefault(),
              KvCoder.of(VarLongCoder.of(), VarLongCoder.of())));

  Coder<IntervalWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
  SingletonKeyedWorkItemCoder<Long, Long> workItemCoder =
      SingletonKeyedWorkItemCoder.of(VarLongCoder.of(), VarLongCoder.of(), windowCoder);
  FullWindowedValueCoder<SingletonKeyedWorkItem<Long, Long>> inputCoder =
      WindowedValue.getFullCoder(workItemCoder, windowCoder);
  FullWindowedValueCoder<KV<Long, Long>> outputCoder =
      WindowedValue.getFullCoder(KvCoder.of(VarLongCoder.of(), VarLongCoder.of()), windowCoder);

  return new WindowDoFnOperator<Long, Long, Long>(
      reduceFn,
      "stepName",
      (Coder) inputCoder,
      outputTag,
      emptyList(),
      new MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
      windowingStrategy,
      emptyMap(),
      emptyList(),
      PipelineOptionsFactory.as(FlinkPipelineOptions.class),
      VarLongCoder.of(),
      new WorkItemKeySelector(VarLongCoder.of()));
}
Example #4
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
private static DoFnOperator getOperatorForCleanupInspection() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setParallelism(4);

  TupleTag<String> outputTag = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
  IdentityDoFn<String> doFn =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  return new DoFnOperator<>(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      null,
      null,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
Example #5
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
private <K, InT, OutT>
    OneInputStreamOperatorTestHarness<WindowedValue<InT>, WindowedValue<OutT>> createTestHarness(
        WindowingStrategy<Object, ?> windowingStrategy,
        DoFn<InT, OutT> fn,
        FullWindowedValueCoder<InT> inputCoder,
        FullWindowedValueCoder<OutT> outputCoder,
        Coder<?> keyCoder,
        TupleTag<OutT> outputTag,
        TypeInformation<K> keyCoderInfo,
        KeySelector<WindowedValue<InT>, K> keySelector)
        throws Exception {
  DoFnOperator<InT, OutT> doFnOperator =
      new DoFnOperator<>(
          fn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          keyCoder /* key coder */,
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  return new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, keyCoderInfo);
}
Example #6
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void nonKeyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());

        TupleTag<String> outputTag = new TupleTag<>("main-output");

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(doFnOperator);
      });
}
Example #7
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void nonKeyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.globalDefault(),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(doFnOperator);
      });
}
Example #8
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testSingleOutput() throws Exception {
  Coder<WindowedValue<String>> coder = WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn<>(),
          "stepName",
          coder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("Hello")));

  testHarness.close();
}
Example #9
Source File: NonMergingGroupByKeyTest.java From beam with Apache License 2.0
@Test
public void testEnabledReIterationDoesNotThrowAnException() {
  final Pipeline p = FlinkTestPipeline.createForBatch();
  p.getOptions().as(FlinkPipelineOptions.class).setReIterableGroupByKeyResult(true);
  p.apply(Create.of(Arrays.asList(KV.of("a", 1), KV.of("b", 2), KV.of("c", 3))))
      .apply(GroupByKey.create())
      .apply(ParDo.of(new ReiterateDoFn<>()));
  final PipelineResult.State state = p.run().waitUntilFinish();
  Assert.assertEquals(PipelineResult.State.DONE, state);
}
Example #10
Source File: FlinkExecutableStageFunction.java From beam with Apache License 2.0
@Override
public void open(Configuration parameters) {
  FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
  // Register standard file systems.
  FileSystems.setDefaultPipelineOptions(options);
  executableStage = ExecutableStage.fromPayload(stagePayload);
  runtimeContext = getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);
  // TODO: Wire this into the distributed cache and make it pluggable.
  stageContext = contextFactory.get(jobInfo);
  stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
  // NOTE: It's safe to reuse the state handler between partitions because each partition uses
  // the same backing runtime context and broadcast variables. We use checkState below to catch
  // errors in backward-incompatible Flink changes.
  stateRequestHandler =
      getStateRequestHandler(
          executableStage, stageBundleFactory.getProcessBundleDescriptor(), runtimeContext);
  progressHandler =
      new BundleProgressHandler() {
        @Override
        public void onProgress(ProcessBundleProgressResponse progress) {
          metricContainer.updateMetrics(stepName, progress.getMonitoringInfosList());
        }

        @Override
        public void onCompleted(ProcessBundleResponse response) {
          metricContainer.updateMetrics(stepName, response.getMonitoringInfosList());
        }
      };
}
Example #11
Source File: UnboundedSourceWrapper.java From beam with Apache License 2.0
@SuppressWarnings("unchecked")
public UnboundedSourceWrapper(
    String stepName,
    PipelineOptions pipelineOptions,
    UnboundedSource<OutputT, CheckpointMarkT> source,
    int parallelism)
    throws Exception {
  this.stepName = stepName;
  this.serializedOptions = new SerializablePipelineOptions(pipelineOptions);
  this.isConvertedBoundedSource =
      source instanceof UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter;

  if (source.requiresDeduping()) {
    LOG.warn("Source {} requires deduping but Flink runner doesn't support this yet.", source);
  }

  Coder<CheckpointMarkT> checkpointMarkCoder = source.getCheckpointMarkCoder();
  if (checkpointMarkCoder == null) {
    LOG.info("No CheckpointMarkCoder specified for this source. Won't create snapshots.");
    checkpointCoder = null;
  } else {
    Coder<? extends UnboundedSource<OutputT, CheckpointMarkT>> sourceCoder =
        (Coder) SerializableCoder.of(new TypeDescriptor<UnboundedSource>() {});

    checkpointCoder = KvCoder.of(sourceCoder, checkpointMarkCoder);
  }

  // get the splits early. we assume that the generated splits are stable,
  // this is necessary so that the mapping of state to source is correct
  // when restoring
  splitSources = source.split(parallelism, pipelineOptions);

  FlinkPipelineOptions options = pipelineOptions.as(FlinkPipelineOptions.class);
  idleTimeoutMs = options.getShutdownSourcesAfterIdleMs();
}
Example #12
Source File: PipelineOptionsTableGenerator.java From beam with Apache License 2.0
/**
 * Returns the extracted list of options via reflection on FlinkPipelineOptions. Options are
 * returned sorted in alphabetical order since Java does not guarantee any consistent order on
 * the class methods.
 */
private static List<Option> extractOptions(boolean isPython) {
  List<Option> options = new ArrayList<>();
  for (Method method : FlinkPipelineOptions.class.getDeclaredMethods()) {
    String name;
    String description;
    String defaultValue = null;
    name = method.getName();
    if (name.matches("^(get|is).*")) {
      name = name.replaceFirst("^(get|is)", "");

      if (isPython) {
        name = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name);
      } else {
        name = Character.toLowerCase(name.charAt(0)) + name.substring(1);
      }

      Description descriptionAnnotation = method.getAnnotation(Description.class);
      if (descriptionAnnotation == null) {
        throw new RuntimeException(
            "All pipeline options should have a description. Please add one for " + name);
      }
      description = descriptionAnnotation.value();

      Optional<String> defaultValueFromAnnotation = getDefaultValueFromAnnotation(method);
      if (defaultValueFromAnnotation.isPresent()) {
        defaultValue = defaultValueFromAnnotation.get();
      }

      options.add(new Option(name, description, defaultValue));
    }
  }

  options.sort(Comparator.comparing(option -> option.name));
  return options;
}
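The Python-style names above come from Guava's CaseFormat. A small hedged sketch of the conversion performed in extractOptions, using a hypothetical getter name:

import com.google.common.base.CaseFormat;

public class OptionNameDemo {

  public static void main(String[] args) {
    String method = "getCheckpointingInterval"; // hypothetical getter name
    String stripped = method.replaceFirst("^(get|is)", ""); // "CheckpointingInterval"

    // Java-style name: lower-case the first character.
    String javaName = Character.toLowerCase(stripped.charAt(0)) + stripped.substring(1);

    // Python-style name: UpperCamel -> lower_underscore via Guava.
    String pythonName = CaseFormat.UPPER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, stripped);

    System.out.println(javaName);   // checkpointingInterval
    System.out.println(pythonName); // checkpointing_interval
  }
}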
Example #13
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  Supplier<DoFnOperator<String, String>> doFnOperatorSupplier =
      () ->
          new DoFnOperator<>(
              new IdentityDoFn(),
              "stepName",
              windowedValueCoder,
              Collections.emptyMap(),
              outputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              options,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> doFnOperator = doFnOperatorSupplier.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // This callback will be executed in the snapshotState function in the course of
  // finishing the currently active bundle. Everything emitted in the callback should
  // be buffered and not sent downstream.
  doFnOperator.setBundleFinishedCallback(
      () -> {
        try {
          // Clear this early for the test here because we want to finish the bundle from within
          // the callback, which would otherwise cause an infinite recursion
          doFnOperator.setBundleFinishedCallback(null);
          testHarness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow("trigger another bundle")));
          doFnOperator.invokeFinishBundle();
          testHarness.processElement(
              new StreamRecord<>(
                  WindowedValue.valueInGlobalWindow(
                      "check that the previous element is not flushed")));
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("regular element")));

  testHarness.close();

  // Restore
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness2 =
      new OneInputStreamOperatorTestHarness<>(doFnOperatorSupplier.get());

  testHarness2.initializeState(snapshot);
  testHarness2.open();

  testHarness2.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("after restore")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness2.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("trigger another bundle"),
          WindowedValue.valueInGlobalWindow("check that the previous element is not flushed"),
          WindowedValue.valueInGlobalWindow("after restore")));
}
Example #14
Source File: KettleBeamPipelineExecutor.java From kettle-beam with Apache License 2.0
public Pipeline getPipeline( TransMeta transMeta, BeamJobConfig config ) throws KettleException {

  try {

    if ( StringUtils.isEmpty( config.getRunnerTypeName() ) ) {
      throw new KettleException( "You need to specify a runner type, one of : " + RunnerType.values().toString() );
    }
    PipelineOptions pipelineOptions = null;
    VariableSpace space = transMeta;

    RunnerType runnerType = RunnerType.getRunnerTypeByName( transMeta.environmentSubstitute( config.getRunnerTypeName() ) );
    switch ( runnerType ) {
      case Direct:
        pipelineOptions = PipelineOptionsFactory.create();
        break;
      case DataFlow:
        DataflowPipelineOptions dfOptions = PipelineOptionsFactory.as( DataflowPipelineOptions.class );
        configureDataFlowOptions( config, dfOptions, space );
        pipelineOptions = dfOptions;
        break;
      case Spark:
        SparkPipelineOptions sparkOptions;
        if ( sparkContext != null ) {
          SparkContextOptions sparkContextOptions = PipelineOptionsFactory.as( SparkContextOptions.class );
          sparkContextOptions.setProvidedSparkContext( sparkContext );
          sparkOptions = sparkContextOptions;
        } else {
          sparkOptions = PipelineOptionsFactory.as( SparkPipelineOptions.class );
        }
        configureSparkOptions( config, sparkOptions, space, transMeta.getName() );
        pipelineOptions = sparkOptions;
        break;
      case Flink:
        FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as( FlinkPipelineOptions.class );
        configureFlinkOptions( config, flinkOptions, space );
        pipelineOptions = flinkOptions;
        break;
      default:
        throw new KettleException( "Sorry, this isn't implemented yet" );
    }

    configureStandardOptions( config, transMeta.getName(), pipelineOptions, space );

    setVariablesInTransformation( config, transMeta );

    TransMetaPipelineConverter converter;
    if ( stepPluginClasses != null && xpPluginClasses != null ) {
      converter = new TransMetaPipelineConverter( transMeta, metaStore, stepPluginClasses, xpPluginClasses, jobConfig );
    } else {
      converter = new TransMetaPipelineConverter( transMeta, metaStore, config.getPluginsToStage(), jobConfig );
    }
    Pipeline pipeline = converter.createPipeline( pipelineOptions );

    // Also set the pipeline options...
    //
    FileSystems.setDefaultPipelineOptions( pipelineOptions );

    return pipeline;
  } catch ( Exception e ) {
    throw new KettleException( "Error configuring local Beam Engine", e );
  }
}
Example #15
Source File: UnboundedSourceWrapperTest.java From beam with Apache License 2.0
private static void testSourceDoesNotShutdown(boolean shouldHaveReaders) throws Exception {
  final int parallelism = 2;
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  // Make sure we do not shut down
  options.setShutdownSourcesAfterIdleMs(Long.MAX_VALUE);

  TestCountingSource source = new TestCountingSource(20).withoutSplitting();
  UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> sourceWrapper =
      new UnboundedSourceWrapper<>("noReader", options, source, parallelism);

  StreamingRuntimeContext mock = Mockito.mock(StreamingRuntimeContext.class);
  if (shouldHaveReaders) {
    // Since the source can't be split, the first subtask index will read everything
    Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(0);
  } else {
    // Set up the RuntimeContext such that this instance won't receive any readers
    Mockito.when(mock.getIndexOfThisSubtask()).thenReturn(parallelism - 1);
  }

  Mockito.when(mock.getNumberOfParallelSubtasks()).thenReturn(parallelism);
  Mockito.when(mock.getExecutionConfig()).thenReturn(new ExecutionConfig());
  ProcessingTimeService timerService = Mockito.mock(ProcessingTimeService.class);
  Mockito.when(timerService.getCurrentProcessingTime()).thenReturn(Long.MAX_VALUE);
  Mockito.when(mock.getProcessingTimeService()).thenReturn(timerService);

  sourceWrapper.setRuntimeContext(mock);
  sourceWrapper.open(new Configuration());

  SourceFunction.SourceContext sourceContext = Mockito.mock(SourceFunction.SourceContext.class);
  Object checkpointLock = new Object();
  Mockito.when(sourceContext.getCheckpointLock()).thenReturn(checkpointLock);
  // Initialize source context early to avoid concurrency issues with its initialization in the
  // run method and the onProcessingTime call on the wrapper.
  sourceWrapper.setSourceContext(sourceContext);

  sourceWrapper.open(new Configuration());
  assertThat(sourceWrapper.getLocalReaders().isEmpty(), is(!shouldHaveReaders));

  Thread thread =
      new Thread(
          () -> {
            try {
              sourceWrapper.run(sourceContext);
            } catch (Exception e) {
              LOG.error("Error while running UnboundedSourceWrapper", e);
            }
          });

  try {
    thread.start();
    // Wait to see if the wrapper shuts down immediately in case it doesn't have readers
    if (!shouldHaveReaders) {
      // The expected state is for finalizeSource to sleep instead of exiting
      while (true) {
        StackTraceElement[] callStack = thread.getStackTrace();
        if (callStack.length >= 2
            && "sleep".equals(callStack[0].getMethodName())
            && "finalizeSource".equals(callStack[1].getMethodName())) {
          break;
        }
        Thread.sleep(10);
      }
    }
    // Source should still be running even if there are no readers
    assertThat(sourceWrapper.isRunning(), is(true));
    synchronized (checkpointLock) {
      // Trigger emission of the watermark by updating processing time.
      // The actual processing time value does not matter.
      sourceWrapper.onProcessingTime(42);
    }
    // Source should still be running even when watermark is at max
    assertThat(sourceWrapper.isRunning(), is(true));
    assertThat(thread.isAlive(), is(true));
    sourceWrapper.cancel();
  } finally {
    thread.interrupt();
    // try to join but also don't mask exceptions with test timeout
    thread.join(1000);
  }
  assertThat(thread.isAlive(), is(false));
}
Example #16
Source File: UnboundedSourceWrapperTest.java From beam with Apache License 2.0
@Test(timeout = 30_000)
public void testValueEmission() throws Exception {
  final int numElementsPerShard = 20;
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  final long[] numElementsReceived = {0L};
  final int[] numWatermarksReceived = {0};

  // this source will emit exactly NUM_ELEMENTS for each parallel reader,
  // afterwards it will stall. We check whether we also receive NUM_ELEMENTS
  // elements later.
  TestCountingSource source =
      new TestCountingSource(numElementsPerShard).withFixedNumSplits(numSplits);

  for (int subtaskIndex = 0; subtaskIndex < numTasks; subtaskIndex++) {
    UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
        new UnboundedSourceWrapper<>("stepName", options, source, numTasks);

    // the source wrapper will only request as many splits as there are tasks and the source
    // will create at most numSplits splits
    assertEquals(numSplits, flinkWrapper.getSplitSources().size());

    StreamSource<
            WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>,
            UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark>>
        sourceOperator = new StreamSource<>(flinkWrapper);

    AbstractStreamOperatorTestHarness<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>>
        testHarness =
            new AbstractStreamOperatorTestHarness<>(
                sourceOperator,
                numTasks /* max parallelism */,
                numTasks /* parallelism */,
                subtaskIndex /* subtask index */);

    // The testing timer service is synchronous, so we must configure a watermark interval
    // > 0, otherwise we can loop infinitely due to a timer always becoming ready after
    // it has been set.
    testHarness.getExecutionConfig().setAutoWatermarkInterval(10L);
    testHarness.setProcessingTime(System.currentTimeMillis());
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

    Thread processingTimeUpdateThread = startProcessingTimeUpdateThread(testHarness);

    try {
      testHarness.open();
      StreamSources.run(
          sourceOperator,
          testHarness.getCheckpointLock(),
          new TestStreamStatusMaintainer(),
          new Output<StreamRecord<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>>>() {
            private boolean hasSeenMaxWatermark = false;

            @Override
            public void emitWatermark(Watermark watermark) {
              // we get this when there is no more data
              // it can happen that we get the max watermark several times, so guard against
              // this
              if (!hasSeenMaxWatermark
                  && watermark.getTimestamp()
                      >= BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()) {
                numWatermarksReceived[0]++;
                hasSeenMaxWatermark = true;
              }
            }

            @Override
            public <X> void collect(OutputTag<X> outputTag, StreamRecord<X> streamRecord) {
              collect((StreamRecord) streamRecord);
            }

            @Override
            public void emitLatencyMarker(LatencyMarker latencyMarker) {}

            @Override
            public void collect(
                StreamRecord<WindowedValue<ValueWithRecordId<KV<Integer, Integer>>>>
                    windowedValueStreamRecord) {
              numElementsReceived[0]++;
            }

            @Override
            public void close() {}
          });
    } finally {
      processingTimeUpdateThread.interrupt();
      processingTimeUpdateThread.join();
    }
  }
  // verify that we get the expected count across all subtasks
  assertEquals(numElementsPerShard * numSplits, numElementsReceived[0]);
  // and that we get as many final watermarks as there are subtasks
  assertEquals(numTasks, numWatermarksReceived[0]);
}
Example #17
Source File: ExecutableStageDoFnOperatorTest.java From beam with Apache License 2.0
@SuppressWarnings("rawtypes")
private ExecutableStageDoFnOperator getOperator(
    TupleTag<Integer> mainOutput,
    List<TupleTag<?>> additionalOutputs,
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory,
    WindowingStrategy windowingStrategy,
    @Nullable Coder keyCoder,
    Coder windowedInputCoder) {

  FlinkExecutableStageContextFactory contextFactory =
      Mockito.mock(FlinkExecutableStageContextFactory.class);
  when(contextFactory.get(any())).thenReturn(stageContext);

  final ExecutableStagePayload stagePayload;
  if (keyCoder != null) {
    stagePayload = this.stagePayloadWithUserState;
  } else {
    stagePayload = this.stagePayload;
  }

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      new ExecutableStageDoFnOperator<>(
          "transform",
          windowedInputCoder,
          Collections.emptyMap(),
          mainOutput,
          additionalOutputs,
          outputManagerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          stagePayload,
          jobInfo,
          contextFactory,
          createOutputMap(mainOutput, additionalOutputs),
          windowingStrategy,
          keyCoder,
          keyCoder != null ? new KvToByteBufferKeySelector<>(keyCoder) : null);

  Whitebox.setInternalState(operator, "stateRequestHandler", stateRequestHandler);

  return operator;
}
Example #18
Source File: ExecutableStageDoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void testSerialization() {
  WindowedValue.ValueOnlyWindowedValueCoder<Integer> coder =
      WindowedValue.getValueOnlyCoder(VarIntCoder.of());

  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  TupleTag<Integer> additionalOutput = new TupleTag<>("additional-output");
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(
              additionalOutput,
              new OutputTag<>(additionalOutput.getId(), TypeInformation.of(Integer.class)))
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) coder)
          .put(additionalOutput, coder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput, 1)
          .build();

  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds);

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      new ExecutableStageDoFnOperator<>(
          "transform",
          WindowedValue.getValueOnlyCoder(VarIntCoder.of()),
          Collections.emptyMap(),
          mainOutput,
          ImmutableList.of(additionalOutput),
          outputManagerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          options,
          stagePayload,
          jobInfo,
          FlinkExecutableStageContextFactory.getInstance(),
          createOutputMap(mainOutput, ImmutableList.of(additionalOutput)),
          WindowingStrategy.globalDefault(),
          null,
          null);

  ExecutableStageDoFnOperator<Integer, Integer> clone = SerializationUtils.clone(operator);
  assertNotNull(clone);
  assertNotEquals(operator, clone);
}
Example #19
Source File: DoFnOperator.java From beam with Apache License 2.0
/** Constructor for DoFnOperator. */
public DoFnOperator(
    DoFn<InputT, OutputT> doFn,
    String stepName,
    Coder<WindowedValue<InputT>> inputWindowedCoder,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    OutputManagerFactory<OutputT> outputManagerFactory,
    WindowingStrategy<?, ?> windowingStrategy,
    Map<Integer, PCollectionView<?>> sideInputTagMapping,
    Collection<PCollectionView<?>> sideInputs,
    PipelineOptions options,
    Coder<?> keyCoder,
    KeySelector<WindowedValue<InputT>, ?> keySelector,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  this.doFn = doFn;
  this.stepName = stepName;
  this.windowedInputCoder = inputWindowedCoder;
  this.outputCoders = outputCoders;
  this.mainOutputTag = mainOutputTag;
  this.additionalOutputTags = additionalOutputTags;
  this.sideInputTagMapping = sideInputTagMapping;
  this.sideInputs = sideInputs;
  this.serializedOptions = new SerializablePipelineOptions(options);
  this.windowingStrategy = windowingStrategy;
  this.outputManagerFactory = outputManagerFactory;

  setChainingStrategy(ChainingStrategy.ALWAYS);

  this.keyCoder = keyCoder;
  this.keySelector = keySelector;

  this.timerCoder =
      TimerInternals.TimerDataCoderV2.of(windowingStrategy.getWindowFn().windowCoder());

  FlinkPipelineOptions flinkOptions = options.as(FlinkPipelineOptions.class);

  this.maxBundleSize = flinkOptions.getMaxBundleSize();
  Preconditions.checkArgument(maxBundleSize > 0, "Bundle size must be at least 1");
  this.maxBundleTimeMills = flinkOptions.getMaxBundleTimeMills();
  Preconditions.checkArgument(maxBundleTimeMills > 0, "Bundle time must be at least 1");
  this.doFnSchemaInformation = doFnSchemaInformation;
  this.sideInputMapping = sideInputMapping;

  this.requiresStableInput =
      // WindowDoFnOperator does not use a DoFn
      doFn != null
          && DoFnSignatures.getSignature(doFn.getClass()).processElement().requiresStableInput();

  if (requiresStableInput) {
    Preconditions.checkState(
        CheckpointingMode.valueOf(flinkOptions.getCheckpointingMode())
            == CheckpointingMode.EXACTLY_ONCE,
        "Checkpointing mode is not set to exactly once but @RequiresStableInput is used.");
    Preconditions.checkState(
        flinkOptions.getCheckpointingInterval() > 0,
        "No checkpointing configured but pipeline uses @RequiresStableInput");
    LOG.warn(
        "Enabling stable input for transform {}. Will only process elements at most every {} milliseconds.",
        stepName,
        flinkOptions.getCheckpointingInterval()
            + Math.max(0, flinkOptions.getMinPauseBetweenCheckpoints()));
  }

  this.finishBundleBeforeCheckpointing = flinkOptions.getFinishBundleBeforeCheckpointing();
}
Example #20
Source File: DoFnOperator.java From beam with Apache License 2.0
@Override
public void open() throws Exception {
  // WindowDoFnOperator needs to use state and timers to get its DoFn, so we must wait until
  // StateInternals and TimerInternals are ready.
  // This will be called after initializeState()
  this.doFn = getDoFn();
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          doFn,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          getInputCoder(),
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (requiresStableInput) {
    // put this in front of the root FnRunner before any additional wrappers
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner,
                "stable-input-buffer",
                windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(),
                getOperatorStateBackend(),
                getKeyedStateBackend(),
                options.getNumConcurrentCheckpoints());
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();

  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }

  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();

  // Schedule timer to check timeout of finish bundle.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);

  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner =
        new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }
}
Example #21
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void testBundleProcessingExceptionIsFatalDuringCheckpointing() throws Exception {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  @SuppressWarnings("unchecked")
  DoFnOperator doFnOperator =
      new DoFnOperator<>(
          new IdentityDoFn() {
            @FinishBundle
            public void finishBundle() {
              throw new RuntimeException("something went wrong here");
            }
          },
          "stepName",
          windowedValueCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          outputManagerFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          options,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  @SuppressWarnings("unchecked")
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // Make sure we throw Error, not a regular Exception.
  // A regular exception would just cause the checkpoint to fail.
  assertThrows(Error.class, () -> testHarness.snapshot(0, 0));
}
Example #22
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test(expected = IllegalStateException.class)
public void testFailOnRequiresStableInputAndDisabledCheckpointing() {
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector keySelector = new KvToByteBufferKeySelector<>(keyCoder);
  KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(kvCoder);

  DoFn<String, String> doFn =
      new DoFn<String, String>() {
        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE));

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  // should make the DoFnOperator creation fail
  options.setCheckpointingInterval(-1L);
  new DoFnOperator(
      doFn,
      "stepName",
      windowedValueCoder,
      Collections.emptyMap(),
      outputTag,
      Collections.emptyList(),
      outputManagerFactory,
      WindowingStrategy.globalDefault(),
      new HashMap<>(), /* side-input mapping */
      Collections.emptyList(), /* side inputs */
      options,
      keyCoder,
      keySelector,
      DoFnSchemaInformation.create(),
      Collections.emptyMap());
}
Example #23
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void keyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();

        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());

        TupleTag<String> outputTag = new TupleTag<>("main-output");

        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            doFnOperator,
            keySelector,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of()));
      });
}
Example #24
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void keyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();

        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());

        TupleTag<String> outputTag = new TupleTag<>("main-output");

        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            doFnOperator,
            keySelector,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of()));
      });
}
Example #25
Source File: FlinkStatefulDoFnFunction.java From beam with Apache License 2.0
@Override
public void reduce(
    Iterable<WindowedValue<KV<K, V>>> values, Collector<WindowedValue<RawUnionValue>> out)
    throws Exception {
  RuntimeContext runtimeContext = getRuntimeContext();

  DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
  } else {
    // it has some additional Outputs
    outputManager = new FlinkDoFnFunction.MultiDoFnOutputManager(out, outputMap);
  }

  final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();

  // get the first value, we need this for initializing the state internals with the key.
  // we are guaranteed to have a first value, otherwise reduce() would not have been called.
  WindowedValue<KV<K, V>> currentValue = iterator.next();
  final K key = currentValue.getValue().getKey();

  final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);

  // Used with Batch, we know that all the data is available for this key. We can't use the
  // timer manager from the context because it doesn't exist. So we create one and advance
  // time to the end after processing all elements.
  final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
  timerInternals.advanceProcessingTime(Instant.now());
  timerInternals.advanceSynchronizedProcessingTime(Instant.now());

  List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());

  DoFnRunner<KV<K, V>, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          serializedOptions.get(),
          dofn,
          new FlinkSideInputReader(sideInputs, runtimeContext),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new FlinkNoOpStepContext() {
            @Override
            public StateInternals stateInternals() {
              return stateInternals;
            }

            @Override
            public TimerInternals timerInternals() {
              return timerInternals;
            }
          },
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  FlinkPipelineOptions pipelineOptions = serializedOptions.get().as(FlinkPipelineOptions.class);
  if (!pipelineOptions.getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  doFnRunner.startBundle();

  doFnRunner.processElement(currentValue);
  while (iterator.hasNext()) {
    currentValue = iterator.next();
    doFnRunner.processElement(currentValue);
  }

  // Finish any pending windows by advancing the input watermark to infinity.
  timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

  // Finally, advance the processing time to infinity to fire any timers.
  timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
  timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);

  fireEligibleTimers(key, timerInternals, doFnRunner);

  doFnRunner.finishBundle();
}
Example #26
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
public void testLateDroppingForStatefulFn() throws Exception {

  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(new Duration(10)));

  DoFn<Integer, String> fn =
      new DoFn<Integer, String>() {

        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
          context.output(context.element().toString());
        }
      };

  VarIntCoder keyCoder = VarIntCoder.of();
  Coder<WindowedValue<Integer>> inputCoder =
      WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
  Coder<WindowedValue<String>> outputCoder =
      WindowedValue.getFullCoder(
          StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());

  KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector =
      e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<Integer, String> doFnOperator =
      new DoFnOperator<>(
          fn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          keyCoder, /* key coder */
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          doFnOperator,
          keySelector,
          new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of()));

  testHarness.open();

  testHarness.processWatermark(0);

  IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));

  // this should not be late
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));

  testHarness.getOutput().clear();

  testHarness.processWatermark(9);

  // this should still not be considered late
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));

  testHarness.getOutput().clear();

  testHarness.processWatermark(10);

  // this should now be considered late
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));

  assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());

  testHarness.close();
}
Example #27
Source File: DoFnOperatorTest.java From beam with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testMultiOutputOutput() throws Exception {

  WindowedValue.ValueOnlyWindowedValueCoder<String> coder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  TupleTag<String> additionalOutput1 = new TupleTag<>("output-1");
  TupleTag<String> additionalOutput2 = new TupleTag<>("output-2");
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(additionalOutput1, new OutputTag<String>(additionalOutput1.getId()) {})
          .put(additionalOutput2, new OutputTag<String>(additionalOutput2.getId()) {})
          .build();
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) coder)
          .put(additionalOutput1, coder)
          .put(additionalOutput2, coder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput1, 1)
          .put(additionalOutput2, 2)
          .build();

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          new MultiOutputDoFn(additionalOutput1, additionalOutput2),
          "stepName",
          coder,
          Collections.emptyMap(),
          mainOutput,
          ImmutableList.of(additionalOutput1, additionalOutput2),
          new DoFnOperator.MultiOutputOutputManagerFactory(
              mainOutput, tagsToOutputTags, tagsToCoders, tagsToIds),
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          PipelineOptionsFactory.as(FlinkPipelineOptions.class),
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("one")));
  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("two")));
  testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("got: hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: one"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  assertThat(
      this.stripStreamRecord(testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2))),
      contains(
          WindowedValue.valueInGlobalWindow("extra: two"),
          WindowedValue.valueInGlobalWindow("got: hello")));

  testHarness.close();
}
Example #28
Source File: FlinkDoFnFunction.java From beam with Apache License 2.0
@Override
public void open(Configuration parameters) {
  // Note that the SerializablePipelineOptions already initialize FileSystems in the readObject()
  // deserialization method. However, this is a hack, and we want to properly initialize the
  // options where they are needed.
  FileSystems.setDefaultPipelineOptions(serializedOptions.get());
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn);
  metricContainer = new FlinkMetricContainer(getRuntimeContext());

  // setup DoFnRunner
  final RuntimeContext runtimeContext = getRuntimeContext();
  final DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new DoFnOutputManager();
  } else {
    // it has some additional outputs
    outputManager = new MultiDoFnOutputManager(outputMap);
  }

  final List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());

  DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          serializedOptions.get(),
          doFn,
          new FlinkSideInputReader(sideInputs, runtimeContext),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new FlinkNoOpStepContext(),
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  if (!serializedOptions.get().as(FlinkPipelineOptions.class).getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  this.collectorAware = (CollectorAware) outputManager;
  this.doFnRunner = doFnRunner;
}