org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory Java Examples
The following examples show how to use
org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SchedulerNGFactoryFactory.java From flink with Apache License 2.0 | 6 votes |
/**
 * Picks the {@link SchedulerNGFactory} implementation named by the
 * {@link JobManagerOptions#SCHEDULER} configuration option.
 *
 * @param configuration configuration the scheduler name is read from
 * @param restartStrategyFactory factory handed to the legacy scheduler factory
 * @return a {@code LegacySchedulerFactory} for {@code "legacy"}, a {@code DefaultSchedulerFactory} for {@code "ng"}
 * @throws IllegalArgumentException if the configured name is neither {@code "legacy"} nor {@code "ng"}
 */
static SchedulerNGFactory createSchedulerNGFactory(
		final Configuration configuration,
		final RestartStrategyFactory restartStrategyFactory) {

	final String schedulerName = configuration.getString(JobManagerOptions.SCHEDULER);

	// Receiver-first equals() keeps the NullPointerException a String switch raises for a null name.
	if (schedulerName.equals("legacy")) {
		return new LegacySchedulerFactory(restartStrategyFactory);
	}
	if (schedulerName.equals("ng")) {
		return new DefaultSchedulerFactory();
	}

	final String message = String.format(
		"Illegal value [%s] for config option [%s]",
		schedulerName,
		JobManagerOptions.SCHEDULER.key());
	throw new IllegalArgumentException(message);
}
Example #2
Source File: JobManagerSharedServices.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Bundles already constructed services into one shared-services holder.
 *
 * <p>Every argument except {@code blobWriter} is null-checked via {@code checkNotNull};
 * {@code blobWriter} is stored as passed in.
 */
public JobManagerSharedServices(
		ScheduledExecutorService scheduledExecutorService,
		LibraryCacheManager libraryCacheManager,
		RestartStrategyFactory restartStrategyFactory,
		StackTraceSampleCoordinator stackTraceSampleCoordinator,
		BackPressureStatsTracker backPressureStatsTracker,
		@Nonnull BlobWriter blobWriter) {

	// Validate first, in declaration order, then assign.
	checkNotNull(scheduledExecutorService);
	checkNotNull(libraryCacheManager);
	checkNotNull(restartStrategyFactory);
	checkNotNull(stackTraceSampleCoordinator);
	checkNotNull(backPressureStatsTracker);

	this.scheduledExecutorService = scheduledExecutorService;
	this.libraryCacheManager = libraryCacheManager;
	this.restartStrategyFactory = restartStrategyFactory;
	this.stackTraceSampleCoordinator = stackTraceSampleCoordinator;
	this.backPressureStatsTracker = backPressureStatsTracker;
	this.blobWriter = blobWriter;
}
Example #3
Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that in a streaming use case where checkpointing is enabled, a * fixed delay with Integer.MAX_VALUE retries is instantiated if no other restart * strategy has been specified. */ @Test public void testAutomaticRestartingWhenCheckpointing() throws Exception { // create savepoint data final long savepointId = 42L; final File savepointFile = createSavepoint(savepointId); // set savepoint settings final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), true); final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory( completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); final JobMaster jobMaster = createJobMaster( new Configuration(), jobGraph, haServices, new TestingJobManagerSharedServicesBuilder() .setRestartStrategyFactory(RestartStrategyFactory.createRestartStrategyFactory(configuration)) .build()); RestartStrategy restartStrategy = jobMaster.getRestartStrategy(); assertNotNull(restartStrategy); assertTrue(restartStrategy instanceof FixedDelayRestartStrategy); }
Example #4
Source File: JobManagerSharedServices.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates the shared services container from its individual components.
 *
 * <p>All components apart from {@code blobWriter} are rejected with {@code checkNotNull}
 * when null; {@code blobWriter} is assigned without an explicit check.
 */
public JobManagerSharedServices(
		ScheduledExecutorService scheduledExecutorService,
		LibraryCacheManager libraryCacheManager,
		RestartStrategyFactory restartStrategyFactory,
		StackTraceSampleCoordinator stackTraceSampleCoordinator,
		BackPressureStatsTracker backPressureStatsTracker,
		@Nonnull BlobWriter blobWriter) {

	// Argument checks come first (same order as the parameters), assignments follow.
	checkNotNull(scheduledExecutorService);
	checkNotNull(libraryCacheManager);
	checkNotNull(restartStrategyFactory);
	checkNotNull(stackTraceSampleCoordinator);
	checkNotNull(backPressureStatsTracker);

	this.scheduledExecutorService = scheduledExecutorService;
	this.libraryCacheManager = libraryCacheManager;
	this.restartStrategyFactory = restartStrategyFactory;
	this.stackTraceSampleCoordinator = stackTraceSampleCoordinator;
	this.backPressureStatsTracker = backPressureStatsTracker;
	this.blobWriter = blobWriter;
}
Example #5
Source File: JobManagerSharedServices.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Returns the restart strategy factory held by these shared services.
 *
 * @return the stored {@link RestartStrategyFactory}
 */
public RestartStrategyFactory getRestartStrategyFactory() {
	return this.restartStrategyFactory;
}
Example #6
Source File: JobManagerSharedServices.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Builds the {@link JobManagerSharedServices} from the given configuration and blob server.
 *
 * <p>Creates the library cache manager, a scheduled thread pool named "jobmanager-future",
 * the stack trace sample coordinator and the back pressure stats tracker, schedules the
 * periodic clean-up of the tracker's operator stats cache, and wires everything together with
 * a restart strategy factory created from {@code config}.
 *
 * @param config configuration the component settings are read from; must not be null
 * @param blobServer blob server used by the library cache manager and as blob writer; must not be null
 * @return the assembled shared services
 * @throws IllegalConfigurationException if reading the timeout fails with a {@link NumberFormatException}
 * @throws Exception declared by the signature for failures while creating the components
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer) throws Exception {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder = config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	final BlobLibraryCacheManager libraryCacheManager = new BlobLibraryCacheManager(
		blobServer,
		FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
		alwaysParentFirstLoaderPatterns);

	final FiniteDuration timeout;
	try {
		timeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// Chain the parse failure so the original error stays in the stack trace.
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
		Hardware.getNumberCPUCores(),
		new ExecutorThreadFactory("jobmanager-future"));

	final StackTraceSampleCoordinator stackTraceSampleCoordinator =
		new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis());
	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		stackTraceSampleCoordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES),
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL),
		Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));

	// The clean-up interval doubles as the initial delay of the periodic cache clean-up.
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		RestartStrategyFactory.createRestartStrategyFactory(config),
		stackTraceSampleCoordinator,
		backPressureStatsTracker,
		blobServer);
}
Example #7
Source File: TestingJobManagerSharedServicesBuilder.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Stores the restart strategy factory on this builder.
 *
 * @param restartStrategyFactory factory to remember
 * @return this builder, to allow call chaining
 */
public TestingJobManagerSharedServicesBuilder setRestartStrategyFactory(
		final RestartStrategyFactory restartStrategyFactory) {

	this.restartStrategyFactory = restartStrategyFactory;
	return this;
}
Example #8
Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Requests input splits through the job master gateway, fails the execution graph globally,
 * and then requests input splits again, asserting both times that the returned splits match
 * the expected ones in any order.
 */
@Test
public void testRequestNextInputSplit() throws Exception {
	final List<TestingInputSplit> expectedInputSplits = Arrays.asList(
		new TestingInputSplit(1),
		new TestingInputSplit(42),
		new TestingInputSplit(1337));

	// build one node JobGraph
	InputSplitSource<TestingInputSplit> inputSplitSource = new TestingInputSplitSource(expectedInputSplits);

	JobVertex source = new JobVertex("vertex1");
	source.setParallelism(1);
	source.setInputSplitSource(inputSplitSource);
	source.setInvokableClass(AbstractInvokable.class);

	final JobGraph testJobGraph = new JobGraph(source);
	testJobGraph.setAllowQueuedScheduling(true);

	// fixed delay restart settings: a single attempt with a "0 s" delay
	configuration.setLong(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_ATTEMPTS, 1);
	configuration.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "0 s");

	final JobManagerSharedServices jobManagerSharedServices =
		new TestingJobManagerSharedServicesBuilder()
			.setRestartStrategyFactory(RestartStrategyFactory.createRestartStrategyFactory(configuration))
			.build();

	final JobMaster jobMaster = createJobMaster(
		configuration,
		testJobGraph,
		haServices,
		jobManagerSharedServices);

	CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId);

	try {
		// wait for the start to complete
		startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS);

		final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class);

		ExecutionGraph eg = jobMaster.getExecutionGraph();
		// first execution vertex of the graph (the job graph was built from one vertex)
		ExecutionVertex ev = eg.getAllExecutionVertices().iterator().next();

		// evaluates the current execution attempt on each call, not once up front
		final SupplierWithException<SerializedInputSplit, Exception> inputSplitSupplier = () -> jobMasterGateway.requestNextInputSplit(
			source.getID(),
			ev.getCurrentExecutionAttempt().getAttemptId()).get();

		// first round: request as many splits as are expected
		List<InputSplit> actualInputSplits = getInputSplits(
			expectedInputSplits.size(),
			inputSplitSupplier);

		final Matcher<Iterable<? extends InputSplit>> expectedInputSplitsMatcher = containsInAnyOrder(expectedInputSplits.toArray(EMPTY_TESTING_INPUT_SPLITS));
		assertThat(actualInputSplits, expectedInputSplitsMatcher);

		final long maxWaitMillis = 2000L;
		ExecutionGraphTestUtils.waitUntilExecutionVertexState(ev, ExecutionState.SCHEDULED, maxWaitMillis);

		// trigger the global failure on the job master main thread executor and wait for it to run
		CompletableFuture.runAsync(() -> eg.failGlobal(new Exception("Testing exception")), eg.getJobMasterMainThreadExecutor()).get();

		// wait for the vertex to report SCHEDULED again after the failure
		ExecutionGraphTestUtils.waitUntilExecutionVertexState(ev, ExecutionState.SCHEDULED, maxWaitMillis);

		// second round: the same set of splits must be handed out again
		actualInputSplits = getInputSplits(
			expectedInputSplits.size(),
			inputSplitSupplier);

		assertThat(actualInputSplits, expectedInputSplitsMatcher);
	} finally {
		// always shut the job master endpoint down, even when an assertion failed
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}
Example #9
Source File: JobManagerSharedServices.java From flink with Apache License 2.0 | 4 votes |
/**
 * Gives access to the restart strategy factory stored in this instance.
 *
 * @return the stored {@link RestartStrategyFactory}
 */
public RestartStrategyFactory getRestartStrategyFactory() {
	return this.restartStrategyFactory;
}
Example #10
Source File: JobManagerSharedServices.java From flink with Apache License 2.0 | 4 votes |
/**
 * Assembles the {@link JobManagerSharedServices} from a configuration and a blob server.
 *
 * <p>Sets up the library cache manager, the "jobmanager-future" scheduled thread pool, the
 * stack trace sample coordinator and the back pressure stats tracker (including its periodic
 * operator stats cache clean-up), and combines them with a restart strategy factory created
 * from {@code config}.
 *
 * @param config configuration the component settings are read from; must not be null
 * @param blobServer blob server used by the library cache manager and as blob writer; must not be null
 * @return the assembled shared services
 * @throws IllegalConfigurationException if reading the timeout fails with a {@link NumberFormatException}
 * @throws Exception declared by the signature for failures while creating the components
 */
public static JobManagerSharedServices fromConfiguration(
		Configuration config,
		BlobServer blobServer) throws Exception {

	checkNotNull(config);
	checkNotNull(blobServer);

	final String classLoaderResolveOrder = config.getString(CoreOptions.CLASSLOADER_RESOLVE_ORDER);

	final String[] alwaysParentFirstLoaderPatterns = CoreOptions.getParentFirstLoaderPatterns(config);

	final BlobLibraryCacheManager libraryCacheManager = new BlobLibraryCacheManager(
		blobServer,
		FlinkUserCodeClassLoaders.ResolveOrder.fromString(classLoaderResolveOrder),
		alwaysParentFirstLoaderPatterns);

	final FiniteDuration timeout;
	try {
		timeout = AkkaUtils.getTimeout(config);
	} catch (NumberFormatException e) {
		// Pass the NumberFormatException along as cause instead of discarding it.
		throw new IllegalConfigurationException(AkkaUtils.formatDurationParsingErrorMessage(), e);
	}

	final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(
		Hardware.getNumberCPUCores(),
		new ExecutorThreadFactory("jobmanager-future"));

	final StackTraceSampleCoordinator stackTraceSampleCoordinator =
		new StackTraceSampleCoordinator(futureExecutor, timeout.toMillis());
	final int cleanUpInterval = config.getInteger(WebOptions.BACKPRESSURE_CLEANUP_INTERVAL);
	final BackPressureStatsTrackerImpl backPressureStatsTracker = new BackPressureStatsTrackerImpl(
		stackTraceSampleCoordinator,
		cleanUpInterval,
		config.getInteger(WebOptions.BACKPRESSURE_NUM_SAMPLES),
		config.getInteger(WebOptions.BACKPRESSURE_REFRESH_INTERVAL),
		Time.milliseconds(config.getInteger(WebOptions.BACKPRESSURE_DELAY)));

	// Initial delay and period of the cache clean-up both use the clean-up interval.
	futureExecutor.scheduleWithFixedDelay(
		backPressureStatsTracker::cleanUpOperatorStatsCache,
		cleanUpInterval,
		cleanUpInterval,
		TimeUnit.MILLISECONDS);

	return new JobManagerSharedServices(
		futureExecutor,
		libraryCacheManager,
		RestartStrategyFactory.createRestartStrategyFactory(config),
		stackTraceSampleCoordinator,
		backPressureStatsTracker,
		blobServer);
}
Example #11
Source File: LegacySchedulerFactory.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates the factory around the given restart strategy factory.
 *
 * @param restartStrategyFactory restart strategy factory to store; rejected by {@code checkNotNull} when null
 */
public LegacySchedulerFactory(final RestartStrategyFactory restartStrategyFactory) {
	checkNotNull(restartStrategyFactory);
	this.restartStrategyFactory = restartStrategyFactory;
}
Example #12
Source File: LegacyScheduler.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates the scheduler for the given job graph.
 *
 * <p>Null-checks and stores the supplied collaborators, resolves the restart strategy from the
 * job's serialized execution config together with {@code restartStrategyFactory} and the job's
 * checkpointing flag, logs the chosen strategy, and finally creates the execution graph via
 * {@code createAndRestoreExecutionGraph}.
 *
 * @throws Exception declared by the signature; NOTE(review): presumably raised while
 *     deserializing the execution config or creating the execution graph — confirm
 */
public LegacyScheduler(
		final Logger log,
		final JobGraph jobGraph,
		final BackPressureStatsTracker backPressureStatsTracker,
		final Executor ioExecutor,
		final Configuration jobMasterConfiguration,
		final SlotProvider slotProvider,
		final ScheduledExecutorService futureExecutor,
		final ClassLoader userCodeLoader,
		final CheckpointRecoveryFactory checkpointRecoveryFactory,
		final Time rpcTimeout,
		final RestartStrategyFactory restartStrategyFactory,
		final BlobWriter blobWriter,
		final JobManagerJobMetricGroup jobManagerJobMetricGroup,
		final Time slotRequestTimeout,
		final ShuffleMaster<?> shuffleMaster,
		final PartitionTracker partitionTracker) throws Exception {

	this.log = checkNotNull(log);
	this.jobGraph = checkNotNull(jobGraph);
	this.backPressureStatsTracker = checkNotNull(backPressureStatsTracker);
	this.ioExecutor = checkNotNull(ioExecutor);
	this.jobMasterConfiguration = checkNotNull(jobMasterConfiguration);
	this.slotProvider = checkNotNull(slotProvider);
	this.futureExecutor = checkNotNull(futureExecutor);
	this.userCodeLoader = checkNotNull(userCodeLoader);
	this.checkpointRecoveryFactory = checkNotNull(checkpointRecoveryFactory);
	this.rpcTimeout = checkNotNull(rpcTimeout);

	// restart strategy settings come from the execution config serialized inside the job graph
	final RestartStrategies.RestartStrategyConfiguration restartStrategyConfiguration =
		jobGraph.getSerializedExecutionConfig()
			.deserializeValue(userCodeLoader)
			.getRestartStrategy();

	// restartStrategyFactory is passed to the resolver without a null check in this constructor
	this.restartStrategy = RestartStrategyResolving.resolve(restartStrategyConfiguration,
		restartStrategyFactory,
		jobGraph.isCheckpointingEnabled());

	log.info("Using restart strategy {} for {} ({}).", this.restartStrategy, jobGraph.getName(), jobGraph.getJobID());

	this.blobWriter = checkNotNull(blobWriter);
	this.slotRequestTimeout = checkNotNull(slotRequestTimeout);

	// NOTE(review): runs last, presumably because it relies on the fields assigned above — confirm
	this.executionGraph = createAndRestoreExecutionGraph(jobManagerJobMetricGroup, checkNotNull(shuffleMaster), checkNotNull(partitionTracker));
}
Example #13
Source File: TestingJobManagerSharedServicesBuilder.java From flink with Apache License 2.0 | 4 votes |
/**
 * Configures the restart strategy factory kept by this builder.
 *
 * @param restartStrategyFactory factory to remember
 * @return this builder instance, so calls can be chained
 */
public TestingJobManagerSharedServicesBuilder setRestartStrategyFactory(
		final RestartStrategyFactory restartStrategyFactory) {

	this.restartStrategyFactory = restartStrategyFactory;
	return this;
}
Example #14
Source File: SchedulerBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates the scheduler base for the given job graph.
 *
 * <p>Null-checks and stores the supplied collaborators, resolves the restart strategy from the
 * job's serialized execution config together with {@code restartStrategyFactory} and the job's
 * checkpointing flag, creates the execution graph via {@code createAndRestoreExecutionGraph},
 * and then derives the scheduling topology, the preferred locations retriever and the
 * coordinator map.
 *
 * @param legacyScheduling stored in a field; the resolved restart strategy is only logged when
 *     this flag is {@code true}
 * @throws Exception declared by the signature; NOTE(review): presumably raised while
 *     deserializing the execution config or creating the execution graph — confirm
 */
public SchedulerBase(
		final Logger log,
		final JobGraph jobGraph,
		final BackPressureStatsTracker backPressureStatsTracker,
		final Executor ioExecutor,
		final Configuration jobMasterConfiguration,
		final SlotProvider slotProvider,
		final ScheduledExecutorService futureExecutor,
		final ClassLoader userCodeLoader,
		final CheckpointRecoveryFactory checkpointRecoveryFactory,
		final Time rpcTimeout,
		final RestartStrategyFactory restartStrategyFactory,
		final BlobWriter blobWriter,
		final JobManagerJobMetricGroup jobManagerJobMetricGroup,
		final Time slotRequestTimeout,
		final ShuffleMaster<?> shuffleMaster,
		final JobMasterPartitionTracker partitionTracker,
		final ExecutionVertexVersioner executionVertexVersioner,
		final boolean legacyScheduling) throws Exception {

	this.log = checkNotNull(log);
	this.jobGraph = checkNotNull(jobGraph);
	this.backPressureStatsTracker = checkNotNull(backPressureStatsTracker);
	this.ioExecutor = checkNotNull(ioExecutor);
	this.jobMasterConfiguration = checkNotNull(jobMasterConfiguration);
	this.slotProvider = checkNotNull(slotProvider);
	this.futureExecutor = checkNotNull(futureExecutor);
	this.userCodeLoader = checkNotNull(userCodeLoader);
	this.checkpointRecoveryFactory = checkNotNull(checkpointRecoveryFactory);
	this.rpcTimeout = checkNotNull(rpcTimeout);

	// restart strategy settings come from the execution config serialized inside the job graph
	final RestartStrategies.RestartStrategyConfiguration restartStrategyConfiguration =
		jobGraph.getSerializedExecutionConfig()
			.deserializeValue(userCodeLoader)
			.getRestartStrategy();

	// restartStrategyFactory is passed to the resolver without a null check in this constructor
	this.restartStrategy = RestartStrategyResolving.resolve(restartStrategyConfiguration,
		restartStrategyFactory,
		jobGraph.isCheckpointingEnabled());

	// the strategy is resolved in every case but only reported for legacy scheduling
	if (legacyScheduling) {
		log.info("Using restart strategy {} for {} ({}).", this.restartStrategy, jobGraph.getName(), jobGraph.getJobID());
	}

	this.blobWriter = checkNotNull(blobWriter);
	this.jobManagerJobMetricGroup = checkNotNull(jobManagerJobMetricGroup);
	this.slotRequestTimeout = checkNotNull(slotRequestTimeout);
	this.executionVertexVersioner = checkNotNull(executionVertexVersioner);
	this.legacyScheduling = legacyScheduling;

	// NOTE(review): presumably relies on the fields assigned above — confirm before reordering
	this.executionGraph = createAndRestoreExecutionGraph(jobManagerJobMetricGroup, checkNotNull(shuffleMaster), checkNotNull(partitionTracker));
	this.schedulingTopology = executionGraph.getSchedulingTopology();

	// looks the execution vertex up on each call and asks it for its state-based preferred location
	final StateLocationRetriever stateLocationRetriever =
		executionVertexId -> getExecutionVertex(executionVertexId).getPreferredLocationBasedOnState();
	final InputsLocationsRetriever inputsLocationsRetriever = new ExecutionGraphToInputsLocationsRetrieverAdapter(executionGraph);
	this.preferredLocationsRetriever = new DefaultPreferredLocationsRetriever(stateLocationRetriever, inputsLocationsRetriever);

	this.coordinatorMap = createCoordinatorMap();
}