org.apache.flink.runtime.jobmaster.JobMasterGateway Java Examples
The following examples show how to use
org.apache.flink.runtime.jobmaster.JobMasterGateway.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ResourceManager.java From flink with Apache License 2.0 | 6 votes |
/** * This method should be called by the framework once it detects that a currently registered * job manager has failed. * * @param jobId identifying the job whose leader shall be disconnected. * @param cause The exception which cause the JobManager failed. */ protected void closeJobManagerConnection(JobID jobId, Exception cause) { JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId); if (jobManagerRegistration != null) { final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID(); final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway(); final JobMasterId jobMasterId = jobManagerRegistration.getJobMasterId(); log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobMasterId, jobMasterGateway.getAddress(), jobId); jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId); jmResourceIdRegistrations.remove(jobManagerResourceId); // tell the job manager about the disconnect jobMasterGateway.disconnectResourceManager(getFencingToken(), cause); } else { log.debug("There was no registered job manager for job {}.", jobId); } }
Example #2
Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Nonnull private <T> List<CompletableFuture<Optional<T>>> queryJobMastersForInformation(Function<JobMasterGateway, CompletableFuture<T>> queryFunction) { final int numberJobsRunning = jobManagerRunnerFutures.size(); ArrayList<CompletableFuture<Optional<T>>> optionalJobInformation = new ArrayList<>( numberJobsRunning); for (JobID jobId : jobManagerRunnerFutures.keySet()) { final CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getJobMasterGatewayFuture(jobId); final CompletableFuture<Optional<T>> optionalRequest = jobMasterGatewayFuture .thenCompose(queryFunction::apply) .handle((T value, Throwable throwable) -> Optional.ofNullable(value)); optionalJobInformation.add(optionalRequest); } return optionalJobInformation; }
Example #3
Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<JobStatus> requestJobStatus(JobID jobId, Time timeout) { final CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getJobMasterGatewayFuture(jobId); final CompletableFuture<JobStatus> jobStatusFuture = jobMasterGatewayFuture.thenCompose( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobStatus(timeout)); return jobStatusFuture.exceptionally( (Throwable throwable) -> { final JobDetails jobDetails = archivedExecutionGraphStore.getAvailableJobDetails(jobId); // check whether it is a completed job if (jobDetails == null) { throw new CompletionException(ExceptionUtils.stripCompletionException(throwable)); } else { return jobDetails.getStatus(); } }); }
Example #4
Source File: TaskExecutor.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
private void disassociateFromJobManager(JobManagerConnection jobManagerConnection, Exception cause) throws IOException { checkNotNull(jobManagerConnection); final KvStateRegistry kvStateRegistry = networkEnvironment.getKvStateRegistry(); if (kvStateRegistry != null) { kvStateRegistry.unregisterListener(jobManagerConnection.getJobID()); } final KvStateClientProxy kvStateClientProxy = networkEnvironment.getKvStateProxy(); if (kvStateClientProxy != null) { kvStateClientProxy.updateKvStateLocationOracle(jobManagerConnection.getJobID(), null); } JobMasterGateway jobManagerGateway = jobManagerConnection.getJobManagerGateway(); jobManagerGateway.disconnectTaskManager(getResourceID(), cause); jobManagerConnection.getLibraryCacheManager().shutdown(); }
Example #5
Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<ClusterOverview> requestClusterOverview(Time timeout) { CompletableFuture<ResourceOverview> taskManagerOverviewFuture = runResourceManagerCommand(resourceManagerGateway -> resourceManagerGateway.requestResourceOverview(timeout)); final List<CompletableFuture<Optional<JobStatus>>> optionalJobInformation = queryJobMastersForInformation( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobStatus(timeout)); CompletableFuture<Collection<Optional<JobStatus>>> allOptionalJobsFuture = FutureUtils.combineAll(optionalJobInformation); CompletableFuture<Collection<JobStatus>> allJobsFuture = allOptionalJobsFuture.thenApply(this::flattenOptionalCollection); final JobsOverview completedJobsOverview = archivedExecutionGraphStore.getStoredJobsOverview(); return allJobsFuture.thenCombine( taskManagerOverviewFuture, (Collection<JobStatus> runningJobsStatus, ResourceOverview resourceOverview) -> { final JobsOverview allJobsOverview = JobsOverview.create(runningJobsStatus).combine(completedJobsOverview); return new ClusterOverview(resourceOverview, allJobsOverview); }); }
Example #6
Source File: TaskExecutor.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
private void failNoLongerAllocatedSlots(AllocatedSlotReport allocatedSlotReport, JobMasterGateway jobMasterGateway) { for (AllocatedSlotInfo allocatedSlotInfo : allocatedSlotReport.getAllocatedSlotInfos()) { final AllocationID allocationId = allocatedSlotInfo.getAllocationId(); if (!taskSlotTable.isAllocated( allocatedSlotInfo.getSlotIndex(), allocatedSlotReport.getJobId(), allocationId)) { jobMasterGateway.failSlot( getResourceID(), allocationId, new FlinkException( String.format( "Slot %s on TaskExecutor %s is not allocated by job %s.", allocatedSlotInfo.getSlotIndex(), getResourceID(), allocatedSlotReport.getJobId()))); } } }
Example #7
Source File: DefaultJobLeaderService.java From flink with Apache License 2.0 | 6 votes |
JobManagerRetryingRegistration( Logger log, RpcService rpcService, String targetName, Class<JobMasterGateway> targetType, String targetAddress, JobMasterId jobMasterId, RetryingRegistrationConfiguration retryingRegistrationConfiguration, String taskManagerRpcAddress, UnresolvedTaskManagerLocation unresolvedTaskManagerLocation) { super( log, rpcService, targetName, targetType, targetAddress, jobMasterId, retryingRegistrationConfiguration); this.taskManagerRpcAddress = taskManagerRpcAddress; this.unresolvedTaskManagerLocation = Preconditions.checkNotNull(unresolvedTaskManagerLocation); }
Example #8
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<ClusterOverview> requestClusterOverview(Time timeout) { CompletableFuture<ResourceOverview> taskManagerOverviewFuture = runResourceManagerCommand(resourceManagerGateway -> resourceManagerGateway.requestResourceOverview(timeout)); final List<CompletableFuture<Optional<JobStatus>>> optionalJobInformation = queryJobMastersForInformation( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobStatus(timeout)); CompletableFuture<Collection<Optional<JobStatus>>> allOptionalJobsFuture = FutureUtils.combineAll(optionalJobInformation); CompletableFuture<Collection<JobStatus>> allJobsFuture = allOptionalJobsFuture.thenApply(this::flattenOptionalCollection); final JobsOverview completedJobsOverview = archivedExecutionGraphStore.getStoredJobsOverview(); return allJobsFuture.thenCombine( taskManagerOverviewFuture, (Collection<JobStatus> runningJobsStatus, ResourceOverview resourceOverview) -> { final JobsOverview allJobsOverview = JobsOverview.create(runningJobsStatus).combine(completedJobsOverview); return new ClusterOverview(resourceOverview, allJobsOverview); }); }
Example #9
Source File: JobLeaderService.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public void reconnect() { if (stopped) { LOG.debug("Cannot reconnect because the JobManagerLeaderListener has already been stopped."); } else { final RegisteredRpcConnection<JobMasterId, JobMasterGateway, JMTMRegistrationSuccess> currentRpcConnection = rpcConnection; if (currentRpcConnection != null) { if (currentRpcConnection.isConnected()) { if (currentRpcConnection.tryReconnect()) { // double check for concurrent stop operation if (stopped) { currentRpcConnection.close(); } } else { LOG.debug("Could not reconnect to the JobMaster {}.", currentRpcConnection.getTargetAddress()); } } else { LOG.debug("Ongoing registration to JobMaster {}.", currentRpcConnection.getTargetAddress()); } } else { LOG.debug("Cannot reconnect to an unknown JobMaster."); } } }
Example #10
Source File: JobManagerConnection.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public JobManagerConnection( JobID jobID, ResourceID resourceID, JobMasterGateway jobMasterGateway, TaskManagerActions taskManagerActions, CheckpointResponder checkpointResponder, GlobalAggregateManager aggregateManager, LibraryCacheManager libraryCacheManager, ResultPartitionConsumableNotifier resultPartitionConsumableNotifier, PartitionProducerStateChecker partitionStateChecker) { this.jobID = Preconditions.checkNotNull(jobID); this.resourceID = Preconditions.checkNotNull(resourceID); this.jobMasterGateway = Preconditions.checkNotNull(jobMasterGateway); this.taskManagerActions = Preconditions.checkNotNull(taskManagerActions); this.checkpointResponder = Preconditions.checkNotNull(checkpointResponder); this.aggregateManager = Preconditions.checkNotNull(aggregateManager); this.libraryCacheManager = Preconditions.checkNotNull(libraryCacheManager); this.resultPartitionConsumableNotifier = Preconditions.checkNotNull(resultPartitionConsumableNotifier); this.partitionStateChecker = Preconditions.checkNotNull(partitionStateChecker); }
Example #11
Source File: TaskSubmissionTestEnvironment.java From flink with Apache License 2.0 | 6 votes |
static void registerJobMasterConnection( JobTable jobTable, JobID jobId, RpcService testingRpcService, JobMasterGateway jobMasterGateway, TaskManagerActions taskManagerActions, Time timeout, MainThreadExecutable mainThreadExecutable) { mainThreadExecutable.runAsync(() -> { final JobTable.Job job = jobTable.getOrCreateJob(jobId, () -> TestingJobServices.newBuilder().build()); job.connect( ResourceID.generate(), jobMasterGateway, taskManagerActions, new TestCheckpointResponder(), new TestGlobalAggregateManager(), new RpcResultPartitionConsumableNotifier(jobMasterGateway, testingRpcService.getExecutor(), timeout), TestingPartitionProducerStateChecker.newBuilder() .setPartitionProducerStateFunction((jobID, intermediateDataSetID, resultPartitionID) -> CompletableFuture.completedFuture(ExecutionState.RUNNING)) .build()); }); }
Example #12
Source File: ResourceManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * This method should be called by the framework once it detects that a currently registered * job manager has failed. * * @param jobId identifying the job whose leader shall be disconnected. * @param cause The exception which cause the JobManager failed. */ protected void closeJobManagerConnection(JobID jobId, Exception cause) { JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId); if (jobManagerRegistration != null) { final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID(); final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway(); final JobMasterId jobMasterId = jobManagerRegistration.getJobMasterId(); log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobMasterId, jobMasterGateway.getAddress(), jobId); jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId); jmResourceIdRegistrations.remove(jobManagerResourceId); // tell the job manager about the disconnect jobMasterGateway.disconnectResourceManager(getFencingToken(), cause); } else { log.debug("There was no registered job manager for job {}.", jobId); } }
Example #13
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<ClusterOverview> requestClusterOverview(Time timeout) { CompletableFuture<ResourceOverview> taskManagerOverviewFuture = runResourceManagerCommand(resourceManagerGateway -> resourceManagerGateway.requestResourceOverview(timeout)); final List<CompletableFuture<Optional<JobStatus>>> optionalJobInformation = queryJobMastersForInformation( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobStatus(timeout)); CompletableFuture<Collection<Optional<JobStatus>>> allOptionalJobsFuture = FutureUtils.combineAll(optionalJobInformation); CompletableFuture<Collection<JobStatus>> allJobsFuture = allOptionalJobsFuture.thenApply(this::flattenOptionalCollection); final JobsOverview completedJobsOverview = archivedExecutionGraphStore.getStoredJobsOverview(); return allJobsFuture.thenCombine( taskManagerOverviewFuture, (Collection<JobStatus> runningJobsStatus, ResourceOverview resourceOverview) -> { final JobsOverview allJobsOverview = JobsOverview.create(runningJobsStatus).combine(completedJobsOverview); return new ClusterOverview(resourceOverview, allJobsOverview); }); }
Example #14
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
private CompletableFuture<JobMasterGateway> getJobMasterGatewayFuture(JobID jobId) { final CompletableFuture<JobManagerRunner> jobManagerRunnerFuture = jobManagerRunnerFutures.get(jobId); if (jobManagerRunnerFuture == null) { return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId)); } else { final CompletableFuture<JobMasterGateway> leaderGatewayFuture = jobManagerRunnerFuture.thenCompose(JobManagerRunner::getLeaderGatewayFuture); return leaderGatewayFuture.thenApplyAsync( (JobMasterGateway jobMasterGateway) -> { // check whether the retrieved JobMasterGateway belongs still to a running JobMaster if (jobManagerRunnerFutures.containsKey(jobId)) { return jobMasterGateway; } else { throw new CompletionException(new FlinkJobNotFoundException(jobId)); } }, getMainThreadExecutor()); } }
Example #15
Source File: TaskExecutor.java From flink with Apache License 2.0 | 6 votes |
private void registerQueryableState(JobID jobId, JobMasterGateway jobMasterGateway) { final KvStateServer kvStateServer = kvStateService.getKvStateServer(); final KvStateRegistry kvStateRegistry = kvStateService.getKvStateRegistry(); if (kvStateServer != null && kvStateRegistry != null) { kvStateRegistry.registerListener( jobId, new RpcKvStateRegistryListener( jobMasterGateway, kvStateServer.getServerAddress())); } final KvStateClientProxy kvStateProxy = kvStateService.getKvStateClientProxy(); if (kvStateProxy != null) { kvStateProxy.updateKvStateLocationOracle(jobId, jobMasterGateway); } }
Example #16
Source File: TaskExecutor.java From flink with Apache License 2.0 | 6 votes |
private void failNoLongerAllocatedSlots(AllocatedSlotReport allocatedSlotReport, JobMasterGateway jobMasterGateway) { for (AllocatedSlotInfo allocatedSlotInfo : allocatedSlotReport.getAllocatedSlotInfos()) { final AllocationID allocationId = allocatedSlotInfo.getAllocationId(); if (!taskSlotTable.isAllocated( allocatedSlotInfo.getSlotIndex(), allocatedSlotReport.getJobId(), allocationId)) { jobMasterGateway.failSlot( getResourceID(), allocationId, new FlinkException( String.format( "Slot %s on TaskExecutor %s is not allocated by job %s.", allocatedSlotInfo.getSlotIndex(), getResourceID(), allocatedSlotReport.getJobId()))); } } }
Example #17
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<ArchivedExecutionGraph> requestJob(JobID jobId, Time timeout) { final CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getJobMasterGatewayFuture(jobId); final CompletableFuture<ArchivedExecutionGraph> archivedExecutionGraphFuture = jobMasterGatewayFuture.thenCompose( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJob(timeout)); return archivedExecutionGraphFuture.exceptionally( (Throwable throwable) -> { final ArchivedExecutionGraph serializableExecutionGraph = archivedExecutionGraphStore.get(jobId); // check whether it is a completed job if (serializableExecutionGraph == null) { throw new CompletionException(ExceptionUtils.stripCompletionException(throwable)); } else { return serializableExecutionGraph; } }); }
Example #18
Source File: JobLeaderService.java From flink with Apache License 2.0 | 6 votes |
public void reconnect() { if (stopped) { LOG.debug("Cannot reconnect because the JobManagerLeaderListener has already been stopped."); } else { final RegisteredRpcConnection<JobMasterId, JobMasterGateway, JMTMRegistrationSuccess> currentRpcConnection = rpcConnection; if (currentRpcConnection != null) { if (currentRpcConnection.isConnected()) { if (currentRpcConnection.tryReconnect()) { // double check for concurrent stop operation if (stopped) { currentRpcConnection.close(); } } else { LOG.debug("Could not reconnect to the JobMaster {}.", currentRpcConnection.getTargetAddress()); } } else { LOG.debug("Ongoing registration to JobMaster {}.", currentRpcConnection.getTargetAddress()); } } else { LOG.debug("Cannot reconnect to an unknown JobMaster."); } } }
Example #19
Source File: JobLeaderService.java From flink with Apache License 2.0 | 6 votes |
JobManagerRetryingRegistration( Logger log, RpcService rpcService, String targetName, Class<JobMasterGateway> targetType, String targetAddress, JobMasterId jobMasterId, RetryingRegistrationConfiguration retryingRegistrationConfiguration, String taskManagerRpcAddress, TaskManagerLocation taskManagerLocation) { super( log, rpcService, targetName, targetType, targetAddress, jobMasterId, retryingRegistrationConfiguration); this.taskManagerRpcAddress = taskManagerRpcAddress; this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation); }
Example #20
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<JobStatus> requestJobStatus(JobID jobId, Time timeout) { final CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getJobMasterGatewayFuture(jobId); final CompletableFuture<JobStatus> jobStatusFuture = jobMasterGatewayFuture.thenCompose( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobStatus(timeout)); return jobStatusFuture.exceptionally( (Throwable throwable) -> { final JobDetails jobDetails = archivedExecutionGraphStore.getAvailableJobDetails(jobId); // check whether it is a completed job if (jobDetails == null) { throw new CompletionException(ExceptionUtils.stripCompletionException(throwable)); } else { return jobDetails.getStatus(); } }); }
Example #21
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<MultipleJobsDetails> requestMultipleJobDetails(Time timeout) { List<CompletableFuture<Optional<JobDetails>>> individualOptionalJobDetails = queryJobMastersForInformation( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobDetails(timeout)); CompletableFuture<Collection<Optional<JobDetails>>> optionalCombinedJobDetails = FutureUtils.combineAll( individualOptionalJobDetails); CompletableFuture<Collection<JobDetails>> combinedJobDetails = optionalCombinedJobDetails.thenApply(this::flattenOptionalCollection); final Collection<JobDetails> completedJobDetails = archivedExecutionGraphStore.getAvailableJobDetails(); return combinedJobDetails.thenApply( (Collection<JobDetails> runningJobDetails) -> { final Collection<JobDetails> allJobDetails = new ArrayList<>(completedJobDetails.size() + runningJobDetails.size()); allJobDetails.addAll(runningJobDetails); allJobDetails.addAll(completedJobDetails); return new MultipleJobsDetails(allJobDetails); }); }
Example #22
Source File: JobManagerConnection.java From flink with Apache License 2.0 | 6 votes |
public JobManagerConnection( JobID jobID, ResourceID resourceID, JobMasterGateway jobMasterGateway, TaskManagerActions taskManagerActions, CheckpointResponder checkpointResponder, GlobalAggregateManager aggregateManager, LibraryCacheManager libraryCacheManager, ResultPartitionConsumableNotifier resultPartitionConsumableNotifier, PartitionProducerStateChecker partitionStateChecker) { this.jobID = Preconditions.checkNotNull(jobID); this.resourceID = Preconditions.checkNotNull(resourceID); this.jobMasterGateway = Preconditions.checkNotNull(jobMasterGateway); this.taskManagerActions = Preconditions.checkNotNull(taskManagerActions); this.checkpointResponder = Preconditions.checkNotNull(checkpointResponder); this.aggregateManager = Preconditions.checkNotNull(aggregateManager); this.libraryCacheManager = Preconditions.checkNotNull(libraryCacheManager); this.resultPartitionConsumableNotifier = Preconditions.checkNotNull(resultPartitionConsumableNotifier); this.partitionStateChecker = Preconditions.checkNotNull(partitionStateChecker); }
Example #23
Source File: TaskSubmissionTestEnvironment.java From flink with Apache License 2.0 | 6 votes |
static JobManagerConnection createJobManagerConnection(JobID jobId, JobMasterGateway jobMasterGateway, RpcService testingRpcService, TaskManagerActions taskManagerActions, Time timeout) { final LibraryCacheManager libraryCacheManager = mock(LibraryCacheManager.class); when(libraryCacheManager.getClassLoader(any(JobID.class))).thenReturn(ClassLoader.getSystemClassLoader()); final PartitionProducerStateChecker partitionProducerStateChecker = mock(PartitionProducerStateChecker.class); when(partitionProducerStateChecker.requestPartitionProducerState(any(), any(), any())) .thenReturn(CompletableFuture.completedFuture(ExecutionState.RUNNING)); return new JobManagerConnection( jobId, ResourceID.generate(), jobMasterGateway, taskManagerActions, mock(CheckpointResponder.class), new TestGlobalAggregateManager(), libraryCacheManager, new RpcResultPartitionConsumableNotifier(jobMasterGateway, testingRpcService.getExecutor(), timeout), partitionProducerStateChecker); }
Example #24
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
@Override public CompletableFuture<MultipleJobsDetails> requestMultipleJobDetails(Time timeout) { List<CompletableFuture<Optional<JobDetails>>> individualOptionalJobDetails = queryJobMastersForInformation( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.requestJobDetails(timeout)); CompletableFuture<Collection<Optional<JobDetails>>> optionalCombinedJobDetails = FutureUtils.combineAll( individualOptionalJobDetails); CompletableFuture<Collection<JobDetails>> combinedJobDetails = optionalCombinedJobDetails.thenApply(this::flattenOptionalCollection); final Collection<JobDetails> completedJobDetails = archivedExecutionGraphStore.getAvailableJobDetails(); return combinedJobDetails.thenApply( (Collection<JobDetails> runningJobDetails) -> { final Collection<JobDetails> allJobDetails = new ArrayList<>(completedJobDetails.size() + runningJobDetails.size()); allJobDetails.addAll(runningJobDetails); allJobDetails.addAll(completedJobDetails); return new MultipleJobsDetails(allJobDetails); }); }
Example #25
Source File: ResourceManager.java From flink with Apache License 2.0 | 6 votes |
/** * This method should be called by the framework once it detects that a currently registered * job manager has failed. * * @param jobId identifying the job whose leader shall be disconnected. * @param cause The exception which cause the JobManager failed. */ protected void closeJobManagerConnection(JobID jobId, Exception cause) { JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId); if (jobManagerRegistration != null) { final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID(); final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway(); final JobMasterId jobMasterId = jobManagerRegistration.getJobMasterId(); log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobMasterId, jobMasterGateway.getAddress(), jobId); jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId); jmResourceIdRegistrations.remove(jobManagerResourceId); // tell the job manager about the disconnect jobMasterGateway.disconnectResourceManager(getFencingToken(), cause); } else { log.debug("There was no registered job manager for job {}.", jobId); } }
Example #26
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
private CompletableFuture<JobMasterGateway> getJobMasterGatewayFuture(JobID jobId) { final CompletableFuture<JobManagerRunner> jobManagerRunnerFuture = jobManagerRunnerFutures.get(jobId); if (jobManagerRunnerFuture == null) { return FutureUtils.completedExceptionally(new FlinkJobNotFoundException(jobId)); } else { final CompletableFuture<JobMasterGateway> leaderGatewayFuture = jobManagerRunnerFuture.thenCompose(JobManagerRunner::getJobMasterGateway); return leaderGatewayFuture.thenApplyAsync( (JobMasterGateway jobMasterGateway) -> { // check whether the retrieved JobMasterGateway belongs still to a running JobMaster if (jobManagerRunnerFutures.containsKey(jobId)) { return jobMasterGateway; } else { throw new CompletionException(new FlinkJobNotFoundException(jobId)); } }, getMainThreadExecutor()); } }
Example #27
Source File: TaskExecutor.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void offerSlotsToJobManager(final JobID jobId) { final JobManagerConnection jobManagerConnection = jobManagerTable.get(jobId); if (jobManagerConnection == null) { log.debug("There is no job manager connection to the leader of job {}.", jobId); } else { if (taskSlotTable.hasAllocatedSlots(jobId)) { log.info("Offer reserved slots to the leader of job {}.", jobId); final JobMasterGateway jobMasterGateway = jobManagerConnection.getJobManagerGateway(); final Iterator<TaskSlot> reservedSlotsIterator = taskSlotTable.getAllocatedSlots(jobId); final JobMasterId jobMasterId = jobManagerConnection.getJobMasterId(); final Collection<SlotOffer> reservedSlots = new HashSet<>(2); while (reservedSlotsIterator.hasNext()) { SlotOffer offer = reservedSlotsIterator.next().generateSlotOffer(); reservedSlots.add(offer); } CompletableFuture<Collection<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots( getResourceID(), reservedSlots, taskManagerConfiguration.getTimeout()); acceptedSlotsFuture.whenCompleteAsync( handleAcceptedSlotOffers(jobId, jobMasterGateway, jobMasterId, reservedSlots), getMainThreadExecutor()); } else { log.debug("There are no unassigned slots for the job {}.", jobId); } } }
Example #28
Source File: Dispatcher.java From flink with Apache License 2.0 | 5 votes |
@Override public CompletableFuture<String> triggerSavepoint( final JobID jobId, final String targetDirectory, final boolean cancelJob, final Time timeout) { final CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getJobMasterGatewayFuture(jobId); return jobMasterGatewayFuture.thenCompose( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.triggerSavepoint(targetDirectory, cancelJob, timeout)); }
Example #29
Source File: Dispatcher.java From flink with Apache License 2.0 | 5 votes |
@Override public CompletableFuture<String> triggerSavepoint( final JobID jobId, final String targetDirectory, final boolean cancelJob, final Time timeout) { final CompletableFuture<JobMasterGateway> jobMasterGatewayFuture = getJobMasterGatewayFuture(jobId); return jobMasterGatewayFuture.thenCompose( (JobMasterGateway jobMasterGateway) -> jobMasterGateway.triggerSavepoint(targetDirectory, cancelJob, timeout)); }
Example #30
Source File: TaskExecutor.java From flink with Apache License 2.0 | 5 votes |
private void unregisterTaskAndNotifyFinalState( final JobMasterGateway jobMasterGateway, final ExecutionAttemptID executionAttemptID) { Task task = taskSlotTable.removeTask(executionAttemptID); if (task != null) { if (!task.getExecutionState().isTerminal()) { try { task.failExternally(new IllegalStateException("Task is being remove from TaskManager.")); } catch (Exception e) { log.error("Could not properly fail task.", e); } } log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.", task.getExecutionState(), task.getTaskInfo().getTaskName(), task.getExecutionId()); AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot(); updateTaskExecutionState( jobMasterGateway, new TaskExecutionState( task.getJobID(), task.getExecutionId(), task.getExecutionState(), task.getFailureCause(), accumulatorSnapshot, task.getMetricGroup().getIOMetricGroup().createSnapshot())); } else { log.error("Cannot find task with ID {} to unregister.", executionAttemptID); } }