org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint Java Examples
The following examples show how to use
org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JobMaster.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Forwards a checkpoint decline to the execution graph's checkpoint coordinator.
 *
 * <p>The coordinator call is submitted to the RPC service's executor; any exception it
 * throws is logged there. When the execution graph has no coordinator, the message is
 * only logged: at error level while the job is RUNNING, at debug level otherwise.
 *
 * @param decline the decline message to hand to the coordinator
 */
@Override
public void declineCheckpoint(DeclineCheckpoint decline) {
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        final String errorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
        if (executionGraph.getState() == JobStatus.RUNNING) {
            log.error(errorMessage, jobGraph.getJobID());
        } else {
            log.debug(errorMessage, jobGraph.getJobID());
        }
        return;
    }

    getRpcService().execute(() -> {
        try {
            coordinator.receiveDeclineMessage(decline);
        } catch (Exception e) {
            log.error("Error in CheckpointCoordinator while processing {}", decline, e);
        }
    });
}
Example #2
Source File: SchedulerBase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Forwards a checkpoint decline to the execution graph's checkpoint coordinator.
 *
 * <p>Asserts that it runs in the main thread, resolves the sender's task manager
 * location there, and then submits the coordinator call to the I/O executor. Exceptions
 * from the coordinator are logged. Without a coordinator the message is only logged: at
 * error level while the job is RUNNING, at debug level otherwise.
 *
 * @param decline the decline message to hand to the coordinator
 */
@Override
public void declineCheckpoint(final DeclineCheckpoint decline) {
    mainThreadExecutor.assertRunningInMainThread();

    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();
    final String senderLocation = retrieveTaskManagerLocation(decline.getTaskExecutionId());

    if (coordinator == null) {
        final String errorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
        if (executionGraph.getState() == JobStatus.RUNNING) {
            log.error(errorMessage, jobGraph.getJobID());
        } else {
            log.debug(errorMessage, jobGraph.getJobID());
        }
        return;
    }

    ioExecutor.execute(() -> {
        try {
            coordinator.receiveDeclineMessage(decline, senderLocation);
        } catch (Exception e) {
            log.error("Error in CheckpointCoordinator while processing {}", decline, e);
        }
    });
}
Example #3
Source File: LegacyScheduler.java From flink with Apache License 2.0 | 6 votes |
/**
 * Passes a checkpoint decline on to the checkpoint coordinator of the execution graph.
 *
 * <p>Must be called from the main thread (asserted first). The task manager location of
 * the declining task is looked up before the coordinator call is handed to the I/O
 * executor, where coordinator exceptions are caught and logged. If the execution graph
 * has no coordinator, the event is logged at error level for a RUNNING job and at debug
 * level in any other job state.
 *
 * @param decline the decline message to hand to the coordinator
 */
@Override
public void declineCheckpoint(final DeclineCheckpoint decline) {
    mainThreadExecutor.assertRunningInMainThread();

    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();
    final String locationInfo = retrieveTaskManagerLocation(decline.getTaskExecutionId());

    if (coordinator != null) {
        final Runnable forwardDecline = () -> {
            try {
                coordinator.receiveDeclineMessage(decline, locationInfo);
            } catch (Exception e) {
                log.error("Error in CheckpointCoordinator while processing {}", decline, e);
            }
        };
        ioExecutor.execute(forwardDecline);
        return;
    }

    final String errorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
    if (executionGraph.getState() == JobStatus.RUNNING) {
        log.error(errorMessage, jobGraph.getJobID());
    } else {
        log.debug(errorMessage, jobGraph.getJobID());
    }
}
Example #4
Source File: ActorGatewayCheckpointResponder.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link DeclineCheckpoint} message from the given details and passes it to
 * {@code actorGateway.tell(...)}.
 *
 * @param jobID the job the checkpoint belongs to
 * @param executionAttemptID the execution attempt declining the checkpoint
 * @param checkpointId the id of the declined checkpoint
 * @param reason the reason carried in the decline message
 */
@Override
public void declineCheckpoint(
        JobID jobID,
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        Throwable reason) {

    actorGateway.tell(new DeclineCheckpoint(jobID, executionAttemptID, checkpointId, reason));
}
Example #5
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Declines the first pending checkpoint reported by the coordinator on behalf of the
 * given execution attempt.
 *
 * @param jobId job id to put into the decline message
 * @param coordinator coordinator whose first pending checkpoint is declined
 * @param attemptID execution attempt to put into the decline message
 * @param reason reason to put into the decline message
 * @return the pending checkpoint as looked up before the decline was delivered
 */
private PendingCheckpoint declineSynchronousSavepoint(
        final JobID jobId,
        final CheckpointCoordinator coordinator,
        final ExecutionAttemptID attemptID,
        final Throwable reason) {

    // the first key in iteration order identifies the checkpoint to decline
    final long checkpointId = coordinator.getPendingCheckpoints().keySet().iterator().next();
    final PendingCheckpoint pending = coordinator.getPendingCheckpoints().get(checkpointId);

    final DeclineCheckpoint decline = new DeclineCheckpoint(jobId, attemptID, checkpointId, reason);
    coordinator.receiveDeclineMessage(decline, TASK_MANAGER_LOCATION_INFO);

    return pending;
}
Example #6
Source File: RpcCheckpointResponder.java From flink with Apache License 2.0 | 5 votes |
/**
 * Wraps the decline details in a {@link DeclineCheckpoint} message and passes it to the
 * checkpoint coordinator gateway.
 *
 * @param jobID the job the checkpoint belongs to
 * @param executionAttemptID the execution attempt declining the checkpoint
 * @param checkpointId the id of the declined checkpoint
 * @param cause the cause carried in the decline message
 */
@Override
public void declineCheckpoint(
        JobID jobID,
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        Throwable cause) {

    final DeclineCheckpoint declineMessage =
        new DeclineCheckpoint(jobID, executionAttemptID, checkpointId, cause);
    checkpointCoordinatorGateway.declineCheckpoint(declineMessage);
}
Example #7
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Sends a decline message for the first pending checkpoint that the coordinator reports.
 *
 * @param jobId job id to put into the decline message
 * @param coordinator coordinator whose first pending checkpoint is declined
 * @param attemptID execution attempt to put into the decline message
 * @param reason reason to put into the decline message
 * @return the pending checkpoint as looked up before the decline was delivered
 */
private PendingCheckpoint declineSynchronousSavepoint(
        final JobID jobId,
        final CheckpointCoordinator coordinator,
        final ExecutionAttemptID attemptID,
        final Throwable reason) {

    // pick whichever checkpoint id comes first in the map's iteration order
    final long firstPendingId = coordinator.getPendingCheckpoints().keySet().iterator().next();
    final PendingCheckpoint declined = coordinator.getPendingCheckpoints().get(firstPendingId);

    coordinator.receiveDeclineMessage(
        new DeclineCheckpoint(jobId, attemptID, firstPendingId, reason),
        TASK_MANAGER_LOCATION_INFO);

    return declined;
}
Example #8
Source File: RpcCheckpointResponder.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link DeclineCheckpoint} message for the given checkpoint and forwards it to
 * the checkpoint coordinator gateway.
 *
 * @param jobID the job the checkpoint belongs to
 * @param executionAttemptID the execution attempt declining the checkpoint
 * @param checkpointId the id of the declined checkpoint
 * @param cause the cause carried in the decline message
 */
@Override
public void declineCheckpoint(
        JobID jobID,
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        Throwable cause) {

    final DeclineCheckpoint decline =
        new DeclineCheckpoint(jobID, executionAttemptID, checkpointId, cause);
    checkpointCoordinatorGateway.declineCheckpoint(decline);
}
Example #9
Source File: RpcCheckpointResponder.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Reports a declined checkpoint by sending a {@link DeclineCheckpoint} message to the
 * checkpoint coordinator gateway.
 *
 * @param jobID the job the checkpoint belongs to
 * @param executionAttemptID the execution attempt declining the checkpoint
 * @param checkpointId the id of the declined checkpoint
 * @param cause the cause carried in the decline message
 */
@Override
public void declineCheckpoint(
        JobID jobID,
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        Throwable cause) {

    final DeclineCheckpoint message =
        new DeclineCheckpoint(jobID, executionAttemptID, checkpointId, cause);
    checkpointCoordinatorGateway.declineCheckpoint(message);
}
Example #10
Source File: JobMaster.java From flink with Apache License 2.0 | 4 votes |
/**
 * Delegates the decline message to {@code schedulerNG}.
 *
 * @param decline the decline message to forward
 */
@Override
public void declineCheckpoint(final DeclineCheckpoint decline) {
    schedulerNG.declineCheckpoint(decline);
}
Example #11
Source File: TestingJobMasterGateway.java From flink with Apache License 2.0 | 4 votes |
public TestingJobMasterGateway( @Nonnull String address, @Nonnull String hostname, @Nonnull Supplier<CompletableFuture<Acknowledge>> cancelFunction, @Nonnull Function<TaskExecutionState, CompletableFuture<Acknowledge>> updateTaskExecutionStateFunction, @Nonnull BiFunction<JobVertexID, ExecutionAttemptID, CompletableFuture<SerializedInputSplit>> requestNextInputSplitFunction, @Nonnull BiFunction<IntermediateDataSetID, ResultPartitionID, CompletableFuture<ExecutionState>> requestPartitionStateFunction, @Nonnull Function<ResultPartitionID, CompletableFuture<Acknowledge>> scheduleOrUpdateConsumersFunction, @Nonnull Function<ResourceID, CompletableFuture<Acknowledge>> disconnectTaskManagerFunction, @Nonnull Consumer<ResourceManagerId> disconnectResourceManagerConsumer, @Nonnull BiFunction<ResourceID, Collection<SlotOffer>, CompletableFuture<Collection<SlotOffer>>> offerSlotsFunction, @Nonnull TriConsumer<ResourceID, AllocationID, Throwable> failSlotConsumer, @Nonnull BiFunction<String, UnresolvedTaskManagerLocation, CompletableFuture<RegistrationResponse>> registerTaskManagerFunction, @Nonnull BiConsumer<ResourceID, AccumulatorReport> taskManagerHeartbeatConsumer, @Nonnull Consumer<ResourceID> resourceManagerHeartbeatConsumer, @Nonnull Supplier<CompletableFuture<JobDetails>> requestJobDetailsSupplier, @Nonnull Supplier<CompletableFuture<ArchivedExecutionGraph>> requestJobSupplier, @Nonnull BiFunction<String, Boolean, CompletableFuture<String>> triggerSavepointFunction, @Nonnull BiFunction<String, Boolean, CompletableFuture<String>> stopWithSavepointFunction, @Nonnull Function<JobVertexID, CompletableFuture<OperatorBackPressureStatsResponse>> requestOperatorBackPressureStatsFunction, @Nonnull BiConsumer<AllocationID, Throwable> notifyAllocationFailureConsumer, @Nonnull Consumer<Tuple5<JobID, ExecutionAttemptID, Long, CheckpointMetrics, TaskStateSnapshot>> acknowledgeCheckpointConsumer, @Nonnull Consumer<DeclineCheckpoint> declineCheckpointConsumer, @Nonnull 
Supplier<JobMasterId> fencingTokenSupplier, @Nonnull BiFunction<JobID, String, CompletableFuture<KvStateLocation>> requestKvStateLocationFunction, @Nonnull Function<Tuple6<JobID, JobVertexID, KeyGroupRange, String, KvStateID, InetSocketAddress>, CompletableFuture<Acknowledge>> notifyKvStateRegisteredFunction, @Nonnull Function<Tuple4<JobID, JobVertexID, KeyGroupRange, String>, CompletableFuture<Acknowledge>> notifyKvStateUnregisteredFunction, @Nonnull TriFunction<String, Object, byte[], CompletableFuture<Object>> updateAggregateFunction, @Nonnull TriFunction<ExecutionAttemptID, OperatorID, SerializedValue<OperatorEvent>, CompletableFuture<Acknowledge>> operatorEventSender, @Nonnull BiFunction<OperatorID, SerializedValue<CoordinationRequest>, CompletableFuture<CoordinationResponse>> deliverCoordinationRequestFunction) { this.address = address; this.hostname = hostname; this.cancelFunction = cancelFunction; this.updateTaskExecutionStateFunction = updateTaskExecutionStateFunction; this.requestNextInputSplitFunction = requestNextInputSplitFunction; this.requestPartitionStateFunction = requestPartitionStateFunction; this.scheduleOrUpdateConsumersFunction = scheduleOrUpdateConsumersFunction; this.disconnectTaskManagerFunction = disconnectTaskManagerFunction; this.disconnectResourceManagerConsumer = disconnectResourceManagerConsumer; this.offerSlotsFunction = offerSlotsFunction; this.failSlotConsumer = failSlotConsumer; this.registerTaskManagerFunction = registerTaskManagerFunction; this.taskManagerHeartbeatConsumer = taskManagerHeartbeatConsumer; this.resourceManagerHeartbeatConsumer = resourceManagerHeartbeatConsumer; this.requestJobDetailsSupplier = requestJobDetailsSupplier; this.requestJobSupplier = requestJobSupplier; this.triggerSavepointFunction = triggerSavepointFunction; this.stopWithSavepointFunction = stopWithSavepointFunction; this.requestOperatorBackPressureStatsFunction = requestOperatorBackPressureStatsFunction; this.notifyAllocationFailureConsumer = 
notifyAllocationFailureConsumer; this.acknowledgeCheckpointConsumer = acknowledgeCheckpointConsumer; this.declineCheckpointConsumer = declineCheckpointConsumer; this.fencingTokenSupplier = fencingTokenSupplier; this.requestKvStateLocationFunction = requestKvStateLocationFunction; this.notifyKvStateRegisteredFunction = notifyKvStateRegisteredFunction; this.notifyKvStateUnregisteredFunction = notifyKvStateUnregisteredFunction; this.updateAggregateFunction = updateAggregateFunction; this.operatorEventSender = operatorEventSender; this.deliverCoordinationRequestFunction = deliverCoordinationRequestFunction; }
Example #12
Source File: TestingJobMasterGatewayBuilder.java From flink with Apache License 2.0 | 4 votes |
/**
 * Sets the consumer stored in this builder's {@code declineCheckpointConsumer} field.
 *
 * @param declineCheckpointConsumer the consumer to store
 * @return this builder, to allow call chaining
 */
public TestingJobMasterGatewayBuilder setDeclineCheckpointConsumer(
        final Consumer<DeclineCheckpoint> declineCheckpointConsumer) {

    this.declineCheckpointConsumer = declineCheckpointConsumer;
    return this;
}
Example #13
Source File: CheckpointCoordinator.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Receives a {@link DeclineCheckpoint} message for a pending checkpoint. * * @param message Checkpoint decline from the task manager */ public void receiveDeclineMessage(DeclineCheckpoint message) { if (shutdown || message == null) { return; } if (!job.equals(message.getJob())) { throw new IllegalArgumentException("Received DeclineCheckpoint message for job " + message.getJob() + " while this coordinator handles job " + job); } final long checkpointId = message.getCheckpointId(); final String reason = (message.getReason() != null ? message.getReason().getMessage() : ""); PendingCheckpoint checkpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return; } checkpoint = pendingCheckpoints.remove(checkpointId); if (checkpoint != null && !checkpoint.isDiscarded()) { LOG.info("Decline checkpoint {} by task {} of job {}.", checkpointId, message.getTaskExecutionId(), job); discardCheckpoint(checkpoint, message.getReason()); } else if (checkpoint != null) { // this should not happen throw new IllegalStateException( "Received message for discarded but non-removed checkpoint " + checkpointId); } else if (LOG.isDebugEnabled()) { if (recentPendingCheckpoints.contains(checkpointId)) { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) LOG.debug("Received another decline message for now expired checkpoint attempt {} of job {} : {}", checkpointId, job, reason); } else { // message is for an unknown checkpoint. might be so old that we don't even remember it any more LOG.debug("Received decline message for unknown (too old?) checkpoint attempt {} of job {} : {}", checkpointId, job, reason); } } } }
Example #14
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testSavepointScheduledInUnalignedMode() throws Exception { int maxConcurrentCheckpoints = 1; int checkpointRequestsToSend = 10; int activeRequests = 0; JobID jobId = new JobID(); CheckpointCoordinator coordinator = new CheckpointCoordinatorBuilder() .setCheckpointCoordinatorConfiguration(CheckpointCoordinatorConfiguration .builder() .setUnalignedCheckpointsEnabled(true) .setMaxConcurrentCheckpoints(maxConcurrentCheckpoints) .build()) .setJobId(jobId) .setTimer(manuallyTriggeredScheduledExecutor) .build(); try { List<Future<?>> checkpointFutures = new ArrayList<>(checkpointRequestsToSend); coordinator.startCheckpointScheduler(); while (activeRequests < checkpointRequestsToSend) { checkpointFutures.add(coordinator.triggerCheckpoint(true)); activeRequests++; } assertEquals(activeRequests - maxConcurrentCheckpoints, coordinator.getNumQueuedRequests()); Future<?> savepointFuture = coordinator.triggerSavepoint("/tmp"); manuallyTriggeredScheduledExecutor.triggerAll(); assertEquals(++activeRequests - maxConcurrentCheckpoints, coordinator.getNumQueuedRequests()); coordinator.receiveDeclineMessage(new DeclineCheckpoint(jobId, new ExecutionAttemptID(), 1L), "none"); manuallyTriggeredScheduledExecutor.triggerAll(); activeRequests--; // savepoint triggered assertEquals(activeRequests - maxConcurrentCheckpoints , coordinator.getNumQueuedRequests()); assertEquals(1, checkpointFutures.stream().filter(Future::isDone).count()); assertFalse(savepointFuture.isDone()); assertEquals(maxConcurrentCheckpoints, coordinator.getNumberOfPendingCheckpoints()); CheckpointProperties props = coordinator.getPendingCheckpoints().values().iterator().next().getProps(); assertTrue(props.isSavepoint()); assertFalse(props.forceCheckpoint()); } finally { coordinator.shutdown(JobStatus.FINISHED); } }
Example #15
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * This test triggers a checkpoint and then sends a decline checkpoint message from
 * one of the tasks. The expected behaviour is that said checkpoint is discarded, its
 * scheduled canceller is removed, and no checkpoint remains pending afterwards.
 *
 * <p>Unexpected exceptions propagate out of the test so that the test report carries
 * the full stack trace, and the coordinator is shut down even when an assertion fails.
 */
@Test
public void testTriggerAndDeclineCheckpointSimple() throws Exception {
    final JobID jid = new JobID();

    // create some mock Execution vertices that receive the checkpoint trigger messages
    final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
    ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
    ExecutionVertex vertex2 = mockExecutionVertex(attemptID2);

    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = getCheckpointCoordinator(jid, vertex1, vertex2);

    try {
        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

        // trigger the first checkpoint. this should succeed
        final CompletableFuture<CompletedCheckpoint> checkpointFuture = coord.triggerCheckpoint(false);
        manuallyTriggeredScheduledExecutor.triggerAll();
        assertFalse(checkpointFuture.isCompletedExceptionally());

        // validate that we have a pending checkpoint
        assertEquals(1, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

        // we have one task scheduled that will cancel after timeout
        assertEquals(1, manuallyTriggeredScheduledExecutor.getScheduledTasks().size());

        long checkpointId = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
        PendingCheckpoint checkpoint = coord.getPendingCheckpoints().get(checkpointId);

        assertNotNull(checkpoint);
        assertEquals(checkpointId, checkpoint.getCheckpointId());
        assertEquals(jid, checkpoint.getJobId());
        assertEquals(2, checkpoint.getNumberOfNonAcknowledgedTasks());
        assertEquals(0, checkpoint.getNumberOfAcknowledgedTasks());
        assertEquals(0, checkpoint.getOperatorStates().size());
        assertFalse(checkpoint.isDiscarded());
        assertFalse(checkpoint.areTasksFullyAcknowledged());

        // check that the vertices received the trigger checkpoint message
        verify(vertex1.getCurrentExecutionAttempt()).triggerCheckpoint(
            checkpointId, checkpoint.getCheckpointTimestamp(), CheckpointOptions.forCheckpointWithDefaultLocation());
        verify(vertex2.getCurrentExecutionAttempt()).triggerCheckpoint(
            checkpointId, checkpoint.getCheckpointTimestamp(), CheckpointOptions.forCheckpointWithDefaultLocation());

        // acknowledge from one of the tasks
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId), "Unknown location");
        assertEquals(1, checkpoint.getNumberOfAcknowledgedTasks());
        assertEquals(1, checkpoint.getNumberOfNonAcknowledgedTasks());
        assertFalse(checkpoint.isDiscarded());
        assertFalse(checkpoint.areTasksFullyAcknowledged());

        // acknowledge the same task again (should not matter)
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId), "Unknown location");
        assertFalse(checkpoint.isDiscarded());
        assertFalse(checkpoint.areTasksFullyAcknowledged());

        // decline checkpoint from the other task, this should cancel the checkpoint
        coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID1, checkpointId), TASK_MANAGER_LOCATION_INFO);
        assertTrue(checkpoint.isDiscarded());

        // the canceler is also removed
        assertEquals(0, manuallyTriggeredScheduledExecutor.getScheduledTasks().size());

        // validate that we have no new pending checkpoint
        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

        // decline again, nothing should happen
        // decline from the other task, nothing should happen
        coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID1, checkpointId), TASK_MANAGER_LOCATION_INFO);
        coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID2, checkpointId), TASK_MANAGER_LOCATION_INFO);
        assertTrue(checkpoint.isDiscarded());
    } finally {
        // release the coordinator regardless of the test outcome
        coord.shutdown(JobStatus.FINISHED);
    }
}
Example #16
Source File: TestingJobMasterGatewayBuilder.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Stores the given consumer in this builder's {@code declineCheckpointConsumer} field.
 *
 * @param declineCheckpointConsumer the consumer to store
 * @return this builder, to allow call chaining
 */
public TestingJobMasterGatewayBuilder setDeclineCheckpointConsumer(
        final Consumer<DeclineCheckpoint> declineCheckpointConsumer) {

    this.declineCheckpointConsumer = declineCheckpointConsumer;
    return this;
}
Example #17
Source File: JobMaster.java From flink with Apache License 2.0 | 4 votes |
/**
 * Hands the decline message over to {@code schedulerNG}.
 *
 * @param decline the decline message to forward
 */
@Override
public void declineCheckpoint(final DeclineCheckpoint decline) {
    schedulerNG.declineCheckpoint(decline);
}
Example #18
Source File: CheckpointCoordinator.java From flink with Apache License 2.0 | 4 votes |
/** * Receives a {@link DeclineCheckpoint} message for a pending checkpoint. * * @param message Checkpoint decline from the task manager * @param taskManagerLocationInfo The location info of the decline checkpoint message's sender */ public void receiveDeclineMessage(DeclineCheckpoint message, String taskManagerLocationInfo) { if (shutdown || message == null) { return; } if (!job.equals(message.getJob())) { throw new IllegalArgumentException("Received DeclineCheckpoint message for job " + message.getJob() + " from " + taskManagerLocationInfo + " while this coordinator handles job " + job); } final long checkpointId = message.getCheckpointId(); final String reason = (message.getReason() != null ? message.getReason().getMessage() : ""); PendingCheckpoint checkpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return; } checkpoint = pendingCheckpoints.get(checkpointId); if (checkpoint != null) { Preconditions.checkState( !checkpoint.isDiscarded(), "Received message for discarded but non-removed checkpoint " + checkpointId); LOG.info("Decline checkpoint {} by task {} of job {} at {}.", checkpointId, message.getTaskExecutionId(), job, taskManagerLocationInfo); final CheckpointException checkpointException; if (message.getReason() == null) { checkpointException = new CheckpointException(CheckpointFailureReason.CHECKPOINT_DECLINED); } else { checkpointException = getCheckpointException( CheckpointFailureReason.JOB_FAILURE, message.getReason()); } abortPendingCheckpoint( checkpoint, checkpointException, message.getTaskExecutionId()); } else if (LOG.isDebugEnabled()) { if (recentPendingCheckpoints.contains(checkpointId)) { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) LOG.debug("Received another decline message for now expired checkpoint attempt {} from task {} of job {} at {} : {}", checkpointId, message.getTaskExecutionId(), 
job, taskManagerLocationInfo, reason); } else { // message is for an unknown checkpoint. might be so old that we don't even remember it any more LOG.debug("Received decline message for unknown (too old?) checkpoint attempt {} from task {} of job {} at {} : {}", checkpointId, message.getTaskExecutionId(), job, taskManagerLocationInfo, reason); } } } }
Example #19
Source File: TestingJobMasterGateway.java From flink with Apache License 2.0 | 4 votes |
public TestingJobMasterGateway( @Nonnull String address, @Nonnull String hostname, @Nonnull Supplier<CompletableFuture<Acknowledge>> cancelFunction, @Nonnull Function<TaskExecutionState, CompletableFuture<Acknowledge>> updateTaskExecutionStateFunction, @Nonnull BiFunction<JobVertexID, ExecutionAttemptID, CompletableFuture<SerializedInputSplit>> requestNextInputSplitFunction, @Nonnull BiFunction<IntermediateDataSetID, ResultPartitionID, CompletableFuture<ExecutionState>> requestPartitionStateFunction, @Nonnull Function<ResultPartitionID, CompletableFuture<Acknowledge>> scheduleOrUpdateConsumersFunction, @Nonnull Function<ResourceID, CompletableFuture<Acknowledge>> disconnectTaskManagerFunction, @Nonnull Consumer<ResourceManagerId> disconnectResourceManagerConsumer, @Nonnull Supplier<CompletableFuture<ClassloadingProps>> classloadingPropsSupplier, @Nonnull BiFunction<ResourceID, Collection<SlotOffer>, CompletableFuture<Collection<SlotOffer>>> offerSlotsFunction, @Nonnull TriConsumer<ResourceID, AllocationID, Throwable> failSlotConsumer, @Nonnull BiFunction<String, TaskManagerLocation, CompletableFuture<RegistrationResponse>> registerTaskManagerFunction, @Nonnull BiConsumer<ResourceID, AccumulatorReport> taskManagerHeartbeatConsumer, @Nonnull Consumer<ResourceID> resourceManagerHeartbeatConsumer, @Nonnull Supplier<CompletableFuture<JobDetails>> requestJobDetailsSupplier, @Nonnull Supplier<CompletableFuture<ArchivedExecutionGraph>> requestJobSupplier, @Nonnull BiFunction<String, Boolean, CompletableFuture<String>> triggerSavepointFunction, @Nonnull BiFunction<String, Boolean, CompletableFuture<String>> stopWithSavepointFunction, @Nonnull Function<JobVertexID, CompletableFuture<OperatorBackPressureStatsResponse>> requestOperatorBackPressureStatsFunction, @Nonnull BiConsumer<AllocationID, Throwable> notifyAllocationFailureConsumer, @Nonnull Consumer<Tuple5<JobID, ExecutionAttemptID, Long, CheckpointMetrics, TaskStateSnapshot>> acknowledgeCheckpointConsumer, @Nonnull 
Consumer<DeclineCheckpoint> declineCheckpointConsumer, @Nonnull Supplier<JobMasterId> fencingTokenSupplier, @Nonnull BiFunction<JobID, String, CompletableFuture<KvStateLocation>> requestKvStateLocationFunction, @Nonnull Function<Tuple6<JobID, JobVertexID, KeyGroupRange, String, KvStateID, InetSocketAddress>, CompletableFuture<Acknowledge>> notifyKvStateRegisteredFunction, @Nonnull Function<Tuple4<JobID, JobVertexID, KeyGroupRange, String>, CompletableFuture<Acknowledge>> notifyKvStateUnregisteredFunction, @Nonnull TriFunction<String, Object, byte[], CompletableFuture<Object>> updateAggregateFunction) { this.address = address; this.hostname = hostname; this.cancelFunction = cancelFunction; this.updateTaskExecutionStateFunction = updateTaskExecutionStateFunction; this.requestNextInputSplitFunction = requestNextInputSplitFunction; this.requestPartitionStateFunction = requestPartitionStateFunction; this.scheduleOrUpdateConsumersFunction = scheduleOrUpdateConsumersFunction; this.disconnectTaskManagerFunction = disconnectTaskManagerFunction; this.disconnectResourceManagerConsumer = disconnectResourceManagerConsumer; this.classloadingPropsSupplier = classloadingPropsSupplier; this.offerSlotsFunction = offerSlotsFunction; this.failSlotConsumer = failSlotConsumer; this.registerTaskManagerFunction = registerTaskManagerFunction; this.taskManagerHeartbeatConsumer = taskManagerHeartbeatConsumer; this.resourceManagerHeartbeatConsumer = resourceManagerHeartbeatConsumer; this.requestJobDetailsSupplier = requestJobDetailsSupplier; this.requestJobSupplier = requestJobSupplier; this.triggerSavepointFunction = triggerSavepointFunction; this.stopWithSavepointFunction = stopWithSavepointFunction; this.requestOperatorBackPressureStatsFunction = requestOperatorBackPressureStatsFunction; this.notifyAllocationFailureConsumer = notifyAllocationFailureConsumer; this.acknowledgeCheckpointConsumer = acknowledgeCheckpointConsumer; this.declineCheckpointConsumer = declineCheckpointConsumer; 
this.fencingTokenSupplier = fencingTokenSupplier; this.requestKvStateLocationFunction = requestKvStateLocationFunction; this.notifyKvStateRegisteredFunction = notifyKvStateRegisteredFunction; this.notifyKvStateUnregisteredFunction = notifyKvStateUnregisteredFunction; this.updateAggregateFunction = updateAggregateFunction; }
Example #20
Source File: TestingJobMasterGatewayBuilder.java From flink with Apache License 2.0 | 4 votes |
/**
 * Assigns the given consumer to this builder's {@code declineCheckpointConsumer} field.
 *
 * @param declineCheckpointConsumer the consumer to store
 * @return this builder, to allow call chaining
 */
public TestingJobMasterGatewayBuilder setDeclineCheckpointConsumer(
        final Consumer<DeclineCheckpoint> declineCheckpointConsumer) {

    this.declineCheckpointConsumer = declineCheckpointConsumer;
    return this;
}
Example #21
Source File: TestingJobMasterGateway.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Creates a testing gateway whose behaviour is fully defined by the supplied callbacks.
 *
 * <p>The constructor performs no logic of its own: every argument is stored unchanged in the
 * field of the same name. All parameters are annotated {@code @Nonnull}.
 */
public TestingJobMasterGateway(
        @Nonnull String address,
        @Nonnull String hostname,
        @Nonnull Supplier<CompletableFuture<Acknowledge>> cancelFunction,
        @Nonnull Supplier<CompletableFuture<Acknowledge>> stopFunction,
        @Nonnull BiFunction<Integer, RescalingBehaviour, CompletableFuture<Acknowledge>> rescalingJobFunction,
        @Nonnull TriFunction<Collection<JobVertexID>, Integer, RescalingBehaviour, CompletableFuture<Acknowledge>> rescalingOperatorsFunction,
        @Nonnull Function<TaskExecutionState, CompletableFuture<Acknowledge>> updateTaskExecutionStateFunction,
        @Nonnull BiFunction<JobVertexID, ExecutionAttemptID, CompletableFuture<SerializedInputSplit>> requestNextInputSplitFunction,
        @Nonnull BiFunction<IntermediateDataSetID, ResultPartitionID, CompletableFuture<ExecutionState>> requestPartitionStateFunction,
        @Nonnull Function<ResultPartitionID, CompletableFuture<Acknowledge>> scheduleOrUpdateConsumersFunction,
        @Nonnull Function<ResourceID, CompletableFuture<Acknowledge>> disconnectTaskManagerFunction,
        @Nonnull Consumer<ResourceManagerId> disconnectResourceManagerConsumer,
        @Nonnull Supplier<CompletableFuture<ClassloadingProps>> classloadingPropsSupplier,
        @Nonnull BiFunction<ResourceID, Collection<SlotOffer>, CompletableFuture<Collection<SlotOffer>>> offerSlotsFunction,
        @Nonnull TriConsumer<ResourceID, AllocationID, Throwable> failSlotConsumer,
        @Nonnull BiFunction<String, TaskManagerLocation, CompletableFuture<RegistrationResponse>> registerTaskManagerFunction,
        @Nonnull BiConsumer<ResourceID, AccumulatorReport> taskManagerHeartbeatConsumer,
        @Nonnull Consumer<ResourceID> resourceManagerHeartbeatConsumer,
        @Nonnull Supplier<CompletableFuture<JobDetails>> requestJobDetailsSupplier,
        @Nonnull Supplier<CompletableFuture<ArchivedExecutionGraph>> requestJobSupplier,
        @Nonnull BiFunction<String, Boolean, CompletableFuture<String>> triggerSavepointFunction,
        @Nonnull Function<JobVertexID, CompletableFuture<OperatorBackPressureStatsResponse>> requestOperatorBackPressureStatsFunction,
        @Nonnull BiConsumer<AllocationID, Throwable> notifyAllocationFailureConsumer,
        @Nonnull Consumer<Tuple5<JobID, ExecutionAttemptID, Long, CheckpointMetrics, TaskStateSnapshot>> acknowledgeCheckpointConsumer,
        @Nonnull Consumer<DeclineCheckpoint> declineCheckpointConsumer,
        @Nonnull Supplier<JobMasterId> fencingTokenSupplier,
        @Nonnull BiFunction<JobID, String, CompletableFuture<KvStateLocation>> requestKvStateLocationFunction,
        @Nonnull Function<Tuple6<JobID, JobVertexID, KeyGroupRange, String, KvStateID, InetSocketAddress>, CompletableFuture<Acknowledge>> notifyKvStateRegisteredFunction,
        @Nonnull Function<Tuple4<JobID, JobVertexID, KeyGroupRange, String>, CompletableFuture<Acknowledge>> notifyKvStateUnregisteredFunction,
        @Nonnull TriFunction<String, Object, byte[], CompletableFuture<Object>> updateAggregateFunction) {

    // endpoint identity and fencing
    this.address = address;
    this.hostname = hostname;
    this.fencingTokenSupplier = fencingTokenSupplier;

    // job control: cancel / stop / rescale
    this.cancelFunction = cancelFunction;
    this.stopFunction = stopFunction;
    this.rescalingJobFunction = rescalingJobFunction;
    this.rescalingOperatorsFunction = rescalingOperatorsFunction;

    // task, input split and partition callbacks
    this.updateTaskExecutionStateFunction = updateTaskExecutionStateFunction;
    this.requestNextInputSplitFunction = requestNextInputSplitFunction;
    this.requestPartitionStateFunction = requestPartitionStateFunction;
    this.scheduleOrUpdateConsumersFunction = scheduleOrUpdateConsumersFunction;
    this.classloadingPropsSupplier = classloadingPropsSupplier;

    // task manager / resource manager connections and heartbeats
    this.disconnectTaskManagerFunction = disconnectTaskManagerFunction;
    this.disconnectResourceManagerConsumer = disconnectResourceManagerConsumer;
    this.registerTaskManagerFunction = registerTaskManagerFunction;
    this.taskManagerHeartbeatConsumer = taskManagerHeartbeatConsumer;
    this.resourceManagerHeartbeatConsumer = resourceManagerHeartbeatConsumer;

    // slot handling
    this.offerSlotsFunction = offerSlotsFunction;
    this.failSlotConsumer = failSlotConsumer;
    this.notifyAllocationFailureConsumer = notifyAllocationFailureConsumer;

    // job information and back pressure queries
    this.requestJobDetailsSupplier = requestJobDetailsSupplier;
    this.requestJobSupplier = requestJobSupplier;
    this.requestOperatorBackPressureStatsFunction = requestOperatorBackPressureStatsFunction;

    // savepoints and checkpoints
    this.triggerSavepointFunction = triggerSavepointFunction;
    this.acknowledgeCheckpointConsumer = acknowledgeCheckpointConsumer;
    this.declineCheckpointConsumer = declineCheckpointConsumer;

    // queryable state and aggregates
    this.requestKvStateLocationFunction = requestKvStateLocationFunction;
    this.notifyKvStateRegisteredFunction = notifyKvStateRegisteredFunction;
    this.notifyKvStateUnregisteredFunction = notifyKvStateUnregisteredFunction;
    this.updateAggregateFunction = updateAggregateFunction;
}
Example #22
Source File: CheckpointCoordinatorTest.java From flink with Apache License 2.0 | 4 votes |
/** * This test triggers a checkpoint and then sends a decline checkpoint message from * one of the tasks. The expected behaviour is that said checkpoint is discarded and a new * checkpoint is triggered. */ @Test public void testTriggerAndDeclineCheckpointSimple() { try { final JobID jid = new JobID(); final long timestamp = System.currentTimeMillis(); // create some mock Execution vertices that receive the checkpoint trigger messages final ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); final ExecutionAttemptID attemptID2 = new ExecutionAttemptID(); ExecutionVertex vertex1 = mockExecutionVertex(attemptID1); ExecutionVertex vertex2 = mockExecutionVertex(attemptID2); // set up the coordinator and validate the initial state CheckpointCoordinator coord = getCheckpointCoordinator(jid, vertex1, vertex2, failureManager); assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // trigger the first checkpoint. this should succeed assertTrue(coord.triggerCheckpoint(timestamp, false)); // validate that we have a pending checkpoint assertEquals(1, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // we have one task scheduled that will cancel after timeout assertEquals(1, coord.getNumScheduledTasks()); long checkpointId = coord.getPendingCheckpoints().entrySet().iterator().next().getKey(); PendingCheckpoint checkpoint = coord.getPendingCheckpoints().get(checkpointId); assertNotNull(checkpoint); assertEquals(checkpointId, checkpoint.getCheckpointId()); assertEquals(timestamp, checkpoint.getCheckpointTimestamp()); assertEquals(jid, checkpoint.getJobId()); assertEquals(2, checkpoint.getNumberOfNonAcknowledgedTasks()); assertEquals(0, checkpoint.getNumberOfAcknowledgedTasks()); assertEquals(0, checkpoint.getOperatorStates().size()); assertFalse(checkpoint.isDiscarded()); assertFalse(checkpoint.isFullyAcknowledged()); // check that the vertices 
received the trigger checkpoint message verify(vertex1.getCurrentExecutionAttempt()).triggerCheckpoint(checkpointId, timestamp, CheckpointOptions.forCheckpointWithDefaultLocation()); verify(vertex2.getCurrentExecutionAttempt()).triggerCheckpoint(checkpointId, timestamp, CheckpointOptions.forCheckpointWithDefaultLocation()); // acknowledge from one of the tasks coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId), "Unknown location"); assertEquals(1, checkpoint.getNumberOfAcknowledgedTasks()); assertEquals(1, checkpoint.getNumberOfNonAcknowledgedTasks()); assertFalse(checkpoint.isDiscarded()); assertFalse(checkpoint.isFullyAcknowledged()); // acknowledge the same task again (should not matter) coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId), "Unknown location"); assertFalse(checkpoint.isDiscarded()); assertFalse(checkpoint.isFullyAcknowledged()); // decline checkpoint from the other task, this should cancel the checkpoint // and trigger a new one coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID1, checkpointId), TASK_MANAGER_LOCATION_INFO); assertTrue(checkpoint.isDiscarded()); // the canceler is also removed assertEquals(0, coord.getNumScheduledTasks()); // validate that we have no new pending checkpoint assertEquals(0, coord.getNumberOfPendingCheckpoints()); assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints()); // decline again, nothing should happen // decline from the other task, nothing should happen coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID1, checkpointId), TASK_MANAGER_LOCATION_INFO); coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID2, checkpointId), TASK_MANAGER_LOCATION_INFO); assertTrue(checkpoint.isDiscarded()); coord.shutdown(JobStatus.FINISHED); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
Example #23
Source File: CheckpointCoordinator.java From flink with Apache License 2.0 | 4 votes |
/** * Receives a {@link DeclineCheckpoint} message for a pending checkpoint. * * @param message Checkpoint decline from the task manager * @param taskManagerLocationInfo The location info of the decline checkpoint message's sender */ public void receiveDeclineMessage(DeclineCheckpoint message, String taskManagerLocationInfo) { if (shutdown || message == null) { return; } if (!job.equals(message.getJob())) { throw new IllegalArgumentException("Received DeclineCheckpoint message for job " + message.getJob() + " from " + taskManagerLocationInfo + " while this coordinator handles job " + job); } final long checkpointId = message.getCheckpointId(); final String reason = (message.getReason() != null ? message.getReason().getMessage() : ""); PendingCheckpoint checkpoint; synchronized (lock) { // we need to check inside the lock for being shutdown as well, otherwise we // get races and invalid error log messages if (shutdown) { return; } checkpoint = pendingCheckpoints.remove(checkpointId); if (checkpoint != null && !checkpoint.isDiscarded()) { LOG.info("Decline checkpoint {} by task {} of job {} at {}.", checkpointId, message.getTaskExecutionId(), job, taskManagerLocationInfo); discardCheckpoint(checkpoint, message.getReason(), message.getTaskExecutionId()); } else if (checkpoint != null) { // this should not happen throw new IllegalStateException( "Received message for discarded but non-removed checkpoint " + checkpointId); } else if (LOG.isDebugEnabled()) { if (recentPendingCheckpoints.contains(checkpointId)) { // message is for an unknown checkpoint, or comes too late (checkpoint disposed) LOG.debug("Received another decline message for now expired checkpoint attempt {} from task {} of job {} at {} : {}", checkpointId, message.getTaskExecutionId(), job, taskManagerLocationInfo, reason); } else { // message is for an unknown checkpoint. might be so old that we don't even remember it any more LOG.debug("Received decline message for unknown (too old?) 
checkpoint attempt {} from task {} of job {} at {} : {}", checkpointId, message.getTaskExecutionId(), job, taskManagerLocationInfo, reason); } } } }
Example #24
Source File: SchedulerNG.java From flink with Apache License 2.0 | votes |
/**
 * Passes a {@link DeclineCheckpoint} message to this scheduler.
 *
 * <p>NOTE(review): the {@code declineCheckpoint} implementations shown elsewhere in this file
 * forward the message to the job's checkpoint coordinator when one exists and only log
 * otherwise - confirm that this is the intended contract for every implementation.
 *
 * @param decline the decline message to process
 */
void declineCheckpoint(DeclineCheckpoint decline);
Example #25
Source File: SchedulerNG.java From flink with Apache License 2.0 | votes |
/**
 * Passes a {@link DeclineCheckpoint} message to this scheduler.
 *
 * <p>NOTE(review): the {@code declineCheckpoint} implementations shown elsewhere in this file
 * forward the message to the job's checkpoint coordinator when one exists and only log
 * otherwise - confirm that this is the intended contract for every implementation.
 *
 * @param decline the decline message to process
 */
void declineCheckpoint(DeclineCheckpoint decline);