Java Code Examples for org.apache.flink.runtime.jobmaster.LogicalSlot#getTaskManagerGateway()
The following examples show how to use
org.apache.flink.runtime.jobmaster.LogicalSlot#getTaskManagerGateway() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Sends stop RPC call. */ public void stop() { assertRunningInJobMasterMainThread(); final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); CompletableFuture<Acknowledge> stopResultFuture = FutureUtils.retry( () -> taskManagerGateway.stopTask(attemptId, rpcTimeout), NUM_STOP_CALL_TRIES, vertex.getExecutionGraph().getJobMasterMainThreadExecutor()); stopResultFuture.exceptionally( failure -> { LOG.info("Stopping task was not successful.", failure); return null; }); } }
Example 2
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
/** * Update the partition infos on the assigned resource. * * @param partitionInfos for the remote task */ private void sendUpdatePartitionInfoRpcCall( final Iterable<PartitionInfo> partitionInfos) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final TaskManagerLocation taskManagerLocation = slot.getTaskManagerLocation(); CompletableFuture<Acknowledge> updatePartitionsResultFuture = taskManagerGateway.updatePartitions(attemptId, partitionInfos, rpcTimeout); updatePartitionsResultFuture.whenCompleteAsync( (ack, failure) -> { // fail if there was a failure if (failure != null) { fail(new IllegalStateException("Update task on TaskManager " + taskManagerLocation + " failed due to:", failure)); } }, getVertex().getExecutionGraph().getJobMasterMainThreadExecutor()); } }
Example 3
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
private void sendReleaseIntermediateResultPartitionsRpcCall() { LOG.info("Discarding the results produced by task execution {}.", attemptId); final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final ShuffleMaster<?> shuffleMaster = getVertex().getExecutionGraph().getShuffleMaster(); Collection<ResultPartitionID> partitionIds = producedPartitions.values().stream() .filter(resultPartitionDeploymentDescriptor -> resultPartitionDeploymentDescriptor.getPartitionType().isPipelined()) .map(ResultPartitionDeploymentDescriptor::getShuffleDescriptor) .peek(shuffleMaster::releasePartitionExternally) .map(ShuffleDescriptor::getResultPartitionID) .collect(Collectors.toList()); if (!partitionIds.isEmpty()) { // TODO For some tests this could be a problem when querying too early if all resources were released taskManagerGateway.releasePartitions(getVertex().getJobId(), partitionIds); } } }
Example 4
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
/** * This method sends a CancelTask message to the instance of the assigned slot. * * <p>The sending is tried up to NUM_CANCEL_CALL_TRIES times. */ private void sendCancelRpcCall(int numberRetries) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final ComponentMainThreadExecutor jobMasterMainThreadExecutor = getVertex().getExecutionGraph().getJobMasterMainThreadExecutor(); CompletableFuture<Acknowledge> cancelResultFuture = FutureUtils.retry( () -> taskManagerGateway.cancelTask(attemptId, rpcTimeout), numberRetries, jobMasterMainThreadExecutor); cancelResultFuture.whenComplete( (ack, failure) -> { if (failure != null) { fail(new Exception("Task could not be canceled.", failure)); } }); } }
Example 5
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
private void triggerCheckpointHelper(long checkpointId, long timestamp, CheckpointOptions checkpointOptions, boolean advanceToEndOfEventTime) { final CheckpointType checkpointType = checkpointOptions.getCheckpointType(); if (advanceToEndOfEventTime && !(checkpointType.isSynchronous() && checkpointType.isSavepoint())) { throw new IllegalArgumentException("Only synchronous savepoints are allowed to advance the watermark to MAX."); } final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); taskManagerGateway.triggerCheckpoint(attemptId, getVertex().getJobId(), checkpointId, timestamp, checkpointOptions, advanceToEndOfEventTime); } else { LOG.debug("The execution has no slot assigned. This indicates that the execution is no longer running."); } }
Example 6
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
private void triggerCheckpointHelper(long checkpointId, long timestamp, CheckpointOptions checkpointOptions, boolean advanceToEndOfEventTime) { final CheckpointType checkpointType = checkpointOptions.getCheckpointType(); if (advanceToEndOfEventTime && !(checkpointType.isSynchronous() && checkpointType.isSavepoint())) { throw new IllegalArgumentException("Only synchronous savepoints are allowed to advance the watermark to MAX."); } final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); taskManagerGateway.triggerCheckpoint(attemptId, getVertex().getJobId(), checkpointId, timestamp, checkpointOptions, advanceToEndOfEventTime); } else { LOG.debug("The execution has no slot assigned. This indicates that the execution is no longer running."); } }
Example 7
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
/** * Request a stack trace sample from the task of this execution. * * @param sampleId of the stack trace sample * @param numSamples the sample should contain * @param delayBetweenSamples to wait * @param maxStackTraceDepth of the samples * @param timeout until the request times out * @return Future stack trace sample response */ public CompletableFuture<StackTraceSampleResponse> requestStackTraceSample( int sampleId, int numSamples, Time delayBetweenSamples, int maxStackTraceDepth, Time timeout) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); return taskManagerGateway.requestStackTraceSample( attemptId, sampleId, numSamples, delayBetweenSamples, maxStackTraceDepth, timeout); } else { return FutureUtils.completedExceptionally(new Exception("The execution has no slot assigned.")); } }
Example 8
Source File: Execution.java From flink with Apache License 2.0 | 6 votes |
/** * Update the partition infos on the assigned resource. * * @param partitionInfos for the remote task */ private void sendUpdatePartitionInfoRpcCall( final Iterable<PartitionInfo> partitionInfos) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final TaskManagerLocation taskManagerLocation = slot.getTaskManagerLocation(); CompletableFuture<Acknowledge> updatePartitionsResultFuture = taskManagerGateway.updatePartitions(attemptId, partitionInfos, rpcTimeout); updatePartitionsResultFuture.whenCompleteAsync( (ack, failure) -> { // fail if there was a failure if (failure != null) { fail(new IllegalStateException("Update to task [" + getVertexWithAttempt() + "] on TaskManager " + taskManagerLocation + " failed", failure)); } }, getVertex().getExecutionGraph().getJobMasterMainThreadExecutor()); } }
Example 9
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * This method sends a CancelTask message to the instance of the assigned slot. * * <p>The sending is tried up to NUM_CANCEL_CALL_TRIES times. */ private void sendCancelRpcCall(int numberRetries) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final ComponentMainThreadExecutor jobMasterMainThreadExecutor = getVertex().getExecutionGraph().getJobMasterMainThreadExecutor(); CompletableFuture<Acknowledge> cancelResultFuture = FutureUtils.retry( () -> taskManagerGateway.cancelTask(attemptId, rpcTimeout), numberRetries, jobMasterMainThreadExecutor); cancelResultFuture.whenComplete( (ack, failure) -> { if (failure != null) { fail(new Exception("Task could not be canceled.", failure)); } }); } }
Example 10
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Request a stack trace sample from the task of this execution. * * @param sampleId of the stack trace sample * @param numSamples the sample should contain * @param delayBetweenSamples to wait * @param maxStackTraceDepth of the samples * @param timeout until the request times out * @return Future stack trace sample response */ public CompletableFuture<StackTraceSampleResponse> requestStackTraceSample( int sampleId, int numSamples, Time delayBetweenSamples, int maxStackTraceDepth, Time timeout) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); return taskManagerGateway.requestStackTraceSample( attemptId, sampleId, numSamples, delayBetweenSamples, maxStackTraceDepth, timeout); } else { return FutureUtils.completedExceptionally(new Exception("The execution has no slot assigned.")); } }
Example 11
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void sendFailIntermediateResultPartitionsRpcCall() { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); // TODO For some tests this could be a problem when querying too early if all resources were released taskManagerGateway.failPartition(attemptId); } }
Example 12
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Trigger a new checkpoint on the task of this execution. * * @param checkpointId of th checkpoint to trigger * @param timestamp of the checkpoint to trigger * @param checkpointOptions of the checkpoint to trigger */ public void triggerCheckpoint(long checkpointId, long timestamp, CheckpointOptions checkpointOptions) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); taskManagerGateway.triggerCheckpoint(attemptId, getVertex().getJobId(), checkpointId, timestamp, checkpointOptions); } else { LOG.debug("The execution has no slot assigned. This indicates that the execution is " + "no longer running."); } }
Example 13
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Notify the task of this execution about a completed checkpoint. * * @param checkpointId of the completed checkpoint * @param timestamp of the completed checkpoint */ public void notifyCheckpointComplete(long checkpointId, long timestamp) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); taskManagerGateway.notifyCheckpointComplete(attemptId, getVertex().getJobId(), checkpointId, timestamp); } else { LOG.debug("The execution has no slot assigned. This indicates that the execution is " + "no longer running."); } }
Example 14
Source File: SimpleSlotProvider.java From flink with Apache License 2.0 | 5 votes |
@Override public void returnLogicalSlot(LogicalSlot logicalSlot) { synchronized (lock) { SimpleSlotContext as = new SimpleSlotContext( logicalSlot.getAllocationId(), logicalSlot.getTaskManagerLocation(), logicalSlot.getPhysicalSlotNumber(), logicalSlot.getTaskManagerGateway(), ResourceProfile.UNKNOWN); slots.add(as); allocatedSlots.remove(logicalSlot.getSlotRequestId()); } }
Example 15
Source File: Execution.java From flink with Apache License 2.0 | 5 votes |
/** * Request the back pressure ratio from the task of this execution. * * @param requestId id of the request. * @param timeout the request times out. * @return A future of the task back pressure result. */ public CompletableFuture<TaskBackPressureResponse> requestBackPressure(int requestId, Time timeout) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); return taskManagerGateway.requestTaskBackPressure(attemptId, requestId, timeout); } else { return FutureUtils.completedExceptionally(new Exception("The execution has no slot assigned.")); } }
Example 16
Source File: Execution.java From flink with Apache License 2.0 | 5 votes |
/** * Notify the task of this execution about a completed checkpoint. * * @param checkpointId of the completed checkpoint * @param timestamp of the completed checkpoint */ public void notifyCheckpointComplete(long checkpointId, long timestamp) { final LogicalSlot slot = assignedResource; if (slot != null) { final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); taskManagerGateway.notifyCheckpointComplete(attemptId, getVertex().getJobId(), checkpointId, timestamp); } else { LOG.debug("The execution has no slot assigned. This indicates that the execution is " + "no longer running."); } }
Example 17
Source File: SimpleSlotProvider.java From flink with Apache License 2.0 | 5 votes |
@Override public void returnLogicalSlot(LogicalSlot logicalSlot) { synchronized (lock) { SimpleSlotContext as = new SimpleSlotContext( logicalSlot.getAllocationId(), logicalSlot.getTaskManagerLocation(), logicalSlot.getPhysicalSlotNumber(), logicalSlot.getTaskManagerGateway(), ResourceProfile.ANY); slots.add(as); allocatedSlots.remove(logicalSlot.getSlotRequestId()); } }
Example 18
Source File: Execution.java From flink with Apache License 2.0 | 4 votes |
/** * Deploys the execution to the previously assigned resource. * * @throws JobException if the execution cannot be deployed to the assigned resource */ public void deploy() throws JobException { assertRunningInJobMasterMainThread(); final LogicalSlot slot = assignedResource; checkNotNull(slot, "In order to deploy the execution we first have to assign a resource via tryAssignResource."); // Check if the TaskManager died in the meantime // This only speeds up the response to TaskManagers failing concurrently to deployments. // The more general check is the rpcTimeout of the deployment call if (!slot.isAlive()) { throw new JobException("Target slot (TaskManager) for deployment is no longer alive."); } // make sure exactly one deployment call happens from the correct state // note: the transition from CREATED to DEPLOYING is for testing purposes only ExecutionState previous = this.state; if (previous == SCHEDULED || previous == CREATED) { if (!transitionState(previous, DEPLOYING)) { // race condition, someone else beat us to the deploying call. // this should actually not happen and indicates a race somewhere else throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race."); } } else { // vertex may have been cancelled, or it was already scheduled throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous); } if (this != slot.getPayload()) { throw new IllegalStateException( String.format("The execution %s has not been assigned to the assigned slot.", this)); } try { // race double check, did we fail/cancel and do we need to release the slot? if (this.state != DEPLOYING) { slot.releaseSlot(new FlinkException("Actual state of execution " + this + " (" + state + ") does not match expected state DEPLOYING.")); return; } if (LOG.isInfoEnabled()) { LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getTaskNameWithSubtaskIndex(), attemptNumber, getAssignedResourceLocation())); } final TaskDeploymentDescriptor deployment = TaskDeploymentDescriptorFactory .fromExecutionVertex(vertex, attemptNumber) .createDeploymentDescriptor( slot.getAllocationId(), slot.getPhysicalSlotNumber(), taskRestore, producedPartitions.values()); // null taskRestore to let it be GC'ed taskRestore = null; final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final ComponentMainThreadExecutor jobMasterMainThreadExecutor = vertex.getExecutionGraph().getJobMasterMainThreadExecutor(); // We run the submission in the future executor so that the serialization of large TDDs does not block // the main thread and sync back to the main thread once submission is completed. CompletableFuture.supplyAsync(() -> taskManagerGateway.submitTask(deployment, rpcTimeout), executor) .thenCompose(Function.identity()) .whenCompleteAsync( (ack, failure) -> { // only respond to the failure case if (failure != null) { if (failure instanceof TimeoutException) { String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')'; markFailed(new Exception( "Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a rpcTimeout of " + rpcTimeout, failure)); } else { markFailed(failure); } } }, jobMasterMainThreadExecutor); } catch (Throwable t) { markFailed(t); ExceptionUtils.rethrow(t); } }
Example 19
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Deploys the execution to the previously assigned resource. * * @throws JobException if the execution cannot be deployed to the assigned resource */ public void deploy() throws JobException { assertRunningInJobMasterMainThread(); final LogicalSlot slot = assignedResource; checkNotNull(slot, "In order to deploy the execution we first have to assign a resource via tryAssignResource."); // Check if the TaskManager died in the meantime // This only speeds up the response to TaskManagers failing concurrently to deployments. // The more general check is the rpcTimeout of the deployment call if (!slot.isAlive()) { throw new JobException("Target slot (TaskManager) for deployment is no longer alive."); } // make sure exactly one deployment call happens from the correct state // note: the transition from CREATED to DEPLOYING is for testing purposes only ExecutionState previous = this.state; if (previous == SCHEDULED || previous == CREATED) { if (!transitionState(previous, DEPLOYING)) { // race condition, someone else beat us to the deploying call. // this should actually not happen and indicates a race somewhere else throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race."); } } else { // vertex may have been cancelled, or it was already scheduled throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous); } if (this != slot.getPayload()) { throw new IllegalStateException( String.format("The execution %s has not been assigned to the assigned slot.", this)); } try { // race double check, did we fail/cancel and do we need to release the slot? if (this.state != DEPLOYING) { slot.releaseSlot(new FlinkException("Actual state of execution " + this + " (" + state + ") does not match expected state DEPLOYING.")); return; } if (LOG.isInfoEnabled()) { LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getTaskNameWithSubtaskIndex(), attemptNumber, getAssignedResourceLocation())); } final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor( attemptId, slot, taskRestore, attemptNumber); // null taskRestore to let it be GC'ed taskRestore = null; final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final ComponentMainThreadExecutor jobMasterMainThreadExecutor = vertex.getExecutionGraph().getJobMasterMainThreadExecutor(); // We run the submission in the future executor so that the serialization of large TDDs does not block // the main thread and sync back to the main thread once submission is completed. CompletableFuture.supplyAsync(() -> taskManagerGateway.submitTask(deployment, rpcTimeout), executor) .thenCompose(Function.identity()) .whenCompleteAsync( (ack, failure) -> { // only respond to the failure case if (failure != null) { if (failure instanceof TimeoutException) { String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')'; markFailed(new Exception( "Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a rpcTimeout of " + rpcTimeout, failure)); } else { markFailed(failure); } } }, jobMasterMainThreadExecutor); } catch (Throwable t) { markFailed(t); ExceptionUtils.rethrow(t); } }
Example 20
Source File: Execution.java From flink with Apache License 2.0 | 4 votes |
/** * Deploys the execution to the previously assigned resource. * * @throws JobException if the execution cannot be deployed to the assigned resource */ public void deploy() throws JobException { assertRunningInJobMasterMainThread(); final LogicalSlot slot = assignedResource; checkNotNull(slot, "In order to deploy the execution we first have to assign a resource via tryAssignResource."); // Check if the TaskManager died in the meantime // This only speeds up the response to TaskManagers failing concurrently to deployments. // The more general check is the rpcTimeout of the deployment call if (!slot.isAlive()) { throw new JobException("Target slot (TaskManager) for deployment is no longer alive."); } // make sure exactly one deployment call happens from the correct state // note: the transition from CREATED to DEPLOYING is for testing purposes only ExecutionState previous = this.state; if (previous == SCHEDULED || previous == CREATED) { if (!transitionState(previous, DEPLOYING)) { // race condition, someone else beat us to the deploying call. // this should actually not happen and indicates a race somewhere else throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race."); } } else { // vertex may have been cancelled, or it was already scheduled throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous); } if (this != slot.getPayload()) { throw new IllegalStateException( String.format("The execution %s has not been assigned to the assigned slot.", this)); } try { // race double check, did we fail/cancel and do we need to release the slot? if (this.state != DEPLOYING) { slot.releaseSlot(new FlinkException("Actual state of execution " + this + " (" + state + ") does not match expected state DEPLOYING.")); return; } LOG.info("Deploying {} (attempt #{}) with attempt id {} to {} with allocation id {}", vertex.getTaskNameWithSubtaskIndex(), attemptNumber, vertex.getCurrentExecutionAttempt().getAttemptId(), getAssignedResourceLocation(), slot.getAllocationId()); final TaskDeploymentDescriptor deployment = TaskDeploymentDescriptorFactory .fromExecutionVertex(vertex, attemptNumber) .createDeploymentDescriptor( slot.getAllocationId(), slot.getPhysicalSlotNumber(), taskRestore, producedPartitions.values()); // null taskRestore to let it be GC'ed taskRestore = null; final TaskManagerGateway taskManagerGateway = slot.getTaskManagerGateway(); final ComponentMainThreadExecutor jobMasterMainThreadExecutor = vertex.getExecutionGraph().getJobMasterMainThreadExecutor(); // We run the submission in the future executor so that the serialization of large TDDs does not block // the main thread and sync back to the main thread once submission is completed. CompletableFuture.supplyAsync(() -> taskManagerGateway.submitTask(deployment, rpcTimeout), executor) .thenCompose(Function.identity()) .whenCompleteAsync( (ack, failure) -> { // only respond to the failure case if (failure != null) { if (failure instanceof TimeoutException) { String taskname = vertex.getTaskNameWithSubtaskIndex() + " (" + attemptId + ')'; markFailed(new Exception( "Cannot deploy task " + taskname + " - TaskManager (" + getAssignedResourceLocation() + ") not responding after a rpcTimeout of " + rpcTimeout, failure)); } else { markFailed(failure); } } }, jobMasterMainThreadExecutor); } catch (Throwable t) { markFailed(t); if (isLegacyScheduling()) { ExceptionUtils.rethrow(t); } } }