com.netflix.fenzo.VirtualMachineLease Java Examples

The following examples show how to use com.netflix.fenzo.VirtualMachineLease. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MesosResourceManager.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@link TaskSchedulerBuilder} used to obtain the Fenzo optimizer.
 * Going through this wrapper instead of Fenzo's own builder keeps the
 * Launch Coordinator unit-testable.
 */
private static TaskSchedulerBuilder createOptimizer() {
	return new TaskSchedulerBuilder() {
		// Fenzo builder that every call below is forwarded to.
		private final TaskScheduler.Builder delegate = new TaskScheduler.Builder();

		@Override
		public TaskSchedulerBuilder withLeaseRejectAction(Action1<VirtualMachineLease> action) {
			delegate.withLeaseRejectAction(action);
			return this;
		}

		@Override
		public TaskScheduler build() {
			return delegate.build();
		}
	};
}
 
Example #2
Source File: MesosResourceManager.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Produces the builder for the Fenzo optimizer.
 * The extra layer of indirection exists so that the Launch Coordinator can be unit tested.
 */
private static TaskSchedulerBuilder createOptimizer() {
	return new TaskSchedulerBuilder() {
		// The real Fenzo builder; this anonymous class only forwards to it.
		private final TaskScheduler.Builder fenzoBuilder = new TaskScheduler.Builder();

		@Override
		public TaskSchedulerBuilder withLeaseRejectAction(Action1<VirtualMachineLease> action) {
			fenzoBuilder.withLeaseRejectAction(action);
			return this;
		}

		@Override
		public TaskScheduler build() {
			return fenzoBuilder.build();
		}
	};
}
 
Example #3
Source File: TaskPlacementRecorder.java    From titus-control-plane with Apache License 2.0 6 votes vote down vote up
/**
 * Processes every agent assignment found in the scheduling result and pairs the leases
 * of each processed assignment with the task info requests produced for it.
 * When at least one task was assigned, the task count and elapsed time are logged,
 * whether or not processing succeeded.
 *
 * @param schedulingResult scheduling result whose result map is recorded
 * @return one (leases, task info requests) pair per processed assignment
 */
List<Pair<List<VirtualMachineLease>, List<TaskInfoRequest>>> record(SchedulingResult schedulingResult) {
    List<AgentAssignment> agentAssignments = schedulingResult.getResultMap().entrySet().stream()
            .map(e -> new AgentAssignment(e.getKey(), e.getValue()))
            .collect(Collectors.toList());

    long startedAt = wallTime();
    try {
        Map<AgentAssignment, List<TaskInfoRequest>> requestsByAssignment = processV3Assignments(agentAssignments);

        return requestsByAssignment.entrySet().stream()
                .map(e -> Pair.of(e.getKey().getLeases(), e.getValue()))
                .collect(Collectors.toList());
    } finally {
        int placedTasks = schedulingResult.getResultMap().values().stream()
                .mapToInt(result -> result.getTasksAssigned().size())
                .sum();
        if (placedTasks > 0) {
            logger.info("Task placement recording: tasks={}, executionTimeMs={}", placedTasks, wallTime() - startedAt);
        }
    }
}
 
Example #4
Source File: TaskLaunchScheduledServiceTest.java    From shardingsphere-elasticjob-cloud with Apache License 2.0 6 votes vote down vote up
/**
 * Runs one iteration of the launch service with a single eligible failover job and
 * verifies the resulting interactions with the facade service and the job event bus.
 */
@Test
public void assertRunOneIteration() throws Exception {
    // The facade reports exactly one eligible job context: "failover_job" in FAILOVER execution.
    when(facadeService.getEligibleJobContext()).thenReturn(Lists.newArrayList(
            JobContext.from(CloudJobConfigurationBuilder.createCloudJobConfiguration("failover_job", CloudJobExecutionType.DAEMON, 1), ExecutionType.FAILOVER)));
    // Canned scheduling result: one assignment on "localhost" backed by the lease for "offer_0".
    Map<String, VMAssignmentResult> vmAssignmentResultMap = new HashMap<>();
    vmAssignmentResultMap.put("rs1", new VMAssignmentResult("localhost", Lists.<VirtualMachineLease>newArrayList(new VMLeaseObject(OfferBuilder.createOffer("offer_0"))),
            Sets.newHashSet(mockTaskAssignmentResult("failover_job", ExecutionType.FAILOVER))));
    // The scheduler returns the canned result regardless of the requests and leases passed in.
    when(taskScheduler.scheduleOnce(ArgumentMatchers.<TaskRequest>anyList(), ArgumentMatchers.<VirtualMachineLease>anyList())).thenReturn(new SchedulingResult(vmAssignmentResultMap));
    when(facadeService.load("failover_job")).thenReturn(Optional.of(CloudJobConfigurationBuilder.createCloudJobConfiguration("failover_job")));
    // Any failover lookup resolves to a task id built for "failover_job".
    when(facadeService.getFailoverTaskId(any(TaskContext.MetaInfo.class)))
            .thenReturn(Optional.of(String.format("%s@-@0@-@%s@-@unassigned-slave@-@0", "failover_job", ExecutionType.FAILOVER.name())));
    when(taskScheduler.getTaskAssigner()).thenReturn(mock(Action2.class));
    taskLaunchScheduledService.runOneIteration();
    // Expected interactions after the iteration.
    verify(facadeService).removeLaunchTasksFromQueue(ArgumentMatchers.<TaskContext>anyList());
    verify(facadeService).loadAppConfig("test_app");
    verify(jobEventBus).post(ArgumentMatchers.<JobStatusTraceEvent>any());
}
 
Example #5
Source File: TaskLaunchScheduledServiceTest.java    From shardingsphere-elasticjob-cloud with Apache License 2.0 6 votes vote down vote up
/**
 * Runs one iteration of the launch service with a single eligible script job and
 * verifies the resulting interactions with the facade service and the job event bus.
 */
@Test
public void assertRunOneIterationWithScriptJob() throws Exception {
    // The facade reports exactly one eligible job context: "script_job" in READY execution.
    when(facadeService.getEligibleJobContext()).thenReturn(Lists.newArrayList(
            JobContext.from(CloudJobConfigurationBuilder.createScriptCloudJobConfiguration("script_job", 1), ExecutionType.READY)));
    // Canned scheduling result: one assignment on "localhost" backed by the lease for "offer_0".
    Map<String, VMAssignmentResult> vmAssignmentResultMap = new HashMap<>();
    vmAssignmentResultMap.put("rs1", new VMAssignmentResult("localhost", Lists.<VirtualMachineLease>newArrayList(new VMLeaseObject(OfferBuilder.createOffer("offer_0"))),
            Sets.newHashSet(mockTaskAssignmentResult("script_job", ExecutionType.READY))));
    // The scheduler returns the canned result regardless of the requests and leases passed in.
    when(taskScheduler.scheduleOnce(ArgumentMatchers.<TaskRequest>anyList(), ArgumentMatchers.<VirtualMachineLease>anyList())).thenReturn(new SchedulingResult(vmAssignmentResultMap));
    when(facadeService.loadAppConfig("test_app")).thenReturn(Optional.of(CloudAppConfigurationBuilder.createCloudAppConfiguration("test_app")));
    when(facadeService.load("script_job")).thenReturn(Optional.of(CloudJobConfigurationBuilder.createScriptCloudJobConfiguration("script_job", 1)));
    when(taskScheduler.getTaskAssigner()).thenReturn(mock(Action2.class));
    taskLaunchScheduledService.runOneIteration();
    // Expected interactions after the iteration.
    verify(facadeService).removeLaunchTasksFromQueue(ArgumentMatchers.<TaskContext>anyList());
    verify(facadeService).isRunning(TaskContext.from(String.format("%s@-@0@-@%s@-@unassigned-slave@-@0", "script_job", ExecutionType.READY)));
    verify(facadeService).loadAppConfig("test_app");
    verify(jobEventBus).post(ArgumentMatchers.<JobStatusTraceEvent>any());
}
 
Example #6
Source File: KubeApiServerIntegrator.java    From titus-control-plane with Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to a node update. Nodes not owned by Fenzo are only logged; for the others a
 * lease is derived from the node and, when one could be created, handed to the lease
 * handler. Any exception raised along the way is logged and not propagated.
 *
 * @param node the updated node
 */
private void nodeUpdated(V1Node node) {
    try {
        if (!KubeUtil.isNodeOwnedByFenzo(directKubeConfiguration.getFarzones(), node)) {
            logger.debug("Ignoring node: {} as it is not owned by fenzo", node.getMetadata().getName());
            return;
        }

        VirtualMachineLease nodeLease = nodeToLease(node);
        if (nodeLease == null) {
            // No lease could be built for this node; nothing to hand over.
            return;
        }
        logger.debug("Adding lease: {}", nodeLease.getId());
        leaseHandler.call(Collections.singletonList(nodeLease));
    } catch (Exception e) {
        logger.warn("Exception on node update: {}", node, e);
    }
}
 
Example #7
Source File: VMStateMgr.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the VM states that still run tasks but whose instance group is in the
 * {@code Inactive} or {@code Removable} lifecycle state.
 *
 * @param instanceIdAttributeName attribute name passed to {@code SchedulerUtils.findInstance}
 * @param agentManagementService  service used to resolve the instance and its instance group
 * @param currentStates           VM states to inspect; may be null or empty
 * @return the matching states, or an empty list when there is nothing to inspect
 */
static List<VirtualMachineCurrentState> getInactiveVMs(String instanceIdAttributeName,
                                                       AgentManagementService agentManagementService,
                                                       List<VirtualMachineCurrentState> currentStates) {
    if (isNullOrEmpty(currentStates)) {
        return Collections.emptyList();
    }

    List<VirtualMachineCurrentState> inactive = new ArrayList<>();
    for (VirtualMachineCurrentState vmState : currentStates) {
        // States without currently available resources are never reported.
        if (vmState.getCurrAvailableResources() == null) {
            continue;
        }
        // Only VMs that still have running tasks are of interest.
        if (isNullOrEmpty(vmState.getRunningTasks())) {
            continue;
        }
        SchedulerUtils.findInstance(agentManagementService, instanceIdAttributeName, vmState)
                .map(AgentInstance::getInstanceGroupId)
                .flatMap(agentManagementService::findInstanceGroup)
                .ifPresent(group -> {
                    InstanceGroupLifecycleState lifecycle = group.getLifecycleStatus().getState();
                    boolean retired = lifecycle == InstanceGroupLifecycleState.Inactive
                            || lifecycle == InstanceGroupLifecycleState.Removable;
                    if (retired) {
                        inactive.add(vmState);
                    }
                });
    }
    return inactive;
}
 
Example #8
Source File: DefaultSchedulingService.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the inactive VMs among the given states and expires all leases of each of them
 * in the task scheduler.
 *
 * @param vmCurrentStates VM states to check
 */
private void checkInactiveVMs(List<VirtualMachineCurrentState> vmCurrentStates) {
    logger.debug("Checking on any workers on VMs that are not active anymore");
    List<VirtualMachineCurrentState> inactive = VMStateMgr.getInactiveVMs(
            schedulerConfiguration.getInstanceAttributeName(),
            agentManagementService,
            vmCurrentStates
    );

    // Every lease held for an inactive VM is expired, addressed by the lease's hostname.
    for (VirtualMachineCurrentState state : inactive) {
        String hostname = state.getCurrAvailableResources().hostname();
        logger.debug("expiring all leases of inactive vm {}", hostname);
        taskScheduler.expireAllLeases(hostname);
    }
}
 
Example #9
Source File: VMOperationsImpl.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@link AgentInfo} for every VM state stored under key {@code "0"} in
 * {@code vmStatesMap}.
 *
 * <p>VM states whose {@code getCurrAvailableResources()} is {@code null} are skipped
 * instead of failing with a {@link NullPointerException}.
 *
 * @return the agent infos; empty when no VM states are stored
 */
@Override
public List<AgentInfo> getAgentInfos() {
    List<VirtualMachineCurrentState> vmStates = vmStatesMap.get("0");
    List<AgentInfo> agentInfos = new ArrayList<>();
    if (vmStates == null || vmStates.isEmpty()) {
        return agentInfos;
    }

    for (VirtualMachineCurrentState s : vmStates) {
        VirtualMachineLease available = s.getCurrAvailableResources();
        if (available == null) {
            // Nothing to report for a VM without a current lease.
            continue;
        }

        // NOTE(review): sums end - beg per range; if ranges are inclusive on both ends this
        // undercounts by one per range - confirm against the Range contract before changing.
        int ports = 0;
        List<VirtualMachineLease.Range> ranges = available.portRanges();
        if (ranges != null) {
            for (VirtualMachineLease.Range r : ranges) {
                ports += r.getEnd() - r.getBeg();
            }
        }

        // Flatten the attributes to their text values.
        Map<String, String> attributes = new HashMap<>();
        Map<String, Protos.Attribute> attributeMap = available.getAttributeMap();
        if (attributeMap != null) {
            for (Map.Entry<String, Protos.Attribute> entry : attributeMap.entrySet()) {
                attributes.put(entry.getKey(), entry.getValue().getText().getValue());
            }
        }

        List<String> offerIds = s.getAllCurrentOffers().stream()
                .map(offer -> offer.getId().getValue())
                .collect(Collectors.toList());

        agentInfos.add(new AgentInfo(
                s.getHostname(), available.cpuCores(),
                available.memoryMB(), available.diskMB(),
                ports, available.getScalarValues(), attributes, s.getResourceSets().keySet(),
                getTimeString(s.getDisabledUntil()), offerIds
        ));
    }
    return agentInfos;
}
 
Example #10
Source File: DefaultSchedulingService.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Wires the VM operations component to the task scheduler: sets the active VM group
 * attribute name, installs the getter that reports jobs per VM, and subscribes to the
 * stream of active instance group ids to keep the scheduler's active VM groups updated.
 *
 * @param attrName lease attribute whose text value is reported alongside each VM's hostname
 */
private void setupVmOps(final String attrName) {
    taskScheduler.setActiveVmGroupAttributeName(masterConfiguration.getActiveSlaveAttributeName());
    vmOps.setJobsOnVMsGetter(() -> {
        List<VMOperations.JobsOnVMStatus> statuses = new ArrayList<>();
        final List<VirtualMachineCurrentState> states = vmCurrentStatesMap.get(0);
        if (states == null || states.isEmpty()) {
            return statuses;
        }
        for (VirtualMachineCurrentState vmState : states) {
            final VirtualMachineLease lease = vmState.getCurrAvailableResources();
            if (lease == null) {
                continue;
            }
            final Protos.Attribute attribute = lease.getAttributeMap().get(attrName);
            if (attribute == null) {
                // VMs that do not carry the requested attribute are left out.
                continue;
            }
            VMOperations.JobsOnVMStatus status =
                    new VMOperations.JobsOnVMStatus(lease.hostname(), attribute.getText().getValue());
            for (TaskRequest request : vmState.getRunningTasks()) {
                if (!(request instanceof V3QueueableTask)) {
                    continue;
                }
                V3QueueableTask v3Task = (V3QueueableTask) request;
                status.addJob(new VMOperations.JobOnVMInfo(v3Task.getJob().getId(), v3Task.getId()));
            }
            statuses.add(status);
        }
        return statuses;
    });

    //TODO remove this once we switch to fully using SchedulingMachinesFilter
    titusRuntime.persistentStream(AgentManagementFunctions.observeActiveInstanceGroupIds(agentManagementService))
            .subscribe(activeIds -> {
                taskScheduler.setActiveVmGroups(activeIds);
                logger.info("Updating Fenzo taskScheduler active instance group list to: {}", activeIds);
            });
}
 
Example #11
Source File: VirtualMachineMasterServiceMesosImpl.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Installs the lease handler, wrapped so that the kill task queue is drained before
 * each batch of leases is passed on to the supplied handler.
 *
 * @param leaseHandler handler that receives the leases
 */
@Override
public void setVMLeaseHandler(Action1<List<? extends VirtualMachineLease>> leaseHandler) {
    this.leaseHandler = leases -> {
        drainKillTaskQueue();
        leaseHandler.call(leases);
    };
}
 
Example #12
Source File: VirtualMachineMasterServiceMesosImpl.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Declines the offer behind the given lease. Nothing is declined when this instance is
 * not activated and running, or when the lease carries no offer; both cases are logged.
 *
 * @param lease lease whose offer should be declined
 */
@Override
public void rejectLease(VirtualMachineLease lease) {
    if (!isActivatedAndRunning()) {
        logger.error("Not in leader mode, not rejecting lease");
        return;
    }
    if (lease.getOffer() == null) {
        logger.warn("Got invalid lease to reject with null offer for host {}", lease.hostname());
        return;
    }

    String requestDescription = "Declining offer " + lease.getId();
    traceMesosVoidRequest(
            requestDescription,
            () -> mesosDriver.declineOffer(
                    lease.getOffer().getId(),
                    Protos.Filters.getDefaultInstance().toBuilder().setRefuseSeconds(offerSecDelayInterval).build()
            )
    );
}
 
Example #13
Source File: KubeApiServerIntegrator.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a node into a lease by first converting it into an offer.
 *
 * @param node node to convert
 * @return a lease wrapping the node's offer, or {@code null} when no offer could be derived
 */
private VirtualMachineLease nodeToLease(V1Node node) {
    Protos.Offer nodeOffer = nodeToOffer(node);
    return nodeOffer == null ? null : new VMLeaseObject(nodeOffer);
}
 
Example #14
Source File: AgentManagementConstraintTest.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a mocked VM state whose mocked lease exposes a single text attribute
 * named {@code "id"} carrying the given value.
 *
 * @param id value of the {@code "id"} attribute
 * @return the mocked VM state
 */
private VirtualMachineCurrentState createVirtualMachineCurrentStateMock(String id) {
    Protos.Attribute idAttribute = Protos.Attribute.newBuilder()
            .setName("id")
            .setType(Protos.Value.Type.TEXT)
            .setText(Protos.Value.Text.newBuilder().setValue(id))
            .build();
    Map<String, Protos.Attribute> leaseAttributes = new HashMap<>();
    leaseAttributes.put("id", idAttribute);

    VirtualMachineLease leaseMock = mock(VirtualMachineLease.class);
    when(leaseMock.getAttributeMap()).thenReturn(leaseAttributes);

    VirtualMachineCurrentState stateMock = mock(VirtualMachineCurrentState.class);
    when(stateMock.getCurrAvailableResources()).thenReturn(leaseMock);
    return stateMock;
}
 
Example #15
Source File: KubeApiServerIntegrator.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the requested tasks, concurrently when the async API is enabled and
 * synchronously otherwise, then logs and records how long the launch took.
 *
 * @param requests task info requests to launch
 * @param leases   not used by this implementation
 */
@Override
public void launchTasks(List<TaskInfoRequest> requests, List<VirtualMachineLease> leases) {
    Stopwatch timer = Stopwatch.createStarted();
    if (!directKubeConfiguration.isAsyncApiEnabled()) {
        launchTasksSync(requests);
        logger.info("Sync pod launches completed: pods={}, elapsed={}[ms]", requests.size(), timer.elapsed(TimeUnit.MILLISECONDS));
    } else {
        launchTasksConcurrently(requests);
        logger.info("Async pod launches completed: pods={}, elapsed={}[ms]", requests.size(), timer.elapsed(TimeUnit.MILLISECONDS));
    }
    launchTasksTimer.record(timer.elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
}
 
Example #16
Source File: SchedulerTestUtils.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a mocked VM state backed by a mocked lease whose attribute map holds one
 * text attribute, {@code "id"}, set to the given value.
 *
 * @param id value stored in the {@code "id"} attribute
 * @return the mocked VM state
 */
public static VirtualMachineCurrentState createVirtualMachineCurrentStateMock(String id) {
    Protos.Value.Text.Builder idText = Protos.Value.Text.newBuilder().setValue(id);
    Protos.Attribute idAttribute = Protos.Attribute.newBuilder()
            .setName("id")
            .setType(Protos.Value.Type.TEXT)
            .setText(idText)
            .build();

    Map<String, Protos.Attribute> attributesById = new HashMap<>();
    attributesById.put("id", idAttribute);

    VirtualMachineLease mockedLease = mock(VirtualMachineLease.class);
    when(mockedLease.getAttributeMap()).thenReturn(attributesById);

    VirtualMachineCurrentState mockedState = mock(VirtualMachineCurrentState.class);
    when(mockedState.getCurrAvailableResources()).thenReturn(mockedLease);
    return mockedState;
}
 
Example #17
Source File: AppConstraintEvaluatorTest.java    From shardingsphere-elasticjob-cloud with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a lease around an offer whose id, slave id and hostname are derived from the
 * index and which carries two scalar resources, {@code cpus} and {@code mem}.
 *
 * @param index suffix used for the offer id, slave id and hostname
 * @param cpus  value of the {@code cpus} resource
 * @param mem   value of the {@code mem} resource
 * @return the lease wrapping the offer
 */
private VirtualMachineLease getLease(final int index, final double cpus, final double mem) {
    Protos.Resource.Builder cpuResource = Protos.Resource.newBuilder()
            .setName("cpus")
            .setType(Protos.Value.Type.SCALAR)
            .setScalar(Protos.Value.Scalar.newBuilder().setValue(cpus));
    Protos.Resource.Builder memResource = Protos.Resource.newBuilder()
            .setName("mem")
            .setType(Protos.Value.Type.SCALAR)
            .setScalar(Protos.Value.Scalar.newBuilder().setValue(mem));

    Protos.Offer offer = Protos.Offer.newBuilder()
            .setId(Protos.OfferID.newBuilder().setValue("offer" + index))
            .setSlaveId(Protos.SlaveID.newBuilder().setValue("S" + index))
            .setHostname("slave" + index)
            .setFrameworkId(Protos.FrameworkID.newBuilder().setValue("f1"))
            .addResources(cpuResource)
            .addResources(memResource)
            .build();
    return new VMLeaseObject(offer);
}
 
Example #18
Source File: AppConstraintEvaluatorTest.java    From shardingsphere-elasticjob-cloud with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the task scheduler under test with a lease offer expiry of 1000000000 seconds
 * and a lease reject action that does nothing.
 */
@Before
public void setUp() throws Exception {
    Action1<VirtualMachineLease> ignoreRejectedLease = new Action1<VirtualMachineLease>() {

        @Override
        public void call(final VirtualMachineLease virtualMachineLease) {
        }
    };
    taskScheduler = new TaskScheduler.Builder()
            .withLeaseOfferExpirySecs(1000000000L)
            .withLeaseRejectAction(ignoreRejectedLease)
            .build();
}
 
Example #19
Source File: SchedulerTestUtils.java    From titus-control-plane with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a mocked VM state that reports the given running and currently assigned tasks
 * and is backed by a mocked lease with a single text attribute, {@code "id"}.
 *
 * @param id            value stored in the {@code "id"} attribute
 * @param runningTasks  tasks returned by {@code getRunningTasks()}
 * @param assignedTasks results returned by {@code getTasksCurrentlyAssigned()}
 * @return the mocked VM state
 */
public static VirtualMachineCurrentState createVirtualMachineCurrentStateMock(String id, List<TaskRequest> runningTasks,
                                                                              List<TaskAssignmentResult> assignedTasks) {
    Protos.Attribute idAttribute = Protos.Attribute.newBuilder()
            .setName("id")
            .setType(Protos.Value.Type.TEXT)
            .setText(Protos.Value.Text.newBuilder().setValue(id))
            .build();
    Map<String, Protos.Attribute> leaseAttributes = new HashMap<>();
    leaseAttributes.put("id", idAttribute);

    VirtualMachineLease leaseMock = mock(VirtualMachineLease.class);
    when(leaseMock.getAttributeMap()).thenReturn(leaseAttributes);

    VirtualMachineCurrentState stateMock = mock(VirtualMachineCurrentState.class);
    when(stateMock.getCurrAvailableResources()).thenReturn(leaseMock);
    when(stateMock.getRunningTasks()).thenReturn(runningTasks);
    when(stateMock.getTasksCurrentlyAssigned()).thenReturn(assignedTasks);
    return stateMock;
}
 
Example #20
Source File: SchedulerService.java    From shardingsphere-elasticjob-cloud with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a task scheduler with a lease offer expiry of 1000000000 seconds whose
 * reject action logs the lease's hostname and declines its offer through the scheduler driver.
 *
 * @return the configured task scheduler
 */
private TaskScheduler getTaskScheduler() {
    Action1<VirtualMachineLease> declineRejectedOffer = new Action1<VirtualMachineLease>() {

        @Override
        public void call(final VirtualMachineLease lease) {
            log.warn("Declining offer on '{}'", lease.hostname());
            schedulerDriver.declineOffer(lease.getOffer().getId());
        }
    };
    TaskScheduler.Builder schedulerBuilder = new TaskScheduler.Builder();
    schedulerBuilder.withLeaseOfferExpirySecs(1000000000L);
    schedulerBuilder.withLeaseRejectAction(declineRejectedOffer);
    return schedulerBuilder.build();
}
 
Example #21
Source File: TaskLaunchScheduledService.java    From shardingsphere-elasticjob-cloud with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the offer id of every given lease, in iteration order.
 *
 * @param leasesUsed leases whose offer ids are wanted
 * @return the offer ids, one per lease
 */
private List<Protos.OfferID> getOfferIDs(final List<VirtualMachineLease> leasesUsed) {
    List<Protos.OfferID> offerIds = new ArrayList<>();
    for (VirtualMachineLease each : leasesUsed) {
        Protos.OfferID offerId = each.getOffer().getId();
        offerIds.add(offerId);
    }
    return offerIds;
}
 
Example #22
Source File: MesosResourceManager.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link TaskSchedulerBuilder} used to obtain the Fenzo optimizer.
 * Going through this wrapper instead of Fenzo's own builder keeps the
 * Launch Coordinator unit-testable.
 */
private static TaskSchedulerBuilder createOptimizer() {
	return new TaskSchedulerBuilder() {
		// Fenzo builder that every call below is forwarded to.
		private final TaskScheduler.Builder delegate = new TaskScheduler.Builder();

		@Override
		public TaskSchedulerBuilder withLeaseRejectAction(Action1<VirtualMachineLease> action) {
			delegate.withLeaseRejectAction(action);
			return this;
		}

		@Override
		public TaskSchedulerBuilder withRejectAllExpiredOffers() {
			delegate.withRejectAllExpiredOffers();
			return this;
		}

		@Override
		public TaskSchedulerBuilder withLeaseOfferExpirySecs(long leaseOfferExpirySecs) {
			delegate.withLeaseOfferExpirySecs(leaseOfferExpirySecs);
			return this;
		}

		@Override
		public TaskScheduler build() {
			return delegate.build();
		}
	};
}
 
Example #23
Source File: JobManagerUtil.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a function that moves a task from {@code Accepted} to {@code Launched} and
 * attaches the placement details (agent host, tier, agent attributes, allocated
 * two-level resource) to it.
 *
 * @param zoneAttributeName             name of the lease attribute holding the agent zone
 * @param lease                         lease of the agent the task was placed on
 * @param consumeResult                 consumed resource set, recorded as the task's two-level resource
 * @param executorUriOverrideOpt        optional executor URI override added to the task context
 * @param attributesMap                 agent attributes, copied into the context with an {@code "agent."} prefix
 * @param opportunisticResourcesContext entries the task context starts out with
 * @param tier                          tier name added to the task context
 * @param titusRuntime                  runtime providing the clock for the status timestamp
 * @return the updater; it throws the {@code JobManagerException.unexpectedTaskState} error
 *         when applied to a task that is not in the {@code Accepted} state
 */
public static Function<Task, Task> newTaskLaunchConfigurationUpdater(String zoneAttributeName,
                                                                     VirtualMachineLease lease,
                                                                     PreferentialNamedConsumableResourceSet.ConsumeResult consumeResult,
                                                                     Optional<String> executorUriOverrideOpt,
                                                                     Map<String, String> attributesMap,
                                                                     Map<String, String> opportunisticResourcesContext,
                                                                     String tier,
                                                                     TitusRuntime titusRuntime) {
    return oldTask -> {
        TaskState currentState = oldTask.getStatus().getState();
        if (currentState != TaskState.Accepted) {
            throw JobManagerException.unexpectedTaskState(oldTask, TaskState.Accepted);
        }

        Map<String, String> context = new HashMap<>(opportunisticResourcesContext);
        Map<String, Protos.Attribute> leaseAttributes = CollectionsExt.nonNull(lease.getAttributeMap());

        // The "hostIp" attribute wins; the lease hostname is the fallback for both keys.
        String agentHost = findAttribute(leaseAttributes, "hostIp").orElse(lease.hostname());
        context.put(TaskAttributes.TASK_ATTRIBUTES_AGENT_HOST, agentHost);
        context.put(TaskAttributes.TASK_ATTRIBUTES_AGENT_HOST_IP, agentHost);

        executorUriOverrideOpt.ifPresent(uri -> context.put(TASK_ATTRIBUTES_EXECUTOR_URI_OVERRIDE, uri));
        context.put(TaskAttributes.TASK_ATTRIBUTES_TIER, tier);

        if (!leaseAttributes.isEmpty()) {
            for (Map.Entry<String, String> agentAttribute : attributesMap.entrySet()) {
                context.put("agent." + agentAttribute.getKey(), agentAttribute.getValue());
            }

            // TODO Some agent attribute names are configurable, some not. We need to clean this up.
            findAttribute(leaseAttributes, zoneAttributeName)
                    .ifPresent(zone -> context.put(TaskAttributes.TASK_ATTRIBUTES_AGENT_ZONE, zone));
            findAttribute(leaseAttributes, "id")
                    .ifPresent(instanceId -> context.put(TaskAttributes.TASK_ATTRIBUTES_AGENT_INSTANCE_ID, instanceId));
        }

        TaskStatus launchedStatus = JobModel.newTaskStatus()
                .withState(TaskState.Launched)
                .withReasonCode("scheduled")
                .withReasonMessage("Fenzo task placement")
                .withTimestamp(titusRuntime.getClock().wallTime())
                .build();

        TwoLevelResource allocatedResource = TwoLevelResource.newBuilder()
                .withName(consumeResult.getAttrName())
                .withValue(consumeResult.getResName())
                .withIndex(consumeResult.getIndex())
                .build();

        return JobFunctions.addAllocatedResourcesToTask(oldTask, launchedStatus, allocatedResource, context);
    };
}
 
Example #24
Source File: TaskPlacementRecorder.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the leases used by the wrapped assignment result.
 *
 * @return the leases reported by {@code assignmentResult.getLeasesUsed()}
 */
List<VirtualMachineLease> getLeases() {
    List<VirtualMachineLease> leasesUsed = assignmentResult.getLeasesUsed();
    return leasesUsed;
}
 
Example #25
Source File: TaskPlacementRecorder.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Records the placement of a single assigned task and builds the task info request
 * used to launch it.
 *
 * <p>The returned observable emits at most one (assignment, task info request) pair.
 * It emits nothing when the task cannot be found, when recording the placement fails
 * or times out, or when building the request throws; in those cases the task is either
 * removed from Fenzo or killed, as detailed in the inline comments.
 *
 * @param assignment       agent assignment the task belongs to; its first lease is used
 * @param assignmentResult Fenzo assignment result for the task
 * @return observable of the assignment paired with the created task info request
 */
private Observable<Pair<AgentAssignment, TaskInfoRequest>> processTask(
        AgentAssignment assignment,
        TaskAssignmentResult assignmentResult) {
    // Only the first lease of the assignment is consulted for hostname and slave id.
    VirtualMachineLease lease = assignment.getLeases().get(0);
    TitusQueuableTask fenzoTask = (TitusQueuableTask) assignmentResult.getRequest();

    Optional<Pair<Job<?>, Task>> v3JobAndTask = v3JobOperations.findTaskById(fenzoTask.getId());
    if (!v3JobAndTask.isPresent()) {
        // Unknown task: drop it instead of launching.
        logger.warn("Rejecting assignment and removing task after not finding jobMgr for task: {}", fenzoTask.getId());
        removeTask(assignmentResult, fenzoTask);
        return Observable.empty();
    }

    Job<?> v3Job = v3JobAndTask.get().getLeft();
    Task v3Task = v3JobAndTask.get().getRight();

    try {
        // Only the first consumed resource set is used.
        PreferentialNamedConsumableResourceSet.ConsumeResult consumeResult = assignmentResult.getrSets().get(0);

        Map<String, String> attributesMap = assignment.getAttributesMap();
        Optional<String> executorUriOverrideOpt = JobManagerUtil.getExecutorUriOverride(config, attributesMap);
        Map<String, String> opportunisticResourcesContext = buildOpportunisticResourcesContext(assignmentResult);

        // Record the placement first; the task info is created only after that completes,
        // because it is appended with concatWith.
        return v3JobOperations.recordTaskPlacement(
                fenzoTask.getId(),
                oldTask -> JobManagerUtil.newTaskLaunchConfigurationUpdater(
                        masterConfiguration.getHostZoneAttributeName(), lease, consumeResult,
                        executorUriOverrideOpt, attributesMap, opportunisticResourcesContext, getTierName(fenzoTask),
                        titusRuntime
                ).apply(oldTask),
                JobManagerConstants.SCHEDULER_CALLMETADATA.toBuilder().withCallReason("Record task placement").build()
        ).toObservable().cast(TaskInfoRequest.class).concatWith(Observable.fromCallable(() -> {
                    return v3TaskInfoRequestFactory.newTaskInfo(fenzoTask, v3Job, v3Task, lease.hostname(), attributesMap,
                            lease.getOffer().getSlaveId(), consumeResult, executorUriOverrideOpt,
                            opportunisticResourcesContext);
                }
        )).timeout(
                STORE_UPDATE_TIMEOUT_MS, TimeUnit.MILLISECONDS
        ).onErrorResumeNext(error -> {
            Throwable recordTaskError = (Throwable) error; // Type inference issue
            if (JobManagerException.hasErrorCode(recordTaskError, JobManagerException.ErrorCode.UnexpectedTaskState)) {
                // The task left the Accepted state in the meantime: remove it rather than kill it.
                logger.info("Not launching task: {} with state: {} and removing from fenzo as it is no longer in Accepted state (probably killed)",
                        v3Task.getId(), v3Task.getStatus().getState());
                removeTask(assignmentResult, fenzoTask);
            } else {
                if (error instanceof TimeoutException) {
                    logger.error("Timed out during writing task {} (job {}) status update to the store", fenzoTask.getId(), v3Job.getId());
                } else {
                    logger.info("Not launching task due to model update failure: {}", v3Task.getId(), error);
                }
                // Any other failure, including a timeout, kills the task.
                killBrokenV3Task(fenzoTask, "model update error: " + ExceptionExt.toMessageChain(recordTaskError));
            }
            // Errors are swallowed here; the caller simply sees no result for this task.
            return Observable.empty();
        }).map(taskInfoRequest -> Pair.of(assignment, taskInfoRequest));
    } catch (Exception e) {
        // Exceptions thrown while assembling the chain above.
        killBrokenV3Task(fenzoTask, ExceptionExt.toMessageChain(e));
        logger.error("Fatal error when creating Mesos#TaskInfo for task: {}", fenzoTask.getId(), e);
        return Observable.empty();
    }
}
 
Example #26
Source File: SchedulerUtils.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the instance group name from the lease's attributes.
 *
 * @param instanceGroupAttributeName name of the attribute holding the instance group name
 * @param lease                      lease whose attribute map is consulted
 * @return the attribute value, or an empty optional when the resolved value is an empty string
 */
public static Optional<String> getInstanceGroupName(String instanceGroupAttributeName, VirtualMachineLease lease) {
    String groupName = getAttributeValueOrEmptyString(lease.getAttributeMap(), instanceGroupAttributeName);
    if (groupName.isEmpty()) {
        return Optional.empty();
    }
    return Optional.of(groupName);
}
 
Example #27
Source File: SchedulerUtils.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that looks the attribute up in the lease's attribute map by
 * delegating to the overload that takes the map directly.
 *
 * @param lease         lease whose attribute map is consulted
 * @param attributeName name of the attribute to look up
 * @param defaultValue  fallback value passed through to the delegate
 * @return whatever the map-based overload returns for these arguments
 */
public static String getAttributeValueOrDefault(VirtualMachineLease lease, String attributeName, String defaultValue) {
    return getAttributeValueOrDefault(lease.getAttributeMap(), attributeName, defaultValue);
}
 
Example #28
Source File: StubbedVirtualMachineMasterService.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a stub lease with fixed resource figures whose id is derived from the task id.
 * The stub has no port ranges, attributes or scalar values, and does not support
 * {@code getOffer()}.
 *
 * @param taskId task id appended to the {@code "leaseOf#"} prefix to form the lease id
 * @return the stub lease
 */
VirtualMachineLease buildLease(String taskId) {
    return new VirtualMachineLease() {
        /** Lease id: the "leaseOf#" prefix followed by the task id. */
        @Override
        public String getId() {
            return "leaseOf#" + taskId;
        }

        /** Fixed offer time: the number of milliseconds in one day. */
        @Override
        public long getOfferedTime() {
            return TimeUnit.DAYS.toMillis(1L);
        }

        /** Fixed hostname. */
        @Override
        public String hostname() {
            return "i-12345";
        }

        /** Fixed VM id. */
        @Override
        public String getVMID() {
            return "vm#0";
        }

        /** Fixed CPU core count. */
        @Override
        public double cpuCores() {
            return 16.0;
        }

        /** Fixed memory size. */
        @Override
        public double memoryMB() {
            return 16384.0;
        }

        /** Fixed network bandwidth. */
        @Override
        public double networkMbps() {
            return 2000.0;
        }

        /** Fixed disk size. */
        @Override
        public double diskMB() {
            return 32768.0;
        }

        /** The stub offers no port ranges. */
        @Override
        public List<Range> portRanges() {
            return Collections.emptyList();
        }

        /** Not available on the stub; always throws. */
        @Override
        public Protos.Offer getOffer() {
            throw new IllegalStateException("not supported");
        }

        /** The stub carries no attributes. */
        @Override
        public Map<String, Protos.Attribute> getAttributeMap() {
            return Collections.emptyMap();
        }

        /** No scalar value is known for any name. */
        @Override
        public Double getScalarValue(String name) {
            return null;
        }

        /** The stub carries no scalar values. */
        @Override
        public Map<String, Double> getScalarValues() {
            return Collections.emptyMap();
        }
    };
}
 
Example #29
Source File: DefaultSchedulingService.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Aggregates resource figures over all given agent states and publishes them to the gauges.
 *
 * <p>For each agent the method:
 * <ul>
 *   <li>inspects the resource set named {@code "enis"} (case-insensitive): every entry returned by
 *       {@code getUsageBy()} counts towards the total, and entries whose own {@code getUsageBy()}
 *       is non-empty count as used;</li>
 *   <li>adds the currently available CPU / memory / disk / network to the totals;</li>
 *   <li>tracks how many agents are disabled past "now" and the min / max remaining duration;</li>
 *   <li>adds the resources of every running {@code V3QueueableTask} to both the used and the total
 *       figures (total = still available + already consumed).</li>
 * </ul>
 *
 * <p>Any exception is logged and swallowed so that metrics reporting never breaks the caller.
 *
 * @param vmCurrentStates current state of every agent known to the scheduler
 */
private void verifyAndReportResourceUsageMetrics(List<VirtualMachineCurrentState> vmCurrentStates) {
    try {
        double totalCpu = 0.0;
        double usedCpu = 0.0;
        double totalMemory = 0.0;
        double usedMemory = 0.0;
        double totalDisk = 0.0;
        double usedDisk = 0.0;
        double totalNetworkMbps = 0.0;
        double usedNetworkMbps = 0.0;
        long totalNetworkInterfaces = 0;
        long usedNetworkInterfaces = 0;
        long totalDisabled = 0;
        // Seeded with MAX_VALUE so the first disabled agent establishes the minimum. Seeding with 0
        // would make Math.min(0, positive) pin the gauge at 0 forever.
        long currentMinDisableDuration = Long.MAX_VALUE;
        long currentMaxDisableDuration = 0;
        long now = titusRuntime.getClock().wallTime();

        for (VirtualMachineCurrentState state : vmCurrentStates) {
            for (PreferentialNamedConsumableResourceSet set : state.getResourceSets().values()) {
                if (set.getName().equalsIgnoreCase("enis")) {
                    List<PreferentialNamedConsumableResource> usageBy = set.getUsageBy();
                    totalNetworkInterfaces += usageBy.size();
                    for (PreferentialNamedConsumableResource consumableResource : usageBy) {
                        if (!consumableResource.getUsageBy().isEmpty()) {
                            usedNetworkInterfaces++;
                        }
                    }
                }
            }

            final VirtualMachineLease currAvailableResources = state.getCurrAvailableResources();
            if (currAvailableResources != null) {
                totalCpu += currAvailableResources.cpuCores();
                totalMemory += currAvailableResources.memoryMB();
                totalDisk += currAvailableResources.diskMB();
                totalNetworkMbps += currAvailableResources.networkMbps();
            }

            long disableDuration = state.getDisabledUntil() - now;
            if (disableDuration > 0) {
                totalDisabled++;
                currentMinDisableDuration = Math.min(currentMinDisableDuration, disableDuration);
                currentMaxDisableDuration = Math.max(currentMaxDisableDuration, disableDuration);
            }

            final Collection<TaskRequest> runningTasks = state.getRunningTasks();
            if (runningTasks != null && !runningTasks.isEmpty()) {
                for (TaskRequest t : runningTasks) {
                    // Test the type directly instead of casting first: a task of another kind is
                    // skipped rather than aborting the whole metrics pass with a ClassCastException.
                    if (t instanceof V3QueueableTask) {
                        //TODO redo the metrics publishing but we should keep it the same as v2 for now
                        usedCpu += t.getCPUs();
                        totalCpu += t.getCPUs();
                        usedMemory += t.getMemory();
                        totalMemory += t.getMemory();
                        usedDisk += t.getDisk();
                        totalDisk += t.getDisk();
                        usedNetworkMbps += t.getNetworkMbps();
                        totalNetworkMbps += t.getNetworkMbps();
                    }
                }
            }
        }

        // Guarded percentages: an unguarded used/total yields NaN for a resource with total == 0,
        // Math.max propagates NaN, and (long) NaN is 0 - which would zero the dominant gauge.
        final double cpuUtilization = utilizationPercent(usedCpu, totalCpu);
        final double memoryUtilization = utilizationPercent(usedMemory, totalMemory);
        final double diskUtilization = utilizationPercent(usedDisk, totalDisk);
        final double networkUtilization = utilizationPercent(usedNetworkMbps, totalNetworkMbps);
        final double dominantResourceUtilization = Math.max(
                Math.max(cpuUtilization, memoryUtilization),
                Math.max(diskUtilization, networkUtilization)
        );

        totalDisabledAgentsGauge.set(totalDisabled);
        // With no disabled agents there is no minimum to report; publish 0 instead of the seed value.
        minDisableDurationGauge.set(totalDisabled > 0 ? currentMinDisableDuration : 0);
        maxDisableDurationGauge.set(currentMaxDisableDuration);
        totalAvailableCpusGauge.set((long) totalCpu);
        totalAllocatedCpusGauge.set((long) usedCpu);
        cpuUtilizationGauge.set((long) cpuUtilization);
        totalAvailableMemoryGauge.set((long) totalMemory);
        totalAllocatedMemoryGauge.set((long) usedMemory);
        memoryUtilizationGauge.set((long) memoryUtilization);
        totalAvailableDiskGauge.set((long) totalDisk);
        totalAllocatedDiskGauge.set((long) usedDisk);
        diskUtilizationGauge.set((long) diskUtilization);
        totalAvailableNetworkMbpsGauge.set((long) totalNetworkMbps);
        totalAllocatedNetworkMbpsGauge.set((long) usedNetworkMbps);
        networkUtilizationGauge.set((long) networkUtilization);
        this.dominantResourceUtilizationGauge.set((long) dominantResourceUtilization);
        totalAvailableNetworkInterfacesGauge.set(totalNetworkInterfaces);
        totalAllocatedNetworkInterfacesGauge.set(usedNetworkInterfaces);
    } catch (Exception e) {
        logger.error("Error settings metrics with error: ", e);
    }
}

/**
 * Returns {@code used} as a percentage of {@code total}, treating any total below 1.0 as 1.0 so
 * the result is never NaN or infinite.
 */
private static double utilizationPercent(double used, double total) {
    return used * 100.0 / Math.max(1.0, total);
}
 
Example #30
Source File: DefaultSchedulingService.java    From titus-control-plane with Apache License 2.0 4 votes vote down vote up
/**
 * Callback invoked with the outcome of one scheduling iteration.
 *
 * <p>If the iteration reported exceptions, they are logged (members of a
 * {@code TaskQueueMultiException} are logged individually), {@code checkIfExitOnSchedError} is
 * called, and the method returns without launching tasks or publishing metrics. Otherwise the
 * placements are recorded, tasks are launched, failure callbacks are processed and the
 * per-iteration gauges and timers are updated.
 *
 * @param schedulingResult result of the scheduling iteration
 */
private void schedulingResultsHandler(SchedulingResult schedulingResult) {
    logger.debug("Task placement results: taskAndAgentEvaluations={}, executionTimeMs={}",
            schedulingResult.getNumAllocations(), schedulingResult.getRuntime());
    final long handlerStartedAt = titusRuntime.getClock().wallTime();
    totalSchedulingIterationMesosLatency.set(0);

    // Failure path: report every exception, then bail out before any task is launched.
    if (!schedulingResult.getExceptions().isEmpty()) {
        logger.error("Exceptions in scheduling iteration:");
        for (Exception failure : schedulingResult.getExceptions()) {
            if (!(failure instanceof TaskQueueMultiException)) {
                logger.error(failure.getMessage(), failure);
                continue;
            }
            for (Exception nested : ((TaskQueueMultiException) failure).getExceptions()) {
                logger.error(nested.getMessage(), nested);
            }
        }
        checkIfExitOnSchedError("One or more errors in Fenzo scheduling iteration");
        return;
    }

    final int failedCount = schedulingResult.getFailures().size();

    // Record placements, timing only the recorder call itself.
    final long recordingStartedAt = titusRuntime.getClock().wallTime();
    final List<Pair<List<VirtualMachineLease>, List<TaskInfoRequest>>> placements =
            taskPlacementRecorder.record(schedulingResult);
    recordTaskPlacementLatencyTimer.record(
            titusRuntime.getClock().wallTime() - recordingStartedAt, TimeUnit.MILLISECONDS);

    // Launch everything first, then count - kept as two passes on purpose.
    for (Pair<List<VirtualMachineLease>, List<TaskInfoRequest>> placement : placements) {
        launchTasks(placement.getLeft(), placement.getRight());
    }
    int assignedCount = 0;
    for (Pair<List<VirtualMachineLease>, List<TaskInfoRequest>> placement : placements) {
        assignedCount += placement.getRight().size();
    }

    recordLastSchedulingResult(schedulingResult);
    taskPlacementFailureClassifier.update(schedulingResult);
    processTaskSchedulingFailureCallbacks(taskPlacementFailureClassifier.getLastTaskPlacementFailures());

    // Per-iteration gauges.
    totalTasksPerIterationGauge.set(assignedCount + failedCount);
    assignedTasksPerIterationGauge.set(assignedCount);
    failedTasksPerIterationGauge.set(failedCount);
    taskAndAgentEvaluationsPerIterationGauge.set(schedulingResult.getNumAllocations());
    offersReceivedGauge.set(schedulingResult.getLeasesAdded());
    offersRejectedGauge.set(schedulingResult.getLeasesRejected());
    totalActiveAgentsGauge.set(schedulingResult.getTotalVMsCount());

    // Latency timers.
    fenzoSchedulingResultLatencyTimer.record(schedulingResult.getRuntime(), TimeUnit.MILLISECONDS);
    fenzoCallbackLatencyTimer.record(
            titusRuntime.getClock().wallTime() - handlerStartedAt, TimeUnit.MILLISECONDS);
    mesosLatencyTimer.record(totalSchedulingIterationMesosLatency.get(), TimeUnit.MILLISECONDS);
}