org.apache.flink.mesos.scheduler.LaunchCoordinator Java Examples

Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Requests one additional Mesos worker for the given resource profile.
 *
 * <p>The worker is written to the worker store and tracked in {@code workersInNew} before the
 * task monitor and the launch coordinator are told about it.
 *
 * @param resourceProfile resource profile recorded with the newly created worker
 * @return {@code slotsPerWorker} when the request was issued; an empty list when any step threw,
 *         in which case the failure has been handed to {@code onFatalError}
 */
@Override
public Collection<ResourceProfile> startNewWorker(ResourceProfile resourceProfile) {
	LOG.info("Starting a new worker.");

	Collection<ResourceProfile> offeredSlots;
	try {
		// record the worker in the store first, then track it in memory
		final MesosWorkerStore.Worker newWorker =
			MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), resourceProfile);
		workerStore.putWorker(newWorker);
		workersInNew.put(extractResourceID(newWorker.taskID()), newWorker);

		final LaunchableMesosWorker launchableWorker = createLaunchableMesosWorker(newWorker.taskID());

		LOG.info(
			"Scheduling Mesos task {} with ({} MB, {} cpus).",
			launchableWorker.taskID().getValue(),
			launchableWorker.taskRequest().getMemory(),
			launchableWorker.taskRequest().getCPUs());

		// the task monitor is told the worker's goal state ...
		taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(newWorker)), selfActor);

		// ... and the launch coordinator receives the single launch request
		launchCoordinator.tell(
			new LaunchCoordinator.Launch(Collections.singletonList(launchableWorker)),
			selfActor);

		offeredSlots = slotsPerWorker;
	} catch (Exception ex) {
		onFatalError(new ResourceManagerException("Unable to request new workers.", ex));
		offeredSlots = Collections.emptyList();
	}
	return offeredSlots;
}

Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Allocate a worker using the RM.
 *
 * <p>Stubs the worker store to hand out {@code taskID}, triggers {@code allocateResource} through
 * {@code resourceManager.callAsync}, and then checks the store, the {@code workersInNew} map and
 * the two probes.
 *
 * @param taskID task ID the stubbed worker store returns
 * @param resourceProfile resource profile passed to the allocation request
 * @return the worker the RM is expected to have stored
 */
public MesosWorkerStore.Worker allocateWorker(Protos.TaskID taskID, ResourceProfile resourceProfile) throws Exception {
	when(rmServices.workerStore.newTaskID()).thenReturn(taskID);

	final long timeoutMillis = timeout.toMilliseconds();
	rmServices.slotManagerStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

	final CompletableFuture<Void> allocation = resourceManager.callAsync(
		() -> {
			rmServices.rmActions.allocateResource(resourceProfile);
			return null;
		},
		timeout);
	final MesosWorkerStore.Worker expectedWorker =
		MesosWorkerStore.Worker.newWorker(taskID, resourceProfile);

	// surfaces any exception raised by the asynchronous call
	allocation.get(timeoutMillis, TimeUnit.MILLISECONDS);

	// consume the messages on both probes while checking the stored state
	verify(rmServices.workerStore, Mockito.timeout(timeoutMillis)).putWorker(expectedWorker);
	assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(taskID), expectedWorker));
	resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
	resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class);
	return expectedWorker;
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

6 votes

/**
 * Allocate a worker using the RM.
 *
 * <p>Stubs the worker store to hand out {@code taskID}, triggers {@code allocateResource} through
 * {@code resourceManager.callAsync}, and then checks the store, the {@code workersInNew} map and
 * the two probes.
 *
 * @param taskID task ID the stubbed worker store returns
 * @param workerResourceSpec worker resource spec passed to the allocation request
 * @return the worker the RM is expected to have stored
 */
public MesosWorkerStore.Worker allocateWorker(Protos.TaskID taskID, WorkerResourceSpec workerResourceSpec) throws Exception {
	when(rmServices.workerStore.newTaskID()).thenReturn(taskID);

	final long timeoutMillis = timeout.toMilliseconds();
	rmServices.slotManagerStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

	final CompletableFuture<Void> allocation = resourceManager.callAsync(
		() -> {
			rmServices.rmActions.allocateResource(workerResourceSpec);
			return null;
		},
		timeout);
	final MesosWorkerStore.Worker expectedWorker =
		MesosWorkerStore.Worker.newWorker(taskID, workerResourceSpec);

	// surfaces any exception raised by the asynchronous call
	allocation.get(timeoutMillis, TimeUnit.MILLISECONDS);

	// consume the messages on both probes while checking the stored state
	verify(rmServices.workerStore, Mockito.timeout(timeoutMillis)).putWorker(expectedWorker);
	assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(taskID), expectedWorker));
	resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
	resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class);
	return expectedWorker;
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

6 votes

/**
 * Allocate a worker using the RM.
 *
 * <p>Stubs the worker store to hand out {@code taskID}, triggers {@code allocateResource} through
 * {@code resourceManager.callAsync}, and then checks the store, the {@code workersInNew} map and
 * the two probes.
 *
 * @param taskID task ID the stubbed worker store returns
 * @param resourceProfile resource profile passed to the allocation request
 * @return the worker the RM is expected to have stored
 */
public MesosWorkerStore.Worker allocateWorker(Protos.TaskID taskID, ResourceProfile resourceProfile) throws Exception {
	when(rmServices.workerStore.newTaskID()).thenReturn(taskID);

	final long timeoutMillis = timeout.toMilliseconds();
	rmServices.slotManagerStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

	final CompletableFuture<Void> allocation = resourceManager.callAsync(
		() -> {
			rmServices.rmActions.allocateResource(resourceProfile);
			return null;
		},
		timeout);
	final MesosWorkerStore.Worker expectedWorker =
		MesosWorkerStore.Worker.newWorker(taskID, resourceProfile);

	// surfaces any exception raised by the asynchronous call
	allocation.get(timeoutMillis, TimeUnit.MILLISECONDS);

	// consume the messages on both probes while checking the stored state
	verify(rmServices.workerStore, Mockito.timeout(timeoutMillis)).putWorker(expectedWorker);
	assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(taskID), expectedWorker));
	resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
	resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class);
	return expectedWorker;
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that stopping a launched worker persists its released state, moves it from
 * {@code workersInLaunch} to {@code workersBeingReturned}, and notifies both probes.
 */
@Test
public void testStopWorker() throws Exception {
	new Context() {{
		// seed the mocked worker store with a single launched worker, then start the RM
		final MesosWorkerStore.Worker launchedWorker =
			MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec).launchWorker(slave1, slave1host);
		when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
		when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(launchedWorker));
		startResourceManager();

		// consume the Assign message waiting on the launch coordinator probe
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class);

		// ask the RM to stop that worker
		resourceManager.stopWorker(new RegisteredMesosWorkerNode(launchedWorker));

		// the released worker must have been stored and moved between the in-memory maps
		final MesosWorkerStore.Worker releasedWorker = launchedWorker.releaseWorker();
		verify(rmServices.workerStore).putWorker(releasedWorker);
		assertThat(resourceManager.workersInLaunch.entrySet(), empty());
		assertThat(
			resourceManager.workersBeingReturned,
			hasEntry(extractResourceID(task1), releasedWorker));

		// both the task router and the launch coordinator must have been informed
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class);
	}};
}

Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates the launch coordinator actor under the name {@code "launchCoordinator"}.
 *
 * @param schedulerDriver scheduler driver passed into the actor's props
 * @param selfActor actor reference passed into the actor's props
 * @return reference to the actor created through {@code actorSystem}
 */
protected ActorRef createLaunchCoordinator(
		SchedulerDriver schedulerDriver,
		ActorRef selfActor) {
	final String actorName = "launchCoordinator";
	return actorSystem.actorOf(
		LaunchCoordinator.createActorProps(
			LaunchCoordinator.class,
			selfActor,
			flinkConfig,
			schedulerDriver,
			createOptimizer()),
		actorName);
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that an allocation request makes the RM persist a new worker, track it in
 * {@code workersInNew}, and message the task router and the launch coordinator.
 */
@Test
public void testRequestNewWorkers() throws Exception {
	new Context() {{
		startResourceManager();

		// hand out task1 once; a second request for a task ID throws
		when(rmServices.workerStore.newTaskID()).thenReturn(task1).thenThrow(new AssertionFailedError());

		final long timeoutMillis = timeout.toMilliseconds();
		rmServices.slotManagerStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

		final CompletableFuture<Void> allocation = resourceManager.callAsync(
			() -> {
				rmServices.rmActions.allocateResource(workerResourceSpec);
				return null;
			},
			timeout);

		// surfaces any exception raised by the asynchronous call
		allocation.get(timeoutMillis, TimeUnit.MILLISECONDS);

		// expected outcome: worker persisted, tracked as new, and announced on both probes
		final MesosWorkerStore.Worker expectedWorker =
			MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec);
		verify(rmServices.workerStore, Mockito.timeout(timeoutMillis)).putWorker(expectedWorker);
		assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(task1), expectedWorker));
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class);
	}};
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies how the RM sorts recovered workers into its in-memory maps and which messages it
 * sends for them on startup.
 */
@Test
public void testRecoverWorkers() throws Exception {
	new Context() {{
		// one worker per state (new, launched, released) is returned by the mocked store
		final MesosWorkerStore.Worker newWorker =
			MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec);
		final MesosWorkerStore.Worker launchedWorker =
			MesosWorkerStore.Worker.newWorker(task2, workerResourceSpec)
				.launchWorker(slave1, slave1host);
		final MesosWorkerStore.Worker releasedWorker =
			MesosWorkerStore.Worker.newWorker(task3, workerResourceSpec)
				.launchWorker(slave1, slave1host)
				.releaseWorker();
		when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
		when(rmServices.workerStore.recoverWorkers())
			.thenReturn(Arrays.asList(newWorker, launchedWorker, releasedWorker));
		startResourceManager();

		// the worker in the new state must not be tracked; the other two land in their maps
		assertThat(resourceManager.workersInNew.entrySet(), empty());
		assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), launchedWorker));
		assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), releasedWorker));
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);

		// exactly one Assign message, carrying only the launched task and its host
		final LaunchCoordinator.Assign assignMessage =
			resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class);
		assertThat(assignMessage.tasks(), hasSize(1));
		assertThat(assignMessage.tasks().get(0).f0.getId(), equalTo(task2.getValue()));
		assertThat(assignMessage.tasks().get(0).f1, equalTo(slave1host));
		resourceManager.launchCoordinator.expectNoMsg();
	}};
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Stops the given worker if it is currently tracked in {@code workersInLaunch}.
 *
 * <p>A tracked worker is released, written to the worker store, and moved to
 * {@code workersBeingReturned}. The task monitor is notified, and the launch coordinator is told
 * about the unassignment when the worker has a hostname. Workers that are already being returned
 * or that are unknown are only logged.
 *
 * @param workerNode node identifying the worker to stop
 * @return always {@code true}; exceptions raised while releasing are handed to
 *         {@code onFatalError}
 */
@Override
public boolean stopWorker(RegisteredMesosWorkerNode workerNode) {
	LOG.info("Stopping worker {}.", workerNode.getResourceID());
	try {
		if (!workersInLaunch.containsKey(workerNode.getResourceID())) {
			// nothing to release: either a stop is already under way or the worker is unknown
			if (workersBeingReturned.containsKey(workerNode.getResourceID())) {
				LOG.info("Ignoring request to stop worker {} because it is already being stopped.", workerNode.getResourceID());
			} else {
				LOG.warn("Unrecognized worker {}.", workerNode.getResourceID());
			}
		} else {
			// switch the worker to its released state, store it, and track it as being returned
			final MesosWorkerStore.Worker releasedWorker =
				workersInLaunch.remove(workerNode.getResourceID()).releaseWorker();
			workerStore.putWorker(releasedWorker);
			workersBeingReturned.put(extractResourceID(releasedWorker.taskID()), releasedWorker);

			taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(releasedWorker)), selfActor);

			// the launch coordinator is only told about the unassignment when a hostname is known
			if (releasedWorker.hostname().isDefined()) {
				launchCoordinator.tell(
					new LaunchCoordinator.Unassign(releasedWorker.taskID(), releasedWorker.hostname().get()),
					selfActor);
			}
		}
	} catch (Exception e) {
		onFatalError(new ResourceManagerException("Unable to release a worker.", e));
	}

	return true;
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Requests one additional Mesos worker with the given resource spec.
 *
 * <p>The spec must equal the one derived from the task executor process spec held by
 * {@code taskManagerParameters}. The worker is written to the worker store and tracked in
 * {@code workersInNew} before the task monitor and the launch coordinator are told about it.
 *
 * @param workerResourceSpec resource spec recorded with the newly created worker
 * @return {@code true} when the request was issued; {@code false} when any step threw, in which
 *         case the failure has been handed to {@code onFatalError}
 */
@Override
public boolean startNewWorker(WorkerResourceSpec workerResourceSpec) {
	final WorkerResourceSpec configuredSpec = WorkerResourceSpec.fromTaskExecutorProcessSpec(
		taskManagerParameters.containeredParameters().getTaskExecutorProcessSpec());
	Preconditions.checkArgument(Objects.equals(workerResourceSpec, configuredSpec));

	LOG.info("Starting a new worker.");

	boolean requested;
	try {
		// record the worker in the store first, then track it in memory
		final MesosWorkerStore.Worker newWorker =
			MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), workerResourceSpec);
		workerStore.putWorker(newWorker);
		workersInNew.put(extractResourceID(newWorker.taskID()), newWorker);

		final LaunchableMesosWorker launchableWorker = createLaunchableMesosWorker(newWorker.taskID());

		LOG.info(
			"Scheduling Mesos task {} with ({} MB, {} cpus, {} gpus, {} disk MB, {} Mbps).",
			launchableWorker.taskID().getValue(),
			launchableWorker.taskRequest().getMemory(),
			launchableWorker.taskRequest().getCPUs(),
			launchableWorker.taskRequest().getScalarRequests().get("gpus"),
			launchableWorker.taskRequest().getDisk(),
			launchableWorker.taskRequest().getNetworkMbps());

		// the task monitor is told the worker's goal state ...
		taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(newWorker)), selfActor);

		// ... and the launch coordinator receives the single launch request
		launchCoordinator.tell(
			new LaunchCoordinator.Launch(Collections.singletonList(launchableWorker)),
			selfActor);

		requested = true;
	} catch (Exception ex) {
		onFatalError(new ResourceManagerException("Unable to request new workers.", ex));
		requested = false;
	}
	return requested;
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Recovers given framework/worker information.
 *
 * <p>Launched workers are tracked in {@code workersInLaunch} and reported to the launch
 * coordinator together with their hostname; released workers are tracked in
 * {@code workersBeingReturned}. The task monitor is told the goal state of every worker in the
 * list, whatever its state.
 *
 * @param tasksFromPreviousAttempts workers to recover; when empty, no state is changed and no
 *                                  message is sent
 * @see #getWorkersAsync()
 */
private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) {
	assert workersInNew.isEmpty();
	assert workersInLaunch.isEmpty();
	assert workersBeingReturned.isEmpty();

	if (tasksFromPreviousAttempts.isEmpty()) {
		return;
	}

	LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size());

	final List<Tuple2<TaskRequest, String>> priorAssignments =
		new ArrayList<>(tasksFromPreviousAttempts.size());

	for (final MesosWorkerStore.Worker recovered : tasksFromPreviousAttempts) {
		switch (recovered.state()) {
			case Launched: {
				workersInLaunch.put(extractResourceID(recovered.taskID()), recovered);
				final LaunchableMesosWorker launchable = createLaunchableMesosWorker(recovered.taskID());
				priorAssignments.add(new Tuple2<>(launchable.taskRequest(), recovered.hostname().get()));
				break;
			}
			case Released:
				workersBeingReturned.put(extractResourceID(recovered.taskID()), recovered);
				break;
			default:
				// workers in any other state are not added to either map
				break;
		}
		// the task monitor hears about every worker, tracked or not
		taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(recovered)), selfActor);
	}

	// pass the collected host assignments on to the launch coordinator, if there are any
	if (!priorAssignments.isEmpty()) {
		launchCoordinator.tell(new LaunchCoordinator.Assign(priorAssignments), selfActor);
	}
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Creates the launch coordinator actor under the name {@code "launchCoordinator"}.
 *
 * @param schedulerDriver scheduler driver passed into the actor's props
 * @param selfActor actor reference passed into the actor's props
 * @return reference to the actor created through {@code actorSystem}
 */
protected ActorRef createLaunchCoordinator(
		SchedulerDriver schedulerDriver,
		ActorRef selfActor) {
	final String actorName = "launchCoordinator";
	return actorSystem.actorOf(
		LaunchCoordinator.createActorProps(
			LaunchCoordinator.class,
			selfActor,
			flinkConfig,
			schedulerDriver,
			createOptimizer()),
		actorName);
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that stopping a launched worker persists its released state, moves it from
 * {@code workersInLaunch} to {@code workersBeingReturned}, and notifies both probes.
 */
@Test
public void testStopWorker() throws Exception {
	new Context() {{
		// seed the mocked worker store with a single launched worker, then start the RM
		final MesosWorkerStore.Worker launchedWorker =
			MesosWorkerStore.Worker.newWorker(task1).launchWorker(slave1, slave1host);
		when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
		when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(launchedWorker));
		startResourceManager();

		// consume the Assign message waiting on the launch coordinator probe
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class);

		// ask the RM to stop that worker
		resourceManager.stopWorker(new RegisteredMesosWorkerNode(launchedWorker));

		// the released worker must have been stored and moved between the in-memory maps
		final MesosWorkerStore.Worker releasedWorker = launchedWorker.releaseWorker();
		verify(rmServices.workerStore).putWorker(releasedWorker);
		assertThat(resourceManager.workersInLaunch.entrySet(), empty());
		assertThat(
			resourceManager.workersBeingReturned,
			hasEntry(extractResourceID(task1), releasedWorker));

		// both the task router and the launch coordinator must have been informed
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class);
	}};
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that an allocation request makes the RM persist a new worker, track it in
 * {@code workersInNew}, and message the task router and the launch coordinator.
 */
@Test
public void testRequestNewWorkers() throws Exception {
	new Context() {{
		startResourceManager();

		// hand out task1 once; a second request for a task ID throws
		when(rmServices.workerStore.newTaskID()).thenReturn(task1).thenThrow(new AssertionFailedError());

		final long timeoutMillis = timeout.toMilliseconds();
		rmServices.slotManagerStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

		final CompletableFuture<Void> allocation = resourceManager.callAsync(
			() -> {
				rmServices.rmActions.allocateResource(resourceProfile1);
				return null;
			},
			timeout);

		// surfaces any exception raised by the asynchronous call
		allocation.get(timeoutMillis, TimeUnit.MILLISECONDS);

		// expected outcome: worker persisted, tracked as new, and announced on both probes
		final MesosWorkerStore.Worker expectedWorker =
			MesosWorkerStore.Worker.newWorker(task1, resourceProfile1);
		verify(rmServices.workerStore, Mockito.timeout(timeoutMillis)).putWorker(expectedWorker);
		assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(task1), expectedWorker));
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class);
	}};
}

Source File: MesosResourceManagerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies how the RM sorts recovered workers into its in-memory maps and which messages it
 * sends for them on startup.
 */
@Test
public void testRecoverWorkers() throws Exception {
	new Context() {{
		// one worker per state (new, launched, released) is returned by the mocked store
		final MesosWorkerStore.Worker newWorker =
			MesosWorkerStore.Worker.newWorker(task1);
		final MesosWorkerStore.Worker launchedWorker =
			MesosWorkerStore.Worker.newWorker(task2)
				.launchWorker(slave1, slave1host);
		final MesosWorkerStore.Worker releasedWorker =
			MesosWorkerStore.Worker.newWorker(task3)
				.launchWorker(slave1, slave1host)
				.releaseWorker();
		when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
		when(rmServices.workerStore.recoverWorkers())
			.thenReturn(Arrays.asList(newWorker, launchedWorker, releasedWorker));
		startResourceManager();

		// the worker in the new state must not be tracked; the other two land in their maps
		assertThat(resourceManager.workersInNew.entrySet(), empty());
		assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), launchedWorker));
		assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), releasedWorker));
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);

		// exactly one Assign message, carrying only the launched task and its host
		final LaunchCoordinator.Assign assignMessage =
			resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class);
		assertThat(assignMessage.tasks(), hasSize(1));
		assertThat(assignMessage.tasks().get(0).f0.getId(), equalTo(task2.getValue()));
		assertThat(assignMessage.tasks().get(0).f1, equalTo(slave1host));
		resourceManager.launchCoordinator.expectNoMsg();
	}};
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Stops the given worker if it is currently tracked in {@code workersInLaunch}.
 *
 * <p>A tracked worker is released, written to the worker store, and moved to
 * {@code workersBeingReturned}. The task monitor is notified, and the launch coordinator is told
 * about the unassignment when the worker has a hostname. Workers that are already being returned
 * or that are unknown are only logged.
 *
 * @param workerNode node identifying the worker to stop
 * @return always {@code true}; exceptions raised while releasing are handed to
 *         {@code onFatalError}
 */
@Override
public boolean stopWorker(RegisteredMesosWorkerNode workerNode) {
	LOG.info("Stopping worker {}.", workerNode.getResourceID());
	try {
		if (!workersInLaunch.containsKey(workerNode.getResourceID())) {
			// nothing to release: either a stop is already under way or the worker is unknown
			if (workersBeingReturned.containsKey(workerNode.getResourceID())) {
				LOG.info("Ignoring request to stop worker {} because it is already being stopped.", workerNode.getResourceID());
			} else {
				LOG.warn("Unrecognized worker {}.", workerNode.getResourceID());
			}
		} else {
			// switch the worker to its released state, store it, and track it as being returned
			final MesosWorkerStore.Worker releasedWorker =
				workersInLaunch.remove(workerNode.getResourceID()).releaseWorker();
			workerStore.putWorker(releasedWorker);
			workersBeingReturned.put(extractResourceID(releasedWorker.taskID()), releasedWorker);

			taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(releasedWorker)), selfActor);

			// the launch coordinator is only told about the unassignment when a hostname is known
			if (releasedWorker.hostname().isDefined()) {
				launchCoordinator.tell(
					new LaunchCoordinator.Unassign(releasedWorker.taskID(), releasedWorker.hostname().get()),
					selfActor);
			}
		}
	} catch (Exception e) {
		onFatalError(new ResourceManagerException("Unable to release a worker.", e));
	}

	return true;
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Requests one additional Mesos worker for the given resource profile.
 *
 * <p>The request is declined with an empty list when the first entry of {@code slotsPerWorker}
 * does not match the profile. Otherwise the worker is written to the worker store and tracked in
 * {@code workersInNew} before the task monitor and the launch coordinator are told about it.
 *
 * @param resourceProfile resource profile recorded with the newly created worker
 * @return {@code slotsPerWorker} when the request was issued; an empty list when the profile does
 *         not match or when any step threw (the failure is handed to {@code onFatalError})
 */
@Override
public Collection<ResourceProfile> startNewWorker(ResourceProfile resourceProfile) {
	final ResourceProfile slotProfile = slotsPerWorker.iterator().next();
	if (!slotProfile.isMatching(resourceProfile)) {
		return Collections.emptyList();
	}

	LOG.info("Starting a new worker.");

	Collection<ResourceProfile> offeredSlots;
	try {
		// record the worker in the store first, then track it in memory
		final MesosWorkerStore.Worker newWorker =
			MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), resourceProfile);
		workerStore.putWorker(newWorker);
		workersInNew.put(extractResourceID(newWorker.taskID()), newWorker);

		final LaunchableMesosWorker launchableWorker = createLaunchableMesosWorker(newWorker.taskID());

		LOG.info(
			"Scheduling Mesos task {} with ({} MB, {} cpus).",
			launchableWorker.taskID().getValue(),
			launchableWorker.taskRequest().getMemory(),
			launchableWorker.taskRequest().getCPUs());

		// the task monitor is told the worker's goal state ...
		taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(newWorker)), selfActor);

		// ... and the launch coordinator receives the single launch request
		launchCoordinator.tell(
			new LaunchCoordinator.Launch(Collections.singletonList(launchableWorker)),
			selfActor);

		offeredSlots = slotsPerWorker;
	} catch (Exception ex) {
		onFatalError(new ResourceManagerException("Unable to request new workers.", ex));
		offeredSlots = Collections.emptyList();
	}
	return offeredSlots;
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Recovers given framework/worker information.
 *
 * <p>Launched workers are tracked in {@code workersInLaunch} and reported to the launch
 * coordinator together with their hostname; released workers are tracked in
 * {@code workersBeingReturned}. The task monitor is told the goal state of every worker in the
 * list, whatever its state.
 *
 * @param tasksFromPreviousAttempts workers to recover; when empty, no state is changed and no
 *                                  message is sent
 * @see #getWorkersAsync()
 */
private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) {
	assert workersInNew.isEmpty();
	assert workersInLaunch.isEmpty();
	assert workersBeingReturned.isEmpty();

	if (tasksFromPreviousAttempts.isEmpty()) {
		return;
	}

	LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size());

	final List<Tuple2<TaskRequest, String>> priorAssignments =
		new ArrayList<>(tasksFromPreviousAttempts.size());

	for (final MesosWorkerStore.Worker recovered : tasksFromPreviousAttempts) {
		switch (recovered.state()) {
			case Launched: {
				workersInLaunch.put(extractResourceID(recovered.taskID()), recovered);
				final LaunchableMesosWorker launchable = createLaunchableMesosWorker(recovered.taskID());
				priorAssignments.add(new Tuple2<>(launchable.taskRequest(), recovered.hostname().get()));
				break;
			}
			case Released:
				workersBeingReturned.put(extractResourceID(recovered.taskID()), recovered);
				break;
			default:
				// workers in any other state are not added to either map
				break;
		}
		// the task monitor hears about every worker, tracked or not
		taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(recovered)), selfActor);
	}

	// pass the collected host assignments on to the launch coordinator, if there are any
	if (!priorAssignments.isEmpty()) {
		launchCoordinator.tell(new LaunchCoordinator.Assign(priorAssignments), selfActor);
	}
}

Source File: MesosResourceManager.java From flink with Apache License 2.0

5 votes

/**
 * Creates the launch coordinator actor under the name {@code "launchCoordinator"}.
 *
 * @param schedulerDriver scheduler driver passed into the actor's props
 * @param selfActor actor reference passed into the actor's props
 * @return reference to the actor created through {@code actorSystem}
 */
protected ActorRef createLaunchCoordinator(
		SchedulerDriver schedulerDriver,
		ActorRef selfActor) {
	final String actorName = "launchCoordinator";
	return actorSystem.actorOf(
		LaunchCoordinator.createActorProps(
			LaunchCoordinator.class,
			selfActor,
			flinkConfig,
			schedulerDriver,
			createOptimizer()),
		actorName);
}

Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Verifies that stopping a launched worker persists its released state, moves it from
 * {@code workersInLaunch} to {@code workersBeingReturned}, and notifies both probes.
 */
@Test
public void testStopWorker() throws Exception {
	new Context() {{
		// seed the mocked worker store with a single launched worker, then start the RM
		final MesosWorkerStore.Worker launchedWorker =
			MesosWorkerStore.Worker.newWorker(task1).launchWorker(slave1, slave1host);
		when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
		when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(launchedWorker));
		startResourceManager();

		// consume the Assign message waiting on the launch coordinator probe
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class);

		// ask the RM to stop that worker
		resourceManager.stopWorker(new RegisteredMesosWorkerNode(launchedWorker));

		// the released worker must have been stored and moved between the in-memory maps
		final MesosWorkerStore.Worker releasedWorker = launchedWorker.releaseWorker();
		verify(rmServices.workerStore).putWorker(releasedWorker);
		assertThat(resourceManager.workersInLaunch.entrySet(), empty());
		assertThat(
			resourceManager.workersBeingReturned,
			hasEntry(extractResourceID(task1), releasedWorker));

		// both the task router and the launch coordinator must have been informed
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class);
	}};
}

Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Verifies that an allocation request makes the RM persist a new worker, track it in
 * {@code workersInNew}, and message the task router and the launch coordinator.
 */
@Test
public void testRequestNewWorkers() throws Exception {
	new Context() {{
		startResourceManager();

		// hand out task1 once; a second request for a task ID throws
		when(rmServices.workerStore.newTaskID()).thenReturn(task1).thenThrow(new AssertionFailedError());

		final long timeoutMillis = timeout.toMilliseconds();
		rmServices.slotManagerStarted.get(timeoutMillis, TimeUnit.MILLISECONDS);

		final CompletableFuture<Void> allocation = resourceManager.callAsync(
			() -> {
				rmServices.rmActions.allocateResource(resourceProfile1);
				return null;
			},
			timeout);

		// surfaces any exception raised by the asynchronous call
		allocation.get(timeoutMillis, TimeUnit.MILLISECONDS);

		// expected outcome: worker persisted, tracked as new, and announced on both probes
		final MesosWorkerStore.Worker expectedWorker =
			MesosWorkerStore.Worker.newWorker(task1, resourceProfile1);
		verify(rmServices.workerStore, Mockito.timeout(timeoutMillis)).putWorker(expectedWorker);
		assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(task1), expectedWorker));
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);
		resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class);
	}};
}

Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Verifies how the RM sorts recovered workers into its in-memory maps and which messages it
 * sends for them on startup.
 */
@Test
public void testRecoverWorkers() throws Exception {
	new Context() {{
		// one worker per state (new, launched, released) is returned by the mocked store
		final MesosWorkerStore.Worker newWorker =
			MesosWorkerStore.Worker.newWorker(task1);
		final MesosWorkerStore.Worker launchedWorker =
			MesosWorkerStore.Worker.newWorker(task2)
				.launchWorker(slave1, slave1host);
		final MesosWorkerStore.Worker releasedWorker =
			MesosWorkerStore.Worker.newWorker(task3)
				.launchWorker(slave1, slave1host)
				.releaseWorker();
		when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
		when(rmServices.workerStore.recoverWorkers())
			.thenReturn(Arrays.asList(newWorker, launchedWorker, releasedWorker));
		startResourceManager();

		// the worker in the new state must not be tracked; the other two land in their maps
		assertThat(resourceManager.workersInNew.entrySet(), empty());
		assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), launchedWorker));
		assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), releasedWorker));
		resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class);

		// exactly one Assign message, carrying only the launched task and its host
		final LaunchCoordinator.Assign assignMessage =
			resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class);
		assertThat(assignMessage.tasks(), hasSize(1));
		assertThat(assignMessage.tasks().get(0).f0.getId(), equalTo(task2.getValue()));
		assertThat(assignMessage.tasks().get(0).f1, equalTo(slave1host));
		resourceManager.launchCoordinator.expectNoMsg();
	}};
}

Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Stops the given worker if it is currently tracked in {@code workersInLaunch}.
 *
 * <p>A tracked worker is released, written to the worker store, and moved to
 * {@code workersBeingReturned}. The task monitor is notified, and the launch coordinator is told
 * about the unassignment when the worker has a hostname. Workers that are already being returned
 * or that are unknown are only logged.
 *
 * @param workerNode node identifying the worker to stop
 * @return always {@code true}; exceptions raised while releasing are handed to
 *         {@code onFatalError}
 */
@Override
public boolean stopWorker(RegisteredMesosWorkerNode workerNode) {
	LOG.info("Stopping worker {}.", workerNode.getResourceID());
	try {
		if (!workersInLaunch.containsKey(workerNode.getResourceID())) {
			// nothing to release: either a stop is already under way or the worker is unknown
			if (workersBeingReturned.containsKey(workerNode.getResourceID())) {
				LOG.info("Ignoring request to stop worker {} because it is already being stopped.", workerNode.getResourceID());
			} else {
				LOG.warn("Unrecognized worker {}.", workerNode.getResourceID());
			}
		} else {
			// switch the worker to its released state, store it, and track it as being returned
			final MesosWorkerStore.Worker releasedWorker =
				workersInLaunch.remove(workerNode.getResourceID()).releaseWorker();
			workerStore.putWorker(releasedWorker);
			workersBeingReturned.put(extractResourceID(releasedWorker.taskID()), releasedWorker);

			taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(releasedWorker)), selfActor);

			// the launch coordinator is only told about the unassignment when a hostname is known
			if (releasedWorker.hostname().isDefined()) {
				launchCoordinator.tell(
					new LaunchCoordinator.Unassign(releasedWorker.taskID(), releasedWorker.hostname().get()),
					selfActor);
			}
		}
	} catch (Exception e) {
		onFatalError(new ResourceManagerException("Unable to release a worker.", e));
	}

	return true;
}

Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Recovers given framework/worker information.
 *
 * <p>Launched workers are tracked in {@code workersInLaunch} and reported to the launch
 * coordinator together with their hostname; released workers are tracked in
 * {@code workersBeingReturned}. The task monitor is told the goal state of every worker in the
 * list, whatever its state.
 *
 * @param tasksFromPreviousAttempts workers to recover; when empty, no state is changed and no
 *                                  message is sent
 * @see #getWorkersAsync()
 */
private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) {
	assert workersInNew.isEmpty();
	assert workersInLaunch.isEmpty();
	assert workersBeingReturned.isEmpty();

	if (tasksFromPreviousAttempts.isEmpty()) {
		return;
	}

	LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size());

	final List<Tuple2<TaskRequest, String>> priorAssignments =
		new ArrayList<>(tasksFromPreviousAttempts.size());

	for (final MesosWorkerStore.Worker recovered : tasksFromPreviousAttempts) {
		switch (recovered.state()) {
			case Launched: {
				workersInLaunch.put(extractResourceID(recovered.taskID()), recovered);
				final LaunchableMesosWorker launchable = createLaunchableMesosWorker(recovered.taskID());
				priorAssignments.add(new Tuple2<>(launchable.taskRequest(), recovered.hostname().get()));
				break;
			}
			case Released:
				workersBeingReturned.put(extractResourceID(recovered.taskID()), recovered);
				break;
			default:
				// workers in any other state are not added to either map
				break;
		}
		// the task monitor hears about every worker, tracked or not
		taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(recovered)), selfActor);
	}

	// pass the collected host assignments on to the launch coordinator, if there are any
	if (!priorAssignments.isEmpty()) {
		launchCoordinator.tell(new LaunchCoordinator.Assign(priorAssignments), selfActor);
	}
}

org.apache.flink.mesos.scheduler.LaunchCoordinator Java Examples