org.apache.flink.mesos.scheduler.LaunchCoordinator Java Examples
The following examples show how to use
org.apache.flink.mesos.scheduler.LaunchCoordinator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public Collection<ResourceProfile> startNewWorker(ResourceProfile resourceProfile) { LOG.info("Starting a new worker."); try { // generate new workers into persistent state and launch associated actors MesosWorkerStore.Worker worker = MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), resourceProfile); workerStore.putWorker(worker); workersInNew.put(extractResourceID(worker.taskID()), worker); LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); LOG.info("Scheduling Mesos task {} with ({} MB, {} cpus).", launchable.taskID().getValue(), launchable.taskRequest().getMemory(), launchable.taskRequest().getCPUs()); // tell the task monitor about the new plans taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); // tell the launch coordinator to launch the new tasks launchCoordinator.tell(new LaunchCoordinator.Launch(Collections.singletonList(launchable)), selfActor); return slotsPerWorker; } catch (Exception ex) { onFatalError(new ResourceManagerException("Unable to request new workers.", ex)); return Collections.emptyList(); } }
Example #2
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Allocate a worker using the RM. */ public MesosWorkerStore.Worker allocateWorker(Protos.TaskID taskID, ResourceProfile resourceProfile) throws Exception { when(rmServices.workerStore.newTaskID()).thenReturn(taskID); rmServices.slotManagerStarted.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Void> allocateResourceFuture = resourceManager.callAsync( () -> { rmServices.rmActions.allocateResource(resourceProfile); return null; }, timeout); MesosWorkerStore.Worker expected = MesosWorkerStore.Worker.newWorker(taskID, resourceProfile); // check for exceptions allocateResourceFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); // drain the probe messages verify(rmServices.workerStore, Mockito.timeout(timeout.toMilliseconds())).putWorker(expected); assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(taskID), expected)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class); return expected; }
Example #3
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 6 votes |
/** * Allocate a worker using the RM. */ public MesosWorkerStore.Worker allocateWorker(Protos.TaskID taskID, WorkerResourceSpec workerResourceSpec) throws Exception { when(rmServices.workerStore.newTaskID()).thenReturn(taskID); rmServices.slotManagerStarted.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Void> allocateResourceFuture = resourceManager.callAsync( () -> { rmServices.rmActions.allocateResource(workerResourceSpec); return null; }, timeout); MesosWorkerStore.Worker expected = MesosWorkerStore.Worker.newWorker(taskID, workerResourceSpec); // check for exceptions allocateResourceFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); // drain the probe messages verify(rmServices.workerStore, Mockito.timeout(timeout.toMilliseconds())).putWorker(expected); assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(taskID), expected)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class); return expected; }
Example #4
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 6 votes |
/** * Allocate a worker using the RM. */ public MesosWorkerStore.Worker allocateWorker(Protos.TaskID taskID, ResourceProfile resourceProfile) throws Exception { when(rmServices.workerStore.newTaskID()).thenReturn(taskID); rmServices.slotManagerStarted.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Void> allocateResourceFuture = resourceManager.callAsync( () -> { rmServices.rmActions.allocateResource(resourceProfile); return null; }, timeout); MesosWorkerStore.Worker expected = MesosWorkerStore.Worker.newWorker(taskID, resourceProfile); // check for exceptions allocateResourceFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); // drain the probe messages verify(rmServices.workerStore, Mockito.timeout(timeout.toMilliseconds())).putWorker(expected); assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(taskID), expected)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class); return expected; }
Example #5
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test planned stop of a launched worker. */ @Test public void testStopWorker() throws Exception { new Context() {{ // set the initial persistent state with a launched worker MesosWorkerStore.Worker worker1launched = MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec).launchWorker(slave1, slave1host); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(worker1launched)); startResourceManager(); // drain the assign message resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); // tell the RM to stop the worker resourceManager.stopWorker(new RegisteredMesosWorkerNode(worker1launched)); // verify that the instance state was updated MesosWorkerStore.Worker worker1Released = worker1launched.releaseWorker(); verify(rmServices.workerStore).putWorker(worker1Released); assertThat(resourceManager.workersInLaunch.entrySet(), empty()); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task1), worker1Released)); // verify that the monitor was notified resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class); }}; }
Example #6
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Creates the launch coordinator actor under the name {@code "launchCoordinator"}.
 *
 * @param schedulerDriver the scheduler driver handed to the coordinator's actor props
 * @param selfActor the actor reference handed to the coordinator's actor props
 * @return the reference returned by {@code actorSystem.actorOf}
 */
protected ActorRef createLaunchCoordinator(
        SchedulerDriver schedulerDriver,
        ActorRef selfActor) {
    final String actorName = "launchCoordinator";
    return actorSystem.actorOf(
        LaunchCoordinator.createActorProps(
            LaunchCoordinator.class,
            selfActor,
            flinkConfig,
            schedulerDriver,
            createOptimizer()),
        actorName);
}
Example #7
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test request for new workers. */ @Test public void testRequestNewWorkers() throws Exception { new Context() {{ startResourceManager(); // allocate a worker when(rmServices.workerStore.newTaskID()).thenReturn(task1).thenThrow(new AssertionFailedError()); rmServices.slotManagerStarted.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Void> allocateResourceFuture = resourceManager.callAsync( () -> { rmServices.rmActions.allocateResource(workerResourceSpec); return null; }, timeout); // check for exceptions allocateResourceFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); // verify that a new worker was persisted, the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task MesosWorkerStore.Worker expected = MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec); verify(rmServices.workerStore, Mockito.timeout(timeout.toMilliseconds())).putWorker(expected); assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(task1), expected)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class); }}; }
Example #8
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test recovery of persistent workers. */ @Test public void testRecoverWorkers() throws Exception { new Context() {{ // set the initial persistent state then initialize the RM MesosWorkerStore.Worker worker1 = MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec); MesosWorkerStore.Worker worker2 = MesosWorkerStore.Worker.newWorker(task2, workerResourceSpec).launchWorker(slave1, slave1host); MesosWorkerStore.Worker worker3 = MesosWorkerStore.Worker.newWorker(task3, workerResourceSpec).launchWorker(slave1, slave1host).releaseWorker(); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(Arrays.asList(worker1, worker2, worker3)); startResourceManager(); // verify that the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task. // note: "new" workers are discarded assertThat(resourceManager.workersInNew.entrySet(), empty()); assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), worker2)); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), worker3)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); LaunchCoordinator.Assign actualAssign = resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); assertThat(actualAssign.tasks(), hasSize(1)); assertThat(actualAssign.tasks().get(0).f0.getId(), equalTo(task2.getValue())); assertThat(actualAssign.tasks().get(0).f1, equalTo(slave1host)); resourceManager.launchCoordinator.expectNoMsg(); }}; }
Example #9
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
@Override public boolean stopWorker(RegisteredMesosWorkerNode workerNode) { LOG.info("Stopping worker {}.", workerNode.getResourceID()); try { if (workersInLaunch.containsKey(workerNode.getResourceID())) { // update persistent state of worker to Released MesosWorkerStore.Worker worker = workersInLaunch.remove(workerNode.getResourceID()); worker = worker.releaseWorker(); workerStore.putWorker(worker); workersBeingReturned.put(extractResourceID(worker.taskID()), worker); taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); if (worker.hostname().isDefined()) { // tell the launch coordinator that the task is being unassigned from the host, for planning purposes launchCoordinator.tell(new LaunchCoordinator.Unassign(worker.taskID(), worker.hostname().get()), selfActor); } } else if (workersBeingReturned.containsKey(workerNode.getResourceID())) { LOG.info("Ignoring request to stop worker {} because it is already being stopped.", workerNode.getResourceID()); } else { LOG.warn("Unrecognized worker {}.", workerNode.getResourceID()); } } catch (Exception e) { onFatalError(new ResourceManagerException("Unable to release a worker.", e)); } return true; }
Example #10
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
@Override public boolean startNewWorker(WorkerResourceSpec workerResourceSpec) { Preconditions.checkArgument(Objects.equals( workerResourceSpec, WorkerResourceSpec.fromTaskExecutorProcessSpec(taskManagerParameters.containeredParameters().getTaskExecutorProcessSpec()))); LOG.info("Starting a new worker."); try { // generate new workers into persistent state and launch associated actors MesosWorkerStore.Worker worker = MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), workerResourceSpec); workerStore.putWorker(worker); workersInNew.put(extractResourceID(worker.taskID()), worker); LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); LOG.info("Scheduling Mesos task {} with ({} MB, {} cpus, {} gpus, {} disk MB, {} Mbps).", launchable.taskID().getValue(), launchable.taskRequest().getMemory(), launchable.taskRequest().getCPUs(), launchable.taskRequest().getScalarRequests().get("gpus"), launchable.taskRequest().getDisk(), launchable.taskRequest().getNetworkMbps()); // tell the task monitor about the new plans taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); // tell the launch coordinator to launch the new tasks launchCoordinator.tell(new LaunchCoordinator.Launch(Collections.singletonList(launchable)), selfActor); return true; } catch (Exception ex) { onFatalError(new ResourceManagerException("Unable to request new workers.", ex)); return false; } }
Example #11
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Recovers given framework/worker information. * * @see #getWorkersAsync() */ private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) { assert(workersInNew.isEmpty()); assert(workersInLaunch.isEmpty()); assert(workersBeingReturned.isEmpty()); if (!tasksFromPreviousAttempts.isEmpty()) { LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size()); List<Tuple2<TaskRequest, String>> toAssign = new ArrayList<>(tasksFromPreviousAttempts.size()); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { switch(worker.state()) { case Launched: workersInLaunch.put(extractResourceID(worker.taskID()), worker); final LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); toAssign.add(new Tuple2<>(launchable.taskRequest(), worker.hostname().get())); break; case Released: workersBeingReturned.put(extractResourceID(worker.taskID()), worker); break; } taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); } // tell the launch coordinator about prior assignments if (toAssign.size() >= 1) { launchCoordinator.tell(new LaunchCoordinator.Assign(toAssign), selfActor); } } }
Example #12
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates the launch coordinator actor under the name {@code "launchCoordinator"}.
 *
 * @param schedulerDriver the scheduler driver handed to the coordinator's actor props
 * @param selfActor the actor reference handed to the coordinator's actor props
 * @return the reference returned by {@code actorSystem.actorOf}
 */
protected ActorRef createLaunchCoordinator(
        SchedulerDriver schedulerDriver,
        ActorRef selfActor) {
    final String actorName = "launchCoordinator";
    return actorSystem.actorOf(
        LaunchCoordinator.createActorProps(
            LaunchCoordinator.class,
            selfActor,
            flinkConfig,
            schedulerDriver,
            createOptimizer()),
        actorName);
}
Example #13
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test planned stop of a launched worker. */ @Test public void testStopWorker() throws Exception { new Context() {{ // set the initial persistent state with a launched worker MesosWorkerStore.Worker worker1launched = MesosWorkerStore.Worker.newWorker(task1).launchWorker(slave1, slave1host); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(worker1launched)); startResourceManager(); // drain the assign message resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); // tell the RM to stop the worker resourceManager.stopWorker(new RegisteredMesosWorkerNode(worker1launched)); // verify that the instance state was updated MesosWorkerStore.Worker worker1Released = worker1launched.releaseWorker(); verify(rmServices.workerStore).putWorker(worker1Released); assertThat(resourceManager.workersInLaunch.entrySet(), empty()); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task1), worker1Released)); // verify that the monitor was notified resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class); }}; }
Example #14
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test request for new workers. */ @Test public void testRequestNewWorkers() throws Exception { new Context() {{ startResourceManager(); // allocate a worker when(rmServices.workerStore.newTaskID()).thenReturn(task1).thenThrow(new AssertionFailedError()); rmServices.slotManagerStarted.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Void> allocateResourceFuture = resourceManager.callAsync( () -> { rmServices.rmActions.allocateResource(resourceProfile1); return null; }, timeout); // check for exceptions allocateResourceFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); // verify that a new worker was persisted, the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task MesosWorkerStore.Worker expected = MesosWorkerStore.Worker.newWorker(task1, resourceProfile1); verify(rmServices.workerStore, Mockito.timeout(timeout.toMilliseconds())).putWorker(expected); assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(task1), expected)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class); }}; }
Example #15
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test recovery of persistent workers. */ @Test public void testRecoverWorkers() throws Exception { new Context() {{ // set the initial persistent state then initialize the RM MesosWorkerStore.Worker worker1 = MesosWorkerStore.Worker.newWorker(task1); MesosWorkerStore.Worker worker2 = MesosWorkerStore.Worker.newWorker(task2).launchWorker(slave1, slave1host); MesosWorkerStore.Worker worker3 = MesosWorkerStore.Worker.newWorker(task3).launchWorker(slave1, slave1host).releaseWorker(); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(Arrays.asList(worker1, worker2, worker3)); startResourceManager(); // verify that the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task. // note: "new" workers are discarded assertThat(resourceManager.workersInNew.entrySet(), empty()); assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), worker2)); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), worker3)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); LaunchCoordinator.Assign actualAssign = resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); assertThat(actualAssign.tasks(), hasSize(1)); assertThat(actualAssign.tasks().get(0).f0.getId(), equalTo(task2.getValue())); assertThat(actualAssign.tasks().get(0).f1, equalTo(slave1host)); resourceManager.launchCoordinator.expectNoMsg(); }}; }
Example #16
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
@Override public boolean stopWorker(RegisteredMesosWorkerNode workerNode) { LOG.info("Stopping worker {}.", workerNode.getResourceID()); try { if (workersInLaunch.containsKey(workerNode.getResourceID())) { // update persistent state of worker to Released MesosWorkerStore.Worker worker = workersInLaunch.remove(workerNode.getResourceID()); worker = worker.releaseWorker(); workerStore.putWorker(worker); workersBeingReturned.put(extractResourceID(worker.taskID()), worker); taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); if (worker.hostname().isDefined()) { // tell the launch coordinator that the task is being unassigned from the host, for planning purposes launchCoordinator.tell(new LaunchCoordinator.Unassign(worker.taskID(), worker.hostname().get()), selfActor); } } else if (workersBeingReturned.containsKey(workerNode.getResourceID())) { LOG.info("Ignoring request to stop worker {} because it is already being stopped.", workerNode.getResourceID()); } else { LOG.warn("Unrecognized worker {}.", workerNode.getResourceID()); } } catch (Exception e) { onFatalError(new ResourceManagerException("Unable to release a worker.", e)); } return true; }
Example #17
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
@Override public Collection<ResourceProfile> startNewWorker(ResourceProfile resourceProfile) { if (!slotsPerWorker.iterator().next().isMatching(resourceProfile)) { return Collections.emptyList(); } LOG.info("Starting a new worker."); try { // generate new workers into persistent state and launch associated actors MesosWorkerStore.Worker worker = MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), resourceProfile); workerStore.putWorker(worker); workersInNew.put(extractResourceID(worker.taskID()), worker); LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); LOG.info("Scheduling Mesos task {} with ({} MB, {} cpus).", launchable.taskID().getValue(), launchable.taskRequest().getMemory(), launchable.taskRequest().getCPUs()); // tell the task monitor about the new plans taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); // tell the launch coordinator to launch the new tasks launchCoordinator.tell(new LaunchCoordinator.Launch(Collections.singletonList(launchable)), selfActor); return slotsPerWorker; } catch (Exception ex) { onFatalError(new ResourceManagerException("Unable to request new workers.", ex)); return Collections.emptyList(); } }
Example #18
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Recovers given framework/worker information. * * @see #getWorkersAsync() */ private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) { assert(workersInNew.isEmpty()); assert(workersInLaunch.isEmpty()); assert(workersBeingReturned.isEmpty()); if (!tasksFromPreviousAttempts.isEmpty()) { LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size()); List<Tuple2<TaskRequest, String>> toAssign = new ArrayList<>(tasksFromPreviousAttempts.size()); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { switch(worker.state()) { case Launched: workersInLaunch.put(extractResourceID(worker.taskID()), worker); final LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); toAssign.add(new Tuple2<>(launchable.taskRequest(), worker.hostname().get())); break; case Released: workersBeingReturned.put(extractResourceID(worker.taskID()), worker); break; } taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); } // tell the launch coordinator about prior assignments if (toAssign.size() >= 1) { launchCoordinator.tell(new LaunchCoordinator.Assign(toAssign), selfActor); } } }
Example #19
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates the launch coordinator actor under the name {@code "launchCoordinator"}.
 *
 * @param schedulerDriver the scheduler driver handed to the coordinator's actor props
 * @param selfActor the actor reference handed to the coordinator's actor props
 * @return the reference returned by {@code actorSystem.actorOf}
 */
protected ActorRef createLaunchCoordinator(
        SchedulerDriver schedulerDriver,
        ActorRef selfActor) {
    final String actorName = "launchCoordinator";
    return actorSystem.actorOf(
        LaunchCoordinator.createActorProps(
            LaunchCoordinator.class,
            selfActor,
            flinkConfig,
            schedulerDriver,
            createOptimizer()),
        actorName);
}
Example #20
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test planned stop of a launched worker. */ @Test public void testStopWorker() throws Exception { new Context() {{ // set the initial persistent state with a launched worker MesosWorkerStore.Worker worker1launched = MesosWorkerStore.Worker.newWorker(task1).launchWorker(slave1, slave1host); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(worker1launched)); startResourceManager(); // drain the assign message resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); // tell the RM to stop the worker resourceManager.stopWorker(new RegisteredMesosWorkerNode(worker1launched)); // verify that the instance state was updated MesosWorkerStore.Worker worker1Released = worker1launched.releaseWorker(); verify(rmServices.workerStore).putWorker(worker1Released); assertThat(resourceManager.workersInLaunch.entrySet(), empty()); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task1), worker1Released)); // verify that the monitor was notified resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class); }}; }
Example #21
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test request for new workers. */ @Test public void testRequestNewWorkers() throws Exception { new Context() {{ startResourceManager(); // allocate a worker when(rmServices.workerStore.newTaskID()).thenReturn(task1).thenThrow(new AssertionFailedError()); rmServices.slotManagerStarted.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Void> allocateResourceFuture = resourceManager.callAsync( () -> { rmServices.rmActions.allocateResource(resourceProfile1); return null; }, timeout); // check for exceptions allocateResourceFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS); // verify that a new worker was persisted, the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task MesosWorkerStore.Worker expected = MesosWorkerStore.Worker.newWorker(task1, resourceProfile1); verify(rmServices.workerStore, Mockito.timeout(timeout.toMilliseconds())).putWorker(expected); assertThat(resourceManager.workersInNew, hasEntry(extractResourceID(task1), expected)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Launch.class); }}; }
Example #22
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test recovery of persistent workers. */ @Test public void testRecoverWorkers() throws Exception { new Context() {{ // set the initial persistent state then initialize the RM MesosWorkerStore.Worker worker1 = MesosWorkerStore.Worker.newWorker(task1); MesosWorkerStore.Worker worker2 = MesosWorkerStore.Worker.newWorker(task2).launchWorker(slave1, slave1host); MesosWorkerStore.Worker worker3 = MesosWorkerStore.Worker.newWorker(task3).launchWorker(slave1, slave1host).releaseWorker(); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(Arrays.asList(worker1, worker2, worker3)); startResourceManager(); // verify that the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task. // note: "new" workers are discarded assertThat(resourceManager.workersInNew.entrySet(), empty()); assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), worker2)); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), worker3)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); LaunchCoordinator.Assign actualAssign = resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); assertThat(actualAssign.tasks(), hasSize(1)); assertThat(actualAssign.tasks().get(0).f0.getId(), equalTo(task2.getValue())); assertThat(actualAssign.tasks().get(0).f1, equalTo(slave1host)); resourceManager.launchCoordinator.expectNoMsg(); }}; }
Example #23
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override public boolean stopWorker(RegisteredMesosWorkerNode workerNode) { LOG.info("Stopping worker {}.", workerNode.getResourceID()); try { if (workersInLaunch.containsKey(workerNode.getResourceID())) { // update persistent state of worker to Released MesosWorkerStore.Worker worker = workersInLaunch.remove(workerNode.getResourceID()); worker = worker.releaseWorker(); workerStore.putWorker(worker); workersBeingReturned.put(extractResourceID(worker.taskID()), worker); taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); if (worker.hostname().isDefined()) { // tell the launch coordinator that the task is being unassigned from the host, for planning purposes launchCoordinator.tell(new LaunchCoordinator.Unassign(worker.taskID(), worker.hostname().get()), selfActor); } } else if (workersBeingReturned.containsKey(workerNode.getResourceID())) { LOG.info("Ignoring request to stop worker {} because it is already being stopped.", workerNode.getResourceID()); } else { LOG.warn("Unrecognized worker {}.", workerNode.getResourceID()); } } catch (Exception e) { onFatalError(new ResourceManagerException("Unable to release a worker.", e)); } return true; }
Example #24
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Recovers given framework/worker information. * * @see #getWorkersAsync() */ private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) { assert(workersInNew.isEmpty()); assert(workersInLaunch.isEmpty()); assert(workersBeingReturned.isEmpty()); if (!tasksFromPreviousAttempts.isEmpty()) { LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size()); List<Tuple2<TaskRequest, String>> toAssign = new ArrayList<>(tasksFromPreviousAttempts.size()); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { switch(worker.state()) { case Launched: workersInLaunch.put(extractResourceID(worker.taskID()), worker); final LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); toAssign.add(new Tuple2<>(launchable.taskRequest(), worker.hostname().get())); break; case Released: workersBeingReturned.put(extractResourceID(worker.taskID()), worker); break; } taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); } // tell the launch coordinator about prior assignments if (toAssign.size() >= 1) { launchCoordinator.tell(new LaunchCoordinator.Assign(toAssign), selfActor); } } }