Java Code Examples for org.apache.flink.mesos.runtime.clusterframework.store.MesosWorkerStore#Worker
The following examples show how to use
org.apache.flink.mesos.runtime.clusterframework.store.MesosWorkerStore#Worker.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that after leadership is revoked and granted again the resource manager
 * tracks no workers in any of its maps and the scheduler driver was stopped.
 */
@Test
public void testClearStateAfterRevokeLeadership() throws Exception {
    new Context() {{
        // persisted state: one worker that is new, one launched, one launched and then released
        final MesosWorkerStore.Worker pendingWorker = MesosWorkerStore.Worker.newWorker(task1);
        final MesosWorkerStore.Worker launchedWorker =
            MesosWorkerStore.Worker.newWorker(task2).launchWorker(slave1, slave1host);
        final MesosWorkerStore.Worker releasedWorker =
            MesosWorkerStore.Worker.newWorker(task3).launchWorker(slave1, slave1host).releaseWorker();

        when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
        // the first recovery yields the three workers, every later one yields nothing
        when(rmServices.workerStore.recoverWorkers())
            .thenReturn(Arrays.asList(pendingWorker, launchedWorker, releasedWorker))
            .thenReturn(Collections.emptyList());

        startResourceManager();

        // revoke leadership, then grant it again
        rmServices.rmLeaderElectionService.notLeader();
        rmServices.grantLeadership();

        // none of the in-memory worker maps may hold an entry any more
        assertThat(resourceManager.workersInNew.size(), equalTo(0));
        assertThat(resourceManager.workersInLaunch.size(), equalTo(0));
        assertThat(resourceManager.workersBeingReturned.size(), equalTo(0));

        verify(rmServices.schedulerDriver).stop(true);
    }};
}
Example 2
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 6 votes |
/** * Fetches framework/worker information persisted by a prior incarnation of the RM. */ private CompletableFuture<List<MesosWorkerStore.Worker>> getWorkersAsync() { // if this resource manager is recovering from failure, // then some worker tasks are most likely still alive and we can re-obtain them return CompletableFuture.supplyAsync(() -> { try { final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts = workerStore.recoverWorkers(); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { if (worker.state() == MesosWorkerStore.WorkerState.New) { // remove new workers because allocation requests are transient workerStore.removeWorker(worker.taskID()); } } return tasksFromPreviousAttempts; } catch (final Exception e) { throw new CompletionException(new ResourceManagerException(e)); } }, getRpcService().getExecutor()); }
Example 3
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Checks that a revoke/grant leadership cycle empties the resource manager's
 * worker maps and stops the scheduler driver.
 */
@Test
public void testClearStateAfterRevokeLeadership() throws Exception {
    new Context() {{
        // three persisted workers: new, launched, and launched-then-released
        final MesosWorkerStore.Worker workerNew = MesosWorkerStore.Worker.newWorker(task1);
        final MesosWorkerStore.Worker workerLaunched =
            MesosWorkerStore.Worker.newWorker(task2).launchWorker(slave1, slave1host);
        final MesosWorkerStore.Worker workerReleased =
            MesosWorkerStore.Worker.newWorker(task3).launchWorker(slave1, slave1host).releaseWorker();

        when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
        // only the first call to recoverWorkers() returns the persisted workers
        when(rmServices.workerStore.recoverWorkers())
            .thenReturn(Arrays.asList(workerNew, workerLaunched, workerReleased))
            .thenReturn(Collections.emptyList());

        startResourceManager();

        // leadership is lost and then regained
        rmServices.rmLeaderElectionService.notLeader();
        rmServices.grantLeadership();

        assertThat(resourceManager.workersInNew.size(), equalTo(0));
        assertThat(resourceManager.workersInLaunch.size(), equalTo(0));
        assertThat(resourceManager.workersBeingReturned.size(), equalTo(0));

        verify(rmServices.schedulerDriver).stop(true);
    }};
}
Example 4
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Test unplanned task failure of a pending worker. */ @Test public void testWorkerFailed() throws Exception { new Context() {{ // set the initial persistent state with a launched worker MesosWorkerStore.Worker worker1launched = MesosWorkerStore.Worker.newWorker(task1).launchWorker(slave1, slave1host); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(worker1launched)); when(rmServices.workerStore.newTaskID()).thenReturn(task2); startResourceManager(); // tell the RM that a task failed when(rmServices.workerStore.removeWorker(task1)).thenReturn(true); resourceManager.taskTerminated(new TaskMonitor.TaskTerminated(task1, Protos.TaskStatus.newBuilder() .setTaskId(task1).setSlaveId(slave1).setState(Protos.TaskState.TASK_FAILED).build())); // verify that the instance state was updated verify(rmServices.workerStore).removeWorker(task1); assertThat(resourceManager.workersInLaunch.entrySet(), empty()); assertThat(resourceManager.workersBeingReturned.entrySet(), empty()); assertThat(resourceManager.workersInNew, hasKey(extractResourceID(task2))); // verify that `closeTaskManagerConnection` was called assertThat(resourceManager.closedTaskManagerConnections, hasItem(extractResourceID(task1))); }}; }
Example 5
Source File: ZooKeeperMesosServices.java From flink with Apache License 2.0 | 6 votes |
/**
 * Creates a {@link ZooKeeperMesosWorkerStore} from a state handle store rooted at
 * {@code /workers}, a shared value at {@code /frameworkId} and a shared counter at
 * {@code /taskCount}.
 *
 * @param configuration configuration used to create the file system state storage
 * @param executor not used by this implementation
 * @return the newly created worker store
 * @throws Exception if any of the underlying ZooKeeper utilities cannot be created
 */
@Override
public MesosWorkerStore createMesosWorkerStore(Configuration configuration, Executor executor) throws Exception {
    final RetrievableStateStorageHelper<MesosWorkerStore.Worker> storageHelper =
        ZooKeeperUtils.createFileSystemStateStorage(configuration, "mesosWorkerStore");

    final ZooKeeperStateHandleStore<MesosWorkerStore.Worker> workerHandleStore =
        zooKeeperUtilityFactory.createZooKeeperStateHandleStore("/workers", storageHelper);

    final ZooKeeperSharedValue sharedFrameworkId =
        zooKeeperUtilityFactory.createSharedValue("/frameworkId", new byte[0]);

    final ZooKeeperSharedCount sharedTaskCount =
        zooKeeperUtilityFactory.createSharedCount("/taskCount", 0);

    return new ZooKeeperMesosWorkerStore(workerHandleStore, sharedFrameworkId, sharedTaskCount);
}
Example 6
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that after leadership is revoked and granted again the resource manager
 * tracks no workers in any of its maps and the scheduler driver was stopped.
 */
@Test
public void testClearStateAfterRevokeLeadership() throws Exception {
    new Context() {{
        // persisted state: one worker that is new, one launched, one launched and then released
        final MesosWorkerStore.Worker pendingWorker =
            MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec);
        final MesosWorkerStore.Worker launchedWorker =
            MesosWorkerStore.Worker.newWorker(task2, workerResourceSpec)
                .launchWorker(slave1, slave1host);
        final MesosWorkerStore.Worker releasedWorker =
            MesosWorkerStore.Worker.newWorker(task3, workerResourceSpec)
                .launchWorker(slave1, slave1host)
                .releaseWorker();

        when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1));
        // the first recovery yields the three workers, every later one yields nothing
        when(rmServices.workerStore.recoverWorkers())
            .thenReturn(Arrays.asList(pendingWorker, launchedWorker, releasedWorker))
            .thenReturn(Collections.emptyList());

        startResourceManager();

        // revoke leadership, then grant it again
        rmServices.rmLeaderElectionService.notLeader();
        rmServices.grantLeadership();

        // none of the in-memory worker maps may hold an entry any more
        assertThat(resourceManager.workersInNew.size(), equalTo(0));
        assertThat(resourceManager.workersInLaunch.size(), equalTo(0));
        assertThat(resourceManager.workersBeingReturned.size(), equalTo(0));

        verify(rmServices.schedulerDriver).stop(true);
    }};
}
Example 7
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
@Override public boolean startNewWorker(WorkerResourceSpec workerResourceSpec) { Preconditions.checkArgument(Objects.equals( workerResourceSpec, WorkerResourceSpec.fromTaskExecutorProcessSpec(taskManagerParameters.containeredParameters().getTaskExecutorProcessSpec()))); LOG.info("Starting a new worker."); try { // generate new workers into persistent state and launch associated actors MesosWorkerStore.Worker worker = MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), workerResourceSpec); workerStore.putWorker(worker); workersInNew.put(extractResourceID(worker.taskID()), worker); LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); LOG.info("Scheduling Mesos task {} with ({} MB, {} cpus, {} gpus, {} disk MB, {} Mbps).", launchable.taskID().getValue(), launchable.taskRequest().getMemory(), launchable.taskRequest().getCPUs(), launchable.taskRequest().getScalarRequests().get("gpus"), launchable.taskRequest().getDisk(), launchable.taskRequest().getNetworkMbps()); // tell the task monitor about the new plans taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); // tell the launch coordinator to launch the new tasks launchCoordinator.tell(new LaunchCoordinator.Launch(Collections.singletonList(launchable)), selfActor); return true; } catch (Exception ex) { onFatalError(new ResourceManagerException("Unable to request new workers.", ex)); return false; } }
Example 8
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test planned stop of a launched worker. */ @Test public void testStopWorker() throws Exception { new Context() {{ // set the initial persistent state with a launched worker MesosWorkerStore.Worker worker1launched = MesosWorkerStore.Worker.newWorker(task1).launchWorker(slave1, slave1host); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(worker1launched)); startResourceManager(); // drain the assign message resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); // tell the RM to stop the worker resourceManager.stopWorker(new RegisteredMesosWorkerNode(worker1launched)); // verify that the instance state was updated MesosWorkerStore.Worker worker1Released = worker1launched.releaseWorker(); verify(rmServices.workerStore).putWorker(worker1Released); assertThat(resourceManager.workersInLaunch.entrySet(), empty()); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task1), worker1Released)); // verify that the monitor was notified resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class); }}; }
Example 9
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test planned stop of a launched worker. */ @Test public void testStopWorker() throws Exception { new Context() {{ // set the initial persistent state with a launched worker MesosWorkerStore.Worker worker1launched = MesosWorkerStore.Worker.newWorker(task1, workerResourceSpec).launchWorker(slave1, slave1host); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(singletonList(worker1launched)); startResourceManager(); // drain the assign message resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); // tell the RM to stop the worker resourceManager.stopWorker(new RegisteredMesosWorkerNode(worker1launched)); // verify that the instance state was updated MesosWorkerStore.Worker worker1Released = worker1launched.releaseWorker(); verify(rmServices.workerStore).putWorker(worker1Released); assertThat(resourceManager.workersInLaunch.entrySet(), empty()); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task1), worker1Released)); // verify that the monitor was notified resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Unassign.class); }}; }
Example 10
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Accept offers as advised by the launch coordinator. * * <p>Acceptance is routed through the RM to update the persistent state before * forwarding the message to Mesos. */ public void acceptOffers(AcceptOffers msg) { try { List<TaskMonitor.TaskGoalStateUpdated> toMonitor = new ArrayList<>(msg.operations().size()); // transition the persistent state of some tasks to Launched for (Protos.Offer.Operation op : msg.operations()) { if (op.getType() == Protos.Offer.Operation.Type.LAUNCH) { for (Protos.TaskInfo info : op.getLaunch().getTaskInfosList()) { MesosWorkerStore.Worker worker = workersInNew.remove(extractResourceID(info.getTaskId())); assert (worker != null); worker = worker.launchWorker(info.getSlaveId(), msg.hostname()); workerStore.putWorker(worker); workersInLaunch.put(extractResourceID(worker.taskID()), worker); LOG.info("Launching Mesos task {} on host {}.", worker.taskID().getValue(), worker.hostname().get()); toMonitor.add(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker))); } } } // tell the task monitor about the new plans for (TaskMonitor.TaskGoalStateUpdated update : toMonitor) { taskMonitor.tell(update, selfActor); } // send the acceptance message to Mesos schedulerDriver.acceptOffers(msg.offerIds(), msg.operations(), msg.filters()); } catch (Exception ex) { onFatalError(new ResourceManagerException("unable to accept offers", ex)); } }
Example 11
Source File: MesosResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Test recovery of persistent workers. */ @Test public void testRecoverWorkers() throws Exception { new Context() {{ // set the initial persistent state then initialize the RM MesosWorkerStore.Worker worker1 = MesosWorkerStore.Worker.newWorker(task1); MesosWorkerStore.Worker worker2 = MesosWorkerStore.Worker.newWorker(task2).launchWorker(slave1, slave1host); MesosWorkerStore.Worker worker3 = MesosWorkerStore.Worker.newWorker(task3).launchWorker(slave1, slave1host).releaseWorker(); when(rmServices.workerStore.getFrameworkID()).thenReturn(Option.apply(framework1)); when(rmServices.workerStore.recoverWorkers()).thenReturn(Arrays.asList(worker1, worker2, worker3)); startResourceManager(); // verify that the internal state was updated, the task router was notified, // and the launch coordinator was asked to launch a task. // note: "new" workers are discarded assertThat(resourceManager.workersInNew.entrySet(), empty()); assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task2), worker2)); assertThat(resourceManager.workersBeingReturned, hasEntry(extractResourceID(task3), worker3)); resourceManager.taskRouter.expectMsgClass(TaskMonitor.TaskGoalStateUpdated.class); LaunchCoordinator.Assign actualAssign = resourceManager.launchCoordinator.expectMsgClass(LaunchCoordinator.Assign.class); assertThat(actualAssign.tasks(), hasSize(1)); assertThat(actualAssign.tasks().get(0).f0.getId(), equalTo(task2.getValue())); assertThat(actualAssign.tasks().get(0).f1, equalTo(slave1host)); resourceManager.launchCoordinator.expectNoMsg(); }}; }
Example 12
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test offer acceptance. */ @Test public void testAcceptOffers() throws Exception { new Context() {{ startResourceManager(); // allocate a new worker MesosWorkerStore.Worker worker1 = allocateWorker(task1, resourceProfile1); // send an AcceptOffers message as the LaunchCoordinator would // to launch task1 onto slave1 with offer1 Protos.TaskInfo task1info = Protos.TaskInfo.newBuilder() .setTaskId(task1).setName("").setSlaveId(slave1).build(); AcceptOffers msg = new AcceptOffers(slave1host, singletonList(offer1), singletonList(launch(task1info))); resourceManager.acceptOffers(msg); // verify that the worker was persisted, the internal state was updated, // Mesos was asked to launch task1, and the task router was notified MesosWorkerStore.Worker worker1launched = worker1.launchWorker(slave1, slave1host); verify(rmServices.workerStore).putWorker(worker1launched); assertThat(resourceManager.workersInNew.entrySet(), empty()); assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task1), worker1launched)); resourceManager.taskRouter.expectMsg( new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker1launched))); verify(rmServices.schedulerDriver).acceptOffers(msg.offerIds(), msg.operations(), msg.filters()); }}; }
Example 13
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Recovers given framework/worker information. * * @see #getWorkersAsync() */ private void recoverWorkers(final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts) { assert(workersInNew.isEmpty()); assert(workersInLaunch.isEmpty()); assert(workersBeingReturned.isEmpty()); if (!tasksFromPreviousAttempts.isEmpty()) { LOG.info("Retrieved {} TaskManagers from previous attempt", tasksFromPreviousAttempts.size()); List<Tuple2<TaskRequest, String>> toAssign = new ArrayList<>(tasksFromPreviousAttempts.size()); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { switch(worker.state()) { case Launched: workersInLaunch.put(extractResourceID(worker.taskID()), worker); final LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); toAssign.add(new Tuple2<>(launchable.taskRequest(), worker.hostname().get())); break; case Released: workersBeingReturned.put(extractResourceID(worker.taskID()), worker); break; } taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); } // tell the launch coordinator about prior assignments if (toAssign.size() >= 1) { launchCoordinator.tell(new LaunchCoordinator.Assign(toAssign), selfActor); } } }
Example 14
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Accept offers as advised by the launch coordinator. * * <p>Acceptance is routed through the RM to update the persistent state before * forwarding the message to Mesos. */ public void acceptOffers(AcceptOffers msg) { try { List<TaskMonitor.TaskGoalStateUpdated> toMonitor = new ArrayList<>(msg.operations().size()); // transition the persistent state of some tasks to Launched for (Protos.Offer.Operation op : msg.operations()) { if (op.getType() == Protos.Offer.Operation.Type.LAUNCH) { for (Protos.TaskInfo info : op.getLaunch().getTaskInfosList()) { MesosWorkerStore.Worker worker = workersInNew.remove(extractResourceID(info.getTaskId())); assert (worker != null); worker = worker.launchWorker(info.getSlaveId(), msg.hostname()); workerStore.putWorker(worker); workersInLaunch.put(extractResourceID(worker.taskID()), worker); LOG.info("Launching Mesos task {} on host {}.", worker.taskID().getValue(), worker.hostname().get()); toMonitor.add(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker))); } } } // tell the task monitor about the new plans for (TaskMonitor.TaskGoalStateUpdated update : toMonitor) { taskMonitor.tell(update, selfActor); } // send the acceptance message to Mesos schedulerDriver.acceptOffers(msg.offerIds(), msg.operations(), msg.filters()); } catch (Exception ex) { onFatalError(new ResourceManagerException("unable to accept offers", ex)); } }
Example 15
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Accept offers as advised by the launch coordinator. * * <p>Acceptance is routed through the RM to update the persistent state before * forwarding the message to Mesos. */ public void acceptOffers(AcceptOffers msg) { try { List<TaskMonitor.TaskGoalStateUpdated> toMonitor = new ArrayList<>(msg.operations().size()); // transition the persistent state of some tasks to Launched for (Protos.Offer.Operation op : msg.operations()) { if (op.getType() == Protos.Offer.Operation.Type.LAUNCH) { for (Protos.TaskInfo info : op.getLaunch().getTaskInfosList()) { MesosWorkerStore.Worker worker = workersInNew.remove(extractResourceID(info.getTaskId())); assert (worker != null); worker = worker.launchWorker(info.getSlaveId(), msg.hostname()); workerStore.putWorker(worker); workersInLaunch.put(extractResourceID(worker.taskID()), worker); LOG.info("Launching Mesos task {} on host {}.", worker.taskID().getValue(), worker.hostname().get()); toMonitor.add(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker))); } } } // tell the task monitor about the new plans for (TaskMonitor.TaskGoalStateUpdated update : toMonitor) { taskMonitor.tell(update, selfActor); } // send the acceptance message to Mesos schedulerDriver.acceptOffers(msg.offerIds(), msg.operations(), msg.filters()); } catch (Exception ex) { onFatalError(new ResourceManagerException("unable to accept offers", ex)); } }
Example 16
Source File: MesosResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Test offer acceptance. */ @Test public void testAcceptOffers() throws Exception { new Context() {{ startResourceManager(); // allocate a new worker MesosWorkerStore.Worker worker1 = allocateWorker(task1, workerResourceSpec); // send an AcceptOffers message as the LaunchCoordinator would // to launch task1 onto slave1 with offer1 Protos.TaskInfo task1info = Protos.TaskInfo.newBuilder() .setTaskId(task1).setName("").setSlaveId(slave1).build(); AcceptOffers msg = new AcceptOffers(slave1host, singletonList(offer1), singletonList(launch(task1info))); resourceManager.acceptOffers(msg); // verify that the worker was persisted, the internal state was updated, // Mesos was asked to launch task1, and the task router was notified MesosWorkerStore.Worker worker1launched = worker1.launchWorker(slave1, slave1host); verify(rmServices.workerStore).putWorker(worker1launched); assertThat(resourceManager.workersInNew.entrySet(), empty()); assertThat(resourceManager.workersInLaunch, hasEntry(extractResourceID(task1), worker1launched)); resourceManager.taskRouter.expectMsg( new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker1launched))); verify(rmServices.schedulerDriver).acceptOffers(msg.offerIds(), msg.operations(), msg.filters()); }}; }
Example 17
Source File: RegisteredMesosWorkerNode.java From flink with Apache License 2.0 | 4 votes |
/**
 * Wraps a worker that has both a slave ID and a hostname defined.
 *
 * @param worker the worker to wrap; must not be null, and its {@code slaveID()} and
 *     {@code hostname()} must both be defined
 */
public RegisteredMesosWorkerNode(MesosWorkerStore.Worker worker) {
    // validate everything first, assign last
    Preconditions.checkNotNull(worker);
    Preconditions.checkArgument(worker.slaveID().isDefined());
    Preconditions.checkArgument(worker.hostname().isDefined());
    this.worker = worker;
}
Example 18
Source File: RegisteredMesosWorkerNode.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Returns the worker held by this node.
 *
 * @return the wrapped {@link MesosWorkerStore.Worker}
 */
public MesosWorkerStore.Worker getWorker() {
    return this.worker;
}
Example 19
Source File: RegisteredMesosWorkerNode.java From flink with Apache License 2.0 | 4 votes |
/**
 * Gives access to the worker wrapped by this node.
 *
 * @return the wrapped {@link MesosWorkerStore.Worker}
 */
public MesosWorkerStore.Worker getWorker() {
    return this.worker;
}
Example 20
Source File: RegisteredMesosWorkerNode.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a node for the given worker.
 *
 * @param worker the worker to wrap; must not be null, and its {@code slaveID()} and
 *     {@code hostname()} must both be defined
 */
public RegisteredMesosWorkerNode(MesosWorkerStore.Worker worker) {
    // run all precondition checks before storing the reference
    Preconditions.checkNotNull(worker);
    Preconditions.checkArgument(worker.slaveID().isDefined());
    Preconditions.checkArgument(worker.hostname().isDefined());
    this.worker = worker;
}