Java Code Examples for org.apache.helix.HelixManager#connect()
The following examples show how to use
org.apache.helix.HelixManager#connect().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ClusterIntegrationTestUtils.java From incubator-gobblin with Apache License 2.0 | 6 votes |
/** * A utility method that creates a partial instance structure in ZK. */ public static void createPartialInstanceStructure(HelixManager helixManager, String zkConnectString) { //Connect and disconnect the helixManager to create a Helix Instance set up. try { helixManager.connect(); helixManager.disconnect(); } catch (Exception e) { Assert.fail("Failed to connect to ZK"); } //Delete ERRORS/HISTORY/STATUSUPDATES znodes under INSTANCES to simulate partial instance set up. ZkClient zkClient = new ZkClient(zkConnectString); zkClient.delete(PropertyPathBuilder.instanceError(helixManager.getClusterName(), helixManager.getInstanceName())); zkClient.delete(PropertyPathBuilder.instanceHistory(helixManager.getClusterName(), helixManager.getInstanceName())); zkClient.delete(PropertyPathBuilder.instanceStatusUpdate(helixManager.getClusterName(), helixManager.getInstanceName())); }
Example 2
Source File: GobblinTaskRunnerTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
@Test (groups = {"disabledOnTravis"}) public void testTaskAssignmentAfterHelixConnectionRetry() throws Exception { Config jobConfigOverrides = ClusterIntegrationTestUtils.buildSleepingJob(JOB_ID, TASK_STATE_FILE); this.suite = new TaskAssignmentAfterConnectionRetry(jobConfigOverrides); String zkConnectString = suite.getManagerConfig().getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY); String clusterName = suite.getManagerConfig().getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY); //A test manager instance for observing the state of the cluster HelixManager helixManager = HelixManagerFactory.getZKHelixManager(clusterName, "TestManager", InstanceType.SPECTATOR, zkConnectString); suite.startCluster(); helixManager.connect(); //Ensure that Helix has created a workflow AssertWithBackoff.create().maxSleepMs(1000).backoffFactor(1). assertTrue(ClusterIntegrationTest.isTaskStarted(helixManager, JOB_ID), "Waiting for the job to start..."); //Ensure that the SleepingTask is running AssertWithBackoff.create().maxSleepMs(100).timeoutMs(2000).backoffFactor(1). assertTrue(ClusterIntegrationTest.isTaskRunning(TASK_STATE_FILE),"Waiting for the task to enter running state"); helixManager.disconnect(); }
Example 3
Source File: TestResourceGroupEndtoEnd.java From helix with Apache License 2.0 | 6 votes |
public HelixManager start() throws Exception { HelixManager manager = null; // zk cluster manager if (_clusterMangerType.equalsIgnoreCase("zk")) { manager = HelixManagerFactory.getZKHelixManager(_clusterName, _instanceName, InstanceType.PARTICIPANT, _zkConnectString); } else { throw new IllegalArgumentException("Unsupported cluster manager type:" + _clusterMangerType); } MockOnlineOfflineStateModelFactory stateModelFactory2 = new MockOnlineOfflineStateModelFactory(_transDelayInMs, _resourceName, _resourceTag, _instanceName); // genericStateMachineHandler = new StateMachineEngine(); StateMachineEngine stateMach = manager.getStateMachineEngine(); stateMach.registerStateModelFactory("OnlineOffline", stateModelFactory2); manager.connect(); //manager.getMessagingService().registerMessageHandlerFactory(MessageType.STATE_TRANSITION.name(), genericStateMachineHandler); return manager; }
Example 4
Source File: TestControllerHistory.java From helix with Apache License 2.0 | 6 votes |
/**
 * Reads the controller leader history through an administrator connection, restarts the
 * controller 13 times and asserts the HISTORY list size goes from 1 to 10.
 *
 * @throws Exception if the Helix connection or a controller restart fails
 */
@Test()
public void testControllerLeaderHistory() throws Exception {
  HelixManager manager = HelixManagerFactory
      .getZKHelixManager(CLUSTER_NAME, "admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
  manager.connect();

  try {
    PropertyKey.Builder keyBuilder = new PropertyKey.Builder(CLUSTER_NAME);
    PropertyKey propertyKey = keyBuilder.controllerLeaderHistory();

    ControllerHistory controllerHistory = manager.getHelixDataAccessor().getProperty(propertyKey);
    Assert.assertNotNull(controllerHistory);
    List<String> list = controllerHistory.getRecord().getListField("HISTORY");
    Assert.assertEquals(list.size(), 1);

    // Replace the controller repeatedly; each restart uses a new controller name.
    for (int i = 0; i <= 12; i++) {
      _controller.syncStop();
      _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, "Controller-" + i);
      _controller.syncStart();
    }

    controllerHistory = manager.getHelixDataAccessor().getProperty(propertyKey);
    Assert.assertNotNull(controllerHistory);
    list = controllerHistory.getRecord().getListField("HISTORY");
    Assert.assertEquals(list.size(), 10);
  } finally {
    // Release the admin connection even when an assertion above fails.
    manager.disconnect();
  }
}
Example 5
Source File: ServiceDiscovery.java From helix with Apache License 2.0 | 6 votes |
public boolean register(final String serviceId, final ServiceMetadata serviceMetadata) throws Exception { HelixManager helixManager = HelixManagerFactory.getZKHelixManager(cluster, serviceId, InstanceType.PARTICIPANT, zkAddress); LiveInstanceInfoProvider liveInstanceInfoProvider = new LiveInstanceInfoProvider() { @Override public ZNRecord getAdditionalLiveInstanceInfo() { // serialize serviceMetadata to ZNRecord ZNRecord rec = new ZNRecord(serviceId); rec.setSimpleField("HOST", serviceMetadata.getHost()); rec.setSimpleField("PORT", String.valueOf(serviceMetadata.getPort())); rec.setSimpleField("SERVICE_NAME", serviceMetadata.getServiceName()); return rec; } }; helixManager.setLiveInstanceInfoProvider(liveInstanceInfoProvider); helixManager.connect(); serviceMap.put(serviceId, helixManager); refreshCache(); return true; }
Example 6
Source File: PerfBenchmarkDriver.java From incubator-pinot with Apache License 2.0 | 5 votes |
/**
 * Register and connect to Helix cluster as Spectator role.
 *
 * @param instanceId instance name used for the spectator manager
 * @return the connected manager
 * @throws RuntimeException wrapping the original failure if the connection cannot be established
 */
private HelixManager registerAndConnectAsHelixSpectator(String instanceId) {
  HelixManager helixManager =
      HelixManagerFactory.getZKHelixManager(_clusterName, instanceId, InstanceType.SPECTATOR, _zkAddress);
  try {
    helixManager.connect();
    return helixManager;
  } catch (Exception e) {
    String errorMsg =
        String.format("Exception when connecting the instance %s as Spectator role to Helix.", instanceId);
    LOGGER.error(errorMsg, e);
    // Chain the cause so callers that only see the RuntimeException can still diagnose it.
    throw new RuntimeException(errorMsg, e);
  }
}
Example 7
Source File: MockSpectatorProcess.java From helix with Apache License 2.0 | 5 votes |
/**
 * Connects a spectator manager and registers {@code _routingTableProvider} as its external view
 * change listener. Any failure is printed to stderr and otherwise ignored.
 */
public void start() {
  try {
    HelixManager spectator = HelixManagerFactory.getZKHelixManager(
        clusterName,
        null,
        InstanceType.SPECTATOR,
        zkConnectString);
    spectator.connect();
    spectator.addExternalViewChangeListener(_routingTableProvider);
  } catch (Exception failure) {
    failure.printStackTrace();
  }
}
Example 8
Source File: TestZeroReplicaAvoidance.java From helix with Apache License 2.0 | 5 votes |
@Test public void testDelayedRebalancer() throws Exception { System.out.println("START testDelayedRebalancer at " + new Date(System.currentTimeMillis())); HelixManager manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, null, InstanceType.SPECTATOR, ZK_ADDR); manager.connect(); manager.addExternalViewChangeListener(this); manager.addIdealStateChangeListener(this); enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true); // Start half number of nodes. int i = 0; for (; i < NUM_NODE / 2; i++) { _participants.get(i).syncStart(); } int replica = 3; int partition = 30; for (String stateModel : TestStateModels) { String db = "Test-DB-" + stateModel; createResourceWithDelayedRebalance(CLUSTER_NAME, db, stateModel, partition, replica, replica, 0); } Assert.assertTrue(_clusterVerifier.verifyByPolling(50000L, 100L)); _startListen = true; DelayedTransition.setDelay(5); // add the other half of nodes. for (; i < NUM_NODE; i++) { _participants.get(i).syncStart(); } Assert.assertTrue(_clusterVerifier.verify(70000L)); Assert.assertTrue(_testSuccess); if (manager.isConnected()) { manager.disconnect(); } System.out.println("END testDelayedRebalancer at " + new Date(System.currentTimeMillis())); }
Example 9
Source File: TestCorrectnessOnConnectivityLoss.java From helix with Apache License 2.0 | 5 votes |
@SuppressWarnings("deprecation") @Test public void testSpectator() throws Exception { Map<String, Integer> stateReachedCounts = Maps.newHashMap(); HelixManager participant = HelixManagerFactory.getZKHelixManager(_clusterName, "localhost_12918", InstanceType.PARTICIPANT, ZK_ADDR); participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new MyStateModelFactory(stateReachedCounts)); participant.connect(); RoutingTableProvider routingTableProvider = new RoutingTableProvider(); try { HelixManager spectator = HelixManagerFactory .getZKHelixManager(_clusterName, "spectator", InstanceType.SPECTATOR, ZK_ADDR); spectator.connect(); spectator.addConfigChangeListener(routingTableProvider); spectator.addExternalViewChangeListener(routingTableProvider); Thread.sleep(1000); // Now let's stop the ZK server; this should do nothing TestHelper.stopZkServer(_zkServer); Thread.sleep(1000); // Verify routing table still works Assert.assertEquals(routingTableProvider.getInstances("resource0", "ONLINE").size(), 1); Assert.assertEquals(routingTableProvider.getInstances("resource0", "OFFLINE").size(), 0); } finally { routingTableProvider.shutdown(); if (participant.isConnected()) { participant.disconnect(); } } }
Example 10
Source File: TestCorrectnessOnConnectivityLoss.java From helix with Apache License 2.0 | 5 votes |
@Test public void testParticipant() throws Exception { Map<String, Integer> stateReachedCounts = Maps.newHashMap(); HelixManager participant = HelixManagerFactory.getZKHelixManager(_clusterName, "localhost_12918", InstanceType.PARTICIPANT, ZK_ADDR); participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new MyStateModelFactory(stateReachedCounts)); participant.connect(); Thread.sleep(1000); // Ensure that the external view coalesces boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, _clusterName)); Assert.assertTrue(result); // Ensure that there was only one state transition Assert.assertEquals(stateReachedCounts.size(), 1); Assert.assertTrue(stateReachedCounts.containsKey("ONLINE")); Assert.assertEquals(stateReachedCounts.get("ONLINE").intValue(), 1); // Now let's stop the ZK server; this should do nothing TestHelper.stopZkServer(_zkServer); Thread.sleep(1000); // Verify no change Assert.assertEquals(stateReachedCounts.size(), 1); Assert.assertTrue(stateReachedCounts.containsKey("ONLINE")); Assert.assertEquals(stateReachedCounts.get("ONLINE").intValue(), 1); if (participant.isConnected()) { participant.disconnect(); } }
Example 11
Source File: TestBatchAddJobs.java From helix with Apache License 2.0 | 5 votes |
/**
 * Connects an administrator manager to {@code zkAddress}, wraps it in a {@link TaskDriver} and
 * derives the job name prefix from {@code index}.
 *
 * @param zkAddress ZooKeeper address the manager connects to
 * @param index number embedded in the job prefix ({@code JOB_<index>#})
 * @throws Exception if the manager cannot connect
 */
public SubmitJobTask(String zkAddress, int index) throws Exception {
  HelixManager adminManager = HelixManagerFactory.getZKHelixManager(
      CLUSTER_NAME, "Administrator", InstanceType.ADMINISTRATOR, zkAddress);
  adminManager.connect();

  _driver = new TaskDriver(adminManager);
  _jobPrefixName = "JOB_" + index + "#";
}
Example 12
Source File: DummyParticipant.java From helix with Apache License 2.0 | 5 votes |
public static void main(String[] args) { if (args.length < 3) { System.err.println("USAGE: DummyParticipant zkAddress clusterName instanceName"); System.exit(1); } String zkAddr = args[0]; String clusterName = args[1]; String instanceName = args[2]; HelixManager manager = null; try { manager = HelixManagerFactory.getZKHelixManager(clusterName, instanceName, InstanceType.PARTICIPANT, zkAddr); StateMachineEngine stateMach = manager.getStateMachineEngine(); DummyMSModelFactory msModelFactory = new DummyMSModelFactory(); stateMach.registerStateModelFactory("MasterSlave", msModelFactory); manager.connect(); Thread.currentThread().join(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (manager != null) { manager.disconnect(); } } }
Example 13
Source File: WorkflowsResource.java From helix with Apache License 2.0 | 4 votes |
@Override public Representation post(Representation entity) { try { String clusterName = (String) getRequest().getAttributes().get("clusterName"); Form form = new Form(entity); // Get the workflow and submit it if (form.size() < 1) { throw new HelixException("yaml workflow is required!"); } Parameter payload = form.get(0); String yamlPayload = payload.getName(); if (yamlPayload == null) { throw new HelixException("yaml workflow is required!"); } String zkAddr = (String) getContext().getAttributes().get(RestAdminApplication.ZKSERVERADDRESS); HelixManager manager = HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.ADMINISTRATOR, zkAddr); manager.connect(); try { Workflow workflow = Workflow.parse(yamlPayload); TaskDriver driver = new TaskDriver(manager); driver.start(workflow); } finally { manager.disconnect(); } getResponse().setEntity(getHostedEntitiesRepresentation(clusterName)); getResponse().setStatus(Status.SUCCESS_OK); } catch (Exception e) { getResponse().setEntity(ClusterRepresentationUtil.getErrorAsJsonStringFromException(e), MediaType.APPLICATION_JSON); getResponse().setStatus(Status.SUCCESS_OK); LOG.error("Error in posting " + entity, e); } return null; }
Example 14
Source File: MetricCollectorHAControllerTest.java From ambari-metrics with Apache License 2.0 | 4 votes |
@Test(timeout = 180000) public void testHAControllerDistributedAggregation() throws Exception { MetricCollectorHAController haController = new MetricCollectorHAController(configuration); haController.initializeHAController(); // Wait for task assignment Thread.sleep(10000); Assert.assertTrue(haController.isInitialized()); Assert.assertEquals(1, haController.getLiveInstanceHostNames().size()); Assert.assertTrue(haController.getAggregationTaskRunner().performsClusterAggregation()); Assert.assertTrue(haController.getAggregationTaskRunner().performsHostAggregation()); // Add new instance InstanceConfig instanceConfig2 = new InstanceConfig("h2_12001"); haController.admin.addInstance(CLUSTER_NAME, instanceConfig2); HelixManager manager2 = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, instanceConfig2.getInstanceName(), InstanceType.PARTICIPANT, haController.zkConnectUrl); manager2.getStateMachineEngine().registerStateModelFactory(DEFAULT_STATE_MODEL, new OnlineOfflineStateModelFactory(instanceConfig2.getInstanceName(), new AggregationTaskRunner(instanceConfig2.getInstanceName(), "", CLUSTER_NAME))); manager2.connect(); haController.admin.rebalance(CLUSTER_NAME, METRIC_AGGREGATORS, 1); // Wait on re-assignment of partitions Thread.sleep(10000); Assert.assertEquals(2, haController.getLiveInstanceHostNames().size()); ExternalView view = haController.admin.getResourceExternalView(CLUSTER_NAME, METRIC_AGGREGATORS); Map<String, String> partitionInstanceMap = new HashMap<>(); for (String partition : view.getPartitionSet()) { Map<String, String> states = view.getStateMap(partition); // (instance, state) pairs for (Map.Entry<String, String> stateEntry : states.entrySet()) { partitionInstanceMap.put(partition, stateEntry.getKey()); Assert.assertEquals("ONLINE", stateEntry.getValue()); } } // Re-assigned partitions Assert.assertEquals(2, partitionInstanceMap.size()); haController.getAggregationTaskRunner().stop(); haController.manager.disconnect(); }
Example 15
Source File: TestParticipantManager.java From helix with Apache License 2.0 | 4 votes |
@Test public void simpleIntegrationTest() throws Exception { int n = 1; TestHelper.setupCluster(clusterName, ZK_ADDR, 12918, // participant port "localhost", // participant name prefix "TestDB", // resource name prefix 1, // resources 4, // partitions per resource n, // number of nodes 1, // replicas "MasterSlave", true); // do rebalance HelixManager participant = new ZKHelixManager(clusterName, "localhost_12918", InstanceType.PARTICIPANT, ZK_ADDR); participant.getStateMachineEngine().registerStateModelFactory("MasterSlave", new MockMSModelFactory()); participant.connect(); HelixManager controller = new ZKHelixManager(clusterName, "controller_0", InstanceType.CONTROLLER, ZK_ADDR); controller.connect(); verifyHelixManagerMetrics(InstanceType.PARTICIPANT, MonitorLevel.DEFAULT, participant.getInstanceName()); verifyHelixManagerMetrics(InstanceType.CONTROLLER, MonitorLevel.DEFAULT, controller.getInstanceName()); BestPossibleExternalViewVerifier verifier = new BestPossibleExternalViewVerifier.Builder(clusterName).setZkClient(_gZkClient) .setZkAddr(ZK_ADDR).build(); Assert.assertTrue(verifier.verifyByPolling()); // cleanup controller.disconnect(); participant.disconnect(); // verify all live-instances and leader nodes are gone ZKHelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient)); PropertyKey.Builder keyBuilder = accessor.keyBuilder(); Assert.assertNull(accessor.getProperty(keyBuilder.liveInstance("localhost_12918"))); Assert.assertNull(accessor.getProperty(keyBuilder.controllerLeader())); }
Example 16
Source File: TestResourceAccessor.java From helix with Apache License 2.0 | 4 votes |
/** * Creates a setup where the health API can be tested. * @param clusterName * @param resourceName * @param idealStateParams * @param partitionReplicaStates maps partitionName to its replicas' states * @throws Exception */ private void createDummyMapping(String clusterName, String resourceName, Map<String, String> idealStateParams, Map<String, List<String>> partitionReplicaStates) throws Exception { IdealState idealState = new IdealState(resourceName); idealState.setMinActiveReplicas(Integer.parseInt(idealStateParams.get("MinActiveReplicas"))); // 2 idealState.setStateModelDefRef(idealStateParams.get("StateModelDefRef")); // MasterSlave idealState.setMaxPartitionsPerInstance( Integer.parseInt(idealStateParams.get("MaxPartitionsPerInstance"))); // 3 idealState.setReplicas(idealStateParams.get("Replicas")); // 3 idealState.setNumPartitions(Integer.parseInt(idealStateParams.get("NumPartitions"))); // 3 idealState.enable(false); Map<String, List<String>> partitionNames = new LinkedHashMap<>(); List<String> dummyPrefList = new ArrayList<>(); for (int i = 0; i < Integer.parseInt(idealStateParams.get("MaxPartitionsPerInstance")); i++) { dummyPrefList.add(ANY_INSTANCE); partitionNames.put("p" + i, dummyPrefList); } idealState.getRecord().getListFields().putAll(partitionNames); if (!_gSetupTool.getClusterManagementTool().getClusters().contains(clusterName)) { _gSetupTool.getClusterManagementTool().addCluster(clusterName); } _gSetupTool.getClusterManagementTool().setResourceIdealState(clusterName, resourceName, idealState); // Set ExternalView's replica states for a given parameter map ExternalView externalView = new ExternalView(resourceName); Map<String, Map<String, String>> mappingCurrent = new LinkedHashMap<>(); List<String> partitionReplicaStatesList = new ArrayList<>(partitionReplicaStates.keySet()); for (int k = 0; k < partitionReplicaStatesList.size(); k++) { Map<String, String> replicaStatesForPartition = new LinkedHashMap<>(); List<String> replicaStateList = 
partitionReplicaStates.get(partitionReplicaStatesList.get(k)); for (int i = 0; i < replicaStateList.size(); i++) { replicaStatesForPartition.put("r" + i, replicaStateList.get(i)); } mappingCurrent.put("p" + k, replicaStatesForPartition); } externalView.getRecord().getMapFields().putAll(mappingCurrent); HelixManager helixManager = HelixManagerFactory.getZKHelixManager(clusterName, "p1", InstanceType.ADMINISTRATOR, ZK_ADDR); helixManager.connect(); HelixDataAccessor helixDataAccessor = helixManager.getHelixDataAccessor(); helixDataAccessor.setProperty(helixDataAccessor.keyBuilder().externalView(resourceName), externalView); System.out.println("End test :" + TestHelper.getTestMethodName()); }
Example 17
Source File: TestZKLiveInstanceData.java From helix with Apache License 2.0 | 4 votes |
@Test public void testDataChange() throws Exception { // Create an admin and add LiveInstanceChange listener to it HelixManager adminManager = HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.ADMINISTRATOR, ZK_ADDR); adminManager.connect(); final BlockingQueue<List<LiveInstance>> changeList = new LinkedBlockingQueue<List<LiveInstance>>(); adminManager.addLiveInstanceChangeListener(new LiveInstanceChangeListener() { @Override public void onLiveInstanceChange(List<LiveInstance> liveInstances, NotificationContext changeContext) { // The queue is basically unbounded, so shouldn't throw exception when calling // "add". changeList.add(deepCopy(liveInstances)); } }); // Check the initial condition List<LiveInstance> instances = changeList.poll(1, TimeUnit.SECONDS); Assert.assertNotNull(instances, "Expecting a list of live instance"); Assert.assertTrue(instances.isEmpty(), "Expecting an empty list of live instance"); // Join as participant, should trigger a live instance change event HelixManager manager = HelixManagerFactory.getZKHelixManager(clusterName, "localhost_54321", InstanceType.PARTICIPANT, ZK_ADDR); manager.connect(); instances = changeList.poll(1, TimeUnit.SECONDS); Assert.assertNotNull(instances, "Expecting a list of live instance"); Assert.assertEquals(instances.size(), 1, "Expecting one live instance"); Assert.assertEquals(instances.get(0).getInstanceName(), manager.getInstanceName()); // Update data in the live instance node, should trigger another live instance change // event HelixDataAccessor helixDataAccessor = manager.getHelixDataAccessor(); PropertyKey propertyKey = helixDataAccessor.keyBuilder().liveInstance(manager.getInstanceName()); LiveInstance instance = helixDataAccessor.getProperty(propertyKey); Map<String, String> map = new TreeMap<String, String>(); map.put("k1", "v1"); instance.getRecord().setMapField("test", map); Assert.assertTrue(helixDataAccessor.updateProperty(propertyKey, instance), "Failed to update live instance 
node"); instances = changeList.poll(1, TimeUnit.SECONDS); Assert.assertNotNull(instances, "Expecting a list of live instance"); Assert.assertEquals(instances.get(0).getRecord().getMapField("test"), map, "Wrong map data."); manager.disconnect(); Thread.sleep(1000); // wait for callback finish instances = changeList.poll(1, TimeUnit.SECONDS); Assert.assertNotNull(instances, "Expecting a list of live instance"); Assert.assertTrue(instances.isEmpty(), "Expecting an empty list of live instance"); adminManager.disconnect(); }
Example 18
Source File: TestZeroReplicaAvoidance.java From helix with Apache License 2.0 | 4 votes |
@Test public void testWagedRebalancer() throws Exception { System.out.println("START testWagedRebalancer at " + new Date(System.currentTimeMillis())); HelixManager manager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, null, InstanceType.SPECTATOR, ZK_ADDR); manager.connect(); manager.addExternalViewChangeListener(this); manager.addIdealStateChangeListener(this); enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true); // Start half number of nodes. int i = 0; for (; i < NUM_NODE / 2; i++) { _participants.get(i).syncStart(); } int replica = 3; int partition = 30; for (String stateModel : TestStateModels) { String db = "Test-DB-" + stateModel; createResourceWithWagedRebalance(CLUSTER_NAME, db, stateModel, partition, replica, replica); } // TODO remove this sleep after fix https://github.com/apache/helix/issues/526 Thread.sleep(1000); Assert.assertTrue(_clusterVerifier.verifyByPolling(50000L, 100L)); _startListen = true; DelayedTransition.setDelay(5); // add the other half of nodes. for (; i < NUM_NODE; i++) { _participants.get(i).syncStart(); } Assert.assertTrue(_clusterVerifier.verify(70000L)); Assert.assertTrue(_testSuccess); if (manager.isConnected()) { manager.disconnect(); } System.out.println("END testWagedRebalancer at " + new Date(System.currentTimeMillis())); }
Example 19
Source File: ClusterIntegrationTest.java From incubator-gobblin with Apache License 2.0 | 4 votes |
@Test void testJobShouldGetCancelled() throws Exception { // Cancellation usually needs long time to successfully be executed, therefore setting the sleeping time to 100. Config jobConfigOverrides = ClusterIntegrationTestUtils.buildSleepingJob(IntegrationJobCancelSuite.JOB_ID, IntegrationJobCancelSuite.TASK_STATE_FILE) .withValue(SleepingTask.SLEEP_TIME_IN_SECONDS, ConfigValueFactory.fromAnyRef(100)); this.suite = new IntegrationJobCancelSuite(jobConfigOverrides); HelixManager helixManager = getHelixManager(); suite.startCluster(); helixManager.connect(); ExecutorService executor = Executors.newSingleThreadExecutor(); Runnable cancelAfterTaskInit = () -> { try { TaskDriver taskDriver = new TaskDriver(helixManager); // The actual cancellation needs to be executed in separated thread to make the cancel of helix is not blocked by // SleepingTask's thread in its own thread. // Issue the cancel after ensuring the workflow is created and the SleepingTask is running AssertWithBackoff.create().maxSleepMs(1000).backoffFactor(1). assertTrue(isTaskStarted(helixManager, IntegrationJobCancelSuite.JOB_ID), "Waiting for the job to start..."); AssertWithBackoff.create().maxSleepMs(100).timeoutMs(2000).backoffFactor(1). 
assertTrue(isTaskRunning(IntegrationJobCancelSuite.TASK_STATE_FILE), "Waiting for the task to enter running state"); log.info("Stopping the job"); taskDriver.stop(IntegrationJobCancelSuite.JOB_ID); suite.shutdownCluster(); } catch (Exception e) { throw new RuntimeException("Failure in canceling tasks"); } }; FutureTask<String> futureTask = new FutureTask<String>( cancelAfterTaskInit, "cancelled"); executor.submit(futureTask); AssertWithBackoff assertWithBackoff = AssertWithBackoff.create().backoffFactor(1).maxSleepMs(1000).timeoutMs(500000); assertWithBackoff.assertTrue(new Predicate<Void>() { @Override public boolean apply(Void input) { return futureTask.isDone(); } }, "waiting for future to complete"); Assert.assertEquals(futureTask.get(), "cancelled"); suite.waitForAndVerifyOutputFiles(); }
Example 20
Source File: DatacenterInitializer.java From ambry with Apache License 2.0 | 4 votes |
/**
 * Perform initialization for a helix-managed datacenter of servers.
 *
 * <p>Steps, in the order they appear below: pick or connect the {@link HelixManager}, create the
 * cluster change handler selected by {@code clusterMapClusterChangeHandlerType}, register it for
 * routing table, data node config, ideal state and live instance changes, seed it with the
 * current routing table snapshot, and wait for the initial notification if that snapshot has no
 * instance configs. The registration order is significant (see the inline comments).
 *
 * @return the {@link DcInfo} for the datacenter.
 * @throws IllegalArgumentException if the configured cluster change handler type is neither
 *         {@code SimpleClusterChangeHandler} nor {@code DynamicClusterChangeHandler}
 * @throws Exception if something went wrong during startup
 */
private DcInfo initializeHelixDatacenter() throws Exception {
  // For now, the first ZK endpoint (if there are more than one endpoints) will be adopted by default for initialization.
  // Note that, Ambry currently doesn't support multiple spectators, because there should be only one source of truth.
  String zkConnectStr = dcZkInfo.getZkConnectStrs().get(0);
  HelixManager manager;
  // The local datacenter reuses the already-provided localManager; any other datacenter gets a
  // newly connected spectator manager.
  if (dcZkInfo.getDcName().equals(clusterMapConfig.clusterMapDatacenterName)) {
    manager = Objects.requireNonNull(localManager, "localManager should have been set");
  } else {
    manager = helixFactory.getZKHelixManager(clusterMapConfig.clusterMapClusterName, selfInstanceName,
        InstanceType.SPECTATOR, zkConnectStr);
    logger.info("Connecting to Helix manager at {}", zkConnectStr);
    manager.connect();
    logger.info("Established connection to Helix manager at {}", zkConnectStr);
  }
  HelixClusterChangeHandler clusterChangeHandler;
  // The handler implementation is chosen by comparing the configured name with the simple class
  // names of the two supported handlers.
  String clusterChangeHandlerType = clusterMapConfig.clusterMapClusterChangeHandlerType;
  if (clusterChangeHandlerType.equals(SimpleClusterChangeHandler.class.getSimpleName())) {
    clusterChangeHandler =
        new SimpleClusterChangeHandler(clusterMapConfig, dcName, selfInstanceName, partitionOverrideInfoMap,
            partitionMap, partitionNameToAmbryPartition, ambryPartitionToAmbryReplicas, helixClusterManagerCallback,
            helixClusterManagerMetrics, this::onInitializationFailure, sealedStateChangeCounter);
  } else if (clusterChangeHandlerType.equals(DynamicClusterChangeHandler.class.getSimpleName())) {
    clusterChangeHandler =
        new DynamicClusterChangeHandler(clusterMapConfig, dcName, selfInstanceName, partitionOverrideInfoMap,
            helixClusterManagerCallback, clusterChangeHandlerCallback, helixClusterManagerMetrics,
            this::onInitializationFailure, sealedStateChangeCounter);
  } else {
    throw new IllegalArgumentException("Unsupported cluster change handler type: " + clusterChangeHandlerType);
  }
  // Create RoutingTableProvider of each DC to keep track of partition(replicas) state. Here, we use current
  // state based RoutingTableProvider to remove dependency on Helix's pipeline and reduce notification latency.
  logger.info("Creating routing table provider associated with Helix manager at {}", zkConnectStr);
  RoutingTableProvider routingTableProvider = new RoutingTableProvider(manager, PropertyType.CURRENTSTATES);
  logger.info("Routing table provider is created in {}", dcName);
  routingTableProvider.addRoutingTableChangeListener(clusterChangeHandler, null);
  logger.info("Registered routing table change listeners in {}", dcName);

  // The initial instance config change notification is required to populate the static cluster
  // information, and only after that is complete do we want the live instance change notification to
  // come in. We do not need to do anything extra to ensure this, however, since Helix provides the initial
  // notification for a change from within the same thread that adds the listener, in the context of the add
  // call. Therefore, when the call to add a listener returns, the initial notification will have been
  // received and handled.
  DataNodeConfigSource dataNodeConfigSource = new InstanceConfigToDataNodeConfigAdapter(manager, clusterMapConfig);
  dataNodeConfigSource.addDataNodeConfigChangeListener(clusterChangeHandler);
  logger.info("Registered instance config change listeners for Helix manager at {}", zkConnectStr);
  manager.addIdealStateChangeListener(clusterChangeHandler);
  logger.info("Registered ideal state change listeners for Helix manager at {}", zkConnectStr);
  // Now register listeners to get notified on live instance change in every datacenter.
  manager.addLiveInstanceChangeListener(clusterChangeHandler);
  logger.info("Registered live instance change listeners for Helix manager at {}", zkConnectStr);

  // in case initial event occurs before adding routing table listener, here we explicitly set snapshot in
  // ClusterChangeHandler. The reason is, if listener missed initial event, snapshot inside routing table
  // provider should be already populated.
  clusterChangeHandler.setRoutingTableSnapshot(routingTableProvider.getRoutingTableSnapshot());
  // the initial routing table change should populate the instanceConfigs. If it's empty that means initial
  // change didn't come and thread should wait on the init latch to ensure routing table snapshot is non-empty
  if (clusterChangeHandler.getRoutingTableSnapshot().getInstanceConfigs().isEmpty()) {
    // Periodic refresh in routing table provider is enabled by default. In worst case, routerUpdater should
    // trigger routing table change within 5 minutes
    logger.info("Routing table snapshot in {} is currently empty. Waiting for initial notification.", dcName);
    clusterChangeHandler.waitForInitNotification();
  }

  // When cross-colo listening is disabled, a non-local manager is disconnected right after
  // initialization. The (now disconnected) manager is still handed to HelixDcInfo below.
  // NOTE(review): an exception thrown between connect() and this point leaves a non-local
  // manager connected — confirm whether the caller cleans up on failure.
  if (!clusterMapConfig.clustermapListenCrossColo && manager != localManager) {
    manager.disconnect();
    logger.info("Stopped listening to cross colo ZK server {}", zkConnectStr);
  }

  return new HelixDcInfo(dcName, dcZkInfo, manager, clusterChangeHandler);
}