Java Code Examples for org.apache.helix.ConfigAccessor#setClusterConfig()

The following examples show how to use org.apache.helix.ConfigAccessor#setClusterConfig(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestJobFailureTaskNotStarted.java    From helix with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the cluster for this test class: configures the test dimensions, creates the cluster,
 * sets up participants and databases, starts the controller, and enables state-transition
 * cancellation in the cluster config.
 *
 * @throws Exception if any setup step fails
 */
@BeforeClass
public void beforeClass() throws Exception {
  _numDbs = 1;
  _numNodes = 2;
  _numPartitions = 2;
  _numReplicas = 1;
  // Allocate the participant array only after _numNodes holds this test's value; allocating it
  // first would size it from whatever _numNodes held before this method ran.
  _participants = new MockParticipantManager[_numNodes];

  _gSetupTool.addCluster(CLUSTER_NAME, true);
  setupParticipants();
  setupDBs();
  startParticipantsWithStuckTaskStateModelFactory();
  createManagers();
  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX);
  _controller.syncStart();

  // Enable cancellation
  ConfigAccessor configAccessor = new ConfigAccessor(_gZkClient);
  ClusterConfig clusterConfig = configAccessor.getClusterConfig(CLUSTER_NAME);
  clusterConfig.stateTransitionCancelEnabled(true);
  configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);

  _clusterVerifier =
      new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkAddr(ZK_ADDR).build();
}
 
Example 2
Source File: TestRoutingTableProviderFromCurrentStates.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the cluster, registers and starts NUM_NODES participants, connects an administrator
 * manager, starts the controller, and finally enables the target external view in the cluster
 * config.
 *
 * @throws Exception if any setup step fails
 */
@BeforeClass
public void beforeClass() throws Exception {
  _gSetupTool.addCluster(CLUSTER_NAME, true);
  _participants = new MockParticipantManager[NUM_NODES];

  // Register every instance with the cluster before any participant is started.
  for (int idx = 0; idx < NUM_NODES; idx++) {
    _gSetupTool.addInstanceToCluster(CLUSTER_NAME, PARTICIPANT_PREFIX + "_" + (START_PORT + idx));
  }

  for (int idx = 0; idx < NUM_NODES; idx++) {
    MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME,
        PARTICIPANT_PREFIX + "_" + (START_PORT + idx));
    _participants[idx] = participant;
    participant.syncStart();
  }

  _manager = HelixManagerFactory
      .getZKHelixManager(CLUSTER_NAME, "Admin", InstanceType.ADMINISTRATOR, ZK_ADDR);
  _manager.connect();

  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX + "_0");
  _controller.syncStart();

  // Read-modify-write of the cluster config through the manager's own accessor.
  ConfigAccessor accessor = _manager.getConfigAccessor();
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.enableTargetExternalView(true);
  accessor.setClusterConfig(CLUSTER_NAME, config);
}
 
Example 3
Source File: TestJobTimeoutTaskNotStarted.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up a single-node cluster with one database of 50 partitions, starts the controller,
 * enables state-transition cancellation, raises the per-instance concurrent task limit to the
 * partition count, and asserts that the cluster verifier passes.
 *
 * @throws Exception if any setup step fails
 */
@BeforeClass
public void beforeClass() throws Exception {
  _numNodes = 1;
  _numDbs = 1;
  _numReplicas = 1;
  _numPartitions = 50;
  _participants = new MockParticipantManager[_numNodes];

  _gSetupTool.addCluster(CLUSTER_NAME, true);
  setupParticipants();
  setupDBs();
  startParticipantsWithStuckTaskStateModelFactory();
  createManagers();

  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX);
  _controller.syncStart();

  // Enable cancellation and allow as many concurrent tasks per instance as there are partitions.
  ConfigAccessor accessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.stateTransitionCancelEnabled(true);
  config.setMaxConcurrentTaskPerInstance(_numPartitions);
  accessor.setClusterConfig(CLUSTER_NAME, config);

  _clusterVerifier =
      new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient).build();

  boolean converged = _clusterVerifier.verifyByPolling(10000, 100);
  Assert.assertTrue(converged);
}
 
Example 4
Source File: ZKHelixAdmin.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Turns batch message mode on or off for a cluster by rewriting its cluster config.
 *
 * @param clusterName name of the cluster to update
 * @param enabled whether batch message mode should be enabled
 * @throws HelixException if the cluster has not been set up
 */
@Override
public void enableBatchMessageMode(String clusterName, boolean enabled) {
  String action = enabled ? "Enable" : "Disable";
  logger.info("{} batch message mode for cluster {}.", action, clusterName);

  boolean clusterIsSetup = ZKUtil.isClusterSetup(clusterName, _zkClient);
  if (!clusterIsSetup) {
    throw new HelixException("cluster " + clusterName + " is not setup yet");
  }

  // Read the current config, flip the flag, and write the whole config back.
  ConfigAccessor configAccessor = new ConfigAccessor(_zkClient);
  ClusterConfig config = configAccessor.getClusterConfig(clusterName);
  config.setBatchMessageMode(enabled);
  configAccessor.setClusterConfig(clusterName, config);
}
 
Example 5
Source File: HelixVcrPopulateTool.java    From ambry with Apache License 2.0 5 votes vote down vote up
/**
 * Set the cluster config in the destination cluster using the latest settings.
 * In dry-run mode the config that would be written is only printed; nothing is persisted.
 * @param destZkClient the {@link HelixZkClient} for the cluster.
 * @param destClusterName the cluster name.
 * @param dryRun run without actual change.
 */
static void setClusterConfig(HelixZkClient destZkClient, String destClusterName, boolean dryRun) {
  ConfigAccessor configAccessor = new ConfigAccessor(destZkClient);
  ClusterConfig clusterConfig = configAccessor.getClusterConfig(destClusterName);
  clusterConfig.setPersistBestPossibleAssignment(true);
  // if offline instances >= 4, helix enters maintenance mode.
  clusterConfig.setMaxOfflineInstancesAllowed(MAX_OFFLINE_INSTANCES_ALLOWED);
  // if offline instances <= 2, helix exit maintenance mode.
  clusterConfig.setNumOfflineInstancesForAutoExit(NUM_OFFLINE_INSTANCES_FOR_AUTO_EXIT);
  if (dryRun) {
    // Dry run must not touch the cluster: report the intended config and skip the write.
    System.out.println("Will update cluster config to: " + clusterConfig.toString());
  } else {
    configAccessor.setClusterConfig(destClusterName, clusterConfig);
  }
}
 
Example 6
Source File: ZkTestBase.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the cluster config with the delayed-rebalance enabled flag and the rebalance delay
 * time, then writes the config back.
 *
 * @param zkClient client used to build the {@link ConfigAccessor}
 * @param clusterName name of the cluster to update
 * @param enabled value passed to the delayed-rebalance enabled setter
 * @param delay value passed to the rebalance delay time setter
 */
protected void enableDelayRebalanceInCluster(HelixZkClient zkClient, String clusterName,
    boolean enabled, long delay) {
  final ConfigAccessor accessor = new ConfigAccessor(zkClient);
  final ClusterConfig config = accessor.getClusterConfig(clusterName);

  config.setDelayRebalaceEnabled(enabled);
  config.setRebalanceDelayTime(delay);

  accessor.setClusterConfig(clusterName, config);
}
 
Example 7
Source File: TestWagedRebalanceTopologyAware.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the cluster, stores the topology definition and fault zone type in the cluster config,
 * adds the instance configs, starts the participants and the controller, and then enables
 * persisting of the best possible assignment and topology-aware rebalance.
 *
 * @throws Exception if any setup step fails
 */
@BeforeClass
public void beforeClass() throws Exception {
  System.out.println("START " + CLASS_NAME + " at " + new Date(System.currentTimeMillis()));

  _gSetupTool.addCluster(CLUSTER_NAME, true);

  // Topology settings are written before any instance is added.
  ConfigAccessor accessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.setTopology(TOLOPOGY_DEF);
  config.setFaultZoneType(FAULT_ZONE);
  accessor.setClusterConfig(CLUSTER_NAME, config);

  for (int nodeIdx = 0; nodeIdx < NUM_NODE; nodeIdx++) {
    addInstanceConfig(PARTICIPANT_PREFIX + "_" + (START_PORT + nodeIdx), nodeIdx, ZONES, TAGS);
  }

  // start dummy participants
  for (String nodeName : _nodes) {
    MockParticipantManager started = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, nodeName);
    started.syncStart();
    _participants.add(started);
  }

  // start controller
  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX + "_0");
  _controller.syncStart();

  enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);
  enableTopologyAwareRebalance(_gZkClient, CLUSTER_NAME, true);
}
 
Example 8
Source File: TestClusterInMaintenanceModeWhenReachingMaxPartition.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Limits partitions per instance to 10, creates one delayed-rebalance resource per state model,
 * and checks that no maintenance signal exists. Then stops every participant from index 2 on
 * and checks that a maintenance signal with a reason has appeared.
 *
 * @throws Exception if cluster operations fail or the thread is interrupted while sleeping
 */
@Test
public void testDisableCluster() throws Exception {
  ConfigAccessor accessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.setMaxPartitionsPerInstance(10);
  accessor.setClusterConfig(CLUSTER_NAME, config);

  int dbIndex = 0;
  for (String stateModel : TestStateModels) {
    String dbName = "Test-DB-" + dbIndex;
    dbIndex++;
    final int replicaCount = 3;
    createResourceWithDelayedRebalance(CLUSTER_NAME, dbName, stateModel, _PARTITIONS,
        replicaCount, replicaCount, -1);
    _testDBs.add(dbName);
  }
  Thread.sleep(100L);
  Assert.assertTrue(_clusterVerifier.verifyByPolling());

  // With all participants running there must be no maintenance signal.
  MaintenanceSignal signal =
      _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
  Assert.assertNull(signal);

  // Stop every participant except the first two.
  for (int nodeIdx = 2; nodeIdx < NUM_NODE; nodeIdx++) {
    _participants.get(nodeIdx).syncStop();
  }

  Thread.sleep(1000L);
  signal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
  Assert.assertNotNull(signal);
  Assert.assertNotNull(signal.getReason());
}
 
Example 9
Source File: TestCrushAutoRebalanceNonRack.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the cluster with an instance-level topology, adds NUM_NODE instances (each tagged
 * "tag-0" or "tag-1" and given its own domain), starts the participants and the controller, and
 * enables persisting of the best possible assignment.
 *
 * @throws Exception if any setup step fails
 */
@BeforeClass
public void beforeClass() throws Exception {
  System.out.println("START " + CLASS_NAME + " at " + new Date(System.currentTimeMillis()));

  _gSetupTool.addCluster(CLUSTER_NAME, true);

  ConfigAccessor accessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.setTopology("/instance");
  config.setFaultZoneType("instance");
  accessor.setClusterConfig(CLUSTER_NAME, config);

  for (int nodeIdx = 0; nodeIdx < NUM_NODE; nodeIdx++) {
    String nodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + nodeIdx);
    _gSetupTool.addInstanceToCluster(CLUSTER_NAME, nodeName);
    _nodes.add(nodeName);

    // Alternate the two tags across nodes.
    String nodeTag = "tag-" + nodeIdx % 2;
    _gSetupTool.getClusterManagementTool().addInstanceTag(CLUSTER_NAME, nodeName, nodeTag);
    _nodeToTagMap.put(nodeName, nodeTag);

    // Each instance forms its own domain.
    InstanceConfig nodeConfig = accessor.getInstanceConfig(CLUSTER_NAME, nodeName);
    nodeConfig.setDomain("instance=" + nodeName);
    accessor.setInstanceConfig(CLUSTER_NAME, nodeName, nodeConfig);
  }

  // start dummy participants
  for (String nodeName : _nodes) {
    MockParticipantManager started = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, nodeName);
    started.syncStart();
    _participants.add(started);
  }

  // start controller
  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX + "_0");
  _controller.syncStart();

  enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);
}
 
Example 10
Source File: ZkTestBase.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the delayed-rebalance enabled flag in the cluster config and writes the config back.
 *
 * @param zkClient client used to build the {@link ConfigAccessor}
 * @param clusterName name of the cluster to update
 * @param enabled value passed to the delayed-rebalance enabled setter
 */
protected void enableDelayRebalanceInCluster(HelixZkClient zkClient, String clusterName,
    boolean enabled) {
  final ConfigAccessor accessor = new ConfigAccessor(zkClient);
  final ClusterConfig config = accessor.getClusterConfig(clusterName);

  config.setDelayRebalaceEnabled(enabled);

  accessor.setClusterConfig(clusterName, config);
}
 
Example 11
Source File: ZkTestBase.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the topology-aware enabled flag in the cluster config and writes the config back.
 *
 * @param zkClient client used to build the {@link ConfigAccessor}
 * @param clusterName name of the cluster to update
 * @param enabled value passed to the topology-aware enabled setter
 */
protected void enableTopologyAwareRebalance(HelixZkClient zkClient, String clusterName,
    Boolean enabled) {
  final ConfigAccessor accessor = new ConfigAccessor(zkClient);
  final ClusterConfig config = accessor.getClusterConfig(clusterName);

  config.setTopologyAwareEnabled(enabled);

  accessor.setClusterConfig(clusterName, config);
}
 
Example 12
Source File: TestQuotaConstraintSkipWorkflowAssignment.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Starts ten single-job workflows, sets the task quota ratios (3 for the default quota type,
 * 37 for "OtherType"), runs the resource computation, current state computation and task
 * scheduling stages, and asserts that the default quota type has no global capacity left and
 * that the best possible state map holds exactly three entries.
 *
 * @throws Exception if starting workflows or running a stage fails
 */
@Test
public void testQuotaConstraintSkipWorkflowAssignment() throws Exception {
  ClusterEvent clusterEvent = new ClusterEvent(ClusterEventType.Unknown);
  WorkflowControllerDataProvider dataProvider =
      new WorkflowControllerDataProvider(CLUSTER_NAME);
  JobConfig.Builder jobBuilder = new JobConfig.Builder();

  jobBuilder.setJobCommandConfigMap(Collections.singletonMap(MockTask.JOB_DELAY, "100000"));
  TaskDriver taskDriver = new TaskDriver(_manager);
  for (int wfIdx = 0; wfIdx < 10; wfIdx++) {
    String workflowName = "Workflow" + wfIdx;
    Workflow.Builder workflowBuilder = new Workflow.Builder(workflowName);
    jobBuilder.setWorkflow(workflowName);
    TaskConfig task =
        new TaskConfig(MockTask.TASK_COMMAND, new HashMap<String, String>(), null, null);
    jobBuilder.addTaskConfigMap(Collections.singletonMap(task.getId(), task));
    jobBuilder.setJobId(TaskUtil.getNamespacedJobName(workflowName, "JOB"));
    workflowBuilder.addJob("JOB", jobBuilder);
    taskDriver.start(workflowBuilder.build());
  }

  // Persist the quota ratios before the cache is refreshed so the stages see them.
  ConfigAccessor configAccessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = configAccessor.getClusterConfig(CLUSTER_NAME);
  config.setTaskQuotaRatio(AssignableInstance.DEFAULT_QUOTA_TYPE, 3);
  config.setTaskQuotaRatio("OtherType", 37);
  configAccessor.setClusterConfig(CLUSTER_NAME, config);

  dataProvider.refresh(_manager.getHelixDataAccessor());
  clusterEvent.addAttribute(AttributeName.ControllerDataProvider.name(), dataProvider);
  clusterEvent.addAttribute(AttributeName.helixmanager.name(), _manager);

  runStage(clusterEvent, new ResourceComputationStage());
  runStage(clusterEvent, new CurrentStateComputationStage());
  runStage(clusterEvent, new TaskSchedulingStage());

  Assert.assertTrue(!dataProvider.getAssignableInstanceManager()
      .hasGlobalCapacity(AssignableInstance.DEFAULT_QUOTA_TYPE));
  BestPossibleStateOutput bestPossible =
      clusterEvent.getAttribute(AttributeName.BEST_POSSIBLE_STATE.name());
  Assert.assertTrue(bestPossible.getStateMap().size() == 3);
}
 
Example 13
Source File: VcrTestUtil.java    From ambry with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the VCR cluster on ZooKeeper, configures it, adds the Helix resource built from the
 * cluster map's partitions, and starts a {@link HelixControllerManager} for it. The ZooKeeper
 * client opened by this method is closed before returning, whether or not setup succeeds.
 * @param zkConnectString zk connect string to zk server.
 * @param vcrClusterName the vcr cluster name.
 * @param clusterMap the {@link ClusterMap} to use.
 * @return the created {@link HelixControllerManager}.
 */
public static HelixControllerManager populateZkInfoAndStartController(String zkConnectString, String vcrClusterName,
    ClusterMap clusterMap) {
  HelixZkClient client = DedicatedZkClientFactory.getInstance()
      .buildZkClient(new HelixZkClient.ZkConnectionConfig(zkConnectString), new HelixZkClient.ZkClientConfig());
  try {
    client.setZkSerializer(new ZNRecordSerializer());
    ClusterSetup setup = new ClusterSetup(client);
    setup.addCluster(vcrClusterName, true);
    HelixAdmin helixAdmin = new ZKHelixAdmin(client);

    // set ALLOW_PARTICIPANT_AUTO_JOIN
    HelixConfigScope clusterScope =
        new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER).forCluster(vcrClusterName)
            .build();
    Map<String, String> clusterProperties = new HashMap<>();
    clusterProperties.put(ZKHelixManager.ALLOW_PARTICIPANT_AUTO_JOIN, String.valueOf(true));
    helixAdmin.setConfig(clusterScope, clusterProperties);

    // set PersistBestPossibleAssignment
    ConfigAccessor accessor = new ConfigAccessor(client);
    ClusterConfig config = accessor.getClusterConfig(vcrClusterName);
    config.setPersistBestPossibleAssignment(true);
    accessor.setClusterConfig(vcrClusterName, config);

    // Build the ideal state with one entry per partition in the cluster map.
    FullAutoModeISBuilder idealStateBuilder = new FullAutoModeISBuilder(helixResource);
    idealStateBuilder.setStateModel(LeaderStandbySMD.name);
    for (PartitionId partition : clusterMap.getAllPartitionIds(null)) {
      idealStateBuilder.add(partition.toPathString());
    }
    idealStateBuilder.setRebalanceStrategy(CrushEdRebalanceStrategy.class.getName());
    IdealState resourceIdealState = idealStateBuilder.build();
    helixAdmin.addResource(vcrClusterName, helixResource, resourceIdealState);
    helixAdmin.rebalance(vcrClusterName, helixResource, 3, "", "");

    HelixControllerManager controller = new HelixControllerManager(zkConnectString, vcrClusterName);
    controller.syncStart();
    return controller;
  } finally {
    client.close();
  }
}
 
Example 14
Source File: ZkTestBase.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the persist-best-possible-assignment flag in the cluster config and writes the config
 * back.
 *
 * @param zkClient client used to build the {@link ConfigAccessor}
 * @param clusterName name of the cluster to update
 * @param enabled value passed to the persist-best-possible-assignment setter
 */
protected void enablePersistBestPossibleAssignment(HelixZkClient zkClient, String clusterName,
    Boolean enabled) {
  final ConfigAccessor accessor = new ConfigAccessor(zkClient);
  final ClusterConfig config = accessor.getClusterConfig(clusterName);

  config.setPersistBestPossibleAssignment(enabled);

  accessor.setClusterConfig(clusterName, config);
}
 
Example 15
Source File: ZkTestBase.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the persist-intermediate-assignment flag in the cluster config and writes the config
 * back.
 *
 * @param zkClient client used to build the {@link ConfigAccessor}
 * @param clusterName name of the cluster to update
 * @param enabled value passed to the persist-intermediate-assignment setter
 */
protected void enablePersistIntermediateAssignment(HelixZkClient zkClient, String clusterName,
    Boolean enabled) {
  final ConfigAccessor accessor = new ConfigAccessor(zkClient);
  final ClusterConfig config = accessor.getClusterConfig(clusterName);

  config.setPersistIntermediateAssignment(enabled);

  accessor.setClusterConfig(clusterName, config);
}
 
Example 16
Source File: ZkTestBase.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Updates the rebalance delay time in the cluster config and writes the config back.
 *
 * @param zkClient client used to build the {@link ConfigAccessor}
 * @param clusterName name of the cluster to update
 * @param delay value passed to the rebalance delay time setter
 */
protected void setDelayTimeInCluster(HelixZkClient zkClient, String clusterName, long delay) {
  final ConfigAccessor accessor = new ConfigAccessor(zkClient);
  final ClusterConfig config = accessor.getClusterConfig(clusterName);

  config.setRebalanceDelayTime(delay);

  accessor.setClusterConfig(clusterName, config);
}
 
Example 17
Source File: TestAbnormalStatesResolver.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Sends a SLAVE-to-MASTER state transition message to a SLAVE host twice: first with no
 * abnormal state resolver configured, then with {@code ExcessiveTopStateResolver} registered for
 * the MasterSlave state model. After each send the test waits, verifies the cluster, and asserts
 * that exactly one MASTER replica remains for the target partition. It also compares the
 * top-state update time against the value captured at the start: it must not have advanced in
 * the first round and must have advanced in the second. The resolver map is reset to empty at
 * the end.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping or waiting
 */
@Test(dependsOnMethods = "testConfigureResolver")
public void testExcessiveTopStateResolver() throws InterruptedException {
  BestPossibleExternalViewVerifier verifier =
      new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient).build();
  Assert.assertTrue(verifier.verify());

  // 1. Find a partition with a MASTER replica and a SLAVE replica
  // NOTE(review): admin is built from a ZK address and is not closed in this method — confirm
  // whether it holds a connection that should be released.
  HelixAdmin admin = new ZKHelixAdmin.Builder().setZkAddress(ZK_ADDR).build();
  ExternalView ev = admin.getResourceExternalView(CLUSTER_NAME, TEST_DB);
  // Takes whichever partition the set iterator yields first.
  String targetPartition = ev.getPartitionSet().iterator().next();
  Map<String, String> partitionAssignment = ev.getStateMap(targetPartition);
  // Optional.get() throws NoSuchElementException if the partition has no SLAVE replica.
  String slaveHost = partitionAssignment.entrySet().stream()
      .filter(entry -> entry.getValue().equals(MasterSlaveSMD.States.SLAVE.name())).findAny()
      .get().getKey();
  // Baseline that both later update-time assertions compare against.
  long previousMasterUpdateTime =
      getTopStateUpdateTime(ev, targetPartition, MasterSlaveSMD.States.MASTER.name());

  // Build SLAVE to MASTER message
  // Fixed UUID bits, so the message id is the same on every run.
  String msgId = new UUID(123, 456).toString();
  Message msg = createMessage(Message.MessageType.STATE_TRANSITION, msgId,
      MasterSlaveSMD.States.SLAVE.name(), MasterSlaveSMD.States.MASTER.name(), TEST_DB,
      slaveHost);
  msg.setStateModelDef(MasterSlaveSMD.name);

  // Address the message to the chosen SLAVE host for the target partition only.
  Criteria cr = new Criteria();
  cr.setInstanceName(slaveHost);
  cr.setRecipientInstanceType(InstanceType.PARTICIPANT);
  cr.setSessionSpecific(true);
  cr.setPartition(targetPartition);
  cr.setResource(TEST_DB);
  cr.setClusterName(CLUSTER_NAME);

  // Callback shared by both sends: fails the test on timeout and checks the reply's state.
  AsyncCallback callback = new AsyncCallback() {
    @Override
    public void onTimeOut() {
      Assert.fail("The test state transition timeout.");
    }

    @Override
    public void onReplyMessage(Message message) {
      Assert.assertEquals(message.getMsgState(), Message.MessageState.READ);
    }
  };

  // 2. Send the SLAVE to MASTER message to the SLAVE host to make abnormal partition states.

  // 2.A. Without resolver, the fixing is not completely done by the default rebalancer logic.
  _controller.getMessagingService()
      .sendAndWait(cr, msg, callback, (int) TestHelper.WAIT_DURATION);
  Thread.sleep(DEFAULT_REBALANCE_PROCESSING_WAIT_TIME);
  // Wait until the partition status is fixed, verify if the result is as expected
  verifier =
      new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient).build();
  Assert.assertTrue(verifier.verifyByPolling());
  ev = admin.getResourceExternalView(CLUSTER_NAME, TEST_DB);
  Assert.assertEquals(ev.getStateMap(targetPartition).values().stream()
      .filter(state -> state.equals(MasterSlaveSMD.States.MASTER.name())).count(), 1);
  // Since the resolver is not used in the auto default fix process, there is no update on the
  // original master. So if there is any data issue, it was not fixed.
  long currentMasterUpdateTime =
      getTopStateUpdateTime(ev, targetPartition, MasterSlaveSMD.States.MASTER.name());
  Assert.assertFalse(currentMasterUpdateTime > previousMasterUpdateTime);

  // 2.B. with resolver configured, the fixing is complete.
  // NOTE(review): this accessor is built from a ZK address and is not closed in this method —
  // confirm whether it should be.
  ConfigAccessor configAccessor = new ConfigAccessor.Builder().setZkAddress(ZK_ADDR).build();
  ClusterConfig clusterConfig = configAccessor.getClusterConfig(CLUSTER_NAME);
  clusterConfig.setAbnormalStateResolverMap(
      ImmutableMap.of(MasterSlaveSMD.name, ExcessiveTopStateResolver.class.getName()));
  configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);

  // Same criteria, message and callback as in 2.A.
  _controller.getMessagingService()
      .sendAndWait(cr, msg, callback, (int) TestHelper.WAIT_DURATION);
  Thread.sleep(DEFAULT_REBALANCE_PROCESSING_WAIT_TIME);
  // Wait until the partition status is fixed, verify if the result is as expected
  Assert.assertTrue(verifier.verifyByPolling());
  ev = admin.getResourceExternalView(CLUSTER_NAME, TEST_DB);
  Assert.assertEquals(ev.getStateMap(targetPartition).values().stream()
      .filter(state -> state.equals(MasterSlaveSMD.States.MASTER.name())).count(), 1);
  // Now the resolver is used in the auto fix process, the original master has also been refreshed.
  // The potential data issue has been fixed in this process.
  currentMasterUpdateTime =
      getTopStateUpdateTime(ev, targetPartition, MasterSlaveSMD.States.MASTER.name());
  Assert.assertTrue(currentMasterUpdateTime > previousMasterUpdateTime);

  // Reset the resolver map
  // NOTE(review): this reset is skipped if any assertion above fails, leaving the resolver
  // configured in the cluster config.
  clusterConfig = configAccessor.getClusterConfig(CLUSTER_NAME);
  clusterConfig.setAbnormalStateResolverMap(Collections.emptyMap());
  configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);
}
 
Example 18
Source File: TestNodeSwap.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the cluster with a zone/instance topology, adds NUM_NODE instances spread over three
 * zones, starts the participants and the controller, and enables persisting of the best
 * possible assignment and topology-aware rebalance.
 *
 * @throws Exception if any setup step fails
 */
@BeforeClass
public void beforeClass() throws Exception {
  System.out.println("START " + CLASS_NAME + " at " + new Date(System.currentTimeMillis()));

  _gSetupTool.addCluster(CLUSTER_NAME, true);

  ConfigAccessor accessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.setTopology("/zone/instance");
  config.setFaultZoneType("zone");
  accessor.setClusterConfig(CLUSTER_NAME, config);

  Set<String> nodeNames = new HashSet<>();
  for (int nodeIdx = 0; nodeIdx < NUM_NODE; nodeIdx++) {
    String nodeName = PARTICIPANT_PREFIX + "_" + (START_PORT + nodeIdx);
    _gSetupTool.addInstanceToCluster(CLUSTER_NAME, nodeName);

    // Nodes are assigned to zone-0, zone-1 and zone-2 in rotation.
    String zoneName = "zone-" + nodeIdx % 3;
    String nodeDomain = String.format("zone=%s,instance=%s", zoneName, nodeName);

    InstanceConfig nodeConfig = accessor.getInstanceConfig(CLUSTER_NAME, nodeName);
    nodeConfig.setDomain(nodeDomain);
    _gSetupTool.getClusterManagementTool().setInstanceConfig(CLUSTER_NAME, nodeName,
        nodeConfig);
    nodeNames.add(nodeName);
  }

  // start dummy participants
  for (String nodeName : nodeNames) {
    MockParticipantManager started = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, nodeName);
    started.syncStart();
    _participants.add(started);
  }

  // start controller
  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX + "_0");
  _controller.syncStart();

  enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true);
  enableTopologyAwareRebalance(_gZkClient, CLUSTER_NAME, true);
}
 
Example 19
Source File: TestWagedRebalance.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the stateful WAGED rebalancer is reset when the controller regains leadership:
 * after a new controller session and a triggered rebalance, the external view of a resource
 * added late must differ from the one captured before the reset, showing that previously
 * remembered state was dropped.
 */
@Test(dependsOnMethods = "test")
public void testRebalancerReset() throws Exception {
  // Weight evenness far above movement so a reset rebalancer has a reason to move partitions.
  ConfigAccessor accessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = accessor.getClusterConfig(CLUSTER_NAME);
  config.setGlobalRebalancePreference(ImmutableMap
      .of(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, 10,
          ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, 0));
  accessor.setClusterConfig(CLUSTER_NAME, config);

  int dbIndex = 0;
  for (String stateModel : _testModels) {
    String dbName = "Test-DB-" + TestHelper.getTestMethodName() + dbIndex;
    dbIndex++;
    createResourceWithWagedRebalance(CLUSTER_NAME, dbName, stateModel, PARTITIONS, _replica,
        _replica);
    _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, dbName, _replica);
    _allDBs.add(dbName);
  }
  // TODO remove this sleep after fix https://github.com/apache/helix/issues/526
  Thread.sleep(300);
  validate(_replica);

  // One more resource, added last so that its assignment is shaped by the resources already
  // placed.
  String extraDb = "More-Test-DB";
  createResourceWithWagedRebalance(CLUSTER_NAME, extraDb,
      BuiltInStateModelDefinitions.MasterSlave.name(), PARTITIONS, _replica, _replica);
  _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, extraDb, _replica);
  _allDBs.add(extraDb);
  // TODO remove this sleep after fix https://github.com/apache/helix/issues/526
  Thread.sleep(300);
  validate(_replica);
  ExternalView viewBeforeReset =
      _gSetupTool.getClusterManagementTool().getResourceExternalView(CLUSTER_NAME, extraDb);

  _controller.handleNewSession();
  // Force a rebalance so the rebalancer computes from its emptied cached state.
  RebalanceScheduler.invokeRebalance(_controller.getHelixDataAccessor(), extraDb);

  // Having forgotten the previous state, the rebalancer is expected to rebalance every
  // partition.
  // TODO remove this sleep after fix https://github.com/apache/helix/issues/526
  Thread.sleep(300);
  validate(_replica);
  ExternalView viewAfterReset =
      _gSetupTool.getClusterManagementTool().getResourceExternalView(CLUSTER_NAME, extraDb);

  // The two views must differ, i.e. the controller moved some partitions.
  Assert.assertFalse(viewAfterReset.equals(viewBeforeReset));
}
 
Example 20
Source File: TestAlertingRebalancerFailure.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises rebalance-failure alerting around instance domain ids in three phases: (1) only the
 * first {@code replicas} participants are given a domain id and the rest are disabled, which
 * must produce no rebalance error; (2) the remaining participants are enabled without a domain
 * id, which must produce a rebalance error; (3) those participants get a domain id, which must
 * clear the error. Topology-aware rebalance is enabled for the test and disabled again, in the
 * persisted cluster config, during cleanup.
 *
 * @throws Exception if any cluster operation fails
 */
@Test(dependsOnMethods = "testTagSetIncorrect")
public void testWithDomainId() throws Exception {
  int replicas = 2;
  ConfigAccessor configAccessor = new ConfigAccessor(_gZkClient);
  // 1. disable all participants except one node, then set domain Id
  for (int i = NODE_NR - 1; i >= 0; i--) {
    if (i < replicas) {
      setDomainId(_participants[i].getInstanceName(), configAccessor);
    } else {
      setInstanceEnable(_participants[i].getInstanceName(), false, configAccessor);
    }
  }

  // enable topology aware
  ClusterConfig clusterConfig = configAccessor.getClusterConfig(CLUSTER_NAME);
  clusterConfig.setTopologyAwareEnabled(true);
  clusterConfig.setTopology("/Rack/Instance");
  clusterConfig.setFaultZoneType("Rack");
  configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);

  // Ensure error caused by node config changes has been removed.
  // Error may be recorded unexpectedly when a resource from other tests is not cleaned up.
  accessor.removeProperty(errorNodeKey);

  _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
      BuiltInStateModelDefinitions.MasterSlave.name(), RebalanceMode.FULL_AUTO.name(),
      CrushRebalanceStrategy.class.getName());
  _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, replicas);
  ZkHelixClusterVerifier verifier = new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME)
      .setZkAddr(ZK_ADDR).setResources(new HashSet<>(Collections.singleton(testDb))).build();
  Assert.assertTrue(verifier.verifyByPolling());
  // Verify there is no rebalance error logged
  Assert.assertNull(accessor.getProperty(errorNodeKey));
  checkRebalanceFailureGauge(false);
  checkResourceBestPossibleCalFailureState(ResourceMonitor.RebalanceStatus.NORMAL, testDb);

  // 2. enable the rest nodes with no domain Id
  for (int i = replicas; i < NODE_NR; i++) {
    setInstanceEnable(_participants[i].getInstanceName(), true, configAccessor);
  }
  // Verify there is rebalance error logged
  pollForError(accessor, errorNodeKey);
  checkRebalanceFailureGauge(true);
  checkResourceBestPossibleCalFailureState(
      ResourceMonitor.RebalanceStatus.BEST_POSSIBLE_STATE_CAL_FAILED, testDb);

  // 3. reset all nodes domain Id to be correct setting
  for (int i = replicas; i < NODE_NR; i++) {
    setDomainId(_participants[i].getInstanceName(), configAccessor);
  }
  _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, replicas);

  Assert.assertTrue(_clusterVerifier.verify());

  // Verify that rebalance error state is removed
  checkRebalanceFailureGauge(false);
  checkResourceBestPossibleCalFailureState(ResourceMonitor.RebalanceStatus.NORMAL, testDb);

  // clean up
  _gSetupTool.getClusterManagementTool().dropResource(CLUSTER_NAME, testDb);
  // Flipping the flag on the local object alone never reaches the cluster, so re-read the
  // current config, disable topology awareness, and write it back.
  clusterConfig = configAccessor.getClusterConfig(CLUSTER_NAME);
  clusterConfig.setTopologyAwareEnabled(false);
  configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig);
}