com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig Java Examples
The following examples show how to use
com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 6 votes |
private void addCustomBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions) { // Add Custom bootstrap script support if needed if (!CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionAll())) { for (ScriptDefinition scriptDefinition : emrClusterDefinition.getCustomBootstrapActionAll()) { BootstrapActionConfig customActionConfigAll = getBootstrapActionConfig(scriptDefinition.getScriptName(), scriptDefinition.getScriptLocation()); ArrayList<String> argList = new ArrayList<>(); if (!CollectionUtils.isEmpty(scriptDefinition.getScriptArguments())) { for (String argument : scriptDefinition.getScriptArguments()) { // Trim the argument argList.add(argument.trim()); } } // Set arguments to bootstrap action customActionConfigAll.getScriptBootstrapAction().setArgs(argList); bootstrapActions.add(customActionConfigAll); } } }
Example #2
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 6 votes |
private void addDaemonBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions) { // Add daemon Configuration support if needed if (!CollectionUtils.isEmpty(emrClusterDefinition.getDaemonConfigurations())) { BootstrapActionConfig daemonBootstrapActionConfig = getBootstrapActionConfig(ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(), configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON)); // Add arguments to the bootstrap script ArrayList<String> argList = new ArrayList<>(); for (Parameter daemonConfig : emrClusterDefinition.getDaemonConfigurations()) { argList.add(daemonConfig.getName() + "=" + daemonConfig.getValue()); } // Add the bootstrap action with arguments daemonBootstrapActionConfig.getScriptBootstrapAction().setArgs(argList); bootstrapActions.add(daemonBootstrapActionConfig); } }
Example #3
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 5 votes |
private void addCustomMasterBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions) { // Add Master custom bootstrap script support if needed if (!CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionMaster())) { for (ScriptDefinition scriptDefinition : emrClusterDefinition.getCustomBootstrapActionMaster()) { BootstrapActionConfig bootstrapActionConfig = getBootstrapActionConfig(scriptDefinition.getScriptName(), configurationHelper.getProperty(ConfigurationValue.EMR_CONDITIONAL_SCRIPT)); // Add arguments to the bootstrap script ArrayList<String> argList = new ArrayList<>(); // Execute this script only on the master node. argList.add(configurationHelper.getProperty(ConfigurationValue.EMR_NODE_CONDITION)); argList.add(scriptDefinition.getScriptLocation()); if (!CollectionUtils.isEmpty(scriptDefinition.getScriptArguments())) { for (String argument : scriptDefinition.getScriptArguments()) { // Trim the argument argList.add(argument.trim()); } } bootstrapActionConfig.getScriptBootstrapAction().setArgs(argList); bootstrapActions.add(bootstrapActionConfig); } } }
Example #4
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 5 votes |
private void addHadoopBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions) { // Add hadoop Configuration support if needed if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopConfigurations())) { ArrayList<String> argList = new ArrayList<>(); BootstrapActionConfig hadoopBootstrapActionConfig = getBootstrapActionConfig(ConfigurationValue.EMR_CONFIGURE_HADOOP.getKey(), configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_HADOOP)); // If config files are available, add them as arguments for (Object hadoopConfigObject : emrClusterDefinition.getHadoopConfigurations()) { // If the Config Files are available, add them as arguments if (hadoopConfigObject instanceof ConfigurationFiles) { for (ConfigurationFile configurationFile : ((ConfigurationFiles) hadoopConfigObject).getConfigurationFiles()) { argList.add(configurationFile.getFileNameShortcut()); argList.add(configurationFile.getConfigFileLocation()); } } // If the key value pairs are available, add them as arguments if (hadoopConfigObject instanceof KeyValuePairConfigurations) { for (KeyValuePairConfiguration keyValuePairConfiguration : ((KeyValuePairConfigurations) hadoopConfigObject) .getKeyValuePairConfigurations()) { argList.add(keyValuePairConfiguration.getKeyValueShortcut()); argList.add(keyValuePairConfiguration.getAttribKey() + "=" + keyValuePairConfiguration.getAttribVal()); } } } // Add the bootstrap action with arguments hadoopBootstrapActionConfig.getScriptBootstrapAction().setArgs(argList); bootstrapActions.add(hadoopBootstrapActionConfig); } }
Example #5
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 5 votes |
/** * Create the BootstrapActionConfig object from the bootstrap script. * * @param scriptDescription bootstrap script name to be displayed. * @param bootstrapScript location of the bootstrap script. * * @return bootstrap action configuration that contains all the bootstrap actions for the given configuration. */ private BootstrapActionConfig getBootstrapActionConfig(String scriptDescription, String bootstrapScript) { // Create the BootstrapActionConfig object BootstrapActionConfig bootstrapConfig = new BootstrapActionConfig(); ScriptBootstrapActionConfig bootstrapConfigScript = new ScriptBootstrapActionConfig(); // Set the bootstrapScript bootstrapConfig.setName(scriptDescription); bootstrapConfigScript.setPath(bootstrapScript); bootstrapConfig.setScriptBootstrapAction(bootstrapConfigScript); // Return the object return bootstrapConfig; }
Example #6
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 5 votes |
/** * Create the bootstrap action configuration List from all the bootstrapping scripts specified. * * @param emrClusterDefinition the EMR definition name value. * * @return list of bootstrap action configurations that contains all the bootstrap actions for the given configuration. */ private ArrayList<BootstrapActionConfig> getBootstrapActionConfigList(EmrClusterDefinition emrClusterDefinition) { // Create the list ArrayList<BootstrapActionConfig> bootstrapActions = new ArrayList<>(); // Add encryption script support if needed if (emrClusterDefinition.isEncryptionEnabled() != null && emrClusterDefinition.isEncryptionEnabled()) { // Whenever the user requests for encryption, we have an encryption script that is stored in herd bucket. // We use this encryption script to encrypt all the volumes of all the instances. // Amazon plans to support encryption in EMR soon. Once that support is enabled, we can remove this script and use the one provided by AWS. bootstrapActions.add(getBootstrapActionConfig(ConfigurationValue.EMR_ENCRYPTION_SCRIPT.getKey(), getBootstrapScriptLocation(configurationHelper.getProperty(ConfigurationValue.EMR_ENCRYPTION_SCRIPT)))); } // Add NSCD script support if the script location is not empty String emrNscdScript = configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT); if (StringUtils.isNotEmpty(emrNscdScript)) { // Upon launch, all EMR clusters should have NSCD running to cache DNS host lookups so EMR does not overwhelm DNS servers bootstrapActions .add(getBootstrapActionConfig(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(), getBootstrapScriptLocation(emrNscdScript))); } // Add bootstrap actions. addDaemonBootstrapActionConfig(emrClusterDefinition, bootstrapActions); addHadoopBootstrapActionConfig(emrClusterDefinition, bootstrapActions); addCustomBootstrapActionConfig(emrClusterDefinition, bootstrapActions); addCustomMasterBootstrapActionConfig(emrClusterDefinition, bootstrapActions); // Return the object return bootstrapActions; }
Example #7
Source File: EmrDaoImplTest.java From herd with Apache License 2.0 | 5 votes |
@Test public void testCreateEmrClusterNoNscdBootstrapScript() { // Create an AWS parameters DTO. final AwsParamsDto awsParamsDto = new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1); EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); final InstanceDefinitions instanceDefinitions = new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition()); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Collections.emptyList()); AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName()) .build(); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture())) .thenReturn(EMR_CLUSTER_ID); // Create the cluster without NSCD script configuration String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto); // Verifications assertEquals(clusterId, EMR_CLUSTER_ID); verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT); verify(awsClientFactory).getEmrClient(awsParamsDto); verify(emrOperations).runEmrJobFlow(any(), any()); RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue(); List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions(); // There should be no bootstrap action assertTrue(bootstrapActionConfigs.isEmpty()); }
Example #8
Source File: EmrDaoImplTest.java From herd with Apache License 2.0 | 4 votes |
@Test public void testCreateEmrClusterWithNscdBootstrapScript() { // Create an AWS parameters DTO. final AwsParamsDto awsParamsDto = new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1); EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); final InstanceDefinitions instanceDefinitions = new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition()); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Collections.emptyList()); when(configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT)).thenReturn(EMR_NSCD_SCRIPT); when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL)).thenReturn(S3_URL_PROTOCOL); when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME)).thenReturn(S3_BUCKET_NAME); when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE)).thenReturn(S3_STAGING_RESOURCE_BASE); when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)).thenReturn(S3_URL_PATH_DELIMITER); when(configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON)).thenReturn(EMR_CONFIGURE_DAEMON); List<Parameter> daemonConfigs = new ArrayList<>(); Parameter daemonConfig = new Parameter(); daemonConfig.setName(EMR_CLUSTER_DAEMON_CONFIG_NAME); daemonConfig.setValue(EMR_CLUSTER_DAEMON_CONFIG_VALUE); daemonConfigs.add(daemonConfig); emrClusterDefinition.setDaemonConfigurations(daemonConfigs); AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName()) .build(); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); 
when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture())) .thenReturn(EMR_CLUSTER_ID); // Create the cluster String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto); // Verifications RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue(); assertEquals(clusterId, EMR_CLUSTER_ID); verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT); verify(configurationHelper).getProperty(ConfigurationValue.S3_URL_PROTOCOL); verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME); verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE); verify(configurationHelper).getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON); verify(awsClientFactory).getEmrClient(awsParamsDto); verify(emrOperations).runEmrJobFlow((AmazonElasticMapReduceClient) amazonElasticMapReduce, runJobFlowRequest); List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions(); // There should be two bootstrap actions: NSCD script, and emr daemon config assertEquals(2, bootstrapActionConfigs.size()); // Verify NSCD bootstrap action assertEquals(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(), bootstrapActionConfigs.get(0).getName()); assertEquals(String .format("%s%s%s%s%s%s", S3_URL_PROTOCOL, S3_BUCKET_NAME, S3_URL_PATH_DELIMITER, S3_STAGING_RESOURCE_BASE, S3_URL_PATH_DELIMITER, EMR_NSCD_SCRIPT), bootstrapActionConfigs.get(0).getScriptBootstrapAction().getPath()); // Verify EMR configure daemon bootstrap action assertEquals(ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(), bootstrapActionConfigs.get(1).getName()); assertEquals(EMR_CONFIGURE_DAEMON, bootstrapActionConfigs.get(1).getScriptBootstrapAction().getPath()); assertEquals(String.format("%s=%s", EMR_CLUSTER_DAEMON_CONFIG_NAME, EMR_CLUSTER_DAEMON_CONFIG_VALUE), 
bootstrapActionConfigs.get(1).getScriptBootstrapAction().getArgs().get(0)); }
Example #9
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 4 votes |
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config cluster, Filer filer, ParameterCompiler parameterCompiler) throws IOException { RemoteFile runner = prepareRunner(filer, tag); // Compile steps stepCompiler.compile(runner); List<StepConfig> stepConfigs = stepCompiler.stepConfigs(); Config ec2 = cluster.getNested("ec2"); Config master = ec2.getNestedOrGetEmpty("master"); List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of()); List<Config> task = ec2.getListOrEmpty("task", Config.class); List<String> applications = cluster.getListOrEmpty("applications", String.class); if (applications.isEmpty()) { applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink"); } // TODO: allow configuring additional application parameters List<Application> applicationConfigs = applications.stream() .map(application -> new Application().withName(application)) .collect(toList()); // TODO: merge configurations with the same classification? 
List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream() .map(this::configurations) .flatMap(Collection::stream) .collect(toList()); List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class); List<BootstrapActionConfig> bootstrapActions = new ArrayList<>(); for (int i = 0; i < bootstrap.size(); i++) { bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler)); } // Stage files to S3 filer.stageFiles(); Optional<String> subnetId = ec2.getOptional("subnet_id", String.class); String defaultMasterInstanceType; String defaultCoreInstanceType; String defaultTaskInstanceType; if (subnetId.isPresent()) { // m4 requires VPC (subnet id) defaultMasterInstanceType = "m4.2xlarge"; defaultCoreInstanceType = "m4.xlarge"; defaultTaskInstanceType = "m4.xlarge"; } else { defaultMasterInstanceType = "m3.2xlarge"; defaultCoreInstanceType = "m3.xlarge"; defaultTaskInstanceType = "m3.xlarge"; } RunJobFlowRequest request = new RunJobFlowRequest() .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")") .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0")) .withSteps(stepConfigs) .withBootstrapActions(bootstrapActions) .withApplications(applicationConfigs) .withLogUri(cluster.get("logs", String.class, null)) .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole")) .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole")) .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag)) .withVisibleToAllUsers(cluster.get("visible", boolean.class, true)) .withConfigurations(configurations) .withInstances(new JobFlowInstancesConfig() .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder() // Master Node .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1)) // Core Group .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType)) // Task Groups 
.addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType)) .build() ) .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class)) .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class)) .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null)) .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null)) .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null)) .withTerminationProtected(cluster.get("termination_protected", boolean.class, false)) .withPlacement(cluster.getOptional("availability_zone", String.class) .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull()) .withEc2SubnetId(subnetId.orNull()) .withEc2KeyName(ec2.get("key", String.class)) .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true))); logger.info("Submitting EMR job with {} steps(s)", request.getSteps().size()); RunJobFlowResult result = emr.runJobFlow(request); logger.info("Submitted EMR job with {} step(s): {}", request.getSteps().size(), result.getJobFlowId(), result); return NewCluster.of(result.getJobFlowId(), request.getSteps().size()); }
Example #10
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 4 votes |
/**
 * Builds a single EMR bootstrap action from a digdag "bootstrap" entry.
 *
 * The entry may be either a plain string (the script path) or an object with at least a
 * "path" and optionally "name", "env", "files", and "args". The script and its support
 * files are staged to S3, and the actual bootstrap action invokes the shared runner with
 * a generated config.json describing what to download and run.
 *
 * @param index 1-based position of this bootstrap action; used to name its staging directory
 * @param action the bootstrap entry (textual path or configuration object)
 * @param tag unique tag identifying this digdag cluster
 * @param filer stages files to S3
 * @param runner the shared command-runner script already staged to S3
 * @param parameterCompiler compiles parameterized env/args values
 * @return the bootstrap action configuration to submit with the job flow
 * @throws IOException if serializing the runner configuration fails
 * @throws ConfigException if the entry is neither a string nor an object
 */
private BootstrapActionConfig bootstrapAction(int index, JsonNode action, String tag, Filer filer, RemoteFile runner, ParameterCompiler parameterCompiler)
        throws IOException
{
    String script;
    String name;
    FileReference reference;
    Config config;

    // A textual entry is just the script path; an object entry carries path plus options.
    if (action.isTextual()) {
        script = action.asText();
        reference = fileReference("bootstrap", script);
        name = reference.filename();
        config = request.getConfig().getFactory().create();
    }
    else if (action.isObject()) {
        config = request.getConfig().getFactory().create(action);
        script = config.get("path", String.class);
        reference = fileReference("bootstrap", script);
        // Display name defaults to the script's filename.
        name = config.get("name", String.class, reference.filename());
    }
    else {
        throw new ConfigException("Invalid bootstrap action: " + action);
    }

    // Stage the bootstrap script itself to S3.
    RemoteFile file = filer.prepareRemoteFile(tag, "bootstrap", Integer.toString(index), reference, false);

    // Describe what the runner should download and execute: the script (made executable,
    // mode 0777), any support "files", and the compiled "args".
    CommandRunnerConfiguration configuration = CommandRunnerConfiguration.builder()
            .workingDirectory(bootstrapWorkingDirectory(index))
            .env(parameterCompiler.parameters(config.getNestedOrGetEmpty("env"), (key, value) -> value))
            .addDownload(DownloadConfig.of(file, 0777))
            .addAllDownload(config.getListOrEmpty("files", String.class).stream()
                    .map(r -> fileReference("file", r))
                    .map(r -> filer.prepareRemoteFile(tag, "bootstrap", Integer.toString(index), r, false, bootstrapWorkingDirectory(index)))
                    .collect(toList()))
            .addCommand(file.localPath())
            .addAllCommand(parameterCompiler.parameters(config, "args"))
            .build();

    // Serialize the runner configuration and stage it to S3 as config.json.
    FileReference configurationFileReference = ImmutableFileReference.builder()
            .type(FileReference.Type.DIRECT)
            .contents(objectMapper.writeValueAsBytes(configuration))
            .filename("config.json")
            .build();
    RemoteFile remoteConfigurationFile = filer.prepareRemoteFile(tag, "bootstrap", Integer.toString(index), configurationFileReference, false);

    // The bootstrap action runs the shared runner script, passing the config.json S3 URI
    // as its only argument.
    return new BootstrapActionConfig()
            .withName(name)
            .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
                    .withPath(runner.s3Uri().toString())
                    .withArgs(remoteConfigurationFile.s3Uri().toString()));
}
Example #11
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 4 votes |
/** * This method uses method the AWS Java to launch an Apache HBase cluster on Amazon EMR. * * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service * @param clusterIdentifier - identifier of an existing cluster * @param amiVersion - AMI to use for launching this cluster * @param keypair - A keypair for SSHing into the Amazon EMR master node * @param masterInstanceType - Master node Amazon EC2 instance type * @param coreInstanceType - core nodes Amazon EC2 instance type * @param logUri - An Amazon S3 bucket for your * @param numberOfNodes - total number of nodes in this cluster including master node * @return */ public static String createCluster(AmazonElasticMapReduce client, String clusterIdentifier, String amiVersion, String keypair, String masterInstanceType, String coreInstanceType, String logUri, int numberOfNodes) { if (clusterExists(client, clusterIdentifier)) { LOG.info("Cluster " + clusterIdentifier + " is available"); return clusterIdentifier; } //Error checking if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version"); if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair"); if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type"); if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs."); if (numberOfNodes < 0) throw new RuntimeException("ERROR: Please specify at least 1 node"); RunJobFlowRequest request = new RunJobFlowRequest() .withAmiVersion(amiVersion) .withBootstrapActions(new BootstrapActionConfig() .withName("Install HBase") .withScriptBootstrapAction(new ScriptBootstrapActionConfig() .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase"))) .withName("Job Flow With HBAse Actions") .withSteps(new StepConfig() 
//enable debugging step .withName("Enable debugging") .withActionOnFailure("TERMINATE_CLUSTER") .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()), //Start HBase step - after installing it with a bootstrap action createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()), //add HBase backup step createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs())) .withLogUri(logUri) .withInstances(new JobFlowInstancesConfig() .withEc2KeyName(keypair) .withInstanceCount(numberOfNodes) .withKeepJobFlowAliveWhenNoSteps(true) .withMasterInstanceType(masterInstanceType) .withSlaveInstanceType(coreInstanceType)); RunJobFlowResult result = client.runJobFlow(request); String state = null; while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) { try { Thread.sleep(10 * 1000); LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available."); } catch (InterruptedException e) { } if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")){ LOG.error("Could not create EMR Cluster"); System.exit(-1); } } LOG.info("Created cluster " + result.getJobFlowId()); LOG.info("Cluster " + clusterIdentifier + " is available"); return result.getJobFlowId(); }