com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest Java Examples
The following examples show how to use
com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 6 votes |
@Override public String createEmrCluster(String clusterName, EmrClusterDefinition emrClusterDefinition, AwsParamsDto awsParams) { RunJobFlowRequest runJobFlowRequest = getRunJobFlowRequest(clusterName, emrClusterDefinition); LOGGER.info("runJobFlowRequest={}", HerdStringUtils.sanitizeLogText(jsonHelper.objectToJson(runJobFlowRequest))); String clusterId = emrOperations.runEmrJobFlow(getEmrClient(awsParams), runJobFlowRequest); LOGGER.info("EMR cluster started. emrClusterId=\"{}\"", clusterId); // Add the new cluster name and cluster id to the EMR cluster cache. LOGGER.info("Adding EMR cluster to the EMR Cluster Cache. emrClusterName=\"{}\" emrClusterId=\"{}\" accountId=\"{}\"", clusterName.toUpperCase(), clusterId, emrClusterDefinition.getAccountId()); // Build the EMR cluster cache key using the cluster name and the account id. EmrClusterCacheKey emrClusterCacheKey = new EmrClusterCacheKey(clusterName.toUpperCase(), emrClusterDefinition.getAccountId()); // Get the cluster cache using the accountId. Map<EmrClusterCacheKey, String> emrClusterCache = getEmrClusterCacheByAccountId(emrClusterDefinition.getAccountId()); // Add the newly created cluster cache key and id pair to the cluster cache. emrClusterCache.put(emrClusterCacheKey, clusterId); LOGGER.debug("EMR cluster cache after creating a cluster and adding it to the existing cache. emrClusterCache=\"{}\" emrClusterCacheContents=\"{}\"", System.identityHashCode(emrClusterCache), emrClusterCache.toString()); return clusterId; }
Example #2
Source File: MockEmrOperationsImpl.java From herd with Apache License 2.0 | 6 votes |
private MockEmrJobFlow createNewCluster(RunJobFlowRequest jobFlowRequest, String status, StatusChangeReason reason, StatusTimeline timeline) { MockEmrJobFlow cluster = new MockEmrJobFlow(); cluster.setJobFlowId(getNewJobFlowId()); cluster.setJobFlowName(jobFlowRequest.getName()); cluster.setStatus(status); cluster.setStatusTimeline(timeline); cluster.setStatusChangeReason(reason); emrClusters.put(cluster.getJobFlowId(), cluster); // Add the steps for (StepConfig stepConfig : jobFlowRequest.getSteps()) { addClusterStep(cluster.getJobFlowId(), stepConfig); } return cluster; }
Example #3
Source File: EmrDaoTest.java From herd with Apache License 2.0 | 5 votes |
@Test public void createEmrClusterAssertEncryptionDisabled() { /* * Use only minimum required options */ String clusterName = "clusterName"; EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); InstanceDefinitions instanceDefinitions = new InstanceDefinitions(); instanceDefinitions.setMasterInstances( new MasterInstanceDefinition(10, "masterInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD)); instanceDefinitions.setCoreInstances( new InstanceDefinition(20, "coreInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD)); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue"))); emrClusterDefinition.setEncryptionEnabled(false); String clusterId = "clusterId"; when(mockEmrOperations.runEmrJobFlow(any(), any())).then(new Answer<String>() { @Override public String answer(InvocationOnMock invocation) { RunJobFlowRequest runJobFlowRequest = invocation.getArgument(1); // No bootstrap action should be added assertEquals(0, runJobFlowRequest.getBootstrapActions().size()); return clusterId; } }); assertEquals(clusterId, emrDao.createEmrCluster(clusterName, emrClusterDefinition, getAwsParamsDto())); }
Example #4
Source File: EmrDaoTest.java From herd with Apache License 2.0 | 5 votes |
@Test public void createEmrClusterAssertInstallOozieDisabled() { /* * Use only minimum required options */ String clusterName = "clusterName"; EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); InstanceDefinitions instanceDefinitions = new InstanceDefinitions(); instanceDefinitions.setMasterInstances( new MasterInstanceDefinition(10, "masterInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD)); instanceDefinitions.setCoreInstances( new InstanceDefinition(20, "coreInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD)); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue"))); emrClusterDefinition.setInstallOozie(false); String clusterId = "clusterId"; when(mockEmrOperations.runEmrJobFlow(any(), any())).then(new Answer<String>() { @Override public String answer(InvocationOnMock invocation) { RunJobFlowRequest runJobFlowRequest = invocation.getArgument(1); // The oozie step should be skipped. assertEquals(0, runJobFlowRequest.getSteps().size()); return clusterId; } }); assertEquals(clusterId, emrDao.createEmrCluster(clusterName, emrClusterDefinition, getAwsParamsDto())); }
Example #5
Source File: EmrDaoImplTest.java From herd with Apache License 2.0 | 5 votes |
@Test public void testCreateEmrClusterNoNscdBootstrapScript() { // Create an AWS parameters DTO. final AwsParamsDto awsParamsDto = new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1); EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); final InstanceDefinitions instanceDefinitions = new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition()); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Collections.emptyList()); AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName()) .build(); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture())) .thenReturn(EMR_CLUSTER_ID); // Create the cluster without NSCD script configuration String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto); // Verifications assertEquals(clusterId, EMR_CLUSTER_ID); verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT); verify(awsClientFactory).getEmrClient(awsParamsDto); verify(emrOperations).runEmrJobFlow(any(), any()); RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue(); List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions(); // There should be no bootstrap action assertTrue(bootstrapActionConfigs.isEmpty()); }
Example #6
Source File: EmrIT.java From digdag with Apache License 2.0 | 5 votes |
/**
 * Starts a single-node EMR cluster, runs the "emr" project against it and checks that the attempt succeeds and the
 * output file exists.
 */
@Test
public void test()
        throws Exception {
    // Cluster hardware: a single m3.xlarge node that stays alive when no steps are running.
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
            .withEc2KeyName("digdag-test")
            .withInstanceCount(1)
            .withKeepJobFlowAliveWhenNoSteps(true)
            .withMasterInstanceType("m3.xlarge")
            .withSlaveInstanceType("m3.xlarge");

    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName("Digdag Test")
            .withReleaseLabel("emr-5.2.0")
            .withApplications(Stream.of("Hadoop", "Hive", "Spark", "Flink")
                    .map(appName -> new Application().withName(appName))
                    .collect(toList()))
            .withJobFlowRole("EMR_EC2_DefaultRole")
            .withServiceRole("EMR_DefaultRole")
            .withVisibleToAllUsers(true)
            .withLogUri(tmpS3FolderUri + "/logs/")
            .withInstances(instances);

    // Launch the cluster and remember its id in clusterIds.
    String clusterId = emr.runJobFlow(request).getJobFlowId();
    clusterIds.add(clusterId);

    Id attemptId = pushAndStart(
            server.endpoint(),
            projectDir,
            "emr",
            ImmutableMap.of(
                    "test_s3_folder", tmpS3FolderUri.toString(),
                    "test_cluster", clusterId,
                    "outfile", outfile.toString()));
    expect(Duration.ofMinutes(30), attemptSuccess(server.endpoint(), attemptId));

    validateTdSparkQueryOutput();
    assertThat(Files.exists(outfile), is(true));
}
Example #7
Source File: EmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Builds a run job flow request from {@code emrClusterConfig}, submits it and returns the resulting job flow id.
 * When logging is enabled the S3 log URI is set on the request; when EMR debugging is also enabled a
 * "Setup Hadoop Debugging" step is added.
 *
 * @param clusterName the name given to the job flow
 * @return the job flow id taken from the run job flow result
 */
@Override
public String createCluster(String clusterName) {
    RunJobFlowRequest request = new RunJobFlowRequest()
        .withName(clusterName)
        .withReleaseLabel(EmrInfo.getVersion())
        .withServiceRole(emrClusterConfig.getServiceRole())
        .withJobFlowRole(emrClusterConfig.getJobFlowRole())
        .withVisibleToAllUsers(emrClusterConfig.isVisibleToAllUsers());

    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
        .withEc2SubnetId(emrClusterConfig.getEc2SubnetId())
        .withEmrManagedMasterSecurityGroup(emrClusterConfig.getMasterSecurityGroup())
        .withEmrManagedSlaveSecurityGroup(emrClusterConfig.getSlaveSecurityGroup())
        .withInstanceCount(emrClusterConfig.getInstanceCount())
        .withKeepJobFlowAliveWhenNoSteps(true)
        .withMasterInstanceType(emrClusterConfig.getMasterInstanceType())
        .withSlaveInstanceType(emrClusterConfig.getSlaveInstanceType());
    request.withInstances(instances);

    if (emrClusterConfig.isLoggingEnabled()) {
        request.withLogUri(emrClusterConfig.getS3LogUri());

        // The debugging step is only added when logging is enabled as well.
        if (emrClusterConfig.isEnableEmrDebugging()) {
            HadoopJarStepConfig statePusher = new HadoopJarStepConfig()
                .withJar("command-runner.jar")
                .withArgs("state-pusher-script");
            request.withSteps(new StepConfig()
                .withName("Setup Hadoop Debugging")
                .withActionOnFailure(ActionOnFailure.CONTINUE)
                .withHadoopJarStep(statePusher));
        }
    }

    return getEmrClient(emrClusterConfig).runJobFlow(request).getJobFlowId();
}
Example #8
Source File: TestEmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code createCluster} obtains the EMR client once and calls {@code runJobFlow} on it once.
 */
@Test
public void testCreateCluster() {
    Properties clusterProperties = new Properties();
    clusterProperties.setProperty("instanceCount", "1");

    // Spy on the real client so that only the EMR client lookup is replaced.
    EmrClusterJob.Client spiedClient = Mockito.spy(new EmrClusterJob().getClient(clusterProperties));
    AmazonElasticMapReduce mockedEmr = Mockito.mock(AmazonElasticMapReduce.class);
    RunJobFlowResult mockedResult = Mockito.mock(RunJobFlowResult.class);
    Mockito.doReturn(mockedResult).when(mockedEmr).runJobFlow(Mockito.any(RunJobFlowRequest.class));
    Mockito.doReturn(mockedEmr).when(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));

    spiedClient.createCluster("foo");

    // verify(mock) without a mode checks for exactly one invocation.
    Mockito.verify(mockedEmr).runJobFlow(Mockito.any(RunJobFlowRequest.class));
    Mockito.verify(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
Example #9
Source File: EmrOperationsImpl.java From herd with Apache License 2.0 | 4 votes |
/**
 * Submits the given run job flow request through the supplied EMR client.
 *
 * @param emrClient the EMR client whose {@code runJobFlow} method is invoked
 * @param jobFlowRequest the request passed to {@code runJobFlow}
 *
 * @return the job flow ID read from the result of the {@code runJobFlow} call
 */
@Override public String runEmrJobFlow(AmazonElasticMapReduceClient emrClient, RunJobFlowRequest jobFlowRequest) { return emrClient.runJobFlow(jobFlowRequest).getJobFlowId(); }
Example #10
Source File: EmrDaoTest.java From herd with Apache License 2.0 | 4 votes |
@Test public void createEmrClusterAssertCallRunEmrJobFlowWithInstanceFleetAndMultipleSubnets() { // Create objects required for testing. final String clusterName = "clusterName"; final String clusterId = "clusterId"; final String name = STRING_VALUE; final String instanceFleetType = STRING_VALUE_2; final Integer targetOnDemandCapacity = INTEGER_VALUE; final Integer targetSpotCapacity = INTEGER_VALUE_2; final List<EmrClusterDefinitionInstanceTypeConfig> emrClusterDefinitionInstanceTypeConfigs = null; final EmrClusterDefinitionLaunchSpecifications emrClusterDefinitionLaunchSpecifications = null; final EmrClusterDefinitionInstanceFleet emrClusterDefinitionInstanceFleet = new EmrClusterDefinitionInstanceFleet(name, instanceFleetType, targetOnDemandCapacity, targetSpotCapacity, emrClusterDefinitionInstanceTypeConfigs, emrClusterDefinitionLaunchSpecifications); // Create an EMR cluster definition with instance fleet configuration and multiple EC2 subnet IDs. EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); emrClusterDefinition.setInstanceFleets(Lists.newArrayList(emrClusterDefinitionInstanceFleet)); emrClusterDefinition.setSubnetId(String.format("%s , %s ", EC2_SUBNET, EC2_SUBNET_2)); emrClusterDefinition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue"))); when(mockEmrOperations.runEmrJobFlow(any(), any())).then(new Answer<String>() { @Override public String answer(InvocationOnMock invocation) { // Assert that the given EMR cluster definition produced the correct RunJobFlowRequest. 
RunJobFlowRequest runJobFlowRequest = invocation.getArgument(1); JobFlowInstancesConfig jobFlowInstancesConfig = runJobFlowRequest.getInstances(); assertEquals(0, CollectionUtils.size(jobFlowInstancesConfig.getInstanceGroups())); final List<InstanceTypeConfig> expectedInstanceTypeConfigs = null; assertEquals(Lists.newArrayList( new InstanceFleetConfig().withName(name).withInstanceFleetType(instanceFleetType).withTargetOnDemandCapacity(targetOnDemandCapacity) .withTargetSpotCapacity(targetSpotCapacity).withInstanceTypeConfigs(expectedInstanceTypeConfigs).withLaunchSpecifications(null)), jobFlowInstancesConfig.getInstanceFleets()); assertNull(jobFlowInstancesConfig.getEc2SubnetId()); assertEquals(2, CollectionUtils.size(jobFlowInstancesConfig.getEc2SubnetIds())); assertTrue(jobFlowInstancesConfig.getEc2SubnetIds().contains(EC2_SUBNET)); assertTrue(jobFlowInstancesConfig.getEc2SubnetIds().contains(EC2_SUBNET_2)); assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME), runJobFlowRequest.getJobFlowRole()); assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME), runJobFlowRequest.getServiceRole()); List<StepConfig> stepConfigs = runJobFlowRequest.getSteps(); assertEquals(0, stepConfigs.size()); List<Tag> tags = runJobFlowRequest.getTags(); assertEquals(1, tags.size()); { Tag tag = tags.get(0); assertEquals("tagName", tag.getKey()); assertEquals("tagValue", tag.getValue()); } return clusterId; } }); assertEquals(clusterId, emrDao.createEmrCluster(clusterName, emrClusterDefinition, getAwsParamsDto())); }
Example #11
Source File: MockEmrOperationsImpl.java From herd with Apache License 2.0 | 4 votes |
/**
 * Mock implementation that creates a new mock cluster for the request. The request's AMI version acts as a test
 * switch: specific {@code MockAwsOperationsHelper} values make the call throw an {@code AmazonServiceException} or
 * change the initial cluster status; a blank or unrecognized value leaves the status as BOOTSTRAPPING.
 *
 * @param emrClient the EMR client; not used by this mock
 * @param jobFlowRequest the request supplying the AMI version switch and the data for the new mock cluster
 *
 * @return the job flow id of the newly created mock cluster
 */
@Override
public String runEmrJobFlow(AmazonElasticMapReduceClient emrClient, RunJobFlowRequest jobFlowRequest) {
    String initialStatus = ClusterState.BOOTSTRAPPING.toString();

    // The reason message is built from the BOOTSTRAPPING state and is not rebuilt if the status is overridden below.
    StatusChangeReason changeReason = new StatusChangeReason(ClusterStateChangeReasonCode.USER_REQUEST.toString(), "Started " + initialStatus);

    StatusTimeline statusTimeline = new StatusTimeline();
    statusTimeline.setCreationTime(HerdDateUtils.getXMLGregorianCalendarValue(new Date()));

    String amiVersion = jobFlowRequest.getAmiVersion();
    if (StringUtils.isNotBlank(amiVersion)) {
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_THROTTLING_EXCEPTION)) {
            AmazonServiceException throttling = new AmazonServiceException("test throttling exception");
            throttling.setErrorCode("ThrottlingException");
            throw throttling;
        }
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_BAD_REQUEST)) {
            AmazonServiceException badRequest = new AmazonServiceException(MockAwsOperationsHelper.AMAZON_BAD_REQUEST);
            badRequest.setStatusCode(HttpStatus.SC_BAD_REQUEST);
            throw badRequest;
        }
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_NOT_FOUND)) {
            AmazonServiceException notFound = new AmazonServiceException(MockAwsOperationsHelper.AMAZON_NOT_FOUND);
            notFound.setStatusCode(HttpStatus.SC_NOT_FOUND);
            throw notFound;
        }
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_SERVICE_EXCEPTION)) {
            throw new AmazonServiceException(MockAwsOperationsHelper.AMAZON_SERVICE_EXCEPTION);
        }

        // The remaining switches only change the status the mock cluster starts in.
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_CLUSTER_STATUS_WAITING)) {
            initialStatus = ClusterState.WAITING.toString();
        } else if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_CLUSTER_STATUS_RUNNING)) {
            initialStatus = ClusterState.RUNNING.toString();
        }
    }

    return createNewCluster(jobFlowRequest, initialStatus, changeReason, statusTimeline).getJobFlowId();
}
Example #12
Source File: EmrDaoImplTest.java From herd with Apache License 2.0 | 4 votes |
@Test public void testCreateEmrClusterWithNscdBootstrapScript() { // Create an AWS parameters DTO. final AwsParamsDto awsParamsDto = new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1); EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); final InstanceDefinitions instanceDefinitions = new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition()); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Collections.emptyList()); when(configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT)).thenReturn(EMR_NSCD_SCRIPT); when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL)).thenReturn(S3_URL_PROTOCOL); when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME)).thenReturn(S3_BUCKET_NAME); when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE)).thenReturn(S3_STAGING_RESOURCE_BASE); when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)).thenReturn(S3_URL_PATH_DELIMITER); when(configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON)).thenReturn(EMR_CONFIGURE_DAEMON); List<Parameter> daemonConfigs = new ArrayList<>(); Parameter daemonConfig = new Parameter(); daemonConfig.setName(EMR_CLUSTER_DAEMON_CONFIG_NAME); daemonConfig.setValue(EMR_CLUSTER_DAEMON_CONFIG_VALUE); daemonConfigs.add(daemonConfig); emrClusterDefinition.setDaemonConfigurations(daemonConfigs); AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName()) .build(); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); 
when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture())) .thenReturn(EMR_CLUSTER_ID); // Create the cluster String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto); // Verifications RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue(); assertEquals(clusterId, EMR_CLUSTER_ID); verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT); verify(configurationHelper).getProperty(ConfigurationValue.S3_URL_PROTOCOL); verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME); verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE); verify(configurationHelper).getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON); verify(awsClientFactory).getEmrClient(awsParamsDto); verify(emrOperations).runEmrJobFlow((AmazonElasticMapReduceClient) amazonElasticMapReduce, runJobFlowRequest); List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions(); // There should be two bootstrap actions: NSCD script, and emr daemon config assertEquals(2, bootstrapActionConfigs.size()); // Verify NSCD bootstrap action assertEquals(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(), bootstrapActionConfigs.get(0).getName()); assertEquals(String .format("%s%s%s%s%s%s", S3_URL_PROTOCOL, S3_BUCKET_NAME, S3_URL_PATH_DELIMITER, S3_STAGING_RESOURCE_BASE, S3_URL_PATH_DELIMITER, EMR_NSCD_SCRIPT), bootstrapActionConfigs.get(0).getScriptBootstrapAction().getPath()); // Verify EMR configure daemon bootstrap action assertEquals(ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(), bootstrapActionConfigs.get(1).getName()); assertEquals(EMR_CONFIGURE_DAEMON, bootstrapActionConfigs.get(1).getScriptBootstrapAction().getPath()); assertEquals(String.format("%s=%s", EMR_CLUSTER_DAEMON_CONFIG_NAME, EMR_CLUSTER_DAEMON_CONFIG_VALUE), 
bootstrapActionConfigs.get(1).getScriptBootstrapAction().getArgs().get(0)); }
Example #13
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 4 votes |
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config cluster, Filer filer, ParameterCompiler parameterCompiler) throws IOException { RemoteFile runner = prepareRunner(filer, tag); // Compile steps stepCompiler.compile(runner); List<StepConfig> stepConfigs = stepCompiler.stepConfigs(); Config ec2 = cluster.getNested("ec2"); Config master = ec2.getNestedOrGetEmpty("master"); List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of()); List<Config> task = ec2.getListOrEmpty("task", Config.class); List<String> applications = cluster.getListOrEmpty("applications", String.class); if (applications.isEmpty()) { applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink"); } // TODO: allow configuring additional application parameters List<Application> applicationConfigs = applications.stream() .map(application -> new Application().withName(application)) .collect(toList()); // TODO: merge configurations with the same classification? 
List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream() .map(this::configurations) .flatMap(Collection::stream) .collect(toList()); List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class); List<BootstrapActionConfig> bootstrapActions = new ArrayList<>(); for (int i = 0; i < bootstrap.size(); i++) { bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler)); } // Stage files to S3 filer.stageFiles(); Optional<String> subnetId = ec2.getOptional("subnet_id", String.class); String defaultMasterInstanceType; String defaultCoreInstanceType; String defaultTaskInstanceType; if (subnetId.isPresent()) { // m4 requires VPC (subnet id) defaultMasterInstanceType = "m4.2xlarge"; defaultCoreInstanceType = "m4.xlarge"; defaultTaskInstanceType = "m4.xlarge"; } else { defaultMasterInstanceType = "m3.2xlarge"; defaultCoreInstanceType = "m3.xlarge"; defaultTaskInstanceType = "m3.xlarge"; } RunJobFlowRequest request = new RunJobFlowRequest() .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")") .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0")) .withSteps(stepConfigs) .withBootstrapActions(bootstrapActions) .withApplications(applicationConfigs) .withLogUri(cluster.get("logs", String.class, null)) .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole")) .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole")) .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag)) .withVisibleToAllUsers(cluster.get("visible", boolean.class, true)) .withConfigurations(configurations) .withInstances(new JobFlowInstancesConfig() .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder() // Master Node .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1)) // Core Group .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType)) // Task Groups 
.addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType)) .build() ) .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class)) .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class)) .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null)) .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null)) .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null)) .withTerminationProtected(cluster.get("termination_protected", boolean.class, false)) .withPlacement(cluster.getOptional("availability_zone", String.class) .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull()) .withEc2SubnetId(subnetId.orNull()) .withEc2KeyName(ec2.get("key", String.class)) .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true))); logger.info("Submitting EMR job with {} steps(s)", request.getSteps().size()); RunJobFlowResult result = emr.runJobFlow(request); logger.info("Submitted EMR job with {} step(s): {}", request.getSteps().size(), result.getJobFlowId(), result); return NewCluster.of(result.getJobFlowId(), request.getSteps().size()); }
Example #14
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 4 votes |
/** * This method uses method the AWS Java to launch an Apache HBase cluster on Amazon EMR. * * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service * @param clusterIdentifier - identifier of an existing cluster * @param amiVersion - AMI to use for launching this cluster * @param keypair - A keypair for SSHing into the Amazon EMR master node * @param masterInstanceType - Master node Amazon EC2 instance type * @param coreInstanceType - core nodes Amazon EC2 instance type * @param logUri - An Amazon S3 bucket for your * @param numberOfNodes - total number of nodes in this cluster including master node * @return */ public static String createCluster(AmazonElasticMapReduce client, String clusterIdentifier, String amiVersion, String keypair, String masterInstanceType, String coreInstanceType, String logUri, int numberOfNodes) { if (clusterExists(client, clusterIdentifier)) { LOG.info("Cluster " + clusterIdentifier + " is available"); return clusterIdentifier; } //Error checking if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version"); if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair"); if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type"); if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs."); if (numberOfNodes < 0) throw new RuntimeException("ERROR: Please specify at least 1 node"); RunJobFlowRequest request = new RunJobFlowRequest() .withAmiVersion(amiVersion) .withBootstrapActions(new BootstrapActionConfig() .withName("Install HBase") .withScriptBootstrapAction(new ScriptBootstrapActionConfig() .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase"))) .withName("Job Flow With HBAse Actions") .withSteps(new StepConfig() 
//enable debugging step .withName("Enable debugging") .withActionOnFailure("TERMINATE_CLUSTER") .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()), //Start HBase step - after installing it with a bootstrap action createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()), //add HBase backup step createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs())) .withLogUri(logUri) .withInstances(new JobFlowInstancesConfig() .withEc2KeyName(keypair) .withInstanceCount(numberOfNodes) .withKeepJobFlowAliveWhenNoSteps(true) .withMasterInstanceType(masterInstanceType) .withSlaveInstanceType(coreInstanceType)); RunJobFlowResult result = client.runJobFlow(request); String state = null; while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) { try { Thread.sleep(10 * 1000); LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available."); } catch (InterruptedException e) { } if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")){ LOG.error("Could not create EMR Cluster"); System.exit(-1); } } LOG.info("Created cluster " + result.getJobFlowId()); LOG.info("Cluster " + clusterIdentifier + " is available"); return result.getJobFlowId(); }
Example #15
Source File: EmrOperations.java From herd with Apache License 2.0 | votes |
/**
 * Runs the given job flow request using the given EMR client.
 *
 * @param emrClient the EMR client to run the request with
 * @param jobFlowRequest the run job flow request
 *
 * @return a string identifying the job flow (presumably the job flow ID; confirm against the implementations)
 */
public String runEmrJobFlow(AmazonElasticMapReduceClient emrClient, RunJobFlowRequest jobFlowRequest);