com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce Java Examples
The following examples show how to use
com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestEmrClusterJob.java From datacollector with Apache License 2.0 | 6 votes |
/**
 * Checks that {@code getClusterStatus} obtains the EMR client once and issues exactly one
 * {@code describeCluster} request.
 */
@Test
public void testGetClusterStatus() {
    EmrClusterJob.Client client = Mockito.spy(new EmrClusterJob().getClient(new Properties()));

    // Replace the real EMR client with a mock so no AWS call is made.
    AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
    Mockito.doReturn(emr).when(client).getEmrClient(Mockito.any(EmrClusterConfig.class));

    // Mocked response chain: describeCluster -> result -> cluster -> status.
    ClusterStatus status = Mockito.mock(ClusterStatus.class);
    Cluster cluster = Mockito.mock(Cluster.class);
    DescribeClusterResult result = Mockito.mock(DescribeClusterResult.class);
    Mockito.doReturn(result).when(emr).describeCluster(Mockito.any(DescribeClusterRequest.class));
    Mockito.doReturn(cluster).when(result).getCluster();
    Mockito.doReturn(status).when(cluster).getStatus();

    client.getClusterStatus("foo");

    // verify(mock) checks for exactly one invocation.
    Mockito.verify(emr).describeCluster(Mockito.any(DescribeClusterRequest.class));
    Mockito.verify(client).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
Example #2
Source File: CloudFormationClient.java From herd-mdl with Apache License 2.0 | 6 votes |
public List<ClusterSummary> getStackClustersSummary(AmazonElasticMapReduce amazonElasticMapReduce, List<String> stackClusterIds, CFTStackInfo cftStackInfo) { List<ClusterSummary> stackClustersSummary = new ArrayList<>(); ListClustersRequest listClustersRequest = new ListClustersRequest(); //Only get clusters that got created after we setup our stack listClustersRequest.setCreatedAfter(cftStackInfo.creationTime()); ListClustersResult listClustersResult = amazonElasticMapReduce .listClusters(listClustersRequest); while (true) { for (ClusterSummary cluster : listClustersResult.getClusters()) { if (stackClusterIds.contains(cluster.getId())) { stackClustersSummary.add(cluster); } } if (listClustersResult.getMarker() != null) { listClustersRequest.setMarker(listClustersResult.getMarker()); listClustersResult = amazonElasticMapReduce.listClusters(listClustersRequest); } else { break; } } return stackClustersSummary; }
Example #3
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 6 votes |
private List<StepSummary> listSubmittedSteps(AmazonElasticMapReduce emr, String tag, NewCluster cluster) { List<StepSummary> steps = new ArrayList<>(); ListStepsRequest request = new ListStepsRequest().withClusterId(cluster.id()); while (steps.size() < cluster.steps()) { ListStepsResult result = emr.listSteps(request); for (StepSummary step : result.getSteps()) { if (step.getName().contains(tag)) { steps.add(step); } } if (result.getMarker() == null) { break; } request.setMarker(result.getMarker()); } // The ListSteps api returns steps in reverse order. So reverse them to submission order. Collections.reverse(steps); return steps; }
Example #4
Source File: AwsClientFactory.java From herd with Apache License 2.0 | 6 votes |
/** * Creates a client for accessing Amazon EMR service. * * @param awsParamsDto the AWS related parameters DTO that includes optional AWS credentials and proxy information * * @return the Amazon EMR client */ @Cacheable(DaoSpringModuleConfig.HERD_CACHE_NAME) public AmazonElasticMapReduce getEmrClient(AwsParamsDto awsParamsDto) { // Get client configuration. ClientConfiguration clientConfiguration = awsHelper.getClientConfiguration(awsParamsDto); // If specified, use the AWS credentials passed in. if (StringUtils.isNotBlank(awsParamsDto.getAwsAccessKeyId())) { return AmazonElasticMapReduceClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider( new BasicSessionCredentials(awsParamsDto.getAwsAccessKeyId(), awsParamsDto.getAwsSecretKey(), awsParamsDto.getSessionToken()))) .withClientConfiguration(clientConfiguration).withRegion(awsParamsDto.getAwsRegionName()).build(); } // Otherwise, use the default AWS credentials provider chain. else { return AmazonElasticMapReduceClientBuilder.standard().withClientConfiguration(clientConfiguration).withRegion(awsParamsDto.getAwsRegionName()) .build(); } }
Example #5
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 5 votes |
private TaskResult run(String tag, AmazonElasticMapReduce emr, AWSKMSClient kms, Filer filer) throws IOException { ParameterCompiler parameterCompiler = new ParameterCompiler(kms, context); // Set up step compiler List<Config> steps = params.getListOrEmpty("steps", Config.class); StepCompiler stepCompiler = new StepCompiler(tag, steps, filer, parameterCompiler, objectMapper, defaultActionOnFailure); // Set up job submitter Submitter submitter; Config cluster = null; try { cluster = params.parseNestedOrGetEmpty("cluster"); } catch (ConfigException ignore) { } if (cluster != null) { // Create a new cluster submitter = newClusterSubmitter(emr, tag, stepCompiler, cluster, filer, parameterCompiler); } else { // Cluster ID? Use existing cluster. String clusterId = params.get("cluster", String.class); submitter = existingClusterSubmitter(emr, tag, stepCompiler, clusterId, filer); } // Submit EMR job SubmissionResult submission = submitter.submit(); // Wait for the steps to finish running if (!steps.isEmpty()) { waitForSteps(emr, submission); } return result(submission); }
Example #6
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 5 votes |
/**
 * Checks that one of the cluster's bootstrap actions is named "Install HBase" (case-insensitive).
 *
 * @param client - The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterId - unique identifier for this cluster
 * @return true when such a bootstrap action exists; never returns false
 * @throws RuntimeException when no bootstrap action has that name
 */
private static boolean isHBaseInstalled(AmazonElasticMapReduce client, String clusterId) {
    ListBootstrapActionsRequest request = new ListBootstrapActionsRequest().withClusterId(clusterId);
    ListBootstrapActionsResult bootstrapActions = client.listBootstrapActions(request);

    for (Command action : bootstrapActions.getBootstrapActions()) {
        if (action.getName().equalsIgnoreCase("Install HBase")) {
            return true;
        }
    }
    throw new RuntimeException("ERROR: Apache HBase is not installed on this cluster!!");
}
Example #7
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 5 votes |
/**
 * Helper method to determine the master node public DNS of an Amazon EMR cluster
 *
 * @param client - The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterId - unique identifier for this cluster
 * @return public dns url
 * @throws IndexOutOfBoundsException if the service returns no job flow for {@code clusterId}
 */
public static String getPublicDns(AmazonElasticMapReduce client, String clusterId) {
    DescribeJobFlowsRequest request = new DescribeJobFlowsRequest().withJobFlowIds(clusterId);
    List<JobFlowDetail> jobFlows = client.describeJobFlows(request).getJobFlows();

    // Only the first returned job flow is consulted.
    JobFlowInstancesDetail instancesDetail = jobFlows.get(0).getInstances();

    // Read the DNS name once so the logged value and the returned value cannot differ.
    String masterPublicDns = instancesDetail.getMasterPublicDnsName();
    LOG.info("EMR cluster public DNS is " + masterPublicDns);
    return masterPublicDns;
}
Example #8
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 5 votes |
/** * Helper method to determine if an Amazon EMR cluster exists * * @param client * The {@link AmazonElasticMapReduceClient} with read permissions * @param clusterIdentifier * The Amazon EMR cluster to check * @return true if the Amazon EMR cluster exists, otherwise false */ public static boolean clusterExists(AmazonElasticMapReduce client, String clusterIdentifier) { if (clusterIdentifier != null && !clusterIdentifier.isEmpty()) { ListClustersResult clustersList = client.listClusters(); ListIterator<ClusterSummary> iterator = clustersList.getClusters().listIterator(); ClusterSummary summary; for (summary = iterator.next() ; iterator.hasNext();summary = iterator.next()) { if (summary.getId().equals(clusterIdentifier)) { DescribeClusterRequest describeClusterRequest = new DescribeClusterRequest().withClusterId(clusterIdentifier); DescribeClusterResult result = client.describeCluster(describeClusterRequest); if (result != null) { Cluster cluster = result.getCluster(); //check if HBase is installed on this cluster if (isHBaseInstalled(client, cluster.getId())) return false; String state = cluster.getStatus().getState(); LOG.info(clusterIdentifier + " is " + state + ". "); if (state.equalsIgnoreCase("RUNNING") ||state.equalsIgnoreCase("WAITING")) { LOG.info("The cluster with id " + clusterIdentifier + " exists and is " + state); return true; } } } } } LOG.info("The cluster with id " + clusterIdentifier + " does not exist"); return false; }
Example #9
Source File: TestEmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code getActiveCluster} obtains the EMR client once and issues exactly one
 * {@code listClusters} request.
 */
@Test
public void testGetActiveCluster() {
    EmrClusterJob.Client client = Mockito.spy(new EmrClusterJob().getClient(new Properties()));

    // Replace the real EMR client with a mock that answers listClusters with a mocked result.
    AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
    ListClustersResult listResult = Mockito.mock(ListClustersResult.class);
    Mockito.doReturn(emr).when(client).getEmrClient(Mockito.any(EmrClusterConfig.class));
    Mockito.doReturn(listResult).when(emr).listClusters(Mockito.any(ListClustersRequest.class));

    client.getActiveCluster("foo");

    // verify(mock) checks for exactly one invocation.
    Mockito.verify(emr).listClusters(Mockito.any(ListClustersRequest.class));
    Mockito.verify(client).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
Example #10
Source File: TestEmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code terminateCluster} obtains the EMR client once and issues exactly one
 * {@code terminateJobFlows} request.
 */
@Test
public void testTerminateCluster() {
    EmrClusterJob.Client client = Mockito.spy(new EmrClusterJob().getClient(new Properties()));

    // Replace the real EMR client with a mock that answers terminateJobFlows with a mocked result.
    AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
    TerminateJobFlowsResult terminateResult = Mockito.mock(TerminateJobFlowsResult.class);
    Mockito.doReturn(emr).when(client).getEmrClient(Mockito.any(EmrClusterConfig.class));
    Mockito.doReturn(terminateResult).when(emr).terminateJobFlows(Mockito.any(TerminateJobFlowsRequest.class));

    client.terminateCluster("foo");

    // verify(mock) checks for exactly one invocation.
    Mockito.verify(emr).terminateJobFlows(Mockito.any(TerminateJobFlowsRequest.class));
    Mockito.verify(client).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
Example #11
Source File: TestEmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code createCluster} obtains the EMR client once and issues exactly one
 * {@code runJobFlow} request.
 */
@Test
public void testCreateCluster() {
    Properties jobProperties = new Properties();
    jobProperties.setProperty("instanceCount", "1");
    EmrClusterJob.Client client = Mockito.spy(new EmrClusterJob().getClient(jobProperties));

    // Replace the real EMR client with a mock that answers runJobFlow with a mocked result.
    AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
    RunJobFlowResult runResult = Mockito.mock(RunJobFlowResult.class);
    Mockito.doReturn(runResult).when(emr).runJobFlow(Mockito.any(RunJobFlowRequest.class));
    Mockito.doReturn(emr).when(client).getEmrClient(Mockito.any(EmrClusterConfig.class));

    client.createCluster("foo");

    // verify(mock) checks for exactly one invocation.
    Mockito.verify(emr).runJobFlow(Mockito.any(RunJobFlowRequest.class));
    Mockito.verify(client).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
Example #12
Source File: EmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Returns the EMR client held in {@code emrClient}, creating it on first use from the access key,
 * secret key and region of the given configuration. Later calls return the same instance and do
 * not read the configuration.
 *
 * @param emrClusterConfig source of the credentials and region for the first call
 * @return the EMR client
 */
@VisibleForTesting
AmazonElasticMapReduce getEmrClient(EmrClusterConfig emrClusterConfig) {
    if (emrClient != null) {
        return emrClient;
    }

    BasicAWSCredentials credentials = new BasicAWSCredentials(
        emrClusterConfig.getAccessKey(),
        emrClusterConfig.getSecretKey()
    );
    Regions region = Regions.fromName(emrClusterConfig.getUserRegion());

    emrClient = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(new AWSStaticCredentialsProvider(credentials))
        .withRegion(region)
        .build();
    return emrClient;
}
Example #13
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 5 votes |
private Submitter newClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config clusterConfig, Filer filer, ParameterCompiler parameterCompiler) { return () -> { // Start cluster NewCluster cluster = pollingRetryExecutor(state, "submission") .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5))) // TODO: EMR requests are not idempotent, thus retrying might produce duplicate cluster submissions. .retryUnless(AmazonServiceException.class, Aws::isDeterministicException) .runOnce(NewCluster.class, s -> submitNewClusterRequest(emr, tag, stepCompiler, clusterConfig, filer, parameterCompiler)); // Get submitted step IDs List<String> stepIds = pollingRetryExecutor(this.state, "steps") .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5))) .retryUnless(AmazonServiceException.class, Aws::isDeterministicException) .runOnce(new TypeReference<List<String>>() {}, s -> { List<StepSummary> steps = listSubmittedSteps(emr, tag, cluster); logSubmittedSteps(cluster.id(), cluster.steps(), i -> steps.get(i).getName(), i -> steps.get(i).getId()); return steps.stream().map(StepSummary::getId).collect(toList()); }); // Log cluster status while waiting for it to come up pollingWaiter(state, "bootstrap") .withWaitMessage("EMR cluster still booting") .withPollInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5))) .awaitOnce(String.class, pollState -> checkClusterBootStatus(emr, cluster, pollState)); return SubmissionResult.ofNewCluster(cluster.id(), stepIds); }; }
Example #14
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 5 votes |
private Submitter existingClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, String clusterId, Filer filer) { return () -> { List<String> stepIds = pollingRetryExecutor(state, "submission") .retryUnless(AmazonServiceException.class, Aws::isDeterministicException) .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5))) .runOnce(new TypeReference<List<String>>() {}, s -> { RemoteFile runner = prepareRunner(filer, tag); // Compile steps stepCompiler.compile(runner); // Stage files to S3 filer.stageFiles(); AddJobFlowStepsRequest request = new AddJobFlowStepsRequest() .withJobFlowId(clusterId) .withSteps(stepCompiler.stepConfigs()); int steps = request.getSteps().size(); logger.info("Submitting {} EMR step(s) to {}", steps, clusterId); AddJobFlowStepsResult result = emr.addJobFlowSteps(request); logSubmittedSteps(clusterId, steps, i -> request.getSteps().get(i).getName(), i -> result.getStepIds().get(i)); return ImmutableList.copyOf(result.getStepIds()); }); return SubmissionResult.ofExistingCluster(clusterId, stepIds); }; }
Example #15
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 5 votes |
/**
 * Polls until {@code checkStepCompletion} yields a result for the last step of the submission.
 *
 * @param emr the EMR client
 * @param submission the submission whose steps are awaited; its step ID list must not be empty
 */
private void waitForSteps(AmazonElasticMapReduce emr, SubmissionResult submission)
{
    // Only the final step ID is handed to the completion check.
    String finalStepId = Iterables.getLast(submission.stepIds());
    DurationInterval pollInterval = DurationInterval.of(Duration.ofSeconds(15), Duration.ofMinutes(5));

    pollingWaiter(state, "result")
            .withWaitMessage("EMR job still running: %s", submission.clusterId())
            .withPollInterval(pollInterval)
            .awaitOnce(Step.class, waitState -> checkStepCompletion(emr, submission, finalStepId, waitState));
}
Example #16
Source File: TableProviderFactory.java From aws-athena-query-federation with Apache License 2.0 | 5 votes |
/**
 * Registers one table provider per supported resource table, each built from the AWS client it
 * needs. Exposed for tests so the clients can be supplied by the caller.
 *
 * @param ec2 client handed to the seven EC2-backed providers
 * @param emr client handed to the EMR cluster provider
 * @param rds client handed to the RDS provider
 * @param amazonS3 client handed to the two S3 providers
 */
@VisibleForTesting
protected TableProviderFactory(AmazonEC2 ec2, AmazonElasticMapReduce emr, AmazonRDS rds, AmazonS3 amazonS3)
{
    addProvider(new Ec2TableProvider(ec2));
    addProvider(new EbsTableProvider(ec2));
    addProvider(new VpcTableProvider(ec2));
    addProvider(new SecurityGroupsTableProvider(ec2));
    addProvider(new RouteTableProvider(ec2));
    addProvider(new SubnetTableProvider(ec2));
    addProvider(new ImagesTableProvider(ec2));
    addProvider(new EmrClusterTableProvider(emr));
    addProvider(new RdsTableProvider(rds));
    addProvider(new S3ObjectsTableProvider(amazonS3));
    addProvider(new S3BucketsTableProvider(amazonS3));
}
Example #17
Source File: AwsClientFactoryTest.java From herd with Apache License 2.0 | 5 votes |
@Test public void testGetEmrClientCacheHitMiss() { // Create an AWS parameters DTO that contains both AWS credentials and proxy information. AwsParamsDto awsParamsDto = new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1); // Get an Amazon EMR client. AmazonElasticMapReduce amazonElasticMapReduceClient = awsClientFactory.getEmrClient(awsParamsDto); // Confirm a cache hit. assertEquals(amazonElasticMapReduceClient, awsClientFactory.getEmrClient( new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1))); // Confirm a cache miss due to AWS credentials. assertNotEquals(amazonElasticMapReduceClient, awsClientFactory.getEmrClient( new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY_2, AWS_ASSUMED_ROLE_SECRET_KEY_2, AWS_ASSUMED_ROLE_SESSION_TOKEN_2, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1))); // Confirm a cache miss due to http proxy information. assertNotEquals(amazonElasticMapReduceClient, awsClientFactory.getEmrClient( new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST_2, HTTP_PROXY_PORT_2, AWS_REGION_NAME_US_EAST_1))); // Clear the cache. cacheManager.getCache(DaoSpringModuleConfig.HERD_CACHE_NAME).clear(); // Confirm a cache miss due to cleared cache. assertNotEquals(amazonElasticMapReduceClient, awsClientFactory.getEmrClient(awsParamsDto)); }
Example #18
Source File: emr-add-steps.java From aws-doc-sdk-examples with Apache License 2.0 | 5 votes |
public static void main(String[] args) { AWSCredentials credentials_profile = null; try { credentials_profile = new ProfileCredentialsProvider("default").getCredentials(); } catch (Exception e) { throw new AmazonClientException( "Cannot load credentials from .aws/credentials file. " + "Make sure that the credentials file exists and the profile name is specified within it.", e); } AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(credentials_profile)) .withRegion(Regions.US_WEST_1) .build(); // Run a bash script using a predefined step in the StepFactory helper class StepFactory stepFactory = new StepFactory(); StepConfig runBashScript = new StepConfig() .withName("Run a bash script") .withHadoopJarStep(stepFactory.newScriptRunnerStep("s3://jeffgoll/emr-scripts/create_users.sh")) .withActionOnFailure("CONTINUE"); // Run a custom jar file as a step HadoopJarStepConfig hadoopConfig1 = new HadoopJarStepConfig() .withJar("s3://path/to/my/jarfolder") // replace with the location of the jar to run as a step .withMainClass("com.my.Main1") // optional main class, this can be omitted if jar above has a manifest .withArgs("--verbose"); // optional list of arguments to pass to the jar StepConfig myCustomJarStep = new StepConfig("RunHadoopJar", hadoopConfig1); AddJobFlowStepsResult result = emr.addJobFlowSteps(new AddJobFlowStepsRequest() .withJobFlowId("j-xxxxxxxxxxxx") // replace with cluster id to run the steps .withSteps(runBashScript,myCustomJarStep)); System.out.println(result.getStepIds()); }
Example #19
Source File: InventoryUtil.java From pacbot with Apache License 2.0 | 5 votes |
/**
 * Collects the full EMR cluster descriptions of an account, region by region.
 *
 * <p>Regions whose name occurs in {@code skipRegions} are not queried. For every other region all
 * pages of cluster summaries are listed and each cluster is described. A failure in one region is
 * caught; it is logged and uploaded only when that region supports EMR, and processing continues
 * with the next region.
 *
 * @param temporaryCredentials the temporary credentials
 * @param skipRegions the skip regions
 * @param accountId the accountId
 * @param accountName the account name
 * @return map from "accountId, accountName, region" (joined with the delimiter) to the clusters
 *         found in that region; regions without clusters have no entry
 */
public static Map<String,List<Cluster>> fetchEMRInfo(BasicSessionCredentials temporaryCredentials, String skipRegions,String accountId,String accountName){

    Map<String,List<Cluster>> clusterList = new LinkedHashMap<>();
    String expPrefix = InventoryConstants.ERROR_PREFIX_CODE+accountId + "\",\"Message\": \"Exception in fetching info for resource in specific region\" ,\"type\": \"EMR\" , \"region\":\"" ;

    for(Region region : RegionUtils.getRegions()){
        try{
            if(skipRegions.contains(region.getName())){
                continue;
            }

            AmazonElasticMapReduce emrClient = AmazonElasticMapReduceClientBuilder.standard().
                withCredentials(new AWSStaticCredentialsProvider(temporaryCredentials)).withRegion(region.getName()).build();

            // Gather the summaries from every page; a null marker ends the listing.
            List<ClusterSummary> summaries = new ArrayList<>();
            String marker = null;
            boolean morePages = true;
            while(morePages){
                ListClustersResult page = emrClient.listClusters(new ListClustersRequest().withMarker(marker));
                summaries.addAll(page.getClusters());
                marker = page.getMarker();
                morePages = marker != null;
            }

            // Resolve each summary to its full cluster description.
            List<Cluster> described = new ArrayList<>();
            for(ClusterSummary summary : summaries){
                DescribeClusterResult descClstrRslt = emrClient.describeCluster(new DescribeClusterRequest().withClusterId(summary.getId()));
                described.add(descClstrRslt.getCluster());
            }

            if( !described.isEmpty() ){
                log.debug(InventoryConstants.ACCOUNT + accountId +" Type : EMR "+region.getName() + " >> "+described.size());
                clusterList.put(accountId+delimiter+accountName+delimiter+region.getName(),described);
            }
        }catch(Exception e){
            if(region.isServiceSupported(AmazonElasticMapReduce.ENDPOINT_PREFIX)){
                log.warn(expPrefix+ region.getName()+InventoryConstants.ERROR_CAUSE +e.getMessage()+"\"}");
                ErrorManageUtil.uploadError(accountId,region.getName(),"emr",e.getMessage());
            }
        }
    }
    return clusterList;
}
Example #20
Source File: InventoryUtilTest.java From pacbot with Apache License 2.0 | 5 votes |
/**
 * Runs {@code fetchEMRInfo} against a fully mocked EMR client builder that lists one cluster and
 * describes it, and expects a result map with one entry.
 *
 * @throws Exception the exception
 */
@SuppressWarnings("static-access")
@Test
public void fetchEMRInfoTest() throws Exception {

    mockStatic(AmazonElasticMapReduceClientBuilder.class);
    AmazonElasticMapReduce emrClient = PowerMockito.mock(AmazonElasticMapReduce.class);
    AmazonElasticMapReduceClientBuilder emrClientBuilder = PowerMockito.mock(AmazonElasticMapReduceClientBuilder.class);
    AWSStaticCredentialsProvider awsStaticCredentialsProvider = PowerMockito.mock(AWSStaticCredentialsProvider.class);
    PowerMockito.whenNew(AWSStaticCredentialsProvider.class).withAnyArguments().thenReturn(awsStaticCredentialsProvider);

    // Every builder call returns the mocked builder; build() yields the mocked client.
    when(emrClientBuilder.standard()).thenReturn(emrClientBuilder);
    when(emrClientBuilder.withCredentials(anyObject())).thenReturn(emrClientBuilder);
    when(emrClientBuilder.withRegion(anyString())).thenReturn(emrClientBuilder);
    when(emrClientBuilder.build()).thenReturn(emrClient);

    // listClusters answers with a single summary whose id is "id".
    ClusterSummary onlySummary = new ClusterSummary();
    onlySummary.setId("id");
    List<ClusterSummary> summaries = new ArrayList<>();
    summaries.add(onlySummary);
    ListClustersResult listClustersResult = new ListClustersResult();
    listClustersResult.setClusters(summaries);
    when(emrClient.listClusters(anyObject())).thenReturn(listClustersResult);

    // describeCluster answers with an empty Cluster object.
    DescribeClusterResult describeClusterResult = new DescribeClusterResult();
    describeClusterResult.setCluster(new Cluster());
    when(emrClient.describeCluster(anyObject())).thenReturn(describeClusterResult);

    BasicSessionCredentials credentials = new BasicSessionCredentials("awsAccessKey", "awsSecretKey", "sessionToken");
    assertThat(inventoryUtil.fetchEMRInfo(credentials, "skipRegions", "account","accountName").size(), is(1));
}
Example #21
Source File: EmrDaoImplTest.java From herd with Apache License 2.0 | 5 votes |
@Test public void testCreateEmrClusterNoNscdBootstrapScript() { // Create an AWS parameters DTO. final AwsParamsDto awsParamsDto = new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT, AWS_REGION_NAME_US_EAST_1); EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition(); final InstanceDefinitions instanceDefinitions = new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition()); emrClusterDefinition.setInstanceDefinitions(instanceDefinitions); emrClusterDefinition.setNodeTags(Collections.emptyList()); AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName()) .build(); when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce); when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture())) .thenReturn(EMR_CLUSTER_ID); // Create the cluster without NSCD script configuration String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto); // Verifications assertEquals(clusterId, EMR_CLUSTER_ID); verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT); verify(awsClientFactory).getEmrClient(awsParamsDto); verify(emrOperations).runEmrJobFlow(any(), any()); RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue(); List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions(); // There should be no bootstrap action assertTrue(bootstrapActionConfigs.isEmpty()); }
Example #22
Source File: ClusterManager.java From herd-mdl with Apache License 2.0 | 5 votes |
/**
 * Creates an EMR client that authenticates through the default AWS credentials provider chain,
 * stores it in {@code emrClient} and returns it.
 *
 * <p>The chain itself is handed to the client rather than a one-time snapshot of its credentials,
 * so credentials the chain is able to renew are not frozen at construction time.
 *
 * @return the newly created EMR client
 */
AmazonElasticMapReduce createEmrClient() {
    DefaultAWSCredentialsProviderChain defaultAWSCredentialsProviderChain = new DefaultAWSCredentialsProviderChain();

    // Resolve once up front so that missing credentials still fail here, as they did before.
    defaultAWSCredentialsProviderChain.getCredentials();

    emrClient = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(defaultAWSCredentialsProviderChain)
        .build();
    return emrClient;
}
Example #23
Source File: emr-flink-cluster-transient-step.java From aws-doc-sdk-examples with Apache License 2.0 | 4 votes |
/**
 * Launches a transient EMR cluster with Flink installed, runs the Flink WordCount example as its
 * only step and prints the ID of the new cluster. The cluster is configured not to stay alive
 * once it has no steps left.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Load credentials from the "default" profile; a failure is rethrown with a setup hint.
    AWSCredentials credentials_profile = null;
    try {
        credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
            "Cannot load credentials from .aws/credentials file. " +
            "Make sure that the credentials file exists and the profile name is specified within it.",
            e);
    }

    AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
        .withRegion(Regions.US_WEST_1)
        .build();

    List<StepConfig> stepConfigs = new ArrayList<StepConfig>();

    // "bash -c" treats only the next argument as the command string; any further arguments become
    // positional parameters ($0, $1, ...). The whole Flink command line is therefore one argument.
    HadoopJarStepConfig flinkWordCountConf = new HadoopJarStepConfig()
        .withJar("command-runner.jar")
        .withArgs("bash", "-c",
            "flink run -m yarn-cluster -yn 2 /usr/lib/flink/examples/streaming/WordCount.jar"
                + " --input s3://path/to/input-file.txt --output s3://path/to/output/");

    StepConfig flinkRunWordCountStep = new StepConfig()
        .withName("Flink add a wordcount step and terminate")
        .withActionOnFailure("CONTINUE")
        .withHadoopJarStep(flinkWordCountConf);

    stepConfigs.add(flinkRunWordCountStep);

    Application flink = new Application().withName("Flink");

    RunJobFlowRequest request = new RunJobFlowRequest()
        .withName("flink-transient")
        .withReleaseLabel("emr-5.20.0")
        .withApplications(flink)
        .withServiceRole("EMR_DefaultRole")
        .withJobFlowRole("EMR_EC2_DefaultRole")
        .withLogUri("s3://path/to/my/logfiles")
        .withInstances(new JobFlowInstancesConfig()
            .withEc2KeyName("myEc2Key")
            .withEc2SubnetId("subnet-12ab3c45")
            .withInstanceCount(3)
            .withKeepJobFlowAliveWhenNoSteps(false)
            .withMasterInstanceType("m4.large")
            .withSlaveInstanceType("m4.large"))
        .withSteps(stepConfigs);

    RunJobFlowResult result = emr.runJobFlow(request);
    // Print the job flow ID itself, not the string form of the whole result object.
    System.out.println("The cluster ID is " + result.getJobFlowId());
}
Example #24
Source File: EmrClusterTableProvider.java From aws-athena-query-federation with Apache License 2.0 | 4 votes |
/**
 * Creates a provider that keeps the given EMR client in its {@code emr} field.
 *
 * @param emr the EMR client stored by this provider
 */
public EmrClusterTableProvider(AmazonElasticMapReduce emr)
{
    this.emr = emr;
}
Example #25
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 4 votes |
/** * This method uses method the AWS Java to launch an Apache HBase cluster on Amazon EMR. * * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service * @param clusterIdentifier - identifier of an existing cluster * @param amiVersion - AMI to use for launching this cluster * @param keypair - A keypair for SSHing into the Amazon EMR master node * @param masterInstanceType - Master node Amazon EC2 instance type * @param coreInstanceType - core nodes Amazon EC2 instance type * @param logUri - An Amazon S3 bucket for your * @param numberOfNodes - total number of nodes in this cluster including master node * @return */ public static String createCluster(AmazonElasticMapReduce client, String clusterIdentifier, String amiVersion, String keypair, String masterInstanceType, String coreInstanceType, String logUri, int numberOfNodes) { if (clusterExists(client, clusterIdentifier)) { LOG.info("Cluster " + clusterIdentifier + " is available"); return clusterIdentifier; } //Error checking if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version"); if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair"); if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type"); if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs."); if (numberOfNodes < 0) throw new RuntimeException("ERROR: Please specify at least 1 node"); RunJobFlowRequest request = new RunJobFlowRequest() .withAmiVersion(amiVersion) .withBootstrapActions(new BootstrapActionConfig() .withName("Install HBase") .withScriptBootstrapAction(new ScriptBootstrapActionConfig() .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase"))) .withName("Job Flow With HBAse Actions") .withSteps(new StepConfig() 
//enable debugging step .withName("Enable debugging") .withActionOnFailure("TERMINATE_CLUSTER") .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()), //Start HBase step - after installing it with a bootstrap action createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()), //add HBase backup step createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs())) .withLogUri(logUri) .withInstances(new JobFlowInstancesConfig() .withEc2KeyName(keypair) .withInstanceCount(numberOfNodes) .withKeepJobFlowAliveWhenNoSteps(true) .withMasterInstanceType(masterInstanceType) .withSlaveInstanceType(coreInstanceType)); RunJobFlowResult result = client.runJobFlow(request); String state = null; while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) { try { Thread.sleep(10 * 1000); LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available."); } catch (InterruptedException e) { } if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")){ LOG.error("Could not create EMR Cluster"); System.exit(-1); } } LOG.info("Created cluster " + result.getJobFlowId()); LOG.info("Cluster " + clusterIdentifier + " is available"); return result.getJobFlowId(); }
Example #26
Source File: ClusterManager.java From herd-mdl with Apache License 2.0 | 4 votes |
/**
 * Returns the current value of the {@code emrClient} field.
 *
 * @return the EMR client held by this object
 */
AmazonElasticMapReduce getEmrClient() {
    return emrClient;
}
Example #27
Source File: custom-emrfs-materials.java From aws-doc-sdk-examples with Apache License 2.0 | 4 votes |
/**
 * Submits a request for a three-node EMR cluster (release {@code emr-5.20.0}) with Hive,
 * Spark, Ganglia and Zeppelin, passing a set of {@code emrfs-site} properties that name a
 * custom encryption materials provider, and prints the ID of the resulting cluster.
 *
 * <p>Credentials are read from the {@code default} profile; the client targets
 * {@code Regions.US_WEST_1}.
 *
 * @param args command-line arguments (unused)
 * @throws AmazonClientException if the credentials cannot be loaded from the profile
 */
public static void main(String[] args) {
    final AWSCredentials credentialsProfile;
    try {
        credentialsProfile = new ProfileCredentialsProvider("default").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
                "Cannot load credentials from .aws/credentials file. " +
                "Make sure that the credentials file exists and the profile name is specified within it.",
                e);
    }

    AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
            .withCredentials(new AWSStaticCredentialsProvider(credentialsProfile))
            .withRegion(Regions.US_WEST_1)
            .build();

    // Properties supplied under the "emrfs-site" classification.
    Map<String, String> emrfsProperties = new HashMap<String, String>();
    emrfsProperties.put("fs.s3.cse.encryptionMaterialsProvider.uri", "s3://mybucket/MyCustomEncryptionMaterialsProvider.jar");
    emrfsProperties.put("fs.s3.cse.enabled", "true");
    emrfsProperties.put("fs.s3.consistent", "true");
    emrfsProperties.put("fs.s3.cse.encryptionMaterialsProvider", "full.class.name.of.EncryptionMaterialsProvider");

    Configuration myEmrfsConfig = new Configuration()
            .withClassification("emrfs-site")
            .withProperties(emrfsProperties);

    Application hive = new Application().withName("Hive");
    Application spark = new Application().withName("Spark");
    Application ganglia = new Application().withName("Ganglia");
    Application zeppelin = new Application().withName("Zeppelin");

    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName("ClusterWithCustomEMRFSEncryptionMaterialsProvider")
            .withReleaseLabel("emr-5.20.0")
            .withApplications(hive, spark, ganglia, zeppelin)
            .withConfigurations(myEmrfsConfig)
            .withServiceRole("EMR_DefaultRole")
            .withJobFlowRole("EMR_EC2_DefaultRole")
            .withLogUri("s3://path/to/emr/logs")
            .withInstances(new JobFlowInstancesConfig()
                    .withEc2KeyName("myEc2Key")
                    .withInstanceCount(3)
                    .withKeepJobFlowAliveWhenNoSteps(true)
                    .withMasterInstanceType("m4.large")
                    .withSlaveInstanceType("m4.large"));

    RunJobFlowResult result = emr.runJobFlow(request);
    // Print the ID itself rather than the string form of the whole result object.
    System.out.println("The cluster ID is " + result.getJobFlowId());
}
Example #28
Source File: create_cluster.java From aws-doc-sdk-examples with Apache License 2.0 | 4 votes |
public static void main(String[] args) { AWSCredentials credentials_profile = null; try { credentials_profile = new ProfileCredentialsProvider("default").getCredentials(); // specifies any named profile in .aws/credentials as the credentials provider } catch (Exception e) { throw new AmazonClientException( "Cannot load credentials from .aws/credentials file. " + "Make sure that the credentials file exists and that the profile name is defined within it.", e); } // create an EMR client using the credentials and region specified in order to create the cluster AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(credentials_profile)) .withRegion(Regions.US_WEST_1) .build(); // create a step to enable debugging in the AWS Management Console StepFactory stepFactory = new StepFactory(); StepConfig enabledebugging = new StepConfig() .withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); // specify applications to be installed and configured when EMR creates the cluster Application hive = new Application().withName("Hive"); Application spark = new Application().withName("Spark"); Application ganglia = new Application().withName("Ganglia"); Application zeppelin = new Application().withName("Zeppelin"); // create the cluster RunJobFlowRequest request = new RunJobFlowRequest() .withName("MyClusterCreatedFromJava") .withReleaseLabel("emr-5.20.0") // specifies the EMR release version label, we recommend the latest release .withSteps(enabledebugging) .withApplications(hive,spark,ganglia,zeppelin) .withLogUri("s3://path/to/my/emr/logs") // a URI in S3 for log files is required when debugging is enabled .withServiceRole("EMR_DefaultRole") // replace the default with a custom IAM service role if one is used .withJobFlowRole("EMR_EC2_DefaultRole") // replace the default with a custom EMR role for the EC2 instance profile if one is used 
.withInstances(new JobFlowInstancesConfig() .withEc2SubnetId("subnet-12ab34c56") .withEc2KeyName("myEc2Key") .withInstanceCount(3) .withKeepJobFlowAliveWhenNoSteps(true) .withMasterInstanceType("m4.large") .withSlaveInstanceType("m4.large")); RunJobFlowResult result = emr.runJobFlow(request); System.out.println("The cluster ID is " + result.toString()); }
Example #29
Source File: create-spark-cluster.java From aws-doc-sdk-examples with Apache License 2.0 | 4 votes |
public static void main(String[] args) { AWSCredentials credentials_profile = null; try { credentials_profile = new ProfileCredentialsProvider("default").getCredentials(); } catch (Exception e) { throw new AmazonClientException( "Cannot load credentials from .aws/credentials file. " + "Make sure that the credentials file exists and the profile name is specified within it.", e); } AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(credentials_profile)) .withRegion(Regions.US_WEST_1) .build(); // create a step to enable debugging in the AWS Management Console StepFactory stepFactory = new StepFactory(); StepConfig enabledebugging = new StepConfig() .withName("Enable debugging") .withActionOnFailure("TERMINATE_JOB_FLOW") .withHadoopJarStep(stepFactory.newEnableDebuggingStep()); Application spark = new Application().withName("Spark"); RunJobFlowRequest request = new RunJobFlowRequest() .withName("Spark Cluster") .withReleaseLabel("emr-5.20.0") .withSteps(enabledebugging) .withApplications(spark) .withLogUri("s3://path/to/my/logs/") .withServiceRole("EMR_DefaultRole") .withJobFlowRole("EMR_EC2_DefaultRole") .withInstances(new JobFlowInstancesConfig() .withEc2SubnetId("subnet-12ab3c45") .withEc2KeyName("myEc2Key") .withInstanceCount(3) .withKeepJobFlowAliveWhenNoSteps(true) .withMasterInstanceType("m4.large") .withSlaveInstanceType("m4.large") ); RunJobFlowResult result = emr.runJobFlow(request); System.out.println("The cluster ID is " + result.toString()); }
Example #30
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 4 votes |
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config cluster, Filer filer, ParameterCompiler parameterCompiler) throws IOException { RemoteFile runner = prepareRunner(filer, tag); // Compile steps stepCompiler.compile(runner); List<StepConfig> stepConfigs = stepCompiler.stepConfigs(); Config ec2 = cluster.getNested("ec2"); Config master = ec2.getNestedOrGetEmpty("master"); List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of()); List<Config> task = ec2.getListOrEmpty("task", Config.class); List<String> applications = cluster.getListOrEmpty("applications", String.class); if (applications.isEmpty()) { applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink"); } // TODO: allow configuring additional application parameters List<Application> applicationConfigs = applications.stream() .map(application -> new Application().withName(application)) .collect(toList()); // TODO: merge configurations with the same classification? 
List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream() .map(this::configurations) .flatMap(Collection::stream) .collect(toList()); List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class); List<BootstrapActionConfig> bootstrapActions = new ArrayList<>(); for (int i = 0; i < bootstrap.size(); i++) { bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler)); } // Stage files to S3 filer.stageFiles(); Optional<String> subnetId = ec2.getOptional("subnet_id", String.class); String defaultMasterInstanceType; String defaultCoreInstanceType; String defaultTaskInstanceType; if (subnetId.isPresent()) { // m4 requires VPC (subnet id) defaultMasterInstanceType = "m4.2xlarge"; defaultCoreInstanceType = "m4.xlarge"; defaultTaskInstanceType = "m4.xlarge"; } else { defaultMasterInstanceType = "m3.2xlarge"; defaultCoreInstanceType = "m3.xlarge"; defaultTaskInstanceType = "m3.xlarge"; } RunJobFlowRequest request = new RunJobFlowRequest() .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")") .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0")) .withSteps(stepConfigs) .withBootstrapActions(bootstrapActions) .withApplications(applicationConfigs) .withLogUri(cluster.get("logs", String.class, null)) .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole")) .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole")) .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag)) .withVisibleToAllUsers(cluster.get("visible", boolean.class, true)) .withConfigurations(configurations) .withInstances(new JobFlowInstancesConfig() .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder() // Master Node .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1)) // Core Group .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType)) // Task Groups 
.addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType)) .build() ) .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class)) .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class)) .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null)) .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null)) .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null)) .withTerminationProtected(cluster.get("termination_protected", boolean.class, false)) .withPlacement(cluster.getOptional("availability_zone", String.class) .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull()) .withEc2SubnetId(subnetId.orNull()) .withEc2KeyName(ec2.get("key", String.class)) .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true))); logger.info("Submitting EMR job with {} steps(s)", request.getSteps().size()); RunJobFlowResult result = emr.runJobFlow(request); logger.info("Submitted EMR job with {} step(s): {}", request.getSteps().size(), result.getJobFlowId(), result); return NewCluster.of(result.getJobFlowId(), request.getSteps().size()); }