com.amazonaws.services.elasticmapreduce.util.StepFactory Java Examples
The following examples show how to use
com.amazonaws.services.elasticmapreduce.util.StepFactory.
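StepFactory is a helper for building the HadoopJarStepConfig objects behind common EMR steps; each factory method returns a step configuration that can be wrapped in a StepConfig, as the examples below show. A minimal sketch (the S3 script location is hypothetical):

// Minimal sketch: wrap a predefined StepFactory step in a StepConfig.
// The S3 script location is a hypothetical placeholder.
StepFactory stepFactory = new StepFactory();
StepConfig step = new StepConfig()
    .withName("Run a script")
    .withActionOnFailure(ActionOnFailure.CONTINUE)
    .withHadoopJarStep(stepFactory.newScriptRunnerStep("s3://my-bucket/scripts/setup.sh"));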
Example #1
Source File: emr-add-steps.java From aws-doc-sdk-examples with Apache License 2.0
public static void main(String[] args) {
    AWSCredentials credentials_profile = null;
    try {
        credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
                "Cannot load credentials from .aws/credentials file. " +
                "Make sure that the credentials file exists and the profile name is specified within it.",
                e);
    }

    AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
        .withRegion(Regions.US_WEST_1)
        .build();

    // Run a bash script using a predefined step in the StepFactory helper class
    StepFactory stepFactory = new StepFactory();
    StepConfig runBashScript = new StepConfig()
        .withName("Run a bash script")
        .withHadoopJarStep(stepFactory.newScriptRunnerStep("s3://jeffgoll/emr-scripts/create_users.sh"))
        .withActionOnFailure("CONTINUE");

    // Run a custom jar file as a step
    HadoopJarStepConfig hadoopConfig1 = new HadoopJarStepConfig()
        .withJar("s3://path/to/my/jarfolder") // replace with the location of the jar to run as a step
        .withMainClass("com.my.Main1")        // optional main class; can be omitted if the jar above has a manifest
        .withArgs("--verbose");               // optional list of arguments to pass to the jar
    StepConfig myCustomJarStep = new StepConfig("RunHadoopJar", hadoopConfig1);

    AddJobFlowStepsResult result = emr.addJobFlowSteps(new AddJobFlowStepsRequest()
        .withJobFlowId("j-xxxxxxxxxxxx") // replace with the cluster ID to run the steps on
        .withSteps(runBashScript, myCustomJarStep));

    System.out.println(result.getStepIds());
}
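After submitting steps you would typically poll for their status. A minimal sketch, assuming the emr client and result variables from the example above:

// Minimal sketch: check the state of the first submitted step.
// Assumes the emr client and result variables from the example above.
DescribeStepResult stepResult = emr.describeStep(new DescribeStepRequest()
    .withClusterId("j-xxxxxxxxxxxx") // same cluster ID used above
    .withStepId(result.getStepIds().get(0)));
System.out.println("Step state: " + stepResult.getStep().getStatus().getState());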
Example #2
Source File: EmrPigStepHelper.java From herd with Apache License 2.0
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrPigStep pigStep = (EmrPigStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (pigStep.isContinueOnError() != null && pigStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the Pig script
    if (CollectionUtils.isEmpty(pigStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim(),
                pigStep.getScriptArguments().toArray(new String[pigStep.getScriptArguments().size()])));
    }
}
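A hypothetical invocation of this helper; the setter names on herd's EmrPigStep are assumptions inferred from the getters used above:

// Hypothetical usage; the setter names are assumptions based on the getters above.
EmrPigStep pigStep = new EmrPigStep();
pigStep.setStepName("Nightly aggregation");                  // hypothetical step name
pigStep.setScriptLocation("s3://my-bucket/scripts/agg.pig"); // hypothetical script location
pigStep.setContinueOnError(Boolean.TRUE);                    // yields ActionOnFailure.CONTINUE
StepConfig stepConfig = new EmrPigStepHelper().getEmrStepConfig(pigStep);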
Example #3
Source File: EmrHiveStepHelper.java From herd with Apache License 2.0
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrHiveStep emrHiveStep = (EmrHiveStep) step;

    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;
    if (emrHiveStep.isContinueOnError() != null && emrHiveStep.isContinueOnError()) {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // If there are no arguments to the Hive script
    if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) {
        // Just build the StepConfig object and return
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim()));
    }
    // If there are arguments specified
    else {
        // For each argument, add the "-d" option
        List<String> hiveArgs = new ArrayList<>();
        for (String hiveArg : emrHiveStep.getScriptArguments()) {
            hiveArgs.add("-d");
            hiveArgs.add(hiveArg);
        }
        // Return the StepConfig object
        return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure)
            .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim(),
                hiveArgs.toArray(new String[hiveArgs.size()])));
    }
}
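Each "-d" flag defines a Hive variable (name=value) that the script can reference as ${name}. A minimal sketch of the equivalent direct StepFactory call this helper produces for two arguments; the script location and variable values are hypothetical:

// Minimal sketch; the script location and variable values are hypothetical.
StepConfig hiveStep = new StepConfig()
    .withName("Run hive query")
    .withActionOnFailure(ActionOnFailure.CANCEL_AND_WAIT)
    .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(
        "s3://my-bucket/scripts/query.hql",
        "-d", "INPUT=s3://my-bucket/input",    // referenced in the script as ${INPUT}
        "-d", "OUTPUT=s3://my-bucket/output"));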
Example #4
Source File: EmrDaoImpl.java From herd with Apache License 2.0
/**
 * Creates the list of step config objects for Hive/Pig installation.
 *
 * @param emrClusterDefinition the EMR cluster definition
 *
 * @return the list of step configurations containing all the steps for the given definition
 */
private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
    StepFactory stepFactory = new StepFactory();
    List<StepConfig> appSteps = new ArrayList<>();

    // Create the install Hive step and add it to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
        StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
            .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
        appSteps.add(installHive);
    }

    // Create the install Pig step and add it to the StepConfig list
    if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
        StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
            .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
            .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
        appSteps.add(installPig);
    }

    // Add the hadoop jar steps that need to be added.
    if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
        for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
            StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());
            appSteps.add(stepConfig);
        }
    }

    return appSteps;
}
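Note that ActionOnFailure.TERMINATE_JOB_FLOW is the legacy spelling; the SDK also provides TERMINATE_CLUSTER with the same effect. A minimal sketch of the install-Hive step using the newer constant (the version string is hypothetical):

// Minimal sketch; "0.13.1" is a hypothetical Hive version string.
StepConfig installHive = new StepConfig()
    .withName("Hive 0.13.1")
    .withActionOnFailure(ActionOnFailure.TERMINATE_CLUSTER)
    .withHadoopJarStep(new StepFactory().newInstallHiveStep("0.13.1"));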
Example #5
Source File: LambdaContainer.java From aws-big-data-blog with Apache License 2.0
protected String fireEMRJob(String paramsStr, String clusterId) {
    StepFactory stepFactory = new StepFactory();
    AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient();
    emr.setRegion(Region.getRegion(Regions.fromName(System.getenv().get("AWS_REGION"))));

    // Note: this Application object is built but never used in the request below
    Application sparkConfig = new Application()
        .withName("Spark");

    String[] params = paramsStr.split(",");

    // Note: this debugging step is built but never added to the request below
    StepConfig enabledebugging = new StepConfig()
        .withName("Enable debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
        .withJar("command-runner.jar")
        .withArgs(params);

    final StepConfig sparkStep = new StepConfig()
        .withName("Spark Step")
        .withActionOnFailure("CONTINUE")
        .withHadoopJarStep(sparkStepConf);

    AddJobFlowStepsRequest request = new AddJobFlowStepsRequest(clusterId)
        .withSteps(new ArrayList<StepConfig>() {{ add(sparkStep); }});
    AddJobFlowStepsResult result = emr.addJobFlowSteps(request);
    return result.getStepIds().get(0);
}
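On release-label clusters, command-runner.jar runs the command given in its arguments, so paramsStr is expected to be a comma-separated spark-submit invocation. A hypothetical caller; the main class and application jar location are assumptions:

// Hypothetical example of building paramsStr for the method above;
// the application jar and main class are assumptions.
String paramsStr = String.join(",",
    "spark-submit",
    "--class", "com.example.MySparkApp",
    "s3://my-bucket/jars/my-spark-app.jar");
String stepId = fireEMRJob(paramsStr, "j-xxxxxxxxxxxx");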
Example #6
Source File: create_cluster.java From aws-doc-sdk-examples with Apache License 2.0
public static void main(String[] args) {
    AWSCredentials credentials_profile = null;
    try {
        // specifies any named profile in .aws/credentials as the credentials provider
        credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
                "Cannot load credentials from .aws/credentials file. " +
                "Make sure that the credentials file exists and that the profile name is defined within it.",
                e);
    }

    // create an EMR client using the specified credentials and region in order to create the cluster
    AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
        .withRegion(Regions.US_WEST_1)
        .build();

    // create a step to enable debugging in the AWS Management Console
    StepFactory stepFactory = new StepFactory();
    StepConfig enabledebugging = new StepConfig()
        .withName("Enable debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // specify applications to be installed and configured when EMR creates the cluster
    Application hive = new Application().withName("Hive");
    Application spark = new Application().withName("Spark");
    Application ganglia = new Application().withName("Ganglia");
    Application zeppelin = new Application().withName("Zeppelin");

    // create the cluster
    RunJobFlowRequest request = new RunJobFlowRequest()
        .withName("MyClusterCreatedFromJava")
        .withReleaseLabel("emr-5.20.0") // specifies the EMR release label; the latest release is recommended
        .withSteps(enabledebugging)
        .withApplications(hive, spark, ganglia, zeppelin)
        .withLogUri("s3://path/to/my/emr/logs") // an S3 URI for log files is required when debugging is enabled
        .withServiceRole("EMR_DefaultRole")     // replace the default with a custom IAM service role if one is used
        .withJobFlowRole("EMR_EC2_DefaultRole") // replace the default with a custom EMR role for the EC2 instance profile if one is used
        .withInstances(new JobFlowInstancesConfig()
            .withEc2SubnetId("subnet-12ab34c56")
            .withEc2KeyName("myEc2Key")
            .withInstanceCount(3)
            .withKeepJobFlowAliveWhenNoSteps(true)
            .withMasterInstanceType("m4.large")
            .withSlaveInstanceType("m4.large"));

    RunJobFlowResult result = emr.runJobFlow(request);
    System.out.println("The cluster ID is " + result.getJobFlowId());
}
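A minimal sketch of polling the new cluster until it finishes provisioning, assuming the emr client and result variables from the example above:

// Minimal sketch: poll the cluster state until it leaves STARTING/BOOTSTRAPPING.
// Assumes the emr client and result variables from the example above.
String clusterId = result.getJobFlowId();
String state = emr.describeCluster(new DescribeClusterRequest().withClusterId(clusterId))
    .getCluster().getStatus().getState();
while (state.equals("STARTING") || state.equals("BOOTSTRAPPING")) {
    try {
        Thread.sleep(30 * 1000);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        break;
    }
    state = emr.describeCluster(new DescribeClusterRequest().withClusterId(clusterId))
        .getCluster().getStatus().getState();
    System.out.println("Cluster " + clusterId + " is " + state);
}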
Example #7
Source File: create-spark-cluster.java From aws-doc-sdk-examples with Apache License 2.0
public static void main(String[] args) {
    AWSCredentials credentials_profile = null;
    try {
        credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
    } catch (Exception e) {
        throw new AmazonClientException(
                "Cannot load credentials from .aws/credentials file. " +
                "Make sure that the credentials file exists and the profile name is specified within it.",
                e);
    }

    AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
        .withRegion(Regions.US_WEST_1)
        .build();

    // create a step to enable debugging in the AWS Management Console
    StepFactory stepFactory = new StepFactory();
    StepConfig enabledebugging = new StepConfig()
        .withName("Enable debugging")
        .withActionOnFailure("TERMINATE_JOB_FLOW")
        .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    Application spark = new Application().withName("Spark");

    RunJobFlowRequest request = new RunJobFlowRequest()
        .withName("Spark Cluster")
        .withReleaseLabel("emr-5.20.0")
        .withSteps(enabledebugging)
        .withApplications(spark)
        .withLogUri("s3://path/to/my/logs/")
        .withServiceRole("EMR_DefaultRole")
        .withJobFlowRole("EMR_EC2_DefaultRole")
        .withInstances(new JobFlowInstancesConfig()
            .withEc2SubnetId("subnet-12ab3c45")
            .withEc2KeyName("myEc2Key")
            .withInstanceCount(3)
            .withKeepJobFlowAliveWhenNoSteps(true)
            .withMasterInstanceType("m4.large")
            .withSlaveInstanceType("m4.large"));

    RunJobFlowResult result = emr.runJobFlow(request);
    System.out.println("The cluster ID is " + result.getJobFlowId());
}
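Because withKeepJobFlowAliveWhenNoSteps(true) keeps the cluster running after its steps finish, it must be terminated explicitly when no longer needed. A minimal sketch, assuming the emr client and result variables from the example above:

// Minimal sketch: shut the cluster down when done.
// Assumes the emr client and result variables from the example above.
emr.terminateJobFlows(new TerminateJobFlowsRequest()
    .withJobFlowIds(result.getJobFlowId()));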
Example #8
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0
private StepFactory stepFactory() {
    // TODO: configure region
    return new StepFactory();
}
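The no-arg constructor points at the us-east-1 tools bucket; StepFactory also has a constructor taking an explicit S3 bucket, which is presumably what the TODO refers to. A hedged sketch, assuming EMR's conventional "<region>.elasticmapreduce" bucket naming:

// A sketch of one way to resolve the TODO; assumes EMR's conventional
// regional "<region>.elasticmapreduce" tool buckets.
private StepFactory stepFactory(Regions region) {
    return new StepFactory(region.getName() + ".elasticmapreduce");
}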
Example #9
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0
/**
 * This method uses the AWS SDK for Java to launch an Apache HBase cluster on Amazon EMR.
 *
 * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service
 * @param clusterIdentifier - identifier of an existing cluster
 * @param amiVersion - AMI to use for launching this cluster
 * @param keypair - A keypair for SSHing into the Amazon EMR master node
 * @param masterInstanceType - Master node Amazon EC2 instance type
 * @param coreInstanceType - core nodes Amazon EC2 instance type
 * @param logUri - An Amazon S3 bucket for your logs
 * @param numberOfNodes - total number of nodes in this cluster including master node
 * @return the job flow ID of the running cluster
 */
public static String createCluster(AmazonElasticMapReduce client,
        String clusterIdentifier,
        String amiVersion,
        String keypair,
        String masterInstanceType,
        String coreInstanceType,
        String logUri,
        int numberOfNodes) {

    if (clusterExists(client, clusterIdentifier)) {
        LOG.info("Cluster " + clusterIdentifier + " is available");
        return clusterIdentifier;
    }

    // Error checking
    if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version");
    if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair");
    if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type");
    if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs.");
    if (numberOfNodes < 1) throw new RuntimeException("ERROR: Please specify at least 1 node");

    RunJobFlowRequest request = new RunJobFlowRequest()
        .withAmiVersion(amiVersion)
        .withBootstrapActions(new BootstrapActionConfig()
            .withName("Install HBase")
            .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
                .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase")))
        .withName("Job Flow With HBase Actions")
        .withSteps(
            // enable debugging step
            new StepConfig()
                .withName("Enable debugging")
                .withActionOnFailure("TERMINATE_CLUSTER")
                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()),
            // start HBase step - after installing it with a bootstrap action
            createStepConfig("Start HBase", "TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()),
            // add HBase backup step
            createStepConfig("Modify backup schedule", "TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs()))
        .withLogUri(logUri)
        .withInstances(new JobFlowInstancesConfig()
            .withEc2KeyName(keypair)
            .withInstanceCount(numberOfNodes)
            .withKeepJobFlowAliveWhenNoSteps(true)
            .withMasterInstanceType(masterInstanceType)
            .withSlaveInstanceType(coreInstanceType));

    RunJobFlowResult result = client.runJobFlow(request);

    String state = null;
    while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) {
        try {
            Thread.sleep(10 * 1000);
            LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available.");
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore the interrupt flag instead of swallowing it
        }
        if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")) {
            LOG.error("Could not create EMR Cluster");
            System.exit(-1);
        }
    }

    LOG.info("Created cluster " + result.getJobFlowId());
    LOG.info("Cluster " + clusterIdentifier + " is available");
    return result.getJobFlowId();
}
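The createStepConfig helper above is not shown in this excerpt. A hypothetical reconstruction, inferred from how it is called; the blog's actual implementation may differ:

// Hypothetical reconstruction of the helper used above; the argument
// list type is an assumption.
private static StepConfig createStepConfig(String name, String actionOnFailure, String jar, List<String> args) {
    return new StepConfig()
        .withName(name)
        .withActionOnFailure(actionOnFailure)
        .withHadoopJarStep(new HadoopJarStepConfig()
            .withJar(jar)
            .withArgs(args));
}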