com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions.
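All of the examples share the same basic pattern: obtain a DataflowPipelineOptions instance from PipelineOptionsFactory, set the project, staging location, and runner, then hand the options to Pipeline.create. Here is a minimal, self-contained sketch of that pattern; the project id and bucket are placeholders, not values taken from any of the projects below.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.options.DataflowPipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner;

public class DataflowPipelineOptionsSketch {
  public static void main(String[] args) {
    // View the options interface; unset fields fall back to their defaults.
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setProject("my-project");                     // placeholder project id
    options.setStagingLocation("gs://my-bucket/staging"); // placeholder GCS path
    options.setRunner(DataflowPipelineRunner.class);

    Pipeline p = Pipeline.create(options);
    // ...apply transforms here...
    p.run();
  }
}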
Example #1
Source File: DataflowFactory.java, from dockerflow (Apache License 2.0)
/**
 * Create Dataflow pipeline options from the standard command-line options, "--project=",
 * "--runner=" and "--stagingLocation=".
 *
 * @param args the command-line arguments
 * @return the configured Dataflow pipeline options
 * @throws IOException
 */
public static DataflowPipelineOptions pipelineOptions(String[] args) throws IOException {
  LOG.info("Set up Dataflow options");
  DataflowPipelineOptions o = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  Map<String, String> m = StringUtils.parseArgs(args);

  o.setProject(m.get(PROJECT));

  // Accept several aliases for the staging location
  if (m.containsKey(STAGING)) {
    o.setStagingLocation(m.get(STAGING));
  } else if (m.containsKey(STAGING_LOCATION)) {
    o.setStagingLocation(m.get(STAGING_LOCATION));
  } else if (m.containsKey(WORKSPACE)) {
    o.setStagingLocation(m.get(WORKSPACE) + "/staging");
  }

  o.setRunner(runner(m.get(RUNNER)));
  o.setMaxNumWorkers(m.get(MAX_WORKERS) == null ? 1 : Integer.parseInt(m.get(MAX_WORKERS)));

  if (m.containsKey(MACHINE_TYPE)) {
    o.setWorkerMachineType(m.get(MACHINE_TYPE));
  } else {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }
  return o;
}
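A hypothetical invocation of this factory method, using only the flags named in its javadoc. The values are placeholders, and the exact set of recognized keys is defined by dockerflow's StringUtils.parseArgs:

DataflowPipelineOptions options = DataflowFactory.pipelineOptions(new String[] {
    "--project=my-project",                     // placeholder project id
    "--stagingLocation=gs://my-bucket/staging", // placeholder staging bucket
    "--runner=DataflowPipelineRunner"           // accepted runner names are an assumption
});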
Example #2
Source File: TaskRunner.java, from dockerflow (Apache License 2.0)
/** Run a Docker workflow on Dataflow. */
public static void run(Workflow w, Map<String, WorkflowArgs> a, DataflowPipelineOptions o)
    throws IOException {
  LOG.info("Running workflow graph");
  if (w.getArgs().getProjectId() == null) {
    throw new IllegalArgumentException("Project id is required");
  }

  Pipeline p = DataflowFactory.dataflow(w, a, o);
  LOG.info("Created Dataflow pipeline");
  LOG.debug(w.toString());

  PipelineResult r = p.run();
  LOG.info("Dataflow pipeline completed");
  LOG.info("Result state: " + r.getState());
}
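A hedged sketch of how these pieces fit together; the Workflow instance comes from a project-specific definition (see Example #7) and is assumed to exist here:

// Assumed: 'workflow' was defined elsewhere, e.g. by a WorkflowDefn implementation.
DataflowPipelineOptions options = DataflowFactory.pipelineOptions(args);
Map<String, WorkflowArgs> argsTable = ArgsTableBuilder.fromArgs(args).build();
TaskRunner.run(workflow, argsTable, options);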
Example #3
Source File: LiveStateCheckerApp.java, from policyscanner (Apache License 2.0)
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(Constants.PROJECT_ID);
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
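BlockingDataflowPipelineRunner makes run() wait until the Dataflow job reaches a terminal state, which is why these helpers suit batch-style checks. A sketch of a typical call site, with a placeholder bucket; the surrounding code is an assumption:

PipelineOptions options = getCloudExecutionOptions("gs://my-bucket/staging");
Pipeline pipeline = Pipeline.create(options);
// ...apply the scanner's transforms...
pipeline.run(); // blocks until the job completes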
Example #4
Source File: UserManagedKeysApp.java, from policyscanner (Apache License 2.0)
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example #5
Source File: LiveStateCheckerRunner.java, from policyscanner (Apache License 2.0)
private static PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example #6
Source File: DesiredStateEnforcerApp.java, from policyscanner (Apache License 2.0)
private PipelineOptions getCloudExecutionOptions(String stagingLocation) {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject(SystemProperty.applicationId.get());
  options.setStagingLocation(stagingLocation);
  options.setRunner(BlockingDataflowPipelineRunner.class);
  return options;
}
Example #7
Source File: WorkflowDefn.java, from dockerflow (Apache License 2.0)
/**
 * The WorkflowDefn implementation is responsible for defining the workflow steps and default
 * args, and for creating a Dataflow pipeline.
 *
 * @throws IOException
 */
default Pipeline createDataflow(
    Map<String, WorkflowArgs> argsTable, DataflowPipelineOptions pipelineOptions, String[] args)
    throws IOException {
  return DataflowBuilder.of(createWorkflow(args))
      .createFrom(argsTable)
      .pipelineOptions(pipelineOptions)
      .build();
}
Example #8
Source File: MultiLinearGraph.java, from dockerflow (Apache License 2.0)
/**
 * For simple linear graphs, it's not too hard to generate the Dataflow pipeline yourself. Here's
 * the equivalent Dataflow code for this simple example.
 */
public static void manualDataflow(String[] args) throws IOException {
  LOG.info("Parsing Dataflow options");
  DataflowPipelineOptions o = DataflowFactory.pipelineOptions(args);
  o.setAppName(MultiLinearGraph.class.getSimpleName());

  Pipeline p = Pipeline.create(o);
  p.apply(Create.of(ArgsTableBuilder.fromArgs(args).build()))
      .apply(DockerDo.of(taskOne()))
      .apply(DockerDo.of(taskTwo()));
  p.run();
}
Example #9
Source File: GatkPairedSingleSampleAlt.java, from dockerflow (Apache License 2.0)
/** Only this one method is different from GatkPairedSingleSample.java. */
@Override
public Pipeline createDataflow(
    Map<String, WorkflowArgs> argsTable, DataflowPipelineOptions pipelineOptions, String[] args)
    throws IOException {
  DataflowPipelineOptions o = DataflowFactory.pipelineOptions(args);
  o.setAppName(GatkPairedSingleSampleAlt.class.getSimpleName());
  Pipeline p = Pipeline.create(o);

  // Merge sample-specific args with default workflow args
  for (String key : argsTable.keySet()) {
    WorkflowArgs instanceArgs = argsTable.get(key);
    instanceArgs.mergeDefaultArgs(workflowArgs);
  }

  // Declarations
  PCollection<KV<String, WorkflowArgs>> mainBranch, branchOne, branchTwo;
  PCollectionList<KV<String, WorkflowArgs>> mergeList;

  // Construct the workflow graph
  mainBranch = p.apply(Create.of(argsTable));
  branchOne = mainBranch.apply(DockerDo.of(CreateSequenceGroupingTSV));
  branchTwo = mainBranch.apply(DockerDo.of(BwaVersion))
      .apply(DockerDo.of(SamToFastqAndBwaMem))
      .apply(DockerDo.of(MergeBamAlignment))
      .apply(DockerDo.of(SortAndFixReadGroupBam))
      .apply(DockerDo.of(MarkDuplicates))
      .apply(DockerDo.of(SortAndFixSampleBam));
  mergeList = PCollectionList.of(branchOne).and(branchTwo);
  mainBranch = mergeList.apply(new MergeBranches())
      .apply(DockerDo.of(BaseRecalibrator))
      .apply(DockerDo.of(ApplyBQSR))
      .apply(DockerDo.of(GatherBqsrReports))
      .apply(DockerDo.of(ApplyBQSRToUnmappedReads))
      .apply(DockerDo.of(GatherBamFiles));
  branchOne = mainBranch.apply(DockerDo.of(ConvertToCram));
  branchTwo = mainBranch.apply(DockerDo.of(HaplotypeCaller))
      .apply(DockerDo.of(GatherVCFs));
  mergeList = PCollectionList.of(branchOne).and(branchTwo);
  mainBranch = mergeList.apply(new MergeBranches());
  return p;
}
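The fan-out/fan-in mechanics above can be reduced to a small sketch. MergeBranches and DockerDo are dockerflow's own transforms from this example; stepA and stepB are hypothetical tasks:

PCollection<KV<String, WorkflowArgs>> a = mainBranch.apply(DockerDo.of(stepA));
PCollection<KV<String, WorkflowArgs>> b = mainBranch.apply(DockerDo.of(stepB));
PCollection<KV<String, WorkflowArgs>> merged =
    PCollectionList.of(a).and(b).apply(new MergeBranches());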
Example #10
Source File: DataflowFactory.java, from dockerflow (Apache License 2.0)
/**
 * Dynamically construct a Dataflow pipeline from the workflow definition. The root PCollection
 * has one element, the root task's name.
 *
 * @param workflow the workflow definition
 * @param workflowArgs instance-specific workflow args, keyed by workflow instance name
 * @param o the Dataflow pipeline options
 * @return the constructed pipeline
 * @throws IOException
 */
public static Pipeline dataflow(
    Workflow workflow, Map<String, WorkflowArgs> workflowArgs, DataflowPipelineOptions o)
    throws IOException {
  assert (workflow != null);
  assert (o != null);
  assert (workflow.getDefn() != null);

  // Set defaults
  if (o.getAppName() == null) {
    o.setAppName(workflow.getDefn().getName());
  }
  if (o.getProject() == null && workflow.getArgs() != null) {
    o.setProject(workflow.getArgs().getProjectId());
  }
  if (o.getMaxNumWorkers() == 0) {
    o.setMaxNumWorkers(1);
  }
  if (o.getWorkerMachineType() == null) {
    o.setWorkerMachineType(DEFAULT_MACHINE_TYPE);
  }

  LOG.info("Initializing dataflow pipeline");
  Pipeline p = Pipeline.create(o);

  LOG.info("Creating input collection of workflow args");
  if (workflowArgs == null) {
    workflowArgs = new HashMap<String, WorkflowArgs>();
  }
  if (workflowArgs.isEmpty()) {
    LOG.info("No workflow args were provided. Using default values.");
    workflowArgs.put(workflow.getDefn().getName(), new WorkflowArgs());
  } else if (workflow.getArgs() != null) {
    LOG.info("Merging default workflow args with instance-specific args");
    for (String key : workflowArgs.keySet()) {
      WorkflowArgs instanceArgs = workflowArgs.get(key);
      instanceArgs.mergeDefaultArgs(workflow.getArgs());
      LOG.debug("Merged args: " + StringUtils.toJson(instanceArgs));
    }
  }

  LOG.info("Creating dataflow pipeline for workflow " + workflow.getDefn().getName());
  PCollection<KV<String, WorkflowArgs>> input = p.apply(Create.of(workflowArgs));
  input = dataflow(Workflow.Steps.graph(workflow), input);

  if (workflowArgs.values().iterator().next().getDeleteFiles()) {
    LOG.info("Intermediate files will be deleted");
    input = input.apply(
        ParDo.named("DeleteIntermediateFiles").of(new DeleteIntermediateFiles(workflow)));
  }
  return p;
}
Example #11
Source File: DataflowBuilder.java, from dockerflow (Apache License 2.0)
public DataflowBuilder pipelineOptions(DataflowPipelineOptions options) {
  pipelineOptions = options;
  pipelineOptions.setAppName(workflow.getDefn().getName());
  return this;
}
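The setter is designed for use mid-chain; a condensed sketch of the fluent usage from Example #7, where workflow, argsTable, and options are assumed to exist:

Pipeline p = DataflowBuilder.of(workflow) // wrap the workflow definition
    .createFrom(argsTable)                // per-instance workflow args
    .pipelineOptions(options)             // also sets appName from the workflow name
    .build();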
Example #12
Source File: FlinkPipelineOptions.java, from flink-dataflow (Apache License 2.0)
/** The job name is used to identify jobs running on a Flink cluster. */
@Description("Dataflow job name, to uniquely identify active jobs. "
    + "Defaults to using the ApplicationName-UserName-Date.")
@Default.InstanceFactory(DataflowPipelineOptions.JobNameFactory.class)
String getJobName();
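Because the default comes from an @Default.InstanceFactory, the job name is generated on first read when no value was set explicitly. A sketch of that behavior; the exact generated format is an assumption based on the description above:

FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
String jobName = options.getJobName(); // e.g. roughly "appname-username-date"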