Java Code Examples for org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setJobName()
The following examples show how to use
org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setJobName().
The originating project, source file, and license are noted above each example.
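Every example below follows the same basic pattern: obtain a DataflowPipelineOptions instance from PipelineOptionsFactory, call setJobName() alongside the project, region, and storage settings, and create a Pipeline from the options. The minimal sketch below shows that pattern in isolation; the project ID, region, bucket path, and job name are placeholder values, not taken from any of the examples. As the tests further down suggest, a valid job name starts with a letter, contains only letters, digits, and hyphens, does not end with a hyphen, and is lowercased by DataflowRunner.

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SetJobNameSketch {
  public static void main(String[] args) {
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    // Placeholder values; substitute your own project, region, and bucket.
    options.setProject("my-project-id");
    options.setRegion("us-central1");
    options.setTempLocation("gs://my-bucket/temp");
    // Job names must start with a letter, use only letters, digits, and
    // hyphens, and not end with a hyphen; mixed case is lowercased (see Example 7).
    options.setJobName("my-job-name");

    Pipeline pipeline = Pipeline.create(options);
    // ... apply transforms here ...
    pipeline.run();
  }
}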
Example 1
Source File: DataflowPipelineTranslatorTest.java From beam with Apache License 2.0
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setJobName("some-job-name");
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
  options.setFilesToStage(new ArrayList<>());
  options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));
  options.setGcsUtil(mockGcsUtil);

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);
  return options;
}
Example 2
Source File: BatchStatefulParDoOverridesTest.java From beam with Apache License 2.0
private static DataflowPipelineOptions buildPipelineOptions(String... args) throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));
  when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true);

  DataflowPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setJobName("some-job-name");
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
  options.setFilesToStage(new ArrayList<>());
  options.setGcsUtil(mockGcsUtil);

  // Enable the FileSystems API to know about gs:// URIs in this test.
  FileSystems.setDefaultPipelineOptions(options);
  return options;
}
Example 3
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testInvalidJobName() throws IOException {
  List<String> invalidNames = Arrays.asList("invalid_name", "0invalid", "invalid-");
  List<String> expectedReason =
      Arrays.asList("JobName invalid", "JobName invalid", "JobName invalid");

  for (int i = 0; i < invalidNames.size(); ++i) {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setJobName(invalidNames.get(i));
    try {
      DataflowRunner.fromOptions(options);
      fail("Expected IllegalArgumentException for jobName " + options.getJobName());
    } catch (IllegalArgumentException e) {
      assertThat(e.getMessage(), containsString(expectedReason.get(i)));
    }
  }
}
Example 4
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} returns normally when
 * the runner is successfully run.
 */
@Test
public void testTemplateRunnerFullCompletion() throws Exception {
  File existingFile = tmpFolder.newFile();
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation(existingFile.getPath());
  options.setTempLocation(tmpFolder.getRoot().getPath());

  Pipeline p = Pipeline.create(options);
  p.run();

  expectedLogs.verifyInfo("Template successfully created");
}
Example 5
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} returns normally when
 * the runner is successfully run with the upload_graph experiment turned on. The resulting
 * template should not contain raw steps, and the stepsLocation field should be set.
 */
@Test
public void testTemplateRunnerWithUploadGraph() throws Exception {
  File existingFile = tmpFolder.newFile();
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setExperiments(Arrays.asList("upload_graph"));
  options.setJobName("TestJobName");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation(existingFile.getPath());
  options.setTempLocation(tmpFolder.getRoot().getPath());

  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(ImmutableList.of(1)));
  p.run();

  expectedLogs.verifyInfo("Template successfully created");

  ObjectMapper objectMapper = new ObjectMapper();
  JsonNode node = objectMapper.readTree(existingFile);
  assertEquals(0, node.get("steps").size());
  assertNotNull(node.get("stepsLocation"));
}
Example 6
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} throws the appropriate
 * exception when an output file is not writable.
 */
@Test
public void testTemplateRunnerLoggedErrorForFile() throws Exception {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation("//bad/path");
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setTempLocation(tmpFolder.getRoot().getPath());
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  Pipeline p = Pipeline.create(options);

  thrown.expectMessage("Cannot create output file at");
  thrown.expect(RuntimeException.class);
  p.run();
}
Example 7
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testFromOptionsWithUppercaseConvertsToLowercase() throws Exception {
  String mixedCase = "ThisJobNameHasMixedCase";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setJobName(mixedCase);

  DataflowRunner.fromOptions(options);
  assertThat(options.getJobName(), equalTo(mixedCase.toLowerCase()));
}
Example 8
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
/**
 * Invasive mock-based test for checking that the JSON generated for the pipeline options has
 * not had vital fields pruned.
 */
@Test
public void testSettingOfSdkPipelineOptions() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  // These options are important only for this test, and need not be global to the test class
  options.setAppName(DataflowRunnerTest.class.getSimpleName());
  options.setJobName("some-job-name");

  Pipeline p = Pipeline.create(options);
  p.run();

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  Map<String, Object> sdkPipelineOptions =
      jobCaptor.getValue().getEnvironment().getSdkPipelineOptions();

  assertThat(sdkPipelineOptions, hasKey("options"));
  Map<String, Object> optionsMap = (Map<String, Object>) sdkPipelineOptions.get("options");

  assertThat(optionsMap, hasEntry("appName", (Object) options.getAppName()));
  assertThat(optionsMap, hasEntry("project", (Object) options.getProject()));
  assertThat(
      optionsMap,
      hasEntry("pathValidatorClass", (Object) options.getPathValidatorClass().getName()));
  assertThat(optionsMap, hasEntry("runner", (Object) options.getRunner().getName()));
  assertThat(optionsMap, hasEntry("jobName", (Object) options.getJobName()));
  assertThat(optionsMap, hasEntry("tempLocation", (Object) options.getTempLocation()));
  assertThat(optionsMap, hasEntry("stagingLocation", (Object) options.getStagingLocation()));
  assertThat(
      optionsMap,
      hasEntry("stableUniqueNames", (Object) options.getStableUniqueNames().toString()));
  assertThat(optionsMap, hasEntry("streaming", (Object) options.isStreaming()));
  assertThat(
      optionsMap,
      hasEntry(
          "numberOfWorkerHarnessThreads", (Object) options.getNumberOfWorkerHarnessThreads()));
}
Example 9
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testUpdate() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setUpdate(true);
  options.setJobName("oldJobName");
  Pipeline p = buildDataflowPipeline(options);
  DataflowPipelineJob job = (DataflowPipelineJob) p.run();
  assertEquals("newid", job.getJobId());

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
}
Example 10
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testUpdateNonExistentPipeline() throws IOException {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Could not find running job named badjobname");

  DataflowPipelineOptions options = buildPipelineOptions();
  options.setUpdate(true);
  options.setJobName("badJobName");
  Pipeline p = buildDataflowPipeline(options);
  p.run();
}
Example 11
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testUpdateAlreadyUpdatedPipeline() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setUpdate(true);
  options.setJobName("oldJobName");
  Dataflow mockDataflowClient = options.getDataflowClient();
  Dataflow.Projects.Locations.Jobs.Create mockRequest =
      mock(Dataflow.Projects.Locations.Jobs.Create.class);
  when(mockDataflowClient
          .projects()
          .locations()
          .jobs()
          .create(eq(PROJECT_ID), eq(REGION_ID), any(Job.class)))
      .thenReturn(mockRequest);
  final Job resultJob = new Job();
  resultJob.setId("newid");
  // Return a different request id.
  resultJob.setClientRequestId("different_request_id");
  when(mockRequest.execute()).thenReturn(resultJob);

  Pipeline p = buildDataflowPipeline(options);

  thrown.expect(DataflowJobAlreadyUpdatedException.class);
  thrown.expect(
      new TypeSafeMatcher<DataflowJobAlreadyUpdatedException>() {
        @Override
        public void describeTo(Description description) {
          description.appendText("Expected job ID: " + resultJob.getId());
        }

        @Override
        protected boolean matchesSafely(DataflowJobAlreadyUpdatedException item) {
          return resultJob.getId().equals(item.getJob().getJobId());
        }
      });
  thrown.expectMessage(
      "The job named oldjobname with id: oldJobId has already been updated "
          + "into job id: newid and cannot be updated again.");
  p.run();
}
Example 12
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testValidJobName() throws IOException {
  List<String> names =
      Arrays.asList(
          "ok", "Ok", "A-Ok", "ok-123", "this-one-is-fairly-long-01234567890123456789");

  for (String name : names) {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setJobName(name);

    DataflowRunner runner = DataflowRunner.fromOptions(options);
    assertNotNull(runner);
  }
}
Example 13
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testToString() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setTempLocation("gs://test/temp/location");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setRunner(DataflowRunner.class);
  assertEquals("DataflowRunner#testjobname", DataflowRunner.fromOptions(options).toString());
}
Example 14
Source File: PubsubWordCount.java From cloud-bigtable-examples with Apache License 2.0
private static void injectMessages(BigtablePubsubOptions options) {
  String inputFile = options.getInputFile();
  String topic = options.getPubsubTopic();
  DataflowPipelineOptions copiedOptions = options.as(DataflowPipelineOptions.class);
  copiedOptions.setStreaming(false);
  copiedOptions.setNumWorkers(INJECTORNUMWORKERS);
  copiedOptions.setJobName(copiedOptions.getJobName() + "-injector");
  Pipeline injectorPipeline = Pipeline.create(copiedOptions);
  injectorPipeline
      .apply(TextIO.read().from(inputFile))
      .apply(ParDo.of(new FilterEmptyStringsFn()))
      .apply(PubsubIO.writeStrings().to(topic));
  injectorPipeline.run().waitUntilFinish();
}
Example 15
Source File: PubSubToBQPipeline.java From pubsub-to-bigquery with Apache License 2.0
public static void main(String[] args)
    throws GeneralSecurityException, IOException, ParseException, ParserConfigurationException,
        SAXException {
  String params = null;
  for (int i = 0; i < args.length; i++) {
    if (args[i].startsWith("--params=")) {
      params = args[i].replaceFirst("--params=", "");
    }
  }
  System.out.println(params);
  init(params);

  GoogleCredentials credentials =
      ServiceAccountCredentials.fromStream(new FileInputStream(keyFile))
          .createScoped(
              Arrays.asList(new String[] {"https://www.googleapis.com/auth/cloud-platform"}));

  DataflowPipelineOptions options =
      PipelineOptionsFactory.create().as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  // Your project ID is required in order to run your pipeline on the Google Cloud.
  options.setProject(projectId);
  // Your Google Cloud Storage path is required for staging local files.
  options.setStagingLocation(workingBucket);
  options.setTempLocation(workingBucket + "/temp");
  options.setGcpCredential(credentials);
  options.setServiceAccount(accountEmail);
  options.setMaxNumWorkers(maxNumWorkers);
  options.setDiskSizeGb(diskSizeGb);
  options.setWorkerMachineType(machineType);
  options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
  options.setZone(zone);
  options.setStreaming(isStreaming);
  options.setJobName(pipelineName);

  Pipeline pipeline = Pipeline.create(options);
  Gson gson = new Gson();
  TableSchema schema = gson.fromJson(schemaStr, TableSchema.class);

  PCollection<String> streamData = null;
  if (pubSubTopicSub != null && !StringUtils.isEmpty(pubSubTopicSub)) {
    streamData =
        pipeline.apply(
            "ReadPubSub",
            PubsubIO.readStrings()
                .fromSubscription(
                    String.format(
                        "projects/%1$s/subscriptions/%2$s", projectId, pubSubTopicSub)));
  } else if (pubSubTopic != null && !StringUtils.isEmpty(pubSubTopic)) {
    streamData =
        pipeline.apply(
            "ReadPubSub",
            PubsubIO.readStrings()
                .fromTopic(String.format("projects/%1$s/topics/%2$s", projectId, pubSubTopic)));
  }

  PCollection<TableRow> tableRow =
      streamData.apply("ToTableRow", ParDo.of(new PrepData.ToTableRow(owTimestamp, debugMode)));

  tableRow.apply(
      "WriteToBQ",
      BigQueryIO.writeTableRows()
          .to(String.format("%1$s.%2$s", bqDataSet, bqTable))
          .withSchema(schema)
          .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));

  System.out.println("Starting pipeline " + pipelineName);
  pipeline.run();
}
Example 16
Source File: DataflowRunnerTest.java From beam with Apache License 2.0
@Test
public void testRunWithFiles() throws IOException {
  // Test that the function DataflowRunner.stageFiles works as expected.
  final String cloudDataflowDataset = "somedataset";

  // Create some temporary files.
  File temp1 = File.createTempFile("DataflowRunnerTest-", ".txt");
  temp1.deleteOnExit();
  File temp2 = File.createTempFile("DataflowRunnerTest2-", ".txt");
  temp2.deleteOnExit();

  String overridePackageName = "alias.txt";

  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              GcsUtil.StorageObjectOrIOException.create(
                  new FileNotFoundException("some/path"))));

  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setFilesToStage(
      ImmutableList.of(
          temp1.getAbsolutePath(), overridePackageName + "=" + temp2.getAbsolutePath()));
  options.setStagingLocation(VALID_STAGING_BUCKET);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setTempDatasetId(cloudDataflowDataset);
  options.setProject(PROJECT_ID);
  options.setRegion(REGION_ID);
  options.setJobName("job");
  options.setDataflowClient(buildMockDataflow());
  options.setGcsUtil(mockGcsUtil);
  options.setGcpCredential(new TestCredential());

  when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
      .then(
          invocation ->
              FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE));

  Pipeline p = buildDataflowPipeline(options);

  DataflowPipelineJob job = (DataflowPipelineJob) p.run();
  assertEquals("newid", job.getJobId());

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  Job workflowJob = jobCaptor.getValue();
  assertValidJob(workflowJob);

  assertEquals(2, workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
  DataflowPackage workflowPackage1 =
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
  assertThat(workflowPackage1.getName(), endsWith(getFileExtension(temp1.getAbsolutePath())));
  DataflowPackage workflowPackage2 =
      workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
  assertEquals(overridePackageName, workflowPackage2.getName());

  assertEquals(
      GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(),
      workflowJob.getEnvironment().getTempStoragePrefix());
  assertEquals(cloudDataflowDataset, workflowJob.getEnvironment().getDataset());
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getName(),
      workflowJob.getEnvironment().getUserAgent().get("name"));
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getVersion(),
      workflowJob.getEnvironment().getUserAgent().get("version"));
}