Java Code Examples for org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setGcsUtil()
The following examples show how to use
org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setGcsUtil().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataflowPipelineTranslatorTest.java From beam with Apache License 2.0 | 6 votes |
private static DataflowPipelineOptions buildPipelineOptions() throws IOException { GcsUtil mockGcsUtil = mock(GcsUtil.class); when(mockGcsUtil.expand(any(GcsPath.class))) .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0])); when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true); DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setRunner(DataflowRunner.class); options.setGcpCredential(new TestCredential()); options.setJobName("some-job-name"); options.setProject("some-project"); options.setRegion("some-region"); options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString()); options.setFilesToStage(new ArrayList<>()); options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest())); options.setGcsUtil(mockGcsUtil); // Enable the FileSystems API to know about gs:// URIs in this test. FileSystems.setDefaultPipelineOptions(options); return options; }
Example 2
Source File: BatchStatefulParDoOverridesTest.java From beam with Apache License 2.0 | 6 votes |
private static DataflowPipelineOptions buildPipelineOptions(String... args) throws IOException { GcsUtil mockGcsUtil = mock(GcsUtil.class); when(mockGcsUtil.expand(any(GcsPath.class))) .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0])); when(mockGcsUtil.bucketAccessible(any(GcsPath.class))).thenReturn(true); DataflowPipelineOptions options = PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class); options.setRunner(DataflowRunner.class); options.setGcpCredential(new TestCredential()); options.setJobName("some-job-name"); options.setProject("some-project"); options.setRegion("some-region"); options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString()); options.setFilesToStage(new ArrayList<>()); options.setGcsUtil(mockGcsUtil); // Enable the FileSystems API to know about gs:// URIs in this test. FileSystems.setDefaultPipelineOptions(options); return options; }
Example 3
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 6 votes |
private DataflowPipelineOptions buildPipelineOptions() throws IOException { DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setRunner(DataflowRunner.class); options.setProject(PROJECT_ID); options.setTempLocation(VALID_TEMP_BUCKET); options.setRegion(REGION_ID); // Set FILES_PROPERTY to empty to prevent a default value calculated from classpath. options.setFilesToStage(new ArrayList<>()); options.setDataflowClient(buildMockDataflow()); options.setGcsUtil(mockGcsUtil); options.setGcpCredential(new TestCredential()); // Configure the FileSystem registrar to use these options. FileSystems.setDefaultPipelineOptions(options); return options; }
Example 4
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 5 votes |
@Test public void testGcsStagingLocationInitialization() throws Exception { // Set temp location (required), and check that staging location is set. DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setTempLocation(VALID_TEMP_BUCKET); options.setProject(PROJECT_ID); options.setRegion(REGION_ID); options.setGcpCredential(new TestCredential()); options.setGcsUtil(mockGcsUtil); options.setRunner(DataflowRunner.class); DataflowRunner.fromOptions(options); assertNotNull(options.getStagingLocation()); }
Example 5
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a {@link DataflowRunner} can be created when only the GCP temp location is set
 * (no plain temp location); the test passes if no exception is thrown.
 */
@Test
public void testGcpTempAndNoTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setProject("foo-project");
  pipelineOptions.setRegion(REGION_ID);
  // Deliberately set gcpTempLocation only; tempLocation stays unset.
  pipelineOptions.setGcpTempLocation(VALID_TEMP_BUCKET);
  pipelineOptions.setGcsUtil(mockGcsUtil);

  DataflowRunner.fromOptions(pipelineOptions);
}
Example 6
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Checks that a {@link DataflowRunner} can be created when only the plain temp location is set
 * (no GCP temp location); the test passes if no exception is thrown.
 */
@Test
public void testTempLocationAndNoGcpTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setProject("foo-project");
  pipelineOptions.setRegion(REGION_ID);
  // Deliberately set tempLocation only; gcpTempLocation stays unset.
  pipelineOptions.setTempLocation(VALID_TEMP_BUCKET);
  pipelineOptions.setGcsUtil(mockGcsUtil);

  DataflowRunner.fromOptions(pipelineOptions);
}
Example 7
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 4 votes |
/**
 * Runs a pipeline with two files to stage (one under an overridden package name) and checks the
 * job submitted to the mocked Dataflow service: job id, the two staged packages, the temp storage
 * prefix, the dataset, and the user-agent name and version.
 */
@Test
public void testRunWithFiles() throws IOException {
  // Exercises DataflowRunner.stageFiles through a full pipeline run.
  final String datasetId = "somedataset";

  // Two throwaway local files act as the artifacts to stage.
  File firstTempFile = File.createTempFile("DataflowRunnerTest-", ".txt");
  firstTempFile.deleteOnExit();
  File secondTempFile = File.createTempFile("DataflowRunnerTest2-", ".txt");
  secondTempFile.deleteOnExit();

  String aliasName = "alias.txt";

  // Object lookups answer with a FileNotFoundException entry.
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              GcsUtil.StorageObjectOrIOException.create(
                  new FileNotFoundException("some/path"))));

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  // The second entry uses the "name=path" form to override the staged package name.
  String aliasedEntry = aliasName + "=" + secondTempFile.getAbsolutePath();
  pipelineOptions.setFilesToStage(
      ImmutableList.of(firstTempFile.getAbsolutePath(), aliasedEntry));
  pipelineOptions.setStagingLocation(VALID_STAGING_BUCKET);
  pipelineOptions.setTempLocation(VALID_TEMP_BUCKET);
  pipelineOptions.setTempDatasetId(datasetId);
  pipelineOptions.setProject(PROJECT_ID);
  pipelineOptions.setRegion(REGION_ID);
  pipelineOptions.setJobName("job");
  pipelineOptions.setDataflowClient(buildMockDataflow());
  pipelineOptions.setGcsUtil(mockGcsUtil);
  pipelineOptions.setGcpCredential(new TestCredential());

  // Every create() call on the mock gets a channel to a fresh local temp file that is
  // deleted when the channel closes.
  when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
      .then(
          invocation ->
              FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE));

  Pipeline pipeline = buildDataflowPipeline(pipelineOptions);
  DataflowPipelineJob pipelineJob = (DataflowPipelineJob) pipeline.run();
  assertEquals("newid", pipelineJob.getJobId());

  // Capture the Job handed to the mocked jobs API and inspect it.
  ArgumentCaptor<Job> submittedJob = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), submittedJob.capture());
  Job capturedJob = submittedJob.getValue();
  assertValidJob(capturedJob);

  assertEquals(2, capturedJob.getEnvironment().getWorkerPools().get(0).getPackages().size());

  // First package: the staged name ends with the source file's extension.
  DataflowPackage firstPackage =
      capturedJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
  assertThat(
      firstPackage.getName(), endsWith(getFileExtension(firstTempFile.getAbsolutePath())));

  // Second package: the staged name is exactly the override.
  DataflowPackage secondPackage =
      capturedJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
  assertEquals(aliasName, secondPackage.getName());

  assertEquals(
      GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(),
      capturedJob.getEnvironment().getTempStoragePrefix());
  assertEquals(datasetId, capturedJob.getEnvironment().getDataset());
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getName(),
      capturedJob.getEnvironment().getUserAgent().get("name"));
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getVersion(),
      capturedJob.getEnvironment().getUserAgent().get("version"));
}