Java Code Examples for org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setGcsUtil()

The following examples show how to use org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setGcsUtil(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: DataflowPipelineTranslatorTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@link DataflowPipelineOptions} used by this test.
 *
 * <p>The options carry a mocked {@link GcsUtil} (every path expands to itself and every bucket is
 * reported as accessible) and the client returned by {@code buildMockDataflow}. Before being
 * returned they are registered as the default options of {@link FileSystems}.
 *
 * @return the fully populated options
 * @throws IOException declared by the mocked calls and helpers used during setup
 */
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  // Stubbed GCS access: bucketAccessible() is always true, expand() echoes its argument.
  GcsUtil gcsUtilMock = mock(GcsUtil.class);
  when(gcsUtilMock.bucketAccessible(any(GcsPath.class))).thenReturn(true);
  when(gcsUtilMock.expand(any(GcsPath.class)))
      .then(call -> ImmutableList.of((GcsPath) call.getArguments()[0]));

  String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // Job identity.
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setRegion("some-region");

  // Runner, credentials and storage locations.
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setTempLocation(tempLocation);
  pipelineOptions.setFilesToStage(new ArrayList<>());

  // Test doubles for the remote services.
  pipelineOptions.setGcsUtil(gcsUtilMock);
  pipelineOptions.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));

  // Let the FileSystems API resolve gs:// URIs with these options.
  FileSystems.setDefaultPipelineOptions(pipelineOptions);
  return pipelineOptions;
}
 
Example 2
Source File: BatchStatefulParDoOverridesTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates {@link DataflowPipelineOptions} from command-line style arguments and then applies the
 * fixed settings this test relies on.
 *
 * <p>The options carry a mocked {@link GcsUtil} (every path expands to itself and every bucket is
 * reported as accessible). Before being returned they are registered as the default options of
 * {@link FileSystems}.
 *
 * @param args arguments handed to {@code PipelineOptionsFactory.fromArgs}
 * @return the fully populated options
 * @throws IOException declared by the mocked calls used during setup
 */
private static DataflowPipelineOptions buildPipelineOptions(String... args) throws IOException {
  // Stubbed GCS access: bucketAccessible() is always true, expand() echoes its argument.
  GcsUtil gcsUtilMock = mock(GcsUtil.class);
  when(gcsUtilMock.bucketAccessible(any(GcsPath.class))).thenReturn(true);
  when(gcsUtilMock.expand(any(GcsPath.class)))
      .then(call -> ImmutableList.of((GcsPath) call.getArguments()[0]));

  String tempLocation = GcsPath.fromComponents("somebucket", "some/path").toString();

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.fromArgs(args).as(DataflowPipelineOptions.class);

  // Job identity.
  pipelineOptions.setJobName("some-job-name");
  pipelineOptions.setProject("some-project");
  pipelineOptions.setRegion("some-region");

  // Runner, credentials and storage locations.
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setTempLocation(tempLocation);
  pipelineOptions.setFilesToStage(new ArrayList<>());
  pipelineOptions.setGcsUtil(gcsUtilMock);

  // Let the FileSystems API resolve gs:// URIs with these options.
  FileSystems.setDefaultPipelineOptions(pipelineOptions);
  return pipelineOptions;
}
 
Example 3
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the {@link DataflowPipelineOptions} shared by tests in this class.
 *
 * <p>The options use this test's {@code mockGcsUtil} and the client returned by {@code
 * buildMockDataflow()}. Before being returned they are registered as the default options of
 * {@link FileSystems}.
 *
 * @return the fully populated options
 * @throws IOException declared by the helpers used during setup
 */
private DataflowPipelineOptions buildPipelineOptions() throws IOException {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // Where and how the job runs.
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setProject(PROJECT_ID);
  pipelineOptions.setRegion(REGION_ID);
  pipelineOptions.setTempLocation(VALID_TEMP_BUCKET);

  // An explicit empty list keeps the files-to-stage default from being derived from the classpath.
  pipelineOptions.setFilesToStage(new ArrayList<>());

  // Credentials and test doubles for the remote services.
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setGcsUtil(mockGcsUtil);
  pipelineOptions.setDataflowClient(buildMockDataflow());

  // Point the FileSystem registrar at these options.
  FileSystems.setDefaultPipelineOptions(pipelineOptions);
  return pipelineOptions;
}
 
Example 4
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that constructing a {@link DataflowRunner} from options that specify a temp location,
 * but no staging location, leaves the options with a non-null staging location.
 */
@Test
public void testGcsStagingLocationInitialization() throws Exception {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setProject(PROJECT_ID);
  pipelineOptions.setRegion(REGION_ID);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setGcsUtil(mockGcsUtil);
  // Only the (required) temp location is given; the staging location is deliberately left unset.
  pipelineOptions.setTempLocation(VALID_TEMP_BUCKET);

  DataflowRunner.fromOptions(pipelineOptions);

  // Building the runner is expected to have filled in the staging location.
  assertNotNull(pipelineOptions.getStagingLocation());
}
 
Example 5
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code DataflowRunner.fromOptions} completes without throwing when only the GCP
 * temp location is set and the generic temp location is left unset.
 */
@Test
public void testGcpTempAndNoTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setProject("foo-project");
  pipelineOptions.setRegion(REGION_ID);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcsUtil(mockGcsUtil);
  pipelineOptions.setGcpCredential(new TestCredential());
  // The only temp-related setting is gcpTempLocation.
  pipelineOptions.setGcpTempLocation(VALID_TEMP_BUCKET);

  // The test passes as long as runner construction does not throw.
  DataflowRunner.fromOptions(pipelineOptions);
}
 
Example 6
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code DataflowRunner.fromOptions} completes without throwing when only the generic
 * temp location is set and the GCP temp location is left unset.
 */
@Test
public void testTempLocationAndNoGcpTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setProject("foo-project");
  pipelineOptions.setRegion(REGION_ID);
  pipelineOptions.setRunner(DataflowRunner.class);
  pipelineOptions.setGcsUtil(mockGcsUtil);
  pipelineOptions.setGcpCredential(new TestCredential());
  // The only temp-related setting is tempLocation.
  pipelineOptions.setTempLocation(VALID_TEMP_BUCKET);

  // The test passes as long as runner construction does not throw.
  DataflowRunner.fromOptions(pipelineOptions);
}
 
Example 7
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a pipeline whose options list two local files to stage (one of them under an overriding
 * package name) and checks the job submitted to the mocked Dataflow service: job id, the two
 * staged packages, temp storage prefix, dataset and user-agent name/version.
 */
@Test
public void testRunWithFiles() throws IOException {
  // Exercises DataflowRunner.stageFiles through a full pipeline run.
  final String datasetId = "somedataset";
  final String aliasName = "alias.txt";

  // Two throwaway local files: one staged under its own name, one under the alias.
  File plainFile = File.createTempFile("DataflowRunnerTest-", ".txt");
  plainFile.deleteOnExit();
  File aliasedFile = File.createTempFile("DataflowRunnerTest2-", ".txt");
  aliasedFile.deleteOnExit();
  String plainPath = plainFile.getAbsolutePath();
  String aliasedEntry = aliasName + "=" + aliasedFile.getAbsolutePath();

  // GCS lookups answer with a FileNotFoundException entry ...
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              GcsUtil.StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
  // ... and GCS writes go to a local temp-file channel that is deleted when closed.
  when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
      .then(
          call ->
              FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE));

  DataflowPipelineOptions pipelineOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  pipelineOptions.setJobName("job");
  pipelineOptions.setProject(PROJECT_ID);
  pipelineOptions.setRegion(REGION_ID);
  pipelineOptions.setTempDatasetId(datasetId);
  pipelineOptions.setTempLocation(VALID_TEMP_BUCKET);
  pipelineOptions.setStagingLocation(VALID_STAGING_BUCKET);
  pipelineOptions.setFilesToStage(ImmutableList.of(plainPath, aliasedEntry));
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setGcsUtil(mockGcsUtil);
  pipelineOptions.setDataflowClient(buildMockDataflow());

  Pipeline pipeline = buildDataflowPipeline(pipelineOptions);
  DataflowPipelineJob pipelineJob = (DataflowPipelineJob) pipeline.run();
  assertEquals("newid", pipelineJob.getJobId());

  // Grab the Job that was handed to the mocked jobs API.
  ArgumentCaptor<Job> submittedJob = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), submittedJob.capture());
  Job createdJob = submittedJob.getValue();
  assertValidJob(createdJob);

  // Both files must appear as packages of the first worker pool, in order.
  assertEquals(2, createdJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
  DataflowPackage plainPackage =
      createdJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
  assertThat(plainPackage.getName(), endsWith(getFileExtension(plainPath)));
  DataflowPackage aliasedPackage =
      createdJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
  assertEquals(aliasName, aliasedPackage.getName());

  // Environment settings derived from the options.
  assertEquals(
      GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(),
      createdJob.getEnvironment().getTempStoragePrefix());
  assertEquals(datasetId, createdJob.getEnvironment().getDataset());

  // User agent must report the runner's name and version.
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getName(),
      createdJob.getEnvironment().getUserAgent().get("name"));
  assertEquals(
      DataflowRunnerInfo.getDataflowRunnerInfo().getVersion(),
      createdJob.getEnvironment().getUserAgent().get("version"));
}