org.apache.beam.sdk.PipelineResult.State Java Examples

The following examples show how to use org.apache.beam.sdk.PipelineResult.State. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DirectRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code waitUntilFinish} with a one-millisecond timeout returns even though the
 * pipeline's only step never finishes, and that the result then reports a {@code null} state.
 */
@Test
public void testWaitUntilFinishTimeout() throws Exception {
  DirectOptions directOptions = PipelineOptionsFactory.as(DirectOptions.class);
  directOptions.setBlockOnRun(false);
  directOptions.setRunner(DirectRunner.class);

  // A step that sleeps "forever", so the pipeline can never complete on its own.
  DoFn<Long, Long> sleepForever =
      new DoFn<Long, Long>() {
        @ProcessElement
        public void hang(ProcessContext context) throws InterruptedException {
          Thread.sleep(Long.MAX_VALUE);
        }
      };

  Pipeline pipeline = Pipeline.create(directOptions);
  pipeline.apply(Create.of(1L)).apply(ParDo.of(sleepForever));

  PipelineResult pending = pipeline.run();
  // Right after submission the job is expected to report RUNNING.
  assertThat(pending.getState(), is(State.RUNNING));

  // The wait has to time out; otherwise this test would never return.
  pending.waitUntilFinish(Duration.millis(1L));
  assertEquals(null, pending.getState());
}
 
Example #2
Source File: DataflowJobManager.java    From feast with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the given Dataflow job every two seconds until it is either running or terminal.
 *
 * @param pipelineResult the submitted job to poll
 * @return the job id, as soon as the job reports {@code State.RUNNING}
 * @throws RuntimeException if the job reaches a terminal state before it is seen running; the
 *     message carries the state and a link to the Dataflow dashboard
 * @throws InterruptedException if the thread is interrupted while sleeping between polls
 */
private String waitForJobToRun(DataflowPipelineJob pipelineResult)
    throws RuntimeException, InterruptedException {
  // TODO: add timeout
  for (; ; ) {
    State current = pipelineResult.getState();

    // Terminal is checked first: a terminal job can never become RUNNING.
    if (current.isTerminal()) {
      String dashboardUrl =
          String.format(
              "https://console.cloud.google.com/dataflow/jobsDetail/locations/%s/jobs/%s",
              location, pipelineResult.getJobId());
      String message =
          String.format(
              "Failed to submit dataflow job, job state is %s. Refer to the dataflow dashboard for more information: %s",
              current.toString(), dashboardUrl);
      throw new RuntimeException(message);
    }

    if (current.equals(State.RUNNING)) {
      return pipelineResult.getJobId();
    }

    Thread.sleep(2000);
  }
}
 
Example #3
Source File: BeamEnumerableConverter.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline from {@code node}, feeds its rows into a {@code Collector} and blocks until
 * the pipeline finishes.
 *
 * @param options options used to create the pipeline
 * @param node relational node converted into the pipeline's row collection
 * @throws RuntimeException if the pipeline finishes in the {@code FAILED} state
 */
private static void runCollector(PipelineOptions options, BeamRelNode node) {
  Pipeline collectorPipeline = Pipeline.create(options);
  PCollection<Row> rows = BeamSqlRelUtils.toPCollection(collectorPipeline, node);
  rows.apply(ParDo.of(new Collector()));

  PipelineResult.State finalState = collectorPipeline.run().waitUntilFinish();
  if (finalState == PipelineResult.State.FAILED) {
    throw new RuntimeException("Pipeline failed for unknown reason");
  }
}
 
Example #4
Source File: TestDataflowRunner.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the job every three seconds until it reaches a terminal state, cancelling it early if the
 * message handler has seen an error.
 *
 * <p>The state is read once per iteration and reused for both checks, so the cancel decision and
 * the termination decision are based on the same observation.
 *
 * @return always {@code null}
 * @throws Exception if polling, cancelling or sleeping fails
 */
@Override
public Void call() throws Exception {
  while (true) {
    State jobState = job.getState();

    // If we see an error, cancel and note failure
    if (messageHandler.hasSeenError() && !jobState.isTerminal()) {
      // Log first so the attempt is recorded even if cancel() throws.
      LOG.info("Cancelling Dataflow job {}", job.getJobId());
      job.cancel();
      return null;
    }

    if (jobState.isTerminal()) {
      return null;
    }

    Thread.sleep(3000L);
  }
}
 
Example #5
Source File: DataflowMetricsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a job-metrics response whose metric list is {@code null} yields empty counter and
 * distribution results.
 */
@Test
public void testEmptyMetricUpdates() throws IOException {
  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  when(job.getState()).thenReturn(State.RUNNING);
  job.jobId = JOB_ID;

  JobMetrics jobMetrics = new JobMetrics();
  jobMetrics.setMetrics(null /* this is how the APIs represent empty metrics */);
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  MetricQueryResults result = dataflowMetrics.allMetrics();
  assertThat(ImmutableList.copyOf(result.getCounters()), is(empty()));
  assertThat(ImmutableList.copyOf(result.getDistributions()), is(empty()));
}
 
Example #6
Source File: DataflowMetricsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that, for a job mocked as {@code DONE}, job metrics are fetched from the client only on
 * the first {@code allMetrics()} call and not again on the second.
 */
@Test
public void testCachingMetricUpdates() throws IOException {
  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  when(job.getState()).thenReturn(State.DONE);
  job.jobId = JOB_ID;

  JobMetrics jobMetrics = new JobMetrics();
  jobMetrics.setMetrics(ImmutableList.of());
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  // Constructing the metrics object alone must not hit the client.
  verify(dataflowClient, times(0)).getJobMetrics(JOB_ID);
  dataflowMetrics.allMetrics();
  verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
  // A second query must not trigger another fetch.
  dataflowMetrics.allMetrics();
  verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
}
 
Example #7
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that running a batch pipeline whose mocked job reports {@code DONE} and whose mocked
 * metrics indicate success returns that job.
 */
@Test
public void testRunBatchJobThatSucceeds() throws Exception {
  Pipeline pipeline = Pipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // Job stub handed back by the delegate runner.
  DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));

  assertEquals(doneJob, testRunner.run(pipeline, delegate));
}
 
Example #8
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Tests that when a batch job terminates in a failure state even if all assertions passed, it
 * throws an error to that effect.
 */
@Test
public void testRunBatchJobThatFails() throws Exception {
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> pc = p.apply(Create.of(1, 2, 3));
  PAssert.that(pc).containsInAnyOrder(1, 2, 3);

  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.FAILED);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, false /* tentative */));
  expectedException.expect(RuntimeException.class);
  runner.run(p, mockRunner);
  // Only reached if run() returned normally, i.e. the expected RuntimeException was not thrown.
  fail("RuntimeException expected");
}
 
Example #9
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/** Verifies that a streaming job which finishes without error messages counts as a success. */
@Test
public void testRunStreamingJobUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // Job stub: already DONE, and waiting on it also yields DONE.
  DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);
  when(doneJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));
  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  testRunner.run(pipeline, delegate);
}
 
Example #10
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a streaming pipeline containing no {@code PAssert} runs without throwing when the
 * mocked job reports {@code DONE} and the mocked streaming metrics are empty.
 */
@Test
public void testRunStreamingJobNotUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3));

  // Job stub: already DONE, and waiting on it also yields DONE.
  DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);
  when(doneJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockStreamingMetricResponse(ImmutableMap.of()));
  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  testRunner.run(pipeline, delegate);
}
 
Example #11
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a tentative {@link PAssert} failure in the job metrics is already treated as a
 * conclusive failure: {@code checkForPAssertSuccess} returns {@code Optional.of(false)}.
 */
@Test
public void testCheckingForSuccessWhenPAssertFails() throws Exception {
  DataflowPipelineJob spiedJob =
      spy(new DataflowPipelineJob(mockClient, "test-job", options, null));
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(
          buildJobMetrics(generateMockMetrics(false /* success */, true /* tentative */)));

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  // doReturn(..).when(..) form is used because the job is a spy.
  doReturn(State.DONE).when(spiedJob).getState();
  assertThat(testRunner.checkForPAssertSuccess(spiedJob), equalTo(Optional.of(false)));
}
 
Example #12
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a batch pipeline with an on-create matcher ({@code TestSuccessMatcher}) installed on the
 * test options, against a mocked job that reports {@code DONE}.
 */
@Test
public void testBatchOnCreateMatcher() throws Exception {
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  final DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnCreateMatcher(new TestSuccessMatcher(doneJob, 0));

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));
  testRunner.run(pipeline, delegate);
}
 
Example #13
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a streaming pipeline with an on-create matcher ({@code TestSuccessMatcher}) installed on
 * the test options, against a mocked job that reports {@code DONE}.
 */
@Test
public void testStreamingOnCreateMatcher() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  final DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnCreateMatcher(new TestSuccessMatcher(doneJob, 0));

  // Waiting on the streaming job immediately reports DONE.
  when(doneJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));
  testRunner.run(pipeline, delegate);
}
 
Example #14
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the {@link TestPipelineOptions#setOnSuccessMatcher(SerializableMatcher) on
 * success matcher} is invoked when a streaming pipeline terminates without a {@link PAssert}
 * failure.
 */
@Test
public void testStreamingOnSuccessMatcherWhenPipelineSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  final DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnSuccessMatcher(new TestSuccessMatcher(doneJob, 1));

  // Waiting on the streaming job immediately reports DONE.
  when(doneJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));
  testRunner.run(pipeline, delegate);
}
 
Example #15
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code checkForPAssertSuccess} returns an absent result for a running job when
 * the mocked metrics report success but are not tentative.
 */
@Test
public void testCheckingForSuccessSkipsNonTentativeMetrics() throws Exception {
  DataflowPipelineJob spiedJob =
      spy(new DataflowPipelineJob(mockClient, "test-job", options, null));
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(
          buildJobMetrics(generateMockMetrics(true /* success */, false /* tentative */)));

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  testRunner.updatePAssertCount(pipeline);
  // doReturn(..).when(..) form is used because the job is a spy.
  doReturn(State.RUNNING).when(spiedJob).getState();
  assertThat(
      testRunner.checkForPAssertSuccess(spiedJob), equalTo(Optional.<Boolean>absent()));
}
 
Example #16
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@link DataflowPipelineJob#cancel} does not throw when the cancel API call fails
 * while the Dataflow service still reports a non-terminal state, which can happen in practice.
 * The stale {@code RUNNING} state is returned and a warning is logged.
 *
 * <p>TODO: delete this code if the API calls become consistent.
 */
@Test
public void testCancelTerminatedJobWithStaleState() throws IOException {
  // Status lookup keeps answering with a (stale) running state.
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  Job staleStatus = new Job();
  staleStatus.setCurrentState("JOB_STATE_RUNNING");
  when(mockJobs.get(PROJECT_ID, REGION_ID, JOB_ID)).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(staleStatus);

  // The cancel (update) request itself fails.
  Dataflow.Projects.Locations.Jobs.Update updateRequest =
      mock(Dataflow.Projects.Locations.Jobs.Update.class);
  when(mockJobs.update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), any(Job.class)))
      .thenReturn(updateRequest);
  when(updateRequest.execute())
      .thenThrow(new IOException("Job has terminated in state SUCCESS"));

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, null);
  State afterCancel = pipelineJob.cancel();

  assertThat(afterCancel, equalTo(State.RUNNING));
  expectedLogs.verifyWarn("Cancel failed because job is already terminated.");
}
 
Example #17
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that cancelling a job returns {@code CANCELLED} when the update call succeeds, and
 * that the only interaction with the jobs API is a single update requesting the cancelled state.
 */
@Test
public void testCancelUnterminatedJobThatSucceeds() throws IOException {
  Dataflow.Projects.Locations.Jobs.Update updateRequest =
      mock(Dataflow.Projects.Locations.Jobs.Update.class);
  when(mockJobs.update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), any(Job.class)))
      .thenReturn(updateRequest);
  when(updateRequest.execute()).thenReturn(new Job().setCurrentState("JOB_STATE_CANCELLED"));

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, null);

  // The payload the cancel call is expected to send.
  Job expectedRequest = new Job();
  expectedRequest.setProjectId(PROJECT_ID);
  expectedRequest.setId(JOB_ID);
  expectedRequest.setRequestedState("JOB_STATE_CANCELLED");

  assertEquals(State.CANCELLED, pipelineJob.cancel());
  verify(mockJobs).update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), eq(expectedRequest));
  verifyNoMoreInteractions(mockJobs);
}
 
Example #18
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code getStateWithRetriesOrUnknownOnException} returns {@code UNKNOWN} rather
 * than throwing when every status request fails with an {@link IOException}, and passes the
 * elapsed fake-clock time to {@code checkValidInterval} with the status polling settings.
 */
@Test
public void testGetStateNoThrowWithExceptionReturnsUnknown() throws Exception {
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(IOException.class);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  long beginNanos = fastClock.nanoTime();
  State observed =
      pipelineJob.getStateWithRetriesOrUnknownOnException(
          BackOffAdapter.toGcpBackOff(DataflowPipelineJob.STATUS_BACKOFF_FACTORY.backoff()),
          fastClock);
  assertEquals(State.UNKNOWN, observed);

  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fastClock.nanoTime() - beginNanos);
  checkValidInterval(
      DataflowPipelineJob.STATUS_POLLING_INTERVAL,
      DataflowPipelineJob.STATUS_POLLING_RETRIES,
      elapsedMillis);
}
 
Example #19
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code getStateWithRetriesOrUnknownOnException} returns {@code RUNNING} when the
 * mocked service reports the job's current state as {@code JOB_STATE_RUNNING}.
 */
@Test
public void testGetStateReturnsServiceState() throws Exception {
  Job runningStatus = new Job();
  runningStatus.setCurrentState("JOB_STATE_" + State.RUNNING.name());

  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(runningStatus);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  State observed =
      pipelineJob.getStateWithRetriesOrUnknownOnException(
          BackOffAdapter.toGcpBackOff(DataflowPipelineJob.STATUS_BACKOFF_FACTORY.backoff()),
          fastClock);
  assertEquals(State.RUNNING, observed);
}
 
Example #20
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Waits at most 4 ms, on a fresh {@code FastNanoClockAndFuzzySleeper}, for a job that always
 * reports running; expects a {@code null} state and no more than 4 ms of elapsed clock time.
 */
@Test
public void testCumulativeTimeOverflow() throws Exception {
  Job runningStatus = new Job();
  runningStatus.setCurrentState("JOB_STATE_RUNNING");

  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(runningStatus);

  // One object acts as both the sleeper and the nano clock for the wait.
  FastNanoClockAndFuzzySleeper fuzzyClock = new FastNanoClockAndFuzzySleeper();

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());
  long beginNanos = fuzzyClock.nanoTime();
  State finalState = pipelineJob.waitUntilFinish(Duration.millis(4), null, fuzzyClock, fuzzyClock);
  assertEquals(null, finalState);

  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fuzzyClock.nanoTime() - beginNanos);
  // Should only have slept for the 4 ms allowed.
  assertThat(elapsedMillis, lessThanOrEqualTo(4L));
}
 
Example #21
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Waits at most 4 ms for a job whose status requests always fail with an {@link IOException};
 * expects a {@code null} state and exactly 4 ms of elapsed fake-clock time.
 */
@Test
public void testWaitToFinishTimeFail() throws Exception {
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(IOException.class);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  long beginNanos = fastClock.nanoTime();
  State finalState = pipelineJob.waitUntilFinish(Duration.millis(4), null, fastClock, fastClock);
  assertEquals(null, finalState);

  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fastClock.nanoTime() - beginNanos);
  // Should only have slept for the 4 ms allowed.
  assertEquals(4L, elapsedMillis);
}
 
Example #22
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Waits up to five minutes for a job whose status requests always fail with an {@link
 * IOException}; expects a {@code null} state and passes the elapsed fake-clock time to {@code
 * checkValidInterval} together with the messages polling settings.
 */
@Test
public void testWaitToFinishFail() throws Exception {
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenThrow(IOException.class);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  long beginNanos = fastClock.nanoTime();
  State finalState =
      pipelineJob.waitUntilFinish(Duration.standardMinutes(5), null, fastClock, fastClock);
  assertEquals(null, finalState);

  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fastClock.nanoTime() - beginNanos);
  checkValidInterval(
      DataflowPipelineJob.MESSAGES_POLLING_INTERVAL,
      DataflowPipelineJob.MESSAGES_POLLING_RETRIES,
      elapsedMillis);
}
 
Example #23
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Stubs the jobs API so the job is reported in the given state, then waits up to one minute on a
 * new {@link DataflowPipelineJob} using the fake clock.
 *
 * @param state the state the mocked service should report; for {@code UPDATED} the response also
 *     carries {@code REPLACEMENT_JOB_ID} as the replacing job
 * @return whatever {@code waitUntilFinish} returns for that reported state
 * @throws Exception if stubbing or waiting fails
 */
public State mockWaitToFinishInState(State state) throws Exception {
  Job reportedStatus = new Job();
  reportedStatus.setCurrentState("JOB_STATE_" + state.name());
  if (state == State.UPDATED) {
    reportedStatus.setReplacedByJobId(REPLACEMENT_JOB_ID);
  }

  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(reportedStatus);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  return pipelineJob.waitUntilFinish(Duration.standardMinutes(1), null, fastClock, fastClock);
}
 
Example #24
Source File: DataflowPipelineJobTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code waitUntilFinish} returns {@code null} when the job status reports {@code
 * DONE} but listing job messages fails with a {@link SocketTimeoutException}.
 */
@Test
public void testWaitToFinishMessagesFail() throws Exception {
  // Status lookup: the job is reported as DONE.
  Job doneStatus = new Job();
  doneStatus.setCurrentState("JOB_STATE_" + State.DONE.name());
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(doneStatus);

  // Message listing: every execute() call times out.
  MonitoringUtil.JobMessagesHandler messagesHandler =
      mock(MonitoringUtil.JobMessagesHandler.class);
  Dataflow.Projects.Locations.Jobs.Messages messagesApi =
      mock(Dataflow.Projects.Locations.Jobs.Messages.class);
  Messages.List listCall = mock(Dataflow.Projects.Locations.Jobs.Messages.List.class);
  when(mockJobs.messages()).thenReturn(messagesApi);
  when(messagesApi.list(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(listCall);
  when(listCall.setPageToken(eq((String) null))).thenReturn(listCall);
  when(listCall.execute()).thenThrow(SocketTimeoutException.class);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  State finalState =
      pipelineJob.waitUntilFinish(
          Duration.standardMinutes(5), messagesHandler, fastClock, fastClock);
  assertEquals(null, finalState);
}
 
Example #25
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the {@link TestPipelineOptions#setOnSuccessMatcher(SerializableMatcher) on
 * success matcher} is not invoked when a streaming pipeline terminates in FAIL; the run is
 * expected to throw a {@link RuntimeException} instead.
 */
@Test
public void testStreamingOnSuccessMatcherWhenPipelineFails() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  final DataflowPipelineJob failedJob = Mockito.mock(DataflowPipelineJob.class);
  when(failedJob.getJobId()).thenReturn("test-job");
  when(failedJob.getProjectId()).thenReturn("test-project");
  when(failedJob.getState()).thenReturn(State.FAILED);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(failedJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnSuccessMatcher(new TestFailureMatcher());

  // Waiting on the streaming job reports FAILED.
  when(failedJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.FAILED);

  expectedException.expect(RuntimeException.class);
  testRunner.run(pipeline, delegate);
  // If the onSuccessMatcher were invoked, it would have crashed here with AssertionError
}
 
Example #26
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a batch pipeline with an on-success matcher ({@code TestSuccessMatcher}) installed on the
 * test options, against a mocked job that reports {@code DONE} with successful metrics.
 */
@Test
public void testBatchOnSuccessMatcherWhenPipelineSucceeds() throws Exception {
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  final DataflowPipelineJob doneJob = Mockito.mock(DataflowPipelineJob.class);
  when(doneJob.getJobId()).thenReturn("test-job");
  when(doneJob.getProjectId()).thenReturn("test-project");
  when(doneJob.getState()).thenReturn(State.DONE);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(doneJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnSuccessMatcher(new TestSuccessMatcher(doneJob, 1));

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));
  testRunner.run(pipeline, delegate);
}
 
Example #27
Source File: GcsKmsKeyIT.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes to tempLocation with --dataflowKmsKey supplied on the command line, then checks that
 * the output is readable (checksum match) and that every output object carries a KMS key name.
 * Temporary files are not verified.
 *
 * <p>This test verifies that GCS file copies work with CMEK-enabled files.
 */
@Test
public void testGcsWriteWithKmsKey() {
  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  assertNotNull(options.getTempRoot());
  options.setTempLocation(options.getTempRoot() + "/testGcsWriteWithKmsKey");
  GcsOptions gcsOptions = options.as(GcsOptions.class);

  // Timestamped output prefix under the GCP temp location.
  String outputName = String.format("GcsKmsKeyIT-%tF-%<tH-%<tM-%<tS-%<tL.output", new Date());
  ResourceId outputPrefix =
      FileSystems.matchNewResource(gcsOptions.getGcpTempLocation(), true)
          .resolve(outputName, StandardResolveOptions.RESOLVE_FILE);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadLines", TextIO.read().from(INPUT_FILE))
      .apply("WriteLines", TextIO.write().to(outputPrefix));

  State finalState = pipeline.run().waitUntilFinish();
  assertThat(finalState, equalTo(State.DONE));

  String shardPattern = outputPrefix + "*-of-*";
  assertThat(new NumberedShardedFile(shardPattern), fileContentsHaveChecksum(EXPECTED_CHECKSUM));

  // Verify objects have KMS key set.
  try {
    MatchResult matched =
        Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(shardPattern)));
    GcsUtil gcsUtil = gcsOptions.getGcsUtil();
    for (Metadata shard : matched.metadata()) {
      GcsPath shardPath = GcsPath.fromUri(shard.resourceId().toString());
      assertNotNull(gcsUtil.getObject(shardPath).getKmsKeyName());
    }
  } catch (IOException e) {
    // The test method declares no checked exceptions, so surface I/O problems as failures.
    throw new AssertionError(e);
  }
}
 
Example #28
Source File: DataflowMetricsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a single counter, reported once as committed and once as tentative for step
 * {@code s2}, surfaces through {@code allMetrics()} under the step's full name.
 */
@Test
public void testSingleCounterUpdates() throws IOException {
  JobMetrics jobMetrics = new JobMetrics();
  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  when(job.getState()).thenReturn(State.RUNNING);
  job.jobId = JOB_ID;

  // Map the Dataflow step id "s2" back to a transform whose full name is "myStepName".
  AppliedPTransform<?, ?, ?> myStep = mock(AppliedPTransform.class);
  when(myStep.getFullName()).thenReturn("myStepName");
  job.transformStepNames = HashBiMap.create();
  job.transformStepNames.put(myStep, "s2");

  // The parser relies on the fact that one tentative and one committed metric update exist in
  // the job metrics results.
  MetricUpdate mu1 =
      makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1234L, false);
  MetricUpdate mu1Tentative =
      makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1233L, true);
  jobMetrics.setMetrics(ImmutableList.of(mu1, mu1Tentative));
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  MetricQueryResults result = dataflowMetrics.allMetrics();
  assertThat(
      result.getCounters(),
      containsInAnyOrder(
          attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L)));
  assertThat(
      result.getCounters(),
      containsInAnyOrder(
          committedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L)));
}
 
Example #29
Source File: AvroTableProviderTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Writes two rows into an external Avro table via SQL DDL/INSERT, then reads them back with a
 * filtering query and checks both the selected row and that the read pipeline ends {@code DONE}.
 */
@Test
public void testReadAndWriteAvroTable() {
  File destinationFile = new File(tempFolder.getRoot(), "person-info.avro");

  BeamSqlEnv env = BeamSqlEnv.inMemory(new AvroTableProvider());
  env.executeDdl(
      String.format(
          "CREATE EXTERNAL TABLE PersonInfo %s TYPE avro LOCATION '%s'",
          AVRO_FIELD_NAMES, destinationFile.getAbsolutePath()));

  BeamSqlRelUtils.toPCollection(
      writePipeline,
      env.parseQuery(
          "INSERT INTO PersonInfo VALUES ('Alan', 22, 'England'), ('John', 42, 'USA')"));

  // The write must finish before the read pipeline is built against the same file.
  writePipeline.run().waitUntilFinish();

  PCollection<Row> rows =
      BeamSqlRelUtils.toPCollection(
          readPipeline, env.parseQuery("SELECT age, country FROM PersonInfo where age > 25"));

  PAssert.that(rows)
      .containsInAnyOrder(Row.withSchema(OUTPUT_ROW_SCHEMA).addValues(42L, "USA").build());

  PipelineResult.State state = readPipeline.run().waitUntilFinish();
  // JUnit's assertEquals takes (expected, actual); this order keeps failure messages accurate.
  assertEquals(State.DONE, state);
}
 
Example #30
Source File: TestDataflowRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a batch run whose mocked metrics report a failed assertion throws an {@link
 * AssertionError}, and that the job was waited on exactly once before the error surfaced.
 */
@Test
public void testBatchOnSuccessMatcherWhenPipelineFails() throws Exception {
  Pipeline pipeline = TestPipeline.create(options);
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  final DataflowPipelineJob failedJob = Mockito.mock(DataflowPipelineJob.class);
  when(failedJob.getJobId()).thenReturn("test-job");
  when(failedJob.getProjectId()).thenReturn("test-project");
  when(failedJob.getState()).thenReturn(State.FAILED);

  DataflowRunner delegate = Mockito.mock(DataflowRunner.class);
  when(delegate.run(any(Pipeline.class))).thenReturn(failedJob);

  TestDataflowRunner testRunner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
  testOptions.setOnSuccessMatcher(new TestFailureMatcher());

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(false /* success */, true /* tentative */));

  AssertionError raised = null;
  try {
    testRunner.run(pipeline, delegate);
  } catch (AssertionError expected) {
    raised = expected;
  }

  // fail() sits outside the try so its own AssertionError is not mistaken for the expected one.
  if (raised == null) {
    fail("Expected an exception on pipeline failure.");
  }
  verify(failedJob, Mockito.times(1))
      .waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class));
}