Java Code Examples for org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setExperiments()
The following examples show how to use
org.apache.beam.runners.dataflow.options.DataflowPipelineOptions#setExperiments(). Each example is drawn from the Apache Beam project; its source file and license are noted above it.
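As a quick orientation before the examples: setExperiments() takes a list of experiment flag strings (such as "upload_graph" or "beam_fn_api") on the DataflowPipelineOptions. Below is a minimal sketch, not taken from the Beam test suite, assuming the project, region, and temp location are supplied on the command line:

import java.util.Arrays;

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SetExperimentsSketch {
  public static void main(String[] args) {
    // Parse standard Dataflow options (e.g. --project, --region, --tempLocation) from args.
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);

    // Enable Dataflow experiments by name; the setter replaces any previously set experiments.
    options.setExperiments(Arrays.asList("upload_graph"));

    Pipeline p = Pipeline.create(options);
    // ... apply transforms to p ...
    p.run();
  }
}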
Example 1
Source File: DataflowRunnerTest.java, from Apache Beam (Apache License 2.0)
@Test
public void testUploadGraph() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("upload_graph"));
  Pipeline p = buildDataflowPipeline(options);
  DataflowPipelineJob job = (DataflowPipelineJob) p.run();

  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
  assertTrue(jobCaptor.getValue().getSteps().isEmpty());
  assertTrue(
      jobCaptor
          .getValue()
          .getStepsLocation()
          .startsWith("gs://valid-bucket/temp/staging/dataflow_graph"));
}
Example 2
Source File: DataflowRunnerTest.java, from Apache Beam (Apache License 2.0)
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} returns normally when
 * the runner is successfully run with the upload_graph experiment turned on. The resulting
 * template should not contain raw steps, and the stepsLocation field should be set.
 */
@Test
public void testTemplateRunnerWithUploadGraph() throws Exception {
  File existingFile = tmpFolder.newFile();
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setExperiments(Arrays.asList("upload_graph"));
  options.setJobName("TestJobName");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setProject("test-project");
  options.setRegion(REGION_ID);
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation(existingFile.getPath());
  options.setTempLocation(tmpFolder.getRoot().getPath());

  Pipeline p = Pipeline.create(options);
  p.apply(Create.of(ImmutableList.of(1)));
  p.run();

  expectedLogs.verifyInfo("Template successfully created");
  ObjectMapper objectMapper = new ObjectMapper();
  JsonNode node = objectMapper.readTree(existingFile);
  assertEquals(0, node.get("steps").size());
  assertNotNull(node.get("stepsLocation"));
}
Example 3
Source File: DataflowPipelineTranslatorTest.java, from Apache Beam (Apache License 2.0)
@Test
public void testToSingletonTranslationWithFnApiSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(9, steps.size());

  Step collectionToSingletonStep = steps.get(steps.size() - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> ctsOutputs =
      (List<Map<String, Object>>)
          steps.get(steps.size() - 1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(Structs.getBoolean(Iterables.getOnlyElement(ctsOutputs), "use_indexed_format"));
}
Example 4
Source File: DataflowPipelineTranslatorTest.java, from Apache Beam (Apache License 2.0)
@Test
public void testToIterableTranslationWithFnApiSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(5, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> ctsOutputs =
      (List<Map<String, Object>>)
          steps.get(steps.size() - 1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(Structs.getBoolean(Iterables.getOnlyElement(ctsOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(steps.size() - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
Example 5
Source File: DataflowRunnerTest.java, from Apache Beam (Apache License 2.0)
@Test
public void testWorkerHarnessContainerImage() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // default image set
  options.setWorkerHarnessContainerImage("some-container");
  assertThat(getContainerImageForJob(options), equalTo("some-container"));

  // batch, legacy
  options.setWorkerHarnessContainerImage("gcr.io/IMAGE/foo");
  options.setExperiments(null);
  options.setStreaming(false);
  System.setProperty("java.specification.version", "1.8");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-batch/foo"));

  // batch, legacy, jdk11
  options.setStreaming(false);
  System.setProperty("java.specification.version", "11");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java11-batch/foo"));

  // streaming, legacy
  System.setProperty("java.specification.version", "1.8");
  options.setStreaming(true);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-streaming/foo"));

  // streaming, legacy, jdk11
  System.setProperty("java.specification.version", "11");
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java11-streaming/foo"));

  // streaming, fnapi
  options.setExperiments(ImmutableList.of("experiment1", "beam_fn_api"));
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/java/foo"));
}
Example 6
Source File: DataflowPipelineTranslatorTest.java, from Apache Beam (Apache License 2.0)
/** Smoke test to fail fast if translation of a splittable ParDo in the Fn API is broken. */
@Test
public void testSplittableParDoTranslationFnApi() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> windowedInput =
      pipeline
          .apply(Impulse.create())
          .apply(
              MapElements.via(
                  new SimpleFunction<byte[], String>() {
                    @Override
                    public String apply(byte[] input) {
                      return "";
                    }
                  }))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  windowedInput.apply(ParDo.of(new TestSplittableFn()));

  runner.replaceTransforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
  Job job = result.getJob();

  // The job should contain a ParDo step, containing a "restriction_encoding".
  List<Step> steps = job.getSteps();
  Step splittableParDo = null;
  for (Step step : steps) {
    if ("ParallelDo".equals(step.getKind())
        && step.getProperties().containsKey(PropertyNames.RESTRICTION_ENCODING)) {
      assertNull(splittableParDo);
      splittableParDo = step;
    }
  }
  assertNotNull(splittableParDo);

  String fn = Structs.getString(splittableParDo.getProperties(), PropertyNames.SERIALIZED_FN);

  Components componentsProto = result.getPipelineProto().getComponents();
  RehydratedComponents components = RehydratedComponents.forComponents(componentsProto);
  RunnerApi.PTransform splittableTransform = componentsProto.getTransformsOrThrow(fn);
  assertEquals(
      PTransformTranslation.PAR_DO_TRANSFORM_URN, splittableTransform.getSpec().getUrn());
  ParDoPayload payload = ParDoPayload.parseFrom(splittableTransform.getSpec().getPayload());
  assertThat(
      ParDoTranslation.doFnWithExecutionInformationFromProto(payload.getDoFn()).getDoFn(),
      instanceOf(TestSplittableFn.class));
  Coder expectedRestrictionAndStateCoder =
      KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of());
  assertEquals(
      expectedRestrictionAndStateCoder, components.getCoder(payload.getRestrictionCoderId()));

  // In the Fn API case, we still translate the restriction coder into the RESTRICTION_CODER
  // property as a CloudObject, and it gets passed through the Dataflow backend, but in the end
  // the Dataflow worker will end up fetching it from the SDK transform payload instead.
  Coder<?> restrictionCoder =
      CloudObjects.coderFromCloudObject(
          (CloudObject)
              Structs.getObject(
                  splittableParDo.getProperties(), PropertyNames.RESTRICTION_ENCODING));
  assertEquals(expectedRestrictionAndStateCoder, restrictionCoder);
}
Example 7
Source File: DataflowPipelineTranslatorTest.java, from Apache Beam (Apache License 2.0)
@Test
public void testPortablePipelineContainsExpectedDependenciesAndCapabilities() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> windowedInput =
      pipeline
          .apply(Impulse.create())
          .apply(
              MapElements.via(
                  new SimpleFunction<byte[], String>() {
                    @Override
                    public String apply(byte[] input) {
                      return "";
                    }
                  }))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));

  runner.replaceTransforms(pipeline);

  File file1 = File.createTempFile("file1-", ".txt");
  file1.deleteOnExit();
  File file2 = File.createTempFile("file2-", ".txt");
  file2.deleteOnExit();

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(
      Environments.createDockerEnvironment(DataflowRunner.getContainerImageForJob(options))
          .toBuilder()
          .addAllDependencies(
              Environments.getArtifacts(
                  ImmutableList.of("file1.txt=" + file1, "file2.txt=" + file2)))
          .addAllCapabilities(Environments.getJavaCapabilities())
          .build());

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());

  Components componentsProto = result.getPipelineProto().getComponents();
  assertThat(
      Iterables.getOnlyElement(componentsProto.getEnvironmentsMap().values())
          .getCapabilitiesList(),
      containsInAnyOrder(Environments.getJavaCapabilities().toArray(new String[0])));
  assertThat(
      Iterables.getOnlyElement(componentsProto.getEnvironmentsMap().values())
          .getDependenciesList(),
      containsInAnyOrder(
          Environments.getArtifacts(ImmutableList.of("file1.txt=" + file1, "file2.txt=" + file2))
              .toArray(new ArtifactInformation[0])));
}