org.apache.beam.sdk.options.PipelineOptionsFactory Java Examples
The following examples show how to use
org.apache.beam.sdk.options.PipelineOptionsFactory.
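As a quick orientation before the examples: PipelineOptionsFactory is typically used either to build default options with create(), or to parse command-line arguments with fromArgs(args), optionally validating required properties and casting the result to a custom options interface with as(). The snippet below is a minimal, self-contained sketch of that pattern; the WordCountOptions interface and its inputFile property are hypothetical names invented for this illustration, not part of the Beam SDK.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class OptionsSketch {

  // Hypothetical custom options interface; each property is a getter/setter pair.
  public interface WordCountOptions extends PipelineOptions {
    @Description("Path of the file to read from")
    @Default.String("gs://my-bucket/input.txt")
    String getInputFile();

    void setInputFile(String value);
  }

  public static void main(String[] args) {
    // Registering the interface makes its properties visible to --help output.
    PipelineOptionsFactory.register(WordCountOptions.class);

    // Parse flags such as --inputFile=..., validate them, and cast to the custom interface.
    WordCountOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);

    Pipeline pipeline = Pipeline.create(options);
    // ... apply transforms here, then run:
    // pipeline.run().waitUntilFinish();
  }
}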
Example #1
Source File: UserScore.java From beam with Apache License 2.0 | 6 votes |
/** Run a batch pipeline. */
// [START DocInclude_USMain]
public static void main(String[] args) throws Exception {
  // Begin constructing a pipeline configured by commandline flags.
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  // Read events from a text file and parse them.
  pipeline
      .apply(TextIO.read().from(options.getInput()))
      .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      .apply(
          "WriteUserScoreSums",
          new WriteToText<>(options.getOutput(), configureOutput(), false));

  // Run the batch pipeline.
  pipeline.run().waitUntilFinish();
}
Example #2
Source File: CompressedSourceTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Test a concatenation of gzip files is correctly decompressed.
 *
 * <p>A concatenation of gzip files as one file is a valid gzip file and should decompress to be
 * the concatenation of those individual files.
 */
@Test
public void testReadConcatenatedGzip() throws IOException {
  byte[] header = "a,b,c\n".getBytes(StandardCharsets.UTF_8);
  byte[] body = "1,2,3\n4,5,6\n7,8,9\n".getBytes(StandardCharsets.UTF_8);
  byte[] expected = concat(header, body);
  byte[] totalGz = concat(compressGzip(header), compressGzip(body));
  File tmpFile = tmpFolder.newFile();
  try (FileOutputStream os = new FileOutputStream(tmpFile)) {
    os.write(totalGz);
  }

  CompressedSource<Byte> source =
      CompressedSource.from(new ByteSource(tmpFile.getAbsolutePath(), 1))
          .withDecompression(CompressionMode.GZIP);
  List<Byte> actual = SourceTestUtils.readFromSource(source, PipelineOptionsFactory.create());
  assertEquals(Bytes.asList(expected), actual);
}
Example #3
Source File: BigtableIOTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testReadWithRuntimeParametersValidationDisabled() {
  ReadOptions options = PipelineOptionsFactory.fromArgs().withValidation().as(ReadOptions.class);

  BigtableIO.Read read =
      BigtableIO.read()
          .withoutValidation()
          .withProjectId(options.getBigtableProject())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId());

  // Not running a pipeline therefore this is expected.
  thrown.expect(PipelineRunMissingException.class);

  p.apply(read);
}
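The ReadOptions interface consumed above exposes Bigtable settings as runtime parameters. In Beam, such deferred parameters are typically declared as ValueProvider-typed properties on a PipelineOptions interface; the sketch below is a hypothetical, simplified version of what that declaration might look like (names are illustrative, not the actual Beam test definition).

import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;

// Hypothetical runtime-parameter options, similar in spirit to the ReadOptions above.
public interface ReadOptionsSketch extends PipelineOptions {
  @Description("Bigtable project id, resolvable at pipeline run time")
  ValueProvider<String> getBigtableProject();

  void setBigtableProject(ValueProvider<String> value);
}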
Example #4
Source File: DatastoreToText.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Runs a pipeline which reads in Entities from Datastore, passes in the JSON encoded Entities
 * to a Javascript UDF, and writes the JSON to TextIO sink.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
  DatastoreToTextOptions options =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(DatastoreToTextOptions.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply(ReadJsonEntities.newBuilder()
          .setGqlQuery(options.getDatastoreReadGqlQuery())
          .setProjectId(options.getDatastoreReadProjectId())
          .setNamespace(options.getDatastoreReadNamespace())
          .build())
      .apply(TransformTextViaJavascript.newBuilder()
          .setFileSystemPath(options.getJavascriptTextTransformGcsPath())
          .setFunctionName(options.getJavascriptTextTransformFunctionName())
          .build())
      .apply(TextIO.write()
          .to(options.getTextWritePrefix())
          .withSuffix(".json"));

  pipeline.run();
}
Example #5
Source File: GrpcWindmillServer.java From beam with Apache License 2.0 | 6 votes |
private GrpcWindmillServer(String name, boolean enableStreamingEngine) {
  this.options = PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
  this.streamingRpcBatchLimit = Integer.MAX_VALUE;
  options.setProject("project");
  options.setJobId("job");
  options.setWorkerId("worker");
  if (enableStreamingEngine) {
    List<String> experiments = this.options.getExperiments();
    if (experiments == null) {
      experiments = new ArrayList<>();
    }
    experiments.add(GcpOptions.STREAMING_ENGINE_EXPERIMENT);
    options.setExperiments(experiments);
  }
  this.stubList.add(CloudWindmillServiceV1Alpha1Grpc.newStub(inProcessChannel(name)));
}
Example #6
Source File: Task.java From beam with Apache License 2.0 | 6 votes |
public static void main(String[] args) {
  String[] lines = {
      "apple orange grape banana apple banana",
      "banana orange banana papaya"
  };

  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> wordCounts = pipeline.apply(Create.of(Arrays.asList(lines)));

  PCollection<String> output = applyTransform(wordCounts);

  output.apply(Log.ofElements());

  pipeline.run();
}
Example #7
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void shouldSupportIPv6Batch() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);

  options.setFlinkMaster("[FE80:CD00:0000:0CDE:1257:0000:211E:729C]:1234");
  ExecutionEnvironment bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());
  checkHostAndPort(bev, "FE80:CD00:0000:0CDE:1257:0000:211E:729C", 1234);

  options.setFlinkMaster("FE80:CD00:0000:0CDE:1257:0000:211E:729C");
  bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());
  checkHostAndPort(
      bev, "FE80:CD00:0000:0CDE:1257:0000:211E:729C", RestOptions.PORT.defaultValue());
}
Example #8
Source File: Task.java From beam with Apache License 2.0 | 6 votes |
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<Event> events =
      pipeline.apply(
          Create.of(
              new Event("1", "book-order", DateTime.parse("2019-06-01T00:00:00+00:00")),
              new Event("2", "pencil-order", DateTime.parse("2019-06-02T00:00:00+00:00")),
              new Event("3", "paper-order", DateTime.parse("2019-06-03T00:00:00+00:00")),
              new Event("4", "pencil-order", DateTime.parse("2019-06-04T00:00:00+00:00")),
              new Event("5", "book-order", DateTime.parse("2019-06-05T00:00:00+00:00"))));

  PCollection<Event> output = applyTransform(events);

  output.apply(Log.ofElements());

  pipeline.run();
}
Example #9
Source File: ElasticsearchIOIT.java From beam with Apache License 2.0 | 6 votes |
@BeforeClass
public static void beforeClass() throws Exception {
  PipelineOptionsFactory.register(ElasticsearchPipelineOptions.class);
  options = TestPipeline.testingPipelineOptions().as(ElasticsearchPipelineOptions.class);
  readConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.READ);
  writeConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.WRITE);
  updateConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.WRITE_PARTIAL);
  restClient = readConnectionConfiguration.createClient();
  elasticsearchIOTestCommon =
      new ElasticsearchIOTestCommon(readConnectionConfiguration, restClient, true);
}
Example #10
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testReadEverythingFromFileWithSplits() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  String header = "<h>";
  List<String> data = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    data.add(header);
    data.addAll(createStringDataset(3, 9));
  }
  String fileName = "file";
  File file = createFileWithData(fileName, data);

  TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 64, header);

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data);
  // Remove all occurrences of header from expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  assertEquals(expectedResults, readFromSource(source, options));
}
Example #11
Source File: FlinkPipelineExecutionEnvironmentTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void shouldUseTransformOverrides() {
  boolean[] testParameters = {true, false};
  for (boolean streaming : testParameters) {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setStreaming(streaming);
    options.setRunner(FlinkRunner.class);
    FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
    Pipeline p = Mockito.spy(Pipeline.create(options));

    flinkEnv.translate(p);

    ArgumentCaptor<ImmutableList> captor = ArgumentCaptor.forClass(ImmutableList.class);
    Mockito.verify(p).replaceAll(captor.capture());
    ImmutableList<PTransformOverride> overridesList = captor.getValue();

    assertThat(overridesList.isEmpty(), is(false));
    assertThat(
        overridesList.size(), is(FlinkTransformOverrides.getDefaultOverrides(options).size()));
  }
}
Example #12
Source File: GoogleApiDebugOptionsTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testMatchingAgainstClient() throws Exception {
  GcsOptions options = PipelineOptionsFactory.as(GcsOptions.class);
  options.setGcpCredential(new TestCredential());
  options.setGoogleApiTrace(
      new GoogleApiTracer()
          .addTraceFor(Transport.newStorageClient(options).build(), "TraceDestination"));

  Storage.Objects.Get getRequest =
      Transport.newStorageClient(options).build().objects().get("testBucketId", "testObjectId");
  assertEquals("TraceDestination", getRequest.get("$trace"));

  Delete deleteRequest =
      GcpOptions.GcpTempLocationFactory.newCloudResourceManagerClient(
              options.as(CloudResourceManagerOptions.class))
          .build()
          .projects()
          .delete("testProjectId");
  assertNull(deleteRequest.get("$trace"));
}
Example #13
Source File: FileBasedSourceTest.java From beam with Apache License 2.0 | 6 votes |
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  File file = createFileWithData("file", createStringDataset(3, 100));

  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null);
  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(source, 0, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options);
  assertSplitAtFractionFails(source, 0, 0.0, options);
  assertSplitAtFractionFails(source, 70, 0.3, options);
  assertSplitAtFractionFails(source, 100, 1.0, options);
  assertSplitAtFractionFails(source, 100, 0.99, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options);
}
Example #14
Source File: NexmarkUtilsTest.java From beam with Apache License 2.0 | 6 votes |
private void testTableName(
    NexmarkUtils.ResourceNameMode nameMode,
    String baseTableName,
    String queryName,
    Long salt,
    String version,
    Class runner,
    Boolean isStreaming,
    final String expected) {
  NexmarkOptions options = PipelineOptionsFactory.as(NexmarkOptions.class);
  options.setResourceNameMode(nameMode);
  options.setBigQueryTable(baseTableName);
  options.setRunner(runner);
  options.setStreaming(isStreaming);

  String tableName = NexmarkUtils.tableName(options, queryName, salt, version);

  assertEquals(expected, tableName);
}
Example #15
Source File: BigQueryServicesImplTest.java From beam with Apache License 2.0 | 6 votes |
/** Tests that table creation succeeds when the table already exists. */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("column1").setType("String"),
                  new TableFieldSchema().setName("column2").setType("Integer")));
  Table testTable = new Table().setTableReference(ref).setSchema(schema);

  when(response.getStatusCode()).thenReturn(409); // 409 means already exists

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  Table ret =
      services.tryCreateTable(
          testTable, new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF), Sleeper.DEFAULT);

  assertNull(ret);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
Example #16
Source File: ParDoEvaluatorTest.java From beam with Apache License 2.0 | 5 votes |
private ParDoEvaluator<Integer> createEvaluator(
    PCollectionView<Integer> singletonView,
    RecorderFn fn,
    PCollection<Integer> input,
    PCollection<Integer> output) {
  when(evaluationContext.createSideInputReader(ImmutableList.of(singletonView)))
      .thenReturn(new ReadyInGlobalWindowReader());
  DirectExecutionContext executionContext = mock(DirectExecutionContext.class);
  DirectStepContext stepContext = mock(DirectStepContext.class);
  when(executionContext.getStepContext(Mockito.any(String.class))).thenReturn(stepContext);
  when(stepContext.getTimerUpdate()).thenReturn(TimerUpdate.empty());
  when(evaluationContext.getExecutionContext(
          Mockito.any(AppliedPTransform.class), Mockito.any(StructuralKey.class)))
      .thenReturn(executionContext);

  DirectGraphs.performDirectOverrides(p);
  @SuppressWarnings("unchecked")
  AppliedPTransform<PCollection<Integer>, ?, ?> transform =
      (AppliedPTransform<PCollection<Integer>, ?, ?>) DirectGraphs.getProducer(output);
  return ParDoEvaluator.create(
      evaluationContext,
      PipelineOptionsFactory.create(),
      stepContext,
      transform,
      input.getCoder(),
      input.getWindowingStrategy(),
      fn,
      null /* key */,
      ImmutableList.of(singletonView),
      mainOutputTag,
      additionalOutputTags,
      ImmutableMap.of(mainOutputTag, output),
      DoFnSchemaInformation.create(),
      Collections.emptyMap(),
      ParDoEvaluator.defaultRunnerFactory());
}
Example #17
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void shouldAcceptExplicitlySetIdleSourcesFlagWithoutCheckpointing() {
  // Checkpointing disabled, accept flag
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setShutdownSourcesAfterIdleMs(42L);
  FlinkExecutionEnvironments.createStreamExecutionEnvironment(options, Collections.emptyList());
  assertThat(options.getShutdownSourcesAfterIdleMs(), is(42L));
}
Example #18
Source File: CreateTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testSourceSplitVoid() throws Exception {
  CreateSource<Void> source =
      CreateSource.fromIterable(
          Lists.newArrayList(null, null, null, null, null), VoidCoder.of());
  PipelineOptions options = PipelineOptionsFactory.create();
  List<? extends BoundedSource<Void>> splitSources = source.split(3, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splitSources, options);
}
Example #19
Source File: GcsUtilTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testMultipleThreadsCanCompleteOutOfOrderWithDefaultThreadPool() throws Exception {
  GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class);
  ExecutorService executorService = pipelineOptions.getExecutorService();

  int numThreads = 100;
  final CountDownLatch[] countDownLatches = new CountDownLatch[numThreads];
  for (int i = 0; i < numThreads; i++) {
    final int currentLatch = i;
    countDownLatches[i] = new CountDownLatch(1);
    executorService.execute(
        () -> {
          // Wait for latch N and then release latch N - 1
          try {
            countDownLatches[currentLatch].await();
            if (currentLatch > 0) {
              countDownLatches[currentLatch - 1].countDown();
            }
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
          }
        });
  }

  // Release the last latch starting the chain reaction.
  countDownLatches[countDownLatches.length - 1].countDown();
  executorService.shutdown();
  assertTrue(
      "Expected tasks to complete", executorService.awaitTermination(10, TimeUnit.SECONDS));
}
Example #20
Source File: ConfigGeneratorTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testBeamStoreConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestStoreConfig");
  options.setRunner(SamzaRunner.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());

  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config = configBuilder.build();

  assertEquals(
      RocksDbKeyValueStorageEngineFactory.class.getName(),
      config.get("stores.beamStore.factory"));
  assertEquals("byteArraySerde", config.get("stores.beamStore.key.serde"));
  assertEquals("byteSerde", config.get("stores.beamStore.msg.serde"));
  assertNull(config.get("stores.beamStore.changelog"));

  options.setStateDurable(true);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config2 = configBuilder.build();
  assertEquals(
      "TestStoreConfig-1-beamStore-changelog", config2.get("stores.beamStore.changelog"));
}
Example #21
Source File: PubSubToElasticsearch.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Main entry point for executing the pipeline.
 *
 * @param args The command-line arguments to the pipeline.
 */
public static void main(String[] args) {

  // Parse the user options passed from the command-line.
  PubSubToElasticsearchOptions pubSubToElasticsearchOptions =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(PubSubToElasticsearchOptions.class);

  run(pubSubToElasticsearchOptions);
}
Example #22
Source File: BigQueryToElasticsearchTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/** Test the {@link BigQueryToElasticsearch} pipeline end-to-end. */
@Test
public void testBigQueryToElasticsearchE2E() {
  BigQueryToElasticsearch.BigQueryToElasticsearchReadOptions options =
      PipelineOptionsFactory.create()
          .as(BigQueryToElasticsearch.BigQueryToElasticsearchReadOptions.class);

  options.setNodeAddresses("http://my-node");
  options.setIndex("test");
  options.setDocumentType("_doc");
  options.setInputTableSpec("my-project:my-dataset.my-table");
  options.setQuery(null);

  // Build pipeline
  PCollection<String> testStrings =
      pipeline
          .apply("CreateInput", Create.of(rows))
          .apply("TestTableRowToJson", ParDo.of(new BigQueryConverters.TableRowToJsonFn()));

  PAssert.that(testStrings)
      .satisfies(
          collection -> {
            String result = collection.iterator().next();
            assertThat(result, is(equalTo(jsonifiedTableRow)));
            return null;
          });

  // Execute pipeline
  pipeline.run();
}
Example #23
Source File: FlinkDoFnFunctionTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkDoFnFunction doFnFunction =
      new TestDoFnFunction(
          "step",
          WindowingStrategy.globalDefault(),
          Collections.emptyMap(),
          PipelineOptionsFactory.create(),
          Collections.emptyMap(),
          new TupleTag<>(),
          null,
          Collections.emptyMap(),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  doFnFunction.open(new Configuration());

  String metricContainerFieldName = "metricContainer";
  FlinkMetricContainer monitoredContainer =
      Mockito.spy(
          (FlinkMetricContainer)
              Whitebox.getInternalState(doFnFunction, metricContainerFieldName));
  Whitebox.setInternalState(doFnFunction, metricContainerFieldName, monitoredContainer);

  doFnFunction.close();
  Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
}
Example #24
Source File: CombineTest.java From beam with Apache License 2.0 | 5 votes |
@BeforeClass
public static void beforeClass() {
  SparkStructuredStreamingPipelineOptions options =
      PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class);
  options.setRunner(SparkStructuredStreamingRunner.class);
  options.setTestMode(true);
  pipeline = Pipeline.create(options);
}
Example #25
Source File: AvroByteReaderTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testRequestDynamicSplitExhaustive() throws Exception {
  List<List<String>> elements = generateInputBlocks(5, 10 * 10, 10);
  Coder<String> coder = StringUtf8Coder.of();
  AvroFileInfo<String> fileInfo = initInputFile(elements, coder);
  AvroByteReader<String> reader =
      new AvroByteReader<String>(
          fileInfo.filename, 0L, Long.MAX_VALUE, coder, PipelineOptionsFactory.create());
  for (float splitFraction = 0.0F; splitFraction < 1.0F; splitFraction += 0.02F) {
    for (int recordsToRead = 0; recordsToRead <= 500; recordsToRead += 5) {
      testRequestDynamicSplitInternal(
          reader, splitFraction, recordsToRead, SplitVerificationBehavior.DO_NOT_VERIFY);
    }
  }
}
Example #26
Source File: CreateTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testSourceSplit() throws Exception {
  CreateSource<Integer> source =
      CreateSource.fromIterable(
          ImmutableList.of(1, 2, 3, 4, 5, 6, 7, 8), BigEndianIntegerCoder.of());
  PipelineOptions options = PipelineOptionsFactory.create();
  List<? extends BoundedSource<Integer>> splitSources = source.split(12, options);
  assertThat(splitSources, hasSize(3));
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splitSources, options);
}
Example #27
Source File: DataflowRunnerTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void testGcsStagingLocationInitialization() throws Exception {
  // Set temp location (required), and check that staging location is set.
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setProject(PROJECT_ID);
  options.setRegion(REGION_ID);
  options.setGcpCredential(new TestCredential());
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  DataflowRunner.fromOptions(options);

  assertNotNull(options.getStagingLocation());
}
Example #28
Source File: AvroByteReaderTest.java From beam with Apache License 2.0 | 5 votes |
private <T> List<T> readElems(
    String filename,
    long startOffset,
    long endOffset,
    Coder<T> coder,
    List<Integer> actualSizes)
    throws Exception {
  AvroByteReader<T> avroReader =
      new AvroByteReader<>(
          filename, startOffset, endOffset, coder, PipelineOptionsFactory.create());
  new ExecutorTestUtils.TestReaderObserver(avroReader, actualSizes);
  return readAllFromReader(avroReader);
}
Example #29
Source File: FlinkExecutionEnvironmentsTest.java From beam with Apache License 2.0 | 5 votes |
@Test
public void shouldInferParallelismFromEnvironmentStreaming() throws IOException {
  String confDir = extractFlinkConfig();

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(TestFlinkRunner.class);
  options.setFlinkMaster("host:80");

  StreamExecutionEnvironment sev =
      FlinkExecutionEnvironments.createStreamExecutionEnvironment(
          options, Collections.emptyList(), confDir);

  assertThat(options.getParallelism(), is(23));
  assertThat(sev.getParallelism(), is(23));
}
Example #30
Source File: KafkaToGCSTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Test whether {@link FileFormatFactory} maps the output file format to the transform to be
 * carried out. And throws illegal argument exception if invalid file format is passed.
 */
@Test
@Category(NeedsRunner.class)
public void testFileFormatFactoryInvalid() {

  // Create the test input.
  final String key = "Name";
  final String value = "Generic";
  final KV<String, String> message = KV.of(key, value);

  final String outputDirectory = "gs://bucket_name/path/to/output-location";
  final String outputFileFormat = "json".toUpperCase();
  final String outputFilenamePrefix = "output";
  final Integer numShards = 1;
  final String tempOutputDirectory = "gs://bucket_name/path/to/temp-location";

  KafkaToGCSOptions options = PipelineOptionsFactory.create().as(KafkaToGCSOptions.class);

  options.setOutputFileFormat(outputFileFormat);
  options.setOutputDirectory(outputDirectory);
  options.setOutputFilenamePrefix(outputFilenamePrefix);
  options.setNumShards(numShards);
  options.setTempLocation(tempOutputDirectory);

  exception.expect(IllegalArgumentException.class);

  PCollection<KV<String, String>> records =
      pipeline.apply(
          "CreateInput",
          Create.of(message).withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));

  records.apply("WriteToGCS", FileFormatFactory.newBuilder().setOptions(options).build());

  // Run the pipeline.
  pipeline.run();
}