com.google.cloud.dataflow.sdk.Pipeline Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.Pipeline.
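All of the examples below follow the same basic shape: build a PipelineOptions object from the command line, create a Pipeline from it, chain apply() calls to add transforms, and finally call run(). The minimal sketch below shows just that skeleton; the input and output paths are placeholder values and are not taken from any of the projects listed here.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class MinimalPipeline {
  public static void main(String[] args) {
    // Parse command-line flags (runner, project, staging location, ...) into PipelineOptions.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();

    // The Pipeline is the root of the transform graph; every transform is attached via apply().
    Pipeline p = Pipeline.create(options);

    // Placeholder paths: read lines of text and write them back out unchanged.
    p.apply(TextIO.Read.named("ReadLines").from("/tmp/input.txt"))
        .apply(TextIO.Write.named("WriteLines").to("/tmp/output"));

    // run() hands the constructed graph to whichever runner the options selected.
    p.run();
  }
}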
Example #1
Source File: FXTimeSeriesPipelineSRGTests.java From data-timeseries-java with Apache License 2.0
@org.junit.Test
public void testCompleteWindowData() {
  Pipeline pipeline = setup();
  List<KV<String, TSProto>> pipelineData = GenerateSampleData.getTestData();
  List<KV<String, TSProto>> testData = new ArrayList<KV<String, TSProto>>(pipelineData);
  WorkPacketConfig packetConfig = GenerateSampleData.generateWorkPacketConfig(2);

  PCollection<KV<String, TSProto>> completeWindowData =
      generateCompleteWindowData(pipeline, pipelineData, packetConfig);

  testData.add(KV.of(GenerateSampleData.TS3, TSProto.newBuilder().setKey(GenerateSampleData.TS3)
      .setIsLive(false).setTime(1451577839999L).build()));
  testData.add(KV.of(GenerateSampleData.TS4, TSProto.newBuilder().setKey(GenerateSampleData.TS4)
      .setIsLive(false).setTime(1451577839999L).build()));

  DataflowAssert.that(completeWindowData).containsInAnyOrder(testData);
  pipeline.run();
}
Example #2
Source File: FXTimeSeriesPipelineSRGTests.java From data-timeseries-java with Apache License 2.0
public PCollection<KV<String, TSProto>> setupDataInput(Pipeline pipeline,
    List<KV<String, TSProto>> data) {

  // Assert that we have 44 Elements in the PCollection
  PCollection<KV<String, TSProto>> tsData =
      pipeline.apply("ReadData", Create.of(data))
          .apply(ParDo.of(new DoFn<KV<String, TSProto>, KV<String, TSProto>>() {
            @Override
            public void processElement(ProcessContext c) throws Exception {
              c.outputWithTimestamp(c.element(),
                  new DateTime(c.element().getValue().getTime()).toInstant());
            }
          })).setName("Assign TimeStamps");

  return tsData;
}
Example #3
Source File: TimeSeriesCoders.java From data-timeseries-java with Apache License 2.0
public static void registerCoders(Pipeline pipeline) {
  LOG.debug("Register TSProto coder");
  pipeline.getCoderRegistry().registerCoder(TSProto.class, ProtoCoder.of(TSProto.class));
  LOG.debug("Register TSAggValueProto coder");
  pipeline.getCoderRegistry().registerCoder(TSAggValueProto.class,
      ProtoCoder.of(TSAggValueProto.class));
  LOG.debug("Register WorkPacketConfig coder");
  pipeline.getCoderRegistry().registerCoder(WorkPacketConfig.class,
      ProtoCoder.of(WorkPacketConfig.class));
  LOG.debug("Register WorkPacketKey coder");
  pipeline.getCoderRegistry().registerCoder(WorkPacketKey.class,
      ProtoCoder.of(WorkPacketKey.class));
  LOG.debug("Register WorkDataPoint coder");
  pipeline.getCoderRegistry().registerCoder(WorkDataPoint.class,
      ProtoCoder.of(WorkDataPoint.class));
  LOG.debug("Register WorkPartition coder");
  pipeline.getCoderRegistry().registerCoder(WorkPartition.class,
      ProtoCoder.of(WorkPartition.class));
  LOG.debug("Register Correlation coder");
  pipeline.getCoderRegistry().registerCoder(Correlation.class,
      ProtoCoder.of(Correlation.class));
}
Example #4
Source File: ReadSourceITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> result = p
      .apply(Read.from(new ReadSource(1, 10)))
      .apply(ParDo.of(new DoFn<Integer, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }));

  result.apply(TextIO.Write.to(resultPath));
  p.run();
}
Example #5
Source File: RemoveDuplicatesITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  List<String> strings = Arrays.asList("k1", "k5", "k5", "k2", "k1", "k2", "k3");

  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> input = p.apply(Create.of(strings))
      .setCoder(StringUtf8Coder.of());

  PCollection<String> output = input.apply(RemoveDuplicates.<String>create());

  output.apply(TextIO.Write.to(resultPath));
  p.run();
}
Example #6
Source File: TaskRunner.java From dockerflow with Apache License 2.0
/** Run a Docker workflow on Dataflow. */
public static void run(Workflow w, Map<String, WorkflowArgs> a, DataflowPipelineOptions o)
    throws IOException {
  LOG.info("Running workflow graph");
  if (w.getArgs().getProjectId() == null) {
    throw new IllegalArgumentException("Project id is required");
  }

  Pipeline p = DataflowFactory.dataflow(w, a, o);

  LOG.info("Created Dataflow pipeline");
  LOG.debug(w.toString());

  PipelineResult r = p.run();

  LOG.info("Dataflow pipeline completed");
  LOG.info("Result state: " + r.getState());
}
Example #7
Source File: SideInputITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  final PCollectionView<String> sidesInput = p
      .apply(Create.of(expected))
      .apply(View.<String>asSingleton());

  p.apply(Create.of("bli"))
      .apply(ParDo.of(new DoFn<String, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          String s = c.sideInput(sidesInput);
          c.output(s);
        }
      }).withSideInputs(sidesInput))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #8
Source File: TfIdfITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline pipeline = FlinkTestPipeline.createForBatch();

  pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

  PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf = pipeline
      .apply(Create.of(
          KV.of(new URI("x"), "a b c d"),
          KV.of(new URI("y"), "a b c"),
          KV.of(new URI("z"), "a m n")))
      .apply(new TfIdf.ComputeTfIdf());

  PCollection<String> words = wordToUriAndTfIdf
      .apply(Keys.<String>create())
      .apply(RemoveDuplicates.<String>create());

  words.apply(TextIO.Write.to(resultPath));

  pipeline.run();
}
Example #9
Source File: WordCountJoin2ITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  /* Create two PCollections and join them */
  PCollection<KV<String, Long>> occurences1 = p.apply(Create.of(WORDS_1))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Count.<String>perElement());

  PCollection<KV<String, Long>> occurences2 = p.apply(Create.of(WORDS_2))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Count.<String>perElement());

  /* CoGroup the two collections */
  PCollection<KV<String, CoGbkResult>> mergedOccurences = KeyedPCollectionTuple
      .of(tag1, occurences1)
      .and(tag2, occurences2)
      .apply(CoGroupByKey.<String>create());

  /* Format output */
  mergedOccurences.apply(ParDo.of(new FormatCountsFn()))
      .apply(TextIO.Write.named("test").to(resultPath));

  p.run();
}
Example #10
Source File: RemoveDuplicatesEmptyITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  List<String> strings = Collections.emptyList();

  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> input = p.apply(Create.of(strings))
      .setCoder(StringUtf8Coder.of());

  PCollection<String> output = input.apply(RemoveDuplicates.<String>create());

  output.apply(TextIO.Write.to(resultPath));
  p.run();
}
Example #11
Source File: ExportedServiceAccountKeyRemover.java From policyscanner with Apache License 2.0
private PCollection<String> constructPipeline(Pipeline pipeline, String org) {
  // Read projects from the CRM API.
  PCollection<GCPProject> projects =
      pipeline.apply(Read.from(new LiveProjectSource(org)));
  // List the service accounts of the projects.
  PCollection<GCPServiceAccount> serviceAccounts =
      projects.apply(ParDo.named("List Service Accounts").of(new ListServiceAccounts()));
  // List the keys of the service accounts.
  PCollection<GCPServiceAccountKey> serviceAccountKeys =
      serviceAccounts.apply(ParDo.named("List Service Account Keys")
          .of(new ListServiceAccountKeys()));
  // Construct an alert message for all the discrepancies found.
  return serviceAccountKeys.apply(ParDo
      .named("Remove user-managed keys")
      .of(new ExportedServiceAccountKeyMessenger()));
}
Example #12
Source File: UnboundedSourceITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForStreaming();

  PCollection<String> result = p
      .apply(Read.from(new RangeReadSource(1, 10)))
      .apply(Window.<Integer>into(new GlobalWindows())
          .triggering(AfterPane.elementCountAtLeast(10))
          .discardingFiredPanes())
      .apply(ParDo.of(new DoFn<Integer, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }));

  result.apply(TextIO.Write.to(resultPath));

  try {
    p.run();
    fail();
  } catch (Exception e) {
    assertEquals("The source terminates as expected.", e.getCause().getCause().getMessage());
  }
}
Example #13
Source File: FilterRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
          .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
          .timestampLabel("ts")
          .withCoder(TableRowJsonCoder.of()))
      .apply("filter lower Manhattan", ParDo.of(new FilterLowerManhattan()))
      .apply(PubsubIO.Write.named("WriteToPubsub")
          .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
          .withCoder(TableRowJsonCoder.of()));

  p.run();
}
Example #14
Source File: LoadBooks.java From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) {
  // CloudBigtableOptions is one way to retrieve the options. It's not required.
  // https://github.com/GoogleCloudPlatform/cloud-bigtable-examples/blob/master/java/dataflow-connector-examples/src/main/java/com/google/cloud/bigtable/dataflow/example/HelloWorldWrite.java
  BigtableCsvOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableCsvOptions.class);
  CloudBigtableTableConfiguration config =
      CloudBigtableTableConfiguration.fromCBTOptions(options);

  Pipeline p = Pipeline.create(options);
  CloudBigtableIO.initializeForWrite(p);

  PCollection<KV<String, Integer>> ngrams =
      applyPipelineToParseBooks(p.apply(TextIO.Read.from(options.getInputFile())));
  PCollection<Mutation> mutations = ngrams.apply(ParDo.of(ENCODE_NGRAM));
  mutations.apply(CloudBigtableIO.writeToTable(config));

  // Run the pipeline.
  p.run();
}
Example #15
Source File: LoadBooksTest.java From cloud-bigtable-examples with Apache License 2.0
@Test
public void parseBooks_returnsNgramsCounts() {
  // Arrange
  Pipeline p = TestPipeline.create();
  PCollection<String> input = p.apply(Create.of(testFile));

  // Act
  PCollection<KV<String, Integer>> output = LoadBooks.applyPipelineToParseBooks(input);

  // Assert
  DataflowAssert.that(output)
      .containsInAnyOrder(
          KV.of("despatch when art", 10),
          KV.of("despatch when came", 10),
          KV.of("despatch when published", 12),
          KV.of("despatch where was", 10),
          KV.of("despatch which made", 45),
          // There are two entries for "despatch which addressed".
          // Each entry has a different part of speech for "addressed".
          KV.of("despatch which addressed", 12 + 46),
          KV.of("despatch which admitted", 13),
          KV.of("despatch which allow", 14),
          KV.of("despatch which announced", 50),
          KV.of("despatch which answer", 32));
}
Example #16
Source File: CountRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
          .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
          .timestampLabel("ts")
          .withCoder(TableRowJsonCoder.of()))
      .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))
      .apply("mark rides", MapElements.via(new MarkRides()))
      .apply("count similar", Count.perKey())
      .apply("format rides", MapElements.via(new TransformRides()))
      .apply(PubsubIO.Write.named("WriteToPubsub")
          .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
          .withCoder(TableRowJsonCoder.of()));

  p.run();
}
Example #17
Source File: CoinbaseSource.java From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) {
  CloudBigtableOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CloudBigtableOptions.class);

  CloudBigtableScanConfiguration config =
      new CloudBigtableScanConfiguration.Builder()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId())
          .build();

  options.setStreaming(true);
  options.setRunner(DataflowPipelineRunner.class);

  Pipeline p = Pipeline.create(options);
  CloudBigtableIO.initializeForWrite(p);

  p.apply(Read.from(new CoinbaseSource()))
      .apply(ParDo.named("DeserializeCoinbase").of(new DeserializeCoinbase()))
      .apply(ParDo.of(new HBaseBigtableWriter()))
      .apply(CloudBigtableIO.writeToTable(config));

  p.run();
}
Example #18
Source File: JoinExamples.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  PTransform<? super PBegin, PCollection<String>> readSourceA =
      Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)).named("FirstStream");
  PTransform<? super PBegin, PCollection<String>> readSourceB =
      Read.from(new UnboundedSocketSource<>("localhost", 9998, '\n', 3)).named("SecondStream");

  WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

  Pipeline p = Pipeline.create(options);

  // the following two 'applys' create multiple inputs to our pipeline, one for each
  // of our two input sources.
  PCollection<String> streamA = p.apply(readSourceA)
      .apply(Window.<String>into(windowFn)
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());
  PCollection<String> streamB = p.apply(readSourceB)
      .apply(Window.<String>into(windowFn)
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  PCollection<String> formattedResults = joinEvents(streamA, streamB);
  formattedResults.apply(TextIO.Write.to("./outputJoin.txt"));
  p.run();
}
Example #19
Source File: WordCount.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
      .as(Options.class);
  options.setRunner(FlinkPipelineRunner.class);

  Pipeline p = Pipeline.create(options);

  p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));

  p.run();
}
Example #20
Source File: KafkaWindowedWordCountExample.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) {
  PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
  KafkaStreamingWordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
  options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " "
      + options.getBroker() + " " + options.getGroup());

  Pipeline pipeline = Pipeline.create(options);

  Properties p = new Properties();
  p.setProperty("zookeeper.connect", options.getZookeeper());
  p.setProperty("bootstrap.servers", options.getBroker());
  p.setProperty("group.id", options.getGroup());

  // this is the Flink consumer that reads the input to
  // the program from a kafka topic.
  FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(
      options.getKafkaTopic(),
      new SimpleStringSchema(), p);

  PCollection<String> words = pipeline
      .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  PCollection<KV<String, Long>> wordCounts =
      words.apply(Count.<String>perElement());

  wordCounts.apply(ParDo.of(new FormatAsStringFn()))
      .apply(TextIO.Write.to("./outputKafka.txt"));

  pipeline.run();
}
Example #21
Source File: TFIDF.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setRunner(FlinkPipelineRunner.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

  pipeline
      .apply(new ReadDocuments(listInputDocuments(options)))
      .apply(new ComputeTfIdf())
      .apply(new WriteTfIdf(options.getOutput()));

  pipeline.run();
}
Example #22
Source File: WindowedWordCount.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws IOException {
  StreamingWordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(StreamingWordCountOptions.class);
  options.setStreaming(true);
  options.setWindowSize(10L);
  options.setSlide(5L);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  LOG.info("Windowed WordCount with Sliding Windows of " + options.getWindowSize()
      + " sec. and a slide of " + options.getSlide());

  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> words = pipeline
      .apply(Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)).named("StreamingWordCount"))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(SlidingWindows.of(Duration.standardSeconds(options.getWindowSize()))
              .every(Duration.standardSeconds(options.getSlide())))
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  PCollection<KV<String, Long>> wordCounts =
      words.apply(Count.<String>perElement());

  wordCounts.apply(ParDo.of(new FormatAsStringFn()))
      .apply(TextIO.Write.to("./outputWordCount.txt"));

  pipeline.run();
}
Example #23
Source File: AutoComplete.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws IOException {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  PTransform<? super PBegin, PCollection<String>> readSource =
      Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)).named("WordStream");
  WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

  // Create the pipeline.
  Pipeline p = Pipeline.create(options);

  PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
      .apply(readSource)
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(windowFn)
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes())
      .apply(ComputeTopCompletions.top(10, options.getRecursive()));

  toWrite
      .apply(ParDo.named("FormatForPerTaskFile").of(new FormatForPerTaskLocalFile()))
      .apply(TextIO.Write.to("./outputAutoComplete.txt"));

  p.run();
}
Example #24
Source File: FlinkPipelineExecutionEnvironment.java From flink-dataflow with Apache License 2.0
/**
 * Depending on if the job is a Streaming or a Batch one, this method creates
 * the necessary execution environment and pipeline translator, and translates
 * the {@link com.google.cloud.dataflow.sdk.values.PCollection} program into
 * a {@link org.apache.flink.api.java.DataSet} or
 * {@link org.apache.flink.streaming.api.datastream.DataStream} one.
 */
public void translate(Pipeline pipeline) {
  checkInitializationState();
  if (this.flinkBatchEnv == null && this.flinkStreamEnv == null) {
    createPipelineExecutionEnvironment();
  }
  if (this.flinkPipelineTranslator == null) {
    createPipelineTranslator();
  }
  this.flinkPipelineTranslator.translate(pipeline);
}
Example #25
Source File: FlinkPipelineRunner.java From flink-dataflow with Apache License 2.0
@Override
public FlinkRunnerResult run(Pipeline pipeline) {
  LOG.info("Executing pipeline using FlinkPipelineRunner.");
  LOG.info("Translating pipeline to Flink program.");

  this.flinkJobEnv.translate(pipeline);

  LOG.info("Starting execution of Flink program.");

  JobExecutionResult result;
  try {
    result = this.flinkJobEnv.executePipeline();
  } catch (Exception e) {
    LOG.error("Pipeline execution failed", e);
    throw new RuntimeException("Pipeline execution failed", e);
  }

  LOG.info("Execution finished in {} msecs", result.getNetRuntime());

  Map<String, Object> accumulators = result.getAllAccumulatorResults();
  if (accumulators != null && !accumulators.isEmpty()) {
    LOG.info("Final aggregator values:");
    for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
      LOG.info("{} : {}", entry.getKey(), entry.getValue());
    }
  }

  return new FlinkRunnerResult(accumulators, result.getNetRuntime());
}
Example #26
Source File: WriteSinkITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForBatch();

  p.apply(Create.of(EXPECTED_RESULT)).setCoder(StringUtf8Coder.of())
      .apply("CustomSink", Write.to(new MyCustomSink(resultPath)));

  p.run();
}
Example #27
Source File: AvroITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String tmpPath, String resultPath) {
  Pipeline p = FlinkTestPipeline.createForBatch();

  p
      .apply(Create.of(
          new User("Joe", 3, "red"),
          new User("Mary", 4, "blue"),
          new User("Mark", 1, "green"),
          new User("Julia", 5, "purple"))
          .withCoder(AvroCoder.of(User.class)))
      .apply(AvroIO.Write.to(tmpPath)
          .withSchema(User.class));

  p.run();

  p = FlinkTestPipeline.createForBatch();

  p
      .apply(AvroIO.Read.from(tmpPath).withSchema(User.class).withoutValidation())
      .apply(ParDo.of(new DoFn<User, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          User u = c.element();
          String result = u.getName() + " " + u.getFavoriteColor() + " " + u.getFavoriteNumber();
          c.output(result);
        }
      }))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #28
Source File: MaybeEmptyTestITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  p.apply(Create.of((Void) null)).setCoder(VoidCoder.of())
      .apply(ParDo.of(new DoFn<Void, String>() {
        @Override
        public void processElement(DoFn<Void, String>.ProcessContext c) {
          c.output(expected);
        }
      }))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #29
Source File: WordCountITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> input = p.apply(Create.of(WORDS)).setCoder(StringUtf8Coder.of());

  input
      .apply(new WordCount.CountWords())
      .apply(MapElements.via(new WordCount.FormatAsTextFn()))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #30
Source File: JoinExamples.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  // the following two 'applys' create multiple inputs to our pipeline, one for each
  // of our two input sources.
  PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
  PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));

  PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
  formattedResults.apply(TextIO.Write.to(options.getOutput()));
  p.run();
}