com.google.cloud.dataflow.sdk.Pipeline Java Examples
The following examples show how to use
com.google.cloud.dataflow.sdk.Pipeline.
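All of the examples below follow the same basic shape: build a PipelineOptions object from the command line, create a Pipeline from it, chain apply() calls to add transforms, and finally call run(). The minimal sketch below shows just that skeleton; the input and output paths are placeholder values and are not taken from any of the projects listed here.

import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;

public class MinimalPipeline {
  public static void main(String[] args) {
    // Parse command-line flags (runner, project, staging location, ...) into PipelineOptions.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();

    // The Pipeline is the root of the transform graph; every transform is attached via apply().
    Pipeline p = Pipeline.create(options);

    // Placeholder paths: read lines of text and write them back out unchanged.
    p.apply(TextIO.Read.named("ReadLines").from("/tmp/input.txt"))
        .apply(TextIO.Write.named("WriteLines").to("/tmp/output"));

    // run() hands the constructed graph to whichever runner the options selected.
    p.run();
  }
}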
Example #1
Source File: FXTimeSeriesPipelineSRGTests.java From data-timeseries-java with Apache License 2.0
@org.junit.Test
public void testCompleteWindowData() {
  Pipeline pipeline = setup();
  List<KV<String, TSProto>> pipelineData = GenerateSampleData.getTestData();
  List<KV<String, TSProto>> testData = new ArrayList<KV<String, TSProto>>(pipelineData);
  WorkPacketConfig packetConfig = GenerateSampleData.generateWorkPacketConfig(2);

  PCollection<KV<String, TSProto>> completeWindowData =
      generateCompleteWindowData(pipeline, pipelineData, packetConfig);

  testData.add(KV.of(GenerateSampleData.TS3, TSProto.newBuilder().setKey(GenerateSampleData.TS3)
      .setIsLive(false).setTime(1451577839999L).build()));
  testData.add(KV.of(GenerateSampleData.TS4, TSProto.newBuilder().setKey(GenerateSampleData.TS4)
      .setIsLive(false).setTime(1451577839999L).build()));

  DataflowAssert.that(completeWindowData).containsInAnyOrder(testData);
  pipeline.run();
}
Example #2
Source File: FXTimeSeriesPipelineSRGTests.java From data-timeseries-java with Apache License 2.0
public PCollection<KV<String, TSProto>> setupDataInput(Pipeline pipeline,
    List<KV<String, TSProto>> data) {

  // Assert that we have 44 Elements in the PCollection
  PCollection<KV<String, TSProto>> tsData =
      pipeline.apply("ReadData", Create.of(data))
          .apply(ParDo.of(new DoFn<KV<String, TSProto>, KV<String, TSProto>>() {
            @Override
            public void processElement(ProcessContext c) throws Exception {
              c.outputWithTimestamp(c.element(),
                  new DateTime(c.element().getValue().getTime()).toInstant());
            }
          })).setName("Assign TimeStamps");

  return tsData;
}
Example #3
Source File: TimeSeriesCoders.java From data-timeseries-java with Apache License 2.0
public static void registerCoders(Pipeline pipeline) {
  LOG.debug("Register TSProto coder");
  pipeline.getCoderRegistry().registerCoder(TSProto.class, ProtoCoder.of(TSProto.class));
  LOG.debug("Register TSAggValueProto coder");
  pipeline.getCoderRegistry().registerCoder(TSAggValueProto.class,
      ProtoCoder.of(TSAggValueProto.class));
  LOG.debug("Register WorkPacketConfig coder");
  pipeline.getCoderRegistry().registerCoder(WorkPacketConfig.class,
      ProtoCoder.of(WorkPacketConfig.class));
  LOG.debug("Register WorkPacketKey coder");
  pipeline.getCoderRegistry().registerCoder(WorkPacketKey.class,
      ProtoCoder.of(WorkPacketKey.class));
  LOG.debug("Register WorkDataPoint coder");
  pipeline.getCoderRegistry().registerCoder(WorkDataPoint.class,
      ProtoCoder.of(WorkDataPoint.class));
  LOG.debug("Register WorkPartition coder");
  pipeline.getCoderRegistry().registerCoder(WorkPartition.class,
      ProtoCoder.of(WorkPartition.class));
  LOG.debug("Register Correlation coder");
  pipeline.getCoderRegistry().registerCoder(Correlation.class,
      ProtoCoder.of(Correlation.class));
}
Example #4
Source File: ReadSourceITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> result = p
      .apply(Read.from(new ReadSource(1, 10)))
      .apply(ParDo.of(new DoFn<Integer, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }));

  result.apply(TextIO.Write.to(resultPath));
  p.run();
}
Example #5
Source File: RemoveDuplicatesITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  List<String> strings = Arrays.asList("k1", "k5", "k5", "k2", "k1", "k2", "k3");

  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> input = p.apply(Create.of(strings))
      .setCoder(StringUtf8Coder.of());

  PCollection<String> output = input.apply(RemoveDuplicates.<String>create());

  output.apply(TextIO.Write.to(resultPath));
  p.run();
}
Example #6
Source File: TaskRunner.java From dockerflow with Apache License 2.0
/** Run a Docker workflow on Dataflow. */
public static void run(Workflow w, Map<String, WorkflowArgs> a, DataflowPipelineOptions o)
    throws IOException {
  LOG.info("Running workflow graph");
  if (w.getArgs().getProjectId() == null) {
    throw new IllegalArgumentException("Project id is required");
  }

  Pipeline p = DataflowFactory.dataflow(w, a, o);

  LOG.info("Created Dataflow pipeline");
  LOG.debug(w.toString());

  PipelineResult r = p.run();

  LOG.info("Dataflow pipeline completed");
  LOG.info("Result state: " + r.getState());
}
Example #7
Source File: SideInputITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  final PCollectionView<String> sidesInput = p
      .apply(Create.of(expected))
      .apply(View.<String>asSingleton());

  p.apply(Create.of("bli"))
      .apply(ParDo.of(new DoFn<String, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          String s = c.sideInput(sidesInput);
          c.output(s);
        }
      }).withSideInputs(sidesInput))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #8
Source File: TfIdfITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline pipeline = FlinkTestPipeline.createForBatch();

  pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

  PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf = pipeline
      .apply(Create.of(
          KV.of(new URI("x"), "a b c d"),
          KV.of(new URI("y"), "a b c"),
          KV.of(new URI("z"), "a m n")))
      .apply(new TfIdf.ComputeTfIdf());

  PCollection<String> words = wordToUriAndTfIdf
      .apply(Keys.<String>create())
      .apply(RemoveDuplicates.<String>create());

  words.apply(TextIO.Write.to(resultPath));

  pipeline.run();
}
Example #9
Source File: WordCountJoin2ITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  /* Create two PCollections and join them */
  PCollection<KV<String, Long>> occurences1 = p.apply(Create.of(WORDS_1))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Count.<String>perElement());

  PCollection<KV<String, Long>> occurences2 = p.apply(Create.of(WORDS_2))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Count.<String>perElement());

  /* CoGroup the two collections */
  PCollection<KV<String, CoGbkResult>> mergedOccurences = KeyedPCollectionTuple
      .of(tag1, occurences1)
      .and(tag2, occurences2)
      .apply(CoGroupByKey.<String>create());

  /* Format output */
  mergedOccurences.apply(ParDo.of(new FormatCountsFn()))
      .apply(TextIO.Write.named("test").to(resultPath));

  p.run();
}
Example #10
Source File: RemoveDuplicatesEmptyITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  List<String> strings = Collections.emptyList();

  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> input = p.apply(Create.of(strings))
      .setCoder(StringUtf8Coder.of());

  PCollection<String> output = input.apply(RemoveDuplicates.<String>create());

  output.apply(TextIO.Write.to(resultPath));
  p.run();
}
Example #11
Source File: ExportedServiceAccountKeyRemover.java From policyscanner with Apache License 2.0
private PCollection<String> constructPipeline(Pipeline pipeline, String org) {
  // Read projects from the CRM API.
  PCollection<GCPProject> projects =
      pipeline.apply(Read.from(new LiveProjectSource(org)));
  // List the service accounts of the projects.
  PCollection<GCPServiceAccount> serviceAccounts =
      projects.apply(ParDo.named("List Service Accounts").of(new ListServiceAccounts()));
  // List the keys of the service accounts.
  PCollection<GCPServiceAccountKey> serviceAccountKeys =
      serviceAccounts.apply(ParDo.named("List Service Account Keys")
          .of(new ListServiceAccountKeys()));
  // Construct an alert message for all the discrepancies found.
  return serviceAccountKeys.apply(ParDo
      .named("Remove user-managed keys")
      .of(new ExportedServiceAccountKeyMessenger()));
}
Example #12
Source File: UnboundedSourceITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForStreaming();

  PCollection<String> result = p
      .apply(Read.from(new RangeReadSource(1, 10)))
      .apply(Window.<Integer>into(new GlobalWindows())
          .triggering(AfterPane.elementCountAtLeast(10))
          .discardingFiredPanes())
      .apply(ParDo.of(new DoFn<Integer, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element().toString());
        }
      }));

  result.apply(TextIO.Write.to(resultPath));

  try {
    p.run();
    fail();
  } catch (Exception e) {
    assertEquals("The source terminates as expected.", e.getCause().getCause().getMessage());
  }
}
Example #13
Source File: FilterRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
          .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
          .timestampLabel("ts")
          .withCoder(TableRowJsonCoder.of()))
      .apply("filter lower Manhattan", ParDo.of(new FilterLowerManhattan()))
      .apply(PubsubIO.Write.named("WriteToPubsub")
          .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
          .withCoder(TableRowJsonCoder.of()));

  p.run();
}
Example #14
Source File: LoadBooks.java From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) {
  // CloudBigtableOptions is one way to retrieve the options. It's not required.
  // https://github.com/GoogleCloudPlatform/cloud-bigtable-examples/blob/master/java/dataflow-connector-examples/src/main/java/com/google/cloud/bigtable/dataflow/example/HelloWorldWrite.java
  BigtableCsvOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(BigtableCsvOptions.class);
  CloudBigtableTableConfiguration config =
      CloudBigtableTableConfiguration.fromCBTOptions(options);

  Pipeline p = Pipeline.create(options);
  CloudBigtableIO.initializeForWrite(p);

  PCollection<KV<String, Integer>> ngrams =
      applyPipelineToParseBooks(p.apply(TextIO.Read.from(options.getInputFile())));
  PCollection<Mutation> mutations = ngrams.apply(ParDo.of(ENCODE_NGRAM));
  mutations.apply(CloudBigtableIO.writeToTable(config));

  // Run the pipeline.
  p.run();
}
Example #15
Source File: LoadBooksTest.java From cloud-bigtable-examples with Apache License 2.0
@Test
public void parseBooks_returnsNgramsCounts() {
  // Arrange
  Pipeline p = TestPipeline.create();
  PCollection<String> input = p.apply(Create.of(testFile));

  // Act
  PCollection<KV<String, Integer>> output = LoadBooks.applyPipelineToParseBooks(input);

  // Assert
  DataflowAssert.that(output)
      .containsInAnyOrder(
          KV.of("despatch when art", 10),
          KV.of("despatch when came", 10),
          KV.of("despatch when published", 12),
          KV.of("despatch where was", 10),
          KV.of("despatch which made", 45),
          // There are two entries for "despatch which addressed".
          // Each entry has a different part of speech for "addressed".
          KV.of("despatch which addressed", 12 + 46),
          KV.of("despatch which admitted", 13),
          KV.of("despatch which allow", 14),
          KV.of("despatch which announced", 50),
          KV.of("despatch which answer", 32));
}
Example #16
Source File: CountRides.java From cloud-dataflow-nyc-taxi-tycoon with Apache License 2.0
public static void main(String[] args) {
  CustomPipelineOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CustomPipelineOptions.class);
  Pipeline p = Pipeline.create(options);

  p.apply(PubsubIO.Read.named("read from PubSub")
          .topic(String.format("projects/%s/topics/%s", options.getSourceProject(), options.getSourceTopic()))
          .timestampLabel("ts")
          .withCoder(TableRowJsonCoder.of()))
      .apply("window 1s", Window.into(FixedWindows.of(Duration.standardSeconds(1))))
      .apply("mark rides", MapElements.via(new MarkRides()))
      .apply("count similar", Count.perKey())
      .apply("format rides", MapElements.via(new TransformRides()))
      .apply(PubsubIO.Write.named("WriteToPubsub")
          .topic(String.format("projects/%s/topics/%s", options.getSinkProject(), options.getSinkTopic()))
          .withCoder(TableRowJsonCoder.of()));

  p.run();
}
Example #17
Source File: CoinbaseSource.java From cloud-bigtable-examples with Apache License 2.0
public static void main(String[] args) {
  CloudBigtableOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(CloudBigtableOptions.class);

  CloudBigtableScanConfiguration config =
      new CloudBigtableScanConfiguration.Builder()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId())
          .build();

  options.setStreaming(true);
  options.setRunner(DataflowPipelineRunner.class);

  Pipeline p = Pipeline.create(options);
  CloudBigtableIO.initializeForWrite(p);

  p.apply(Read.from(new CoinbaseSource()))
      .apply(ParDo.named("DeserializeCoinbase").of(new DeserializeCoinbase()))
      .apply(ParDo.of(new HBaseBigtableWriter()))
      .apply(CloudBigtableIO.writeToTable(config));

  p.run();
}
Example #18
Source File: JoinExamples.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  PTransform<? super PBegin, PCollection<String>> readSourceA =
      Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)).named("FirstStream");
  PTransform<? super PBegin, PCollection<String>> readSourceB =
      Read.from(new UnboundedSocketSource<>("localhost", 9998, '\n', 3)).named("SecondStream");

  WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

  Pipeline p = Pipeline.create(options);

  // the following two 'applys' create multiple inputs to our pipeline, one for each
  // of our two input sources.
  PCollection<String> streamA = p.apply(readSourceA)
      .apply(Window.<String>into(windowFn)
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());
  PCollection<String> streamB = p.apply(readSourceB)
      .apply(Window.<String>into(windowFn)
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  PCollection<String> formattedResults = joinEvents(streamA, streamB);
  formattedResults.apply(TextIO.Write.to("./outputJoin.txt"));
  p.run();
}
Example #19
Source File: WordCount.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
      .as(Options.class);
  options.setRunner(FlinkPipelineRunner.class);

  Pipeline p = Pipeline.create(options);

  p.apply(TextIO.Read.named("ReadLines").from(options.getInput()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply(TextIO.Write.named("WriteCounts").to(options.getOutput()));

  p.run();
}
Example #20
Source File: KafkaWindowedWordCountExample.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) {
  PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
  KafkaStreamingWordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).as(KafkaStreamingWordCountOptions.class);
  options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " "
      + options.getBroker() + " " + options.getGroup());

  Pipeline pipeline = Pipeline.create(options);

  Properties p = new Properties();
  p.setProperty("zookeeper.connect", options.getZookeeper());
  p.setProperty("bootstrap.servers", options.getBroker());
  p.setProperty("group.id", options.getGroup());

  // this is the Flink consumer that reads the input to
  // the program from a kafka topic.
  FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(
      options.getKafkaTopic(),
      new SimpleStringSchema(), p);

  PCollection<String> words = pipeline
      .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  PCollection<KV<String, Long>> wordCounts =
      words.apply(Count.<String>perElement());

  wordCounts.apply(ParDo.of(new FormatAsStringFn()))
      .apply(TextIO.Write.to("./outputKafka.txt"));

  pipeline.run();
}
Example #21
Source File: TFIDF.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setRunner(FlinkPipelineRunner.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

  pipeline
      .apply(new ReadDocuments(listInputDocuments(options)))
      .apply(new ComputeTfIdf())
      .apply(new WriteTfIdf(options.getOutput()));

  pipeline.run();
}
Example #22
Source File: WindowedWordCount.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws IOException {
  StreamingWordCountOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(StreamingWordCountOptions.class);
  options.setStreaming(true);
  options.setWindowSize(10L);
  options.setSlide(5L);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  LOG.info("Windowed WordCount with Sliding Windows of " + options.getWindowSize()
      + " sec. and a slide of " + options.getSlide());

  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> words = pipeline
      .apply(Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)).named("StreamingWordCount"))
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(SlidingWindows.of(Duration.standardSeconds(options.getWindowSize()))
              .every(Duration.standardSeconds(options.getSlide())))
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes());

  PCollection<KV<String, Long>> wordCounts =
      words.apply(Count.<String>perElement());

  wordCounts.apply(ParDo.of(new FormatAsStringFn()))
      .apply(TextIO.Write.to("./outputWordCount.txt"));

  pipeline.run();
}
Example #23
Source File: AutoComplete.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws IOException {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  options.setStreaming(true);
  options.setCheckpointingInterval(1000L);
  options.setNumberOfExecutionRetries(5);
  options.setExecutionRetryDelay(3000L);
  options.setRunner(FlinkPipelineRunner.class);

  PTransform<? super PBegin, PCollection<String>> readSource =
      Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)).named("WordStream");
  WindowFn<Object, ?> windowFn = FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));

  // Create the pipeline.
  Pipeline p = Pipeline.create(options);

  PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
      .apply(readSource)
      .apply(ParDo.of(new ExtractWordsFn()))
      .apply(Window.<String>into(windowFn)
          .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
          .discardingFiredPanes())
      .apply(ComputeTopCompletions.top(10, options.getRecursive()));

  toWrite
      .apply(ParDo.named("FormatForPerTaskFile").of(new FormatForPerTaskLocalFile()))
      .apply(TextIO.Write.to("./outputAutoComplete.txt"));

  p.run();
}
Example #24
Source File: FlinkPipelineExecutionEnvironment.java From flink-dataflow with Apache License 2.0
/**
 * Depending on if the job is a Streaming or a Batch one, this method creates
 * the necessary execution environment and pipeline translator, and translates
 * the {@link com.google.cloud.dataflow.sdk.values.PCollection} program into
 * a {@link org.apache.flink.api.java.DataSet} or
 * {@link org.apache.flink.streaming.api.datastream.DataStream} one.
 */
public void translate(Pipeline pipeline) {
  checkInitializationState();
  if (this.flinkBatchEnv == null && this.flinkStreamEnv == null) {
    createPipelineExecutionEnvironment();
  }
  if (this.flinkPipelineTranslator == null) {
    createPipelineTranslator();
  }
  this.flinkPipelineTranslator.translate(pipeline);
}
Example #25
Source File: FlinkPipelineRunner.java From flink-dataflow with Apache License 2.0
@Override
public FlinkRunnerResult run(Pipeline pipeline) {
  LOG.info("Executing pipeline using FlinkPipelineRunner.");
  LOG.info("Translating pipeline to Flink program.");

  this.flinkJobEnv.translate(pipeline);

  LOG.info("Starting execution of Flink program.");

  JobExecutionResult result;
  try {
    result = this.flinkJobEnv.executePipeline();
  } catch (Exception e) {
    LOG.error("Pipeline execution failed", e);
    throw new RuntimeException("Pipeline execution failed", e);
  }

  LOG.info("Execution finished in {} msecs", result.getNetRuntime());

  Map<String, Object> accumulators = result.getAllAccumulatorResults();
  if (accumulators != null && !accumulators.isEmpty()) {
    LOG.info("Final aggregator values:");
    for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
      LOG.info("{} : {}", entry.getKey(), entry.getValue());
    }
  }

  return new FlinkRunnerResult(accumulators, result.getNetRuntime());
}
Example #26
Source File: WriteSinkITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String resultPath) {
  Pipeline p = FlinkTestPipeline.createForBatch();

  p.apply(Create.of(EXPECTED_RESULT)).setCoder(StringUtf8Coder.of())
      .apply("CustomSink", Write.to(new MyCustomSink(resultPath)));

  p.run();
}
Example #27
Source File: AvroITCase.java From flink-dataflow with Apache License 2.0
private static void runProgram(String tmpPath, String resultPath) {
  Pipeline p = FlinkTestPipeline.createForBatch();

  p
      .apply(Create.of(
          new User("Joe", 3, "red"),
          new User("Mary", 4, "blue"),
          new User("Mark", 1, "green"),
          new User("Julia", 5, "purple"))
          .withCoder(AvroCoder.of(User.class)))
      .apply(AvroIO.Write.to(tmpPath)
          .withSchema(User.class));

  p.run();

  p = FlinkTestPipeline.createForBatch();

  p
      .apply(AvroIO.Read.from(tmpPath).withSchema(User.class).withoutValidation())
      .apply(ParDo.of(new DoFn<User, String>() {
        @Override
        public void processElement(ProcessContext c) throws Exception {
          User u = c.element();
          String result = u.getName() + " " + u.getFavoriteColor() + " " + u.getFavoriteNumber();
          c.output(result);
        }
      }))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #28
Source File: MaybeEmptyTestITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  p.apply(Create.of((Void) null)).setCoder(VoidCoder.of())
      .apply(ParDo.of(new DoFn<Void, String>() {
        @Override
        public void processElement(DoFn<Void, String>.ProcessContext c) {
          c.output(expected);
        }
      }))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #29
Source File: WordCountITCase.java From flink-dataflow with Apache License 2.0
@Override
protected void testProgram() throws Exception {
  Pipeline p = FlinkTestPipeline.createForBatch();

  PCollection<String> input = p.apply(Create.of(WORDS)).setCoder(StringUtf8Coder.of());

  input
      .apply(new WordCount.CountWords())
      .apply(MapElements.via(new WordCount.FormatAsTextFn()))
      .apply(TextIO.Write.to(resultPath));

  p.run();
}
Example #30
Source File: JoinExamples.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) throws Exception {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  // the following two 'applys' create multiple inputs to our pipeline, one for each
  // of our two input sources.
  PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
  PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));

  PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
  formattedResults.apply(TextIO.Write.to(options.getOutput()));
  p.run();
}