org.apache.flink.streaming.util.serialization.SimpleStringSchema Java Examples
The following examples show how to use
org.apache.flink.streaming.util.serialization.SimpleStringSchema.
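Every example below hands a SimpleStringSchema instance to a Flink Kafka consumer or producer constructor; the schema is what converts raw Kafka record bytes into Java Strings and back. As a rough sketch of what the class does (an approximation written against the Flink 1.x DeserializationSchema/SerializationSchema interfaces, not a copy of the Flink source):

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.util.serialization.DeserializationSchema;
import org.apache.flink.streaming.util.serialization.SerializationSchema;

// Approximation of SimpleStringSchema: bytes <-> String, nothing more.
public class SimpleStringSchemaSketch
        implements DeserializationSchema<String>, SerializationSchema<String> {

    @Override
    public String deserialize(byte[] message) {
        // interpret the raw Kafka record value as text
        // (recent Flink versions default to UTF-8; older ones used the platform charset)
        return new String(message);
    }

    @Override
    public boolean isEndOfStream(String nextElement) {
        // Kafka topics are unbounded, so never signal end-of-stream
        return false;
    }

    @Override
    public byte[] serialize(String element) {
        return element.getBytes();
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}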
Example #1
Source File: WriteIntoKafka.java From kafka-example with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // parse user parameters
    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    // add a simple source which is writing some strings
    DataStream<String> messageStream = env.addSource(new SimpleStringGenerator());

    // write stream to Kafka
    messageStream.addSink(new KafkaSink<>(parameterTool.getRequired("bootstrap.servers"),
            parameterTool.getRequired("topic"),
            new SimpleStringSchema()));

    env.execute();
}
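The SimpleStringGenerator source is not part of this listing (it also appears in Examples #13 and #14). A hypothetical minimal version is an unbounded SourceFunction<String> that emits strings until the job is cancelled, along these lines:

import org.apache.flink.streaming.api.functions.source.SourceFunction;

// Hypothetical stand-in for the SimpleStringGenerator used above;
// the original class is not shown in these examples.
public static class SimpleStringGenerator implements SourceFunction<String> {
    private static final long serialVersionUID = 1L;
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        long i = 0;
        while (running) {
            ctx.collect("element-" + i++); // emit a new string element
            Thread.sleep(10);              // throttle the generator a little
        }
    }

    @Override
    public void cancel() {
        running = false; // stop the emit loop when the job is cancelled
    }
}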
Example #2
Source File: ReadFromKafka.java From kafka-example with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // parse user parameters
    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    DataStream<String> messageStream = env.addSource(new FlinkKafkaConsumer082<>(
            parameterTool.getRequired("topic"),
            new SimpleStringSchema(),
            parameterTool.getProperties()));

    // print() will write the contents of the stream to the TaskManager's standard out stream.
    // The rebalance call causes a repartitioning of the data so that all machines
    // see the messages (for example in cases when "num kafka partitions" < "num flink operators").
    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = -6867736771747690202L;

        @Override
        public String map(String value) throws Exception {
            return "Kafka and Flink says: " + value;
        }
    }).print();

    env.execute();
}
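Since all configuration comes from ParameterTool.fromArgs(args), the job is configured entirely on the command line. A plausible invocation might look like the following (jar name, entry class, and all connection values are placeholders; FlinkKafkaConsumer082 targets Kafka 0.8, whose consumer also expects zookeeper.connect and group.id in its properties):

flink run -c ReadFromKafka kafka-example.jar --topic test \
    --bootstrap.servers localhost:9092 --zookeeper.connect localhost:2181 --group.id myGroup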
Example #3
Source File: ReadFromKafka.java From kafka-flink-101 with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "flink_consumer");

    DataStream<String> stream = env
            .addSource(new FlinkKafkaConsumer09<>("flink-demo", new SimpleStringSchema(), properties));

    stream.map(new MapFunction<String, String>() {
        private static final long serialVersionUID = -6867736771747690202L;

        @Override
        public String map(String value) throws Exception {
            return "Stream Value: " + value;
        }
    }).print();

    env.execute();
}
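FlinkKafkaConsumer09 is the connector for Kafka 0.9 brokers. To feed this job during a quick test, messages can be pushed into the flink-demo topic with Kafka's console producer (host and topic match the example above; the script name assumes a standard Kafka distribution):

kafka-console-producer.sh --broker-list localhost:9092 --topic flink-demo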
Example #4
Source File: AdvertisingTopologyFlinkState.java From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Create a Kafka source
 */
private static FlinkKafkaConsumer082<String> kafkaSource(BenchmarkConfig config) {
    return new FlinkKafkaConsumer082<>(
            config.kafkaTopic,
            new SimpleStringSchema(),
            config.getParameters().getProperties());
}
Example #5
Source File: KafkaWindowedWordCountExample.java From flink-dataflow with Apache License 2.0
public static void main(String[] args) {
    PipelineOptionsFactory.register(KafkaStreamingWordCountOptions.class);
    KafkaStreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args)
            .as(KafkaStreamingWordCountOptions.class);
    options.setJobName("KafkaExample - WindowSize: " + options.getWindowSize() + " seconds");
    options.setStreaming(true);
    options.setCheckpointingInterval(1000L);
    options.setNumberOfExecutionRetries(5);
    options.setExecutionRetryDelay(3000L);
    options.setRunner(FlinkPipelineRunner.class);

    System.out.println(options.getKafkaTopic() + " " + options.getZookeeper() + " "
            + options.getBroker() + " " + options.getGroup());
    Pipeline pipeline = Pipeline.create(options);

    Properties p = new Properties();
    p.setProperty("zookeeper.connect", options.getZookeeper());
    p.setProperty("bootstrap.servers", options.getBroker());
    p.setProperty("group.id", options.getGroup());

    // this is the Flink consumer that reads the input to
    // the program from a Kafka topic
    FlinkKafkaConsumer08<String> kafkaConsumer = new FlinkKafkaConsumer08<>(
            options.getKafkaTopic(),
            new SimpleStringSchema(), p);

    PCollection<String> words = pipeline
            .apply(Read.from(new UnboundedFlinkSource<>(kafkaConsumer)).named("StreamingWordCount"))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(options.getWindowSize())))
                    .triggering(AfterWatermark.pastEndOfWindow())
                    .withAllowedLateness(Duration.ZERO)
                    .discardingFiredPanes());

    PCollection<KV<String, Long>> wordCounts = words.apply(Count.<String>perElement());

    wordCounts.apply(ParDo.of(new FormatAsStringFn()))
            .apply(TextIO.Write.to("./outputKafka.txt"));

    pipeline.run();
}
Example #6
Source File: AdvertisingTopologyNative.java From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Create Kafka Source
 */
private static FlinkKafkaConsumer082<String> kafkaSource(BenchmarkConfig config) {
    return new FlinkKafkaConsumer082<>(
            config.kafkaTopic,
            new SimpleStringSchema(),
            config.getParameters().getProperties());
}
Example #7
Source File: AdvertisingTopologyFlinkWindows.java From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Configure Kafka source
 */
private static FlinkKafkaConsumer082<String> kafkaSource(BenchmarkConfig config) {
    return new FlinkKafkaConsumer082<>(
            config.kafkaTopic,
            new SimpleStringSchema(),
            config.getParameters().getProperties());
}
Example #8
Source File: AdvertisingTopologyRedisDirect.java From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Choose either Kafka or data generator as source
 */
private static DataStream<String> sourceStream(BenchmarkConfig config, StreamExecutionEnvironment env) {
    RichParallelSourceFunction<String> source;
    String sourceName;
    if (config.useLocalEventGenerator) {
        HighKeyCardinalityGeneratorSource eventGenerator = new HighKeyCardinalityGeneratorSource(config);
        source = eventGenerator;
        sourceName = "EventGenerator";
    } else {
        source = new FlinkKafkaConsumer082<>(config.kafkaTopic, new SimpleStringSchema(),
                config.getParameters().getProperties());
        sourceName = "Kafka";
    }
    return env.addSource(source, sourceName);
}
Example #9
Source File: FlinkKafkaSourceExample.java From huaweicloud-cs-sdk with Apache License 2.0
public void readKafka() {
    // topic and properties are defined elsewhere in the enclosing class
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataStream<String> messageStream = env.addSource(
            new FlinkKafkaConsumer010<String>(topic, new SimpleStringSchema(), properties));
    messageStream.rebalance().print();
    try {
        env.execute();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
Example #10
Source File: AdvertisingTopologyFlinkStateHighKeyCard.java From yahoo-streaming-benchmark with Apache License 2.0
/**
 * Setup kafka source
 */
private static FlinkKafkaConsumer08<String> kafkaSource(BenchmarkConfig config) {
    return new FlinkKafkaConsumer08<>(
            config.kafkaTopic,
            new SimpleStringSchema(),
            config.getParameters().getProperties());
}
Example #11
Source File: KafkaTopicValidator.java From yahoo-streaming-benchmark with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(parameterTool);
    DataStream<String> rawMessageStream = env.addSource(new FlinkKafkaConsumer082<>(
            parameterTool.getRequired("kafka.topic"),
            new SimpleStringSchema(),
            parameterTool.getProperties()));

    rawMessageStream.print();

    env.execute();
}
Example #12
Source File: TwitterIntoKafka.java From flink-streaming-etl with Apache License 2.0
public static void main(String[] args) throws Exception {
    // set up the streaming execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

    DataStream<String> twitterStreamString = env.addSource(new TwitterSource(params.getProperties()));
    DataStream<String> filteredStream = twitterStreamString.flatMap(new ParseJson());
    filteredStream.flatMap(new ThroughputLogger(5000L)).setParallelism(1);

    filteredStream.addSink(new FlinkKafkaProducer09<>("twitter", new SimpleStringSchema(), params.getProperties()));

    // execute program
    env.execute("Ingest data from Twitter to Kafka");
}
Example #13
Source File: WriteToKafka.java From kafka-flink-101 with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");

    DataStream<String> stream = env.addSource(new SimpleStringGenerator());
    stream.addSink(new FlinkKafkaProducer09<>("flink-demo", new SimpleStringSchema(), properties));

    env.execute();
}
Example #14
Source File: WriteIntoKafka.java From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map properties = new HashMap();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "t10");
    properties.put("enable.auto.commit", "false");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "kks-topic-FFT");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);

    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    // add a simple source which is writing some strings
    DataStream<String> messageStream = env.addSource(new SimpleStringGenerator());

    // write stream to Kafka
    messageStream.addSink(new FlinkKafkaProducer010<>(parameterTool.getRequired("bootstrap.servers"),
            parameterTool.getRequired("topic"),
            new SimpleStringSchema()));

    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    });

    messageStream.print();

    env.execute();
}
Example #15
Source File: FlinkKafkaSinkExample.java From huaweicloud-cs-sdk with Apache License 2.0
public void writeKafka() {
    // topic and properties are defined elsewhere in the enclosing class
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataStream<String> messageStream = env.addSource(new KafkaSourceGenerator());
    messageStream.addSink(new FlinkKafkaProducer010<String>(topic, new SimpleStringSchema(), properties));
    try {
        env.execute();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
Example #16
Source File: StreamingJob.java From Mastering-Apache-Flink with MIT License
public static void main(String[] args) throws Exception {
    // set up the streaming execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // env.enableCheckpointing(5000);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("zookeeper.connect", "localhost:2181");
    properties.setProperty("group.id", "test");

    FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<>("temp", new SimpleStringSchema(),
            properties);
    myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

    DataStream<Tuple2<String, Double>> keyedStream = env.addSource(myConsumer).flatMap(new Splitter()).keyBy(0)
            .timeWindow(Time.seconds(300))
            .apply(new WindowFunction<Tuple2<String, Double>, Tuple2<String, Double>, Tuple, TimeWindow>() {
                @Override
                public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<String, Double>> input,
                        Collector<Tuple2<String, Double>> out) throws Exception {
                    double sum = 0L;
                    int count = 0;
                    for (Tuple2<String, Double> record : input) {
                        sum += record.f1;
                        count++;
                    }
                    Tuple2<String, Double> result = input.iterator().next();
                    result.f1 = (sum / count);
                    out.collect(result);
                }
            });

    keyedStream.print();

    // execute program
    env.execute("Flink Streaming Java API Skeleton");
}
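The Splitter and CustomWatermarkEmitter classes are not part of this listing. As an illustration only, a hypothetical Splitter that feeds the averaging window above could parse comma-separated "id,reading" strings into (key, value) pairs like this:

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Hypothetical Splitter: parses lines such as "sensor-1,22.5" into
// (sensorId, reading) pairs and silently drops malformed records.
public static class Splitter implements FlatMapFunction<String, Tuple2<String, Double>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Double>> out) {
        String[] parts = value.split(",");
        if (parts.length == 2) {
            try {
                out.collect(new Tuple2<>(parts[0].trim(), Double.parseDouble(parts[1].trim())));
            } catch (NumberFormatException ignored) {
                // skip records whose reading is not a number
            }
        }
    }
}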
Example #17
Source File: UnitTestSuiteFlink.java From df_data_service with Apache License 2.0
public static void testFlinkSQL() {

    LOG.info("Only Unit Testing Function is enabled");
    String resultFile = "/home/vagrant/test.txt";

    try {
        String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
                .setParallelism(1);
        String kafkaTopic = "finance";
        String kafkaTopic_stage = "df_trans_stage_finance";
        String kafkaTopic_out = "df_trans_out_finance";

        StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "consumer3");

        // internally convert the escaped JSON string to plain JSON - begin
        DataStream<String> stream = env
                .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties));

        stream.map(new MapFunction<String, String>() {
            @Override
            public String map(String jsonString) throws Exception {
                return jsonString.replaceAll("\\\\", "").replace("\"{", "{").replace("}\"", "}");
            }
        }).addSink(new FlinkKafkaProducer09<String>("localhost:9092", kafkaTopic_stage, new SimpleStringSchema()));
        // internally convert the escaped JSON string to plain JSON - end

        String[] fieldNames = new String[] {"name"};
        Class<?>[] fieldTypes = new Class<?>[] {String.class};

        Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource(
                kafkaTopic_stage,
                properties,
                fieldNames,
                fieldTypes);

        //kafkaTableSource.setFailOnMissingField(true);

        tableEnv.registerTableSource("Orders", kafkaTableSource);

        //Table result = tableEnv.sql("SELECT STREAM name FROM Orders");
        Table result = tableEnv.sql("SELECT name FROM Orders");

        Files.deleteIfExists(Paths.get(resultFile));

        // create a TableSink
        TableSink sink = new CsvTableSink(resultFile, "|");
        // write the result Table to the TableSink
        result.writeToSink(sink);

        env.execute("FlinkConsumer");

    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example #18
Source File: AdvertisingTopologyNative.java From streaming-benchmarks with Apache License 2.0
public static void main(final String[] args) throws Exception {
    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    Map conf = Utils.findAndReadConfigFile(parameterTool.getRequired("confPath"), true);
    int kafkaPartitions = ((Number) conf.get("kafka.partitions")).intValue();
    int hosts = ((Number) conf.get("process.hosts")).intValue();
    int cores = ((Number) conf.get("process.cores")).intValue();

    ParameterTool flinkBenchmarkParams = ParameterTool.fromMap(getFlinkConfs(conf));

    LOG.info("conf: {}", conf);
    LOG.info("Parameters used: {}", flinkBenchmarkParams.toMap());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(flinkBenchmarkParams);

    // Set the buffer timeout (default 100).
    // Lowering the timeout will lead to lower latencies, but will eventually reduce throughput.
    env.setBufferTimeout(flinkBenchmarkParams.getLong("flink.buffer-timeout", 100));

    if (flinkBenchmarkParams.has("flink.checkpoint-interval")) {
        // enable checkpointing for fault tolerance
        env.enableCheckpointing(flinkBenchmarkParams.getLong("flink.checkpoint-interval", 1000));
    }

    // set default parallelism for all operators
    // (recommended value: number of available worker CPU cores in the cluster, i.e. hosts * cores)
    env.setParallelism(hosts * cores);

    DataStream<String> messageStream = env
            .addSource(new FlinkKafkaConsumer082<String>(
                    flinkBenchmarkParams.getRequired("topic"),
                    new SimpleStringSchema(),
                    flinkBenchmarkParams.getProperties()))
            .setParallelism(Math.min(hosts * cores, kafkaPartitions));

    messageStream
            .rebalance()
            // Parse the String as JSON
            .flatMap(new DeserializeBolt())
            // Filter the records if event type is "view"
            .filter(new EventFilterBolt())
            // project the event
            .<Tuple2<String, String>>project(2, 5)
            // perform join with redis data
            .flatMap(new RedisJoinBolt())
            // process campaign
            .keyBy(0)
            .flatMap(new CampaignProcessor());

    env.execute();
}
Example #19
Source File: ReadFromKafka.java From flinkDemo with Apache License 2.0
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map properties = new HashMap();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "dec-esc-group-vib-calc");
    properties.put("enable.auto.commit", "true");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "dec-vibration-test");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);

    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    FlinkKafkaConsumer010 consumer010 = new FlinkKafkaConsumer010(
            parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties());
    // consumer010.setStartFromEarliest();

    DataStream<String> messageStream = env.addSource(consumer010);

    // print() will write the contents of the stream to the TaskManager's standard out stream.
    // The rebalance call causes a repartitioning of the data so that all machines
    // see the messages (for example in cases when "num kafka partitions" < "num flink operators").
    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    });

    messageStream.print();

    env.execute();
}
Example #20
Source File: WriteIntoKafka.java From kafka-example with Apache License 2.0
public SimpleStringSchema() { }