Java Code Examples for org.apache.spark.streaming.kafka.KafkaUtils#createStream()
The following examples show how to use org.apache.spark.streaming.kafka.KafkaUtils#createStream().
Each example links to the original project and source file it was taken from.
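All of the examples use the receiver-based (ZooKeeper) consumer from the Spark 1.x spark-streaming-kafka 0.8 connector. The simplest overload takes a ZooKeeper quorum, a consumer group id, and a map of topic names to receiver-thread counts; Examples 2, 6, and 7 use the richer overload that additionally takes key/value classes, decoder classes, a kafkaParams map, and an explicit StorageLevel. The sketch below is a minimal, self-contained illustration of the simple overload, with placeholder values ("localhost:2181", "my-consumer-group", "my-topic") and a hypothetical class name; it is an assumption-laden sketch, not code from any of the projects listed.

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

public class CreateStreamSketch {
    public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setAppName("CreateStreamSketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(2));

        // Topic name -> number of receiver threads reading that topic.
        Map<String, Integer> topics = new HashMap<>();
        topics.put("my-topic", 1);

        // Receiver-based stream of (key, message) pairs. This overload assumes
        // String keys and values and uses the connector's default storage level.
        JavaPairReceiverInputDStream<String, String> stream =
                KafkaUtils.createStream(jssc, "localhost:2181", "my-consumer-group", topics);

        stream.print();
        jssc.start();
        jssc.awaitTermination();
    }
}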
Example 1
Source File: KafkaStreaming.java From sparkResearch with Apache License 2.0
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10000));

    // Set the checkpoint directory
    streamingContext.checkpoint("HDFS URL");

    Map<String, Integer> topicThread = new HashMap<>(1);
    topicThread.put(TOPIC, THREAD);
    JavaPairInputDStream<String, String> dStream =
            KafkaUtils.createStream(streamingContext, HOST, GROP, topicThread);

    JavaDStream<String> words = dStream.flatMap(
            (FlatMapFunction<Tuple2<String, String>, String>) stringStringTuple2 ->
                    Arrays.asList(SPACE.split(stringStringTuple2._2)).iterator());

    // Count word occurrences
    JavaPairDStream<String, Integer> result = words
            .mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1))
            .reduceByKey((Function2<Integer, Integer, Integer>) (v1, v2) -> v1 + v2);

    try {
        result.print();
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example 2
Source File: StreamingContextConfiguration.java From Decision with Apache License 2.0
private void configureDataContext(JavaStreamingContext context) {
    Map<String, Integer> baseTopicMap = new HashMap<>();

    configurationContext.getDataTopics().forEach(dataTopic -> baseTopicMap.put(dataTopic, 1));

    kafkaTopicService.createTopicsIfNotExist(configurationContext.getDataTopics(),
            configurationContext.getKafkaReplicationFactor(), configurationContext.getKafkaPartitions());

    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
    kafkaParams.put("group.id", configurationContext.getGroupId());

    /*
     groupId must be the cluster groupId. Kafka assigns each partition of a topic to one, and only one,
     consumer of the group. Decision topics have only one partition (by default), so if we have two or
     more Decision instances (consumers) reading the same topic with the same groupId, only one
     instance will be able to read from the topic.
     */
    JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class, byte[].class,
            kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, kafkaParams,
            baseTopicMap, StorageLevel.MEMORY_AND_DISK_SER());

    AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();
    JavaDStream<StratioStreamingMessage> insertRequests = messages
            .filter(new FilterAvroMessagesByOperationFunction(STREAM_OPERATIONS.MANIPULATION.INSERT))
            .map(avroDeserializeMessageFunction);

    InsertIntoStreamFunction insertIntoStreamFunction = new InsertIntoStreamFunction(streamOperationService,
            configurationContext.getZookeeperHostsQuorum());
    insertRequests.foreachRDD(insertIntoStreamFunction);
}
Example 3
Source File: JavaKafkaWordCount.java From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();
    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with 2 seconds batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
Example 4
Source File: JavaKafkaReceiverWordCount.java From SparkDemo with MIT License
public static void main(String[] args) {
    StreamingExamples.setStreamingLogLevels();

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaReceiverWordCount").setMaster("local[4]");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6));

    // Key is the topic name, value is the number of receiver threads
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    topicMap.put("2017-7-26", 1);

    String zookeeperList = "master:2181,slave1:2181,slave2:2181";

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(jssc, zookeeperList, "JavaKafkaReceiverWordCount", topicMap);

    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 5
Source File: KafkaReceiverWordCountJava.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";
    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
Example 6
Source File: KafkaStreamRestHandler.java From elasticsearch-rest-command with The Unlicense
@Override
protected void handleRequest(RestRequest request, RestChannel channel, Client client) throws Exception {
    final String topic = request.param("topic", "");
    final boolean schema = request.paramAsBoolean("schema", false);
    final String master = request.param("masterAddress", "local");
    final String hdfs = request.param("hdfs", "hdfs://localhost:50070");
    final String memory = request.param("memory", "2g");
    final String appName = request.param("appName", "appName-" + topic);
    final int duration = request.paramAsInt("duration", 1000);

    Thread exec = new Thread(new Runnable() {
        @Override
        public void run() {
            SparkConf sparkConf = new SparkConf().setAppName(appName).setMaster(master)
                    .set("spark.executor.memory", memory);
            JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(duration));

            Map<String, Integer> topicMap = new HashMap<String, Integer>();
            topicMap.put(topic, 3);

            // The key decoder must decode to String to match the declared key type
            // (DefaultDecoder yields byte[]). Note that kafkaParams is null here; a
            // real deployment would need at least "zookeeper.connect" and "group.id".
            JavaPairReceiverInputDStream<String, byte[]> kafkaStream = KafkaUtils.createStream(jssc,
                    String.class, byte[].class,
                    kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class,
                    null, topicMap, StorageLevel.MEMORY_ONLY());

            //JobConf confHadoop = new JobConf();
            //confHadoop.set("mapred.output.compress", "true");
            //confHadoop.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");

            kafkaStream.saveAsHadoopFiles(hdfs, "seq", Text.class, BytesWritable.class,
                    KafkaStreamSeqOutputFormat.class);

            topicContextMap.put(topic, jssc);
            jssc.start();
            jssc.awaitTermination();
        }
    });
    exec.start();

    channel.sendResponse(new BytesRestResponse(RestStatus.OK, String.format("{\"topic\":\"%s\"}", topic)));
}
Example 7
Source File: StreamingContextConfiguration.java From Decision with Apache License 2.0
private void configureActionContext(JavaStreamingContext context) {
    Map<String, Integer> baseTopicMap = new HashMap<>();

    String topicName = InternalTopic.TOPIC_ACTION.getTopicName();
    if (configurationContext.isClusteringEnabled() && configurationContext.getGroupId() != null) {
        topicName = topicName.concat("_").concat(configurationContext.getGroupId());
    }
    baseTopicMap.put(topicName, 1);

    kafkaTopicService.createTopicIfNotExist(topicName, configurationContext.getKafkaReplicationFactor(),
            configurationContext.getKafkaPartitions());

    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
    kafkaParams.put("group.id", configurationContext.getGroupId());

    /*
     groupId must be the cluster groupId. Kafka assigns each partition of a topic to one, and only one,
     consumer of the group. Decision topics have only one partition (by default), so if we have two or
     more Decision instances (consumers) reading the same topic with the same groupId, only one
     instance will be able to read from the topic.
     */
    JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class, byte[].class,
            kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, kafkaParams,
            baseTopicMap, StorageLevel.MEMORY_AND_DISK_SER());

    AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();
    JavaDStream<StratioStreamingMessage> parsedDataDstream = messages.map(avroDeserializeMessageFunction);

    JavaPairDStream<StreamAction, StratioStreamingMessage> pairedDataDstream = parsedDataDstream
            .mapPartitionsToPair(new PairDataFunction());

    JavaPairDStream<StreamAction, Iterable<StratioStreamingMessage>> groupedDataDstream = pairedDataDstream
            .groupByKey();
    groupedDataDstream.persist(StorageLevel.MEMORY_AND_DISK_SER());

    try {
        SaveToCassandraActionExecutionFunction saveToCassandraActionExecutionFunction =
                new SaveToCassandraActionExecutionFunction(configurationContext.getCassandraHostsQuorum(),
                        configurationContext.getCassandraPort(),
                        configurationContext.getCassandraMaxBatchSize(),
                        configurationContext.getCassandraBatchType(), saveToCassandraOperationsService);
        if (saveToCassandraActionExecutionFunction.check()) {
            log.info("Cassandra is configured properly");
            groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_CASSANDRA)).foreachRDD(
                    saveToCassandraActionExecutionFunction);
        } else {
            log.warn("Cassandra is NOT configured properly");
        }

        SaveToMongoActionExecutionFunction saveToMongoActionExecutionFunction =
                new SaveToMongoActionExecutionFunction(configurationContext.getMongoHosts(),
                        configurationContext.getMongoUsername(), configurationContext.getMongoPassword(),
                        configurationContext.getMongoMaxBatchSize(), mongoClient, mongoDB);
        if (saveToMongoActionExecutionFunction.check()) {
            log.info("MongoDB is configured properly");
            groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_MONGO)).foreachRDD(
                    saveToMongoActionExecutionFunction);
        } else {
            log.warn("MongoDB is NOT configured properly");
        }

        SaveToElasticSearchActionExecutionFunction saveToElasticSearchActionExecutionFunction =
                new SaveToElasticSearchActionExecutionFunction(configurationContext.getElasticSearchHosts(),
                        configurationContext.getElasticSearchClusterName(),
                        configurationContext.getElasticSearchMaxBatchSize(), elasticsearchClient);
        if (saveToElasticSearchActionExecutionFunction.check()) {
            log.info("ElasticSearch is configured properly");
            groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_ELASTICSEARCH)).foreachRDD(
                    saveToElasticSearchActionExecutionFunction);
        } else {
            log.warn("ElasticSearch is NOT configured properly");
        }

        SaveToSolrActionExecutionFunction saveToSolrActionExecutionFunction =
                new SaveToSolrActionExecutionFunction(configurationContext.getSolrHost(),
                        configurationContext.getSolrCloudZkHost(), configurationContext.getSolrCloud(),
                        configurationContext.getSolrDataDir(), configurationContext.getSolrMaxBatchSize(),
                        solrOperationsService);
        if (saveToSolrActionExecutionFunction.check()) {
            log.info("Solr is configured properly");
            groupedDataDstream.filter(new FilterDataFunction(StreamAction.SAVE_TO_SOLR)).foreachRDD(
                    saveToSolrActionExecutionFunction);
        } else {
            log.warn("Solr is NOT configured properly");
        }

        groupedDataDstream.filter(new FilterDataFunction(StreamAction.LISTEN)).foreachRDD(
                new SendToKafkaActionExecutionFunction(configurationContext.getKafkaHostsQuorum()));
    } catch (Exception e) {
        e.printStackTrace();
    }
}