Java Code Examples for org.apache.spark.streaming.Durations#seconds()
The following examples show how to use
org.apache.spark.streaming.Durations#seconds().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SparkStreamDemo.java From sparkResearch with Apache License 2.0 | 6 votes |
public static void main(String[] args) { //创建两个核心的本地线程,批处理的间隔为1秒 SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("sparkStreamIng"); JavaStreamingContext javaStreamingContext = new JavaStreamingContext(conf, Durations.seconds(1)); //创建一个连接到IP:localhost,PORT:8080的DStream JavaReceiverInputDStream<String> dStream = javaStreamingContext.socketTextStream("localhost", 8080); JavaDStream<String> errorLine = dStream.filter(new Function<String, Boolean>() { @Override public Boolean call(String v1) throws Exception { return v1.contains("error"); } }); //打印包含error的行 errorLine.print(); try { //开始计算 javaStreamingContext.start(); //等待计算完成 javaStreamingContext.awaitTermination(); } catch (InterruptedException e) { e.printStackTrace(); } }
Example 2
Source File: StreamingEngine.java From spark-streaming-direct-kafka with Apache License 2.0 | 6 votes |
public void start() { SparkConf sparkConf = getSparkConf(); streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec()))); JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext); JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message())); pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager())); streamingContext.start(); }
Example 3
Source File: Window.java From sparkResearch with Apache License 2.0 | 6 votes |
public static void main(String[] args) { SparkConf sparkConf = new SparkConf().setAppName("window").setMaster("local[2]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10)); //检查点设置 streamingContext.checkpoint("hdfs://localhost:9300"); JavaDStream<String> dStream = streamingContext.socketTextStream("localhost", 8080); JavaDStream<String> winDstream = dStream.window(Durations.seconds(30), Durations.seconds(20)); JavaDStream<Long> result = winDstream.count(); try { streamingContext.start(); streamingContext.awaitTermination(); } catch (InterruptedException e) { e.printStackTrace(); } }
Example 4
Source File: StreamingService.java From cxf with Apache License 2.0 | 6 votes |
private void processStreamOneWay(List<String> inputStrings) { try { SparkConf sparkConf = new SparkConf().setMaster("local[*]") .setAppName("JAX-RS Spark Connect OneWay " + SparkUtils.getRandomId()); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); JavaDStream<String> receiverStream = null; if ("queue".equals(receiverType)) { Queue<JavaRDD<String>> rddQueue = new LinkedList<>(); for (int i = 0; i < 30; i++) { rddQueue.add(jssc.sparkContext().parallelize(inputStrings)); } receiverStream = jssc.queueStream(rddQueue); } else { receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings)); } JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false); wordCounts.foreachRDD(new PrintOutputFunction(jssc)); jssc.start(); } catch (Exception ex) { // ignore } }
Example 5
Source File: StreamingService.java From cxf with Apache License 2.0 | 5 votes |
private void processStream(AsyncResponse async, List<String> inputStrings) { try { SparkConf sparkConf = new SparkConf().setMaster("local[*]") .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId()); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc); SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut); jssc.addStreamingListener(sparkListener); JavaDStream<String> receiverStream = null; if ("queue".equals(receiverType)) { Queue<JavaRDD<String>> rddQueue = new LinkedList<>(); for (int i = 0; i < 30; i++) { rddQueue.add(jssc.sparkContext().parallelize(inputStrings)); } receiverStream = jssc.queueStream(rddQueue); } else { receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings)); } JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false); wordCounts.foreachRDD(new OutputFunction(streamOut)); jssc.start(); executor.execute(new SparkJob(async, sparkListener)); } catch (Exception ex) { // the compiler does not allow to catch SparkException directly if (ex instanceof SparkException) { async.cancel(60); } else { async.resume(new WebApplicationException(ex)); } } }
Example 6
Source File: StreamingContextConfigurationTests.java From Decision with Apache License 2.0 | 5 votes |
@Before public void setUp() throws Exception { System.clearProperty("spark.driver,port"); System.clearProperty("spark.hostPort"); conf = new SparkConf().setMaster("local[4]").setAppName("magic"); ssc = new JavaStreamingContext(conf, Durations.seconds(1)); // TODO simulate a configurationContext sc = streamingContextConfiguration.streamingContext(); //sc.start(); // sc.ssc().conf(); // sc.start(); // ssc.start(); }
Example 7
Source File: WordCountSocketStateful.java From Apache-Spark-2x-for-Java-Developers with MIT License | 5 votes |
public static void main(String[] args) throws Exception { System.setProperty("hadoop.home.dir", "E:\\hadoop"); SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1)); streamingContext.checkpoint("E:\\hadoop\\checkpoint"); // Initial state RDD input to mapWithState @SuppressWarnings("unchecked") List<Tuple2<String, Integer>> tuples =Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1)); JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() ); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 ); // Update the cumulative count function Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) { int sum = one.orElse(0) + (state.exists() ? state.get() : 0); Tuple2<String, Integer> output = new Tuple2<>(word, sum); state.update(sum); return output; } }; // DStream made of get cumulative counts that get updated in every batch JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD)); stateDstream.print(); streamingContext.start(); streamingContext.awaitTermination(); }
Example 8
Source File: SparkBatchProcessingTest.java From OSTMap with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code StreamingContext} with a 15-second batch duration, the Kryo
 * serializer and event logging enabled.
 *
 * @return the newly created streaming context
 */
private static StreamingContext createSparkStreamingContext() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("Spark Batch Processing Test");
    sparkConf.set("spark.serializer", KryoSerializer.class.getCanonicalName());
    sparkConf.set("spark.eventLog.enabled", "true");

    StreamingContext context = new StreamingContext(sparkConf, Durations.seconds(15));
    return context;
}
Example 9
Source File: WordCountSocketJava8Ex.java From Apache-Spark-2x-for-Java-Developers with MIT License | 5 votes |
public static void main(String[] args) throws Exception { System.setProperty("hadoop.home.dir", "E:\\hadoop"); SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]"); JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1)); List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10)); JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples); JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() ); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 ); wordCounts.print(); JavaPairDStream<String, Integer> joinedDstream = wordCounts.transformToPair( new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() { @Override public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception { rdd.join(initialRDD).mapToPair(new PairFunction<Tuple2<String,Tuple2<Integer,Integer>>, String, Integer>() { @Override public Tuple2<String, Integer> call(Tuple2<String, Tuple2<Integer, Integer>> joinedTuple) throws Exception { // TODO Auto-generated method stub return new Tuple2<>( joinedTuple._1(), (joinedTuple._2()._1()+joinedTuple._2()._2()) ); } }); return rdd; } }); joinedDstream.print(); streamingContext.start(); streamingContext.awaitTermination(); }
Example 10
Source File: JavaNetworkWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { /** * 资源.setMaster("local[2]")必须大于1 一个负责取数据 其他负责计算 */ // if (args.length < 2) { // System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); // System.exit(1); // } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = SparkUtils.getLocalSparkConf(JavaNetworkWordCount.class); /* * 创建该对象类似于spark core中的JavaSparkContext * 该对象除了接受SparkConf对象,还接收了一个BatchInterval参数,就算说,每收集多长时间去划分一个人Batch即RDD去执行 */ JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(2)); /* * 首先创建输入DStream,代表一个数据比如这里从socket或KafKa来持续不断的进入实时数据流 * 创建一个监听Socket数据量,RDD里面的每一个元素就是一行行的文本 */ JavaReceiverInputDStream<String> lines = ssc.socketTextStream("192.168.2.1", 9999, StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Lists.newArrayList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); try { ssc.awaitTermination(); } catch (Exception e) { e.printStackTrace(); } }
Example 11
Source File: JavaSqlNetworkWordCount.java From SparkDemo with MIT License | 4 votes |
/**
 * Network word count that, for every batch, converts the words into a
 * DataFrame, registers it as the temporary view "words" and prints the result
 * of a SQL group-by count.
 *
 * @param args {@code <hostname> <port>} of the text socket to read from
 * @throws Exception if the streaming context fails or is interrupted
 */
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Streaming context with a 1 second batch interval.
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("JavaSqlNetworkWordCount");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    // Receiver stream on the target ip:port carrying \n delimited text
    // (eg. generated by 'nc'). The storage level used has no replication,
    // which is only suitable when running locally; a distributed deployment
    // needs replication for fault tolerance.
    String host = args[0];
    int port = Integer.parseInt(args[1]);
    JavaReceiverInputDStream<String> lines =
        ssc.socketTextStream(host, port, StorageLevels.MEMORY_AND_DISK_SER);

    FlatMapFunction<String, String> splitIntoWords = new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    };
    JavaDStream<String> words = lines.flatMap(splitIntoWords);

    // Word -> bean conversion used to build the DataFrame rows.
    final Function<String, JavaRecord> toRecord = new Function<String, JavaRecord>() {
        @Override
        public JavaRecord call(String word) {
            JavaRecord record = new JavaRecord();
            record.setWord(word);
            return record;
        }
    };

    // Per batch: words RDD -> DataFrame -> SQL word count -> print.
    words.foreachRDD(new VoidFunction2<JavaRDD<String>, Time>() {
        @Override
        public void call(JavaRDD<String> rdd, Time time) {
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());

            JavaRDD<JavaRecord> rowRDD = rdd.map(toRecord);
            Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

            // Temporary view backing the SQL query below.
            wordsDataFrame.createOrReplaceTempView("words");

            Dataset<Row> wordCountsDataFrame =
                spark.sql("select word, count(*) as total from words group by word");
            System.out.println("========= " + time + "=========");
            wordCountsDataFrame.show();
        }
    });

    ssc.start();
    ssc.awaitTermination();
}
Example 12
Source File: JavaNetworkWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaNetworkWordCount <hostname> <port>"); System.exit(1); } StreamingExamples.setStreamingLogLevels(); // Create the context with a 1 second batch size SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount"); JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream<String> lines = ssc.socketTextStream( args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); ssc.start(); ssc.awaitTermination(); }
Example 13
Source File: JavaDirectKafkaWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) throws Exception { if (args.length < 2) { System.err.println("Usage: JavaDirectKafkaWordCount <brokers> <topics>\n" + " <brokers> is a list of one or more Kafka brokers\n" + " <topics> is a list of one or more kafka topics to consume from\n\n"); System.exit(1); } StreamingExamples.setStreamingLogLevels(); String brokers = args[0]; String topics = args[1]; // Create context with a 2 seconds batch interval SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount"); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2)); Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(","))); Map<String, String> kafkaParams = new HashMap<>(); kafkaParams.put("metadata.broker.list", brokers); // Create direct kafka stream with brokers and topics JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream( jssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet ); // Get the lines, split them into words, count the words and print JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Arrays.asList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<>(s, 1); } }).reduceByKey( new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); // Start the computation jssc.start(); jssc.awaitTermination(); }
Example 14
Source File: SampleConsumer.java From kafka-spark-consumer with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") private void run() { Properties props = new Properties(); props.put("zookeeper.hosts", "zkhost"); props.put("zookeeper.port", "2181"); props.put("kafka.topic", "topicA,topicB,topicC"); props.put("kafka.consumer.id", "kafka-consumer"); // Optional Properties props.put("zookeeper.broker.path", "/brokers"); props.put("zookeeper.consumer.path", "/consumers"); props.put("consumer.forcefromstart", "false"); props.put("max.poll.records", "10"); props.put("consumer.fillfreqms", "500"); props.put("consumer.backpressure.enabled", "true"); //Kafka properties props.put("bootstrap.servers", "kafkahost-1:6667," + "kafkahost-2:6667," + "kafkahost-3:6667," + "kafkahost-4:6667"); props.put("security.protocol", "SSL"); props.put("ssl.truststore.location","~/kafka-securitykafka.server.truststore.jks"); props.put("ssl.truststore.password", "test1234"); SparkConf _sparkConf = new SparkConf(); JavaStreamingContext jsc = new JavaStreamingContext(_sparkConf, Durations.seconds(30)); // Specify number of Receivers you need. 
int numberOfReceivers = 6; JavaDStream<MessageAndMetadata<byte[]>> unionStreams = ReceiverLauncher.launch( jsc, props, numberOfReceivers, StorageLevel.MEMORY_ONLY()); unionStreams.foreachRDD(new VoidFunction<JavaRDD<MessageAndMetadata<byte[]>>>() { @Override public void call(JavaRDD<MessageAndMetadata<byte[]>> rdd) throws Exception { //Start Application Logic rdd.foreachPartition(new VoidFunction<Iterator<MessageAndMetadata<byte[]>>>() { @Override public void call(Iterator<MessageAndMetadata<byte[]>> mmItr) throws Exception { int countTopicA = 0; int countTopicB = 0; int countTopicC = 0; while(mmItr.hasNext()) { MessageAndMetadata<byte[]> mm = mmItr.next(); if(mm.getTopic().equals("topicA")) { countTopicA++; } else if (mm.getTopic().equals("topicB")) { countTopicB++; } else if (mm.getTopic().equals("topicC")) { countTopicC++; } } System.out.println("topicA count " + countTopicA); System.out.println("topicB count " + countTopicB); System.out.println("topicC count " + countTopicC); } }); System.out.println("RDD count " + rdd.count()); //End Application Logic //commit offset System.out.println("Commiting Offset"); ProcessedOffsetManager.persistsPartition(rdd, props); } }); try { jsc.start(); jsc.awaitTermination(); }catch (Exception ex ) { jsc.ssc().sc().cancelAllJobs(); jsc.stop(true, false); System.exit(-1); } }
Example 15
Source File: SparkRuntime.java From jMetalSP with MIT License | 4 votes |
/**
 * Creates the runtime and its streaming context.
 *
 * @param duration  batch duration in seconds
 * @param sparkConf Spark configuration used for the streaming context
 */
public SparkRuntime(int duration, SparkConf sparkConf) {
    this.duration = duration;
    this.sparkConf = sparkConf;
    this.streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
    //streamingContext.sparkContext().setLogLevel("ALL");
}
Example 16
Source File: FraudDetectionApp.java From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License | 4 votes |
/**
 * Reads log lines from the Kafka topic "iplog", keeps those whose IP prefix
 * (first two octets of the first space-separated token) is reported as
 * fraudulent by {@code CacheIPLookup}, and saves them as text files with the
 * prefix "FraudRecord".
 *
 * @param args unused
 * @throws Exception if the streaming context fails or is interrupted
 */
public static void main(String[] args) throws Exception {
    String brokers = "localhost:9092";
    String topics = "iplog";
    CacheIPLookup cacheIPLookup = new CacheIPLookup();

    SparkConf sparkConf = new SparkConf().setAppName("IP_FRAUD");
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(2));

    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
    Map<String, String> kafkaConfiguration = new HashMap<>();
    kafkaConfiguration.put("metadata.broker.list", brokers);
    kafkaConfiguration.put("group.id", "ipfraud");
    kafkaConfiguration.put("auto.offset.reset", "smallest");

    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
        javaStreamingContext,
        String.class,
        String.class,
        StringDecoder.class,
        StringDecoder.class,
        kafkaConfiguration,
        topicsSet);

    JavaDStream<String> lines = messages.map(Tuple2::_2);

    JavaDStream<String> fraudIPs = lines.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String s) throws Exception {
            String ip = s.split(" ")[0];
            String[] octets = ip.split("\\.");
            // Explicit length check instead of catching
            // ArrayIndexOutOfBoundsException. As before, a token with fewer
            // than two octets yields a null range for the lookup.
            String range = octets.length >= 2 ? octets[0] + "." + octets[1] : null;
            return cacheIPLookup.isFraudIP(range);
        }
    });

    DStream<String> fraudDstream = fraudIPs.dstream();
    fraudDstream.saveAsTextFiles("FraudRecord", "");

    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
Example 17
Source File: JavaKafkaReceiverWordCount.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { StreamingExamples.setStreamingLogLevels(); SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaReceiverWordCount").setMaster("local[4]"); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(6)); Map<String, Integer> topicMap = new HashMap<String, Integer>(); // key是topic名称,value是线程数量 topicMap.put("2017-7-26", 1); String zookeeperList = "master:2181,slave1:2181,slave2:2181"; JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, zookeeperList, "JavaKafkaReceiverWordCount", topicMap); JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { @Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String x) { return Lists.newArrayList(SPACE.split(x)).iterator(); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); jssc.start(); try { jssc.awaitTermination(); } catch (Exception e) { e.printStackTrace(); } }
Example 18
Source File: IoTDataProcessor.java From iot-traffic-monitor with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { //read Spark and Cassandra properties and create SparkConf Properties prop = PropertyFileReader.readPropertyFile(); SparkConf conf = new SparkConf() .setAppName(prop.getProperty("com.iot.app.spark.app.name")) .setMaster(prop.getProperty("com.iot.app.spark.master")) .set("spark.cassandra.connection.host", prop.getProperty("com.iot.app.cassandra.host")) .set("spark.cassandra.connection.port", prop.getProperty("com.iot.app.cassandra.port")) .set("spark.cassandra.connection.keep_alive_ms", prop.getProperty("com.iot.app.cassandra.keep_alive")); //batch interval of 5 seconds for incoming stream JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5)); //add check point directory jssc.checkpoint(prop.getProperty("com.iot.app.spark.checkpoint.dir")); //read and set Kafka properties Map<String, String> kafkaParams = new HashMap<String, String>(); kafkaParams.put("zookeeper.connect", prop.getProperty("com.iot.app.kafka.zookeeper")); kafkaParams.put("metadata.broker.list", prop.getProperty("com.iot.app.kafka.brokerlist")); String topic = prop.getProperty("com.iot.app.kafka.topic"); Set<String> topicsSet = new HashSet<String>(); topicsSet.add(topic); //create direct kafka stream JavaPairInputDStream<String, IoTData> directKafkaStream = KafkaUtils.createDirectStream( jssc, String.class, IoTData.class, StringDecoder.class, IoTDataDecoder.class, kafkaParams, topicsSet ); logger.info("Starting Stream Processing"); //We need non filtered stream for poi traffic data calculation JavaDStream<IoTData> nonFilteredIotDataStream = directKafkaStream.map(tuple -> tuple._2()); //We need filtered stream for total and traffic data calculation JavaPairDStream<String,IoTData> iotDataPairStream = nonFilteredIotDataStream.mapToPair(iot -> new Tuple2<String,IoTData>(iot.getVehicleId(),iot)).reduceByKey((a, b) -> a ); // Check vehicle Id is already processed JavaMapWithStateDStream<String, IoTData, Boolean, 
Tuple2<IoTData,Boolean>> iotDStreamWithStatePairs = iotDataPairStream .mapWithState(StateSpec.function(processedVehicleFunc).timeout(Durations.seconds(3600)));//maintain state for one hour // Filter processed vehicle ids and keep un-processed JavaDStream<Tuple2<IoTData,Boolean>> filteredIotDStreams = iotDStreamWithStatePairs.map(tuple2 -> tuple2) .filter(tuple -> tuple._2.equals(Boolean.FALSE)); // Get stream of IoTdata JavaDStream<IoTData> filteredIotDataStream = filteredIotDStreams.map(tuple -> tuple._1); //cache stream as it is used in total and window based computation filteredIotDataStream.cache(); //process data IoTTrafficDataProcessor iotTrafficProcessor = new IoTTrafficDataProcessor(); iotTrafficProcessor.processTotalTrafficData(filteredIotDataStream); iotTrafficProcessor.processWindowTrafficData(filteredIotDataStream); //poi data POIData poiData = new POIData(); poiData.setLatitude(33.877495); poiData.setLongitude(-95.50238); poiData.setRadius(30);//30 km //broadcast variables. We will monitor vehicles on Route 37 which are of type Truck Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues = jssc.sparkContext().broadcast(new Tuple3<>(poiData,"Route-37","Truck")); //call method to process stream iotTrafficProcessor.processPOIData(nonFilteredIotDataStream,broadcastPOIValues); //start context jssc.start(); jssc.awaitTermination(); }
Example 19
Source File: ComputeStreamingResponse.java From incubator-retired-pirk with Apache License 2.0 | 4 votes |
public ComputeStreamingResponse(FileSystem fileSys) throws PIRException { fs = fileSys; storage = new HadoopFileSystemStore(fs); dataInputFormat = SystemConfiguration.getProperty("pir.dataInputFormat"); if (!InputFormatConst.ALLOWED_FORMATS.contains(dataInputFormat)) { throw new IllegalArgumentException("inputFormat = " + dataInputFormat + " is of an unknown form"); } logger.info("inputFormat = " + dataInputFormat); if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) { inputData = SystemConfiguration.getProperty("pir.inputData", "none"); if (inputData.equals("none")) { throw new IllegalArgumentException("For inputFormat = " + dataInputFormat + " an inputFile must be specified"); } logger.info("inputFile = " + inputData); } else if (dataInputFormat.equals(InputFormatConst.ES)) { esQuery = SystemConfiguration.getProperty("pir.esQuery", "none"); esResource = SystemConfiguration.getProperty("pir.esResource", "none"); if (esQuery.equals("none")) { throw new IllegalArgumentException("esQuery must be specified"); } if (esResource.equals("none")) { throw new IllegalArgumentException("esResource must be specified"); } logger.info("esQuery = " + esQuery + " esResource = " + esResource); } outputFile = SystemConfiguration.getProperty("pir.outputFile"); outputDirExp = outputFile + "_exp"; queryInput = SystemConfiguration.getProperty("pir.queryInput"); String stopListFile = SystemConfiguration.getProperty("pir.stopListFile"); logger.info("outputFile = " + outputFile + " queryInputDir = " + queryInput + " stopListFile = " + stopListFile + " esQuery = " + esQuery + " esResource = " + esResource); // Pull the batchSeconds and windowLength parameters long batchSeconds = SystemConfiguration.getLongProperty("pir.sparkstreaming.batchSeconds", 30); windowLength = SystemConfiguration.getLongProperty("pir.sparkstreaming.windowLength", 60); if (windowLength % batchSeconds != 0) { throw new IllegalArgumentException("batchSeconds = " + batchSeconds + " must divide windowLength = " + 
windowLength); } useQueueStream = SystemConfiguration.getBooleanProperty("pir.sparkstreaming.useQueueStream", false); logger.info("useQueueStream = " + useQueueStream); // Set the necessary configurations SparkConf conf = new SparkConf().setAppName("SparkPIR").setMaster("yarn-cluster"); conf.set("es.nodes", SystemConfiguration.getProperty("es.nodes", "none")); conf.set("es.port", SystemConfiguration.getProperty("es.port", "none")); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.streaming.stopGracefullyOnShutdown", SystemConfiguration.getProperty("spark.streaming.stopGracefullyOnShutdown", "false")); JavaSparkContext sc = new JavaSparkContext(conf); jssc = new JavaStreamingContext(sc, Durations.seconds(batchSeconds)); // Setup, run query, teardown logger.info("Setting up for query run"); try { setup(); } catch (IOException e) { throw new PIRException("An error occurred setting up the streaming responder.", e); } logger.info("Setup complete"); }
Example 20
Source File: SparkKafkaTest.java From BigDataPlatform with GNU General Public License v3.0 | 4 votes |
public static void main(String[] args) throws Exception { if (args.length < 4) { System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>"); System.exit(1); } SparkConf conf = new SparkConf() .setMaster("local[2]") .setAppName("JavaKafkaWordCount"); // 咱们这里项目中,就设置5秒钟的batch interval // 每隔5秒钟,咱们的spark streaming作业就会收集最近5秒内的数据源接收过来的数据 JavaStreamingContext jssc = new JavaStreamingContext( conf, Durations.seconds(5)); jssc.checkpoint("hdfs://Master:9000/streaming_checkpoint"); // 正式开始进行代码的编写 // 实现咱们需要的实时计算的业务逻辑和功能 // 创建针对Kafka数据来源的输入DStream(离线流,代表了一个源源不断的数据来源,抽象) // 选用kafka direct api(很多好处,包括自己内部自适应调整每次接收数据量的特性,等等) // 构建kafka参数map // 主要要放置的就是,你要连接的kafka集群的地址(broker集群的地址列表) Map<String, String> kafkaParams = new HashMap<String, String>(); kafkaParams.put("metadata.broker.list", ConfigurationManager.getProperty(Constants.KAFKA_METADATA_BROKER_LIST)); // 构建topic set String kafkaTopics = "streamingtopic";//ConfigurationManager.getProperty("streamingtopic"); String[] kafkaTopicsSplited = kafkaTopics.split(","); Set<String> topics = new HashSet<String>(); for (String kafkaTopic : kafkaTopicsSplited) { topics.add(kafkaTopic); } // 基于kafka direct api模式,构建出了针对kafka集群中指定topic的输入DStream // 两个值,val1,val2;val1没有什么特殊的意义;val2中包含了kafka topic中的一条一条的实时日志数据 JavaPairInputDStream<String, String> adRealTimeLogDStream = KafkaUtils.createDirectStream( jssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics); JavaDStream<String> lines = adRealTimeLogDStream.map(Tuple2::_2); JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator()); JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1)) .reduceByKey((i1, i2) -> i1 + i2); wordCounts.print(); jssc.start(); jssc.awaitTermination(); }