org.apache.spark.streaming.api.java.JavaStreamingContext Java Examples
The following examples show how to use
org.apache.spark.streaming.api.java.JavaStreamingContext.
The examples are drawn from open source projects; the originating project and license are noted above each example.
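Before the project-specific examples, here is a minimal, self-contained sketch of the typical JavaStreamingContext lifecycle: configure, create the context with a batch interval, declare an input DStream, register at least one output operation, then start and await termination. The class name, the socket host localhost, and port 9999 are placeholder assumptions for illustration, not taken from any example below.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import scala.Tuple2;

public class MinimalStreamingSketch {
    public static void main(String[] args) throws InterruptedException {
        // 1. Configure Spark and create the streaming context with a batch interval.
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("MinimalStreamingSketch");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // 2. Define an input DStream (here: a socket source on a placeholder host/port).
        JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999);

        // 3. Declare transformations and at least one output operation.
        JavaPairDStream<String, Integer> counts = lines
                .flatMap(line -> Arrays.asList(line.split(" ")).iterator())
                .mapToPair(word -> new Tuple2<>(word, 1))
                .reduceByKey(Integer::sum);
        counts.print();

        // 4. Start the computation and block until it terminates.
        jssc.start();
        jssc.awaitTermination();
    }
}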
Example #1
Source File: KafkaStreaming.java From sparkResearch with Apache License 2.0

public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10000));
    // set the checkpoint directory
    streamingContext.checkpoint("HDFS URL");
    Map<String, Integer> topicThread = new HashMap<>(1);
    topicThread.put(TOPIC, THREAD);
    JavaPairInputDStream<String, String> dStream = KafkaUtils.createStream(streamingContext, HOST, GROP, topicThread);

    JavaDStream<String> words = dStream.flatMap((FlatMapFunction<Tuple2<String, String>, String>) stringStringTuple2 ->
            Arrays.asList(SPACE.split(stringStringTuple2._2)).iterator());

    // count the words
    JavaPairDStream<String, Integer> result = words
            .mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1))
            .reduceByKey((Function2<Integer, Integer, Integer>) (v1, v2) -> v1 + v2);

    try {
        result.print();
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example #2
Source File: Window.java From sparkResearch with Apache License 2.0

public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("window").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10));
    // set the checkpoint directory
    streamingContext.checkpoint("hdfs://localhost:9300");
    JavaDStream<String> dStream = streamingContext.socketTextStream("localhost", 8080);

    JavaDStream<String> winDstream = dStream.window(Durations.seconds(30), Durations.seconds(20));

    JavaDStream<Long> result = winDstream.count();

    try {
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example #3
Source File: StreamingRsvpsDStreamCountWindow.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License

public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
            .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
            .setAppName(APPLICATION_NAME)
            .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI)
            .set("spark.streaming.kafka.consumer.cache.enabled", "false");

    final JavaStreamingContext streamingContext =
            new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS));

    streamingContext.checkpoint(CHECKPOINT_FOLDER);

    final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
            KafkaUtils.createDirectStream(
                    streamingContext,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES)
            );

    // transformations, streaming algorithms, etc
    JavaDStream<Long> countStream = meetupStream.countByWindow(
            new Duration(WINDOW_LENGTH_MS), new Duration(SLIDING_INTERVAL_MS));

    countStream.foreachRDD((JavaRDD<Long> countRDD) -> {
        MongoSpark.save(
                countRDD.map(
                        r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}")
                )
        );
    });

    // some time later, after outputs have completed
    meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {
        OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();

        ((CanCommitOffsets) meetupStream.inputDStream())
                .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
    });

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example #4
Source File: SparkRunnerStreamingContextFactory.java From beam with Apache License 2.0

private void checkpoint(JavaStreamingContext jssc, CheckpointDir checkpointDir) {
    Path rootCheckpointPath = checkpointDir.getRootCheckpointDir();
    Path sparkCheckpointPath = checkpointDir.getSparkCheckpointDir();
    Path beamCheckpointPath = checkpointDir.getBeamCheckpointDir();

    try {
        FileSystem fileSystem =
                rootCheckpointPath.getFileSystem(jssc.sparkContext().hadoopConfiguration());
        if (!fileSystem.exists(rootCheckpointPath)) {
            fileSystem.mkdirs(rootCheckpointPath);
        }
        if (!fileSystem.exists(sparkCheckpointPath)) {
            fileSystem.mkdirs(sparkCheckpointPath);
        }
        if (!fileSystem.exists(beamCheckpointPath)) {
            fileSystem.mkdirs(beamCheckpointPath);
        }
    } catch (IOException e) {
        throw new RuntimeException("Failed to create checkpoint dir", e);
    }

    jssc.checkpoint(sparkCheckpointPath.toString());
}
Example #5
Source File: SparkStreamingBinding.java From datacollector with Apache License 2.0

@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
    // Use our classpath first, since we ship a newer version of Jackson and possibly other deps in the future.
    sparkConf.set("spark.driver.userClassPathFirst", "true");
    sparkConf.set("spark.executor.userClassPathFirst", "true");

    session = SparkSession.builder().config(sparkConf).getOrCreate();
    JavaStreamingContext result =
            new JavaStreamingContext(new JavaSparkContext(session.sparkContext()), new Duration(duration));

    Map<String, Object> props = new HashMap<>();
    props.put("group.id", groupId);
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    for (Map.Entry<String, Object> map : props.entrySet()) {
        logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
    }

    logMessage("Meta data broker list " + metaDataBrokerList, isRunningInMesos);
    logMessage("Topic is " + topic, isRunningInMesos);
    logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
    return createDStream(result, props);
}
Example #6
Source File: ReduceByKeyAndWindow.java From sparkResearch with Apache License 2.0

public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("reduceByKeyAndWindow").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10));
    // set the checkpoint directory
    streamingContext.checkpoint("hdfs://localhost:9300");
    // data source
    JavaDStream<String> dStream = streamingContext.socketTextStream("localhost", 8080);

    JavaPairDStream<String, Long> ipPairDstream = dStream.mapToPair(new GetIp());

    JavaPairDStream<String, Long> result = ipPairDstream.reduceByKeyAndWindow(
            new AddLongs(), new SubtractLongs(), Durations.seconds(30), Durations.seconds(10));

    try {
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example #7
Source File: SparkStreamDemo.java From sparkResearch with Apache License 2.0

public static void main(String[] args) {
    // two local worker threads, with a batch interval of 1 second
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("sparkStreamIng");
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(conf, Durations.seconds(1));

    // create a DStream connected to localhost:8080
    JavaReceiverInputDStream<String> dStream = javaStreamingContext.socketTextStream("localhost", 8080);

    JavaDStream<String> errorLine = dStream.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String v1) throws Exception {
            return v1.contains("error");
        }
    });

    // print the lines that contain "error"
    errorLine.print();

    try {
        // start the computation
        javaStreamingContext.start();
        // wait for the computation to terminate
        javaStreamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example #8
Source File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java From net.jgp.labs.spark with Apache License 2.0

private void start() {
    // Create a local StreamingContext with two working threads and a batch interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
            "Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();
    // Create JavaRDD<Row>
    msgDataStream.foreachRDD(new RowProcessor());

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Example #9
Source File: StreamingIngestionFileSystemTextFileApp.java From net.jgp.labs.spark with Apache License 2.0

private void start() {
    // Create a local StreamingContext with two working threads and a batch interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
Example #10
Source File: WordCountRecoverableEx.java From Apache-Spark-2x-for-Java-Developers with MIT License

public static void main(String[] args) throws Exception {
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    final String ip = "10.0.75.1";
    final int port = Integer.parseInt("9000");
    final String checkpointDirectory = "E:\\hadoop\\checkpoint";

    // Function to create JavaStreamingContext without any output operations
    // (used to detect the new context)
    Function0<JavaStreamingContext> createContextFunc = new Function0<JavaStreamingContext>() {
        @Override
        public JavaStreamingContext call() {
            return createContext(ip, port, checkpointDirectory);
        }
    };

    JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(checkpointDirectory, createContextFunc);
    ssc.start();
    ssc.awaitTermination();
}
Example #11
Source File: StreamingService.java From cxf with Apache License 2.0

private void processStreamOneWay(List<String> inputStrings) {
    try {
        SparkConf sparkConf = new SparkConf().setMaster("local[*]")
                .setAppName("JAX-RS Spark Connect OneWay " + SparkUtils.getRandomId());
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        JavaDStream<String> receiverStream = null;
        if ("queue".equals(receiverType)) {
            Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
            for (int i = 0; i < 30; i++) {
                rddQueue.add(jssc.sparkContext().parallelize(inputStrings));
            }
            receiverStream = jssc.queueStream(rddQueue);
        } else {
            receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings));
        }

        JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false);
        wordCounts.foreachRDD(new PrintOutputFunction(jssc));

        jssc.start();
    } catch (Exception ex) {
        // ignore
    }
}
Example #12
Source File: SparkStreamServiceImpl.java From searchanalytics-bigdata with MIT License

@Override
public void setup() {
    // Create a StreamingContext with a SparkConf configuration
    SparkConf sparkConf = new SparkConf(false)
            .setAppName("JaiSpark")
            .setSparkHome("target/sparkhome")
            .setMaster("local")
            .set("spark.executor.memory", "128m")
            .set("spark.local.dir", new File("target/sparkhome/tmp").getAbsolutePath())
            .set("spark.cores.max", "2")
            .set("spark.akka.threads", "2")
            .set("spark.akka.timeout", "60")
            .set("spark.logConf", "true")
            .set("spark.cleaner.delay", "3700")
            .set("spark.cleaner.ttl", "86400")
            .set("spark.shuffle.spill", "false")
            .set("spark.driver.host", "localhost")
            .set("spark.driver.port", "43214");
    jssc = new JavaStreamingContext(sparkConf, new Duration(5000));

    String checkpointDir = hadoopClusterService.getHDFSUri() + "/sparkcheckpoint";
    jssc.checkpoint(checkpointDir);

    startFlumeStream();
}
Example #13
Source File: BatchUpdateFunction.java From oryx with Apache License 2.0

BatchUpdateFunction(Config config,
                    Class<K> keyClass,
                    Class<M> messageClass,
                    Class<? extends Writable> keyWritableClass,
                    Class<? extends Writable> messageWritableClass,
                    String dataDirString,
                    String modelDirString,
                    BatchLayerUpdate<K,M,U> updateInstance,
                    JavaStreamingContext streamingContext) {
    this.keyClass = keyClass;
    this.messageClass = messageClass;
    this.keyWritableClass = keyWritableClass;
    this.messageWritableClass = messageWritableClass;
    this.dataDirString = dataDirString;
    this.modelDirString = modelDirString;
    this.updateBroker = ConfigUtils.getOptionalString(config, "oryx.update-topic.broker");
    this.updateTopic = ConfigUtils.getOptionalString(config, "oryx.update-topic.message.topic");
    this.updateInstance = updateInstance;
    this.sparkContext = streamingContext.sparkContext();
}
Example #14
Source File: SparkStreaming.java From kafka-spark-avro-example with Apache License 2.0

private static void processStream(JavaStreamingContext ssc, JavaSparkContext sc) {
    System.out.println("--> Processing stream");

    Map<String, String> props = new HashMap<>();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("schema.registry.url", "http://localhost:8081");
    props.put("group.id", "spark");
    props.put("specific.avro.reader", "true");
    props.put("value.deserializer", "io.confluent.kafka.serializers.KafkaAvroDeserializer");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

    Set<String> topicsSet = new HashSet<>(Collections.singletonList("test"));

    JavaPairInputDStream<String, Object> stream = KafkaUtils.createDirectStream(ssc, String.class, Object.class,
            StringDecoder.class, KafkaAvroDecoder.class, props, topicsSet);

    stream.foreachRDD(rdd -> {
        rdd.foreachPartition(iterator -> {
                    while (iterator.hasNext()) {
                        Tuple2<String, Object> next = iterator.next();
                        Model model = (Model) next._2();
                        System.out.println(next._1() + " --> " + model);
                    }
                }
        );
    });
}
Example #15
Source File: StreamingEngine.java From spark-streaming-direct-kafka with Apache License 2.0

public void start() {
    SparkConf sparkConf = getSparkConf();
    streamingContext = new JavaStreamingContext(sparkConf,
            Durations.seconds(Long.parseLong(config.getStreamingBatchIntervalInSec())));
    JavaInputDStream<MessageAndMetadata<String, byte[]>> dStream = buildInputDStream(streamingContext);
    JavaPairDStream<String, byte[]> pairDStream = dStream.mapToPair(km -> new Tuple2<>(km.key(), km.message()));

    pairDStream.foreachRDD(new ProcessStreamingData<>(config)); // process data
    dStream.foreachRDD(new UpdateOffsetsFn<>(config.getKafkaGroupId(), config.getZkOffsetManager()));
    streamingContext.start();
}
Example #16
Source File: StreamingContextConfiguration.java From Decision with Apache License 2.0

private void configureDataContext(JavaStreamingContext context) {
    Map<String, Integer> baseTopicMap = new HashMap<>();

    configurationContext.getDataTopics().forEach(dataTopic -> baseTopicMap.put(dataTopic, 1));

    kafkaTopicService.createTopicsIfNotExist(configurationContext.getDataTopics(), configurationContext
            .getKafkaReplicationFactor(), configurationContext.getKafkaPartitions());

    HashMap<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
    kafkaParams.put("group.id", configurationContext.getGroupId());
    /*
     groupId must be the cluster groupId. Kafka assigns each partition of a topic to one, and only one,
     consumer of the group. Decision topics have only one partition (by default), so if we have two or more
     Decision instances (consumers) reading the same topic with the same groupId, only one instance will be
     able to read from the topic.
     */
    JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class, byte[].class,
            kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, kafkaParams,
            baseTopicMap, StorageLevel.MEMORY_AND_DISK_SER());

    AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();

    JavaDStream<StratioStreamingMessage> insertRequests = messages.filter(
            new FilterAvroMessagesByOperationFunction(STREAM_OPERATIONS.MANIPULATION.INSERT))
            .map(avroDeserializeMessageFunction);

    InsertIntoStreamFunction insertIntoStreamFunction = new InsertIntoStreamFunction(streamOperationService,
            configurationContext.getZookeeperHostsQuorum());
    insertRequests.foreachRDD(insertIntoStreamFunction);
}
Example #17
Source File: AbstractSparkLayer.java From oryx with Apache License 2.0

protected final JavaInputDStream<ConsumerRecord<K,M>> buildInputDStream(
        JavaStreamingContext streamingContext) {

    Preconditions.checkArgument(
            KafkaUtils.topicExists(inputTopicLockMaster, inputTopic),
            "Topic %s does not exist; did you create it?", inputTopic);
    if (updateTopic != null && updateTopicLockMaster != null) {
        Preconditions.checkArgument(
                KafkaUtils.topicExists(updateTopicLockMaster, updateTopic),
                "Topic %s does not exist; did you create it?", updateTopic);
    }

    String groupID = getGroupID();
    Map<String,Object> kafkaParams = new HashMap<>();
    kafkaParams.put("group.id", groupID);
    // Don't re-consume old messages from input by default
    kafkaParams.put("auto.offset.reset", "latest"); // Ignored by Kafka 0.10 Spark integration
    kafkaParams.put("bootstrap.servers", inputBroker);
    kafkaParams.put("key.deserializer", keyDecoderClass.getName());
    kafkaParams.put("value.deserializer", messageDecoderClass.getName());

    LocationStrategy locationStrategy = LocationStrategies.PreferConsistent();
    ConsumerStrategy<K,M> consumerStrategy = ConsumerStrategies.Subscribe(
            Collections.singleton(inputTopic), kafkaParams, Collections.emptyMap());
    return org.apache.spark.streaming.kafka010.KafkaUtils.createDirectStream(
            streamingContext,
            locationStrategy,
            consumerStrategy);
}
Example #18
Source File: StreamingContextConfigurationTests.java From Decision with Apache License 2.0

@Test
public void testActionBaseFunctionCall() throws Exception {
    // sc.sparkContext().emptyRDD().rdd().first();
    // ssc.sparkContext().emptyRDD().rdd().first();
    assertEquals(sc instanceof JavaStreamingContext, false);
    assertEquals(ssc.sparkContext().appName(), "magic");
}
Example #19
Source File: ReceiverLauncher.java From kafka-spark-consumer with Apache License 2.0

private static <E> void assignReceiversToPartitions(int numberOfReceivers,
        int numberOfPartition, List<JavaDStream<MessageAndMetadata<E>>> streamsList,
        KafkaConfig config, StorageLevel storageLevel, KafkaMessageHandler<E> messageHandler,
        JavaStreamingContext jsc) {

    // Create as many Receivers as Partitions
    if (numberOfReceivers >= numberOfPartition) {
        for (int i = 0; i < numberOfPartition; i++) {
            streamsList.add(jsc.receiverStream(new KafkaReceiver(config, i, storageLevel, messageHandler)));
        }
    } else {
        // create Range Receivers..
        Map<Integer, Set<Integer>> rMap = new HashMap<Integer, Set<Integer>>();

        for (int i = 0; i < numberOfPartition; i++) {
            int j = i % numberOfReceivers;
            Set<Integer> pSet = rMap.get(j);
            if (pSet == null) {
                pSet = new HashSet<Integer>();
                pSet.add(i);
            } else {
                pSet.add(i);
            }
            rMap.put(j, pSet);
        }
        for (int i = 0; i < numberOfReceivers; i++) {
            streamsList.add(jsc.receiverStream(new KafkaRangeReceiver(config, rMap.get(i),
                    storageLevel, messageHandler)));
        }
    }
}
Example #20
Source File: BatchLayer.java From oryx with Apache License 2.0

public void await() throws InterruptedException {
    JavaStreamingContext theStreamingContext;
    synchronized (this) {
        theStreamingContext = streamingContext;
        Preconditions.checkState(theStreamingContext != null);
    }
    log.info("Spark Streaming is running");
    theStreamingContext.awaitTermination(); // Can't do this with lock
}
Example #21
Source File: TrackStreamingSourcesTest.java From beam with Apache License 2.0

private StreamingSourceTracker(
        JavaStreamingContext jssc,
        Pipeline pipeline,
        Class<? extends PTransform> transformClassToAssert,
        Integer... expected) {
    this.ctxt = new EvaluationContext(jssc.sparkContext(), pipeline, options, jssc);
    this.evaluator =
            new SparkRunner.Evaluator(
                    new StreamingTransformTranslator.Translator(new TransformTranslator.Translator()), ctxt);
    this.transformClassToAssert = transformClassToAssert;
    this.expected = expected;
}
Example #22
Source File: StreamingContextConfiguration.java From Decision with Apache License 2.0

@Bean(name = "streamingContext", destroyMethod = "stop")
public JavaStreamingContext streamingContext() {
    JavaStreamingContext context = this.create("stratio-streaming-context", 4040,
            configurationContext.getInternalStreamingBatchTime(), configurationContext.getInternalSparkHost());

    configureRequestContext(context);
    configureActionContext(context);
    configureDataContext(context);

    return context;
}
Example #23
Source File: WordCountRecoverableEx.java From Apache-Spark-2x-for-Java-Developers with MIT License

protected static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
    SparkConf sparkConf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    streamingContext.checkpoint(checkpointDirectory);

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream(ip, port,
            StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words = StreamingLines.flatMap(str -> Arrays.asList(str.split(" ")).iterator());

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str -> new Tuple2<>(str, 1))
            .reduceByKey((count1, count2) -> count1 + count2);

    // Update function for the cumulative count
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
                @Override
                public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
                    int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
                    Tuple2<String, Integer> output = new Tuple2<>(word, sum);
                    state.update(sum);
                    return output;
                }
            };

    // DStream of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    return streamingContext;
}
Example #24
Source File: Kafka010SparkStreamingBinding.java From datacollector with Apache License 2.0

@Override
public JavaStreamingContext createDStream(JavaStreamingContext result, Map<String, Object> props) {
    props.put("bootstrap.servers", metaDataBrokerList);
    if (!autoOffsetValue.isEmpty()) {
        autoOffsetValue = getConfigurableAutoOffsetResetIfNonEmpty(autoOffsetValue);
        props.put(AUTO_OFFSET_RESET, autoOffsetValue);
    }
    props.putAll(extraKafkaConfigs);
    List<String> topics = ImmutableList.of(topic);

    JavaInputDStream<ConsumerRecord<byte[], byte[]>> stream;
    if (offsetHelper.isSDCCheckPointing()) {
        Map<TopicPartition, Long> fromOffsets =
                KafkaOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions);
        stream = KafkaUtils.createDirectStream(
                result,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<byte[], byte[]>Assign(new ArrayList<TopicPartition>(fromOffsets.keySet()), props, fromOffsets)
        );
    } else {
        stream = KafkaUtils.createDirectStream(
                result,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<byte[], byte[]>Subscribe(topics, props)
        );
    }
    Driver$.MODULE$.foreach(stream.dstream(), KafkaOffsetManagerImpl.get());
    return result;
}
Example #25
Source File: SparkStreamingJob.java From zipkin-sparkstreaming with Apache License 2.0

@Memoized
JavaStreamingContext jsc() {
    SparkConf conf = new SparkConf(true)
            .setMaster(master())
            .setAppName(getClass().getName());
    if (!jars().isEmpty()) conf.setJars(jars().toArray(new String[0]));
    for (Map.Entry<String, String> entry : conf().entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
    }
    return new JavaStreamingContext(conf, new Duration(batchDuration()));
}
Example #26
Source File: MapRStreamingBinding.java From datacollector with Apache License 2.0

@Override
public JavaStreamingContext createDStream(JavaStreamingContext result, Map<String, Object> props) {
    List<String> topics = ImmutableList.of(topic);
    if (!autoOffsetValue.isEmpty()) {
        props.put(SparkStreamingBinding.AUTO_OFFSET_RESET, autoOffsetValue);
    }
    props.putAll(extraKafkaConfigs);

    JavaInputDStream<ConsumerRecord<byte[], byte[]>> stream;
    if (offsetHelper.isSDCCheckPointing()) {
        Map<TopicPartition, Long> fromOffsets =
                MaprStreamsOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions);
        stream = KafkaUtils.createDirectStream(
                result,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<byte[], byte[]>Assign(new ArrayList<TopicPartition>(fromOffsets.keySet()), props, fromOffsets)
        );
    } else {
        stream = KafkaUtils.createDirectStream(
                result,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<byte[], byte[]>Subscribe(topics, props)
        );
    }
    Driver$.MODULE$.foreach(stream.dstream(), MaprStreamsOffsetManagerImpl.get());
    return result;
}
Example #27
Source File: ReaderWriterExample.java From spliceengine with GNU Affero General Public License v3.0

public static void main(String[] args) throws Exception {
    final String dbUrl = args[0];
    final String hostname = args[1];
    final String port = args[2];
    final String inTargetSchema = args[3];
    final String inTargetTable = args[4];

    SparkConf conf = new SparkConf();

    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(500));
    JavaReceiverInputDStream<String> stream = ssc.socketTextStream(hostname, Integer.parseInt(port));

    SparkSession spark = SparkSession.builder().getOrCreate();

    // Create a SplicemachineContext based on the provided DB connection
    SplicemachineContext splicemachineContext = new SplicemachineContext(dbUrl);

    // Set target tablename and schemaname
    final String table = inTargetSchema + "." + inTargetTable;

    stream.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
        JavaRDD<Row> rowRDD = rdd.map((Function<String, Row>) s -> RowFactory.create(s));

        Dataset<Row> df = spark.createDataFrame(rowRDD, splicemachineContext.getSchema(table));

        splicemachineContext.insert(df, table);
    });

    ssc.start();
    ssc.awaitTermination();
}
Example #28
Source File: SparkStreaming.java From kafka-spark-avro-example with Apache License 2.0

public static void main(String... args) {
    SparkConf conf = new SparkConf();
    conf.setMaster("local[2]");
    conf.setAppName("Spark Streaming Test Java");

    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

    processStream(ssc, sc);

    ssc.start();
    ssc.awaitTermination();
}
Example #29
Source File: SparkStreamingSqlAnalyse.java From sylph with Apache License 2.0

public SparkStreamingSqlAnalyse(StreamingContext ssc,
                                ConnectorStore connectorStore,
                                boolean isCompile) {
    this.ssc = ssc;
    this.connectorStore = connectorStore;
    this.sparkBean = binder -> {
        binder.bind(StreamingContext.class, ssc);
        binder.bind(JavaStreamingContext.class, new JavaStreamingContext(ssc));
    };
    this.isCompile = isCompile;
}
Example #30
Source File: WordCountSocketJava8Ex.java From Apache-Spark-2x-for-Java-Developers with MIT License

public static void main(String[] args) throws Exception {
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10));
    JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream(
            "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words = StreamingLines.flatMap(str -> Arrays.asList(str.split(" ")).iterator());

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str -> new Tuple2<>(str, 1))
            .reduceByKey((count1, count2) -> count1 + count2);

    wordCounts.print();

    JavaPairDStream<String, Integer> joinedDstream = wordCounts.transformToPair(
            new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() {
                @Override
                public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception {
                    rdd.join(initialRDD).mapToPair(new PairFunction<Tuple2<String, Tuple2<Integer, Integer>>, String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(Tuple2<String, Tuple2<Integer, Integer>> joinedTuple) throws Exception {
                            // TODO Auto-generated method stub
                            return new Tuple2<>(joinedTuple._1(), (joinedTuple._2()._1() + joinedTuple._2()._2()));
                        }
                    });
                    return rdd;
                }
            });

    joinedDstream.print();

    streamingContext.start();
    streamingContext.awaitTermination();
}