org.apache.spark.streaming.api.java.JavaStreamingContext Scala Examples
The following examples show how to use org.apache.spark.streaming.api.java.JavaStreamingContext.
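Before the individual examples, here is a minimal, self-contained sketch of constructing and running a JavaStreamingContext. The master URL, batch interval, and socket endpoint are illustrative placeholders rather than values taken from any of the projects below.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.api.java.JavaStreamingContext

object JavaStreamingContextSketch {
  def main(args: Array[String]): Unit = {
    // Local master and a 1-second batch interval are illustrative defaults only.
    val conf = new SparkConf().setMaster("local[2]").setAppName("JavaStreamingContextSketch")
    val jssc = new JavaStreamingContext(conf, Durations.seconds(1))

    // Any input source works here; a socket text stream keeps the sketch self-contained.
    val lines = jssc.socketTextStream("localhost", 9999)
    lines.print()

    jssc.start()
    jssc.awaitTermination()
  }
}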
Example 1
Source File: MQTTUtils.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.mqtt

import scala.reflect.ClassTag

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.{JavaDStream, JavaReceiverInputDStream, JavaStreamingContext}
import org.apache.spark.streaming.dstream.ReceiverInputDStream

object MQTTUtils {

  private[mqtt] class MQTTUtilsPythonHelper {

    def createStream(
        jssc: JavaStreamingContext,
        brokerUrl: String,
        topic: String,
        storageLevel: StorageLevel
      ): JavaDStream[String] = {
      MQTTUtils.createStream(jssc, brokerUrl, topic, storageLevel)
    }
  }
Example 2
Source File: StreamingUtils.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.test

import java.time.{Duration => JDuration}
import java.util.concurrent.TimeUnit
import java.util.{List => JList}

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

import scala.annotation.meta.param
import scala.collection.JavaConverters._
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

object StreamingUtils {

  class TestReceiver[T](of: Seq[T], streamItemEvery: Duration) extends Receiver[T](StorageLevel.MEMORY_ONLY) {

    override def onStart(): Unit = {
      of.foreach { item =>
        Thread.sleep(streamItemEvery.toMillis)
        store(item)
      }
    }

    override def onStop(): Unit = {}
  }

  class TestInputDStream[T: ClassTag](@(transient@param) ssc_ : StreamingContext, of: Seq[T], streamItemEvery: Duration)
      extends ReceiverInputDStream[T](ssc_) {

    override def getReceiver(): Receiver[T] = new TestReceiver[T](of, streamItemEvery)
  }

  def createJavaReceiverDInputStream[T](jssc: JavaStreamingContext,
                                        of: JList[T],
                                        streamItemEvery: JDuration): JavaReceiverInputDStream[T] = {
    implicit val cmt: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
    JavaReceiverInputDStream.fromReceiverInputDStream(
      new TestInputDStream[T](jssc.ssc, of.asScala, Duration(streamItemEvery.getNano, TimeUnit.NANOSECONDS))
    )
  }
}
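A hedged caller-side sketch for the helper above: it builds a local JavaStreamingContext, feeds three test items through createJavaReceiverDInputStream at 100 ms intervals, and prints each batch. The master URL, item values, and timeouts are placeholders, not values from the infinispan-spark tests.

import java.time.{Duration => JDuration}
import java.util.Arrays

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.infinispan.spark.test.StreamingUtils

object StreamingUtilsSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("StreamingUtilsSketch")
    val jssc = new JavaStreamingContext(conf, Durations.seconds(1))

    // The test receiver emits one item every 100 ms from this fixed list.
    val items = Arrays.asList("a", "b", "c")
    val stream = StreamingUtils.createJavaReceiverDInputStream(jssc, items, JDuration.ofMillis(100))
    stream.print()

    jssc.start()
    jssc.awaitTerminationOrTimeout(5000)
    jssc.stop()
  }
}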
Example 3
Source File: JavaSparkStream.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.test

import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.infinispan.spark.serializer.JBossMarshallingSerializer
import org.scalatest.{BeforeAndAfterEach, Suite}

trait JavaSparkStream extends BeforeAndAfterEach {
  this: Suite with RemoteTest =>

  private lazy val config: SparkConf = new SparkConf().setMaster("local[4]")
    .setAppName(this.getClass.getName)
    .set("spark.serializer", classOf[JBossMarshallingSerializer].getName)
    .set("spark.driver.host", "127.0.0.1")

  protected var jssc: JavaStreamingContext = _
  protected var jsc: JavaSparkContext = _

  override protected def beforeEach(): Unit = {
    jsc = new JavaSparkContext(config)
    jssc = new JavaStreamingContext(jsc, Seconds(1))
    getRemoteCache.clear()
    super.beforeEach()
  }

  override protected def afterEach(): Unit = {
    jssc.stop(stopSparkContext = true)
    jsc.stop()
    super.afterEach()
  }
}
Example 4
Source File: InfinispanJavaDStream.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.stream

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.api.java.{JavaDStream, JavaInputDStream, JavaPairDStream, JavaStreamingContext}
import org.infinispan.client.hotrod.event.ClientEvent
import org.infinispan.spark._
import org.infinispan.spark.config.ConnectorConfiguration

object InfinispanJavaDStream {

  def writeToInfinispan[K, V](javaDStream: JavaPairDStream[K, V], configuration: ConnectorConfiguration) = {
    javaDStream.dstream.foreachRDD(rdd => rdd.writeToInfinispan(configuration))
  }

  def writeToInfinispan[K, V](javaDStream: JavaDStream[(K, V)], configuration: ConnectorConfiguration) = {
    javaDStream.dstream.foreachRDD(rdd => rdd.writeToInfinispan(configuration))
  }

  def createInfinispanInputDStream[K, V](javaStreamingContext: JavaStreamingContext,
                                         storageLevel: StorageLevel,
                                         configuration: ConnectorConfiguration,
                                         includeState: Boolean) = {
    val infinispanDStream: InfinispanInputDStream[K, V] =
      new InfinispanInputDStream[K, V](javaStreamingContext.ssc, storageLevel, configuration, includeState)
    JavaInputDStream.fromInputDStream(infinispanDStream)
  }

  def createInfinispanInputDStream[K, V](javaStreamingContext: JavaStreamingContext,
                                         storageLevel: StorageLevel,
                                         configuration: ConnectorConfiguration): JavaInputDStream[(K, V, ClientEvent.Type)] =
    createInfinispanInputDStream(javaStreamingContext, storageLevel, configuration, includeState = false)
}
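A hedged sketch of wiring up the Java-facing factory above. The ConnectorConfiguration setters (setServerList, setCacheName) are assumptions about the connector's configuration API rather than something shown in this example, and the host, port, and cache name are placeholders.

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.infinispan.spark.config.ConnectorConfiguration
import org.infinispan.spark.stream.InfinispanJavaDStream

object InfinispanStreamSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("InfinispanStreamSketch")
    val jssc = new JavaStreamingContext(conf, Durations.seconds(1))

    // Assumed configuration calls; consult the infinispan-spark docs for the exact setters.
    val connectorConfig = new ConnectorConfiguration()
      .setServerList("127.0.0.1:11222")
      .setCacheName("default")

    // Without includeState, the stream carries (key, value, event type) triples for cache events.
    val events = InfinispanJavaDStream.createInfinispanInputDStream[String, String](
      jssc, StorageLevel.MEMORY_ONLY, connectorConfig)
    events.print()

    jssc.start()
    jssc.awaitTerminationOrTimeout(10000)
    jssc.stop()
  }
}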
Example 5
Source File: JavaTestUtils.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.util.{List => JList}

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.api.java.{JavaDStreamLike, JavaDStream, JavaStreamingContext}

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    res.map(entry => entry.map(_.asJava).asJava).asJava
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 6
Source File: MQTTUtils.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.mqtt

import scala.reflect.ClassTag

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.{JavaDStream, JavaReceiverInputDStream, JavaStreamingContext}
import org.apache.spark.streaming.dstream.ReceiverInputDStream

object MQTTUtils {

  private[mqtt] class MQTTUtilsPythonHelper {

    def createStream(
        jssc: JavaStreamingContext,
        brokerUrl: String,
        topic: String,
        storageLevel: StorageLevel
      ): JavaDStream[String] = {
      MQTTUtils.createStream(jssc, brokerUrl, topic, storageLevel)
    }
  }
Example 7
Source File: JavaTestUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.util.{List => JList}

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.api.java.{JavaDStream, JavaDStreamLike, JavaStreamingContext}

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    res.map(entry => entry.map(_.asJava).asJava).toSeq.asJava
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis: Int = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait: Boolean = true
}
Example 8
Source File: SystemArg.scala From mist with Apache License 2.0 | 5 votes |
package mist.api

import mist.api.data.JsMap
import org.apache.spark.{SparkContext, SparkSessionUtils}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaStreamingContext

trait SystemArg[A] extends ArgDef[A] {
  final def validate(params: JsMap): Extraction[Unit] = Extracted(())
}

object SystemArg {

  def apply[A](tags: Seq[String], f: => Extraction[A]): ArgDef[A] = new SystemArg[A] {
    override def extract(ctx: FnContext): Extraction[A] = f
    override def describe() = Seq(InternalArgument(tags))
  }

  def apply[A](tags: Seq[String], f: FullFnContext => Extraction[A]): ArgDef[A] = new SystemArg[A] {
    override def extract(ctx: FnContext): Extraction[A] = ctx match {
      case c: FullFnContext => f(c)
      case _ =>
        val desc = s"Unknown type of job context ${ctx.getClass.getSimpleName} " +
          s"expected ${FullFnContext.getClass.getSimpleName}"
        Failed.InternalError(desc)
    }
    override def describe() = Seq(InternalArgument(tags))
  }
}

trait SparkArgs {

  val sparkContextArg: ArgDef[SparkContext] = SystemArg(
    Seq.empty,
    c => Extracted(c.sc)
  )

  val streamingContextArg: ArgDef[StreamingContext] = SystemArg(Seq(ArgInfo.StreamingContextTag),
    ctx => {
      val ssc = StreamingContext.getActiveOrCreate(() => new StreamingContext(ctx.sc, ctx.streamingDuration))
      Extracted(ssc)
    }
  )

  val sqlContextArg: ArgDef[SQLContext] = SystemArg(Seq(ArgInfo.SqlContextTag),
    ctx => sparkContextArg.map(SQLContext.getOrCreate).extract(ctx)
  )

  // HiveContext should be cached per jvm
  // see #325
  val hiveContextArg: ArgDef[HiveContext] = new SystemArg[HiveContext] {

    var cache: HiveContext = _

    override def extract(ctx: FnContext): Extraction[HiveContext] = synchronized {
      ctx match {
        case c: FullFnContext =>
          if (cache == null)
            cache = new HiveContext(c.sc)
          Extracted(cache)
        case _ =>
          Failed.InternalError(s"Unknown type of job context ${ctx.getClass.getSimpleName} expected ${FullFnContext.getClass.getSimpleName}")
      }
    }

    override def describe(): Seq[ArgInfo] = Seq(InternalArgument(
      Seq(ArgInfo.HiveContextTag, ArgInfo.SqlContextTag)))
  }

  val javaSparkContextArg: ArgDef[JavaSparkContext] = sparkContextArg.map(sc => new JavaSparkContext(sc))
  val javaStreamingContextArg: ArgDef[JavaStreamingContext] = SystemArg(Seq(ArgInfo.StreamingContextTag),
    ctx => streamingContextArg.map(scc => new JavaStreamingContext(scc)).extract(ctx))

  val sparkSessionArg: ArgDef[SparkSession] = SystemArg(Seq(ArgInfo.SqlContextTag),
    ctx => sparkContextArg.map(sc => SparkSessionUtils.getOrCreate(sc, false)).extract(ctx)
  )
  val sparkSessionWithHiveArg: ArgDef[SparkSession] = SystemArg(
    Seq(ArgInfo.SqlContextTag, ArgInfo.HiveContextTag),
    ctx => sparkContextArg.map(sc => SparkSessionUtils.getOrCreate(sc, true)).extract(ctx))
}

object SparkArgs extends SparkArgs
Example 9
Source File: JavaTestUtils.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import scala.reflect.ClassTag

import java.util.{List => JList}

import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext}
import org.apache.spark.streaming._

import java.util.ArrayList
import collection.JavaConversions._
import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.dstream.DStream

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    val out = new ArrayList[JList[JList[V]]]()
    res.map { entry =>
      val lists = entry.map(new ArrayList[V](_))
      out.append(new ArrayList[JList[V]](lists))
    }
    out
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 10
Source File: JavaTestUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.util.{List => JList}

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.api.java.{JavaDStreamLike, JavaDStream, JavaStreamingContext}

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    res.map(entry => entry.map(_.asJava).asJava).toSeq.asJava
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 11
Source File: JavaTestUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import scala.reflect.ClassTag

import java.util.{List => JList}

import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext}
import org.apache.spark.streaming._

import java.util.ArrayList
import collection.JavaConversions._
import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.dstream.DStream

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    val out = new ArrayList[JList[JList[V]]]()
    res.map { entry =>
      val lists = entry.map(new ArrayList[V](_))
      out.append(new ArrayList[JList[V]](lists))
    }
    out
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 12
Source File: MQTTUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.mqtt

import scala.reflect.ClassTag

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext, JavaDStream}
import org.apache.spark.streaming.dstream.{ReceiverInputDStream, DStream}

object MQTTUtils {

  def createStream(
      jssc: JavaStreamingContext,
      brokerUrl: String,
      topic: String,
      storageLevel: StorageLevel
    ): JavaReceiverInputDStream[String] = {
    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
    createStream(jssc.ssc, brokerUrl, topic, storageLevel)
  }
}
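As a usage note, a minimal sketch of calling the Java-facing createStream overload above; the broker URL and topic are placeholders.

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.mqtt.MQTTUtils

object MqttStreamSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("MqttStreamSketch")
    val jssc = new JavaStreamingContext(conf, Durations.seconds(1))

    // Placeholder broker URL and topic; each received MQTT message arrives as a String.
    val messages = MQTTUtils.createStream(
      jssc, "tcp://localhost:1883", "sensors/readings", StorageLevel.MEMORY_AND_DISK_SER_2)
    messages.print()

    jssc.start()
    jssc.awaitTerminationOrTimeout(10000)
    jssc.stop()
  }
}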
Example 13
Source File: JavaTestUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.util.{List => JList}

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.api.java.{JavaDStreamLike, JavaDStream, JavaStreamingContext}

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    res.map(entry => entry.map(_.asJava).asJava).toSeq.asJava
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 14
Source File: PubNubUtils.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.pubnub

import java.util.{Set => JSet}

import collection.JavaConverters._
import com.pubnub.api.PNConfiguration

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream

object PubNubUtils {

  def createStream(
      jssc: JavaStreamingContext,
      configuration: PNConfiguration,
      channels: JSet[String],
      channelGroups: JSet[String],
      timeToken: Option[Long],
      storageLevel: StorageLevel): JavaReceiverInputDStream[SparkPubNubMessage] = {
    createStream(
      jssc.ssc, configuration,
      Seq.empty ++ channels.asScala,
      Seq.empty ++ channelGroups.asScala,
      timeToken, storageLevel
    )
  }
}
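A hedged caller-side sketch for the overload above. The PNConfiguration setup (no-arg constructor plus setSubscribeKey) is an assumption about the PubNub SDK version bundled with bahir, and the key and channel names are placeholders.

import java.util.Collections

import com.pubnub.api.PNConfiguration
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.pubnub.PubNubUtils

object PubNubStreamSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("PubNubStreamSketch")
    val jssc = new JavaStreamingContext(conf, Durations.seconds(1))

    // Assumed PNConfiguration usage; check the PubNub SDK API for the exact setup calls.
    val pnConfig = new PNConfiguration()
    pnConfig.setSubscribeKey("demo")

    val messages = PubNubUtils.createStream(
      jssc,
      pnConfig,
      Collections.singleton("my_channel"),  // channels to subscribe to
      Collections.emptySet[String](),       // no channel groups
      None,                                 // no starting time token
      StorageLevel.MEMORY_AND_DISK_SER_2)
    messages.print()

    jssc.start()
    jssc.awaitTerminationOrTimeout(10000)
    jssc.stop()
  }
}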
Example 15
Source File: JavaTestUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.util.{List => JList}

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.api.java.{JavaDStreamLike, JavaDStream, JavaStreamingContext}

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    res.map(entry => entry.map(_.asJava).asJava).toSeq.asJava
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 16
Source File: HttpInputDStream.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import java.util.Timer
import java.util.TimerTask

import scala.reflect.ClassTag

import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.CloseableHttpClient
import org.apache.http.impl.client.HttpClients
import org.apache.http.util.EntityUtils
import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.api.java.JavaDStream.fromDStream
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

class HttpInputDStream(
    @transient ssc_ : StreamingContext,
    storageLevel: StorageLevel,
    url: String,
    interval: Long) extends ReceiverInputDStream[String](ssc_) with Logging {

  def getReceiver(): Receiver[String] = {
    new HttpReceiver(storageLevel, url, interval)
  }
}

class HttpReceiver(
    storageLevel: StorageLevel,
    url: String,
    interval: Long) extends Receiver[String](storageLevel) with Logging {

  var httpClient: CloseableHttpClient = _
  var trigger: Timer = _

  def onStop() {
    httpClient.close()
    logInfo("Disconnected from Http Server")
  }

  def onStart() {
    httpClient = HttpClients.createDefault()
    trigger = new Timer()
    trigger.scheduleAtFixedRate(new TimerTask {
      def run() = doGet()
    }, 0, interval * 1000)
    logInfo("Http Receiver initiated")
  }

  def doGet() {
    logInfo("Fetching data from Http source")
    val response = httpClient.execute(new HttpGet(url))
    try {
      val content = EntityUtils.toString(response.getEntity())
      store(content)
    } catch {
      case e: Exception => restart("Error! Problems while connecting", e)
    } finally {
      response.close()
    }
  }
}

object HttpUtils {

  def createStream(
      ssc: StreamingContext,
      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
      url: String,
      interval: Long): DStream[String] = {
    new HttpInputDStream(ssc, storageLevel, url, interval)
  }

  def createStream(
      jssc: JavaStreamingContext,
      storageLevel: StorageLevel,
      url: String,
      interval: Long): JavaDStream[String] = {
    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
    createStream(jssc.ssc, storageLevel, url, interval)
  }
}
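For orientation, a minimal sketch of driving the Java-facing HttpUtils.createStream defined above; the endpoint URL and polling interval are placeholders.

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Durations
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apress.prospark.HttpUtils

object HttpStreamSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("HttpStreamSketch")
    val jssc = new JavaStreamingContext(conf, Durations.seconds(10))

    // Polls the placeholder endpoint every 30 seconds; each response body becomes one record.
    val responses = HttpUtils.createStream(
      jssc, StorageLevel.MEMORY_AND_DISK_SER_2, "http://localhost:8080/metrics", 30L)
    responses.print()

    jssc.start()
    jssc.awaitTerminationOrTimeout(60000)
    jssc.stop()
  }
}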
Example 17
Source File: HttpInputDStreamAsync.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.ClassTag

import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.api.java.JavaDStream.fromDStream
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

import com.ning.http.client.AsyncCompletionHandler
import com.ning.http.client.AsyncHttpClient
import com.ning.http.client.Response

class HttpInputDStreamAsync(
    @transient ssc_ : StreamingContext,
    storageLevel: StorageLevel,
    url: String) extends ReceiverInputDStream[String](ssc_) with Logging {

  def getReceiver(): Receiver[String] = {
    new HttpReceiverAsync(storageLevel, url)
  }
}

class HttpReceiverAsync(
    storageLevel: StorageLevel,
    url: String) extends Receiver[String](storageLevel) with Logging {

  var asyncHttpClient: AsyncHttpClient = _

  def onStop() {
    asyncHttpClient.close()
    logInfo("Disconnected from Http Server")
  }

  def onStart() {
    asyncHttpClient = new AsyncHttpClient()
    asyncHttpClient.prepareGet(url).execute(new AsyncCompletionHandler[Response]() {

      override def onCompleted(response: Response): Response = {
        store(response.getResponseBody)
        return response
      }

      override def onThrowable(t: Throwable) {
        restart("Error! Problems while connecting", t)
      }
    })
    logInfo("Http Connection initiated")
  }
}

object HttpUtilsAsync {

  def createStream(
      ssc: StreamingContext,
      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
      url: String): DStream[String] = {
    new HttpInputDStreamAsync(ssc, storageLevel, url)
  }

  def createStream(
      jssc: JavaStreamingContext,
      storageLevel: StorageLevel,
      url: String): JavaDStream[String] = {
    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
    createStream(jssc.ssc, storageLevel, url)
  }
}