org.apache.spark.streaming.receiver.Receiver Scala Examples
The following examples show how to use org.apache.spark.streaming.receiver.Receiver.
Each example is taken from an open-source project; the source file, project, and license are noted above it.
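All of the examples share the same contract: subclass Receiver[T] with a StorageLevel, start any threads or clients in onStart(), push records to Spark with store(), call restart() on recoverable errors, and clean up in onStop(); the resulting receiver is wired into a job with StreamingContext.receiverStream(). The following minimal sketch illustrates that pattern for orientation only; the class name, host, port, and batch interval are illustrative and not taken from any of the projects below.

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

// Minimal custom receiver: reads text lines from a TCP socket and hands them to Spark.
class LineReceiver(host: String, port: Int)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  def onStart(): Unit = {
    // onStart() must not block, so receive on a daemon thread.
    new Thread("LineReceiver") {
      setDaemon(true)
      override def run(): Unit = receive()
    }.start()
  }

  def onStop(): Unit = {
    // The receiving thread checks isStopped() and exits on its own.
  }

  private def receive(): Unit = {
    try {
      val socket = new Socket(host, port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream, StandardCharsets.UTF_8))
      var line = reader.readLine()
      while (!isStopped && line != null) {
        store(line)                 // hand each record to Spark for storage/replication
        line = reader.readLine()
      }
      reader.close()
      socket.close()
      restart("Trying to connect again")   // schedules onStop() followed by onStart()
    } catch {
      case t: Throwable => restart("Error receiving data", t)
    }
  }
}

object LineReceiverApp {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(new SparkConf().setAppName("LineReceiverApp"), Seconds(10))
    // receiverStream() turns the receiver into an input DStream.
    val lines = ssc.receiverStream(new LineReceiver("localhost", 9999))
    lines.count().print()
    ssc.start()
    ssc.awaitTermination()
  }
}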
Example 1
Source File: MeetupReceiver.scala From meetup-stream with Apache License 2.0
package receiver

import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging
import com.ning.http.client.AsyncHttpClientConfig
import com.ning.http.client._
import scala.collection.mutable.ArrayBuffer
import java.io.OutputStream
import java.io.ByteArrayInputStream
import java.io.InputStreamReader
import java.io.BufferedReader
import java.io.InputStream
import java.io.PipedInputStream
import java.io.PipedOutputStream

class MeetupReceiver(url: String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {

  @transient var client: AsyncHttpClient = _
  @transient var inputPipe: PipedInputStream = _
  @transient var outputPipe: PipedOutputStream = _

  def onStart() {
    val cf = new AsyncHttpClientConfig.Builder()
    cf.setRequestTimeout(Integer.MAX_VALUE)
    cf.setReadTimeout(Integer.MAX_VALUE)
    cf.setPooledConnectionIdleTimeout(Integer.MAX_VALUE)
    client = new AsyncHttpClient(cf.build())
    inputPipe = new PipedInputStream(1024 * 1024)
    outputPipe = new PipedOutputStream(inputPipe)
    val producerThread = new Thread(new DataConsumer(inputPipe))
    producerThread.start()

    client.prepareGet(url).execute(new AsyncHandler[Unit] {
      def onBodyPartReceived(bodyPart: HttpResponseBodyPart) = {
        bodyPart.writeTo(outputPipe)
        AsyncHandler.STATE.CONTINUE
      }
      def onStatusReceived(status: HttpResponseStatus) = {
        AsyncHandler.STATE.CONTINUE
      }
      def onHeadersReceived(headers: HttpResponseHeaders) = {
        AsyncHandler.STATE.CONTINUE
      }
      def onCompleted = {
        println("completed")
      }
      def onThrowable(t: Throwable) = {
        t.printStackTrace()
      }
    })
  }

  def onStop() {
    if (Option(client).isDefined) client.close()
    if (Option(outputPipe).isDefined) {
      outputPipe.flush()
      outputPipe.close()
    }
    if (Option(inputPipe).isDefined) {
      inputPipe.close()
    }
  }

  class DataConsumer(inputStream: InputStream) extends Runnable {
    override def run() {
      val bufferedReader = new BufferedReader(new InputStreamReader(inputStream))
      var input = bufferedReader.readLine()
      while (input != null) {
        store(input)
        input = bufferedReader.readLine()
      }
    }
  }
}
Example 2
Source File: SocketInputDStream.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.streaming.dstream

import java.io._
import java.net.{ConnectException, Socket}
import java.nio.charset.StandardCharsets

import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.util.NextIterator

private[streaming]
class SocketInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](_ssc) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  private var socket: Socket = _

  def onStart() {
    logInfo(s"Connecting to $host:$port")
    try {
      socket = new Socket(host, port)
    } catch {
      case e: ConnectException =>
        restart(s"Error connecting to $host:$port", e)
        return
    }
    logInfo(s"Connected to $host:$port")

    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // in case restart thread close it twice
    synchronized {
      if (socket != null) {
        socket.close()
        socket = null
        logInfo(s"Closed socket to $host:$port")
      }
    }
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(
      new InputStreamReader(inputStream, StandardCharsets.UTF_8))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
Example 3
Source File: CustomReceiver.scala From iolap with Apache License 2.0
package org.apache.spark.examples.streaming

import java.io.{InputStreamReader, BufferedReader, InputStream}
import java.net.Socket

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

  // Excerpt: the enclosing receiver class declaration and its other members are
  // omitted in this listing; the trailing brace below closes that class.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      logInfo("Connected to " + host + ":" + port)
      val reader = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8"))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
Example 4
Source File: TwitterInputDStream.scala From iolap with Apache License 2.0
package org.apache.spark.streaming.twitter

import twitter4j._
import twitter4j.auth.Authorization
import twitter4j.conf.ConfigurationBuilder
import twitter4j.auth.OAuthAuthorization

import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class TwitterInputDStream(
    @transient ssc_ : StreamingContext,
    twitterAuth: Option[Authorization],
    filters: Seq[String],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[Status](ssc_) {

  private def createOAuthAuthorization(): Authorization = {
    new OAuthAuthorization(new ConfigurationBuilder().build())
  }

  private val authorization = twitterAuth.getOrElse(createOAuthAuthorization())

  override def getReceiver(): Receiver[Status] = {
    new TwitterReceiver(authorization, filters, storageLevel)
  }
}

private[streaming]
class TwitterReceiver(
    twitterAuth: Authorization,
    filters: Seq[String],
    storageLevel: StorageLevel
  ) extends Receiver[Status](storageLevel) with Logging {

  @volatile private var twitterStream: TwitterStream = _
  @volatile private var stopped = false

  def onStart() {
    try {
      val newTwitterStream = new TwitterStreamFactory().getInstance(twitterAuth)
      newTwitterStream.addListener(new StatusListener {
        def onStatus(status: Status): Unit = {
          store(status)
        }
        // Unimplemented
        def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) {}
        def onTrackLimitationNotice(i: Int) {}
        def onScrubGeo(l: Long, l1: Long) {}
        def onStallWarning(stallWarning: StallWarning) {}
        def onException(e: Exception) {
          if (!stopped) {
            restart("Error receiving tweets", e)
          }
        }
      })

      val query = new FilterQuery
      if (filters.size > 0) {
        query.track(filters.toArray)
        newTwitterStream.filter(query)
      } else {
        newTwitterStream.sample()
      }
      setTwitterStream(newTwitterStream)
      logInfo("Twitter receiver started")
      stopped = false
    } catch {
      case e: Exception => restart("Error starting Twitter stream", e)
    }
  }

  def onStop() {
    stopped = true
    setTwitterStream(null)
    logInfo("Twitter receiver stopped")
  }

  private def setTwitterStream(newTwitterStream: TwitterStream) = synchronized {
    if (twitterStream != null) {
      twitterStream.shutdown()
    }
    twitterStream = newTwitterStream
  }
}
Example 5
Source File: MQTTInputDStream.scala From iolap with Apache License 2.0
package org.apache.spark.streaming.mqtt

import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken
import org.eclipse.paho.client.mqttv3.MqttCallback
import org.eclipse.paho.client.mqttv3.MqttClient
import org.eclipse.paho.client.mqttv3.MqttMessage
import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream._
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class MQTTInputDStream(
    @transient ssc_ : StreamingContext,
    brokerUrl: String,
    topic: String,
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[String](ssc_) {

  private[streaming] override def name: String = s"MQTT stream [$id]"

  def getReceiver(): Receiver[String] = {
    new MQTTReceiver(brokerUrl, topic, storageLevel)
  }
}

private[streaming]
class MQTTReceiver(
    brokerUrl: String,
    topic: String,
    storageLevel: StorageLevel
  ) extends Receiver[String](storageLevel) {

  def onStop() {
  }

  def onStart() {
    // Set up persistence for messages
    val persistence = new MemoryPersistence()

    // Initializing Mqtt Client specifying brokerUrl, clientID and MqttClientPersistance
    val client = new MqttClient(brokerUrl, MqttClient.generateClientId(), persistence)

    // Callback automatically triggers as and when new message arrives on specified topic
    val callback = new MqttCallback() {

      // Handles Mqtt message
      override def messageArrived(topic: String, message: MqttMessage) {
        store(new String(message.getPayload(), "utf-8"))
      }

      override def deliveryComplete(token: IMqttDeliveryToken) {
      }

      override def connectionLost(cause: Throwable) {
        restart("Connection lost ", cause)
      }
    }

    // Set up callback for MqttClient. This needs to happen before
    // connecting or subscribing, otherwise messages may be lost
    client.setCallback(callback)

    // Connect to MqttBroker
    client.connect()

    // Subscribe to Mqtt topic
    client.subscribe(topic)
  }
}
Example 6
Source File: SocketInputDStream.scala From iolap with Apache License 2.0
package org.apache.spark.streaming.dstream

import scala.util.control.NonFatal

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.storage.StorageLevel
import org.apache.spark.util.NextIterator

import scala.reflect.ClassTag

import java.io._
import java.net.{UnknownHostException, Socket}

import org.apache.spark.Logging
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class SocketInputDStream[T: ClassTag](
    @transient ssc_ : StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](ssc_) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  def onStart() {
    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // There is nothing much to do as the thread calling receive()
    // is designed to stop by itself if isStopped() returns false
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
Example 7
Source File: CustomReceiver.scala From spark1.52 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.io.{InputStreamReader, BufferedReader, InputStream}
import java.net.Socket

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

import java.io.File
import java.io.FileInputStream

  // Excerpt: the enclosing receiver class declaration and its other members are
  // omitted in this listing; the trailing brace below closes that class.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port) // connect to the host
      logInfo("Connected to " + host + ":" + port)
      // get the input stream of the network connection
      println("isConnected:" + socket.isConnected())
      val socketInput = socket.getInputStream()
      // //val inputFile=new File("../data/mllib/als/testCustomReceiver.data")
      // val in = new FileInputStream(inputFile)
      // val in = new FileInputStream(socketInput)
      val reader = new BufferedReader(new InputStreamReader(socketInput, "UTF-8"))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput) // store the data
        userInput = reader.readLine() // read the next line
        println("userInput:" + userInput)
      }
      reader.close() // close the reader
      socket.close() // close the connection
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
// scalastyle:on println
Example 8
Source File: ImageInputDStream.scala From spark1.52 with Apache License 2.0
package org.apache.spark.examples.streaming

import java.io.InputStream
import java.net.Socket

import org.apache.hadoop.io.BytesWritable
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.Logging

class ImageInputDStream(@transient ssc_ : StreamingContext,
                        host: String,
                        port: Int,
                        storageLevel: StorageLevel)
  extends ReceiverInputDStream[BytesWritable](ssc_) with Logging {

  override def getReceiver(): Receiver[BytesWritable] = {
    new ImageRecevier(host, port, storageLevel)
  }
}

class ImageRecevier(host: String,
                    port: Int,
                    storageLevel: StorageLevel)
  extends Receiver[BytesWritable](storageLevel) with Logging {

  override def onStart(): Unit = {
    new Thread("Image Socket") {
      setDaemon(true)
      override def run(): Unit = {
        receive()
      }
    }.start()
  }

  def receive(): Unit = {
    var socket: Socket = null
    var in: InputStream = null
    try {
      log.info("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      log.info("Connected to " + host + ":" + port)
      in = socket.getInputStream
      val buf = new ArrayBuffer[Byte]()
      var bytes = new Array[Byte](1024)
      var len = 0
      while (-1 < len) {
        len = in.read(bytes)
        if (len > 0) {
          // only append the bytes actually read (the original appended the whole 1024-byte buffer)
          buf ++= bytes.take(len)
        }
      }
      val bw = new BytesWritable(buf.toArray)
      log.error("byte:::::" + bw.getLength)
      store(bw)
      log.info("Stopped receiving")
      restart("Retrying connecting to " + host + ":" + port)
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    } finally {
      if (in != null) {
        in.close()
      }
      if (socket != null) {
        socket.close()
        log.info("Closed socket to " + host + ":" + port)
      }
    }
  }

  override def onStop(): Unit = {
  }
}
Example 9
Source File: TwitterInputDStream.scala From spark1.52 with Apache License 2.0
package org.apache.spark.streaming.twitter

import twitter4j._
import twitter4j.auth.Authorization
import twitter4j.conf.ConfigurationBuilder
import twitter4j.auth.OAuthAuthorization

import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class TwitterInputDStream(
    @transient ssc_ : StreamingContext,
    twitterAuth: Option[Authorization],
    filters: Seq[String],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[Status](ssc_) {

  private def createOAuthAuthorization(): Authorization = {
    new OAuthAuthorization(new ConfigurationBuilder().build())
  }

  private val authorization = twitterAuth.getOrElse(createOAuthAuthorization())

  override def getReceiver(): Receiver[Status] = {
    new TwitterReceiver(authorization, filters, storageLevel)
  }
}

private[streaming]
class TwitterReceiver(
    twitterAuth: Authorization,
    filters: Seq[String],
    storageLevel: StorageLevel
  ) extends Receiver[Status](storageLevel) with Logging {

  @volatile private var twitterStream: TwitterStream = _
  @volatile private var stopped = false

  def onStart() {
    try {
      val newTwitterStream = new TwitterStreamFactory().getInstance(twitterAuth)
      newTwitterStream.addListener(new StatusListener {
        def onStatus(status: Status): Unit = {
          store(status)
        }
        // Unimplemented
        def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) {}
        def onTrackLimitationNotice(i: Int) {}
        def onScrubGeo(l: Long, l1: Long) {}
        def onStallWarning(stallWarning: StallWarning) {}
        def onException(e: Exception) {
          if (!stopped) {
            restart("Error receiving tweets", e)
          }
        }
      })

      val query = new FilterQuery
      if (filters.size > 0) {
        query.track(filters.toArray)
        newTwitterStream.filter(query)
      } else {
        newTwitterStream.sample()
      }
      setTwitterStream(newTwitterStream)
      logInfo("Twitter receiver started")
      stopped = false
    } catch {
      case e: Exception => restart("Error starting Twitter stream", e)
    }
  }

  def onStop() {
    stopped = true
    setTwitterStream(null)
    logInfo("Twitter receiver stopped")
  }

  private def setTwitterStream(newTwitterStream: TwitterStream) = synchronized {
    if (twitterStream != null) {
      twitterStream.shutdown()
    }
    twitterStream = newTwitterStream
  }
}
Example 10
Source File: KinesisInputDStream.scala From spark1.52 with Apache License 2.0
package org.apache.spark.streaming.kinesis

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo
import org.apache.spark.streaming.{Duration, StreamingContext, Time}

private[kinesis] class KinesisInputDStream(
    @transient _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[Array[Byte]](_ssc) {

  private[streaming]
  override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[Array[Byte]] = {

    // This returns true even for when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)

    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[Array[Byte]] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, awsCredentialsOption)
  }
}
Example 11
Source File: SocketInputDStream.scala From spark1.52 with Apache License 2.0
package org.apache.spark.streaming.dstream

import scala.util.control.NonFatal

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.storage.StorageLevel
import org.apache.spark.util.NextIterator

import scala.reflect.ClassTag

import java.io._
import java.net.{UnknownHostException, Socket}

import org.apache.spark.Logging
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class SocketInputDStream[T: ClassTag](
    @transient ssc_ : StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](ssc_) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  def onStart() {
    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // There is nothing much to do as the thread calling receive()
    // is designed to stop by itself if isStopped() returns false
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
Example 12
Source File: KinesisInputDStream.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.streaming.kinesis

import scala.reflect.ClassTag

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo

private[kinesis] class KinesisInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    messageHandler: Record => T,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[T](_ssc) {

  private[streaming]
  override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = {

    // This returns true even for when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)

    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        messageHandler = messageHandler,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[T] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption)
  }
}
Example 13
Source File: CustomReceiver.scala From Spark-2.3.1 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

  // Excerpt: the enclosing receiver class declaration and its other members are
  // omitted in this listing; the trailing brace below closes that class.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo(s"Connecting to $host : $port")
      socket = new Socket(host, port)
      logInfo(s"Connected to $host : $port")
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart(s"Error connecting to $host : $port", e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
// scalastyle:on println
Example 14
Source File: SocketInputDStream.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.streaming.dstream

import java.io._
import java.net.{ConnectException, Socket}
import java.nio.charset.StandardCharsets

import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.util.NextIterator

private[streaming]
class SocketInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](_ssc) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  private var socket: Socket = _

  def onStart() {
    logInfo(s"Connecting to $host:$port")
    try {
      socket = new Socket(host, port)
    } catch {
      case e: ConnectException =>
        restart(s"Error connecting to $host:$port", e)
        return
    }
    logInfo(s"Connected to $host:$port")

    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // in case restart thread close it twice
    synchronized {
      if (socket != null) {
        socket.close()
        socket = null
        logInfo(s"Closed socket to $host:$port")
      }
    }
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(
      new InputStreamReader(inputStream, StandardCharsets.UTF_8))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
Example 15
Source File: CustomReceiver.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.io.{InputStreamReader, BufferedReader, InputStream}
import java.net.Socket

import org.apache.spark.{SparkConf, Logging}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

  // Excerpt: the enclosing receiver class declaration and its other members are
  // omitted in this listing; the trailing brace below closes that class.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      logInfo("Connected to " + host + ":" + port)
      val reader = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8"))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
// scalastyle:on println
Example 16
Source File: TwitterInputDStream.scala From BigDatalog with Apache License 2.0
package org.apache.spark.streaming.twitter

import twitter4j._
import twitter4j.auth.Authorization
import twitter4j.conf.ConfigurationBuilder
import twitter4j.auth.OAuthAuthorization

import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class TwitterInputDStream(
    ssc_ : StreamingContext,
    twitterAuth: Option[Authorization],
    filters: Seq[String],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[Status](ssc_) {

  private def createOAuthAuthorization(): Authorization = {
    new OAuthAuthorization(new ConfigurationBuilder().build())
  }

  private val authorization = twitterAuth.getOrElse(createOAuthAuthorization())

  override def getReceiver(): Receiver[Status] = {
    new TwitterReceiver(authorization, filters, storageLevel)
  }
}

private[streaming]
class TwitterReceiver(
    twitterAuth: Authorization,
    filters: Seq[String],
    storageLevel: StorageLevel
  ) extends Receiver[Status](storageLevel) with Logging {

  @volatile private var twitterStream: TwitterStream = _
  @volatile private var stopped = false

  def onStart() {
    try {
      val newTwitterStream = new TwitterStreamFactory().getInstance(twitterAuth)
      newTwitterStream.addListener(new StatusListener {
        def onStatus(status: Status): Unit = {
          store(status)
        }
        // Unimplemented
        def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) {}
        def onTrackLimitationNotice(i: Int) {}
        def onScrubGeo(l: Long, l1: Long) {}
        def onStallWarning(stallWarning: StallWarning) {}
        def onException(e: Exception) {
          if (!stopped) {
            restart("Error receiving tweets", e)
          }
        }
      })

      val query = new FilterQuery
      if (filters.size > 0) {
        query.track(filters.mkString(","))
        newTwitterStream.filter(query)
      } else {
        newTwitterStream.sample()
      }
      setTwitterStream(newTwitterStream)
      logInfo("Twitter receiver started")
      stopped = false
    } catch {
      case e: Exception => restart("Error starting Twitter stream", e)
    }
  }

  def onStop() {
    stopped = true
    setTwitterStream(null)
    logInfo("Twitter receiver stopped")
  }

  private def setTwitterStream(newTwitterStream: TwitterStream) = synchronized {
    if (twitterStream != null) {
      twitterStream.shutdown()
    }
    twitterStream = newTwitterStream
  }
}
Example 17
Source File: MQTTInputDStream.scala From BigDatalog with Apache License 2.0
package org.apache.spark.streaming.mqtt

import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken
import org.eclipse.paho.client.mqttv3.MqttCallback
import org.eclipse.paho.client.mqttv3.MqttClient
import org.eclipse.paho.client.mqttv3.MqttMessage
import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream._
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class MQTTInputDStream(
    ssc_ : StreamingContext,
    brokerUrl: String,
    topic: String,
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[String](ssc_) {

  private[streaming] override def name: String = s"MQTT stream [$id]"

  def getReceiver(): Receiver[String] = {
    new MQTTReceiver(brokerUrl, topic, storageLevel)
  }
}

private[streaming]
class MQTTReceiver(
    brokerUrl: String,
    topic: String,
    storageLevel: StorageLevel
  ) extends Receiver[String](storageLevel) {

  def onStop() {
  }

  def onStart() {
    // Set up persistence for messages
    val persistence = new MemoryPersistence()

    // Initializing Mqtt Client specifying brokerUrl, clientID and MqttClientPersistance
    val client = new MqttClient(brokerUrl, MqttClient.generateClientId(), persistence)

    // Callback automatically triggers as and when new message arrives on specified topic
    val callback = new MqttCallback() {

      // Handles Mqtt message
      override def messageArrived(topic: String, message: MqttMessage) {
        store(new String(message.getPayload(), "utf-8"))
      }

      override def deliveryComplete(token: IMqttDeliveryToken) {
      }

      override def connectionLost(cause: Throwable) {
        restart("Connection lost ", cause)
      }
    }

    // Set up callback for MqttClient. This needs to happen before
    // connecting or subscribing, otherwise messages may be lost
    client.setCallback(callback)

    // Connect to MqttBroker
    client.connect()

    // Subscribe to Mqtt topic
    client.subscribe(topic)
  }
}
Example 18
Source File: KinesisInputDStream.scala From BigDatalog with Apache License 2.0
package org.apache.spark.streaming.kinesis

import scala.reflect.ClassTag

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo
import org.apache.spark.streaming.{Duration, StreamingContext, Time}

private[kinesis] class KinesisInputDStream[T: ClassTag](
    @transient _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    messageHandler: Record => T,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[T](_ssc) {

  private[streaming]
  override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = {

    // This returns true even for when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)

    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        messageHandler = messageHandler,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[T] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption)
  }
}
Example 19
Source File: SocketInputDStream.scala From BigDatalog with Apache License 2.0
package org.apache.spark.streaming.dstream

import scala.util.control.NonFatal

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.storage.StorageLevel
import org.apache.spark.util.NextIterator

import scala.reflect.ClassTag

import java.io._
import java.net.{UnknownHostException, Socket}

import org.apache.spark.Logging
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class SocketInputDStream[T: ClassTag](
    ssc_ : StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](ssc_) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  def onStart() {
    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // There is nothing much to do as the thread calling receive()
    // is designed to stop by itself if isStopped() returns false
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
Example 20
Source File: InfinispanInputDStream.scala From infinispan-spark with Apache License 2.0
package org.infinispan.spark.stream

import java.nio._

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.infinispan.client.hotrod.annotation._
import org.infinispan.client.hotrod.event.{ClientCacheEntryCustomEvent, ClientEvent}
import org.infinispan.client.hotrod.{DataFormat, RemoteCache, RemoteCacheManager}
import org.infinispan.commons.configuration.ClassWhiteList
import org.infinispan.commons.io.UnsignedNumeric
import org.infinispan.spark._
import org.infinispan.spark.config.ConnectorConfiguration
import org.infinispan.spark.rdd.RemoteCacheManagerBuilder

class InfinispanInputDStream[K, V](@transient val ssc_ : StreamingContext,
                                   storage: StorageLevel,
                                   configuration: ConnectorConfiguration,
                                   includeState: Boolean = false)
  extends ReceiverInputDStream[(K, V, ClientEvent.Type)](ssc_) {

  override def getReceiver(): Receiver[(K, V, ClientEvent.Type)] =
    new EventsReceiver(storage, configuration, includeState)
}

private class EventsReceiver[K, V](storageLevel: StorageLevel,
                                   configuration: ConnectorConfiguration,
                                   includeState: Boolean)
  extends Receiver[(K, V, ClientEvent.Type)](storageLevel) {

  @transient private lazy val listener =
    if (includeState) new EventListenerWithState(remoteCache.getDataFormat)
    else new EventListenerWithoutState(remoteCache.getDataFormat)

  @transient private var cacheManager: RemoteCacheManager = _
  @transient private var remoteCache: RemoteCache[K, V] = _

  override def onStart(): Unit = {
    cacheManager = RemoteCacheManagerBuilder.create(configuration)
    remoteCache = getCache[K, V](configuration, cacheManager)
    remoteCache.addClientListener(listener)
  }

  override def onStop(): Unit = {
    if (cacheManager != null) {
      cacheManager.stop()
      cacheManager = null
    }
  }

  private sealed trait EventListener {

    var dataFormat: DataFormat

    @ClientCacheEntryRemoved
    @ClientCacheEntryExpired
    def onRemove(event: ClientCacheEntryCustomEvent[Array[Byte]]) {
      emitEvent(event, ignoreValue = true)
    }

    @ClientCacheEntryCreated
    @ClientCacheEntryModified
    def onAddModify(event: ClientCacheEntryCustomEvent[Array[Byte]]) {
      emitEvent(event, ignoreValue = false)
    }

    private def emitEvent(event: ClientCacheEntryCustomEvent[Array[Byte]], ignoreValue: Boolean) = {
      val eventData = event.getEventData
      val rawData = ByteBuffer.wrap(eventData)
      val rawKey = readElement(rawData)
      val classWhiteList = new ClassWhiteList()
      val key: K = dataFormat.keyToObj[K](rawKey, new ClassWhiteList())
      val value = if (!ignoreValue) {
        val rawValue = readElement(rawData)
        dataFormat.valueToObj[V](rawValue, classWhiteList)
      } else null.asInstanceOf[V]
      store((key, value, event.getType))
    }

    private def readElement(in: ByteBuffer): Array[Byte] = {
      val length = UnsignedNumeric.readUnsignedInt(in)
      val element = new Array[Byte](length)
      in.get(element)
      element
    }
  }

  @ClientListener(converterFactoryName = "___eager-key-value-version-converter", useRawData = true, includeCurrentState = true)
  private class EventListenerWithState(var dataFormat: DataFormat) extends EventListener

  @ClientListener(converterFactoryName = "___eager-key-value-version-converter", useRawData = true, includeCurrentState = false)
  private class EventListenerWithoutState(var dataFormat: DataFormat) extends EventListener
}
Example 21
Source File: StreamingUtils.scala From infinispan-spark with Apache License 2.0
package org.infinispan.spark.test

import java.time.{Duration => JDuration}
import java.util.concurrent.TimeUnit
import java.util.{List => JList}

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

import scala.annotation.meta.param
import scala.collection.JavaConverters._
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

object StreamingUtils {

  class TestReceiver[T](of: Seq[T], streamItemEvery: Duration)
    extends Receiver[T](StorageLevel.MEMORY_ONLY) {

    override def onStart(): Unit = {
      of.foreach { item =>
        Thread.sleep(streamItemEvery.toMillis)
        store(item)
      }
    }

    override def onStop(): Unit = {}
  }

  class TestInputDStream[T: ClassTag](@(transient@param) ssc_ : StreamingContext,
                                      of: Seq[T],
                                      streamItemEvery: Duration)
    extends ReceiverInputDStream[T](ssc_) {

    override def getReceiver(): Receiver[T] = new TestReceiver[T](of, streamItemEvery)
  }

  def createJavaReceiverDInputStream[T](jssc: JavaStreamingContext,
                                        of: JList[T],
                                        streamItemEvery: JDuration): JavaReceiverInputDStream[T] = {
    implicit val cmt: ClassTag[T] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[T]]
    JavaReceiverInputDStream.fromReceiverInputDStream(
      new TestInputDStream[T](jssc.ssc, of.asScala, Duration(streamItemEvery.getNano, TimeUnit.NANOSECONDS)))
  }
}
Example 22
Source File: FileStreamReader.scala From piglet with Apache License 2.0
package dbis.piglet.backends.spark

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

import scala.io.Source
import java.io.{ FileNotFoundException, IOException }

import org.apache.spark.streaming.scheduler._
import org.apache.spark.streaming.StreamingContext

class FileStreamReader(file: String, @transient val ssc: StreamingContext)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  def onStart() {
    // Start the thread that reads data from a file
    new Thread("FileStreamReader") {
      override def run() {
        receive()
      }
    }.start()
  }

  def onStop() {
    // There is nothing to do here
  }

  private def receive() {
    try {
      for (line <- Source.fromFile(file).getLines()) {
        store(line)
        //Thread sleep 1000 // for testing
      }
      //stop("stopped ...") // stop receiver
      //ssc.stop()
      //SparkStream.ssc.stop(true, true) // stop streaming context gracefully
    } catch {
      case ex: FileNotFoundException => println(s"Could not find $file file.")
      case ex: IOException => println(s"Had an IOException during reading $file file")
    } finally {
      stop("Stopped Receiver")
      ssc.stop(true, true)
      SparkStream.ssc.stop(true, true)
      //sys.exit()
    }
  }
}

class FileReader(ssc: StreamingContext) {
  def readFile(file: String) = ssc.receiverStream(new FileStreamReader(file, ssc))
}

object FileStreamReader {
  implicit def customFileStreamReader(ssc: StreamingContext) = new FileReader(ssc)
}
Example 23
Source File: HttpInputDStream.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark

import java.util.Timer
import java.util.TimerTask

import scala.reflect.ClassTag

import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.CloseableHttpClient
import org.apache.http.impl.client.HttpClients
import org.apache.http.util.EntityUtils
import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.api.java.JavaDStream.fromDStream
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

class HttpInputDStream(
    @transient ssc_ : StreamingContext,
    storageLevel: StorageLevel,
    url: String,
    interval: Long) extends ReceiverInputDStream[String](ssc_) with Logging {

  def getReceiver(): Receiver[String] = {
    new HttpReceiver(storageLevel, url, interval)
  }
}

class HttpReceiver(
    storageLevel: StorageLevel,
    url: String,
    interval: Long) extends Receiver[String](storageLevel) with Logging {

  var httpClient: CloseableHttpClient = _
  var trigger: Timer = _

  def onStop() {
    httpClient.close()
    logInfo("Disconnected from Http Server")
  }

  def onStart() {
    httpClient = HttpClients.createDefault()
    trigger = new Timer()
    trigger.scheduleAtFixedRate(new TimerTask {
      def run() = doGet()
    }, 0, interval * 1000)
    logInfo("Http Receiver initiated")
  }

  def doGet() {
    logInfo("Fetching data from Http source")
    val response = httpClient.execute(new HttpGet(url))
    try {
      val content = EntityUtils.toString(response.getEntity())
      store(content)
    } catch {
      case e: Exception => restart("Error! Problems while connecting", e)
    } finally {
      response.close()
    }
  }
}

object HttpUtils {
  def createStream(
    ssc: StreamingContext,
    storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
    url: String,
    interval: Long): DStream[String] = {
    new HttpInputDStream(ssc, storageLevel, url, interval)
  }

  def createStream(
    jssc: JavaStreamingContext,
    storageLevel: StorageLevel,
    url: String,
    interval: Long): JavaDStream[String] = {
    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
    createStream(jssc.ssc, storageLevel, url, interval)
  }
}
Example 24
Source File: KinesisInputDStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.kinesis

import scala.reflect.ClassTag

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo

private[kinesis] class KinesisInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    messageHandler: Record => T,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[T](_ssc) {

  private[streaming]
  override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = {

    // This returns true even for when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)

    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        messageHandler = messageHandler,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[T] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption)
  }
}
Example 25
Source File: FlumeInputDStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.flume

import java.io.{Externalizable, ObjectInput, ObjectOutput}
import java.net.InetSocketAddress
import java.nio.ByteBuffer
import java.util.concurrent.Executors

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.avro.ipc.NettyServer
import org.apache.avro.ipc.specific.SpecificResponder
import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol, Status}
import org.jboss.netty.channel.{ChannelPipeline, ChannelPipelineFactory, Channels}
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory
import org.jboss.netty.handler.codec.compression._

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream._
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.util.Utils

private[streaming] class FlumeInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    host: String,
    port: Int,
    storageLevel: StorageLevel,
    enableDecompression: Boolean
  ) extends ReceiverInputDStream[SparkFlumeEvent](_ssc) {

  override def getReceiver(): Receiver[SparkFlumeEvent] = {
    new FlumeReceiver(host, port, storageLevel, enableDecompression)
  }
}

// Excerpt: other classes defined in this source file (including the FlumeReceiver
// implementation) are omitted in this listing.
private[streaming]
class CompressionChannelPipelineFactory extends ChannelPipelineFactory {
  def getPipeline(): ChannelPipeline = {
    val pipeline = Channels.pipeline()
    val encoder = new ZlibEncoder(6)
    pipeline.addFirst("deflater", encoder)
    pipeline.addFirst("inflater", new ZlibDecoder())
    pipeline
  }
// The closing brace below belongs to a class omitted from this excerpt.
}
}
Example 26
Source File: SocketInputDStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.dstream

import java.io._
import java.net.{ConnectException, Socket}
import java.nio.charset.StandardCharsets

import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.util.NextIterator

private[streaming]
class SocketInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](_ssc) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  private var socket: Socket = _

  def onStart() {
    logInfo(s"Connecting to $host:$port")
    try {
      socket = new Socket(host, port)
    } catch {
      case e: ConnectException =>
        restart(s"Error connecting to $host:$port", e)
        return
    }
    logInfo(s"Connected to $host:$port")

    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // in case restart thread close it twice
    synchronized {
      if (socket != null) {
        socket.close()
        socket = null
        logInfo(s"Closed socket to $host:$port")
      }
    }
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(
      new InputStreamReader(inputStream, StandardCharsets.UTF_8))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
Example 27
Source File: WebSocketReceiver.scala From spark-streaming-demo with Apache License 2.0
package com.datastax.examples.meetup.websocket

import com.datastax.examples.meetup.model._
import org.apache.spark.storage.StorageLevel

import scalawebsocket.WebSocket

import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.Logging

import org.json4s._
import org.json4s.jackson.JsonMethods._

class WebSocketReceiver(url: String, storageLevel: StorageLevel)
  extends Receiver[MeetupRsvp](storageLevel) with Logging {

  @volatile private var webSocket: WebSocket = _

  def onStart() {
    try {
      logInfo("Connecting to WebSocket: " + url)
      val newWebSocket = WebSocket().open(url).onTextMessage({ msg: String => parseJson(msg) })
      setWebSocket(newWebSocket)
      logInfo("Connected to: WebSocket" + url)
    } catch {
      case e: Exception => restart("Error starting WebSocket stream", e)
    }
  }

  def onStop() {
    setWebSocket(null)
    logInfo("WebSocket receiver stopped")
  }

  private def setWebSocket(newWebSocket: WebSocket) = synchronized {
    if (webSocket != null) {
      webSocket.shutdown()
    }
    webSocket = newWebSocket
  }

  private def parseJson(jsonStr: String): Unit = {
    implicit lazy val formats = DefaultFormats

    try {
      val json = parse(jsonStr)
      val rsvp = json.extract[MeetupRsvp]
      store(rsvp)
    } catch {
      case e: MappingException =>
        logError("Unable to map JSON message to MeetupRsvp object:" + e.msg)
      case e: Exception =>
        logError("Unable to map JSON message to MeetupRsvp object")
    }
  }
}
Example 28
Source File: RedisInputDStream.scala From spark-redis with BSD 3-Clause "New" or "Revised" License
package com.redislabs.provider.redis.streaming

import com.redislabs.provider.redis.RedisConfig
import org.apache.curator.utils.ThreadUtils
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import redis.clients.jedis._

import scala.reflect.{ClassTag, classTag}
import scala.util.control.NonFatal

  // Excerpt: the enclosing DStream and receiver class declarations, plus the start of
  // the method that defines `keys` and `executorPool`, are omitted in this listing.
      keys.foreach { key =>
        executorPool.submit(new MessageHandler(redisConfig.connectionForKey(key), key))
      }
    } finally {
      executorPool.shutdown()
    }
  }

  def onStop() {
  }

  private class MessageHandler(conn: Jedis, key: String) extends Runnable {
    def run() {
      try {
        while (!isStopped) {
          val response = conn.blpop(2, key)
          if (response == null || response.isEmpty) {
            // no-op
          } else if (classTag[T] == classTag[String]) {
            store(response.get(1).asInstanceOf[T])
          } else if (classTag[T] == classTag[(String, String)]) {
            store((response.get(0), response.get(1)).asInstanceOf[T])
          } else {
            throw new scala.Exception("Unknown Redis Streaming type")
          }
        }
      } catch {
        case NonFatal(e) => restart("Error receiving data", e)
      } finally {
        onStop()
      }
    }
  }
}
Example 29
Source File: CustomReceiver.scala From Learning-Spark-SQL with MIT License
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

  // Excerpt: the enclosing receiver class declaration and its other members are
  // omitted in this listing; the trailing brace below closes that class.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      println("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      println("Connected to " + host + ":" + port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      println("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
Example 30
Source File: TFLArrivalPredictionsByLine.scala From Learning-Spark-SQL with MIT License
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import org.jfarcand.wcs.{TextListener, WebSocket}

import scala.util.parsing.json.JSON
import scalaj.http.Http

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;

class TFLArrivalPredictionsByLine() extends Receiver[String](StorageLevel.MEMORY_ONLY) with Runnable {
  private val tflUrl = "https://api.tfl.gov.uk/Line/circle/Arrivals?stopPointId=940GZZLUERC&app_id=a73727f3&app_key=dc8150560a2422afae2b70cf291c4327"
  @transient
  private var thread: Thread = _

  override def onStart(): Unit = {
    thread = new Thread(this)
    thread.start()
  }

  override def onStop(): Unit = {
    thread.interrupt()
  }

  override def run(): Unit = {
    while (true) {
      receive();
      Thread.sleep(60 * 1000);
    }
  }

  private def receive(): Unit = {
    val httpClient = new DefaultHttpClient();
    val getRequest = new HttpGet(tflUrl);
    getRequest.addHeader("accept", "application/json");

    val response = httpClient.execute(getRequest);
    if (response.getStatusLine().getStatusCode() != 200) {
      throw new RuntimeException("Failed : HTTP error code : "
        + response.getStatusLine().getStatusCode());
    }
    val br = new BufferedReader(
      new InputStreamReader((response.getEntity().getContent())));

    var output = br.readLine();
    while (output != null) {
      println(output)
      output = br.readLine()
    }
  }
}
Example 31
Source File: TFLCustomReceiver.scala From Learning-Spark-SQL with MIT License | 5 votes |
import java.io.BufferedReader
import java.io.IOException
import java.io.InputStreamReader

import org.apache.http.HttpResponse
import org.apache.http.client.ClientProtocolException
import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.DefaultHttpClient

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object TFLCustomReceiver {
  private val url = "https://api.tfl.gov.uk/Line/circle/Arrivals?stopPointId=940GZZLUERC&app_id=a73727f3&app_key=dc8150560a2422afae2b70cf291c4327"

  def main(args: Array[String]) {
    // Create the context with a 300-second batch interval
    val sparkConf = new SparkConf().setAppName("TFLCustomReceiver")
    val ssc = new StreamingContext(sparkConf, Seconds(300))
    val lines = ssc.receiverStream(new TFLCustomReceiver(url))
    lines.print()
    ssc.start()
    ssc.awaitTermination()
  }
}

class TFLCustomReceiver(url: String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  def onStart() {
    // Start the thread that receives data over a connection
    new Thread("Http Receiver") {
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // There is nothing much to do, as the thread calling receive()
    // is designed to stop by itself once isStopped() returns true
  }

  private def receive() {
    var userInput: String = null
    var httpClient: DefaultHttpClient = null
    var getRequest: HttpGet = null
    try {
      // Connect to the HTTP endpoint
      httpClient = new DefaultHttpClient()
      getRequest = new HttpGet(url)
      getRequest.addHeader("accept", "application/json")

      while (!isStopped) {
        val response = httpClient.execute(getRequest)
        if (response.getStatusLine().getStatusCode() != 200) {
          throw new RuntimeException("Failed : HTTP error code : " +
            response.getStatusLine().getStatusCode())
        }
        val reader = new BufferedReader(new InputStreamReader(response.getEntity().getContent()))
        userInput = reader.readLine()
        while (userInput != null) {
          store(userInput)
          // println(userInput)
          userInput = reader.readLine()
        }
        reader.close()
        Thread.sleep(60 * 1000)
      }
      httpClient.close()
      // Restart in an attempt to connect again when the server is active again
      // restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        // restart if we could not connect to the server
        restart("Error connecting to " + url, e)
      case t: Throwable =>
        // restart if there is any other error
        restart("Error receiving data", t)
    }
  }
}
Example 32
Source File: StressReceiver.scala From spark-cassandra-stress with Apache License 2.0 | 5 votes |
package com.datastax.sparkstress

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import org.apache.log4j.Logger

class StressReceiver[T](
    index: Int,
    rowGenerator: RowGenerator[T],
    config: Config,
    blockIntervalInMs: Int,
    storageLevel: StorageLevel)
  extends Receiver[T](storageLevel) {

  class EmitterThread(receiver: StressReceiver[_]) extends Thread(s"Emitter$index") {
    override def run(): Unit = {
      val rowIterator = rowGenerator.generatePartition(config.seed, index)
      val throughPutPerBlockInterval =
        (blockIntervalInMs / (config.streamingBatchIntervalSeconds * 1000.0) *
          config.receiverThroughputPerBatch).toLong
      while (rowIterator.hasNext) {
        val batchBegin = System.currentTimeMillis()
        for (x <- 1L to throughPutPerBlockInterval if rowIterator.hasNext) {
          store(rowIterator.next())
        }
        val batchEnd = System.currentTimeMillis()
        val napTime = blockIntervalInMs - (batchEnd - batchBegin)
        if (napTime > 0) Thread.sleep(napTime)
      }
      receiver.stop("Iterator Empty")
    }
  }

  def onStart() = {
    new EmitterThread(this).start()
  }

  def onStop() = {
  }
}
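The index parameter lets each receiver generate a disjoint partition of rows, so several receivers are typically attached and their streams unioned. The helper below is only a sketch of that pattern; the rowGenerator, config and parallelism values are assumed to come from the surrounding spark-cassandra-stress harness and are not taken from the project itself.

import scala.reflect.ClassTag
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream

object StressStreamSketch {
  // Attach one StressReceiver per partition index and union the resulting streams.
  def attachStressReceivers[T: ClassTag](
      ssc: StreamingContext,
      rowGenerator: RowGenerator[T],
      config: Config,
      numReceivers: Int,
      blockIntervalInMs: Int): DStream[T] = {
    val streams = (0 until numReceivers).map { i =>
      ssc.receiverStream(
        new StressReceiver[T](i, rowGenerator, config, blockIntervalInMs, StorageLevel.MEMORY_ONLY))
    }
    ssc.union(streams)
  }
}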
Example 33
Source File: HttpInputDStreamAsync.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import scala.reflect.ClassTag

import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.api.java.JavaDStream.fromDStream
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

import com.ning.http.client.AsyncCompletionHandler
import com.ning.http.client.AsyncHttpClient
import com.ning.http.client.Response

class HttpInputDStreamAsync(
    @transient ssc_ : StreamingContext,
    storageLevel: StorageLevel,
    url: String) extends ReceiverInputDStream[String](ssc_) with Logging {

  def getReceiver(): Receiver[String] = {
    new HttpReceiverAsync(storageLevel, url)
  }
}

class HttpReceiverAsync(
    storageLevel: StorageLevel,
    url: String) extends Receiver[String](storageLevel) with Logging {

  var asyncHttpClient: AsyncHttpClient = _

  def onStop() {
    asyncHttpClient.close()
    logInfo("Disconnected from Http Server")
  }

  def onStart() {
    asyncHttpClient = new AsyncHttpClient()
    asyncHttpClient.prepareGet(url).execute(new AsyncCompletionHandler[Response]() {

      override def onCompleted(response: Response): Response = {
        store(response.getResponseBody)
        response
      }

      override def onThrowable(t: Throwable) {
        restart("Error! Problems while connecting", t)
      }
    })
    logInfo("Http Connection initiated")
  }
}

object HttpUtilsAsync {
  def createStream(
      ssc: StreamingContext,
      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
      url: String): DStream[String] = {
    new HttpInputDStreamAsync(ssc, storageLevel, url)
  }

  def createStream(
      jssc: JavaStreamingContext,
      storageLevel: StorageLevel,
      url: String): JavaDStream[String] = {
    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
    createStream(jssc.ssc, storageLevel, url)
  }
}
Example 34
Source File: CustomReceiver.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

// Excerpt: the surrounding CustomReceiver class definition is elided; only its receive() helper is shown.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      logInfo("Connected to " + host + ":" + port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
// scalastyle:on println
Example 35
Source File: HttpInputDStream.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark

import java.util.Timer
import java.util.TimerTask

import scala.reflect.ClassTag

import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.CloseableHttpClient
import org.apache.http.impl.client.HttpClients
import org.apache.http.util.EntityUtils
import org.apache.spark.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.api.java.JavaDStream
import org.apache.spark.streaming.api.java.JavaDStream.fromDStream
import org.apache.spark.streaming.api.java.JavaStreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

class HttpInputDStream(
    @transient ssc_ : StreamingContext,
    storageLevel: StorageLevel,
    url: String,
    interval: Long) extends ReceiverInputDStream[String](ssc_) with Logging {

  def getReceiver(): Receiver[String] = {
    new HttpReceiver(storageLevel, url, interval)
  }
}

class HttpReceiver(
    storageLevel: StorageLevel,
    url: String,
    interval: Long) extends Receiver[String](storageLevel) with Logging {

  var httpClient: CloseableHttpClient = _
  var trigger: Timer = _

  def onStop() {
    httpClient.close()
    logInfo("Disconnected from Http Server")
  }

  def onStart() {
    httpClient = HttpClients.createDefault()
    trigger = new Timer()
    trigger.scheduleAtFixedRate(new TimerTask {
      def run() = doGet()
    }, 0, interval * 1000)
    logInfo("Http Receiver initiated")
  }

  def doGet() {
    logInfo("Fetching data from Http source")
    val response = httpClient.execute(new HttpGet(url))
    try {
      val content = EntityUtils.toString(response.getEntity())
      store(content)
    } catch {
      case e: Exception => restart("Error! Problems while connecting", e)
    } finally {
      response.close()
    }
  }
}

object HttpUtils {
  def createStream(
      ssc: StreamingContext,
      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2,
      url: String,
      interval: Long): DStream[String] = {
    new HttpInputDStream(ssc, storageLevel, url, interval)
  }

  def createStream(
      jssc: JavaStreamingContext,
      storageLevel: StorageLevel,
      url: String,
      interval: Long): JavaDStream[String] = {
    implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[String]]
    createStream(jssc.ssc, storageLevel, url, interval)
  }
}
Example 36
Source File: WebSocketReceiver.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.input.websocket

import akka.event.slf4j.SLF4JLogging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import org.jfarcand.wcs.WebSocket

class WebSocketReceiver(url: String, storageLevel: StorageLevel)
  extends Receiver[String](storageLevel) with SLF4JLogging {

  private var webSocket: Option[WebSocket] = None

  def onStart() {
    try {
      log.info("Connecting to WebSocket: " + url)
      val newWebSocket = WebSocket().open(url)
        .onTextMessage({ msg: String => store(msg) })
        .onBinaryMessage({ msg: Array[Byte] => store(new Predef.String(msg)) })
      setWebSocket(Option(newWebSocket))
      log.info("Connected to WebSocket: " + url)
    } catch {
      case e: Exception => restart("Error starting WebSocket stream", e)
    }
  }

  def onStop() {
    setWebSocket()
    log.info("WebSocket receiver stopped")
  }

  private def setWebSocket(newWebSocket: Option[WebSocket] = None) = synchronized {
    if (webSocket.isDefined) webSocket.get.shutdown()
    webSocket = newWebSocket
  }
}
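A minimal sketch of plugging this receiver into a streaming job follows; the WebSocket endpoint, app name and batch interval are placeholders, not values from the Sparta project.

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}

object WebSocketStreamApp {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(new SparkConf().setAppName("WebSocketStream"), Seconds(10))
    // "ws://localhost:9999/events" is an illustrative endpoint.
    val messages = ssc.receiverStream(
      new WebSocketReceiver("ws://localhost:9999/events", StorageLevel.MEMORY_ONLY))
    messages.print()
    ssc.start()
    ssc.awaitTermination()
  }
}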
Example 37
Source File: CustomReceiver.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

// Excerpt: the surrounding CustomReceiver class definition is elided; only its receive() helper is shown.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      logInfo("Connected to " + host + ":" + port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
// scalastyle:on println
Example 38
Source File: KinesisInputDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis

import scala.reflect.ClassTag

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo

private[kinesis] class KinesisInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    messageHandler: Record => T,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[T](_ssc) {

  private[streaming] override def createBlockRDD(
      time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = {
    // This returns true even when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)
    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        messageHandler = messageHandler,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[T] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption)
  }
}
Example 39
Source File: SocketInputDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io._
import java.net.{ConnectException, Socket}
import java.nio.charset.StandardCharsets

import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.util.NextIterator

private[streaming]
class SocketInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](_ssc) {

  def getReceiver(): Receiver[T] = {
    new SocketReceiver(host, port, bytesToObjects, storageLevel)
  }
}

private[streaming]
class SocketReceiver[T: ClassTag](
    host: String,
    port: Int,
    bytesToObjects: InputStream => Iterator[T],
    storageLevel: StorageLevel
  ) extends Receiver[T](storageLevel) with Logging {

  private var socket: Socket = _

  def onStart() {
    logInfo(s"Connecting to $host:$port")
    try {
      socket = new Socket(host, port)
    } catch {
      case e: ConnectException =>
        restart(s"Error connecting to $host:$port", e)
        return
    }
    logInfo(s"Connected to $host:$port")

    // Start the thread that receives data over a connection
    new Thread("Socket Receiver") {
      setDaemon(true)
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // in case restart thread close it twice
    synchronized {
      if (socket != null) {
        socket.close()
        socket = null
        logInfo(s"Closed socket to $host:$port")
      }
    }
  }

  def bytesToLines(inputStream: InputStream): Iterator[String] = {
    val dataInputStream = new BufferedReader(
      new InputStreamReader(inputStream, StandardCharsets.UTF_8))
    new NextIterator[String] {
      protected override def getNext() = {
        val nextValue = dataInputStream.readLine()
        if (nextValue == null) {
          finished = true
        }
        nextValue
      }

      protected override def close() {
        dataInputStream.close()
      }
    }
  }
}
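In user code this DStream is normally obtained through StreamingContext.socketTextStream rather than constructed directly, since the class is private to the streaming package. A minimal sketch follows; the host, port and batch interval are placeholders.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object SocketLinesApp {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(new SparkConf().setAppName("SocketLines"), Seconds(1))
    // socketTextStream builds a SocketInputDStream[String] under the hood,
    // using a line-oriented converter much like bytesToLines above.
    val lines = ssc.socketTextStream("localhost", 9999)
    lines.print()
    ssc.start()
    ssc.awaitTermination()
  }
}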
Example 40
Source File: AMQPInputDStream.scala From streaming-amqp with Apache License 2.0 | 5 votes |
package io.radanalytics.streaming.amqp

import org.apache.qpid.proton.message.Message
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.amqp.ReliableAMQPReceiver
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver

import scala.reflect.ClassTag

class AMQPInputDStream[T: ClassTag](
    ssc: StreamingContext,
    host: String,
    port: Int,
    username: Option[String],
    password: Option[String],
    address: String,
    messageConverter: Message => Option[T],
    useReliableReceiver: Boolean,
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[T](ssc) {

  def getReceiver(): Receiver[T] = {
    if (!useReliableReceiver) {
      new AMQPReceiver(host, port, username, password, address, messageConverter, storageLevel)
    } else {
      new ReliableAMQPReceiver(host, port, username, password, address, messageConverter, storageLevel)
    }
  }
}
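A driver can instantiate this DStream directly. The helper below is only a sketch in the style of the HttpUtils objects earlier on this page; it is not part of the streaming-amqp project's own API, and the string converter for the Proton message body is an assumption made for illustration.

import org.apache.qpid.proton.amqp.messaging.AmqpValue
import org.apache.qpid.proton.message.Message
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream

object AMQPStreamSketch {
  // Naive converter for this sketch: keep only AmqpValue bodies, rendered as strings.
  private def bodyAsString(message: Message): Option[String] = message.getBody match {
    case v: AmqpValue => Option(v.getValue).map(_.toString)
    case _            => None
  }

  def createStream(
      ssc: StreamingContext,
      host: String,
      port: Int,
      address: String,
      storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2): DStream[String] = {
    new AMQPInputDStream[String](ssc, host, port, None, None, address,
      bodyAsString, useReliableReceiver = false, storageLevel)
  }
}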
Example 41
Source File: ChangesReceiver.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.cloudant.internal

import java.io.{BufferedReader, InputStreamReader}
import java.util.concurrent.TimeUnit

import com.google.gson.JsonParser
import okhttp3._

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

import org.apache.bahir.cloudant.CloudantChangesConfig
import org.apache.bahir.cloudant.common._

class ChangesReceiver(config: CloudantChangesConfig)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK) {

  def onStart() {
    // Start the thread that receives data over a connection
    new Thread("Cloudant Receiver") {
      override def run() { receive() }
    }.start()
  }

  private def receive(): Unit = {
    val okHttpClient: OkHttpClient = new OkHttpClient.Builder()
      .connectTimeout(5, TimeUnit.SECONDS)
      .readTimeout(60, TimeUnit.SECONDS)
      .build
    val url = config.getChangesReceiverUrl.toString
    val builder = new Request.Builder().url(url)
    if (config.username != null) {
      val credential = Credentials.basic(config.username, config.password)
      builder.header("Authorization", credential)
    }
    if (config.getSelector != null) {
      val jsonType = MediaType.parse("application/json; charset=utf-8")
      val selector = "{\"selector\":" + config.getSelector + "}"
      val selectorBody = RequestBody.create(jsonType, selector)
      builder.post(selectorBody)
    }

    val request = builder.build
    val response = okHttpClient.newCall(request).execute
    val status_code = response.code

    if (status_code == 200) {
      val changesInputStream = response.body.byteStream
      if (changesInputStream != null) {
        val bufferedReader = new BufferedReader(new InputStreamReader(changesInputStream))
        // Read rows until the scanner returns null. (In Scala an assignment has type Unit,
        // so the original `while ((json = ...) != null)` idiom never tests the row itself;
        // reading before the test preserves the intended behaviour.)
        var json = ChangesRowScanner.readRowFromReader(bufferedReader)
        while (!isStopped() && json != null) {
          if (!json.getDoc.has("_deleted")) {
            store(json.getDoc.toString)
          }
          json = ChangesRowScanner.readRowFromReader(bufferedReader)
        }
      }
    } else {
      val responseAsJson = new JsonParser().parse(response.body.string)
      val errorMsg = "Error retrieving _changes feed data from database " +
        "'" + config.getDbname + "' with response code " + status_code + ": " +
        responseAsJson.toString
      reportError(errorMsg, new CloudantException(errorMsg))
      CloudantChangesConfig.receiverErrorMsg = errorMsg
    }
  }

  override def onStop(): Unit = {
  }
}
Example 42
Source File: CloudantReceiver.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.cloudant

import java.io.{BufferedReader, InputStreamReader}
import java.util.concurrent.TimeUnit

import okhttp3._

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

import org.apache.bahir.cloudant.common._

class CloudantReceiver(sparkConf: SparkConf, cloudantParams: Map[String, String])
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK) {

  // CloudantChangesConfig requires the `_changes` endpoint option
  lazy val config: CloudantChangesConfig = {
    JsonStoreConfigManager.getConfig(sparkConf, cloudantParams +
      ("cloudant.endpoint" -> JsonStoreConfigManager.CHANGES_INDEX)
    ).asInstanceOf[CloudantChangesConfig]
  }

  def onStart() {
    // Start the thread that receives data over a connection
    new Thread("Cloudant Receiver") {
      override def run() { receive() }
    }.start()
  }

  private def receive(): Unit = {
    val okHttpClient: OkHttpClient = new OkHttpClient.Builder()
      .connectTimeout(5, TimeUnit.SECONDS)
      .readTimeout(60, TimeUnit.SECONDS)
      .build
    val url = config.getChangesReceiverUrl.toString
    val builder = new Request.Builder().url(url)
    if (config.username != null) {
      val credential = Credentials.basic(config.username, config.password)
      builder.header("Authorization", credential)
    }
    if (config.getSelector != null) {
      val jsonType = MediaType.parse("application/json; charset=utf-8")
      val selector = "{\"selector\":" + config.getSelector + "}"
      val selectorBody = RequestBody.create(jsonType, selector)
      builder.post(selectorBody)
    }

    val request = builder.build
    val response = okHttpClient.newCall(request).execute
    val status_code = response.code

    if (status_code == 200) {
      val changesInputStream = response.body.byteStream
      if (changesInputStream != null) {
        val bufferedReader = new BufferedReader(new InputStreamReader(changesInputStream))
        // Read rows until the scanner returns null (see the note in ChangesReceiver above).
        var json = ChangesRowScanner.readRowFromReader(bufferedReader)
        while (!isStopped() && json != null) {
          if (!json.getDoc.has("_deleted")) {
            store(json.getDoc.toString)
          }
          json = ChangesRowScanner.readRowFromReader(bufferedReader)
        }
      }
    } else {
      val errorMsg = "Error retrieving _changes feed " + config.getDbname + ": " + status_code
      reportError(errorMsg, new CloudantException(errorMsg))
    }
  }

  def onStop(): Unit = {
  }
}
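A minimal sketch of attaching this receiver follows. The Cloudant option keys in the map are illustrative placeholders rather than a definitive list; consult the connector's documentation for the exact names it expects.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object CloudantChangesApp {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("CloudantChanges")
    val ssc = new StreamingContext(sparkConf, Seconds(10))
    // Placeholder option keys and values.
    val params = Map(
      "cloudant.host" -> "ACCOUNT.cloudant.com",
      "cloudant.username" -> "USERNAME",
      "cloudant.password" -> "PASSWORD",
      "database" -> "DBNAME")
    val changes = ssc.receiverStream(new CloudantReceiver(sparkConf, params))
    changes.print()
    ssc.start()
    ssc.awaitTermination()
  }
}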
Example 43
Source File: TwitterInputDStream.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.twitter

import twitter4j._
import twitter4j.auth.Authorization
import twitter4j.auth.OAuthAuthorization
import twitter4j.conf.ConfigurationBuilder

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream._
import org.apache.spark.streaming.receiver.Receiver

private[streaming]
class TwitterInputDStream(
    _ssc: StreamingContext,
    twitterAuth: Option[Authorization],
    query: Option[FilterQuery],
    storageLevel: StorageLevel
  ) extends ReceiverInputDStream[Status](_ssc) {

  private def createOAuthAuthorization(): Authorization = {
    new OAuthAuthorization(new ConfigurationBuilder().build())
  }

  private val authorization = twitterAuth.getOrElse(createOAuthAuthorization())

  override def getReceiver(): Receiver[Status] = {
    new TwitterReceiver(authorization, query, storageLevel)
  }
}

private[streaming]
class TwitterReceiver(
    twitterAuth: Authorization,
    query: Option[FilterQuery],
    storageLevel: StorageLevel
  ) extends Receiver[Status](storageLevel) with Logging {

  @volatile private var twitterStream: TwitterStream = _
  @volatile private var stopped = false

  def onStart() {
    try {
      val newTwitterStream = new TwitterStreamFactory().getInstance(twitterAuth)
      newTwitterStream.addListener(new StatusListener {
        def onStatus(status: Status): Unit = {
          store(status)
        }
        // Unimplemented
        def onDeletionNotice(statusDeletionNotice: StatusDeletionNotice) {}
        def onTrackLimitationNotice(i: Int) {}
        def onScrubGeo(l: Long, l1: Long) {}
        def onStallWarning(stallWarning: StallWarning) {}
        def onException(e: Exception) {
          if (!stopped) {
            restart("Error receiving tweets", e)
          }
        }
      })

      if (query.isDefined) {
        newTwitterStream.filter(query.get)
      } else {
        newTwitterStream.sample()
      }
      setTwitterStream(newTwitterStream)
      logInfo("Twitter receiver started")
      stopped = false
    } catch {
      case e: Exception => restart("Error starting Twitter stream", e)
    }
  }

  def onStop() {
    stopped = true
    setTwitterStream(null)
    logInfo("Twitter receiver stopped")
  }

  private def setTwitterStream(newTwitterStream: TwitterStream) = synchronized {
    if (twitterStream != null) {
      twitterStream.shutdown()
    }
    twitterStream = newTwitterStream
  }
}
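Because both classes are private to the streaming package, application code typically goes through the module's TwitterUtils helper instead. A minimal sketch, assuming the usual createStream(ssc, twitterAuth) entry point and that OAuth credentials are supplied through the standard twitter4j.oauth.* system properties:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.twitter.TwitterUtils

object TwitterStreamApp {
  def main(args: Array[String]): Unit = {
    // Assumes twitter4j.oauth.consumerKey/consumerSecret/accessToken/accessTokenSecret are set.
    val ssc = new StreamingContext(new SparkConf().setAppName("TwitterStream"), Seconds(2))
    val tweets = TwitterUtils.createStream(ssc, None) // None => build OAuth from twitter4j config
    tweets.map(_.getText).print()
    ssc.start()
    ssc.awaitTermination()
  }
}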
Example 44
Source File: CustomReceiver.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.streaming

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver

// Excerpt: the surrounding CustomReceiver class definition is elided; only its receive() helper is shown.
  private def receive() {
    var socket: Socket = null
    var userInput: String = null
    try {
      logInfo("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      logInfo("Connected to " + host + ":" + port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      userInput = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      socket.close()
      logInfo("Stopped receiving")
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    }
  }
}
// scalastyle:on println