kafka.serializer.DefaultDecoder Scala Examples
The following examples show how to use kafka.serializer.DefaultDecoder.
The original project and source file are noted above each example.
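DefaultDecoder is the pass-through decoder from Kafka's old (0.8.x) Scala client: its fromBytes method returns the raw Array[Byte] it is given, unchanged, which is why the examples below use it wherever keys or values should stay as plain bytes. A minimal sketch (the payload string is illustrative):

import kafka.serializer.DefaultDecoder

val decoder = new DefaultDecoder()
val bytes = "payload".getBytes("UTF-8")
// DefaultDecoder performs no deserialization; it hands the bytes back as-is.
assert(decoder.fromBytes(bytes).sameElements(bytes))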
Example 1
Source File: CheckpointingKafkaExtractor.scala From streamliner-examples with Apache License 2.0
package com.memsql.spark.examples.kafka

import com.memsql.spark.etl.api.{UserExtractConfig, PhaseConfig, ByteArrayExtractor}
import com.memsql.spark.etl.utils.PhaseLogger
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.StreamingContext
import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.streaming.kafka.{CheckpointedDirectKafkaInputDStream, CheckpointedKafkaUtils}
import org.apache.spark.streaming.dstream.InputDStream

class CheckpointingKafkaExtractor extends ByteArrayExtractor {
  var CHECKPOINT_DATA_VERSION = 1

  var dstream: CheckpointedDirectKafkaInputDStream[String, Array[Byte], StringDecoder, DefaultDecoder, Array[Byte]] = null

  var zkQuorum: String = null
  var topic: String = null

  override def initialize(ssc: StreamingContext, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Unit = {
    val kafkaConfig = config.asInstanceOf[UserExtractConfig]
    zkQuorum = kafkaConfig.getConfigString("zk_quorum").getOrElse {
      throw new IllegalArgumentException("\"zk_quorum\" must be set in the config")
    }
    topic = kafkaConfig.getConfigString("topic").getOrElse {
      throw new IllegalArgumentException("\"topic\" must be set in the config")
    }
  }

  def extract(ssc: StreamingContext, extractConfig: PhaseConfig, batchDuration: Long, logger: PhaseLogger): InputDStream[Array[Byte]] = {
    val kafkaParams = Map[String, String](
      "memsql.zookeeper.connect" -> zkQuorum
    )
    val topics = Set(topic)

    // Resume from the last checkpoint (if any) rather than from the latest offsets.
    dstream = CheckpointedKafkaUtils.createDirectStreamFromZookeeper[String, Array[Byte], StringDecoder, DefaultDecoder](
      ssc, kafkaParams, topics, batchDuration, lastCheckpoint)
    dstream
  }

  override def batchCheckpoint: Option[Map[String, Any]] = {
    dstream match {
      case null => None
      case default => {
        val currentOffsets = dstream.getCurrentOffsets.map { case (tp, offset) =>
          Map("topic" -> tp.topic, "partition" -> tp.partition, "offset" -> offset)
        }
        Some(Map("offsets" -> currentOffsets, "zookeeper" -> zkQuorum, "version" -> CHECKPOINT_DATA_VERSION))
      }
    }
  }

  override def batchRetry: Unit = {
    // Rewind to the previous batch's offsets so a failed batch is replayed.
    if (dstream.prevOffsets != null) {
      dstream.setCurrentOffsets(dstream.prevOffsets)
    }
  }
}
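The extractor's checkpoint is just a plain map, so the surrounding pipeline can serialize it and hand it back as lastCheckpoint on restart. For a single-partition topic, the value returned by batchCheckpoint would look roughly like this (topic name, offset, and ZooKeeper address are illustrative):

Some(Map(
  "offsets" -> List(Map("topic" -> "events", "partition" -> 0, "offset" -> 42L)),
  "zookeeper" -> "zk1:2181",
  "version" -> 1
))

On a failed batch, batchRetry rewinds the stream to prevOffsets so the same offset range is re-read instead of being skipped.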
Example 2
Source File: EtlProcessor.scala From etl-light with MIT License
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    // State is committed only after the batch has been fully processed.
    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)
    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    // DefaultDecoder keeps both key and value as raw bytes; the message handler
    // wraps them in an InboundMessage together with the topic name.
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => {
        InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message())
      }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)

    rdd.foreachPartition { partition =>
      // executed at the worker
      new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
        .processPartition(partition)
    }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }
}
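Here createRDD reads a bounded batch: state.ranges pins the exact Kafka offsets to consume, and the state is committed only after processRDD succeeds, which yields at-least-once delivery. In Spark's Kafka 0.8 integration each range is an org.apache.spark.streaming.kafka.OffsetRange; a hypothetical single-partition batch could be described like this (topic and offsets are illustrative):

import org.apache.spark.streaming.kafka.OffsetRange

// Consume partition 0 of "events" from offset 100 (inclusive) up to 200 (exclusive).
val ranges: Array[OffsetRange] = Array(OffsetRange.create("events", 0, 100L, 200L))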
Example 3
Source File: KafkaJsonConsumer.scala From coral with Apache License 2.0
package io.coral.lib

import java.util.Properties

import com.fasterxml.jackson.core.JsonParseException
import kafka.consumer._
import kafka.serializer.{Decoder, DefaultDecoder}
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._

object KafkaJsonConsumer {
  def apply() = new KafkaJsonConsumer(JsonDecoder)
  def apply(decoder: Decoder[JValue]) = new KafkaJsonConsumer(decoder)
}

class KafkaJsonConsumer(decoder: Decoder[JValue]) {
  def stream(topic: String, properties: Properties): KafkaJsonStream = {
    val connection = Consumer.create(new ConsumerConfig(properties))
    // Keys are left as raw bytes (DefaultDecoder); values are decoded to JSON.
    val stream = connection.createMessageStreamsByFilter(
      Whitelist(topic), 1, new DefaultDecoder, decoder)(0)
    new KafkaJsonStream(connection, stream)
  }
}

class KafkaJsonStream(connection: ConsumerConnector, stream: KafkaStream[Array[Byte], JValue]) {
  private lazy val it = stream.iterator

  // this method relies on a timeout value having been set
  @inline def hasNextInTime: Boolean =
    try {
      it.hasNext
    } catch {
      // timeout expired with no new message; report "nothing available"
      case cte: ConsumerTimeoutException => false
    }

  @inline def next: JValue = it.next.message

  @inline def commitOffsets = connection.commitOffsets
}

object JsonDecoder extends Decoder[JValue] {
  val encoding = "UTF8"

  override def fromBytes(bytes: Array[Byte]): JValue = {
    val s = new String(bytes, encoding)
    try {
      parse(s)
    } catch {
      // malformed payloads become JNothing instead of failing the stream
      case jpe: JsonParseException => JNothing
    }
  }
}
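A hedged usage sketch for the consumer above, assuming a topic named "events" (the property values are illustrative; note that consumer.timeout.ms must be set, otherwise hasNextInTime blocks in it.hasNext instead of returning false):

import java.util.Properties

val props = new Properties()
props.put("zookeeper.connect", "localhost:2181") // illustrative ZooKeeper address
props.put("group.id", "json-consumer-example")   // illustrative consumer group
props.put("consumer.timeout.ms", "5000")         // lets hasNextInTime return false when idle

val stream = KafkaJsonConsumer().stream("events", props)
while (stream.hasNextInTime) {
  val json = stream.next // JValue, or JNothing if the payload was not valid JSON
  // ... process json ...
  stream.commitOffsets
}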