kafka.serializer.DefaultDecoder Scala Examples

The following examples show how to use kafka.serializer.DefaultDecoder. Follow the link above each example to view the original project or source file.
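
DefaultDecoder is the identity decoder from the legacy Scala Kafka client: its fromBytes method returns the byte array it is given unchanged, which makes it the usual choice when the message payload should stay as Array[Byte]. A minimal standalone sketch (the object name and sample string below are illustrative only, not taken from the projects that follow):

import kafka.serializer.DefaultDecoder

object DefaultDecoderDemo {
  def main(args: Array[String]): Unit = {
    // DefaultDecoder is a pass-through: fromBytes returns the byte array it receives.
    val decoder = new DefaultDecoder()
    val payload = "hello".getBytes("UTF-8")
    val decoded = decoder.fromBytes(payload)
    println(new String(decoded, "UTF-8")) // prints "hello"
  }
}
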
Example 1
Source File: CheckpointingKafkaExtractor.scala    From streamliner-examples    with Apache License 2.0
package com.memsql.spark.examples.kafka

import com.memsql.spark.etl.api.{UserExtractConfig, PhaseConfig, ByteArrayExtractor}
import com.memsql.spark.etl.utils.PhaseLogger
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.StreamingContext

import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.streaming.kafka.{CheckpointedDirectKafkaInputDStream, CheckpointedKafkaUtils}
import org.apache.spark.streaming.dstream.InputDStream


class CheckpointingKafkaExtractor extends ByteArrayExtractor {
  var CHECKPOINT_DATA_VERSION = 1

  var dstream: CheckpointedDirectKafkaInputDStream[String, Array[Byte], StringDecoder, DefaultDecoder, Array[Byte]] = null

  var zkQuorum: String = null
  var topic: String = null

  override def initialize(ssc: StreamingContext, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Unit = {
    val kafkaConfig  = config.asInstanceOf[UserExtractConfig]
    zkQuorum = kafkaConfig.getConfigString("zk_quorum").getOrElse {
      throw new IllegalArgumentException("\"zk_quorum\" must be set in the config")
    }
    topic = kafkaConfig.getConfigString("topic").getOrElse {
      throw new IllegalArgumentException("\"topic\" must be set in the config")
    }
  }

  def extract(ssc: StreamingContext, extractConfig: PhaseConfig, batchDuration: Long, logger: PhaseLogger): InputDStream[Array[Byte]] = {
    val kafkaParams = Map[String, String](
      "memsql.zookeeper.connect" -> zkQuorum
    )
    val topics = Set(topic)

    dstream = CheckpointedKafkaUtils.createDirectStreamFromZookeeper[String, Array[Byte], StringDecoder, DefaultDecoder](
      ssc, kafkaParams, topics, batchDuration, lastCheckpoint)
    dstream
  }

  override def batchCheckpoint: Option[Map[String, Any]] = {
    dstream match {
      case null => None
      case default => {
        val currentOffsets = dstream.getCurrentOffsets.map { case (tp, offset) =>
          Map("topic" -> tp.topic, "partition" -> tp.partition, "offset" -> offset)
        }
        Some(Map("offsets" -> currentOffsets, "zookeeper" -> zkQuorum, "version" -> CHECKPOINT_DATA_VERSION))
      }
    }
  }

  override def batchRetry: Unit = {
    if (dstream.prevOffsets != null) {
      dstream.setCurrentOffsets(dstream.prevOffsets)
    }
  }
} 
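
CheckpointedKafkaUtils and CheckpointedDirectKafkaInputDStream above are MemSQL Streamliner wrappers around Spark Streaming's direct Kafka API; the decoder pairing itself, StringDecoder for keys and DefaultDecoder for values, works the same against stock spark-streaming-kafka 0.8. A rough sketch under that assumption, with placeholder broker list and topic name:

import kafka.serializer.{DefaultDecoder, StringDecoder}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils

object DirectStreamSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(new SparkConf().setAppName("direct-stream-sketch"), Seconds(10))
    // Placeholder broker list and topic name.
    val kafkaParams = Map("metadata.broker.list" -> "localhost:9092")
    val topics = Set("events")
    // Keys are decoded to String; values are left as raw bytes by DefaultDecoder.
    val stream = KafkaUtils.createDirectStream[String, Array[Byte], StringDecoder, DefaultDecoder](
      ssc, kafkaParams, topics)
    stream.map { case (_, bytes) => bytes.length }.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
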
Example 2
Source File: EtlProcessor.scala    From etl-light    with MIT License
package yamrcraft.etlite.processors

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.DefaultDecoder
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.kafka._
import org.slf4j.LoggerFactory
import yamrcraft.etlite.Settings
import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager}
import yamrcraft.etlite.transformers.InboundMessage

object EtlProcessor {

  val logger = LoggerFactory.getLogger(this.getClass)

  def run(settings: Settings) = {
    val context = createContext(settings)

    val stateManager = new KafkaStateManager(settings.etl.state)

    val lastState = stateManager.readState
    logger.info(s"last persisted state: $lastState")

    val currState = stateManager.fetchNextState(lastState, settings)
    logger.info(s"batch working state: $currState")

    val rdd = createRDD(context, currState, settings)
    processRDD(rdd, currState.jobId, settings)

    logger.info("committing state")
    stateManager.commitState(currState)
  }

  private def createContext(settings: Settings) = {
    val sparkConf = new SparkConf()
      .setAppName(settings.spark.appName)
      .setAll(settings.spark.conf)

    new SparkContext(sparkConf)
  }

  private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = {
    KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage](
      context,
      settings.kafka.properties,
      state.ranges.toArray,
      Map[TopicAndPartition, Broker](),
      (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => { InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message()) }
    )
  }

  private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = {
    // passed to remote workers
    val etlSettings = settings.etl

    logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]")

    val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD)

    rdd.foreachPartition { partition =>
        // executed at the worker
        new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings)
          .processPartition(partition)
      }

    logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]")
  }


} 
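
KafkaUtils.createRDD is the batch counterpart of the direct stream: given explicit offset ranges it reads a fixed slice of each partition into an RDD. The call above supplies a message handler that wraps topic, key, and message bytes into InboundMessage; a simpler sketch using the overload without a handler (broker list, topic, and offsets are placeholders) could look like this:

import kafka.serializer.DefaultDecoder
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.kafka.{KafkaUtils, OffsetRange}

object BatchReadSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("batch-read-sketch"))
    // Placeholder broker list, topic, and offsets.
    val kafkaParams = Map("metadata.broker.list" -> "localhost:9092")
    val ranges = Array(OffsetRange("events", 0, 0L, 100L))
    // With DefaultDecoder on both sides, the RDD elements are (Array[Byte], Array[Byte]) pairs.
    val rdd = KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder](
      sc, kafkaParams, ranges)
    println(s"fetched ${rdd.count()} messages")
    sc.stop()
  }
}
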
Example 3
Source File: KafkaJsonConsumer.scala    From coral    with Apache License 2.0
package io.coral.lib

import java.util.Properties

import com.fasterxml.jackson.core.JsonParseException
import kafka.consumer._
import kafka.serializer.{Decoder, DefaultDecoder}
import org.json4s.JsonAST.{JNothing, JValue}
import org.json4s.jackson.JsonMethods._

object KafkaJsonConsumer {
	def apply() = new KafkaJsonConsumer(JsonDecoder)
	def apply(decoder: Decoder[JValue]) = new KafkaJsonConsumer(decoder)
}

class KafkaJsonConsumer(decoder: Decoder[JValue]) {
	def stream(topic: String, properties: Properties): KafkaJsonStream = {
		val connection = Consumer.create(new ConsumerConfig(properties))
		val stream = connection.createMessageStreamsByFilter(
			Whitelist(topic), 1, new DefaultDecoder, decoder)(0)
		new KafkaJsonStream(connection, stream)
	}
}

class KafkaJsonStream(connection: ConsumerConnector, stream: KafkaStream[Array[Byte], JValue]) {
	private lazy val it = stream.iterator

	// this method relies on a timeout value having been set
	@inline def hasNextInTime: Boolean =
		try {
			it.hasNext
		} catch {
			case cte: ConsumerTimeoutException => false
		}

	@inline def next: JValue = it.next.message
	@inline def commitOffsets = connection.commitOffsets
}

object JsonDecoder extends Decoder[JValue] {
	val encoding = "UTF8"

	override def fromBytes(bytes: Array[Byte]): JValue = {
		val s = new String(bytes, encoding)
		try {
			parse(s)
		} catch {
			case jpe: JsonParseException => JNothing
		}
	}
}
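
To tie this example together, a caller would build a Properties object for the old high-level consumer, open a stream, and drain it; hasNextInTime only returns false rather than blocking when consumer.timeout.ms is set. A hypothetical driver with placeholder connection values:

import java.util.Properties

import io.coral.lib.KafkaJsonConsumer

object JsonConsumerSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder connection settings; consumer.timeout.ms is what lets
    // hasNextInTime return false instead of blocking indefinitely.
    val props = new Properties()
    props.put("zookeeper.connect", "localhost:2181")
    props.put("group.id", "json-consumer-sketch")
    props.put("consumer.timeout.ms", "500")

    val stream = KafkaJsonConsumer().stream("events", props)
    while (stream.hasNextInTime) {
      println(stream.next) // JNothing if the payload was not valid JSON
      stream.commitOffsets
    }
  }
}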