org.apache.avro.Schema Scala Examples
The following examples show how to use org.apache.avro.Schema.
Each example is taken from an open-source project; the header above each snippet names the source file, the project it comes from, and the project's license.
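Most of the snippets below obtain a Schema in one of two ways: by parsing a JSON schema definition with Schema.Parser, or by assembling one programmatically with SchemaBuilder. The following minimal, self-contained sketch (the "User" record is illustrative, not taken from any example) shows both:

import org.apache.avro.{Schema, SchemaBuilder}

object SchemaBasics {

  // Parse a schema from its JSON definition (the "User" record here is made up).
  val parsed: Schema = new Schema.Parser().parse(
    """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}"""
  )

  // Build an equivalent schema programmatically with SchemaBuilder.
  val built: Schema = SchemaBuilder
    .record("User").fields()
    .name("name").`type`().stringType().noDefault()
    .endRecord()

  def main(args: Array[String]): Unit = {
    println(parsed.getField("name").schema().getType) // STRING
    println(built.getFullName)                        // User
  }
}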
Example 1
Source File: AvroDataToCatalyst.scala From spark-schema-registry with Apache License 2.0 | 6 votes |
package com.hortonworks.spark.registry.avro

import java.io.ByteArrayInputStream

import com.hortonworks.registries.schemaregistry.{SchemaVersionInfo, SchemaVersionKey}
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer
import org.apache.avro.Schema
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{BinaryType, DataType}

import scala.collection.JavaConverters._

case class AvroDataToCatalyst(child: Expression, schemaName: String, version: Option[Int], config: Map[String, Object])
  extends UnaryExpression with ExpectsInputTypes {

  override def inputTypes = Seq(BinaryType)

  @transient private lazy val srDeser: AvroSnapshotDeserializer = {
    val obj = new AvroSnapshotDeserializer()
    obj.init(config.asJava)
    obj
  }

  @transient private lazy val srSchema = fetchSchemaVersionInfo(schemaName, version)

  @transient private lazy val avroSchema = new Schema.Parser().parse(srSchema.getSchemaText)

  override lazy val dataType: DataType = SchemaConverters.toSqlType(avroSchema).dataType

  @transient private lazy val avroDeser = new AvroDeserializer(avroSchema, dataType)

  override def nullable: Boolean = true

  override def nullSafeEval(input: Any): Any = {
    val binary = input.asInstanceOf[Array[Byte]]
    val row = avroDeser.deserialize(srDeser.deserialize(new ByteArrayInputStream(binary), srSchema.getVersion))
    val result = row match {
      case r: InternalRow => r.copy()
      case _ => row
    }
    result
  }

  override def simpleString: String = {
    s"from_sr(${child.sql}, ${dataType.simpleString})"
  }

  override def sql: String = {
    s"from_sr(${child.sql}, ${dataType.catalogString})"
  }

  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val expr = ctx.addReferenceObj("this", this)
    defineCodeGen(ctx, ev, input => s"(${ctx.boxedType(dataType)})$expr.nullSafeEval($input)")
  }

  private def fetchSchemaVersionInfo(schemaName: String, version: Option[Int]): SchemaVersionInfo = {
    val srClient = new SchemaRegistryClient(config.asJava)
    version.map(v => srClient.getSchemaVersionInfo(new SchemaVersionKey(schemaName, v)))
      .getOrElse(srClient.getLatestSchemaVersionInfo(schemaName))
  }

}
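In practice, a Catalyst expression like AvroDataToCatalyst is not called directly; it is usually lifted into a Column so it can appear in DataFrame selects. Below is a hedged sketch of such a wrapper -- the helper name and the implicit config are illustrative assumptions, not necessarily the spark-schema-registry API:

import com.hortonworks.spark.registry.avro.AvroDataToCatalyst
import org.apache.spark.sql.Column

// Hypothetical helper: wraps the Catalyst expression in a Column so it can be
// used as df.select(from_sr(col("value"), "schema-name")).
object FromSrSketch {
  def from_sr(data: Column, schemaName: String, version: Option[Int] = None)
             (implicit config: Map[String, Object]): Column =
    new Column(AvroDataToCatalyst(data.expr, schemaName, version, config))
}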
Example 2
Source File: StopWordsRemover.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.feature import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument} import com.ibm.aardpfark.pfa.expression.PFAExpression import com.ibm.aardpfark.pfa.types.WithSchema import com.ibm.aardpfark.spark.ml.PFAModel import com.sksamuel.avro4s.{AvroNamespace, AvroSchema} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.spark.ml.feature.StopWordsRemover @AvroNamespace("com.ibm.aardpfark.exec.spark.spark.ml.feature") case class StopWords(words: Seq[String]) extends WithSchema { def schema = AvroSchema[this.type ] } class PFAStopWordsRemover(override val sparkTransformer: StopWordsRemover) extends PFAModel[StopWords] { import com.ibm.aardpfark.pfa.dsl._ private val inputCol = sparkTransformer.getInputCol private val outputCol = sparkTransformer.getOutputCol private val inputExpr = StringExpr(s"input.${inputCol}") private val stopWords = sparkTransformer.getStopWords private val caseSensitive = sparkTransformer.getCaseSensitive private def filterFn = FunctionDef[String, Boolean]("word") { w => Seq(core.not(a.contains(wordsRef, if (caseSensitive) w else s.lower(w)))) } override def inputSchema: Schema = { SchemaBuilder.record(withUid(inputBaseName)).fields() .name(inputCol).`type`().array().items().stringType().noDefault() .endRecord() } override def outputSchema: Schema = { SchemaBuilder.record(withUid(outputBaseName)).fields() .name(outputCol).`type`().array().items().stringType().noDefault() .endRecord() } override protected def cell = { Cell(StopWords(stopWords)) } private val wordsRef = modelCell.ref("words") override def action: PFAExpression = { NewRecord(outputSchema, Map(outputCol -> a.filter(inputExpr, filterFn))) } override def pfa: PFADocument = PFABuilder() .withName(sparkTransformer.uid) .withMetadata(getMetadata) .withInput(inputSchema) .withOutput(outputSchema) .withCell(modelCell) .withAction(action) .pfa }
Example 3
Source File: ControlStructuresSuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.expression

import com.ibm.aardpfark.pfa.DSLSuiteBase
import com.ibm.aardpfark.pfa.document.PFABuilder
import com.ibm.aardpfark.pfa.dsl._
import org.apache.avro.Schema

class ControlStructuresSuite extends DSLSuiteBase {

  test("DSL: If-then statements") {

    val action = If {core.gt(inputExpr, 0.0)} Then "Positive" Else "Negative"

    val pfaDoc = new PFABuilder()
      .withInput[Double]
      .withOutput[String]
      .withAction(action)
      .pfa

    val engine = getPFAEngine(pfaDoc.toJSON())

    assert("Positive" == engine.action(engine.jsonInput("1.0")))
    assert("Negative" == engine.action(engine.jsonInput("-1.0")))
  }

}
Example 4
Source File: AttrSuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.expression

import com.ibm.aardpfark.pfa.dsl._
import com.ibm.aardpfark.pfa.DSLSuiteBase
import com.ibm.aardpfark.pfa.document.PFABuilder
import org.apache.avro.Schema

class AttrSuite extends DSLSuiteBase {

  test("DSL: Attr") {

    val action = Attr(Attr(inputExpr, "element"), 1)

    val pfaDoc = new PFABuilder()
      .withInput(Schema.createMap(Schema.createArray(Schema.create(Schema.Type.DOUBLE))))
      .withOutput[Double]
      .withAction(action)
      .pfa

    val engine = getPFAEngine(pfaDoc.toJSON())

    val result = engine.action(engine.jsonInput("""{"element": [0.0, 3.0]}"""))
    assert(result == 3.0)
  }

}
Example 5
Source File: LoopsSuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.expression

import com.ibm.aardpfark.pfa.DSLSuiteBase
import com.ibm.aardpfark.pfa.document.PFABuilder
import com.ibm.aardpfark.pfa.dsl._
import org.apache.avro.Schema

class LoopsSuite extends DSLSuiteBase {

  test("DSL: For loop") {

    val sum = Let("sum", 0.0)

    val foreach = ForEach(StringExpr("element"), inputExpr) { e =>
      Seq(Set(sum.ref, core.plus(sum.ref, e)))
    }

    val action = Action(sum, foreach, sum.ref)

    val pfaDoc = new PFABuilder()
      .withInput(Schema.createArray(Schema.create(Schema.Type.DOUBLE)))
      .withOutput[Double]
      .withAction(action)
      .pfa

    val engine = getPFAEngine(pfaDoc.toJSON())

    val result = engine.action(engine.jsonInput("[3.0, 4.0, 5.0]"))
    assert(result == 12.0)
  }

}
Example 6
Source File: PFABuilder.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.document import scala.collection.mutable.ArrayBuffer import com.ibm.aardpfark.pfa.expression.PFAExpression import com.ibm.aardpfark.pfa.types.WithSchema import com.sksamuel.avro4s.SchemaFor import org.apache.avro.Schema class PFABuilder { import com.ibm.aardpfark.pfa.dsl._ private var name: Option[String] = None private var meta: Map[String, String] = Map() private var input: Schema = null private var output: Schema = null private val cells = collection.mutable.HashMap[String, Cell[_]]() private val action = ArrayBuffer[PFAExpression]() private val functions = collection.mutable.HashMap[String, FunctionDef]() def withInput(schema: Schema): this.type = { input = schema this } def withInput[T](implicit ev: SchemaFor[T]): this.type = withInput(ev()) def withOutput(schema: Schema): this.type = { output = schema this } def withName(name: String): this.type = { this.name = Some(name) this } def withMetadata(meta: Map[String, String]): this.type = { this.meta = meta this } def withOutput[T](implicit ev: SchemaFor[T]): this.type = withOutput(ev()) def withCell[T <: WithSchema](name: String, cell: Cell[T]): this.type = { cells += name -> cell this } def withCell[T <: WithSchema](namedCell: NamedCell[T]): this.type = { cells += namedCell.name -> namedCell.cell this } def withFunction(name: String, fn: FunctionDef): this.type = { functions += name -> fn this } def withFunction(namedFn: NamedFunctionDef): this.type = { functions += namedFn.name -> namedFn.fn this } def withAction(expr: PFAExpression): this.type = { expr match { case ExprSeq(s) => action ++= s case _ => action += expr } this } def pfa: PFADocument = { PFADocument(name = name, metadata = meta, input = input, output = output, action = action, cells = cells.toMap, fcns = functions.toMap ) } } object PFABuilder { def apply(): PFABuilder = new PFABuilder() }
Example 7
Source File: JSONSerializers.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.document import scala.util.Try import com.ibm.aardpfark.pfa.dsl._ import com.ibm.aardpfark.pfa.expression.PFAExpression import com.ibm.aardpfark.spark.ml.tree.{TreeNode, Trees} import org.apache.avro.Schema import org.json4s.native.JsonMethods.parse import org.json4s.{CustomSerializer, JValue} object SchemaSerializer { def convert(s: Schema): JValue = { import Schema.Type._ import org.json4s.JsonDSL._ s.getType match { case DOUBLE | FLOAT | INT | LONG | STRING | BOOLEAN | BYTES | NULL => ("type" -> s.getType.getName) case _ => parse(s.toString) } } } class SchemaSerializer extends CustomSerializer[Schema](format => ( { case j: JValue => new Schema.Parser().parse(j.toString) }, { case s: Schema => SchemaSerializer.convert(s) } ) ) class PFAExpressionSerializer extends CustomSerializer[PFAExpression](format => ( { case j: JValue => throw new UnsupportedOperationException("cannot deserialize") }, { case expr: PFAExpression => expr.json } ) ) class TreeSerializer extends CustomSerializer[TreeNode](format => ( { case j: JValue => throw new UnsupportedOperationException("cannot deserialize") }, { case tree: TreeNode => Trees.json(tree) } ) ) class ParamSerializer extends CustomSerializer[Param](format => ( { case j: JValue => throw new UnsupportedOperationException("cannot deserialize") }, { case p: Param => import org.json4s.JsonDSL._ if (p.simpleSchema) { (p.name -> p.`type`.getFullName) } else { val schemaSerializer = new SchemaSerializer().serialize(format) (p.name -> schemaSerializer(p.`type`)) } } ) )
Example 8
Source File: PFADocument.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.document

import com.ibm.aardpfark.pfa.dsl._
import com.ibm.aardpfark.pfa.expression.PFAExpression
import com.ibm.aardpfark.pfa.utils.Utils
import org.apache.avro.Schema
import org.json4s.native.Serialization
import org.json4s.native.Serialization.{write, writePretty}
import org.json4s.{FieldSerializer, NoTypeHints}

trait ToPFA {
  def pfa: PFADocument
}

trait HasAction {
  protected def action: PFAExpression
}

trait HasModelCell {
  protected def modelCell: NamedCell[_]
}

case class PFADocument(
  name: Option[String] = None,
  version: Option[Long] = Some(1L),
  doc: Option[String] = Some(s"Auto-generated by Aardpfark at ${Utils.getCurrentDate}"),
  metadata: Map[String, String] = Map(),
  // options,
  input: Schema,
  output: Schema,
  // begin: Seq[String] = Seq(),
  // end: Seq[String] = Seq(),
  // method: String = "map",
  action: Seq[PFAExpression],
  cells: Map[String, Cell[_]] = Map(),
  // pools
  fcns: Map[String, FunctionDef] = Map()
  // randseed
  // zero
  // merge
) {

  implicit val formats = Serialization.formats(NoTypeHints) +
    new SchemaSerializer +
    new PFAExpressionSerializer +
    new ParamSerializer +
    new FieldSerializer[Cell[_]] +
    new TreeSerializer

  def toJSON(pretty: Boolean = false) = {
    if (pretty) writePretty(this) else write(this)
  }
}
Example 9
package com.ibm.aardpfark.pfa.expression import com.ibm.aardpfark.pfa.document.SchemaSerializer import com.sksamuel.avro4s.{AvroSchema, SchemaFor, ToSchema} import org.apache.avro.Schema import org.json4s.JValue import org.json4s.JsonAST.JString import org.json4s.native.JsonMethods.parse trait New { object NewRecord { def apply(schema: Schema, init: Map[String, PFAExpression], fullSchema: Boolean = true) = NewRecordExpr(schema, init, fullSchema) } case class NewRecordExpr(schema: Schema, init: Map[String, PFAExpression], fullSchema: Boolean) extends PFAExpression { import org.json4s.JsonDSL._ private val s = if (fullSchema) SchemaSerializer.convert(schema) else JString(schema.getFullName) override def json: JValue = { ("type" -> s) ~ ("new" -> init.mapValues(_.json)) } } case class NewArrayExpr(schema: Schema, init: Seq[PFAExpression]) extends PFAExpression { import org.json4s.JsonDSL._ override def json: JValue = { ("type" -> parse(schema.toString)) ~ ("new" -> init.map(_.json)) } } object NewArray { def apply(schema: Schema, init: Seq[PFAExpression]) = NewArrayExpr(schema, init) def apply[T](init: Seq[PFAExpression])(implicit s: ToSchema[Seq[T]]) = { NewArrayExpr(s(), init) } } case class NewMap(schema: Schema, init: Map[String, PFAExpression]) extends PFAExpression { import org.json4s.JsonDSL._ override def json: JValue = { ("type" -> parse(schema.toString)) ~ ("new" -> init.mapValues(_.json)) } } }
Example 10
Source File: Casts.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.expression import com.ibm.aardpfark.pfa.document.SchemaSerializer import com.ibm.aardpfark.pfa.dsl.StringExpr import com.sksamuel.avro4s.{SchemaFor, ToSchema} import org.apache.avro.Schema import org.json4s.JValue trait Casts { case class As(schema: Schema, named: String, `do`: PFAExpression) object As { def apply(schema: Schema, named: String, `do`: (StringExpr) => PFAExpression): As = { As(schema, named, `do`(StringExpr(named))) } def apply[T](named: String, `do`: (StringExpr) => PFAExpression)(implicit s: ToSchema[T]): As = { As(s(), named, `do`(StringExpr(named))) } } object Cast { def apply(cast: PFAExpression, cases: Seq[As]) = new CastExpr(cast, cases) def apply(cast: PFAExpression, case1: As, cases: As*) = new CastExpr(cast, Seq(case1) ++ cases) } class CastExpr(cast: PFAExpression, cases: Seq[As]) extends PFAExpression { import org.json4s.JsonDSL._ implicit val converter: Schema => JValue = SchemaSerializer.convert override def json: JValue = { ("cast" -> cast.json) ~ ("cases" -> cases.map { as => ("as" -> as.schema) ~ ("named" -> as.named) ~ ("do" -> as.`do`.json) }) } } }
Example 11
Source File: MLPClassifier.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.classification import scala.collection.mutable.ArrayBuffer import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument} import com.ibm.aardpfark.pfa.dsl._ import com.ibm.aardpfark.pfa.expression._ import com.ibm.aardpfark.pfa.types.WithSchema import com.ibm.aardpfark.spark.ml.PFAPredictionModel import breeze.linalg.{DenseMatrix, DenseVector} import com.sksamuel.avro4s.{AvroNamespace, AvroSchema} import org.apache.avro.Schema import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel @AvroNamespace("com.ibm.aardpfark.exec.spark.ml.classification") case class Layer(weights: Array[Array[Double]], bias: Array[Double]) @AvroNamespace("com.ibm.aardpfark.exec.spark.ml.classification") case class Layers(layers: Seq[Layer]) extends WithSchema { override def schema: Schema = AvroSchema[this.type] } class PFAMultilayerPerceptronClassificationModel( override val sparkTransformer: MultilayerPerceptronClassificationModel) extends PFAPredictionModel[Layers] { private def getLayers = { val weights = sparkTransformer.weights.toArray val inputLayers = sparkTransformer.layers val layers = ArrayBuffer[Layer]() var offset = 0 for (i <- 0 to inputLayers.size - 2) { val in = inputLayers(i) val out = inputLayers(i + 1) val wOffset = out * in val wData = weights.slice(offset, offset + wOffset) val bData = weights.slice(offset + wOffset, offset + wOffset + out) val w = Array.ofDim[Double](out, in) new DenseMatrix[Double](out, in, wData).foreachPair { case ((ii, jj), v) => w(ii)(jj) = v } val b = new DenseVector[Double](bData).toArray layers += Layer(w, b) offset += wOffset + out } layers.toArray } override protected def cell = Cell(Layers(getLayers)) private val doubleSigmoid = NamedFunctionDef("doubleSigmoid", FunctionDef[Double, Double]( "x", m.link.logit("x") )) override def action: PFAExpression = { val forward = model.neural.simpleLayers(inputExpr, modelCell.ref("layers"), doubleSigmoid.ref) val softmax = m.link.softmax(forward) NewRecord(outputSchema, Map(predictionCol -> a.argmax(softmax))) } override def pfa: PFADocument = { PFABuilder() .withName(sparkTransformer.uid) .withMetadata(getMetadata) .withInput(inputSchema) .withOutput(outputSchema) .withCell(modelCell) .withFunction(doubleSigmoid) .withAction(action) .pfa } }
Example 12
Source File: Merge.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml

import com.ibm.aardpfark.pfa.document.Cell
import com.ibm.aardpfark.pfa.expression._
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.spark.ml.PipelineModel

// NOTE: this excerpt is truncated at the source -- the enclosing object and the
// method declaration that bind `docs` (the PFA documents to merge) and `is`
// (the input schema) are elided; the two trailing braces close them.

    val first = docs.head
    val last = docs.last

    var name = "merged"
    var version = 0L
    val inputSchema = is
    val outputSchema = last.output
    var meta: Map[String, String] = Map()
    var cells: Map[String, Cell[_]] = Map()
    var action: PFAExpression = StringExpr("input")
    var fcns: Map[String, FunctionDef] = Map()

    var currentSchema = inputSchema

    docs.zipWithIndex.foreach { case (doc, idx) =>

      val inputParam = Param("input", currentSchema)
      val inputFields = currentSchema.getFields.toSeq
      val newFields = doc.output.getFields.toSeq
      val outputFields = inputFields ++ newFields

      val bldr = SchemaBuilder.record(s"Stage_${idx + 1}_output_schema").fields()
      outputFields.foreach { field =>
        bldr
          .name(field.name())
          .`type`(field.schema())
          .noDefault()
      }
      currentSchema = bldr.endRecord()

      val let = Let(s"Stage_${idx + 1}_action_output", Do(doc.action))

      val inputExprs = inputFields.map { field =>
        field.name -> StringExpr(s"input.${field.name}")
      }
      val newExprs = newFields.map { field =>
        field.name -> StringExpr(s"${let.x}.${field.name}")
      }
      val exprs = inputExprs ++ newExprs

      val stageOutput = NewRecord(currentSchema, exprs.toMap)

      val le = new LetExpr(Seq((let.x, let.`type`, let.expr)))
      val stageActionFn = NamedFunctionDef(s"Stage_${idx + 1}_action", FunctionDef(
        Seq(inputParam), currentSchema, Seq(le, stageOutput)
      ))

      fcns = fcns ++ doc.fcns + (stageActionFn.name -> stageActionFn.fn)
      cells = cells ++ doc.cells
      meta = meta ++ doc.metadata
      action = stageActionFn.call(action)
    }

    first.copy(
      name = Some(name),
      version = Some(version),
      metadata = meta,
      cells = cells,
      fcns = fcns,
      action = action,
      input = inputSchema,
      output = currentSchema
    )
  }
}
Example 13
Source File: KMeans.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.clustering import com.ibm.aardpfark.pfa.dsl.StringExpr import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument} import com.ibm.aardpfark.pfa.dsl._ import com.ibm.aardpfark.pfa.expression.PFAExpression import com.ibm.aardpfark.pfa.types.WithSchema import com.ibm.aardpfark.spark.ml.PFAModel import com.sksamuel.avro4s.{AvroNamespace, AvroSchema} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.spark.ml.clustering.KMeansModel @AvroNamespace("com.ibm.aardpfark.exec.spark.ml.clustering") case class Cluster(id: Int, center: Seq[Double]) @AvroNamespace("com.ibm.aardpfark.exec.spark.ml.clustering") case class KMeansModelData(clusters: Seq[Cluster]) extends WithSchema { override def schema: Schema = AvroSchema[this.type] } class PFAKMeansModel(override val sparkTransformer: KMeansModel) extends PFAModel[KMeansModelData] { private val inputCol = sparkTransformer.getFeaturesCol private val outputCol = sparkTransformer.getPredictionCol private val inputExpr = StringExpr(s"input.${inputCol}") override def inputSchema = { SchemaBuilder.record(withUid(inputBaseName)).fields() .name(inputCol).`type`().array().items().doubleType().noDefault() .endRecord() } override def outputSchema = SchemaBuilder.record(withUid(outputBaseName)).fields() .name(outputCol).`type`().intType().noDefault() .endRecord() override def cell = { val clusters = sparkTransformer.clusterCenters.zipWithIndex.map { case (v, i) => Cluster(i, v.toArray) } Cell(KMeansModelData(clusters)) } override def action: PFAExpression = { val closest = model.cluster.closest(inputExpr, modelCell.ref("clusters")) NewRecord(outputSchema, Map(outputCol -> Attr(closest, "id"))) } override def pfa: PFADocument = { PFABuilder() .withName(sparkTransformer.uid) .withMetadata(getMetadata) .withInput(inputSchema) .withOutput(outputSchema) .withCell(modelCell) .withAction(action) .pfa } }
Example 14
Source File: FunctionSuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.expression import com.ibm.aardpfark.pfa.dsl._ import com.ibm.aardpfark.pfa.DSLSuiteBase import com.ibm.aardpfark.pfa.document.PFABuilder import org.apache.avro.Schema class FunctionSuite extends DSLSuiteBase { test("DSL: NamedFunctionDef") { val squared = FunctionDef[Int, Int]("x") { x => Seq(core.mult(x, x)) } val namedSquared = NamedFunctionDef("squared", squared) val cubed = FunctionDef[Int, Int]("x") {x => Seq(core.mult(x, namedSquared(x))) } val namedCubed = NamedFunctionDef("cubed", cubed) val action = Action(namedSquared(namedCubed(inputExpr))) val pfaDoc = new PFABuilder() .withInput[Int] .withOutput[Int] .withAction(action) .withFunction(namedSquared) .withFunction(namedCubed) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) assert(1 == engine.action(engine.jsonInput("1"))) assert(64 == engine.action(engine.jsonInput("2"))) } test("DSL: FunctionDef anonymous"){ val squared = FunctionDef[Int, Int]("x") { x => Seq(core.mult(x, x)) } val arraySchema = Schema.createArray(Schema.create(Schema.Type.INT)) val action = Action( a.map(inputExpr, squared) ) val pfaDoc = new PFABuilder() .withInput(arraySchema) .withOutput(arraySchema) .withAction(action) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) assert("[1,4,9]" == engine.jsonOutput(engine.action(engine.jsonInput("[1,2,3]")))) assert("[1,4,9]" == engine.jsonOutput(engine.action(engine.jsonInput("[-1,-2,3]")))) assert("[9,64,256]" == engine.jsonOutput(engine.action(engine.jsonInput("[3,8,16]")))) } test("DSL: FunctionDef multiple args with same input type") { val fn = NamedFunctionDef("plusAll", FunctionDef[Double, Double]("x", "y", "z") { case Seq(x, y, z) => core.plus(core.plus(x, y), z) }) val action = Action(fn.call(inputExpr, core.mult(inputExpr, 2.0), 6.0)) val pfaDoc = new PFABuilder() .withInput[Double] .withOutput[Double] .withAction(action) .withFunction(fn) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) assert(12.0 == engine.action(engine.jsonInput("2.0"))) } }
Example 15
package com.ibm.aardpfark.spark.ml.feature

import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument}
import com.ibm.aardpfark.pfa.expression._
import com.ibm.aardpfark.pfa.types.WithSchema
import com.ibm.aardpfark.spark.ml.PFAModel
import com.sksamuel.avro4s.{AvroNamespace, AvroSchema}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.spark.ml.feature.IDFModel

@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.feature")
case class IDFData(idf: Seq[Double]) extends WithSchema {
  override def schema: Schema = AvroSchema[this.type]
}

// NOTE: this excerpt is truncated at the source -- the class that wraps the IDFModel
// (and defines outputCol, inputExpr, idfRef, multFn, modelCell and the schemas) is
// elided; only its action and pfa members are shown.

  override def action: PFAExpression = {
    NewRecord(outputSchema, Map(outputCol -> a.zipmap(inputExpr, idfRef, multFn.ref)))
  }

  override def pfa: PFADocument = {
    PFABuilder()
      .withName(sparkTransformer.uid)
      .withMetadata(getMetadata)
      .withInput(inputSchema)
      .withOutput(outputSchema)
      .withCell(modelCell)
      .withFunction(multFn)
      .withAction(action)
      .pfa
  }
}
Example 16
Source File: VectorAssembler.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.feature import com.ibm.aardpfark.pfa.document.{PFABuilder, PFADocument} import com.ibm.aardpfark.pfa.expression.PFAExpression import com.ibm.aardpfark.spark.ml.PFATransformer import org.apache.avro.{Schema, SchemaBuilder} import org.apache.spark.ml.feature.VectorAssembler import org.json4s.DefaultFormats class PFAVectorAssembler(override val sparkTransformer: VectorAssembler) extends PFATransformer { import com.ibm.aardpfark.pfa.dsl._ implicit val formats = DefaultFormats private val inputCols = sparkTransformer.getInputCols private val outputCol = sparkTransformer.getOutputCol type DorSeqD = Either[Double, Seq[Double]] override protected def inputSchema: Schema = { val builder = SchemaBuilder.record(withUid(inputBaseName)).fields() for (inputCol <- inputCols) { builder.name(inputCol).`type`() .unionOf() .doubleType().and() .array().items().doubleType() .endUnion().noDefault() } builder.endRecord() } override protected def outputSchema: Schema = { SchemaBuilder.record(withUid(outputBaseName)).fields() .name(outputCol).`type`().array().items().doubleType().noDefault() .endRecord() } private val asDouble = As[Double]("x", x => NewArray[Double](x)) private val asArray = As[Array[Double]]("x", x => x) private val castFn = NamedFunctionDef("castToArray", FunctionDef[DorSeqD, Seq[Double]]("x") { x => Cast(x, asDouble, asArray) } ) override protected def action: PFAExpression = { val cols = Let("cols", NewArray[DorSeqD](inputCols.map(c => StringExpr(s"input.$c")))) Action( cols, NewRecord(outputSchema, Map(outputCol -> a.flatten(a.map(cols.ref, castFn.ref)))) ) } override def pfa: PFADocument = { PFABuilder() .withName(sparkTransformer.uid) .withMetadata(getMetadata) .withInput(inputSchema) .withOutput(outputSchema) .withAction(action) .withFunction(castFn) .pfa } }
Example 17
Source File: PCAModel.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.feature import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument} import com.ibm.aardpfark.pfa.expression.PFAExpression import com.ibm.aardpfark.pfa.types.WithSchema import com.ibm.aardpfark.spark.ml.PFAModel import com.sksamuel.avro4s.{AvroNamespace, AvroSchema} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.spark.ml.feature.PCAModel @AvroNamespace("com.ibm.aardpfark.exec.spark.ml.feature") case class PCAData(pc: Array[Array[Double]]) extends WithSchema { override def schema: Schema = AvroSchema[this.type] } class PFAPCAModel(override val sparkTransformer: PCAModel) extends PFAModel[PCAData] { import com.ibm.aardpfark.pfa.dsl._ private val inputCol = sparkTransformer.getInputCol private val outputCol = sparkTransformer.getOutputCol private val inputExpr = StringExpr(s"input.${inputCol}") override def inputSchema = { SchemaBuilder.record(withUid(inputBaseName)).fields() .name(inputCol).`type`().array().items().doubleType().noDefault() .endRecord() } override def outputSchema = { SchemaBuilder.record(withUid(outputBaseName)).fields() .name(outputCol).`type`().array().items().doubleType().noDefault() .endRecord() } override protected def cell = { val pc = sparkTransformer.pc.transpose.rowIter.map(v => v.toArray).toArray Cell(PCAData(pc)) } override def action: PFAExpression = { val dot = la.dot(modelCell.ref("pc"), inputExpr) NewRecord(outputSchema, Map(outputCol -> dot)) } override def pfa: PFADocument = { PFABuilder() .withName(sparkTransformer.uid) .withMetadata(getMetadata) .withInput(inputSchema) .withOutput(outputSchema) .withCell(modelCell) .withAction(action) .pfa } }
Example 18
Source File: Main.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.server

import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.raster.render._
import geotrellis.raster.resample._
import geotrellis.spark._
import geotrellis.spark.io._
import geotrellis.spark.io.file._
import geotrellis.spark.io.avro._
import geotrellis.spark.io.avro.codecs._
import geotrellis.spark.io.index._
import org.apache.spark._
import org.apache.avro.Schema
import com.github.nscala_time.time.Imports._
import akka.actor._
import akka.io.IO
import spray.can.Http
import spray.routing.{HttpService, RequestContext}
import spray.routing.directives.CachingDirectives
import spray.http.MediaTypes
import spray.json._
import spray.json.DefaultJsonProtocol._
import com.typesafe.config.ConfigFactory

import scala.concurrent._
import scala.collection.JavaConverters._
import scala.reflect.ClassTag

object Main {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setIfMissing("spark.master", "local[*]")
      .setAppName("Osme Server")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryo.registrator", "geotrellis.spark.io.hadoop.KryoRegistrator")

    implicit val sc = new SparkContext(conf)
    implicit val system = akka.actor.ActorSystem("demo-system")

    // create and start our service actor
    val service = system.actorOf(Props(classOf[OsmeServiceActor], sc), "osme")

    // start a new HTTP server on port 8088 with our service actor as the handler
    IO(Http) ! Http.Bind(service, "0.0.0.0", 8088)
  }
}
Example 19
Source File: BasicTest.scala From kafka-testing with Apache License 2.0 | 5 votes |
package com.landoop.kafka.testing import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.kafka.clients.producer.ProducerRecord class BasicTest extends ClusterTestingCapabilities { private val createAvroRecord = { val userSchema = "{\"namespace\": \"example.avro\", \"type\": \"record\", " + "\"name\": \"User\"," + "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}]}" val parser = new Schema.Parser val schema = parser.parse(userSchema) val avroRecord = new GenericData.Record(schema) avroRecord.put("name", "testUser") avroRecord } "KCluster" should { "start up and be able to handle avro records being sent " in { val topic = "testAvro" + System.currentTimeMillis() val avroRecord = createAvroRecord val objects = Array[AnyRef](avroRecord) val producerProps = stringAvroProducerProps val producer = createProducer[String, Any](producerProps) for (o <- objects) { val message = new ProducerRecord[String, Any](topic, o) producer.send(message) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.toSeq shouldBe records } "write and read avro records" in { val topic = "testAvro" + System.currentTimeMillis() val avroRecord = createAvroRecord val objects = Array[Any](avroRecord, true, 130, 345L, 1.23f, 2.34d, "abc", "def".getBytes) val producerProps = stringAvroProducerProps val producer = createProducer[String, Any](producerProps) for (o <- objects) { producer.send(new ProducerRecord[String, Any](topic, o)) } val consumerProps = stringAvroConsumerProps("group" + System.currentTimeMillis()) val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.deep shouldBe records.toArray.deep } } }
Example 20
Source File: AvroSerializer.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.bloomberg.avro

import java.io.ByteArrayOutputStream

import com.datamountaineer.streamreactor.connect.bloomberg.BloombergData
import com.datamountaineer.streamreactor.connect.bloomberg.avro.AvroSchemaGenerator._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData.Record
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

import scala.collection.JavaConverters._

object AvroSerializer {

  private def recursive(record: GenericData.Record, schema: Schema, fieldName: String, value: Any): Unit = {
    value match {
      case _: Boolean => record.put(fieldName, value)
      case _: Int => record.put(fieldName, value)
      case _: Long => record.put(fieldName, value)
      case _: Double => record.put(fieldName, value)
      case _: Char => record.put(fieldName, value)
      case _: Float => record.put(fieldName, value)
      case _: String => record.put(fieldName, value)
      case list: java.util.List[_] =>
        val tmpSchema = schema.getField(fieldName).schema()
        val itemSchema = if (tmpSchema.getType == Schema.Type.UNION) tmpSchema.getTypes.get(1) else tmpSchema
        require(itemSchema.getType == Schema.Type.ARRAY)
        //we might have a record not a primitive
        if (itemSchema.getElementType.getType == Schema.Type.RECORD) {
          val items = new GenericData.Array[GenericData.Record](list.size(), itemSchema)
          list.asScala.foreach { i =>
            //only map is allowed
            val m = i.asInstanceOf[java.util.Map[String, Any]]
            items.add(m.toAvroRecord(itemSchema.getElementType))
          }
          record.put(fieldName, items)
        } else {
          val items = new GenericData.Array[Any](list.size(), itemSchema)
          items.addAll(list)
          record.put(fieldName, items)
        }

      case map: java.util.LinkedHashMap[String @unchecked, _] =>
        //record schema
        val fieldSchema = schema.getField(fieldName).schema()
        val nestedSchema = if (fieldSchema.getType == Schema.Type.UNION) fieldSchema.getTypes.get(1) else fieldSchema
        val nestedRecord = new Record(nestedSchema)
        map.entrySet().asScala.foreach(e =>
          recursive(nestedRecord, nestedSchema, e.getKey, e.getValue))
        record.put(fieldName, nestedRecord)
    }
  }
}
Example 21
Source File: AvroRecordRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import com.datamountaineer.streamreactor.connect.hbase.avro.AvroRecordFieldExtractorMapFn import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class AvroRecordRowKeyBuilderTest extends AnyWordSpec with Matchers with MockitoSugar { val schema: Schema = new Schema.Parser().parse(PersonAvroSchema.schema) "AvroRecordRowKeyBuilder" should { "extract the values from the avro record and create the key" in { val keys = Seq("firstName", "lastName", "age") val rowKeyBuilder = new AvroRecordRowKeyBuilderBytes(AvroRecordFieldExtractorMapFn(schema, keys), keys) val sinkRecord = mock[SinkRecord] val firstName = "Jack" val lastName = "Smith" val age = 29 val record = new GenericRecord { val values: Map[String, AnyRef] = Map("firstName" -> firstName, "lastName" -> lastName, "age" -> Int.box(age)) override def get(key: String): AnyRef = values(key) override def put(key: String, v: scala.Any): Unit = sys.error("not supported") override def get(i: Int): AnyRef = sys.error("not supported") override def put(i: Int, v: scala.Any): Unit = sys.error("not supported") override def getSchema: Schema = sys.error("not supported") } val expectedValue = Bytes.add( Array( firstName.fromString(), rowKeyBuilder.delimBytes, lastName.fromString(), rowKeyBuilder.delimBytes, age.fromInt())) rowKeyBuilder.build(sinkRecord, record) shouldBe expectedValue } } }
Example 22
Source File: AvroSchemaFieldsExistFnTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase.avro

import com.datamountaineer.streamreactor.connect.hbase.PersonAvroSchema
import org.apache.avro.Schema
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class AvroSchemaFieldsExistFnTest extends AnyWordSpec with Matchers {

  val schema: Schema = new Schema.Parser().parse(PersonAvroSchema.schema)

  "AvroSchemaFieldsExistFn" should {
    "raise an exception if the field is not present" in {
      intercept[IllegalArgumentException] {
        AvroSchemaFieldsExistFn(schema, Seq("notpresent"))
      }

      intercept[IllegalArgumentException] {
        AvroSchemaFieldsExistFn(schema, Seq(" lastName"))
      }
    }

    "not raise an exception if the fields are present" in {
      AvroSchemaFieldsExistFn(schema, Seq("lastName", "age", "address"))
    }
  }
}
Example 23
Source File: AvroRecordFieldExtractorMapFnTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase.avro import java.nio.file.Paths import org.apache.avro.Schema import org.apache.hadoop.hbase.util.Bytes import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class AvroRecordFieldExtractorMapFnTest extends AnyWordSpec with Matchers { val schema: Schema = new Schema.Parser().parse(Paths.get(getClass.getResource("/person.avsc").toURI).toFile) "AvroRecordFieldExtractorMapFn" should { "raise an exception if the given field does not exist in the schema" in { intercept[IllegalArgumentException] { AvroRecordFieldExtractorMapFn(schema, Seq("wrongField")) } } "raise an exception if the given field is not a primitive" in { intercept[IllegalArgumentException] { AvroRecordFieldExtractorMapFn(schema, Seq("address")) } } "create the mappings for all the given fields" in { val mappings = AvroRecordFieldExtractorMapFn(schema, Seq("firstName", "age")) val fnFirstName = mappings("firstName") val firstName = "Beaky" fnFirstName(firstName) shouldBe Bytes.toBytes(firstName) val fnAge = mappings("age") val age = 31 fnAge(age) shouldBe Bytes.toBytes(age) intercept[ClassCastException] { fnAge(12.4) } } } }
Example 24
Source File: AvroSchemaFieldsExistFn.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase.avro

import org.apache.avro.{AvroRuntimeException, Schema}

object AvroSchemaFieldsExistFn {
  def apply(schema: Schema, fields: Seq[String]): Unit = {
    fields.foreach { field =>
      try {
        if (Option(schema.getField(field)).isEmpty) {
          throw new IllegalArgumentException(s"[$field] is not found in the schema fields")
        }
      }
      catch {
        case avroException: AvroRuntimeException =>
          throw new IllegalArgumentException(s"$field is not found in the schema", avroException)
      }
    }
  }
}
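A minimal usage sketch of the helper above. The Person schema here is a hypothetical stand-in for the PersonAvroSchema.schema used by the tests in Examples 22 and 23: existing fields pass silently, while an unknown field raises IllegalArgumentException.

import com.datamountaineer.streamreactor.connect.hbase.avro.AvroSchemaFieldsExistFn
import org.apache.avro.Schema

object AvroSchemaFieldsExistFnUsage {

  // Hypothetical schema standing in for PersonAvroSchema.schema.
  private val personSchema: Schema = new Schema.Parser().parse(
    """{"type":"record","name":"Person","fields":[
      |  {"name":"firstName","type":"string"},
      |  {"name":"lastName","type":"string"},
      |  {"name":"age","type":"int"}
      |]}""".stripMargin
  )

  def main(args: Array[String]): Unit = {
    // Passes silently: both fields exist in the schema.
    AvroSchemaFieldsExistFn(personSchema, Seq("firstName", "age"))

    // Throws IllegalArgumentException: "nickname" is not a field of Person.
    AvroSchemaFieldsExistFn(personSchema, Seq("nickname"))
  }
}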
Example 25
Source File: ToTableRow.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.bigquery import com.spotify.scio.extra.bigquery.AvroConverters.AvroConversionException import java.math.{BigDecimal => JBigDecimal} import java.nio.ByteBuffer import java.util import com.spotify.scio.bigquery.TableRow import org.apache.avro.Schema import org.apache.avro.generic.{GenericFixed, IndexedRecord} import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding import org.joda.time.format.DateTimeFormat import org.joda.time.{DateTime, LocalDate, LocalTime} import scala.jdk.CollectionConverters._ private[bigquery] trait ToTableRow { private lazy val encodingPropName: String = "bigquery.bytes.encoder" private lazy val base64Encoding: BaseEncoding = BaseEncoding.base64() private lazy val hexEncoding: BaseEncoding = BaseEncoding.base16() // YYYY-[M]M-[D]D private[this] val localDateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC() // YYYY-[M]M-[D]D[( |T)[H]H:[M]M:[S]S[.DDDDDD]] private[this] val localTimeFormatter = DateTimeFormat.forPattern("HH:mm:ss.SSSSSS") // YYYY-[M]M-[D]D[( |T)[H]H:[M]M:[S]S[.DDDDDD]][time zone] private[this] val timestampFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS") private[bigquery] def toTableRowField(fieldValue: Any, field: Schema.Field): Any = fieldValue match { case x: CharSequence => x.toString case x: Enum[_] => x.name() case x: JBigDecimal => x.toString case x: Number => x case x: Boolean => x case x: GenericFixed => encodeByteArray(x.bytes(), field.schema()) case x: ByteBuffer => encodeByteArray(toByteArray(x), field.schema()) case x: util.Map[_, _] => toTableRowFromMap(x.asScala, field) case x: java.lang.Iterable[_] => toTableRowFromIterable(x.asScala, field) case x: IndexedRecord => AvroConverters.toTableRow(x) case x: LocalDate => localDateFormatter.print(x) case x: LocalTime => localTimeFormatter.print(x) case x: DateTime => timestampFormatter.print(x) case _ => throw AvroConversionException( s"ToTableRow conversion failed:" + s"could not match ${fieldValue.getClass}" ) } private def toTableRowFromIterable(iterable: Iterable[Any], field: Schema.Field): util.List[_] = iterable .map { item => if (item.isInstanceOf[Iterable[_]] || item.isInstanceOf[Map[_, _]]) { throw AvroConversionException( s"ToTableRow conversion failed for item $item: " + s"iterable and map types not supported" ) } toTableRowField(item, field) } .toList .asJava private def toTableRowFromMap(map: Iterable[Any], field: Schema.Field): util.List[_] = map .map { case (k, v) => new TableRow() .set("key", toTableRowField(k, field)) .set("value", toTableRowField(v, field)) } .toList .asJava private def encodeByteArray(bytes: Array[Byte], fieldSchema: Schema): String = Option(fieldSchema.getProp(encodingPropName)) match { case Some("BASE64") => base64Encoding.encode(bytes) case Some("HEX") => hexEncoding.encode(bytes) case Some(encoding) => throw AvroConversionException(s"Unsupported encoding $encoding") case None => base64Encoding.encode(bytes) } private def toByteArray(buffer: ByteBuffer) = { val copy = buffer.asReadOnlyBuffer val bytes = new Array[Byte](copy.limit) copy.rewind copy.get(bytes) bytes } }
Example 26
Source File: JsonConverterWithSchemaEvolutionTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import com.sksamuel.avro4s.{RecordFormat, SchemaFor} import io.confluent.connect.avro.AvroData import org.apache.avro.Schema import org.apache.kafka.connect.data.Struct import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class JsonConverterWithSchemaEvolutionTest extends AnyWordSpec with Matchers { val topic = "the_real_topic" val sourceTopic = "source_topic" val avroData = new AvroData(4) "JsonConverter" should { "throw IllegalArgumentException if payload is null" in { intercept[IllegalArgumentException] { val converter = new JsonConverterWithSchemaEvolution val record = converter.convert("topic", "somesource", "1000", null) } } "handle a simple json" in { val json = JacksonJson.toJson(Car("LaFerrari", "Ferrari", 2015, 963, 0.0001)) val converter = new JsonConverterWithSchemaEvolution val record = converter.convert(topic, sourceTopic, "100", json.getBytes) record.keySchema() shouldBe MsgKey.schema record.key().asInstanceOf[Struct].getString("topic") shouldBe sourceTopic record.key().asInstanceOf[Struct].getString("id") shouldBe "100" val schema = new Schema.Parser().parse( SchemaFor[CarOptional]().toString .replace("\"name\":\"CarOptional\"", s"""\"name\":\"$sourceTopic\"""") .replace(s""",\"namespace\":\"${getClass.getCanonicalName.dropRight(getClass.getSimpleName.length+1)}\"""", "") ) val format = RecordFormat[CarOptional] val carOptional = format.to(CarOptional(Option("LaFerrari"), Option("Ferrari"), Option(2015), Option(963), Option(0.0001))) record.valueSchema() shouldBe avroData.toConnectSchema(schema) record.value() shouldBe avroData.toConnectData(schema, carOptional).value() record.sourcePartition() shouldBe null record.sourceOffset() shouldBe Collections.singletonMap(JsonConverterWithSchemaEvolution.ConfigKey, avroData.fromConnectSchema(avroData.toConnectSchema(schema)).toString()) } } } case class Car(name: String, manufacturer: String, model: Long, bhp: Long, price: Double) case class CarOptional(name: Option[String], manufacturer: Option[String], model: Option[Long], bhp: Option[Long], price: Option[Double])
Example 27
Source File: JsonSimpleConverterTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source import java.util.Collections import com.datamountaineer.streamreactor.connect.converters.MsgKey import com.sksamuel.avro4s.{RecordFormat, SchemaFor} import io.confluent.connect.avro.AvroData import org.apache.avro.Schema import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class JsonSimpleConverterTest extends AnyWordSpec with Matchers { val topic = "the_real_topic" val sourceTopic = "source_topic" val avroData = new AvroData(4) "JsonSimpleConverter" should { "convert from json to the struct" in { val car = Car("LaFerrari", "Ferrari", 2015, 963, 0.0001) val json = JacksonJson.toJson(car) val converter = new JsonSimpleConverter val record = converter.convert(topic, sourceTopic, "100", json.getBytes) record.keySchema() shouldBe MsgKey.schema record.key() shouldBe MsgKey.getStruct(sourceTopic, "100") val schema = new Schema.Parser().parse( SchemaFor[Car]().toString .replace("\"name\":\"Car\"", s"""\"name\":\"$sourceTopic\"""") .replace(s"""\"namespace\":\"${getClass.getCanonicalName.dropRight(getClass.getSimpleName.length+1)}\",""", "") ) val format = RecordFormat[Car] val avro = format.to(car) record.valueSchema() shouldBe avroData.toConnectSchema(schema) record.value() shouldBe avroData.toConnectData(schema, avro).value() record.sourcePartition() shouldBe Collections.singletonMap(Converter.TopicKey, sourceTopic) record.sourceOffset() shouldBe null } } }
Example 28
Source File: AvroSerializer.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.serialization

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}

import com.sksamuel.avro4s.{RecordFormat, SchemaFor}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroSerializer {
  def write[T <: Product](t: T)(implicit os: OutputStream, formatter: RecordFormat[T], schemaFor: SchemaFor[T]): Unit =
    write(apply(t), schemaFor())

  def write(record: GenericRecord, schema: Schema)(implicit os: OutputStream) = {
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val encoder = EncoderFactory.get().binaryEncoder(os, null)

    writer.write(record, encoder)
    encoder.flush()
    os.flush()
  }

  def getBytes[T <: Product](t: T)(implicit recordFormat: RecordFormat[T], schemaFor: SchemaFor[T]): Array[Byte] =
    getBytes(recordFormat.to(t), schemaFor())

  def getBytes(record: GenericRecord, schema: Schema): Array[Byte] = {
    implicit val output = new ByteArrayOutputStream()
    write(record, schema)
    output.toByteArray
  }

  def read(is: InputStream, schema: Schema): GenericRecord = {
    val reader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().binaryDecoder(is, null)
    reader.read(null, decoder)
  }

  def read[T <: Product](is: InputStream)(implicit schemaFor: SchemaFor[T], recordFormat: RecordFormat[T]): T =
    recordFormat.from(read(is, schemaFor()))

  def apply[T <: Product](t: T)(implicit formatter: RecordFormat[T]): GenericRecord = formatter.to(t)
}
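A hedged round-trip sketch for the serializer above, assuming a hypothetical Sensor case class (not part of the project); avro4s derives the RecordFormat and SchemaFor instances that getBytes and read require.

import java.io.ByteArrayInputStream

import com.datamountaineer.streamreactor.connect.serialization.AvroSerializer
import com.sksamuel.avro4s.{RecordFormat, SchemaFor}

object AvroSerializerRoundTrip {

  // Hypothetical payload type used only for this sketch.
  case class Sensor(id: String, value: Double)

  def main(args: Array[String]): Unit = {
    implicit val format: RecordFormat[Sensor] = RecordFormat[Sensor]
    implicit val schemaFor: SchemaFor[Sensor] = SchemaFor[Sensor]

    // Serialize the case class to Avro binary, then read it back.
    val bytes = AvroSerializer.getBytes(Sensor("s-1", 21.5))
    val restored = AvroSerializer.read[Sensor](new ByteArrayInputStream(bytes))
    println(restored) // Sensor(s-1,21.5)
  }
}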
Example 29
Source File: AvroSerde.scala From event-sourcing-kafka-streams with MIT License | 5 votes |
package org.amitayh.invoices.common.serde import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import java.time.Instant import java.util import java.util.UUID import com.sksamuel.avro4s._ import org.amitayh.invoices.common.domain._ import org.amitayh.invoices.common.serde.UuidConverters.{fromByteBuffer, toByteBuffer} import org.apache.avro.Schema import org.apache.avro.Schema.Field import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer} object AvroSerde { implicit val instantToSchema: ToSchema[Instant] = new ToSchema[Instant] { override val schema: Schema = Schema.create(Schema.Type.STRING) } implicit val instantToValue: ToValue[Instant] = new ToValue[Instant] { override def apply(value: Instant): String = value.toString } implicit val instantFromValue: FromValue[Instant] = new FromValue[Instant] { override def apply(value: Any, field: Field): Instant = Instant.parse(value.toString) } implicit val uuidToSchema: ToSchema[UUID] = new ToSchema[UUID] { override val schema: Schema = Schema.create(Schema.Type.BYTES) } implicit val uuidToValue: ToValue[UUID] = new ToValue[UUID] { override def apply(value: UUID): ByteBuffer = toByteBuffer(value) } implicit val uuidFromValue: FromValue[UUID] = new FromValue[UUID] { override def apply(value: Any, field: Field): UUID = fromByteBuffer(value.asInstanceOf[ByteBuffer]) } val CommandSerde: Serde[Command] = serdeFor[Command] val CommandResultSerde: Serde[CommandResult] = serdeFor[CommandResult] val SnapshotSerde: Serde[InvoiceSnapshot] = serdeFor[InvoiceSnapshot] val EventSerde: Serde[Event] = serdeFor[Event] def toBytes[T: SchemaFor: ToRecord](data: T): Array[Byte] = { val baos = new ByteArrayOutputStream val output = AvroOutputStream.binary[T](baos) output.write(data) output.close() baos.toByteArray } def fromBytes[T: SchemaFor: FromRecord](data: Array[Byte]): T = { val input = AvroInputStream.binary[T](data) input.iterator.next() } private def serdeFor[T: SchemaFor: ToRecord: FromRecord]: Serde[T] = new Serde[T] { override val serializer: Serializer[T] = new Serializer[T] { override def serialize(topic: String, data: T): Array[Byte] = toBytes(data) override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () } override val deserializer: Deserializer[T] = new Deserializer[T] { override def deserialize(topic: String, data: Array[Byte]): T = fromBytes(data) override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () } override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = () override def close(): Unit = () } }
Example 30
Source File: AvroCoders.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.coders.instances

import java.io.{InputStream, OutputStream}

import com.spotify.scio.coders.{AvroCoderMacros, Coder}
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.{SpecificData, SpecificFixed}
import org.apache.beam.sdk.coders.Coder.NonDeterministicException
import org.apache.beam.sdk.coders.{AtomicCoder, AvroCoder, StringUtf8Coder}
import org.apache.beam.sdk.util.common.ElementByteSizeObserver

import scala.reflect.{classTag, ClassTag}

final private class SlowGenericRecordCoder extends AtomicCoder[GenericRecord] {
  // TODO: can we find something more efficient than String ?
  private[this] val sc = StringUtf8Coder.of()

  override def encode(value: GenericRecord, os: OutputStream): Unit = {
    val schema = value.getSchema
    val coder = AvroCoder.of(schema)
    sc.encode(schema.toString, os)
    coder.encode(value, os)
  }

  override def decode(is: InputStream): GenericRecord = {
    val schemaStr = sc.decode(is)
    val schema = new Schema.Parser().parse(schemaStr)
    val coder = AvroCoder.of(schema)
    coder.decode(is)
  }

  // delegate methods for determinism and equality checks
  override def verifyDeterministic(): Unit =
    throw new NonDeterministicException(
      this,
      "Coder[GenericRecord] without schema is non-deterministic"
    )
  override def consistentWithEquals(): Boolean = false
  override def structuralValue(value: GenericRecord): AnyRef =
    AvroCoder.of(value.getSchema).structuralValue(value)

  // delegate methods for byte size estimation
  override def isRegisterByteSizeObserverCheap(value: GenericRecord): Boolean =
    AvroCoder.of(value.getSchema).isRegisterByteSizeObserverCheap(value)
  override def registerByteSizeObserver(
    value: GenericRecord,
    observer: ElementByteSizeObserver
  ): Unit =
    AvroCoder.of(value.getSchema).registerByteSizeObserver(value, observer)
}

// NOTE: the trait or object that encloses the following members is elided in this
// excerpt; the trailing brace closes it.

  // TODO: Use a coder that does not serialize the schema
  def avroGenericRecordCoder(schema: Schema): Coder[GenericRecord] =
    Coder.beam(AvroCoder.of(schema))

  // XXX: similar to GenericAvroSerializer
  def avroGenericRecordCoder: Coder[GenericRecord] =
    Coder.beam(new SlowGenericRecordCoder)

  import org.apache.avro.specific.SpecificRecordBase
  implicit def genAvro[T <: SpecificRecordBase]: Coder[T] =
    macro AvroCoderMacros.staticInvokeCoder[T]

  implicit def avroSpecificFixedCoder[T <: SpecificFixed: ClassTag]: Coder[T] =
    SpecificFixedCoder[T]
}
Example 31
Source File: AvroSerializer.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.coders.instances.kryo

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.KSerializer
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.avro.specific.SpecificRecordBase
import org.apache.beam.sdk.coders.AvroCoder

import scala.collection.mutable.{Map => MMap}
import scala.util.Try

private[coders] class GenericAvroSerializer extends KSerializer[GenericRecord] {
  private lazy val cache: MMap[String, AvroCoder[GenericRecord]] = MMap()

  private def getCoder(schemaStr: String): AvroCoder[GenericRecord] =
    cache.getOrElseUpdate(schemaStr, AvroCoder.of(new Schema.Parser().parse(schemaStr)))
  private def getCoder(schemaStr: String, schema: Schema): AvroCoder[GenericRecord] =
    cache.getOrElseUpdate(schemaStr, AvroCoder.of(schema))

  override def write(kryo: Kryo, out: Output, obj: GenericRecord): Unit = {
    val schemaStr = obj.getSchema.toString
    val coder = this.getCoder(schemaStr, obj.getSchema)
    // write schema before every record in case it's not in reader serializer's cache
    out.writeString(schemaStr)
    coder.encode(obj, out)
  }

  override def read(kryo: Kryo, in: Input, cls: Class[GenericRecord]): GenericRecord = {
    val coder = this.getCoder(in.readString())
    coder.decode(in)
  }
}

private[coders] class SpecificAvroSerializer[T <: SpecificRecordBase] extends KSerializer[T] {
  private lazy val cache: MMap[Class[T], AvroCoder[T]] = MMap()

  private def getCoder(cls: Class[T]): AvroCoder[T] =
    cache.getOrElseUpdate(
      cls,
      Try(cls.getConstructor().newInstance().getSchema)
        .map(AvroCoder.of(cls, _))
        .getOrElse(AvroCoder.of(cls))
    )

  override def write(kser: Kryo, out: Output, obj: T): Unit =
    this.getCoder(obj.getClass.asInstanceOf[Class[T]]).encode(obj, out)

  override def read(kser: Kryo, in: Input, cls: Class[T]): T =
    this.getCoder(cls).decode(in)
}
Example 32
Source File: StorageUtil.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.bigquery

import com.google.api.services.bigquery.model.{TableFieldSchema, TableSchema}
import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions
import org.apache.avro.Schema
import org.apache.avro.Schema.Type

import scala.jdk.CollectionConverters._

object StorageUtil {
  def tableReadOptions(
    selectedFields: List[String] = Nil,
    rowRestriction: Option[String] = None
  ): TableReadOptions =
    TableReadOptions
      .newBuilder()
      .addAllSelectedFields(selectedFields.asJava)
      .setRowRestriction(rowRestriction.getOrElse(""))
      .build()

  // https://cloud.google.com/bigquery/docs/reference/storage/
  def toTableSchema(avroSchema: Schema): TableSchema = {
    val fields = getFieldSchemas(avroSchema)
    new TableSchema().setFields(fields.asJava)
  }

  private def getFieldSchemas(avroSchema: Schema): List[TableFieldSchema] =
    avroSchema.getFields.asScala.map(toTableFieldSchema).toList

  private def toTableFieldSchema(field: Schema.Field): TableFieldSchema = {
    val schema = field.schema
    val (mode, tpe) = schema.getType match {
      case Type.UNION =>
        val types = schema.getTypes
        assert(types.size == 2 && types.get(0).getType == Type.NULL)
        ("NULLABLE", types.get(1))
      case Type.ARRAY =>
        ("REPEATED", schema.getElementType)
      case _ =>
        ("REQUIRED", schema)
    }

    val tableField = new TableFieldSchema().setName(field.name).setMode(mode)
    setRawType(tableField, tpe)
    tableField
  }

  private def setRawType(tableField: TableFieldSchema, schema: Schema): Unit = {
    val tpe = schema.getType match {
      case Type.BOOLEAN => "BOOLEAN"
      case Type.LONG =>
        schema.getLogicalType match {
          case null => "INT64"
          case t if t.getName == "timestamp-micros" => "TIMESTAMP"
          case t if t.getName == "time-micros" => "TIME"
          case t => throw new IllegalStateException(s"Unsupported logical type: $t")
        }
      case Type.DOUBLE => "FLOAT64"
      case Type.BYTES =>
        schema.getLogicalType match {
          case null => "BYTES"
          case t if t.getName == "decimal" =>
            assert(schema.getObjectProp("precision").asInstanceOf[Int] == 38)
            assert(schema.getObjectProp("scale").asInstanceOf[Int] == 9)
            "NUMERIC"
          case t => s"Unsupported logical type: $t"
        }
      case Type.INT =>
        schema.getLogicalType match {
          case t if t.getName == "date" => "DATE"
          case t => s"Unsupported logical type: $t"
        }
      case Type.STRING =>
        // FIXME: schema.getLogicalType == null in this case, BigQuery service side bug?
        if (schema.getProp("logicalType") == "datetime") {
          "DATETIME"
        } else {
          schema.getLogicalType match {
            case null => "STRING"
            case t if t.getName == "datetime" => "DATETIME"
            case t if t.getName == "geography" => "GEOGRAPHY"
            case t => throw new IllegalStateException(s"Unsupported logical type: $t")
          }
        }
      case Type.RECORD =>
        tableField.setFields(getFieldSchemas(schema).asJava)
        "RECORD"
      case t => throw new IllegalStateException(s"Unsupported type: $t")
    }
    tableField.setType(tpe)
    ()
  }
}
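For reference, a small sketch of how toTableSchema above maps Avro types to BigQuery fields (the Row schema is illustrative): a plain long becomes a REQUIRED INT64, a ["null", ...] union becomes NULLABLE, and an array becomes REPEATED.

import com.spotify.scio.bigquery.StorageUtil
import org.apache.avro.Schema

object StorageUtilSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical Avro schema with a required, a nullable, and a repeated field.
    val avroSchema = new Schema.Parser().parse(
      """{"type":"record","name":"Row","fields":[
        |  {"name":"id","type":"long"},
        |  {"name":"label","type":["null","string"],"default":null},
        |  {"name":"scores","type":{"type":"array","items":"double"}}
        |]}""".stripMargin
    )

    val tableSchema = StorageUtil.toTableSchema(avroSchema)
    // Expected: id -> REQUIRED INT64, label -> NULLABLE STRING, scores -> REPEATED FLOAT64
    println(tableSchema.toPrettyString)
  }
}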
Example 33
Source File: Cache.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.bigquery.client import java.io.File import com.google.api.services.bigquery.model.{TableReference, TableSchema} import com.spotify.scio.bigquery.BigQueryUtil import org.apache.beam.sdk.io.gcp.{bigquery => bq} import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Charsets import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.Hashing import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.Files import scala.util.Try import org.apache.avro.Schema private[client] object Cache { sealed trait Show[T] { def show(t: T): String } object Show { @inline final def apply[T](implicit t: Show[T]): Show[T] = t implicit val showTableSchema: Show[TableSchema] = new Show[TableSchema] { override def show(t: TableSchema): String = t.toPrettyString() } implicit val showTableRef: Show[TableReference] = new Show[TableReference] { override def show(table: TableReference): String = bq.BigQueryHelpers.toTableSpec(table) } implicit val showAvroSchema: Show[Schema] = new Show[Schema] { override def show(t: Schema): String = t.toString() } } sealed trait Read[T] { def read(s: String): Option[T] } object Read { @inline final def apply[T](implicit t: Read[T]): Read[T] = t implicit val readTableSchema: Read[TableSchema] = new Read[TableSchema] { override def read(s: String): Option[TableSchema] = Try(BigQueryUtil.parseSchema(s)).toOption } implicit val readTableRef: Read[TableReference] = new Read[TableReference] { override def read(table: String): Option[TableReference] = Try(bq.BigQueryHelpers.parseTableSpec(table)).toOption } implicit val readAvroSchema: Read[Schema] = new Read[Schema] { override def read(s: String): Option[Schema] = Try { new Schema.Parser().parse(s) }.toOption } } private[this] def isCacheEnabled: Boolean = BigQueryConfig.isCacheEnabled def getOrElse[T: Read: Show](key: String, f: String => File)(method: => T): T = if (isCacheEnabled) { get(key, f) match { case Some(schema) => schema case None => val schema = method set(key, schema, f) schema } } else { method } def set[T: Show](key: String, t: T, f: String => File): Unit = Files .asCharSink(f(key), Charsets.UTF_8) .write(Show[T].show(t)) def get[T: Read](key: String, f: String => File): Option[T] = Try(scala.io.Source.fromFile(f(key)).mkString).toOption.flatMap(Read[T].read) val SchemaCache: String => File = key => cacheFile(key, ".schema.json") val TableCache: String => File = key => cacheFile(key, ".table.txt") private[this] def cacheFile(key: String, suffix: String): File = { val cacheDir = BigQueryConfig.cacheDirectory val filename = Hashing.murmur3_128().hashString(key, Charsets.UTF_8).toString + suffix val cacheFile = cacheDir.resolve(filename).toFile() Files.createParentDirs(cacheFile) cacheFile } }
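A hedged sketch of how this cache could be used; loadSchema below is a hypothetical fallback that would hit the BigQuery API on a cache miss.

import org.apache.avro.Schema

def loadSchema(query: String): Schema = ??? // hypothetical, e.g. a call to the BigQuery client

val query = "SELECT id, name FROM `project.dataset.table`"
// Reads the schema from the on-disk cache if present, otherwise computes and stores it.
val schema: Schema = Cache.getOrElse(query, Cache.SchemaCache)(loadSchema(query))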
Example 34
Source File: GroupByBenchmark.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.jmh import com.spotify.scio.{ScioContext, ScioExecutionContext} import com.spotify.scio.avro._ import com.spotify.scio.coders._ import org.apache.beam.sdk.coders.{KvCoder, Coder => BCoder} import org.apache.beam.sdk.values.KV import org.apache.beam.sdk.transforms.GroupByKey import org.apache.beam.sdk.options.{PipelineOptions, PipelineOptionsFactory} import java.util.concurrent.TimeUnit import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.openjdk.jmh.annotations._ import scala.jdk.CollectionConverters._ @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.SECONDS) @State(Scope.Thread) class GroupByBenchmark { val schema = """ { "type": "record", "name": "Event", "namespace": "smbjoin", "fields": [ { "name": "id", "type": "string" }, { "name": "value", "type": "double" } ] } """ val avroSchema = new Schema.Parser().parse(schema) private def runWithContext[T](fn: ScioContext => T): ScioExecutionContext = { val opts = PipelineOptionsFactory.as(classOf[PipelineOptions]) val sc = ScioContext(opts) fn(sc) sc.run() } val source = "src/test/resources/events-10000-0.avro" implicit val coderGenericRecord: Coder[GenericRecord] = Coder.avroGenericRecordCoder(avroSchema) val charCoder = CoderMaterializer.beamWithDefault(Coder[Char]) val doubleCoder = CoderMaterializer.beamWithDefault(Coder[Double]) val kvCoder: BCoder[KV[Char, Double]] = KvCoder.of(charCoder, doubleCoder) @Benchmark def testScioGroupByKey: ScioExecutionContext = runWithContext { sc => sc.avroFile(source, schema = avroSchema) .map(rec => (rec.get("id").toString.head, rec.get("value").asInstanceOf[Double])) .groupByKey } @Benchmark def testBeamGroupByKey: ScioExecutionContext = runWithContext { sc => sc.wrap { sc.avroFile(source, schema = avroSchema) .map { rec => KV.of(rec.get("id").toString.head, rec.get("value").asInstanceOf[Double]) } .internal .setCoder(kvCoder) .apply(GroupByKey.create[Char, Double]) }.map(kv => (kv.getKey, kv.getValue.asScala)) } }
Example 35
Source File: AvroConverters.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.bigquery import com.google.api.services.bigquery.model.TableSchema import com.spotify.scio.annotations.experimental import com.spotify.scio.bigquery.TableRow import org.apache.avro.Schema import org.apache.avro.generic.IndexedRecord import scala.jdk.CollectionConverters._ object AvroConverters extends ToTableRow with ToTableSchema { @experimental def toTableRow[T <: IndexedRecord](record: T): TableRow = { val row = new TableRow record.getSchema.getFields.asScala.foreach { field => Option(record.get(field.pos)).foreach { fieldValue => row.set(field.name, toTableRowField(fieldValue, field)) } } row } @experimental def toTableSchema(avroSchema: Schema): TableSchema = { val fields = getFieldSchemas(avroSchema) new TableSchema().setFields(fields.asJava) } final case class AvroConversionException( private val message: String, private val cause: Throwable = null ) extends Exception(message, cause) }
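For example, with a throwaway GenericData.Record (the schema here is made up; any IndexedRecord, including Avro-generated classes, works the same way):

import org.apache.avro.Schema
import org.apache.avro.generic.GenericData

val schema = new Schema.Parser().parse(
  """{"type": "record", "name": "User", "fields": [{"name": "name", "type": "string"}]}""")

val user = new GenericData.Record(schema)
user.put("name", "jane")

val row = AvroConverters.toTableRow(user)           // TableRow with a "name" entry
val bqSchema = AvroConverters.toTableSchema(schema) // TableSchema with a single string field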
Example 36
Source File: SCollectionSyntax.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.bigquery.syntax import com.google.api.services.bigquery.model.TableReference import com.spotify.scio.annotations.experimental import com.spotify.scio.bigquery.BigQueryTable.WriteParam import com.spotify.scio.bigquery.{BigQueryTable, Table, TableRow} import com.spotify.scio.io.ClosedTap import com.spotify.scio.util.ScioUtil import com.spotify.scio.values.SCollection import org.apache.avro.Schema import org.apache.avro.generic.IndexedRecord import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.{CreateDisposition, WriteDisposition} import scala.reflect.ClassTag trait SCollectionSyntax { implicit def toAvroToBigQuerySCollection[T <: IndexedRecord: ClassTag]( data: SCollection[T] ): AvroToBigQuerySCollectionOps[T] = new AvroToBigQuerySCollectionOps[T](data) } final class AvroToBigQuerySCollectionOps[T <: IndexedRecord: ClassTag]( private val self: SCollection[T] ) extends Serializable { import com.spotify.scio.extra.bigquery.AvroConverters._ @experimental def saveAvroAsBigQuery( table: TableReference, avroSchema: Schema = null, writeDisposition: WriteDisposition = null, createDisposition: CreateDisposition = null, tableDescription: String = null ): ClosedTap[TableRow] = { val schema: Schema = Option(avroSchema) .getOrElse { val cls = ScioUtil.classOf[T] if (classOf[IndexedRecord] isAssignableFrom cls) { cls.getMethod("getClassSchema").invoke(null).asInstanceOf[Schema] } else { throw AvroConversionException("Could not invoke $SCHEMA on provided Avro type") } } val params = WriteParam(toTableSchema(schema), writeDisposition, createDisposition, tableDescription) self .map(toTableRow(_)) .write(BigQueryTable(Table.Ref(table)))(params) } }
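A rough usage sketch; the project, dataset and table ids are placeholders, and events stands for an SCollection of a hypothetical Avro-generated class MyEvent.

import com.google.api.services.bigquery.model.TableReference

val table = new TableReference()
  .setProjectId("my-project")
  .setDatasetId("my_dataset")
  .setTableId("events")

// events: SCollection[MyEvent]
// events.saveAvroAsBigQuery(table) // schema is resolved via MyEvent.getClassSchema when not passed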
Example 37
Source File: Registry.scala From tamer with MIT License | 5 votes |
package tamer package registry import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient import log.effect.LogWriter import log.effect.zio.ZioLogWriter.log4sFromName import org.apache.avro.{Schema, SchemaValidatorBuilder} import zio.{RIO, Task} import scala.jdk.CollectionConverters._ trait Registry extends Serializable { val registry: Registry.Service[Any] } object Registry { trait Service[R] { def getOrRegisterId(subject: String, schema: Schema): RIO[R, Int] def verifySchema(id: Int, schema: Schema): RIO[R, Unit] } object > extends Service[Registry] { override final def getOrRegisterId(subject: String, schema: Schema): RIO[Registry, Int] = RIO.accessM(_.registry.getOrRegisterId(subject, schema)) override final def verifySchema(id: Int, schema: Schema): RIO[Registry, Unit] = RIO.accessM(_.registry.verifySchema(id, schema)) } trait Live extends Registry { val client: SchemaRegistryClient override final val registry: Service[Any] = new Service[Any] { private[this] final val logTask: Task[LogWriter[Task]] = log4sFromName.provide("tamer.Registry.Live") private[this] final val strategy = new SchemaValidatorBuilder().canReadStrategy().validateLatest() private[this] final def validate(toValidate: Schema, writerSchema: Schema): Task[Unit] = Task(strategy.validate(toValidate, List(writerSchema).asJava)) override final def getOrRegisterId(subject: String, schema: Schema): Task[Int] = for { log <- logTask id <- Task(client.getId(subject, schema)).tap(id => log.debug(s"retrieved existing writer schema id: $id")) <> Task(client.register(subject, schema)).tap(id => log.info(s"registered with id $id new subject $subject writer schema $schema")) } yield id override final def verifySchema(id: Int, schema: Schema): Task[Unit] = for { log <- logTask writerSchema <- Task(client.getById(id)).tap(_ => log.debug(s"retrieved writer schema id: $id")) _ <- validate(schema, writerSchema).tapError(t => log.error(s"schema supplied cannot read payload: ${t.getLocalizedMessage}")) } yield () } } }
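In ZIO terms, the accessor object > can be used roughly as follows (the subject name and schema are illustrative):

import org.apache.avro.Schema
import zio.RIO

val valueSchema: Schema = new Schema.Parser().parse(
  """{"type": "record", "name": "Value", "namespace": "example", "fields": [{"name": "x", "type": "long"}]}""")

val program: RIO[Registry, Unit] = for {
  id <- Registry.>.getOrRegisterId("my-topic-value", valueSchema)
  _  <- Registry.>.verifySchema(id, valueSchema)
} yield ()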
Example 38
Source File: AvroUtils.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.avro import org.apache.avro.Schema import org.apache.avro.generic.{GenericData, GenericRecord} import scala.jdk.CollectionConverters._ object AvroUtils { private def f(name: String, tpe: Schema.Type) = new Schema.Field( name, Schema.createUnion(List(Schema.create(Schema.Type.NULL), Schema.create(tpe)).asJava), null: String, null: AnyRef ) private def fArr(name: String, tpe: Schema.Type) = new Schema.Field(name, Schema.createArray(Schema.create(tpe)), null: String, null: AnyRef) val schema = Schema.createRecord("GenericTestRecord", null, null, false) schema.setFields( List( f("int_field", Schema.Type.INT), f("long_field", Schema.Type.LONG), f("float_field", Schema.Type.FLOAT), f("double_field", Schema.Type.DOUBLE), f("boolean_field", Schema.Type.BOOLEAN), f("string_field", Schema.Type.STRING), fArr("array_field", Schema.Type.STRING) ).asJava ) def newGenericRecord(i: Int): GenericRecord = { val r = new GenericData.Record(schema) r.put("int_field", 1 * i) r.put("long_field", 1L * i) r.put("float_field", 1f * i) r.put("double_field", 1.0 * i) r.put("boolean_field", true) r.put("string_field", "hello") r.put("array_field", List[CharSequence]("a", "b", "c").asJava) r } def newSpecificRecord(i: Int): TestRecord = new TestRecord( i, i.toLong, i.toFloat, i.toDouble, true, "hello", List[CharSequence]("a", "b", "c").asJava ) }
Example 39
Source File: Sedes.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase

import java.io.ByteArrayInputStream

import org.apache.avro.Schema
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io._
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._

trait Sedes {
  def serialize(value: Any): Array[Byte]
  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
}

class DoubleSedes extends Sedes {
  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])

  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
    // Mirror serialize: the eight bytes written by Bytes.toBytes(Double) decode back to a Double.
    Bytes.toDouble(bytes, start)
  }
}
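A minimal round trip with the DoubleSedes above (the value is arbitrary; start and end delimit the relevant slice of an HBase cell):

val sedes: Sedes = new DoubleSedes
val bytes = sedes.serialize(3.14)
val restored = sedes.deserialize(bytes, 0, bytes.length) // decodes the eight bytes written above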
Example 40
Source File: BasicTest.scala From ksql-streams with Apache License 2.0 | 5 votes |
package com.landoop.kstreams.sql.cluster import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.kafka.clients.producer.ProducerRecord class BasicTest extends ClusterTestingCapabilities { private def createAvroRecord = { val userSchema = "{\"namespace\": \"example.avro\", \"type\": \"record\", " + "\"name\": \"User\"," + "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}]}" val parser = new Schema.Parser val schema = parser.parse(userSchema) val avroRecord = new GenericData.Record(schema) avroRecord.put("name", "testUser") avroRecord } "KCluster" should { "start up and be able to handle avro records being sent " in { val topic = "testAvro" val avroRecord = createAvroRecord val objects = Array[AnyRef](avroRecord) val producerProps = stringAvroProducerProps val producer = createProducer(producerProps) for (o <- objects) { val message = new ProducerRecord[String, Any](topic, o) producer.send(message) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.toSeq shouldBe records } "handle the avro new producer" in { val topic = "testAvro" val avroRecord = createAvroRecord val objects = Array[Any](avroRecord, true, 130, 345L, 1.23f, 2.34d, "abc", "def".getBytes) val producerProps = stringAvroProducerProps val producer = createProducer(producerProps) for (o <- objects) { producer.send(new ProducerRecord[String, Any](topic, o)) } val consumerProps = stringAvroConsumerProps() val consumer = createStringAvroConsumer(consumerProps) val records = consumeStringAvro(consumer, topic, objects.length) objects.deep shouldBe records.toArray.deep } } }
Example 41
Source File: JsonToAvroConverter.scala From ksql-streams with Apache License 2.0 | 5 votes |
package com.landoop.kstreams.sql.transform import java.util import com.fasterxml.jackson.databind.node._ import com.sksamuel.avro4s.ScaleAndPrecision import io.confluent.kafka.serializers import io.confluent.kafka.serializers.NonRecordContainer import org.apache.avro.generic.GenericContainer import org.apache.avro.generic.GenericData.Record import org.apache.avro.{LogicalTypes, Schema} class JsonToAvroConverter(namespace: String, avroStringTypeIsString: Boolean = false) { import org.json4s._ import org.json4s.native.JsonMethods._ def convert(name: String, str: String) (implicit schema: Option[Schema], sp: ScaleAndPrecision): GenericContainer = convert(name, parse(str)) def convert(name: String, value: JValue) (implicit aggregatedSchema: Option[Schema], sp: ScaleAndPrecision): GenericContainer = { value match { case JArray(arr) => val values = new java.util.ArrayList[AnyRef]() val prevSchema = aggregatedSchema.map(_.getField(name)).map(_.schema) val result = convert(name, arr.head)(prevSchema, sp) result match { case n: NonRecordContainer => values.add(n.getValue) case _ => values.add(result) } arr.tail.foreach { v => convert(name, v)(prevSchema, sp) match { case n: NonRecordContainer => values.add(n.getValue) case other => values.add(other) } } new NonRecordContainer(Schema.createArray(result.getSchema), values) case JBool(b) => new NonRecordContainer(Schema.create(Schema.Type.BOOLEAN), b) case JDecimal(d) => val schema = Schema.create(Schema.Type.BYTES) val decimal = LogicalTypes.decimal(sp.precision, sp.scale) decimal.addToSchema(schema) new serializers.NonRecordContainer(schema, d.bigDecimal.unscaledValue().toByteArray) case JDouble(d) => new serializers.NonRecordContainer(Schema.create(Schema.Type.DOUBLE), d) case JInt(i) => new serializers.NonRecordContainer(Schema.create(Schema.Type.LONG), i.toLong) case JLong(l) => new serializers.NonRecordContainer(Schema.create(Schema.Type.LONG), l) case JNothing => new NonRecordContainer(Schema.create(Schema.Type.NULL), null) case JNull => val schema = Schema.createUnion(java.util.Arrays.asList(Schema.create(Schema.Type.NULL), createStringSchema)) new serializers.NonRecordContainer(schema, null) case JString(s) => val schema = createStringSchema new serializers.NonRecordContainer(schema, s) case JObject(values) => val schema = Schema.createRecord(name, "", namespace, false) val fields = new util.ArrayList[Schema.Field]() val default: AnyRef = null val fieldsMap = values.map { case (n, v) => val prevSchema = aggregatedSchema.map(_.getField(n)).map(_.schema()) val result = convert(n, v)(prevSchema, sp) //schema.setFields(java.util.Arrays.asList())) fields.add(new Schema.Field(n, result.getSchema, "", default)) n -> result }.toMap import scala.collection.JavaConversions._ aggregatedSchema .foreach { schema => schema.getFields .withFilter(f => !fieldsMap.contains(f.name())) .foreach { f => fields.add(new Schema.Field(f.name(), f.schema(), "", default)) } } schema.setFields(fields) val record = new Record(schema) fieldsMap.foreach { case (field, v: NonRecordContainer) => record.put(field, v.getValue) case (field, v: GenericContainer) => record.put(field, v) } record } } private def createStringSchema = { val schema = Schema.create(Schema.Type.STRING) if (avroStringTypeIsString) schema.addProp("avro.java.string", new TextNode("String")) schema } }
Example 42
Source File: StdAvroModelFactoryTest.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.factory.avro

import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import com.eharmony.aloha.io.vfs.Vfs1
import com.eharmony.aloha.models.Model
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.commons.io.IOUtils
import org.junit.Assert.assertEquals
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner

import scala.util.Try

@RunWith(classOf[BlockJUnit4ClassRunner])
class StdAvroModelFactoryTest {
  import StdAvroModelFactoryTest._

  // The @Test methods of the original suite are omitted from this excerpt.

  private[this] def record = {
    val r = new GenericData.Record(TheSchema)
    r.put("req_str_1", "smart handsome stubborn")
    r
  }
}

object StdAvroModelFactoryTest {
  private lazy val TheSchema = {
    val is = getClass.getClassLoader.getResourceAsStream(SchemaUrlResource)
    try new Schema.Parser().parse(is) finally IOUtils.closeQuietly(is)
  }

  private val ExpectedResult = 7d

  private val SchemaUrlResource = "avro/class7.avpr"
  private val SchemaUrl = s"res:$SchemaUrlResource"
  private val SchemaFile = new java.io.File(getClass.getClassLoader.getResource(SchemaUrlResource).getFile)
  private val SchemaVfs1FileObject = org.apache.commons.vfs.VFS.getManager.resolveFile(SchemaUrl)
  private val SchemaVfs2FileObject = org.apache.commons.vfs2.VFS.getManager.resolveFile(SchemaUrl)

  private val Imports = Seq("com.eharmony.aloha.feature.BasicFunctions._", "scala.math._")
  private val ReturnType = "Double"

  private val ModelJson =
    """
      |{
      |  "modelType": "Regression",
      |  "modelId": { "id": 0, "name": "" },
      |  "features" : {
      |    "my_attributes": "${req_str_1}.split(\"\\\\W+\").map(v => (s\"=$v\", 1.0))"
      |  },
      |  "weights": {
      |    "my_attributes=handsome": 1,
      |    "my_attributes=smart": 2,
      |    "my_attributes=stubborn": 4
      |  }
      |}
    """.stripMargin
}
Example 43
Source File: TestAvroClass.scala From embedded-kafka-schema-registry with MIT License | 5 votes |
package net.manub.embeddedkafka.schemaregistry import org.apache.avro.specific.SpecificRecordBase import org.apache.avro.{AvroRuntimeException, Schema} case class TestAvroClass(var name: String) extends SpecificRecordBase { def this() = this("") override def get(i: Int): AnyRef = i match { case 0 => name case _ => throw new AvroRuntimeException("Bad index") } override def put(i: Int, v: scala.Any): Unit = i match { case 0 => name = v match { case (utf8: org.apache.avro.util.Utf8) => utf8.toString case _ => v.asInstanceOf[String] } case _ => throw new AvroRuntimeException("Bad index") } override def getSchema: Schema = TestAvroClass.avroSchema } object TestAvroClass { val avroSchema = (new Schema.Parser) .parse(""" |{"namespace": "net.manub.embeddedkafka.schemaregistry", | "type": "record", | "name": "TestAvroClass", | "fields": [ | {"name": "name", "type": "string"} | ] |} """.stripMargin) }
Example 45
Source File: AvroCodecsSpecification.scala From kafka-scala-api with Apache License 2.0 | 5 votes |
package com.example.avro import org.scalatest._ import com.twitter.bijection.Injection import com.twitter.bijection.avro.GenericAvroCodecs import org.apache.avro.Schema import org.apache.avro.generic.{GenericData, GenericRecord} class GenericAvroCodecsSpecification extends WordSpec with Matchers { val testSchema = new Schema.Parser().parse("""{ "type":"record", "name":"FiscalRecord", "namespace":"avro", "fields":[ { "name":"calendarDate", "type":"string" }, { "name":"fiscalWeek", "type":[ "int", "null" ] }, { "name":"fiscalYear", "type":[ "int", "null" ] } ] }""") "Generic Avro codec" should { "Round trip generic record using Generic Injection" in { implicit val genericInjection = GenericAvroCodecs[GenericRecord](testSchema) val testRecord = buildGenericAvroRecord(("2012-01-01", 1, 12)) val bytes = Injection[GenericRecord, Array[Byte]](testRecord) val attempt = Injection.invert[GenericRecord, Array[Byte]](bytes) assert(attempt.get == testRecord) } "Round trip generic record using Binary Injection" in { implicit val genericBinaryInjection = GenericAvroCodecs.toBinary[GenericRecord](testSchema) val testRecord = buildGenericAvroRecord(("2012-01-01", 1, 12)) val bytes = Injection[GenericRecord, Array[Byte]](testRecord) val attempt = Injection.invert[GenericRecord, Array[Byte]](bytes) assert(attempt.get == testRecord) } "Round trip generic record using Json Injection" in { implicit val genericJsonInjection = GenericAvroCodecs.toJson[GenericRecord](testSchema) val testRecord = buildGenericAvroRecord(("2012-01-01", 1, 12)) val jsonString = Injection[GenericRecord, String](testRecord) val attempt = Injection.invert[GenericRecord, String](jsonString) assert(attempt.get == testRecord) } } def buildGenericAvroRecord(i: (String, Int, Int)): GenericRecord = { val fiscalRecord = new GenericData.Record(testSchema) fiscalRecord.put("calendarDate", i._1) fiscalRecord.put("fiscalWeek", i._2) fiscalRecord.put("fiscalYear", i._3) fiscalRecord } }
Example 46
Source File: AvroRecord.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase.examples

import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import org.apache.spark.sql.execution.datasources.hbase.types.{AvroSerde, SchemaConverters}

object AvroRecord {
  def main(args: Array[String]) {
    // Test Avro-to-SQL schema conversion: basic setup
    val schemaString =
      """{"namespace": "example.avro",
        |  "type": "record", "name": "User",
        |  "fields": [
        |    {"name": "name", "type": "string"},
        |    {"name": "favorite_number", "type": ["int", "null"]},
        |    {"name": "favorite_color", "type": ["string", "null"]}
        |  ]
        |}""".stripMargin
    val avroSchema: Schema = {
      val p = new Schema.Parser
      p.parse(schemaString)
    }

    val user1 = new GenericData.Record(avroSchema)
    user1.put("name", "Alyssa")
    user1.put("favorite_number", 256)

    val user2 = new GenericData.Record(avroSchema)
    user2.put("name", "Ben")
    user2.put("favorite_number", 7)
    user2.put("favorite_color", "red")

    val sqlUser1 = SchemaConverters.createConverterToSQL(avroSchema)(user1)
    println(sqlUser1)
    val schema = SchemaConverters.toSqlType(avroSchema)
    println(s"\nSqlschema: $schema")
    val avroUser1 = SchemaConverters.createConverterToAvro(schema.dataType, "avro", "example.avro")(sqlUser1)
    val avroByte = AvroSerde.serialize(avroUser1, avroSchema)
    val avroUser11 = AvroSerde.deserialize(avroByte, avroSchema)
    // Print the record recovered from the serialize/deserialize round trip.
    println(s"$avroUser11")
  }
}
Example 47
Source File: LinearAlgebraLibrarySuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.pfa.functions import com.ibm.aardpfark.pfa.dsl._ import com.ibm.aardpfark.pfa.document.PFABuilder import org.apache.avro.Schema class LinearAlgebraLibrarySuite extends FunctionLibrarySuite { test("Linear algebra add") { val action = la.add(inputExpr, NewArray[Double](Seq(-1.0, 1.0, 4.0))) val pfaDoc = new PFABuilder() .withInput(doubleArraySchema) .withOutput(doubleArraySchema) .withAction(action) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) val result = engine.action(engine.jsonInput("[1.0, 10.0, -3.0]")) assert(engine.jsonOutput(result) == "[0.0,11.0,1.0]") } test("Linear algebra dot - matrix / matrix") { val action = la.dot(inputExpr, inputExpr) val pfaDoc = new PFABuilder() .withInput(Schema.createArray(doubleArraySchema)) .withOutput(Schema.createArray(doubleArraySchema)) .withAction(action) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) val result = engine.action(engine.jsonInput("[[0.0, 1.0], [2.0, 1.0]]")) assert(engine.jsonOutput(result) == "[[2.0,1.0],[2.0,3.0]]") } test("Linear algebra dot - matrix / vector") { val action = la.dot(inputExpr, NewArray[Double](Seq(-1.0, 1.0))) val pfaDoc = new PFABuilder() .withInput(Schema.createArray(doubleArraySchema)) .withOutput(doubleArraySchema) .withAction(action) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) val result = engine.action(engine.jsonInput("[[0.0, 1.0], [2.0, 1.0]]")) assert(engine.jsonOutput(result) == "[1.0,-1.0]") } test("Linear algebra scale") { val action = la.scale(inputExpr, 0.5) val pfaDoc = new PFABuilder() .withInput(Schema.createArray(doubleArraySchema)) .withOutput(Schema.createArray(doubleArraySchema)) .withAction(action) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) val result = engine.action(engine.jsonInput("[[0.0, 1.0], [2.0, 1.0]]")) assert(engine.jsonOutput(result) == "[[0.0,0.5],[1.0,0.5]]") } test("Linear algebra sub") { val action = la.sub(inputExpr, NewArray[Double](Seq(-1.0, 1.0, 4.0))) val pfaDoc = new PFABuilder() .withInput(doubleArraySchema) .withOutput(doubleArraySchema) .withAction(action) .pfa val engine = getPFAEngine(pfaDoc.toJSON()) val result = engine.action(engine.jsonInput("[1.0, 10.0, -3.0]")) assert(engine.jsonOutput(result) == "[2.0,9.0,-7.0]") } }
Example 48
Source File: GenericAvroSerializerSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 49
Source File: Schemas.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool import com.google.api.client.json.JsonObjectParser import com.google.api.client.json.jackson2.JacksonFactory import com.google.api.services.bigquery.model.TableSchema import com.google.common.base.Charsets import org.apache.avro.Schema object Schemas { val avroSchema: Schema = new Schema.Parser().parse(this.getClass.getResourceAsStream("/schema.avsc")) val simpleAvroSchema: Schema = new Schema.Parser().parse(this.getClass.getResourceAsStream("/SimpleRecord.avsc")) val evolvedSimpleAvroSchema: Schema = new Schema.Parser().parse(this.getClass.getResourceAsStream("/EvolvedSimpleRecord.avsc")) val simpleAvroByteFieldSchema: Schema = new Schema.Parser().parse(this.getClass.getResourceAsStream("/SimpleByteFieldRecord.avsc")) val tableSchema: TableSchema = new JsonObjectParser(new JacksonFactory) .parseAndClose( this.getClass.getResourceAsStream("/schema.json"), Charsets.UTF_8, classOf[TableSchema]) }
Example 50
Source File: AvroIO.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io

import java.io.{File, InputStream, OutputStream}
import java.nio.ByteBuffer
import java.nio.channels.SeekableByteChannel

import com.google.common.io.ByteStreams
import org.apache.avro.Schema
import org.apache.avro.file.{DataFileReader, DataFileWriter, SeekableByteArrayInput, SeekableInput}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DatumReader, DatumWriter}
import org.apache.avro.reflect.{ReflectDatumReader, ReflectDatumWriter}
import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter, SpecificRecord}
import org.apache.beam.sdk.io.FileSystems
import org.apache.beam.sdk.io.fs.MatchResult.Metadata

import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag

object AvroIO {
  // Other members of the original object, including the createDatumWriter helper and the
  // FileStorage utility referenced below, are omitted from this excerpt.

  def writeToOutputStream[T: ClassTag](data: Iterable[T],
                                       schema: Schema,
                                       os: OutputStream): Unit = {
    val fileWriter = new DataFileWriter(createDatumWriter[T]).create(schema, os)
    data.foreach(fileWriter.append)
    fileWriter.close()
  }

  def getAvroSchemaFromFile(path: String): Schema = {
    require(FileStorage(path).exists, s"File `$path` does not exist!")
    val files = FileStorage(path).listFiles.filter(_.resourceId.getFilename.endsWith(".avro"))
    require(files.nonEmpty, s"File `$path` does not contain avro files")
    val reader = new GenericDatumReader[GenericRecord]()
    val dfr = new DataFileReader[GenericRecord](AvroIO.getAvroSeekableInput(files.head), reader)
    dfr.getSchema
  }

  private def getAvroSeekableInput(meta: Metadata): SeekableInput = new SeekableInput {
    require(meta.isReadSeekEfficient)
    private val in = FileSystems.open(meta.resourceId()).asInstanceOf[SeekableByteChannel]

    override def read(b: Array[Byte], off: Int, len: Int): Int =
      in.read(ByteBuffer.wrap(b, off, len))

    override def tell(): Long = in.position()

    override def length(): Long = in.size()

    override def seek(p: Long): Unit = in.position(p)

    override def close(): Unit = in.close()
  }
}
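A small sketch of the two methods shown above; the in-memory stream keeps it self-contained, and the GCS path in the comment is a placeholder.

import java.io.ByteArrayOutputStream
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericRecord}

val schema = new Schema.Parser().parse(
  """{"type": "record", "name": "N", "fields": [{"name": "x", "type": "long"}]}""")
val rec = new GenericData.Record(schema)
rec.put("x", 1L)

val baos = new ByteArrayOutputStream()
AvroIO.writeToOutputStream[GenericRecord](Seq(rec), schema, baos) // Avro container file bytes

// Recovering the writer schema from a directory of .avro files:
// val recovered = AvroIO.getAvroSchemaFromFile("gs://my-bucket/path/to/avro/")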
Example 51
Source File: GenericAvroSerializerSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  // Schema compression and decompression
  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  // Record serialization and deserialization
  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  // Use the schema fingerprint to reduce message size
  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)
    val output = new Output(new ByteArrayOutputStream())
    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  // Cache previously seen schemas
  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 52
Source File: AvroFieldTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.cli.gen

import com.salesforce.op.cli.gen.AvroField._
import com.salesforce.op.test.TestCommon
import org.apache.avro.Schema
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{Assertions, FlatSpec}

import scala.collection.JavaConverters._
import scala.language.postfixOps

@RunWith(classOf[JUnitRunner])
class AvroFieldTest extends FlatSpec with TestCommon with Assertions {

  Spec[AvroField] should "do from" in {
    val types = List(
      Schema.Type.STRING,
      //  Schema.Type.BYTES, // somehow this avro type is not covered (yet)
      Schema.Type.INT,
      Schema.Type.LONG,
      Schema.Type.FLOAT,
      Schema.Type.DOUBLE,
      Schema.Type.BOOLEAN
    )
    val simpleSchemas = types map Schema.create

    val unions = List(
      Schema.createUnion((Schema.Type.NULL :: Schema.Type.INT :: Nil) map Schema.create asJava),
      Schema.createUnion((Schema.Type.INT :: Schema.Type.NULL :: Nil) map Schema.create asJava)
    )

    val enum = Schema.createEnum("Aliens", "undocumented", "outer",
      List("Edgar_the_Bug", "Boris_the_Animal", "Laura_Vasquez") asJava)

    val allSchemas = (enum :: unions) ++ simpleSchemas // NULL does not work

    val fields = allSchemas.zipWithIndex map {
      case (s, i) => new Schema.Field("x" + i, s, "Who", null: Object)
    }

    val expected = List(
      AEnum(fields(0), isNullable = false),
      AInt(fields(1), isNullable = true),
      AInt(fields(2), isNullable = true),
      AString(fields(3), isNullable = false),
      AInt(fields(4), isNullable = false),
      ALong(fields(5), isNullable = false),
      AFloat(fields(6), isNullable = false),
      ADouble(fields(7), isNullable = false),
      ABoolean(fields(8), isNullable = false)
    )

    an[IllegalArgumentException] should be thrownBy {
      val nullSchema = Schema.create(Schema.Type.NULL)
      // Use the NULL schema created above: a field of type NULL must be rejected.
      val nullField = new Schema.Field("xxx", nullSchema, "Nobody", null: Object)
      AvroField from nullField
    }

    fields.size shouldBe expected.size

    for {
      (field, expected) <- fields zip expected
    } {
      val actual = AvroField from field
      actual shouldBe expected
    }
  }
}
Example 53
Source File: CSVAutoReadersTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.readers import com.salesforce.op.test.PassengerSparkFixtureTest import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.junit.JUnitRunner import scala.collection.JavaConverters._ @RunWith(classOf[JUnitRunner]) class CSVAutoReadersTest extends FlatSpec with PassengerSparkFixtureTest { private val expectedSchema = new Schema.Parser().parse(resourceFile(name = "PassengerAuto.avsc")) private val allFields = expectedSchema.getFields.asScala.map(_.name()) private val keyField: String = allFields.head Spec[CSVAutoReader[_]] should "read in data correctly and infer schema" in { val dataReader = DataReaders.Simple.csvAuto[GenericRecord]( path = Some(passengerCsvWithHeaderPath), key = _.get(keyField).toString ) val data = dataReader.readRDD().collect() data.foreach(_ shouldBe a[GenericRecord]) data.length shouldBe 8 val inferredSchema = data.head.getSchema inferredSchema shouldBe expectedSchema } it should "read in data correctly and infer schema based with headers provided" in { val dataReader = DataReaders.Simple.csvAuto[GenericRecord]( path = Some(passengerCsvPath), key = _.get(keyField).toString, headers = allFields ) val data = dataReader.readRDD().collect() data.foreach(_ shouldBe a[GenericRecord]) data.length shouldBe 8 val inferredSchema = data.head.getSchema inferredSchema shouldBe expectedSchema } }
Example 54
Source File: HttpSchemaRegistrySpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.kafka import io.amient.affinity.avro.HttpSchemaRegistry import io.amient.affinity.avro.HttpSchemaRegistry.HttpAvroConf import io.amient.affinity.avro.record.AvroRecord import io.amient.affinity.avro.record.AvroSerde.AvroConf import org.apache.avro.Schema import org.scalatest.{FlatSpec, Matchers} import scala.collection.JavaConverters._ object SimpleEnum extends Enumeration { type SimpleEnum = Value val A, B, C = Value } case class SimpleKey(val id: Int) extends AvroRecord { override def hashCode(): Int = id.hashCode() } case class SimpleRecord(val id: SimpleKey = SimpleKey(0), val side: SimpleEnum.Value = SimpleEnum.A, val seq: Seq[SimpleKey] = Seq()) extends AvroRecord{ override def hashCode(): Int = id.hashCode() } case class CompositeRecord( val items: Seq[SimpleRecord] = Seq(), val index: Map[String, SimpleRecord] = Map(), val setOfPrimitives: Set[Long] = Set() ) extends AvroRecord class HttpSchemaRegistrySpec extends FlatSpec with Matchers with EmbeddedConfluentRegistry { override def numPartitions = 1 behavior of "HttpSchemaRegistry" val serde = new HttpSchemaRegistry(HttpAvroConf(Map( HttpAvroConf(AvroConf).HttpSchemaRegistryUrl.path -> registryUrl ).asJava)) serde.register[SimpleKey] serde.register[SimpleRecord] val v1schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Record\",\"namespace\":\"io.amient.affinity.kafka\",\"fields\":[{\"name\":\"items\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"SimpleRecord\",\"fields\":[{\"name\":\"id\",\"type\":{\"type\":\"record\",\"name\":\"SimpleKey\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]},\"default\":{\"id\":0}},{\"name\":\"side\",\"type\":{\"type\":\"enum\",\"name\":\"SimpleEnum\",\"symbols\":[\"A\",\"B\",\"C\"]},\"default\":\"A\"},{\"name\":\"seq\",\"type\":{\"type\":\"array\",\"items\":\"SimpleKey\"},\"default\":[]}]}},\"default\":[]},{\"name\":\"removed\",\"type\":\"int\",\"default\":0}]}") serde.register[CompositeRecord](v1schema) it should "allow compatible version of previously registered schema" in { serde.register[CompositeRecord] should be(4) } it should "reject incompatible schema registration" in { val thrown = intercept[RuntimeException]{ val v3schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Record\",\"namespace\":\"io.amient.affinity.kafka\",\"fields\":[{\"name\":\"data\",\"type\":\"string\"}]}") serde.register[CompositeRecord](v3schema) } thrown.getMessage should include("incompatible") } it should "register topic subject when fqn subject is already registered" in { val data = SimpleRecord() //fqn should be already registered serde.getRuntimeSchema(classOf[SimpleRecord].getName) should be((2, data.getSchema)) //now simulate what KafkaAvroSerde would do val (schemaId, objSchema) = serde.from(data, "topic-simple") schemaId should be(2) objSchema should be(data.getSchema) //and check the additional subject was registered with the same schema serde.register("topic-simple", data.getSchema) should be(2) } }
Example 55
Source File: AvroSchemaSpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.avro import io.amient.affinity.avro.record.AvroRecord import org.apache.avro.Schema import org.scalatest.{FlatSpec, Matchers} object Status extends Enumeration { type Status = Value val OK, FAILED = Value } case class Referenced( A: Status.Value, B: Status.Value, C: Map[String, Status.Value], D: List[Status.Value], E: Option[Status.Value]) extends AvroRecord class AvroSchemaSpec extends FlatSpec with Matchers { "AvroRecord" should "not fail when referencing the same type in a single schema" in { val schemaJson = AvroRecord.inferSchema[Referenced].toString(false) println(schemaJson) new Schema.Parser().parse(schemaJson) } "AvroRecord" should "1" in { new Schema.Parser().parse( """{"type":"record","namespace":"com.trustelevate.vpc.domain","name":"Parent","fields":[{"name":"pid","type":"long"},{"name":"registered","default":false,"type":"boolean"},{"name":"consents","default":{},"type":{"type":"map","values":{"type":"record","name":"Consent","fields":[{"name":"username","type":"string"},{"name":"contact","type":{"type":"record","name":"CredentialKey","fields":[{"name":"kind","type":{"type":"enum","name":"CredentialType","symbols":["FIRST_NAME","LAST_NAME","EMAIL","DOB","ADDRESS","PARENT","PHONE"]}},{"name":"value","type":"string"}]}},{"name":"service","type":"string"},{"name":"consentAge","default":0,"type":"int"},{"name":"status","default":"PENDING","type":{"type":"enum","name":"ConsentStatus","symbols":["NOT_REQUIRED","PENDING","APPROVED","REJECTED"]}},{"name":"requestedUTC","type":"long"},{"name":"updatedUTC","default":-1,"type":"long"},{"name":"child","default":0,"type":"long"},{"name":"verification","default":"UNKNOWN","type":{"type":"enum","name":"VerificationStatus","symbols":["UNKNOWN","CONFIRMED","VERIFIED","FAILED"]}},{"name":"requestToken","default":"","type":"string"}]}}},{"name":"children","default":{},"type":{"type":"map","values":{"type":"record","name":"Child","fields":[{"name":"pii","type":{"type":"array","items":"CredentialKey"}},{"name":"verification","type":"VerificationStatus"},{"name":"verificationTimestamp","default":-1,"type":"long"}]}}},{"name":"password","default":null,"type":["null","string"]},{"name":"action","default":"NONE","type":{"type":"enum","name":"UserAction","symbols":["CREATE_PASSWORD","RESET_PASSWORD","NONE"]}}]}""" ) } }
Example 56
Source File: ZookeeperSchemaRegistrySpec.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.avro import io.amient.affinity.avro.ZookeeperSchemaRegistry.ZkAvroConf import io.amient.affinity.avro.record.AvroSerde import io.amient.affinity.avro.record.AvroSerde.AvroConf import io.amient.affinity.kafka.EmbeddedZooKeeper import org.apache.avro.{Schema, SchemaValidationException} import org.scalatest.{FlatSpec, Matchers} import scala.collection.JavaConverters._ class ZookeeperSchemaRegistrySpec extends FlatSpec with Matchers with EmbeddedZooKeeper { behavior of "ZkAvroRegistry" val v1schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Record_Current\",\"namespace\":\"io.amient.affinity.avro\",\"fields\":[{\"name\":\"items\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"SimpleRecord\",\"fields\":[{\"name\":\"id\",\"type\":{\"type\":\"record\",\"name\":\"SimpleKey\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]},\"default\":{\"id\":0}},{\"name\":\"side\",\"type\":{\"type\":\"enum\",\"name\":\"SimpleEnum\",\"symbols\":[\"A\",\"B\",\"C\"]},\"default\":\"A\"},{\"name\":\"seq\",\"type\":{\"type\":\"array\",\"items\":\"SimpleKey\"},\"default\":[]}]}},\"default\":[]},{\"name\":\"removed\",\"type\":\"int\",\"default\":0}]}") val v3schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Record_Current\",\"namespace\":\"io.amient.affinity.avro\",\"fields\":[{\"name\":\"items\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"SimpleRecord\",\"fields\":[{\"name\":\"id\",\"type\":{\"type\":\"record\",\"name\":\"SimpleKey\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]},\"default\":{\"id\":0}},{\"name\":\"side\",\"type\":{\"type\":\"enum\",\"name\":\"SimpleEnum\",\"symbols\":[\"A\",\"B\",\"C\"]},\"default\":\"A\"},{\"name\":\"seq\",\"type\":{\"type\":\"array\",\"items\":\"SimpleKey\"},\"default\":[]}]}},\"default\":[]},{\"name\":\"index\",\"type\":{\"type\":\"map\",\"values\":\"SimpleRecord\"},\"default\":{}}]}") val conf = AvroConf(Map( AvroConf.Class.path -> classOf[ZookeeperSchemaRegistry].getName, ZkAvroConf(AvroConf).ZooKeeper.Connect.path -> zkConnect ).asJava) val serde = AvroSerde.create(conf) serde.register[SimpleKey] serde.register[SimpleRecord] val backwardSchemaId = serde.register[Record_Current](v1schema) val currentSchemaId = serde.register[Record_Current] val forwardSchemaId = serde.register[Record_Current](v3schema) it should "work in a backward-compatibility scenario" in { val oldValue = Record_V1(Seq(SimpleRecord(SimpleKey(1), SimpleEnum.C)), 10) val oldBytes = serde.write(oldValue, v1schema, backwardSchemaId) oldBytes.mkString(",") should be("0,0,0,0,2,2,2,4,0,0,20") val upgraded = serde.fromBytes(oldBytes) upgraded should be(Record_Current(Seq(SimpleRecord(SimpleKey(1), SimpleEnum.C)), Map())) } it should "work in a forward-compatibility scenario" in { val forwardValue = Record_V3(Seq(SimpleRecord(SimpleKey(1), SimpleEnum.A)), Map("X" -> SimpleRecord(SimpleKey(1), SimpleEnum.A))) val forwardBytes = serde.write(forwardValue, v3schema, forwardSchemaId) val downgraded = serde.fromBytes(forwardBytes) downgraded should be(Record_Current(Seq(SimpleRecord(SimpleKey(1), SimpleEnum.A)), Map("X" -> SimpleRecord(SimpleKey(1), SimpleEnum.A)), Set())) } it should "reject incompatible schema registration" in { val v4schema = new Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Record\",\"namespace\":\"io.amient.affinity.avro\",\"fields\":[{\"name\":\"data\",\"type\":\"string\"}]}") an[SchemaValidationException] should be thrownBy { serde.register[Record_Current](v4schema) } } }
Example 57
Source File: LocalSchemaRegistry.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.avro import java.nio.file.{Files, Path} import com.typesafe.config.Config import io.amient.affinity.avro.LocalSchemaRegistry.LocalAvroConf import io.amient.affinity.avro.record.AvroSerde import io.amient.affinity.avro.record.AvroSerde.AvroConf import io.amient.affinity.core.config.CfgStruct import org.apache.avro.Schema import scala.collection.JavaConverters._ import scala.io.Source object LocalSchemaRegistry { object LocalAvroConf extends LocalAvroConf { override def apply(config: Config) = new LocalAvroConf().apply(config) } class LocalAvroConf extends CfgStruct[LocalAvroConf](classOf[AvroConf]) { val DataPath = filepath("schema.registry.path", true).doc("local file path under which schemas will be stored") } } class LocalSchemaRegistry(dataPath: Path) extends AvroSerde with AvroSchemaRegistry { def this(_conf: AvroConf) = this(LocalAvroConf(_conf).DataPath()) def checkDataPath(): Unit = { require(dataPath != null, s"${LocalAvroConf.DataPath.path} is not defined") if (!Files.exists(dataPath)) Files.createDirectories(dataPath) } override def close() = () override protected def registerSchema(subject: String, schema: Schema): Int = hypersynchronized { checkDataPath() val s = dataPath.resolve(s"$subject.dat") val versions: Map[Schema, Int] = if (Files.exists(s)) { Source.fromFile(s.toFile).mkString.split(",").toList.map(_.toInt).map { case id => getSchema(id) -> id }.toMap } else { Map.empty } versions.get(schema).getOrElse { validator.validate(schema, versions.map(_._1).asJava) val id = (0 until Int.MaxValue).find(i => !Files.exists(dataPath.resolve(s"$i.avsc"))).max val schemaPath = dataPath.resolve(s"$id.avsc") Files.createFile(schemaPath) Files.write(schemaPath, schema.toString(true).getBytes("UTF-8")) val updatedVersions = versions + (schema -> id) Files.write(s, updatedVersions.values.mkString(",").getBytes("UTF-8")) id } } private def hypersynchronized[X](func: => X) = synchronized { checkDataPath() val file = dataPath.resolve(".lock").toFile def getLock(countDown: Int = 30): Unit = { if (!file.createNewFile()) if (countDown > 0) { Thread.sleep(1000) getLock(countDown - 1) } else throw new java.nio.file.FileAlreadyExistsException("atomic createNewFile failed") } getLock() try { func } finally { file.delete() } } }
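A quick sketch: back the registry with a temporary directory and register a schema under an explicit subject. The register(subject, schema) call comes from the shared registry trait, as used in the HttpSchemaRegistrySpec example above; the schema JSON itself is illustrative.

import java.nio.file.Files
import org.apache.avro.Schema

val serde = new LocalSchemaRegistry(Files.createTempDirectory("schema-registry"))

val schema: Schema = new Schema.Parser().parse(
  """{"type": "record", "name": "KV", "namespace": "example", "fields": [{"name": "k", "type": "string"}]}""")

val id: Int = serde.register("kv-topic", schema) // assigns and persists a numeric schema id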
Example 58
Source File: MemorySchemaRegistry.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.avro import java.util.concurrent.ConcurrentHashMap import com.typesafe.config.Config import io.amient.affinity.avro.MemorySchemaRegistry.MemAvroConf import io.amient.affinity.avro.record.AvroSerde import io.amient.affinity.avro.record.AvroSerde.AvroConf import io.amient.affinity.core.config.CfgStruct import org.apache.avro.{Schema, SchemaValidator} import scala.collection.JavaConverters._ object MemorySchemaRegistry { object MemAvroConf extends MemAvroConf { override def apply(config: Config) = new MemAvroConf().apply(config) } class MemAvroConf extends CfgStruct[MemAvroConf](classOf[AvroConf]) { val ID = integer("schema.registry.id", false) .doc("multiple instances with the same id will share the schemas registered by any of them") } val multiverse = new ConcurrentHashMap[Int, Universe]() def createUniverse(reuse: Option[Int] = None): Universe = synchronized { reuse match { case Some(id) if multiverse.containsKey(id) => multiverse.get(id) case Some(id) => val universe = new Universe(id) multiverse.asScala += id -> universe universe case None => val id = (if (multiverse.isEmpty) 1 else multiverse.asScala.keys.max + 1) val universe = new Universe(id) multiverse.asScala += id -> universe universe } } class Universe(val id: Int) { val schemas = new ConcurrentHashMap[Int, Schema]() val subjects = new ConcurrentHashMap[String, List[Int]]() def getOrRegister(schema: Schema): Int = synchronized { schemas.asScala.find(_._2 == schema) match { case None => val newId = schemas.size schemas.put(newId, schema) newId case Some((id, _)) => id } } def updateSubject(subject: String, schemaId: Int, validator: SchemaValidator): Unit = synchronized { val existing = Option(subjects.get(subject)).getOrElse(List()) validator.validate(schemas.get(schemaId), existing.map(id => schemas.get(id)).asJava) if (!existing.contains(schemaId)) { subjects.put(subject, (existing :+ schemaId)) } } } } class MemorySchemaRegistry(universe: MemorySchemaRegistry.Universe) extends AvroSerde with AvroSchemaRegistry { def this(conf: MemAvroConf) = this(MemorySchemaRegistry.createUniverse(if (conf.ID.isDefined) Some(conf.ID()) else None)) def this(_conf: AvroConf) = this(MemAvroConf.apply(_conf)) def this() = this(new MemAvroConf()) //this is for stable tests register[Null]("null") register[Boolean]("boolean") register[Int]("int") register[Long]("long") register[Float]("float") register[Double]("double") register[String]("string") register[Array[Byte]]("bytes") override protected def registerSchema(subject: String, schema: Schema): Int = { val id = universe.getOrRegister(schema) universe.updateSubject(subject, id, validator) id } override def close() = () }
Example 59
Source File: ZookeeperSchemaRegistry.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.avro import com.typesafe.config.Config import io.amient.affinity.avro.ZookeeperSchemaRegistry.ZkAvroConf import io.amient.affinity.avro.record.AvroSerde import io.amient.affinity.avro.record.AvroSerde.AvroConf import io.amient.affinity.core.config.CfgStruct import io.amient.affinity.core.util.{ZkClients, ZkConf} import org.I0Itec.zkclient.ZkClient import org.I0Itec.zkclient.exception.ZkNodeExistsException import org.apache.avro.Schema import org.apache.zookeeper.CreateMode import scala.collection.JavaConverters._ object ZookeeperSchemaRegistry { object ZkAvroConf extends ZkAvroConf { override def apply(config: Config) = new ZkAvroConf().apply(config) } class ZkAvroConf extends CfgStruct[ZkAvroConf](classOf[AvroConf]) { val ZooKeeper = struct("schema.registry.zookeeper", new ZkConf, true) val ZkRoot = string("schema.registry.zookeeper.root", "/affinity-schema-registry") .doc("znode under which schemas will be stored") } } class ZookeeperSchemaRegistry(zkRoot: String, zk: ZkClient) extends AvroSerde with AvroSchemaRegistry { def this(conf: ZkAvroConf) = this(conf.ZkRoot(), { val zk = ZkClients.get(conf.ZooKeeper) val zkRoot = conf.ZkRoot() if (!zk.exists(zkRoot)) zk.createPersistent(zkRoot) val zkSchemas = s"$zkRoot/schemas" if (!zk.exists(zkSchemas)) zk.createPersistent(zkSchemas) val zkSubjects = s"$zkRoot/subjects" if (!zk.exists(zkSubjects)) zk.createPersistent(zkSubjects) zk }) def this(_conf: AvroConf) = this { new ZkAvroConf().apply(_conf) } override def close(): Unit = ZkClients.close(zk) override protected def registerSchema(subject: String, schema: Schema): Int = hypersynchronized { val zkSubject = s"$zkRoot/subjects/$subject" val zkSchemas = s"$zkRoot/schemas" val versions: Map[Schema, Int] = if (!zk.exists(zkSubject)) Map.empty else { zk.readData[String](zkSubject) match { case some => some.split(",").toList.map(_.toInt).map { case id => getSchema(id) -> id }.toMap } } versions.get(schema).getOrElse { validator.validate(schema, versions.map(_._1).asJava) val schemaPath = zk.create(s"$zkSchemas/", schema.toString(true), CreateMode.PERSISTENT_SEQUENTIAL) val id = schemaPath .substring(zkSchemas.length + 1).toInt val updatedVersions = versions.map(_._2).toList :+ id if (zk.exists(zkSubject)) { zk.writeData(zkSubject, updatedVersions.mkString(",")) } else { zk.create(zkSubject, updatedVersions.mkString(","), CreateMode.PERSISTENT) } id } } private def hypersynchronized[X](f: => X): X = synchronized { val lockPath = zkRoot + "/lock" var acquired = 0 do { try { zk.createEphemeral(lockPath) acquired = 1 } catch { case _: ZkNodeExistsException => acquired -= 1 if (acquired < -100) { throw new IllegalStateException("Could not acquire zk registry lock") } else { Thread.sleep(500) } } } while (acquired != 1) try f finally zk.delete(lockPath) } }
Example 60
Source File: JsonToAvroConverter.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.json import java.util import com.sksamuel.avro4s._ import org.apache.avro.Schema class JsonToAvroConverter(namespace: String, avroStringTypeIsString: Boolean = false, jsonFieldMapper: FieldMapper = DefaultFieldMapper) { import org.json4s._ import org.json4s.native.JsonMethods._ import scala.collection.JavaConverters._ def convert(name: String, str: String): Schema = { convert(name, parse(str).transformField { case JField(n, v) => val newName = toCamelCase(n, jsonFieldMapper) (newName, v) }) } def convert(name: String, value: JValue): Schema = value match { case JArray(list) if list.isEmpty => Schema.create(Schema.Type.NULL) case JArray(list) => Schema.createArray(convert(name, list.head)) case JBool(_) => Schema.create(Schema.Type.BOOLEAN) case JDecimal(_) => Schema.create(Schema.Type.DOUBLE) case JDouble(_) => Schema.create(Schema.Type.DOUBLE) case JInt(_) => Schema.create(Schema.Type.LONG) case JLong(_) => Schema.create(Schema.Type.LONG) case JNothing => Schema.create(Schema.Type.NULL) case JNull => Schema.createUnion(util.Arrays.asList(Schema.create(Schema.Type.NULL), createStringSchema)) case JString(_) => createStringSchema case JSet(value) => Schema.createArray(convert(name, value.head)) case JObject(values) => val record = Schema.createRecord(name, null, namespace, false) val doc: String = null val default: AnyRef = null val fields = values.map { case (k, v) => new Schema.Field(k, convert(k, v), doc, default) } record.setFields(fields.asJava) record } private def createStringSchema = { val schema = Schema.create(Schema.Type.STRING) if (avroStringTypeIsString) schema.addProp("avro.java.string", "String") schema } private def toCamelCase(s: String, from: FieldMapper): String = { def fromDelimited(sep: String, s: String): String = { val head :: tail = s.split(sep).toList head ++ tail.foldLeft("")((acc, word) => acc ++ word.capitalize) } def decapitalize(s: String): String = { if (s.nonEmpty) s.head.toLower.toString + s.tail else s } from match { case DefaultFieldMapper => s case PascalCase => decapitalize(s) case SnakeCase => fromDelimited("_", s) case LispCase => fromDelimited("-", s) } } }
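Usage follows directly from the convert signature above; the JSON document is invented for illustration.

import org.apache.avro.Schema

val converter = new JsonToAvroConverter(namespace = "com.example")

// "name" becomes a string field and "age" a long field of the inferred record schema.
val personSchema: Schema = converter.convert("Person", """{"name": "Jane", "age": 42}""")
println(personSchema.toString(true))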
Example 61
Source File: IndexWithCompleteDocument.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.data import com.fasterxml.jackson.databind.JsonNode import com.typesafe.config.ConfigFactory import org.apache.avro.generic.GenericRecord import org.apache.avro.{Schema, SchemaBuilder} case class IndexWithCompleteDocument(uuid: String, document: String) extends GenericRecord with CsvGenerator { override def put(key: String, v: scala.Any): Unit = ??? override def get(key: String): AnyRef = key match { case "uuid" => uuid case "document" => document case _ => throw new IllegalArgumentException } override def put(i: Int, v: scala.Any): Unit = ??? override def get(i: Int): AnyRef = i match { case 0 => uuid case 1 => document case _ => throw new IllegalArgumentException } override def getSchema: Schema = IndexWithCompleteDocument.schema // Specifically don't implement CsvGenerator.csv since it is guaranteed to be invalid CSV - force use of Parquet. } object IndexWithCompleteDocument extends ObjectExtractor[IndexWithCompleteDocument] { val schema: Schema = SchemaBuilder .record("IndexWithCompleteDocument").namespace("cmwell.analytics") .fields .name("uuid").`type`.unionOf.stringType.and.nullType.endUnion.noDefault .name("document").`type`.unionOf.stringType.and.nullType.endUnion.noDefault .endRecord private val config = ConfigFactory.load val infotonSize: Int = config.getInt("extract-index-from-es.fetch-size-index-with-complete-document") def includeFields: String = s""""_source": "*"""" def extractFromJson(hit: JsonNode): IndexWithCompleteDocument = IndexWithCompleteDocument( uuid = hit.findValue("_id").asText, document = hit.findValue("_source").toString) }
Example 62
Source File: IndexWithKeyFields.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.data import com.fasterxml.jackson.databind.JsonNode import com.typesafe.config.ConfigFactory import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder} import org.apache.avro.generic.GenericRecord import org.apache.log4j.LogManager import org.joda.time.format.ISODateTimeFormat import scala.util.control.NonFatal case class IndexWithKeyFields(uuid: String, lastModified: java.sql.Timestamp, path: String) extends GenericRecord with CsvGenerator { override def put(key: String, v: scala.Any): Unit = ??? override def get(key: String): AnyRef = key match { case "uuid" => uuid case "lastModified" => java.lang.Long.valueOf(lastModified.getTime) case "path" => path } override def put(i: Int, v: scala.Any): Unit = ??? override def get(i: Int): AnyRef = i match { case 0 => uuid case 1 => java.lang.Long.valueOf(lastModified.getTime) case 2 => path case _ => throw new IllegalArgumentException } override def getSchema: Schema = IndexWithSystemFields.schema override def csv: String = (if (uuid == null) "" else uuid) + "," + (if (lastModified == null) "" else ISODateTimeFormat.dateTime.print(lastModified.getTime)) + "," + (if (path == null) "" else path) } object IndexWithKeyFields extends ObjectExtractor[IndexWithKeyFields] { private val logger = LogManager.getLogger(IndexWithSystemFields.getClass) // AVRO-2065 - doesn't allow union over logical type, so we can't make timestamp column nullable. val timestampMilliType: Schema = LogicalTypes.timestampMillis.addToSchema(Schema.create(Schema.Type.LONG)) val schema: Schema = SchemaBuilder .record("IndexWithSystemFields").namespace("cmwell.analytics") .fields .name("uuid").`type`.unionOf.stringType.and.nullType.endUnion.noDefault .name("lastModified").`type`(timestampMilliType).noDefault .name("path").`type`.unionOf.stringType.and.nullType.endUnion.noDefault .endRecord private val config = ConfigFactory.load val infotonSize: Int = config.getInt("extract-index-from-es.fetch-size-index-with-uuid-lastModified-path") def includeFields: String = { // Note that 'quad' is not included in this list val fields = "uuid,lastModified,path" .split(",") .map(name => s""""system.$name"""") .mkString(",") s""""_source": [$fields]""" } def extractFromJson(hit: JsonNode): IndexWithKeyFields = { val system = hit.findValue("_source").findValue("system") def extractString(name: String): String = system.findValue(name) match { case x: JsonNode => x.asText case _ => null } // Extracting date values as Long - as a java.sql.Date might be better def extractDate(name: String): java.sql.Timestamp = system.findValue(name) match { case x: JsonNode => try { new java.sql.Timestamp(ISODateTimeFormat.dateTime.parseDateTime(x.asText).getMillis) } catch { case NonFatal(ex) => logger.warn(s"Failed conversion of date value: $x", ex) throw ex } case _ => null } IndexWithKeyFields( uuid = extractString("uuid"), lastModified = extractDate("lastModified"), path = extractString("path")) } }
Example 63
Source File: Job.scala From spark-avro-compactor with Apache License 2.0 | 5 votes |
package ie.ianduffy.spark.avro.compactor

import ie.ianduffy.spark.avro.compactor.Utils._
import io.confluent.kafka.schemaregistry.client.{SchemaMetadata, SchemaRegistryClient}
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.avro.mapred.AvroKey
import org.apache.avro.mapreduce.AvroKeyOutputFormat
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.NullWritable
import org.apache.spark.sql.SparkSession
import org.slf4j.LoggerFactory

object Job {

  private val log = LoggerFactory.getLogger(Job.getClass.getName.replace("$", ""))

  def run(spark: SparkSession, schemaRegistry: SchemaRegistryClient, jobConfig: JobConfig): Unit = {
    val schema: Schema = {
      val latestSchemaMetadata: SchemaMetadata = schemaRegistry.getLatestSchemaMetadata(jobConfig.schemaRegistrySubject)
      val id: Int = latestSchemaMetadata.getId
      schemaRegistry.getById(id)
    }

    implicit val sparkConfig: Configuration = spark.sparkContext.hadoopConfiguration
    sparkConfig.set("avro.schema.input.key", schema.toString())
    sparkConfig.set("avro.schema.output.key", schema.toString())

    val inputPath: Path = new Path(jobConfig.input)
    val outputPath: Path = new Path(jobConfig.output)

    val fs: FileSystem = inputPath.getFileSystem(sparkConfig)

    // avoid raising org.apache.hadoop.mapred.FileAlreadyExistsException
    if (jobConfig.overrideOutput) fs.delete(outputPath, true)

    // for file systems with the s3 prefix the default is 64MB and can be overwritten by fs.s3.block.size
    // for file systems with the s3a prefix the default is 32MB and can be overwritten by setting fs.s3a.block.size
    val outputBlocksize: Long = fs.getDefaultBlockSize(outputPath)

    // Where inputPath is of the form s3://some/path
    val inputPathSize: Long = fs.getContentSummary(inputPath).getSpaceConsumed

    val numPartitions: Int = Math.max(1, Math.floor((inputPathSize / CompressionRatio.AVRO_SNAPPY) / outputBlocksize).toInt)

    log.debug(
      s"""outputBlocksize: $outputBlocksize
         | inputPathSize: $inputPathSize
         | splitSize: $numPartitions
       """.stripMargin)

    val rdd = readHadoopFile(spark, inputPath.toString)

    rdd.coalesce(numPartitions)
      .saveAsNewAPIHadoopFile(
        outputPath.toString,
        classOf[AvroKey[GenericRecord]],
        classOf[NullWritable],
        classOf[AvroKeyOutputFormat[GenericRecord]],
        sparkConfig
      )
  }
}
Example 64
Source File: AvroToParquetWriter.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.writers import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.fs.Path import org.apache.parquet.avro.AvroParquetWriter import org.slf4j.LoggerFactory import yamrcraft.etlite.utils.FileUtils class AvroToParquetWriter(tempFile: String, outputFile: String) extends Writer[GenericRecord] { val logger = LoggerFactory.getLogger(this.getClass) // lazy initialization var writer: Option[AvroParquetWriter[GenericRecord]] = None val tempPath = new Path(tempFile + ".parquet") val outputPath = new Path(outputFile + ".parquet") logger.info(s"creating writer for working file: ${tempPath.toString}, outputFile: ${outputPath.toString}") override def write(event: GenericRecord): Unit = { logger.info(s"ParquetWriter.write, event type: ${event.getSchema.getName}") if (writer.isEmpty) { writer = Some(createWriter(tempPath.toString, event.getSchema)) } writer.get.write(event) } override def commit(): Unit = { writer.get.close() val fs = FileUtils.getFS(outputPath.toString) fs.mkdirs(outputPath.getParent) if (fs.exists(outputPath)) { fs.rename(outputPath, new Path(outputPath.getParent, s"__${outputPath.getName}.${System.currentTimeMillis()}.old.__")) } // copy temp file to output file (typically temp file would be on local file system). if (tempFile.startsWith("file")) { logger.info(s"copy file from: ${tempPath.toString} to $outputPath") fs.copyFromLocalFile(true, true, tempPath, outputPath) } else { logger.info(s"renaming file from: ${tempPath.toString} to $outputPath") fs.rename(tempPath, outputPath) } } private def createWriter(file: String, schema: Schema) = { val fs = FileUtils.getFS(file) val path = new Path(file) if (fs.exists(path)) { fs.delete(path, true) } fs.mkdirs(path.getParent) new AvroParquetWriter[GenericRecord](path, schema) } }
Example 65
Source File: JsonToAvroTransformer.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.transformers import com.typesafe.config.Config import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import play.api.libs.json.Json import yamrcraft.etlite.utils.ConfigConversions._ import yamrcraft.etlite.utils.{FileUtils, JsonAvroConverter, TimeUtils} import yamrcraft.etlite.{ErrorType, EtlException} class JsonToAvroTransformer(config: Config) extends Transformer[Message[GenericRecord]] { val converter = new JsonAvroConverter() // config settings val timestampField = config.getString("timestamp-field") val timestampFieldFormat = config.getString("timestamp-field-format") val defaultSchemaFileName = config.getString("default-schema-file") val (schemaSelectionField, schemas) = { config.hasPath("schema-selection") match { case true => (Some(config.getString("schema-selection.field")), Some(config.getConfig("schema-selection.schemas").asMap.map {case (k,v) => (k, createSchema(v))}) ) case false => (None, None) } } val defaultSchema: Schema = createSchema(defaultSchemaFileName) @throws(classOf[EtlException]) override def transform(inbound: InboundMessage): Message[GenericRecord] = { try { val schema = getSchema(inbound.msg) val record = converter.convertToGenericDataRecord(inbound.msg, schema) Message[GenericRecord]( record, schema.getName, extractTimestamp(record) ) } catch { case e: EtlException => throw e case e: Exception => throw new EtlException(ErrorType.TransformationError, e) } } private def createSchema(path: String): Schema = new Schema.Parser().parse(FileUtils.readContent(path)) private def getSchema(msg: Array[Byte]): Schema = { if (schemaSelectionField.isEmpty) { defaultSchema } else { val msgAsString = new String(msg, "UTF8") val msgJson = Json.parse(msgAsString) val selectionValue = (msgJson \ schemaSelectionField.get).asOpt[String] schemas.get.getOrElse(selectionValue.get, defaultSchema) } } @throws(classOf[EtlException]) private def extractTimestamp(event: GenericRecord): Long = { try { (event.get(timestampField): Any) match { case ts: Long => ts.asInstanceOf[Long] case ts: String => TimeUtils.stringTimeToLong(ts, timestampFieldFormat) case _ => throw new RuntimeException("timestamp field is not of either Long or String types.") } } catch { case e: Exception => throw new EtlException(ErrorType.PartitionTimestampError, e) } } }
Example 66
Source File: AvroSchemaHelper.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark import org.apache.avro.Schema import org.apache.avro.Schema.Type import org.apache.avro.Schema.Type._ import scala.collection.JavaConverters._ object AvroSchemaHelper { def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { if (nullable && avroType.getType != NULL) { // avro uses union to represent nullable type. val fields = avroType.getTypes.asScala assert(fields.length == 2) val actualType = fields.filter(_.getType != Type.NULL) assert(actualType.length == 1) actualType.head } else { avroType } } }
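A small sketch of how resolveNullableType unwraps Avro's null-union encoding; the schema here is built ad hoc with SchemaBuilder:

import org.apache.avro.SchemaBuilder
import com.memsql.spark.AvroSchemaHelper

object ResolveNullableExample extends App {
  // Avro models a nullable string as the union [null, string].
  val nullableString = SchemaBuilder.unionOf().nullType().and().stringType().endUnion()
  val resolved = AvroSchemaHelper.resolveNullableType(nullableString, nullable = true)
  println(resolved.getType) // STRING - the non-null member of the union
}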
Example 67
Source File: AvroDecoder.scala From cuesheet with Apache License 2.0 | 5 votes |
package com.kakao.cuesheet.convert import java.util.Arrays.copyOfRange import kafka.serializer.Decoder import kafka.utils.VerifiableProperties import org.apache.avro.Schema import org.apache.avro.generic.{GenericDatumReader, GenericRecord} sealed trait AvroDecoder[T] extends Decoder[T] { def props: VerifiableProperties protected val schema = new Schema.Parser().parse(props.getString(Avro.SCHEMA)) protected val skipBytes = props.getInt(Avro.SKIP_BYTES, 0) protected val reader = new GenericDatumReader[GenericRecord](schema) protected val decoder = Avro.recordDecoder(reader) private def skip(bytes: Array[Byte], size: Int): Array[Byte] = { val length = bytes.length length - size match { case remaining if remaining > 0 => copyOfRange(bytes, size, length) case _ => new Array[Byte](0) } } def parse(bytes: Array[Byte]): GenericRecord = { val data = if (skipBytes == 0) bytes else skip(bytes, skipBytes) decoder(data) } } class AvroRecordDecoder(val props: VerifiableProperties) extends AvroDecoder[GenericRecord] { override def fromBytes(bytes: Array[Byte]): GenericRecord = parse(bytes) } class AvroMapDecoder(val props: VerifiableProperties) extends AvroDecoder[Map[String, Any]] { override def fromBytes(bytes: Array[Byte]): Map[String, Any] = Avro.toMap(parse(bytes)) } class AvroJsonDecoder(val props: VerifiableProperties) extends AvroDecoder[String] { override def fromBytes(bytes: Array[Byte]): String = Avro.toJson(parse(bytes)) }
Example 68
Source File: AvroTypeSpec.scala From shapeless-datatype with Apache License 2.0 | 5 votes |
package shapeless.datatype.avro import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.net.URI import java.nio.ByteBuffer import com.google.protobuf.ByteString import org.apache.avro.Schema import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.joda.time.Instant import org.scalacheck.Prop.forAll import org.scalacheck.ScalacheckShapeless._ import org.scalacheck._ import shapeless._ import shapeless.datatype.record._ import scala.reflect.runtime.universe._ object AvroTypeSpec extends Properties("AvroType") { import shapeless.datatype.test.Records._ import shapeless.datatype.test.SerializableUtils._ implicit def compareByteArrays(x: Array[Byte], y: Array[Byte]) = java.util.Arrays.equals(x, y) implicit def compareIntArrays(x: Array[Int], y: Array[Int]) = java.util.Arrays.equals(x, y) def roundTrip[A: TypeTag, L <: HList](m: A)(implicit gen: LabelledGeneric.Aux[A, L], fromL: FromAvroRecord[L], toL: ToAvroRecord[L], mr: MatchRecord[L] ): Boolean = { val t = ensureSerializable(AvroType[A]) val f1: SerializableFunction[A, GenericRecord] = new SerializableFunction[A, GenericRecord] { override def apply(m: A): GenericRecord = t.toGenericRecord(m) } val f2: SerializableFunction[GenericRecord, Option[A]] = new SerializableFunction[GenericRecord, Option[A]] { override def apply(m: GenericRecord): Option[A] = t.fromGenericRecord(m) } val toFn = ensureSerializable(f1) val fromFn = ensureSerializable(f2) val copy = fromFn(roundTripRecord(toFn(m))) val rm = RecordMatcher[A] copy.exists(rm(_, m)) } def roundTripRecord(r: GenericRecord): GenericRecord = { val writer = new GenericDatumWriter[GenericRecord](r.getSchema) val baos = new ByteArrayOutputStream() val encoder = EncoderFactory.get().binaryEncoder(baos, null) writer.write(r, encoder) encoder.flush() baos.close() val bytes = baos.toByteArray val reader = new GenericDatumReader[GenericRecord](r.getSchema) val bais = new ByteArrayInputStream(bytes) val decoder = DecoderFactory.get().binaryDecoder(bais, null) reader.read(null, decoder) } implicit val byteStringAvroType = AvroType.at[ByteString](Schema.Type.BYTES)( v => ByteString.copyFrom(v.asInstanceOf[ByteBuffer]), v => ByteBuffer.wrap(v.toByteArray) ) implicit val instantAvroType = AvroType.at[Instant](Schema.Type.LONG)(v => new Instant(v.asInstanceOf[Long]), _.getMillis) property("required") = forAll { m: Required => roundTrip(m) } property("optional") = forAll { m: Optional => roundTrip(m) } property("repeated") = forAll { m: Repeated => roundTrip(m) } property("mixed") = forAll { m: Mixed => roundTrip(m) } property("nested") = forAll { m: Nested => roundTrip(m) } property("seqs") = forAll { m: Seqs => roundTrip(m) } implicit val uriAvroType = AvroType.at[URI](Schema.Type.STRING)(v => URI.create(v.toString), _.toString) property("custom") = forAll { m: Custom => roundTrip(m) } }
Example 69
Source File: AvroType.scala From shapeless-datatype with Apache License 2.0 | 5 votes |
package shapeless.datatype.avro import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import shapeless._ import scala.reflect.runtime.universe._ class AvroType[A] extends Serializable { def fromGenericRecord[L <: HList]( m: GenericRecord )(implicit gen: LabelledGeneric.Aux[A, L], fromL: FromAvroRecord[L]): Option[A] = fromL(Right(m)).map(gen.from) def toGenericRecord[L <: HList]( a: A )(implicit gen: LabelledGeneric.Aux[A, L], toL: ToAvroRecord[L], tt: TypeTag[A]): GenericRecord = toL(gen.to(a)).left.get.build(AvroSchema[A]) } object AvroType { def apply[A: TypeTag]: AvroType[A] = new AvroType[A] def at[V: TypeTag]( schemaType: Schema.Type )(fromFn: Any => V, toFn: V => Any): BaseAvroMappableType[V] = { AvroSchema.register(implicitly[TypeTag[V]].tpe, schemaType) new BaseAvroMappableType[V] { override def from(value: Any): V = fromFn(value) override def to(value: V): Any = toFn(value) } } }
Example 70
Source File: AvroSchema.scala From shapeless-datatype with Apache License 2.0 | 5 votes |
package shapeless.datatype.avro import org.apache.avro.Schema.Field import org.apache.avro.{JsonProperties, Schema} import scala.collection.JavaConverters._ import scala.reflect.runtime.universe._ object AvroSchema { private def isField(s: Symbol): Boolean = s.isPublic && s.isMethod && !s.isSynthetic && !s.isConstructor private def isCaseClass(tpe: Type): Boolean = !tpe.toString.startsWith("scala.") && List(typeOf[Product], typeOf[Serializable], typeOf[Equals]) .forall(b => tpe.baseClasses.contains(b.typeSymbol)) private def toSchema(tpe: Type): (Schema, Any) = tpe match { case t if t =:= typeOf[Boolean] => (Schema.create(Schema.Type.BOOLEAN), null) case t if t =:= typeOf[Int] => (Schema.create(Schema.Type.INT), null) case t if t =:= typeOf[Long] => (Schema.create(Schema.Type.LONG), null) case t if t =:= typeOf[Float] => (Schema.create(Schema.Type.FLOAT), null) case t if t =:= typeOf[Double] => (Schema.create(Schema.Type.DOUBLE), null) case t if t =:= typeOf[String] => (Schema.create(Schema.Type.STRING), null) case t if t =:= typeOf[Array[Byte]] => (Schema.create(Schema.Type.BYTES), null) case t if t.erasure =:= typeOf[Option[_]].erasure => val s = toSchema(t.typeArgs.head)._1 (Schema.createUnion(Schema.create(Schema.Type.NULL), s), JsonProperties.NULL_VALUE) case t if t.erasure <:< typeOf[Traversable[_]].erasure || t.erasure <:< typeOf[Array[_]] => val s = toSchema(t.typeArgs.head)._1 (Schema.createArray(s), java.util.Collections.emptyList()) case t if isCaseClass(t) => val fields: List[Field] = t.decls.filter(isField).map(toField).toList val name = t.typeSymbol.name.toString val pkg = t.typeSymbol.owner.fullName (Schema.createRecord(name, null, pkg, false, fields.asJava), null) case t if customTypes.contains(t.toString) => (Schema.create(customTypes(t.toString)), null) } private def toField(s: Symbol): Field = { val name = s.name.toString val tpe = s.asMethod.returnType val (schema, default) = toSchema(tpe) new Field(name, schema, null, default) } private val customTypes = scala.collection.mutable.Map[String, Schema.Type]() private val cachedSchemas = scala.collection.concurrent.TrieMap.empty[TypeTag[_], Schema] private[avro] def register(tpe: Type, schemaType: Schema.Type): Unit = customTypes += tpe.toString -> schemaType def apply[T: TypeTag]: Schema = { val tt = implicitly[TypeTag[T]] cachedSchemas.getOrElseUpdate(tt, toSchema(tt.tpe)._1) } }
Example 71
Source File: EnumSchemaCompatibilityTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.schema import com.sksamuel.avro4s.{AvroName, AvroSchema, ScalaEnumSchemaFor, SchemaFor} import org.apache.avro.{Schema, SchemaCompatibility} import org.apache.avro.SchemaCompatibility.SchemaCompatibilityType import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class EnumSchemaCompatibilityTest extends AnyWordSpec with Matchers { @AvroName("Colours") object Colours1 extends Enumeration { val Red, Amber, Green = Value } @AvroName("Colours") object Colours2 extends Enumeration { val Red, Amber, Green, Orange = Value } "An enum schema that does not contain a default enum value" should { val schemaVersion1: Schema = AvroSchema[Colours1.Value] val schemaVersion2: Schema = AvroSchema[Colours2.Value] "not be backwards compatible when a new enum value is added" in { val compatibilityType = SchemaCompatibility.checkReaderWriterCompatibility( schemaVersion1, schemaVersion2 ).getType compatibilityType shouldEqual SchemaCompatibilityType.INCOMPATIBLE } "be forwards compatible even when a new enum value is added" in { val compatibilityType = SchemaCompatibility.checkReaderWriterCompatibility( schemaVersion2, schemaVersion1 ).getType compatibilityType shouldEqual SchemaCompatibilityType.COMPATIBLE } } "an enum schema that contains a default enum value" should { // define the enum schemas with a default value implicit val schemaForColour1: SchemaFor[Colours1.Value] = ScalaEnumSchemaFor[Colours1.Value](Colours1.Amber) implicit val schemaForColour2: SchemaFor[Colours2.Value] = ScalaEnumSchemaFor[Colours2.Value](Colours2.Amber) val schemaVersion1: Schema = AvroSchema[Colours1.Value] val schemaVersion2: Schema = AvroSchema[Colours2.Value] "be backwards compatible when a new enum value is added" in { val compatibilityType = SchemaCompatibility.checkReaderWriterCompatibility( schemaVersion1, schemaVersion2 ).getType compatibilityType shouldEqual SchemaCompatibilityType.COMPATIBLE } "be forwards compatible when a new enum value is added" in { val compatibilityType = SchemaCompatibility.checkReaderWriterCompatibility( schemaVersion2, schemaVersion1 ).getType compatibilityType shouldEqual SchemaCompatibilityType.COMPATIBLE } } }
Example 72
Source File: twitter_schema.scala From avrohugger with Apache License 2.0 | 5 votes |
package com.miguno.avro.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import com.miguno.avro.{twitter_schema => Jtwitter_schema} final case class twitter_schema(username: String, tweet: String, timestamp: Long) extends AvroSerializeable { type J = Jtwitter_schema override def toAvro: Jtwitter_schema = { new Jtwitter_schema(username, tweet, timestamp) } } object twitter_schema { implicit def reader = new AvroReader[twitter_schema] { override type J = Jtwitter_schema } implicit val metadata: AvroMetadata[twitter_schema, Jtwitter_schema] = new AvroMetadata[twitter_schema, Jtwitter_schema] { override val avroClass: Class[Jtwitter_schema] = classOf[Jtwitter_schema] override val schema: Schema = Jtwitter_schema.getClassSchema() override val fromAvro: (Jtwitter_schema) => twitter_schema = { (j: Jtwitter_schema) => twitter_schema(j.getUsername.toString, j.getTweet.toString, j.getTimestamp.toLong) } } }
Example 73
Source File: Serdes.scala From tamer with MIT License | 5 votes |
package tamer import java.io.ByteArrayOutputStream import java.nio.ByteBuffer import com.sksamuel.avro4s._ import org.apache.avro.Schema import tamer.registry._ import zio.{RIO, Task} import zio.kafka.client.serde.{Deserializer, Serializer} sealed trait Serde[A] extends Any { def isKey: Boolean def schema: Schema def deserializer: Deserializer[Registry with Topic, A] def serializer: Serializer[Registry with Topic, A] final def serde: ZSerde[Registry with Topic, A] = ZSerde(deserializer)(serializer) } object Serde { private[this] final val Magic: Byte = 0x0 private[this] final val intByteSize = 4 final def apply[A <: Product: Decoder: Encoder: SchemaFor](isKey: Boolean = false) = new RecordSerde[A](isKey, SchemaFor[A].schema(DefaultFieldMapper)) final class RecordSerde[A: Decoder: Encoder](override final val isKey: Boolean, override final val schema: Schema) extends Serde[A] { private[this] def subject(topic: String): String = s"$topic-${if (isKey) "key" else "value"}" override final val deserializer: Deserializer[Registry with Topic, A] = Deserializer.byteArray.mapM { ba => val buffer = ByteBuffer.wrap(ba) if (buffer.get() != Magic) RIO.fail(SerializationError("Unknown magic byte!")) else { val id = buffer.getInt() for { env <- RIO.environment[Registry] _ <- env.registry.verifySchema(id, schema) res <- RIO.fromTry { val length = buffer.limit() - 1 - intByteSize val payload = new Array[Byte](length) buffer.get(payload, 0, length) AvroInputStream.binary[A].from(payload).build(schema).tryIterator.next } } yield res } } override final val serializer: Serializer[Registry with Topic, A] = Serializer.byteArray.contramapM { a => for { env <- RIO.environment[Registry with Topic] id <- env.registry.getOrRegisterId(subject(env.topic), schema) arr <- Task { val baos = new ByteArrayOutputStream baos.write(Magic.toInt) baos.write(ByteBuffer.allocate(intByteSize).putInt(id).array()) val ser = AvroOutputStream.binary[A].to(baos).build(schema) ser.write(a) ser.close() baos.toByteArray } } yield arr } } }
Example 74
Source File: AvroUtil.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.streamlets.avro import scala.util.{ Failure, Success, Try } import scala.reflect.ClassTag import scala.reflect._ import org.apache.avro.specific.SpecificRecordBase import org.apache.avro.Schema import cloudflow.streamlets._ object AvroUtil { val Format = "avro" def makeSchema[T <: SpecificRecordBase: ClassTag]: Schema = Try(classTag[T].runtimeClass.getDeclaredMethod("SCHEMA$")) match { case Success(schema) ⇒ schema.invoke(null).asInstanceOf[Schema] case Failure(_) ⇒ { Try(classTag[T].runtimeClass.getDeclaredField("SCHEMA$")) match { case Success(schema) ⇒ schema.get(null).asInstanceOf[Schema] case Failure(ex) ⇒ throw new RuntimeException(s"Error fetching avro schema for class ${classTag[T].runtimeClass}", ex) } } } def fingerprintSha256(schema: Schema): String = { import java.util.Base64 import org.apache.avro.SchemaNormalization._ Base64 .getEncoder() .encodeToString(parsingFingerprint("SHA-256", schema)) } def createSchemaDefinition(schema: Schema) = SchemaDefinition( name = schema.getFullName, schema = schema.toString(false), fingerprint = fingerprintSha256(schema), format = Format ) }
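A sketch of the fingerprinting helpers above applied to an ad-hoc schema; the record name and fields are made up for illustration:

import org.apache.avro.SchemaBuilder
import cloudflow.streamlets.avro.AvroUtil

object AvroUtilExample extends App {
  val schema = SchemaBuilder.record("Sensor").namespace("com.example").fields()
    .requiredString("id")
    .requiredDouble("value")
    .endRecord()

  // Base64-encoded SHA-256 parsing fingerprint of the normalized schema.
  println(AvroUtil.fingerprintSha256(schema))

  // Bundles the full name, schema JSON, fingerprint and the "avro" format tag.
  println(AvroUtil.createSchemaDefinition(schema))
}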
Example 75
Source File: AvroCodec.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.streamlets.avro

import scala.util.{ Failure, Try }

import com.twitter.bijection.Injection
import org.apache.avro.Schema
import org.apache.avro.specific.SpecificRecordBase
import com.twitter.bijection.avro.SpecificAvroCodecs

import cloudflow.streamlets._

class AvroCodec[T <: SpecificRecordBase](avroSchema: Schema) extends Codec[T] {

  val recordInjection: Injection[T, Array[Byte]] = SpecificAvroCodecs.toBinary(avroSchema)
  val avroSerde = new AvroSerde(recordInjection)

  def encode(value: T): Array[Byte] = avroSerde.encode(value)
  def decode(bytes: Array[Byte]): T = avroSerde.decode(bytes)
  def schema: Schema = avroSchema
}

private[avro] class AvroSerde[T <: SpecificRecordBase](injection: Injection[T, Array[Byte]]) extends Serializable {
  val inverted: Array[Byte] ⇒ Try[T] = injection.invert _

  def encode(value: T): Array[Byte] = injection(value)

  // TODO fix up the exception, maybe pass through the input
  def decode(bytes: Array[Byte]): T =
    Try(inverted(bytes).get).recoverWith {
      case t ⇒ Failure(DecodeException("Could not decode.", t))
    }.get
}
Example 76
Source File: SparkAvroDecoder.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.spark.avro import org.apache.log4j.Logger import java.io.ByteArrayOutputStream import scala.reflect.runtime.universe._ import org.apache.avro.generic.{ GenericDatumReader, GenericDatumWriter, GenericRecord } import org.apache.avro.io.{ DecoderFactory, EncoderFactory } import org.apache.spark.sql.{ Dataset, Encoder, Row } import org.apache.spark.sql.catalyst.encoders.{ encoderFor, ExpressionEncoder, RowEncoder } import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types.StructType import org.apache.avro.Schema import cloudflow.spark.sql.SQLImplicits._ case class EncodedKV(key: String, value: Array[Byte]) case class SparkAvroDecoder[T: Encoder: TypeTag](avroSchema: String) { val encoder: Encoder[T] = implicitly[Encoder[T]] val sqlSchema: StructType = encoder.schema val encoderForDataColumns: ExpressionEncoder[Row] = RowEncoder(sqlSchema) @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema) @transient lazy val rowConverter = SchemaConverters.createConverterToSQL(_avroSchema, sqlSchema) @transient lazy val datumReader = new GenericDatumReader[GenericRecord](_avroSchema) @transient lazy val decoder = DecoderFactory.get def decode(bytes: Array[Byte]): Row = { val binaryDecoder = decoder.binaryDecoder(bytes, null) val record = datumReader.read(null, binaryDecoder) rowConverter(record).asInstanceOf[GenericRow] } } case class SparkAvroEncoder[T: Encoder: TypeTag](avroSchema: String) { @transient lazy val log = Logger.getLogger(getClass.getName) val BufferSize = 5 * 1024 // 5 Kb val encoder = implicitly[Encoder[T]] val sqlSchema = encoder.schema @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema) val recordName = "topLevelRecord" // ??? val recordNamespace = "recordNamespace" // ??? @transient lazy val converter = AvroConverter.createConverterToAvro(sqlSchema, recordName, recordNamespace) // Risk: This process is memory intensive. Might require thread-level buffers to optimize memory usage def rowToBytes(row: Row): Array[Byte] = { val genRecord = converter(row).asInstanceOf[GenericRecord] if (log.isDebugEnabled) log.debug(s"genRecord = $genRecord") val datumWriter = new GenericDatumWriter[GenericRecord](_avroSchema) val avroEncoder = EncoderFactory.get val byteArrOS = new ByteArrayOutputStream(BufferSize) val binaryEncoder = avroEncoder.binaryEncoder(byteArrOS, null) datumWriter.write(genRecord, binaryEncoder) binaryEncoder.flush() byteArrOS.toByteArray } def encode(dataset: Dataset[T]): Dataset[Array[Byte]] = dataset.toDF().mapPartitions(rows ⇒ rows.map(rowToBytes)).as[Array[Byte]] // Note to self: I'm not sure how heavy this chain of transformations is def encodeWithKey(dataset: Dataset[T], keyFun: T ⇒ String): Dataset[EncodedKV] = { val encoder = encoderFor[T] implicit val rowEncoder = RowEncoder(encoder.schema).resolveAndBind() dataset.map { value ⇒ val key = keyFun(value) val internalRow = encoder.toRow(value) val row = rowEncoder.fromRow(internalRow) val bytes = rowToBytes(row) EncodedKV(key, bytes) } } }
Example 77
Source File: GenericAvroSerializerSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Output, Input} import org.apache.avro.{SchemaBuilder, Schema} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SparkFunSuite, SharedSparkContext} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 78
Source File: SchemaRegistryImpl.scala From kafka4s with Apache License 2.0 | 5 votes |
package com.banno.kafka.schemaregistry import scala.collection.compat._ import org.apache.avro.Schema import io.confluent.kafka.schemaregistry.client.{SchemaMetadata, SchemaRegistryClient} import cats.effect.Sync import scala.jdk.CollectionConverters._ case class SchemaRegistryImpl[F[_]](c: SchemaRegistryClient)(implicit F: Sync[F]) extends SchemaRegistryApi[F] { import SchemaRegistryApi._ def getAllSubjects: F[Iterable[String]] = F.delay(c.getAllSubjects().asScala) def getById(id: Int): F[Schema] = F.delay(c.getById(id)) def getBySubjectAndId(subject: String, id: Int): F[Schema] = F.delay(c.getBySubjectAndId(subject, id)) def getCompatibility(subject: String): F[SchemaRegistryApi.CompatibilityLevel] = F.delay(CompatibilityLevel.unsafeFromString(c.getCompatibility(subject))) def getLatestSchemaMetadata(subject: String): F[SchemaMetadata] = F.delay(c.getLatestSchemaMetadata(subject)) def getSchemaMetadata(subject: String, version: Int): F[SchemaMetadata] = F.delay(c.getSchemaMetadata(subject, version)) def getVersion(subject: String, schema: Schema): F[Int] = F.delay(c.getVersion(subject, schema)) def register(subject: String, schema: Schema): F[Int] = F.delay(c.register(subject, schema)) def testCompatibility(subject: String, schema: Schema): F[Boolean] = F.delay(c.testCompatibility(subject, schema)) def updateCompatibility(subject: String, compatibility: CompatibilityLevel): F[String] = F.delay(c.updateCompatibility(subject, compatibility.asString)) }
Example 79
Source File: SchemaRegistryOps.scala From kafka4s with Apache License 2.0 | 5 votes |
package com.banno.kafka.schemaregistry import scala.collection.compat._ import org.apache.avro.Schema import com.sksamuel.avro4s.{DefaultFieldMapper, SchemaFor} import cats.FlatMap import cats.implicits._ case class SchemaRegistryOps[F[_]](registry: SchemaRegistryApi[F]) { def keySubject(topic: String): String = topic + "-key" def valueSubject(topic: String): String = topic + "-value" def register[A](subject: String)(implicit SF: SchemaFor[A]): F[Int] = registry.register(subject, SF.schema(DefaultFieldMapper)) def registerKey[K: SchemaFor](topic: String): F[Int] = register[K](keySubject(topic)) def registerValue[V: SchemaFor](topic: String): F[Int] = register[V](valueSubject(topic)) def register[K: SchemaFor, V: SchemaFor](topic: String)(implicit F: FlatMap[F]): F[(Int, Int)] = for { k <- registerKey[K](topic) v <- registerValue[V](topic) } yield (k, v) def isCompatible(subject: String, schema: Schema): F[Boolean] = registry.testCompatibility(subject, schema) def isCompatible[A](subject: String)(implicit SF: SchemaFor[A]): F[Boolean] = isCompatible(subject, SF.schema(DefaultFieldMapper)) def isKeyCompatible[K: SchemaFor](topic: String): F[Boolean] = isCompatible[K](keySubject(topic)) def isValueCompatible[V: SchemaFor](topic: String): F[Boolean] = isCompatible[V](valueSubject(topic)) def isCompatible[K: SchemaFor, V: SchemaFor]( topic: String )(implicit F: FlatMap[F]): F[(Boolean, Boolean)] = for { k <- isKeyCompatible[K](topic) v <- isValueCompatible[V](topic) } yield (k, v) }
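A hedged sketch of registering key and value schemas for a topic through the ops wrapper above; the registry URL, topic name and case classes are assumptions, and the resulting IO program is only built, not run:

import cats.effect.IO
import io.confluent.kafka.schemaregistry.client.CachedSchemaRegistryClient
import com.banno.kafka.schemaregistry.{SchemaRegistryImpl, SchemaRegistryOps}

// Hypothetical key and value types with avro4s-derived SchemaFor instances.
case class CustomerId(id: String)
case class Customer(name: String, address: String)

object RegisterSchemasExample {
  val client = new CachedSchemaRegistryClient("http://localhost:8081", 100)
  val ops = SchemaRegistryOps(SchemaRegistryImpl[IO](client))

  // When run, registers under the "customers-key" and "customers-value" subjects.
  val program: IO[(Int, Int)] = ops.register[CustomerId, Customer]("customers")
}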
Example 80
Source File: GenericAvroSerializerSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 81
Source File: package.scala From avro4s with Apache License 2.0 | 5 votes |
package benchmarks import benchmarks.record.AttributeValue import benchmarks.record.AttributeValue.{Empty, Invalid, Valid} import com.sksamuel.avro4s._ import org.apache.avro.Schema import org.apache.avro.generic.GenericData import scala.collection.JavaConverters._ import scala.reflect.runtime.universe.{TypeTag, typeOf} package object handrolled_codecs { final class AttributeValueCodec[T: Encoder: Decoder](val schemaForValid: SchemaFor[Valid[T]]) extends Codec[AttributeValue[T]] { codec => def schemaFor: SchemaFor[AttributeValue[T]] = { implicit val sfv: SchemaFor[Valid[T]] = schemaForValid SchemaFor[AttributeValue[T]] } val validEncoder = Encoder[Valid[T]].withSchema(schemaForValid) val emptyEncoder = Encoder[Empty] val invalidEncoder = Encoder[Invalid] def encode(t: AttributeValue[T]): AnyRef = t match { case v: Valid[T] => validEncoder.encode(v) case e: Empty => emptyEncoder.encode(e) case i: Invalid => invalidEncoder.encode(i) } val validDecoder = Decoder[Valid[T]].withSchema(schemaForValid) val emptyDecoder = Decoder[Empty] val invalidDecoder = Decoder[Invalid] val validSn: String = validDecoder.schema.getFullName val emptySn: String = emptyDecoder.schema.getFullName val invalidSn: String = invalidDecoder.schema.getFullName def decode(value: Any): AttributeValue[T] = { val schema = value match { case r: GenericData.Record => r.getSchema case i: ImmutableRecord => i.schema } schema.getFullName match { case `validSn` => validDecoder.decode(value) case `emptySn` => emptyDecoder.decode(value) case `invalidSn` => invalidDecoder.decode(value) } } } def buildSchemaForValid[T: SchemaFor: TypeTag]: SchemaFor[Valid[T]] = { val sf = SchemaFor[Valid[T]] val name: String = typeOf[T].typeSymbol.name.toString val s = sf.schema val fields = s.getFields.asScala.map(f => new Schema.Field(f.name, f.schema, f.doc, f.defaultVal)).asJava SchemaFor(Schema.createRecord(s"Valid$name", s.getDoc, s.getNamespace, s.isError, fields), sf.fieldMapper) } object AttributeValueCodec { def apply[T: Encoder: Decoder: SchemaFor: TypeTag]: AttributeValueCodec[T] = { implicit val schemaForValid: SchemaFor[Valid[T]] = buildSchemaForValid new AttributeValueCodec[T](schemaForValid) } } }
Example 82
Source File: RecursiveSchemaTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.schema import com.sksamuel.avro4s.Recursive.{Branch, MutRec1} import com.sksamuel.avro4s._ import org.apache.avro.Schema import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class RecursiveSchemaTest extends AnyWordSpec with Matchers { "SchemaFor" should { "support recursive types with sealed traits" in { AvroSchema[Recursive.Tree[Int]] shouldBe expectedSchema("/recursive_tree.json") } "support mutually recursive types" in { AvroSchema[MutRec1] shouldBe expectedSchema("/mutually_recursive.json") } "support recursive types with lists" in { AvroSchema[Recursive.ListTree[Int]] shouldBe expectedSchema("/recursive_list.json") } "support recursive types with maps" in { AvroSchema[Recursive.MapTree[Int]] shouldBe expectedSchema("/recursive_map.json") } "support recursive types with option" in { AvroSchema[Recursive.OptionTree[Int]] shouldBe expectedSchema("/recursive_option.json") } "support recursive types with either" in { AvroSchema[Recursive.EitherTree[Int]] shouldBe expectedSchema("/recursive_either.json") } "support recursive types with shapeless coproduct" in { AvroSchema[Recursive.CoproductTree[Int]] shouldBe expectedSchema("/recursive_coproduct.json") } "support recursive types with tuples and value types" in { AvroSchema[Recursive.TVTree[Int]] shouldBe expectedSchema("/recursive_tuple_value_type.json") } "support custom definitions" in { import scala.collection.JavaConverters._ implicit def sf: SchemaFor[Recursive.Branch[Int]] = new ResolvableSchemaFor[Recursive.Branch[Int]] { val tree = SchemaFor[Recursive.Tree[Int]] def schemaFor(env: DefinitionEnvironment[SchemaFor], update: SchemaUpdate): SchemaFor[Branch[Int]] = env.get[Recursive.Branch[Int]].getOrElse { val record: SchemaFor[Recursive.Branch[Int]] = SchemaFor(Schema.createRecord("CustomBranch", "custom schema", "custom", false)) val nextEnv = env.updated(record) val treeSchema = tree.resolveSchemaFor(nextEnv, update).schema val fields = Seq(new Schema.Field("left", treeSchema), new Schema.Field("right", treeSchema)) record.schema.setFields(fields.asJava) record } } val schema = sf.resolveSchemaFor().schema schema shouldBe expectedSchema("/recursive_custom.json") } } def expectedSchema(name: String) = new org.apache.avro.Schema.Parser().parse(getClass.getResourceAsStream(name)) }
Example 83
Source File: AvroRecordFieldExtractorMapFn.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase.avro

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import org.apache.avro.Schema
import org.apache.avro.Schema.Type

import scala.collection.JavaConverters._

object AvroRecordFieldExtractorMapFn {

  def apply(schema: Schema, fields: Seq[String]): Map[String, (Any) => Array[Byte]] = {
    fields.map { fn =>
      val f = schema.getField(fn)
      if (f == null) {
        throw new IllegalArgumentException(s"$fn does not exist in the given schema.")
      }
      fn -> getFunc(f.schema())
    }.toMap
  }

  private def getFunc(schema: Schema): (Any) => Array[Byte] = {
    val `type` = schema.getType.getName
    `type`.toUpperCase() match {
      case "BOOLEAN" => (v: Any) => if (v == null) null else v.fromBoolean()
      case "BYTES" => (v: Any) => if (v == null) null else v.asInstanceOf[Array[Byte]]
      case "DOUBLE" => (v: Any) => if (v == null) null else v.fromDouble()
      case "FLOAT" => (v: Any) => if (v == null) null else v.fromFloat()
      case "INT" => (v: Any) => if (v == null) null else v.fromInt()
      case "LONG" => (v: Any) => if (v == null) null else v.fromLong()
      case "STRING" => (v: Any) => if (v == null) null else v.fromString()
      case "UNION" =>
        schema.getTypes.asScala.collectFirst {
          case s if s.getType != Type.NULL => getFunc(s)
        }.getOrElse(throw new IllegalArgumentException(s"$schema is not supported."))
      case _ => throw new IllegalArgumentException(s"${schema.getType.name()} is not supported")
    }
  }
}
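A usage sketch for the extractor above; the schema and field values are illustrative:

import org.apache.avro.Schema
import com.datamountaineer.streamreactor.connect.hbase.avro.AvroRecordFieldExtractorMapFn

object ExtractorExample extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"User","namespace":"com.example","fields":[
      |  {"name":"name","type":"string"},
      |  {"name":"age","type":"int"}
      |]}""".stripMargin)

  // One byte-conversion function per requested field, keyed by field name.
  val extractors = AvroRecordFieldExtractorMapFn(schema, Seq("name", "age"))
  val nameBytes: Array[Byte] = extractors("name")("alice") // STRING converter
  val ageBytes: Array[Byte] = extractors("age")(42)        // INT converter
}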
Example 84
Source File: BasicEncoderTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.encoder import com.sksamuel.avro4s.examples.UppercasePkg.ClassInUppercasePackage import com.sksamuel.avro4s._ import org.apache.avro.Schema import org.apache.avro.generic.{GenericFixed, GenericRecord} import org.apache.avro.util.Utf8 import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class BasicEncoderTest extends AnyWordSpec with Matchers { "Encoder" should { "encode strings as UTF8" in { case class Foo(s: String) val schema = AvroSchema[Foo] val record = Encoder[Foo].encode(Foo("hello")) record shouldBe ImmutableRecord(schema, Vector(new Utf8("hello"))) } "encode strings as GenericFixed and pad bytes when schema is fixed" in { case class Foo(s: String) val fixedSchema = SchemaFor[String](Schema.createFixed("FixedString", null, null, 7)) implicit val fixedStringEncoder: Encoder[String] = Encoder.StringEncoder.withSchema(fixedSchema) val record = Encoder[Foo].encode(Foo("hello")).asInstanceOf[GenericRecord] record.get("s").asInstanceOf[GenericFixed].bytes().toList shouldBe Seq(104, 101, 108, 108, 111, 0, 0) // the fixed should have the right size record.get("s").asInstanceOf[GenericFixed].bytes().length shouldBe 7 } "encode longs" in { case class Foo(l: Long) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(123456L)) shouldBe ImmutableRecord(schema, Vector(java.lang.Long.valueOf(123456L))) } "encode doubles" in { case class Foo(d: Double) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(123.435)) shouldBe ImmutableRecord(schema, Vector(java.lang.Double.valueOf(123.435D))) } "encode booleans" in { case class Foo(d: Boolean) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(true)) shouldBe ImmutableRecord(schema, Vector(java.lang.Boolean.valueOf(true))) } "encode floats" in { case class Foo(d: Float) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(123.435F)) shouldBe ImmutableRecord(schema, Vector(java.lang.Float.valueOf(123.435F))) } "encode ints" in { case class Foo(i: Int) val schema = AvroSchema[Foo] Encoder[Foo].encode(Foo(123)) shouldBe ImmutableRecord(schema, Vector(java.lang.Integer.valueOf(123))) } "support uppercase packages" in { val schema = AvroSchema[ClassInUppercasePackage] val t = com.sksamuel.avro4s.examples.UppercasePkg.ClassInUppercasePackage("hello") schema.getFullName shouldBe "com.sksamuel.avro4s.examples.UppercasePkg.ClassInUppercasePackage" Encoder[ClassInUppercasePackage].encode(t) shouldBe ImmutableRecord(schema, Vector(new Utf8("hello"))) } } }
Example 85
Source File: Github284.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github

import com.sksamuel.avro4s.{Record, RecordFormat}
import org.apache.avro.specific.SpecificRecordBase
import org.apache.avro.{AvroRuntimeException, Schema}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

case class Street(var name: String) extends SpecificRecordBase {
  def this() = this("")

  override def get(i: Int): AnyRef = i match {
    case 0 => name
    case _ => throw new AvroRuntimeException("Bad index")
  }

  override def put(index: Int, value: scala.Any): Unit = index match {
    case 0 => name = value.asInstanceOf[String] // assign the incoming value to the field
    case _ => throw new AvroRuntimeException("Bad index")
  }

  override def getSchema: Schema = Street.SCHEMA$
}

object Street {
  val SCHEMA$ = (new Schema.Parser).parse("""
      |{
      |  "type": "record",
      |  "namespace": "com.sksamuel.avro4s.github",
      |  "name": "Street",
      |  "fields": [
      |    {"name": "name", "type": "string"}
      |  ]
      |}
    """.stripMargin)
}

final class Github284 extends AnyWordSpec with Matchers {
  "SchemaFor" should {
    "convert case class to a Record and convert it back to original case class" in {
      val street: Street = Street(name = "street name")

      val streetAsRecord: Record = RecordFormat[Street].to(street)
      val decodedStreet: Street = RecordFormat[Street].from(streetAsRecord)

      streetAsRecord shouldBe a [Record]
      decodedStreet shouldBe a [Street]
      decodedStreet shouldBe street
    }
  }
}
Example 86
Source File: CustomDefaults.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s import magnolia.{SealedTrait, Subtype} import org.json4s.native.JsonMethods.parse import org.json4s.native.Serialization.write import org.apache.avro.Schema import org.apache.avro.Schema.Type import org.json4s.DefaultFormats import scala.collection.JavaConverters._ sealed trait CustomDefault case class CustomUnionDefault(className: String, values: java.util.Map[String, Any]) extends CustomDefault case class CustomUnionWithEnumDefault(parentName: String, default: String, value: String) extends CustomDefault case class CustomEnumDefault(value: String) extends CustomDefault object CustomDefaults { implicit val formats = DefaultFormats def customScalaEnumDefault(value: Any) = CustomEnumDefault(value.toString) def customDefault(p: Product, schema: Schema): CustomDefault = if(isEnum(p, schema.getType)) CustomEnumDefault(trimmedClassName(p)) else { if(isUnionOfEnum(schema)) { val enumType = schema.getTypes.asScala.filter(_.getType == Schema.Type.ENUM).head CustomUnionWithEnumDefault(enumType.getName, trimmedClassName(p), p.toString) } else CustomUnionDefault(trimmedClassName(p), parse(write(p)).extract[Map[String, Any]].map { case (name, b: BigInt) if b.isValidInt => name -> b.intValue case (name, b: BigInt) if b.isValidLong => name -> b.longValue case (name, z) if schema.getType == Type.UNION => name -> schema.getTypes.asScala.find(_.getName == trimmedClassName(p)).map(_.getField(name).schema()) .map(DefaultResolver(z, _)).getOrElse(z) case (name, z) => name -> DefaultResolver(z, schema.getField(name).schema()) }.asJava) } def isUnionOfEnum(schema: Schema) = schema.getType == Schema.Type.UNION && schema.getTypes.asScala.map(_.getType).contains(Schema.Type.ENUM) def sealedTraitEnumDefaultValue[T](ctx: SealedTrait[SchemaFor, T]) = { val defaultExtractor = new AnnotationExtractors(ctx.annotations) defaultExtractor.enumDefault.flatMap { default => ctx.subtypes.flatMap { st: Subtype[SchemaFor, T] => if(st.typeName.short == default.toString) Option(st.typeName.short) else None }.headOption } } def isScalaEnumeration(value: Any) = value.getClass.getCanonicalName == "scala.Enumeration.Val" private def isEnum(product: Product, schemaType: Schema.Type) = product.productArity == 0 && schemaType == Schema.Type.ENUM private def trimmedClassName(p: Product) = trimDollar(p.getClass.getSimpleName) private def trimDollar(s: String) = if(s.endsWith("$")) s.dropRight(1) else s }
Example 87
Source File: AvroSchemaMerge.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s import org.apache.avro.{JsonProperties, Schema} import org.apache.avro.Schema.Field object AvroSchemaMerge { import scala.collection.JavaConverters._ def apply(name: String, namespace: String, schemas: List[Schema]): Schema = { require(schemas.forall(_.getType == Schema.Type.RECORD), "Can only merge records") val doc = schemas.flatMap(x => Option(x.getDoc)).mkString("; ") val fields = schemas.flatMap(_.getFields.asScala).groupBy(_.name).map { case (name, fields) => val doc = fields.flatMap(x => Option(x.doc)).mkString("; ") val default = fields.find(_.defaultVal != null).map(_.defaultVal).orNull // if we have two schemas with the same type, then just keep the first one val union = { val schemas = fields .map(_.schema) .flatMap(schema => schema.getType match { case Schema.Type.UNION => schema.getTypes.asScala case _ => Seq(schema) }) .filter(_.getType != Schema.Type.NULL) .groupBy(_.getType) .map(_._2.head) .toList .sortBy(_.getName) // if default value was not specified or equal to JsonProperties.NULL_VALUE then null schema should be the first in union Schema.createUnion({ if (default == null || default == JsonProperties.NULL_VALUE) { (Schema.create(Schema.Type.NULL) :: schemas).asJava } else { (schemas :+ Schema.create(Schema.Type.NULL)).asJava } }) } new Field(name, union, if (doc.isEmpty) null else doc, default) } val schema = Schema.createRecord(name, if (doc.isEmpty) null else doc, namespace, false) schema.setFields(fields.toList.asJava) schema } }
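A sketch of merging two versions of a record schema with the helper above; the schemas are built ad hoc for illustration:

import org.apache.avro.SchemaBuilder
import com.sksamuel.avro4s.AvroSchemaMerge

object MergeExample extends App {
  val v1 = SchemaBuilder.record("Event").namespace("com.example").fields()
    .requiredString("id")
    .endRecord()
  val v2 = SchemaBuilder.record("Event").namespace("com.example").fields()
    .requiredString("id")
    .requiredLong("timestamp")
    .endRecord()

  // Each merged field's type becomes a union of null plus the distinct non-null types seen for it.
  val merged = AvroSchemaMerge("Event", "com.example", List(v1, v2))
  println(merged.toString(true))
}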
Example 88
Source File: DefaultResolver.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s import java.nio.ByteBuffer import java.util.UUID import org.apache.avro.LogicalTypes.Decimal import org.apache.avro.generic.{GenericEnumSymbol, GenericFixed} import org.apache.avro.util.Utf8 import org.apache.avro.{Conversions, Schema} import CustomDefaults._ import scala.collection.JavaConverters._ object DefaultResolver { def apply(value: Any, schema: Schema): AnyRef = value match { case Some(x) => apply(x, schema) case u: Utf8 => u.toString case uuid: UUID => uuid.toString case enum: GenericEnumSymbol[_] => enum.toString case fixed: GenericFixed => fixed.bytes() case bd: BigDecimal => bd.toString() case byteBuffer: ByteBuffer if schema.getLogicalType.isInstanceOf[Decimal] => val decimalConversion = new Conversions.DecimalConversion val bd = decimalConversion.fromBytes(byteBuffer, schema, schema.getLogicalType) java.lang.Double.valueOf(bd.doubleValue) case byteBuffer: ByteBuffer => byteBuffer.array() case x: scala.Long => java.lang.Long.valueOf(x) case x: scala.Boolean => java.lang.Boolean.valueOf(x) case x: scala.Int => java.lang.Integer.valueOf(x) case x: scala.Double => java.lang.Double.valueOf(x) case x: scala.Float => java.lang.Float.valueOf(x) case x: Map[_,_] => x.asJava case x: Seq[_] => x.asJava case shapeless.Inl(x) => apply(x, schema) case p: Product => customDefault(p, schema) case v if isScalaEnumeration(v) => customScalaEnumDefault(value) case _ => value.asInstanceOf[AnyRef] } }
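A small sketch of how the resolver above maps common Scala values onto the Java representations Avro expects for defaults:

import org.apache.avro.{Schema, SchemaBuilder}
import com.sksamuel.avro4s.DefaultResolver

object DefaultResolverExample extends App {
  println(DefaultResolver(Some(42), Schema.create(Schema.Type.INT)))  // java.lang.Integer 42 (Option unwrapped)
  println(DefaultResolver(3.14, Schema.create(Schema.Type.DOUBLE)))   // java.lang.Double 3.14
  println(DefaultResolver(Seq("a", "b"), SchemaBuilder.array().items().stringType())) // java.util.List
}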
Example 89
Source File: AvroDataOutputStream.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s import java.io.OutputStream import org.apache.avro.Schema import org.apache.avro.file.{CodecFactory, DataFileWriter} import org.apache.avro.generic.{GenericDatumWriter, GenericRecord} case class AvroDataOutputStream[T](os: OutputStream, codec: CodecFactory) (implicit encoder: Encoder[T]) extends AvroOutputStream[T] { val resolved = encoder.resolveEncoder() val (writer, writeFn) = resolved.schema.getType match { case Schema.Type.DOUBLE | Schema.Type.LONG | Schema.Type.BOOLEAN | Schema.Type.STRING | Schema.Type.INT | Schema.Type.FLOAT => val datumWriter = new GenericDatumWriter[T](resolved.schema) val dataFileWriter = new DataFileWriter[T](datumWriter) dataFileWriter.setCodec(codec) dataFileWriter.create(resolved.schema, os) (dataFileWriter, (t: T) => dataFileWriter.append(t)) case _ => val datumWriter = new GenericDatumWriter[GenericRecord](resolved.schema) val dataFileWriter = new DataFileWriter[GenericRecord](datumWriter) dataFileWriter.setCodec(codec) dataFileWriter.create(resolved.schema, os) (dataFileWriter, (t: T) => { val record = resolved.encode(t).asInstanceOf[GenericRecord] dataFileWriter.append(record) }) } override def close(): Unit = { flush() writer.close() } override def write(t: T): Unit = { writeFn(t) } override def flush(): Unit = writer.flush() override def fSync(): Unit = writer.fSync() }
Example 90
Source File: DefaultAwareDatumReader.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s import org.apache.avro.generic.GenericDatumReader import org.apache.avro.io.ResolvingDecoder import org.apache.avro.{AvroTypeException, Schema} class DefaultAwareDatumReader[T](writer: Schema, reader: Schema) extends GenericDatumReader[T](writer, reader, new DefaultAwareGenericData) { override def readField(r: scala.Any, f: Schema.Field, oldDatum: scala.Any, in: ResolvingDecoder, state: scala.Any): Unit = { try { super.readField(r, f, oldDatum, in, state) } catch { case t: AvroTypeException => if (f.defaultVal == null) throw t else getData.setField(r, f.name, f.pos, f.defaultVal) } } } object DefaultAwareDatumReader { def apply[T](writerSchema: Schema): DefaultAwareDatumReader[T] = new DefaultAwareDatumReader[T](writerSchema, writerSchema) }
Example 91
Source File: AvroDataInputStream.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s import java.io.InputStream import org.apache.avro.Schema import org.apache.avro.file.DataFileStream import org.apache.avro.generic.{GenericData, GenericRecord} import org.apache.avro.io.DatumReader import scala.util.Try class AvroDataInputStream[T](in: InputStream, writerSchema: Option[Schema]) (implicit decoder: Decoder[T]) extends AvroInputStream[T] { val resolved = decoder.resolveDecoder() // if no reader or writer schema is specified, then we create a reader that uses what's present in the files private val datumReader = writerSchema match { case Some(writer) => GenericData.get.createDatumReader(writer, resolved.schema) case None => GenericData.get.createDatumReader(null, resolved.schema) } private val dataFileReader = new DataFileStream[GenericRecord](in, datumReader.asInstanceOf[DatumReader[GenericRecord]]) override def iterator: Iterator[T] = new Iterator[T] { override def hasNext: Boolean = dataFileReader.hasNext override def next(): T = { val record = dataFileReader.next resolved.decode(record) } } override def tryIterator: Iterator[Try[T]] = new Iterator[Try[T]] { override def hasNext: Boolean = dataFileReader.hasNext override def next(): Try[T] = Try { val record = dataFileReader.next resolved.decode(record) } } override def close(): Unit = in.close() }
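A round-trip sketch for the stream above, again assuming automatic avro4s derivation; the schema passed to build mirrors the GenericSerde example below:

import java.io.ByteArrayOutputStream
import com.sksamuel.avro4s.{AvroInputStream, AvroOutputStream, AvroSchema}

case class Track(title: String)

object AvroDataInputStreamUsage extends App {
  val baos = new ByteArrayOutputStream()
  val out = AvroOutputStream.data[Track].to(baos).build()
  out.write(Track("intro"))
  out.close()
  // The container file embeds the writer schema; build(...) supplies the schema to decode into.
  val in = AvroInputStream.data[Track].from(baos.toByteArray).build(AvroSchema[Track])
  println(in.tryIterator.toList) // List(Success(Track(intro)))
  in.close()
}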
Example 92
Source File: GenericSerde.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.kafka import java.io.ByteArrayOutputStream import com.sksamuel.avro4s.{AvroFormat, AvroInputStream, AvroOutputStream, AvroSchema, BinaryFormat, DataFormat, Decoder, Encoder, JsonFormat, SchemaFor} import org.apache.avro.Schema import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer} class GenericSerde[T >: Null : SchemaFor : Encoder : Decoder](avroFormat: AvroFormat = BinaryFormat) extends Serde[T] with Deserializer[T] with Serializer[T] with Serializable { val schema: Schema = AvroSchema[T] override def serializer(): Serializer[T] = this override def deserializer(): Deserializer[T] = this override def deserialize(topic: String, data: Array[Byte]): T = { if (data == null) null else { val avroInputStream = avroFormat match { case BinaryFormat => AvroInputStream.binary[T] case JsonFormat => AvroInputStream.json[T] case DataFormat => AvroInputStream.data[T] } val input = avroInputStream.from(data).build(schema) val result = input.iterator.next() input.close() result } } override def close(): Unit = () override def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () override def serialize(topic: String, data: T): Array[Byte] = { val baos = new ByteArrayOutputStream() val avroOutputStream = avroFormat match { case BinaryFormat => AvroOutputStream.binary[T] case JsonFormat => AvroOutputStream.json[T] case DataFormat => AvroOutputStream.data[T] } val output = avroOutputStream.to(baos).build() output.write(data) output.close() baos.toByteArray } }
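A round-trip sketch (names illustrative; the default BinaryFormat is used). The same instance can be handed to Kafka Streams wherever a Serde[Payment] is expected:

import com.sksamuel.avro4s.kafka.GenericSerde

case class Payment(id: String, amount: Double)

object GenericSerdeUsage extends App {
  val serde = new GenericSerde[Payment]() // pass JsonFormat or DataFormat for the other encodings
  val bytes = serde.serialize("payments", Payment("p-1", 10.5))
  println(serde.deserialize("payments", bytes)) // Payment(p-1,10.5)
}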
Example 93
Source File: RefinedTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.refined import com.sksamuel.avro4s._ import eu.timepit.refined.api.Refined import eu.timepit.refined.auto._ import eu.timepit.refined.collection.NonEmpty import org.apache.avro.Schema import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec case class Foo(nonEmptyStr: String Refined NonEmpty) class RefinedTest extends AnyWordSpec with Matchers { "refinedSchemaFor" should { "use the schema for the underlying type" in { AvroSchema[Foo] shouldBe new Schema.Parser().parse( """ |{ | "type": "record", | "name": "Foo", | "namespace": "com.sksamuel.avro4s.refined", | "fields": [{ | "name": "nonEmptyStr", | "type": "string" | }] |} """.stripMargin) } } "refinedEncoder" should { "use the encoder for the underlying type" in { val expected: String Refined NonEmpty = "foo" val record = ToRecord[Foo].to(Foo(expected)) record.get("nonEmptyStr").toString shouldBe expected.value } } "refinedDecoder" should { "use the decoder for the underlying type" in { val expected: String Refined NonEmpty = "foo" val record = ImmutableRecord(AvroSchema[Foo], Vector(expected.value)) FromRecord[Foo].from(record) shouldBe Foo(expected) } "throw when the value does not conform to the refined predicate" in { val record = ImmutableRecord(AvroSchema[Foo], Vector("")) assertThrows[IllegalArgumentException](FromRecord[Foo].from(record)) } } }
Example 94
Source File: GenericAvroSerializerSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 95
Source File: AvroSEBasicTest.scala From akka-serialization-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.serializer.avro4s import com.github.dnvriend.TestSpec import com.github.dnvriend.domain.BookStore.{ ChangedBookV1, ChangedBookV2, ChangedBookV3, ChangedBookV4 } import com.github.dnvriend.serializer.avro.{ BookSerializerV1, BookSerializerV2, BookSerializerV3 } import com.sksamuel.avro4s.{ AvroSchema, RecordFormat } import org.apache.avro.Schema import org.apache.avro.file.SeekableByteArrayInput import org.apache.avro.generic.{ GenericDatumReader, GenericRecord } import org.apache.avro.io.DecoderFactory // SE stands for Schema Evolution class AvroSEBasicTest extends TestSpec { @Override def fromBytes(bytes: Array[Byte], schema: Schema): GenericRecord = { val serveReader = new GenericDatumReader[GenericRecord](schema) serveReader.read(null, DecoderFactory.get().binaryDecoder(bytes, null)) } val title = "Moby-Dick; or, The Whale" val year = 1851 val editor = "Scala Books" "AvroSEBasicTest" should "deserialize old class with renamed field" in { // in this case, two different serializers can be used val obj = ChangedBookV1(title, year) val serializerV1 = new BookSerializerV1 val bytes: Array[Byte] = serializerV1.toBinary(obj) val serializerV2 = new BookSerializerV2 serializerV2.fromBinary(bytes) should matchPattern { case ChangedBookV2(`title`, `year`) ⇒ } } it should "deserialize old class without new field" in { val obj = ChangedBookV2(title, year) val serializerV2 = new BookSerializerV2 val bytes: Array[Byte] = serializerV2.toBinary(obj) val in = new SeekableByteArrayInput(bytes) val schema2 = AvroSchema[ChangedBookV2] val schema3 = AvroSchema[ChangedBookV3] val gdr = new GenericDatumReader[GenericRecord](schema2, schema3) val binDecoder = DecoderFactory.get().binaryDecoder(in, null) val record: GenericRecord = gdr.read(null, binDecoder) val format = RecordFormat[ChangedBookV3] val r = format.from(record) r should matchPattern { case ChangedBookV3(`title`, `year`, "") ⇒ } } it should "deserialize old class with dropped field" in { val obj = ChangedBookV3(title, year, editor) val serializerV3 = new BookSerializerV3 val bytes: Array[Byte] = serializerV3.toBinary(obj) val in = new SeekableByteArrayInput(bytes) val schema3 = AvroSchema[ChangedBookV3] val schema4 = AvroSchema[ChangedBookV4] val gdr = new GenericDatumReader[GenericRecord](schema3, schema4) val binDecoder = DecoderFactory.get().binaryDecoder(in, null) val record: GenericRecord = gdr.read(null, binDecoder) val format = RecordFormat[ChangedBookV4] val r = format.from(record) r should matchPattern { case ChangedBookV4(`title`, `editor`) ⇒ } } }
Example 96
Source File: avroMarshallers.scala From scalatest-embedded-kafka with MIT License | 5 votes |
package net.manub.embeddedkafka.avro import java.io.ByteArrayOutputStream import kafka.utils.VerifiableProperties import org.apache.avro.Schema import org.apache.avro.io._ import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecord } import org.apache.kafka.common.serialization.{Deserializer, Serializer} class KafkaAvroDeserializer[T <: SpecificRecord](schema: Schema) extends Deserializer[T] with NoOpConfiguration with NoOpClose { private val reader = new SpecificDatumReader[T](schema) override def deserialize(topic: String, data: Array[Byte]): T = { val decoder = DecoderFactory.get().binaryDecoder(data, null) reader.read(null.asInstanceOf[T], decoder) } } class KafkaAvroSerializer[T <: SpecificRecord]() extends Serializer[T] with NoOpConfiguration with NoOpClose { private def toBytes(nullableData: T): Array[Byte] = Option(nullableData).fold[Array[Byte]](null) { data => val writer: DatumWriter[T] = new SpecificDatumWriter[T](data.getSchema) val out = new ByteArrayOutputStream() val encoder = EncoderFactory.get.binaryEncoder(out, null) writer.write(data, encoder) encoder.flush() out.close() out.toByteArray } override def serialize(topic: String, data: T): Array[Byte] = toBytes(data) } sealed trait NoOpConfiguration { def configure(configs: java.util.Map[String, _], isKey: Boolean): Unit = () } sealed trait NoOpClose { def close(): Unit = () }
Example 97
Source File: TestAvroClass.scala From scalatest-embedded-kafka with MIT License | 5 votes |
package net.manub.embeddedkafka import org.apache.avro.specific.SpecificRecordBase import org.apache.avro.{AvroRuntimeException, Schema} case class TestAvroClass(var name: String) extends SpecificRecordBase { def this() = this("") override def get(i: Int): AnyRef = i match { case 0 => name case _ => throw new AvroRuntimeException("Bad index") } override def put(i: Int, v: scala.Any): Unit = i match { case 0 => name = v match { case (utf8: org.apache.avro.util.Utf8) => utf8.toString case _ => v.asInstanceOf[String] } case _ => throw new AvroRuntimeException("Bad index") } override def getSchema: Schema = TestAvroClass.SCHEMA$ } object TestAvroClass { val SCHEMA$ = (new Schema.Parser).parse(""" |{"namespace": "net.manub.embeddedkafka", | "type": "record", | "name": "TestAvroClass", | "fields": [ | {"name": "name", "type": "string"} | ] |} """.stripMargin) }
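A sketch tying this record class to the serializer and deserializer from Example 96 (object name illustrative; both files are assumed to be on the classpath):

import net.manub.embeddedkafka.TestAvroClass
import net.manub.embeddedkafka.avro.{KafkaAvroDeserializer, KafkaAvroSerializer}

object AvroMarshallersUsage extends App {
  val serializer   = new KafkaAvroSerializer[TestAvroClass]()
  val deserializer = new KafkaAvroDeserializer[TestAvroClass](TestAvroClass.SCHEMA$)
  val bytes = serializer.serialize("topic", TestAvroClass("avro"))
  println(deserializer.deserialize("topic", bytes)) // TestAvroClass(avro)
}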
Example 99
Source File: AvroSchemaMerge.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.avro import com.sksamuel.exts.StringOption import org.apache.avro.Schema import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer object AvroSchemaMerge { def apply(name: String, namespace: String, schemas: List[Schema]): Schema = { require(schemas.forall(_.getType == Schema.Type.RECORD), "Can only merge records") // documentations can just be a concat val doc = schemas.map(_.getDoc).filter(_ != null).mkString("; ") // simple impl to start: take all the fields from the first schema, and then add in the missing ones // from second 2 and so on val fields = new ArrayBuffer[Schema.Field]() schemas.foreach { schema => schema.getFields.asScala.filterNot { field => fields.exists(_.name() == field.name) }.foreach { field => // avro is funny about sharing fields, so need to copy it val copy = new Schema.Field(field.name(), field.schema(), StringOption(field.doc).orNull, field.defaultVal) fields.append(copy) } } val schema = Schema.createRecord(name, if (doc.isEmpty()) null else doc, namespace, false) schema.setFields(fields.result().asJava) schema } }
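A minimal sketch of the merge (schemas are illustrative): fields come from the first schema, then any fields the later schemas add:

import io.eels.component.avro.AvroSchemaMerge
import org.apache.avro.SchemaBuilder

object AvroSchemaMergeUsage extends App {
  val a = SchemaBuilder.record("A").namespace("ns").fields()
    .requiredString("id").endRecord()
  val b = SchemaBuilder.record("B").namespace("ns").fields()
    .requiredString("id").requiredInt("age").endRecord()
  val merged = AvroSchemaMerge("Merged", "ns", List(a, b))
  println(merged.getFields) // id taken from a, age added from b
}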
Example 100
Source File: AvroParquetRowWriter.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet.avro import com.sksamuel.exts.Logging import com.typesafe.config.{Config, ConfigFactory} import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.fs.{FileSystem, Path} class AvroParquetRowWriter(path: Path, avroSchema: Schema)(implicit fs: FileSystem) extends Logging { private val config: Config = ConfigFactory.load() private val skipCrc = config.getBoolean("eel.parquet.skipCrc") logger.info(s"Parquet writer will skipCrc = $skipCrc") private val writer = AvroParquetWriterFn(path, avroSchema) def write(record: GenericRecord): Unit = { writer.write(record) } def close(): Unit = { writer.close() if (skipCrc) { val crc = new Path("." + path.toString() + ".crc") logger.debug(s"Deleting crc $crc") if (fs.exists(crc)) fs.delete(crc, false) } } }
Example 101
Source File: AvroParquetWriterFn.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet.avro import com.sksamuel.exts.Logging import io.eels.component.parquet.ParquetWriterConfig import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.fs.Path import org.apache.parquet.avro.AvroParquetWriter import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter} object AvroParquetWriterFn extends Logging { def apply(path: Path, avroSchema: Schema): ParquetWriter[GenericRecord] = { val config = ParquetWriterConfig() AvroParquetWriter.builder[GenericRecord](path) .withSchema(avroSchema) .withCompressionCodec(config.compressionCodec) .withPageSize(config.pageSize) .withRowGroupSize(config.blockSize) .withDictionaryEncoding(config.enableDictionary) .withWriteMode(ParquetFileWriter.Mode.CREATE) .withValidation(config.validating) .build() } }
Example 102
Source File: AvroParquetReaderFn.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet.avro import io.eels.Predicate import io.eels.component.parquet.{ParquetPredicateBuilder, ParquetReaderConfig} import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.parquet.avro.{AvroParquetReader, AvroReadSupport} import org.apache.parquet.filter2.compat.FilterCompat import org.apache.parquet.hadoop.ParquetReader /* object declaration and reader config restored for this excerpt, mirroring ParquetWriterConfig() in AvroParquetWriterFn above */ object AvroParquetReaderFn { private val config = ParquetReaderConfig() def apply(path: Path, predicate: Option[Predicate], projectionSchema: Option[Schema])(implicit conf: Configuration): ParquetReader[GenericRecord] = { // The parquet reader can use a projection by setting a projected schema onto a conf object def configuration(): Configuration = { val newconf = new Configuration(conf) projectionSchema.foreach { it => AvroReadSupport.setAvroReadSchema(newconf, it) AvroReadSupport.setRequestedProjection(newconf, it) } //conf.set(ParquetInputFormat.DICTIONARY_FILTERING_ENABLED, "true") newconf.set(org.apache.parquet.hadoop.ParquetFileReader.PARQUET_READ_PARALLELISM, config.parallelism.toString) newconf } // a filter is set when we have a predicate for the read def filter(): FilterCompat.Filter = predicate.map(ParquetPredicateBuilder.build) .map(FilterCompat.get) .getOrElse(FilterCompat.NOOP) AvroParquetReader.builder[GenericRecord](path) .withCompatibility(false) .withConf(configuration()) .withFilter(filter()) .build() .asInstanceOf[ParquetReader[GenericRecord]] } }
Example 103
Source File: ISODateConverter.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.convert import java.text.SimpleDateFormat import java.time._ import hydra.common.logging.LoggingAdapter import org.apache.avro.{Conversion, LogicalType, Schema} import scala.util.Try class ISODateConverter extends Conversion[ZonedDateTime] with LoggingAdapter { private val utc = ZoneOffset.UTC override def getLogicalTypeName: String = IsoDate.IsoDateLogicalTypeName override def getConvertedType: Class[ZonedDateTime] = classOf[ZonedDateTime] private val simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX") override def fromCharSequence( value: CharSequence, schema: Schema, `type`: LogicalType ): ZonedDateTime = { Try(OffsetDateTime.parse(value).toInstant) .orElse { Try(LocalDateTime.parse(value).toInstant(ZoneOffset.UTC)) } .orElse { Try(simpleDateFormat.parse(value.toString).toInstant) } .recover { case e: Throwable => log.error(e.getMessage, e) Instant.EPOCH } .map(_.atZone(utc)) .get } } object IsoDate extends LogicalType("iso-datetime") { val IsoDateLogicalTypeName = "iso-datetime" override def validate(schema: Schema): Unit = { if (schema.getType() != Schema.Type.STRING) { throw new IllegalArgumentException( "Iso-datetime can only be used with an underlying string type" ) } } }
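A usage sketch, assuming the hydra-common LoggingAdapter trait is on the classpath (values and object name are illustrative):

import org.apache.avro.SchemaBuilder
import hydra.avro.convert.{ISODateConverter, IsoDate}

object ISODateConverterUsage extends App {
  val schema = SchemaBuilder.builder().stringType()
  IsoDate.addToSchema(schema) // attaches the iso-datetime logical type; validate() requires STRING
  val zdt = new ISODateConverter().fromCharSequence("2020-01-01T12:00:00Z", schema, IsoDate)
  println(zdt) // 2020-01-01T12:00Z, normalised to UTC
}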
Example 104
Source File: AvroUuid.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.convert import org.apache.avro.{LogicalType, Schema} object AvroUuid extends LogicalType("uuid") { val AvroUuidLogicalTypeName = "uuid" override def validate(schema: Schema): Unit = { if (schema.getType() != Schema.Type.STRING) { throw new IllegalArgumentException( "uuid can only be used with an underlying string type" ) } } }
Example 105
Source File: StringToGenericRecord.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.convert import java.util.UUID import org.apache.avro.{LogicalTypes, Schema} import org.apache.avro.generic.{GenericDatumReader, GenericRecord} import org.apache.avro.io.DecoderFactory import cats.implicits._ import org.apache.avro.util.Utf8 import scala.util.{Failure, Success, Try} object StringToGenericRecord { final case class ValidationExtraFieldsError(fields: Set[String]) extends RuntimeException( s"Extra fields ${fields.mkString(",")} found with Strict Validation Strategy" ) final case class InvalidLogicalTypeError(expected: String, received: AnyRef) extends RuntimeException( s"Invalid logical type. Expected $expected but received $received" ) implicit class ConvertToGenericRecord(s: String) { private def isUuidValid(s: String): Boolean = Try(UUID.fromString(s)).isSuccess private def checkLogicalTypes(record: GenericRecord): Try[Unit] = { import collection.JavaConverters._ def checkAll(avroField: AnyRef, fieldSchema: Option[Schema]): Try[Unit] = avroField match { case g: GenericRecord => g.getSchema.getFields.asScala.toList .traverse(f => checkAll(g.get(f.name), f.schema.some)).void case u: Utf8 if fieldSchema.exists(f => Option(f.getLogicalType).exists(_.getName == LogicalTypes.uuid.getName)) => if (isUuidValid(u.toString)) Success(()) else Failure(InvalidLogicalTypeError("UUID", u.toString)) case _ => Success(()) } val fields = record.getSchema.getFields.asScala.toList fields.traverse(f => checkAll(record.get(f.name), f.schema.some)).void } private def getAllPayloadFieldNames: Set[String] = { import spray.json._ def loop(cur: JsValue, extraName: Option[String]): Set[String] = cur match { case JsObject(f) => f.flatMap { case (k: String, v: JsValue) => loop(v, k.some) ++ Set(extraName.getOrElse("") + k) }.toSet case _ => Set.empty } loop(s.parseJson, None) } private def getAllSchemaFieldNames(schema: Schema): Set[String] = { import Schema.Type._ import collection.JavaConverters._ def loop(sch: Schema, extraName: Option[String]): Set[String] = sch.getType match { case RECORD => sch.getFields.asScala.toSet.flatMap { f: Schema.Field => loop(f.schema, f.name.some) ++ Set(extraName.getOrElse("") + f.name) } case _ => Set.empty } loop(schema, None) } def toGenericRecord(schema: Schema, useStrictValidation: Boolean): Try[GenericRecord] = Try { if (useStrictValidation) { val diff = getAllPayloadFieldNames diff getAllSchemaFieldNames(schema) if (diff.nonEmpty) throw ValidationExtraFieldsError(diff) } val decoderFactory = new DecoderFactory val decoder = decoderFactory.jsonDecoder(schema, s) val reader = new GenericDatumReader[GenericRecord](schema) reader.read(null, decoder) }.flatTap(checkLogicalTypes) } }
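A sketch of the implicit conversion (schema and payloads illustrative; spray-json and cats must be on the classpath):

import org.apache.avro.SchemaBuilder
import hydra.avro.convert.StringToGenericRecord._

object StringToGenericRecordUsage extends App {
  val schema = SchemaBuilder.record("User").fields()
    .requiredString("id").requiredBoolean("active").endRecord()
  // Strict validation rejects payload fields the schema does not declare.
  println("""{"id": "u-1", "active": true}""".toGenericRecord(schema, useStrictValidation = true))
  println("""{"id": "u-1", "extra": 1, "active": true}""".toGenericRecord(schema, useStrictValidation = true))
  // First prints Success(...); second prints Failure(ValidationExtraFieldsError(Set(extra)))
}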
Example 106
Source File: SchemaWrapper.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.util import org.apache.avro.Schema import org.apache.avro.Schema.{Field, Type} import scala.collection.mutable import scala.util.Try /* object declaration restored; the SchemaWrapper(schema, primaryKeys) case class itself is omitted from this excerpt */ object SchemaWrapper { /* single-argument overload restored to match the SchemaWrapper.from(schema) call sites in the ingestion flow examples below */ def from(schema: Schema): SchemaWrapper = SchemaWrapper(schema, schemaPKs(schema)) def from(schema: Schema, primaryKeys: Seq[String]): SchemaWrapper = { SchemaWrapper(schema, primaryKeys) } private def schemaPKs(schema: Schema): Seq[String] = { Option(schema.getProp("hydra.key")) .map(_.replaceAll("\\s", "").split(",")) match { case Some(ids) => ids case None => Seq.empty } } }
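A usage sketch (names illustrative); the hydra.key record property is the same convention exercised by IngestionFlowSpec further down:

import org.apache.avro.SchemaBuilder
import hydra.avro.util.SchemaWrapper

object SchemaWrapperUsage extends App {
  val schema = SchemaBuilder.record("User").prop("hydra.key", "id")
    .fields().requiredString("id").requiredBoolean("active").endRecord()
  println(SchemaWrapper.from(schema, Seq("id")).primaryKeys) // List(id)
}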
Example 107
Source File: AvroUtils.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.util import com.pluralsight.hydra.avro.JsonToAvroConversionException import hydra.avro.registry.JsonToAvroConversionExceptionWithMetadata import hydra.avro.resource.SchemaResource import org.apache.avro.Schema import org.apache.avro.Schema.Field import scala.collection.JavaConverters._ import scala.collection.mutable /* object declaration restored; this excerpt omits it in the original listing */ object AvroUtils { /* reconstructed (not shown in this excerpt): per-thread cache of schema pairs already found equal */ private val SEEN_EQUALS = new ThreadLocal[mutable.Set[SeenPair]] { override def initialValue(): mutable.Set[SeenPair] = mutable.Set.empty } def areEqual(one: Schema, other: Schema): Boolean = { val seen = SEEN_EQUALS.get val here = SeenPair(one.hashCode(), other.hashCode()) val equals = { if (seen.contains(here)) return true if (one eq other) return true if (one.getFullName != other.getFullName) return false one.getFields.asScala.map(_.name()).toSet == other.getFields.asScala .map(_.name()) .toSet } if (equals) seen.add(here) equals } def improveException(ex: Throwable, schema: SchemaResource, registryUrl: String) = { ex match { case e: JsonToAvroConversionException => JsonToAvroConversionExceptionWithMetadata(e, schema, registryUrl) case e: Exception => e } } private[avro] case class SeenPair private (s1: Int, s2: Int) { override def equals(o: Any): Boolean = (this.s1 == o.asInstanceOf[SeenPair].s1) && (this.s2 == o .asInstanceOf[SeenPair] .s2) override def hashCode: Int = s1 + s2 } }
Example 108
Source File: IngestionFlow.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import java.io.IOException import cats.MonadError import cats.implicits._ import com.pluralsight.hydra.avro.JsonToAvroConversionException import hydra.avro.registry.SchemaRegistry import hydra.avro.resource.SchemaResourceLoader.SchemaNotFoundException import hydra.avro.util.SchemaWrapper import hydra.core.ingest.HydraRequest import hydra.core.ingest.RequestParams.{HYDRA_KAFKA_TOPIC_PARAM, HYDRA_RECORD_KEY_PARAM} import hydra.core.transport.{AckStrategy, ValidationStrategy} import hydra.kafka.algebras.KafkaClientAlgebra import hydra.kafka.producer.AvroRecord import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scalacache._ import scalacache.guava._ import scalacache.memoization._ import scala.concurrent.duration._ import scala.util.{Failure, Success, Try} final class IngestionFlow[F[_]: MonadError[*[_], Throwable]: Mode]( schemaRegistry: SchemaRegistry[F], kafkaClient: KafkaClientAlgebra[F], schemaRegistryBaseUrl: String ) { import IngestionFlow._ implicit val guavaCache: Cache[SchemaWrapper] = GuavaCache[SchemaWrapper] private def getValueSchema(topicName: String): F[Schema] = { schemaRegistry.getLatestSchemaBySubject(topicName + "-value") .flatMap { maybeSchema => val schemaNotFound = SchemaNotFoundException(topicName) MonadError[F, Throwable].fromOption(maybeSchema, SchemaNotFoundAugmentedException(schemaNotFound, topicName)) } } private def getValueSchemaWrapper(topicName: String): F[SchemaWrapper] = memoizeF[F, SchemaWrapper](Some(2.minutes)) { getValueSchema(topicName).map { valueSchema => SchemaWrapper.from(valueSchema) } } def ingest(request: HydraRequest): F[Unit] = { request.metadataValue(HYDRA_KAFKA_TOPIC_PARAM) match { case Some(topic) => getValueSchemaWrapper(topic).flatMap { schemaWrapper => val useStrictValidation = request.validationStrategy == ValidationStrategy.Strict val payloadTryMaybe: Try[Option[GenericRecord]] = Option(request.payload) match { case Some(p) => convertToAvro(topic, schemaWrapper, useStrictValidation, p).map(avroRecord => Some(avroRecord.payload)) case None => Success(None) } val v1Key = getV1RecordKey(schemaWrapper, payloadTryMaybe, request) MonadError[F, Throwable].fromTry(payloadTryMaybe).flatMap { payloadMaybe => kafkaClient.publishStringKeyMessage((v1Key, payloadMaybe), topic).void } } case None => MonadError[F, Throwable].raiseError(MissingTopicNameException(request)) } } private def getV1RecordKey(schemaWrapper: SchemaWrapper, payloadTryMaybe: Try[Option[GenericRecord]], request: HydraRequest): Option[String] = { val headerV1Key = request.metadata.get(HYDRA_RECORD_KEY_PARAM) val optionString = schemaWrapper.primaryKeys.toList match { case Nil => None case l => l.flatMap(pkName => payloadTryMaybe match { case Success(payloadMaybe) => payloadMaybe.flatMap(p => Try(p.get(pkName)).toOption) case Failure(_) => None }).mkString("|").some } headerV1Key.orElse(optionString) } private def convertToAvro(topic: String, schemaWrapper: SchemaWrapper, useStrictValidation: Boolean, payloadString: String): Try[AvroRecord] = { Try(AvroRecord(topic, schemaWrapper.schema, None, payloadString, AckStrategy.Replicated, useStrictValidation)).recoverWith { case e: JsonToAvroConversionException => val location = s"$schemaRegistryBaseUrl/subjects/$topic-value/versions/latest/schema" Failure(new AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: IOException => val location = s"$schemaRegistryBaseUrl/subjects/$topic-value/versions/latest/schema" Failure(new 
AvroConversionAugmentedException(s"${e.getMessage} [$location]")) case e => Failure(e) } } } object IngestionFlow { final case class MissingTopicNameException(request: HydraRequest) extends Exception(s"Missing the topic name in request with correlationId ${request.correlationId}") final case class AvroConversionAugmentedException(message: String) extends RuntimeException(message) final case class SchemaNotFoundAugmentedException(schemaNotFoundException: SchemaNotFoundException, topic: String) extends RuntimeException(s"Schema '$topic' cannot be loaded. Cause: ${schemaNotFoundException.getClass.getName}: Schema not found for $topic") }
Example 109
Source File: IngestionFlowV2.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import java.io.IOException import cats.MonadError import cats.implicits._ import hydra.avro.registry.SchemaRegistry import hydra.avro.resource.SchemaResourceLoader.SchemaNotFoundException import hydra.avro.util.SchemaWrapper import hydra.core.transport.ValidationStrategy import hydra.kafka.algebras.KafkaClientAlgebra import hydra.kafka.algebras.KafkaClientAlgebra.PublishResponse import hydra.kafka.model.TopicMetadataV2Request.Subject import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scalacache._ import scalacache.guava._ import scalacache.memoization._ import scala.concurrent.duration._ import scala.util.{Failure, Try} final class IngestionFlowV2[F[_]: MonadError[*[_], Throwable]: Mode]( schemaRegistry: SchemaRegistry[F], kafkaClient: KafkaClientAlgebra[F], schemaRegistryBaseUrl: String) { import IngestionFlowV2._ import hydra.avro.convert.StringToGenericRecord._ implicit val guavaCache: Cache[SchemaWrapper] = GuavaCache[SchemaWrapper] private def getSchema(subject: String): F[Schema] = { schemaRegistry.getLatestSchemaBySubject(subject) .flatMap { maybeSchema => val schemaNotFound = SchemaNotFoundException(subject) MonadError[F, Throwable].fromOption(maybeSchema, SchemaNotFoundAugmentedException(schemaNotFound, subject)) } } private def getSchemaWrapper(subject: Subject, isKey: Boolean): F[SchemaWrapper] = memoizeF[F, SchemaWrapper](Some(2.minutes)) { val suffix = if (isKey) "-key" else "-value" getSchema(subject.value + suffix).map { sch => SchemaWrapper.from(sch) } } private def recover[A](subject: Subject, isKey: Boolean): PartialFunction[Throwable, Try[A]] = { val suffix = if (isKey) "-key" else "-value" val location = s"$schemaRegistryBaseUrl/subjects/${subject.value}$suffix/versions/latest/schema" val pf: PartialFunction[Throwable, Try[A]] = { case e: ValidationExtraFieldsError => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: InvalidLogicalTypeError => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: IOException => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e => Failure(e) } pf } private def getSchemas(request: V2IngestRequest, topic: Subject): F[(GenericRecord, Option[GenericRecord])] = { val useStrictValidation = request.validationStrategy.getOrElse(ValidationStrategy.Strict) == ValidationStrategy.Strict def getRecord(payload: String, schema: Schema): Try[GenericRecord] = payload.toGenericRecord(schema, useStrictValidation) for { kSchema <- getSchemaWrapper(topic, isKey = true) vSchema <- getSchemaWrapper(topic, isKey = false) k <- MonadError[F, Throwable].fromTry( getRecord(request.keyPayload, kSchema.schema).recoverWith(recover(topic, isKey = true))) v <- MonadError[F, Throwable].fromTry( request.valPayload.traverse(getRecord(_, vSchema.schema)).recoverWith(recover(topic, isKey = false))) } yield (k, v) } def ingest(request: V2IngestRequest, topic: Subject): F[PublishResponse] = { getSchemas(request, topic).flatMap { case (key, value) => kafkaClient.publishMessage((key, value), topic.value).rethrow } } } object IngestionFlowV2 { final case class V2IngestRequest(keyPayload: String, valPayload: Option[String], validationStrategy: Option[ValidationStrategy]) final case class AvroConversionAugmentedException(message: String) extends RuntimeException(message) final case class SchemaNotFoundAugmentedException(schemaNotFoundException: 
SchemaNotFoundException, topic: String) extends RuntimeException(s"Schema '$topic' cannot be loaded. Cause: ${schemaNotFoundException.getClass.getName}: Schema not found for $topic") }
Example 110
Source File: IngestionFlowSpec.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import cats.effect.{Concurrent, ContextShift, IO} import hydra.avro.registry.SchemaRegistry import hydra.core.ingest.HydraRequest import hydra.core.ingest.RequestParams.{HYDRA_KAFKA_TOPIC_PARAM,HYDRA_RECORD_KEY_PARAM} import hydra.ingest.services.IngestionFlow.MissingTopicNameException import hydra.kafka.algebras.KafkaClientAlgebra import org.apache.avro.{Schema, SchemaBuilder} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import scala.concurrent.ExecutionContext class IngestionFlowSpec extends AnyFlatSpec with Matchers { private implicit val contextShift: ContextShift[IO] = IO.contextShift(ExecutionContext.global) private implicit val concurrentEffect: Concurrent[IO] = IO.ioConcurrentEffect private implicit val mode: scalacache.Mode[IO] = scalacache.CatsEffect.modes.async private val testSubject: String = "test_subject" private val testSubjectNoKey: String = "test_subject_no_key" private val testKey: String = "test" private val testPayload: String = s"""{"id": "$testKey", "testField": true}""" private val testSchema: Schema = SchemaBuilder.record("TestRecord") .prop("hydra.key", "id") .fields().requiredString("id").requiredBoolean("testField").endRecord() private val testSchemaNoKey: Schema = SchemaBuilder.record("TestRecordNoKey") .fields().requiredString("id").requiredBoolean("testField").endRecord() private def ingest(request: HydraRequest): IO[KafkaClientAlgebra[IO]] = for { schemaRegistry <- SchemaRegistry.test[IO] _ <- schemaRegistry.registerSchema(testSubject + "-value", testSchema) _ <- schemaRegistry.registerSchema(testSubjectNoKey + "-value", testSchemaNoKey) kafkaClient <- KafkaClientAlgebra.test[IO] ingestFlow <- IO(new IngestionFlow[IO](schemaRegistry, kafkaClient, "https://schemaRegistry.notreal")) _ <- ingestFlow.ingest(request) } yield kafkaClient it should "ingest a message" in { val testRequest = HydraRequest("correlationId", testPayload, metadata = Map(HYDRA_KAFKA_TOPIC_PARAM -> testSubject)) ingest(testRequest).flatMap { kafkaClient => kafkaClient.consumeStringKeyMessages(testSubject, "test-consumer").take(1).compile.toList.map { publishedMessages => val firstMessage = publishedMessages.head (firstMessage._1, firstMessage._2.get.toString) shouldBe (Some(testKey), testPayload) } }.unsafeRunSync() } it should "ingest a message with a null key" in { val testRequest = HydraRequest("correlationId", testPayload, metadata = Map(HYDRA_KAFKA_TOPIC_PARAM -> testSubjectNoKey)) ingest(testRequest).flatMap { kafkaClient => kafkaClient.consumeStringKeyMessages(testSubjectNoKey, "test-consumer").take(1).compile.toList.map { publishedMessages => val firstMessage = publishedMessages.head (firstMessage._1, firstMessage._2.get.toString) shouldBe (None, testPayload) } }.unsafeRunSync() } it should "return an error when no topic name is provided" in { val testRequest = HydraRequest("correlationId", testPayload) ingest(testRequest).attempt.unsafeRunSync() shouldBe Left(MissingTopicNameException(testRequest)) } it should "take the key from the header if present" in { val headerKey = "someDifferentKey" val testRequest = HydraRequest("correlationId", testPayload, metadata = Map(HYDRA_RECORD_KEY_PARAM -> headerKey, HYDRA_KAFKA_TOPIC_PARAM -> testSubject)) ingest(testRequest).flatMap { kafkaClient => kafkaClient.consumeStringKeyMessages(testSubject, "test-consumer").take(1).compile.toList.map { publishedMessages => val firstMessage = publishedMessages.head (firstMessage._1, firstMessage._2.get.toString) shouldBe 
(Some(headerKey), testPayload) } }.unsafeRunSync() } }
Example 111
Source File: H2Dialect.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.sql import java.sql.JDBCType import hydra.avro.util.SchemaWrapper import org.apache.avro.Schema import org.apache.avro.Schema.Field import org.apache.avro.Schema.Type._ private object H2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.startsWith("jdbc:h2") override def getJDBCType(dt: Schema): Option[JdbcType] = dt.getType match { case STRING => Option(JdbcType("CLOB", JDBCType.CLOB)) case BOOLEAN => Option(JdbcType("CHAR(1)", JDBCType.CHAR)) case ARRAY => Option(JdbcType("ARRAY", JDBCType.ARRAY)) case _ => None } override def getArrayType(schema: Schema) = Some(JdbcType("ARRAY", java.sql.JDBCType.ARRAY)) override def buildUpsert( table: String, schema: SchemaWrapper, dbs: DbSyntax ): String = { val idFields = schema.primaryKeys val fields = schema.getFields val columns = fields.map(c => quoteIdentifier(dbs.format(c.name))).mkString(",") val placeholders = fields.map(_ => "?").mkString(",") val pk = idFields.map(i => quoteIdentifier(dbs.format(i))).mkString(",") val sql = s"""merge into ${table} ($columns) key($pk) values ($placeholders);""".stripMargin sql } override def upsertFields(schema: SchemaWrapper): Seq[Field] = schema.getFields override def alterTableQueries( table: String, missingFields: Seq[Schema.Field], dbs: DbSyntax ): Seq[String] = { missingFields.map { f => val dbDef = JdbcUtils.getJdbcType(f.schema, this).databaseTypeDefinition val colName = quoteIdentifier(dbs.format(f.name)) s"alter table $table add column $colName $dbDef" } } override def dropNotNullConstraintQueries( table: String, schema: SchemaWrapper, dbs: DbSyntax ): Seq[String] = { schema.getFields.filterNot(f => schema.primaryKeys.contains(f.name)).map { f => val colName = quoteIdentifier(dbs.format(f.name)) s"alter table $table alter column $colName drop not null" } } }
Example 112
Source File: AggregatedDialect.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.sql import hydra.avro.util.SchemaWrapper import org.apache.avro.Schema import scala.util.Try private class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect { require(dialects.nonEmpty) override def canHandle(url: String): Boolean = dialects.map(_.canHandle(url)).reduce(_ && _) override def getJDBCType(dt: Schema): Option[JdbcType] = { dialects.flatMap(_.getJDBCType(dt)).headOption } override def buildUpsert( table: String, schema: SchemaWrapper, dbs: DbSyntax ): String = { dialects.map(d => Try(d.buildUpsert(table, schema, dbs))).head.get } }
Example 113
Source File: Interface.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.sql import java.sql.JDBCType import hydra.avro.util.SchemaWrapper import org.apache.avro.Schema case class Database( name: String, locationUri: String, description: Option[String] ) case class Table( name: String, schema: SchemaWrapper, dbSchema: Option[String] = None, description: Option[String] = None ) case class Column( name: String, schema: Schema, dataType: JdbcType, nullable: Boolean, description: Option[String] ) case class DbTable( name: String, columns: Seq[DbColumn], description: Option[String] = None ) case class DbColumn( name: String, jdbcType: JDBCType, nullable: Boolean, description: Option[String] )
Example 114
Source File: NoOpDialectSpec.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.sql import hydra.avro.util.SchemaWrapper import org.apache.avro.Schema import org.scalatest.matchers.should.Matchers import org.scalatest.funspec.AnyFunSpecLike class NoOpDialectSpec extends Matchers with AnyFunSpecLike { describe("The NoOp dialect") { it("handles everything") { NoopDialect.canHandle("url") shouldBe true } it("does not upsert") { intercept[UnsupportedOperationException] { NoopDialect.buildUpsert( "table", SchemaWrapper.from(Schema.create(Schema.Type.NULL)), UnderscoreSyntax ) } } it("returns the correct json placeholder") { NoopDialect.jsonPlaceholder shouldBe "?" } it("does not support dropping constraints by default") { intercept[UnsupportedOperationException] { NoopDialect.dropNotNullConstraintQueries( "table", null, UnderscoreSyntax ) } } } }
Example 115
Source File: AvroRecord.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.producer import com.pluralsight.hydra.avro.JsonConverter import hydra.core.transport.AckStrategy import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.commons.lang3.StringUtils case class AvroRecord( destination: String, schema: Schema, key: String, payload: GenericRecord, ackStrategy: AckStrategy ) extends KafkaRecord[String, GenericRecord] object AvroRecord { def apply( destination: String, schema: Schema, key: Option[String], json: String, ackStrategy: AckStrategy, useStrictValidation: Boolean = false ): AvroRecord = { val payload: GenericRecord = { val converter: JsonConverter[GenericRecord] = new JsonConverter[GenericRecord](schema, useStrictValidation) converter.convert(json) } AvroRecord(destination, schema, key.orNull, payload, ackStrategy) } def apply( destination: String, schema: Schema, key: Option[String], record: GenericRecord, ackStrategy: AckStrategy ): AvroRecord = { AvroRecord(destination, schema, key.orNull, record, ackStrategy) } }
Example 116
Source File: AvroKeyRecord.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.producer import com.pluralsight.hydra.avro.JsonConverter import hydra.core.transport.AckStrategy import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord final case class AvroKeyRecord( destination: String, keySchema: Schema, valueSchema: Schema, key: GenericRecord, payload: GenericRecord, ackStrategy: AckStrategy ) extends KafkaRecord[GenericRecord, GenericRecord] object AvroKeyRecord { def apply( destination: String, keySchema: Schema, valueSchema: Schema, keyJson: String, valueJson: String, ackStrategy: AckStrategy ): AvroKeyRecord = { val (key, value): (GenericRecord, GenericRecord) = { val keyConverter: String => GenericRecord = new JsonConverter[GenericRecord](keySchema).convert val valueConverter: String => GenericRecord = new JsonConverter[GenericRecord](valueSchema).convert (keyConverter(keyJson), valueConverter(valueJson)) } AvroKeyRecord(destination, keySchema, valueSchema, key, value, ackStrategy) } def apply( destination: String, keySchema: Schema, valueSchema: Schema, key: GenericRecord, value: GenericRecord, ackStrategy: AckStrategy ): AvroKeyRecord = { new AvroKeyRecord( destination, keySchema, valueSchema, key, value, ackStrategy ) } }
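A sketch of the JSON overload (topic, schemas and payloads are illustrative); both payloads are run through the JsonConverter shown above before the record is built:

import hydra.core.transport.AckStrategy
import hydra.kafka.producer.AvroKeyRecord
import org.apache.avro.SchemaBuilder

object AvroKeyRecordUsage extends App {
  val keySchema   = SchemaBuilder.record("Key").fields().requiredString("id").endRecord()
  val valueSchema = SchemaBuilder.record("Value").fields().requiredString("name").endRecord()
  val record = AvroKeyRecord("events", keySchema, valueSchema,
    """{"id": "1"}""", """{"name": "alice"}""", AckStrategy.Replicated)
  println(record.key) // the key payload as a GenericRecord
}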
Example 117
Source File: IngestionErrorHandler.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.ingestors import akka.actor.Actor import com.pluralsight.hydra.avro.JsonToAvroConversionException import hydra.common.config.ConfigSupport._ import hydra.avro.registry.JsonToAvroConversionExceptionWithMetadata import hydra.common.config.ConfigSupport import hydra.core.ingest.RequestParams.HYDRA_KAFKA_TOPIC_PARAM import hydra.core.protocol.GenericIngestionError import hydra.core.transport.Transport.Deliver import hydra.kafka.producer.AvroRecord import org.apache.avro.Schema import spray.json.DefaultJsonProtocol import scala.io.Source class IngestionErrorHandler extends Actor with ConfigSupport with DefaultJsonProtocol { import spray.json._ private implicit val ec = context.dispatcher private implicit val hydraIngestionErrorInfoFormat = jsonFormat6( HydraIngestionErrorInfo ) private val errorTopic = applicationConfig .getStringOpt("ingest.error-topic") .getOrElse("_hydra_ingest_errors") private lazy val kafkaTransport = context .actorSelection( applicationConfig .getStringOpt(s"transports.kafka.path") .getOrElse(s"/user/service/kafka_transport") ) private val errorSchema = new Schema.Parser() .parse(Source.fromResource("schemas/HydraIngestError.avsc").mkString) override def receive: Receive = { case error: GenericIngestionError => kafkaTransport ! Deliver(buildPayload(error)) } private[ingestors] def buildPayload( err: GenericIngestionError ): AvroRecord = { val schema: Option[String] = err.cause match { case e: JsonToAvroConversionException => Some(e.getSchema.toString) case e: JsonToAvroConversionExceptionWithMetadata => Some(e.location) case e: Exception => None } val topic = err.request.metadataValue(HYDRA_KAFKA_TOPIC_PARAM) val errorInfo = HydraIngestionErrorInfo( err.ingestor, topic, err.cause.getMessage, err.request.metadata, schema, err.request.payload ).toJson.compactPrint AvroRecord( errorTopic, errorSchema, topic, errorInfo, err.request.ackStrategy ) } } case class HydraIngestionErrorInfo( ingestor: String, destination: Option[String], errorMessage: String, metadata: Map[String, String], schema: Option[String], payload: String )
Example 118
Source File: MockConnectorSpec.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.connector.mock import com.typesafe.config.ConfigFactory import org.apache.avro.Schema import org.apache.avro.Schema.Type import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class MockConnectorSpec extends AnyFlatSpec with Matchers { it should "load the test schemas" in { new MockConnectorCreator().create(ConfigFactory.empty()).fullLoad() should have size (2) } it should "load the test schemas and custom ones" in { val connector = new MockConnectorCreator().create(ConfigFactory.empty()) connector.insert((3L, Schema.create(Type.BYTES)) :: Nil) connector.fullLoad() should have size (3) } }
Example 119
Source File: CachedEagerApplicationSuite.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.mock import java.lang.reflect.Modifier import java.nio.ByteOrder import com.typesafe.config.{Config, ConfigFactory} import it.agilelab.darwin.annotations.AvroSerde import it.agilelab.darwin.app.mock.classes.{MyClass, MyNestedClass, NewClass, OneField} import it.agilelab.darwin.common.{Connector, ConnectorFactory} import it.agilelab.darwin.manager.{AvroSchemaManager, CachedEagerAvroSchemaManager} import org.apache.avro.{Schema, SchemaNormalization} import org.apache.avro.reflect.ReflectData import org.reflections.Reflections import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import it.agilelab.darwin.common.compat._ class BigEndianCachedEagerApplicationSuite extends CachedEagerApplicationSuite(ByteOrder.BIG_ENDIAN) class LittleEndianCachedEagerApplicationSuite extends CachedEagerApplicationSuite(ByteOrder.LITTLE_ENDIAN) abstract class CachedEagerApplicationSuite(val endianness: ByteOrder) extends AnyFlatSpec with Matchers { val config: Config = ConfigFactory.load() val connector: Connector = ConnectorFactory.connector(config) val manager: AvroSchemaManager = new CachedEagerAvroSchemaManager(connector, endianness) "CachedEagerAvroSchemaManager" should "not fail after the initialization" in { val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) assert(manager.registerAll(schemas).size == 1) } it should "load all existing schemas and register a new one" in { val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) manager.getSchema(0L) manager.registerAll(schemas) val id = manager.getId(schemas.head) assert(manager.getSchema(id).isDefined) assert(schemas.head == manager.getSchema(id).get) } it should "get all previously registered schemas" in { val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") val schema0 = manager.getSchema(0L) val schema1 = manager.getSchema(1L) assert(schema0.isDefined) assert(schema1.isDefined) assert(schema0.get != schema1.get) assert(schema != schema0.get) assert(schema != schema1.get) } it should "generate all schemas for all the annotated classes with @AvroSerde" in { val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString val annotationClass: Class[AvroSerde] = classOf[AvroSerde] val classes = reflections.getTypesAnnotatedWith(annotationClass).toScala.toSeq .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas } it should "reload all schemas from the connector" in { val newSchema = ReflectData.get().getSchema(classOf[NewClass]) val newId = SchemaNormalization.parsingFingerprint64(newSchema) assert(manager.getSchema(newId).isEmpty) connector.insert(Seq(newId -> newSchema)) assert(manager.getSchema(newId).isEmpty) manager.reload() assert(manager.getSchema(newId).isDefined) assert(manager.getSchema(newId).get == newSchema) } }
Example 120
Source File: SchemaReader.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.mock import java.io.{File, InputStream} import org.apache.avro.Schema object SchemaReader { def readFromResources(p: String): Schema = { read(getClass.getClassLoader.getResourceAsStream(p)) } def read(f: File): Schema = { val parser = new Schema.Parser() parser.parse(f) } def read(s: String): Schema = { val parser = new Schema.Parser() parser.parse(s) } def read(is: InputStream): Schema = { val parser = new Schema.Parser() parser.parse(is) } }
Example 121
Source File: CachedLazyApplicationSuite.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.mock import java.lang.reflect.Modifier import java.nio.ByteOrder import com.typesafe.config.{Config, ConfigFactory} import it.agilelab.darwin.annotations.AvroSerde import it.agilelab.darwin.app.mock.classes.{MyClass, MyNestedClass, NewClass, OneField} import it.agilelab.darwin.common.{Connector, ConnectorFactory} import it.agilelab.darwin.manager.{AvroSchemaManager, CachedLazyAvroSchemaManager} import org.apache.avro.{Schema, SchemaNormalization} import org.apache.avro.reflect.ReflectData import org.reflections.Reflections import it.agilelab.darwin.common.compat._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class BigEndianCachedLazyApplicationSuite extends CachedLazyApplicationSuite(ByteOrder.BIG_ENDIAN) class LittleEndianCachedLazyApplicationSuite extends CachedLazyApplicationSuite(ByteOrder.LITTLE_ENDIAN) abstract class CachedLazyApplicationSuite(val endianness: ByteOrder) extends AnyFlatSpec with Matchers { val config: Config = ConfigFactory.load() val connector: Connector = ConnectorFactory.connector(config) val manager: AvroSchemaManager = new CachedLazyAvroSchemaManager(connector, endianness) "CachedLazyAvroSchemaManager" should "not fail after the initialization" in { val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) assert(manager.registerAll(schemas).size == 1) } it should "load all existing schemas and register a new one" in { val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) manager.getSchema(0L) manager.registerAll(schemas) val id = manager.getId(schemas.head) assert(manager.getSchema(id).isDefined) assert(schemas.head == manager.getSchema(id).get) } it should "get all previously registered schemas" in { val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") val schema0 = manager.getSchema(0L) val schema1 = manager.getSchema(1L) assert(schema0.isDefined) assert(schema1.isDefined) assert(schema0.get != schema1.get) assert(schema != schema0.get) assert(schema != schema1.get) } it should "generate all schemas for all the annotated classes with @AvroSerde" in { val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString val annotationClass: Class[AvroSerde] = classOf[AvroSerde] val classes = reflections.getTypesAnnotatedWith(annotationClass).toScala.toSeq .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas } it should "reload all schemas from the connector" in { val newSchema = ReflectData.get().getSchema(classOf[NewClass]) val newId = SchemaNormalization.parsingFingerprint64(newSchema) assert(manager.getSchema(newId).isEmpty) connector.insert(Seq(newId -> newSchema)) assert(manager.getSchema(newId).isDefined) assert(manager.getSchema(newId).get == newSchema) } }
Example 122
Source File: LazyApplicationSuite.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.mock import java.lang.reflect.Modifier import java.nio.ByteOrder import com.typesafe.config.{Config, ConfigFactory} import it.agilelab.darwin.annotations.AvroSerde import it.agilelab.darwin.app.mock.classes.{MyClass, MyNestedClass, NewClass, OneField} import it.agilelab.darwin.common.{Connector, ConnectorFactory} import it.agilelab.darwin.manager.{AvroSchemaManager, LazyAvroSchemaManager} import org.apache.avro.{Schema, SchemaNormalization} import org.apache.avro.reflect.ReflectData import org.reflections.Reflections import it.agilelab.darwin.common.compat._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class BigEndianLazyApplicationSuite extends LazyApplicationSuite(ByteOrder.BIG_ENDIAN) class LittleEndianLazyApplicationSuite extends LazyApplicationSuite(ByteOrder.LITTLE_ENDIAN) abstract class LazyApplicationSuite(endianness: ByteOrder) extends AnyFlatSpec with Matchers { val config: Config = ConfigFactory.load() val connector: Connector = ConnectorFactory.connector(config) val manager: AvroSchemaManager = new LazyAvroSchemaManager(connector, endianness) "LazyAvroSchemaManager" should "not fail after the initialization" in { val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) assert(manager.registerAll(schemas).size == 1) } it should "load all existing schemas and register a new one" in { val schemas: Seq[Schema] = Seq(SchemaReader.readFromResources("MyNestedClass.avsc")) manager.getSchema(0L) manager.registerAll(schemas) val id = manager.getId(schemas.head) assert(manager.getSchema(id).isDefined) assert(schemas.head == manager.getSchema(id).get) } it should "get all previously registered schemas" in { val schema: Schema = SchemaReader.readFromResources("MyNestedClass.avsc") val schema0 = manager.getSchema(0L) val schema1 = manager.getSchema(1L) assert(schema0.isDefined) assert(schema1.isDefined) assert(schema0.get != schema1.get) assert(schema != schema0.get) assert(schema != schema1.get) } it should "generate all schemas for all the annotated classes with @AvroSerde" in { val reflections = new Reflections("it.agilelab.darwin.app.mock.classes") val oneFieldSchema = ReflectData.get().getSchema(classOf[OneField]).toString val myNestedSchema = ReflectData.get().getSchema(classOf[MyNestedClass]).toString val myClassSchema = ReflectData.get().getSchema(classOf[MyClass]).toString val annotationClass: Class[AvroSerde] = classOf[AvroSerde] val classes = reflections.getTypesAnnotatedWith(annotationClass).toScala.toSeq .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName)).toString) Seq(oneFieldSchema, myClassSchema, myNestedSchema) should contain theSameElementsAs schemas } it should "reload all schemas from the connector" in { val newSchema = ReflectData.get().getSchema(classOf[NewClass]) val newId = SchemaNormalization.parsingFingerprint64(newSchema) assert(manager.getSchema(newId).isEmpty) connector.insert(Seq(newId -> newSchema)) assert(manager.getSchema(newId).isDefined) assert(manager.getSchema(newId).get == newSchema) } }
Example 123
Source File: DarwinService.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.server.rest import akka.actor.ActorSystem import akka.http.scaladsl.model.{HttpResponse, StatusCodes} import akka.http.scaladsl.server.directives.DebuggingDirectives import akka.http.scaladsl.server.{Directives, Route} import akka.stream.ActorMaterializer import akka.stream.Attributes.LogLevels import it.agilelab.darwin.manager.AvroSchemaManager import org.apache.avro.Schema trait DarwinService extends Service with Directives with DebuggingDirectives with JsonSupport { val manager: AvroSchemaManager override def route: Route = logRequestResult(("darwin", LogLevels.Debug)) { get { path("schemas" / LongNumber.?) { case Some(id) => manager.getSchema(id) match { case Some(schema) => complete(schema) case None => complete { HttpResponse(StatusCodes.NotFound) } } case None => complete(manager.getAll) } } ~ post { path("schemas" / PathEnd) { entity(as[Seq[Schema]]) { schemas => complete { manager.registerAll(schemas).map(_._1) } } } } } } object DarwinService { def apply(asm: AvroSchemaManager)(implicit s: ActorSystem, m: ActorMaterializer): DarwinService = new DarwinService { override implicit val materializer: ActorMaterializer = m override implicit val system: ActorSystem = s override val manager: AvroSchemaManager = asm } }
Example 124
Source File: JsonSupport.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.server.rest import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport import org.apache.avro.Schema import spray.json.{DefaultJsonProtocol, JsObject, JsString, JsValue, JsonParser, PrettyPrinter, RootJsonFormat} trait JsonSupport extends SprayJsonSupport with DefaultJsonProtocol { implicit val printer: PrettyPrinter.type = PrettyPrinter implicit val schemaFormat: RootJsonFormat[Schema] = new RootJsonFormat[Schema] { override def write(obj: Schema): JsValue = JsonParser(obj.toString(true)) override def read(json: JsValue): Schema = new Schema.Parser().parse(json.prettyPrint) } implicit val schemaWithIdFormat: RootJsonFormat[(Long, Schema)] = new RootJsonFormat[(Long, Schema)] { override def write(obj: (Long, Schema)): JsValue = JsObject(Map( "id" -> JsString(obj._1.toString), "schema" -> schemaFormat.write(obj._2) )) override def read(json: JsValue): (Long, Schema) = json match { case JsObject(fields) => val id = fields.get("id") match { case Some(JsString(number)) => number case _ => throw new Exception("Id field should be a long") } val schema = fields.get("schema") match { case Some(x@JsObject(_)) => x case _ => throw new Exception("schema should be an object") } (id.toLong, schemaFormat.read(schema)) case _ => throw new Exception("should be an object") } } }
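The schemaWithIdFormat above writes the id as a JSON string and nests the schema as a JSON object. A small round-trip sketch through those formats; the driver object and inline schema are illustrative only:

import it.agilelab.darwin.server.rest.JsonSupport
import org.apache.avro.Schema
import spray.json._

object JsonSupportSketch extends App {
  val support = new JsonSupport {} // mixes in the formats defined above

  // Hypothetical schema used only for the round trip
  val schema: Schema = new Schema.Parser().parse(
    """{"type":"record","name":"Example","fields":[{"name":"a","type":"long"}]}""")

  val js: JsValue = support.schemaWithIdFormat.write((42L, schema)) // {"id":"42","schema":{...}}
  val (id, parsed) = support.schemaWithIdFormat.read(js)
  assert(id == 42L && parsed == schema)
}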
Example 125
Source File: HBaseConnectorSuite.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.connector.hbase import java.nio.file.Files import com.typesafe.config.{ConfigFactory, ConfigValueFactory} import it.agilelab.darwin.common.Connector import org.apache.avro.reflect.ReflectData import org.apache.avro.{Schema, SchemaNormalization} import org.apache.hadoop.hbase.HBaseTestingUtility import org.scalatest.BeforeAndAfterAll import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class HBaseConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { var connector: Connector = _ "HBaseConnector" should "load all existing schemas" in { connector.fullLoad() } it should "insert and retrieve" in { val schemas = Seq(ReflectData.get().getSchema(classOf[HBaseMock]), ReflectData.get().getSchema(classOf[HBase2Mock])) .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) connector.insert(schemas) val loaded: Seq[(Long, Schema)] = connector.fullLoad() assert(loaded.size == schemas.size) assert(loaded.forall(schemas.contains)) val schema = connector.findSchema(loaded.head._1) assert(schema.isDefined) assert(schema.get == loaded.head._2) val noSchema = connector.findSchema(-1L) assert(noSchema.isEmpty) } "connector.tableCreationHint" should "print the correct hint for table creation" in { connector.tableCreationHint() should be( """To create namespace and table from an HBase shell issue: | create_namespace 'AVRO' | create 'AVRO:SCHEMA_REPOSITORY', '0'""".stripMargin) } "connector.tableExists" should "return true with existent table" in { connector.tableExists() should be(true) } override def beforeAll(): Unit = { connector = new HBaseConnectorCreator().create(HBaseConnectorSuite.config) connector.createTable() } } object HBaseConnectorSuite { private lazy val config = { val util = new HBaseTestingUtility() val minicluster = util.startMiniCluster() //Hbase connector can only load configurations from a file path so we need to render the hadoop conf val confFile = Files.createTempFile("prefix", "suffix") val stream = Files.newOutputStream(confFile) minicluster.getConfiguration.writeXml(stream) stream.flush() stream.close() val hbaseConfigPath = ConfigValueFactory.fromAnyRef(confFile.toAbsolutePath.toString) //HbaseConnector will only load conf if hbase-site and core-site are given, //we give the same file to each. sys.addShutdownHook(minicluster.shutdown()) ConfigFactory.load() .withValue(ConfigurationKeys.HBASE_SITE, hbaseConfigPath) .withValue(ConfigurationKeys.CORE_SITE, hbaseConfigPath) } }
Example 126
Source File: PostgresConnectorSuite.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.connector.postgres import com.typesafe.config.{Config, ConfigFactory} import it.agilelab.darwin.common.Connector import org.apache.avro.{Schema, SchemaNormalization} import org.scalatest.BeforeAndAfterAll import ru.yandex.qatools.embed.postgresql.EmbeddedPostgres import ru.yandex.qatools.embed.postgresql.distribution.Version import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class PostgresConnectorSuite extends AnyFlatSpec with Matchers with BeforeAndAfterAll { val config: Config = ConfigFactory.load("postgres.properties") val connector: Connector = new PostgresConnectorCreator().create(config) val embeddedPostgres: EmbeddedPostgres = new EmbeddedPostgres(Version.V9_6_11) override protected def beforeAll(): Unit = { super.beforeAll() val port = 5432 val host = "localhost" val dbname = "postgres" val username = "postgres" val password = "mysecretpassword" embeddedPostgres.start(host, port, dbname, username, password) connector.createTable() } override protected def afterAll(): Unit = { super.afterAll() embeddedPostgres.stop() } "PostgresConnector" should "load all existing schemas" in { connector.fullLoad() } ignore should "insert and retrieve" in { val outerSchema = new Schema.Parser().parse(getClass.getClassLoader.getResourceAsStream("postgresmock.avsc")) val innerSchema = outerSchema.getField("four").schema() val schemas = Seq(innerSchema, outerSchema) .map(s => SchemaNormalization.parsingFingerprint64(s) -> s) connector.insert(schemas) val loaded: Seq[(Long, Schema)] = connector.fullLoad() assert(loaded.size == schemas.size) assert(loaded.forall(schemas.contains)) } }
Example 127
Source File: JsonProtocol.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.connector.rest import java.io.InputStream import org.apache.avro.Schema import org.codehaus.jackson.map.ObjectMapper import org.codehaus.jackson.node.JsonNodeFactory import it.agilelab.darwin.common.compat._ trait JsonProtocol { val objectMapper = new ObjectMapper() def toJson(schemas : Seq[(Long,Schema)]): String = { val data = schemas.map { case (_, schema) => objectMapper.readTree(schema.toString) }.foldLeft(JsonNodeFactory.instance.arrayNode()) { case (array, node) => array.add(node) array } objectMapper.writeValueAsString(data) } def toSeqOfIdSchema(in: InputStream): Seq[(Long, Schema)] = { val node = objectMapper.readTree(in) node.getElements.toScala.map { node => val id = node.get("id").asText().toLong val schemaNode = node.get("schema") val schemaToString = objectMapper.writeValueAsString(schemaNode) val parser = new Schema.Parser() val schema = parser.parse(schemaToString) (id, schema) }.toVector } def toSchema(in: InputStream): Schema = { val parser = new Schema.Parser() parser.parse(in) } }
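Note that toJson and toSeqOfIdSchema are not inverses: toJson drops the ids and produces a plain JSON array of schemas (the shape sent when registering), while toSeqOfIdSchema parses the id/schema objects returned by the REST endpoint. A sketch of both, with a hypothetical driver object and a hand-built response payload:

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import org.apache.avro.Schema
import it.agilelab.darwin.connector.rest.JsonProtocol

object JsonProtocolSketch extends App with JsonProtocol {
  // Hypothetical schema; any parsed Avro schema works here
  val schema: Schema = new Schema.Parser().parse(
    """{"type":"record","name":"Example","fields":[{"name":"a","type":"int"}]}""")

  // An array of schemas, without ids
  val registrationBody: String = toJson(Seq(0L -> schema))

  // The id/schema objects the parser expects back
  val response = s"""[{"id": "42", "schema": ${schema.toString}}]"""
  val parsed: Seq[(Long, Schema)] =
    toSeqOfIdSchema(new ByteArrayInputStream(response.getBytes(StandardCharsets.UTF_8)))
  assert(parsed == Seq(42L -> schema))
}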
Example 128
Source File: CachedLazyAvroSchemaManager.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.manager import java.nio.ByteOrder import it.agilelab.darwin.common.Connector import org.apache.avro.Schema class CachedLazyAvroSchemaManager(connector: Connector, endianness: ByteOrder) extends CachedAvroSchemaManager(connector, endianness) { override def getSchema(id: Long): Option[Schema] = { cache.getSchema(id).orElse { val schema: Option[Schema] = connector.findSchema(id) schema.foreach(s => _cache.set(Some(cache.insert(Seq(getId(s) -> s))))) schema } } override def getAll: Seq[(Long, Schema)] = { _cache.set(Some(cache.insert(connector.fullLoad()))) cache.getAll } }
Example 129
Source File: ObjectToBytesWithSchema.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.storm.bolts

import java.util

import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider
import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient
import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotSerializer
import com.hortonworks.registries.schemaregistry.{SchemaMetadata, SchemaVersionInfo}
import com.orendainx.trucking.commons.models.{EnrichedTruckAndTrafficData, WindowedDriverStats}
import com.typesafe.scalalogging.Logger
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import org.apache.storm.task.{OutputCollector, TopologyContext}
import org.apache.storm.topology.OutputFieldsDeclarer
import org.apache.storm.topology.base.BaseRichBolt
import org.apache.storm.tuple.{Fields, Tuple, Values}

import scala.collection.JavaConverters._

class ObjectToBytesWithSchema extends BaseRichBolt {

  private lazy val log = Logger(this.getClass)
  private var outputCollector: OutputCollector = _

  // Declare schema-related fields to be initialized when this component's prepare() method is called
  private var schemaRegistryClient: SchemaRegistryClient = _
  private var serializer: AvroSnapshotSerializer = _
  private var joinedSchemaMetadata: SchemaMetadata = _
  private var joinedSchemaInfo: SchemaVersionInfo = _
  private var driverStatsSchemaMetadata: SchemaMetadata = _
  private var driverStatsJoinedSchemaInfo: SchemaVersionInfo = _

  override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = {
    outputCollector = collector

    val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString
    val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl).asJava

    schemaRegistryClient = new SchemaRegistryClient(clientConfig)

    joinedSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckAndTrafficData").getSchemaMetadata
    joinedSchemaInfo = schemaRegistryClient.getLatestSchemaVersionInfo("EnrichedTruckAndTrafficData")

    driverStatsSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("WindowedDriverStats").getSchemaMetadata
    driverStatsJoinedSchemaInfo = schemaRegistryClient.getLatestSchemaVersionInfo("WindowedDriverStats")

    serializer = schemaRegistryClient.getDefaultSerializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotSerializer]
    serializer.init(clientConfig)
  }

  override def execute(tuple: Tuple): Unit = {
    val serializedBytes = tuple.getStringByField("dataType") match {
      case "EnrichedTruckAndTrafficData" =>
        val record = enrichedTruckAndTrafficToGenericRecord(tuple.getValueByField("data").asInstanceOf[EnrichedTruckAndTrafficData])
        serializer.serialize(record, joinedSchemaMetadata)
      case "WindowedDriverStats" =>
        val record = enrichedTruckAndTrafficToGenericRecord(tuple.getValueByField("data").asInstanceOf[WindowedDriverStats])
        serializer.serialize(record, driverStatsSchemaMetadata)
    }

    outputCollector.emit(new Values(serializedBytes))
    outputCollector.ack(tuple)
  }

  override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data"))

  private def enrichedTruckAndTrafficToGenericRecord(data: EnrichedTruckAndTrafficData) = {
    val record = new GenericData.Record(new Schema.Parser().parse(joinedSchemaInfo.getSchemaText))
    record.put("eventTime", data.eventTime)
    record.put("truckId", data.truckId)
    record.put("driverId", data.driverId)
    record.put("driverName", data.driverName)
    record.put("routeId", data.routeId)
    record.put("routeName", data.routeName)
    record.put("latitude", data.latitude)
    record.put("longitude", data.longitude)
    record.put("speed", data.speed)
    record.put("eventType", data.eventType)
    record.put("foggy", data.foggy)
    record.put("rainy", data.rainy)
    record.put("windy", data.windy)
    record.put("congestionLevel", data.congestionLevel)
    record
  }

  private def enrichedTruckAndTrafficToGenericRecord(data: WindowedDriverStats) = {
    val record = new GenericData.Record(new Schema.Parser().parse(driverStatsJoinedSchemaInfo.getSchemaText))
    record.put("driverId", data.driverId)
    record.put("averageSpeed", data.averageSpeed)
    record.put("totalFog", data.totalFog)
    record.put("totalRain", data.totalRain)
    record.put("totalWind", data.totalWind)
    record.put("totalViolations", data.totalViolations)
    record
  }
}
Example 130
Source File: AvroToSchema.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import collection.JavaConversions._ import org.apache.spark.sql.types._ import org.apache.avro.Schema // Converts an Avro schema to a Spark DataFrame schema. // // This assumes that the Avro schema is "flat", i.e. a Record that includes primitive types // or unions of primitive types. Unions, and Avro types that don't directly map to Scala types, // are converted to Strings and put in a Spark SQL StringType column. private object AvroToSchema { def getSchema(schema: Schema): StructType = { StructType(schema.getFields.map(field => { val fieldName = field.name val fieldSchema = field.schema val fieldType = fieldSchema.getType match { case Schema.Type.BOOLEAN => BooleanType case Schema.Type.DOUBLE => DoubleType case Schema.Type.FLOAT => FloatType case Schema.Type.INT => IntegerType case Schema.Type.LONG => LongType case Schema.Type.NULL => NullType case Schema.Type.STRING => StringType case _ => StringType } StructField(fieldName, fieldType.asInstanceOf[DataType], true) })) } }
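AvroToSchema maps Avro primitives onto Spark SQL types and falls back to StringType for unions and anything else; every column is nullable. Since the object is private to its package, the sketch below assumes it is compiled into that same package (the driver object and schema are illustrative):

package com.memsql.spark.examples.avro

import org.apache.avro.SchemaBuilder
import org.apache.spark.sql.types.StructType

object AvroToSchemaSketch extends App {
  // A flat record with a primitive field and a union field
  val avroSchema = SchemaBuilder.record("Example").fields()
    .requiredInt("count")
    .name("maybeName").`type`().unionOf().nullType().and().stringType().endUnion().noDefault()
    .endRecord()

  val sparkSchema: StructType = AvroToSchema.getSchema(avroSchema)
  // count -> IntegerType, maybeName (a union) -> StringType, both nullable
  sparkSchema.fields.foreach(println)
}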
Example 131
Source File: AvroTransformer.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import com.memsql.spark.etl.api.{UserTransformConfig, Transformer, PhaseConfig} import com.memsql.spark.etl.utils.PhaseLogger import org.apache.spark.rdd.RDD import org.apache.spark.sql.{SQLContext, DataFrame, Row} import org.apache.spark.sql.types.StructType import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.avro.io.DecoderFactory import org.apache.avro.specific.SpecificDatumReader // Takes DataFrames of byte arrays, where each row is a serialized Avro record. // Returns DataFrames of deserialized data, where each field has its own column. class AvroTransformer extends Transformer { var avroSchemaStr: String = null var sparkSqlSchema: StructType = null def AvroRDDToDataFrame(sqlContext: SQLContext, rdd: RDD[Row]): DataFrame = { val rowRDD: RDD[Row] = rdd.mapPartitions({ partition => { // Create per-partition copies of non-serializable objects val parser: Schema.Parser = new Schema.Parser() val avroSchema = parser.parse(avroSchemaStr) val reader = new SpecificDatumReader[GenericData.Record](avroSchema) partition.map({ rowOfBytes => val bytes = rowOfBytes(0).asInstanceOf[Array[Byte]] val decoder = DecoderFactory.get().binaryDecoder(bytes, null) val record = reader.read(null, decoder) val avroToRow = new AvroToRow() avroToRow.getRow(record) }) }}) sqlContext.createDataFrame(rowRDD, sparkSqlSchema) } override def initialize(sqlContext: SQLContext, config: PhaseConfig, logger: PhaseLogger): Unit = { val userConfig = config.asInstanceOf[UserTransformConfig] val avroSchemaJson = userConfig.getConfigJsValue("avroSchema") match { case Some(s) => s case None => throw new IllegalArgumentException("avroSchema must be set in the config") } avroSchemaStr = avroSchemaJson.toString val parser = new Schema.Parser() val avroSchema = parser.parse(avroSchemaJson.toString) sparkSqlSchema = AvroToSchema.getSchema(avroSchema) } override def transform(sqlContext: SQLContext, df: DataFrame, config: PhaseConfig, logger: PhaseLogger): DataFrame = { AvroRDDToDataFrame(sqlContext, df.rdd) } }
Example 132
Source File: AvroRandomGenerator.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import org.apache.avro.Schema import org.apache.avro.generic.GenericData import scala.collection.JavaConversions._ import scala.util.Random class AvroRandomGenerator(inSchema: Schema) { // Avoid nested Records, since our destination is a DataFrame. val MAX_RECURSION_LEVEL: Int = 1 val topSchema: Schema = inSchema val random = new Random def next(schema: Schema = this.topSchema, level: Int = 0): Any = { if (level <= MAX_RECURSION_LEVEL) { schema.getType match { case Schema.Type.RECORD => { val datum = new GenericData.Record(schema) schema.getFields.foreach { x => datum.put(x.pos, next(x.schema, level + 1)) } datum } case Schema.Type.UNION => { val types = schema.getTypes // Generate a value using the first type in the union. // "Random type" is also a valid option. next(types(0), level) } case _ => generateValue(schema.getType) } } else { null } } def generateValue(avroType: Schema.Type): Any = avroType match { case Schema.Type.BOOLEAN => random.nextBoolean case Schema.Type.DOUBLE => random.nextDouble case Schema.Type.FLOAT => random.nextFloat case Schema.Type.INT => random.nextInt case Schema.Type.LONG => random.nextLong case Schema.Type.NULL => null case Schema.Type.STRING => getRandomString case _ => null } def getRandomString(): String = { val length: Int = 5 + random.nextInt(5) (1 to length).map(x => ('a'.toInt + random.nextInt(26)).toChar).mkString } }
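A quick way to exercise the generator is to hand it a parsed schema and cast the result, as the spec further below does. The inline schema here is a hypothetical stand-in for Fixtures.avroSchema; note that union fields always take the first branch:

import org.apache.avro.Schema
import org.apache.avro.generic.GenericData
import com.memsql.spark.examples.avro.AvroRandomGenerator

object AvroRandomGeneratorSketch extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Example","fields":[
      |  {"name":"testInt","type":"int"},
      |  {"name":"testString","type":"string"},
      |  {"name":"testUnion","type":["int","string"]}
      |]}""".stripMargin)

  val record = new AvroRandomGenerator(schema).next().asInstanceOf[GenericData.Record]
  // testInt is a random Int, testString a random lowercase string,
  // and testUnion uses the first branch of the union (an Int)
  println(record)
}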
Example 133
Source File: AvroRandomExtractor.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import com.memsql.spark.etl.api._ import com.memsql.spark.etl.utils.PhaseLogger import org.apache.spark.streaming.StreamingContext import org.apache.spark.sql.{SQLContext, DataFrame, Row} import org.apache.spark.sql.types._ import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.avro.io.{DatumWriter, EncoderFactory} import org.apache.avro.specific.SpecificDatumWriter import java.io.ByteArrayOutputStream // Generates an RDD of byte arrays, where each is a serialized Avro record. class AvroRandomExtractor extends Extractor { var count: Int = 1 var generator: AvroRandomGenerator = null var writer: DatumWriter[GenericData.Record] = null var avroSchema: Schema = null def schema: StructType = StructType(StructField("bytes", BinaryType, false) :: Nil) val parser: Schema.Parser = new Schema.Parser() override def initialize(ssc: StreamingContext, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Unit = { val userConfig = config.asInstanceOf[UserExtractConfig] val avroSchemaJson = userConfig.getConfigJsValue("avroSchema") match { case Some(s) => s case None => throw new IllegalArgumentException("avroSchema must be set in the config") } count = userConfig.getConfigInt("count").getOrElse(1) avroSchema = parser.parse(avroSchemaJson.toString) writer = new SpecificDatumWriter(avroSchema) generator = new AvroRandomGenerator(avroSchema) } override def next(ssc: StreamingContext, time: Long, sqlContext: SQLContext, config: PhaseConfig, batchInterval: Long, logger: PhaseLogger): Option[DataFrame] = { val rdd = sqlContext.sparkContext.parallelize((1 to count).map(_ => Row({ val out = new ByteArrayOutputStream val encoder = EncoderFactory.get().binaryEncoder(out, null) val avroRecord: GenericData.Record = generator.next().asInstanceOf[GenericData.Record] writer.write(avroRecord, encoder) encoder.flush out.close out.toByteArray }))) Some(sqlContext.createDataFrame(rdd, schema)) } }
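The extractor emits raw binary-encoded Avro rather than a container file, so a consumer needs the writer schema to decode the bytes. A self-contained round trip with the plain Avro generic API; the inline schema is hypothetical:

import java.io.ByteArrayOutputStream
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object BinaryRoundTripSketch extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Example","fields":[{"name":"testInt","type":"int"}]}""")

  // Encode one record to the same kind of byte array the extractor emits
  val record = new GenericData.Record(schema)
  record.put("testInt", 7)
  val out = new ByteArrayOutputStream()
  val encoder = EncoderFactory.get().binaryEncoder(out, null)
  new GenericDatumWriter[GenericData.Record](schema).write(record, encoder)
  encoder.flush()

  // Decode it back, which is what a downstream transformer would do
  val decoder = DecoderFactory.get().binaryDecoder(out.toByteArray, null)
  val decoded = new GenericDatumReader[GenericData.Record](schema).read(null, decoder)
  assert(decoded.get("testInt") == 7)
}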
Example 134
Source File: AvroToRow.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import collection.JavaConversions._ import org.apache.spark.sql.Row import org.apache.avro.Schema import org.apache.avro.generic.GenericData // Converts an Avro record to a Spark DataFrame row. // // This assumes that the Avro schema is "flat", i.e. a Record that includes primitive types // or unions of primitive types. Unions, and Avro types that don't directly map to Scala types, // are converted to Strings and put in a Spark SQL StringType column. private class AvroToRow { def getRow(record: GenericData.Record): Row = { Row.fromSeq(record.getSchema.getFields().map(f => { val schema = f.schema() val obj = record.get(f.pos) schema.getType match { case Schema.Type.BOOLEAN => obj.asInstanceOf[Boolean] case Schema.Type.DOUBLE => obj.asInstanceOf[Double] case Schema.Type.FLOAT => obj.asInstanceOf[Float] case Schema.Type.INT => obj.asInstanceOf[Int] case Schema.Type.LONG => obj.asInstanceOf[Long] case Schema.Type.NULL => null case _ => obj.toString } })) } }
Example 135
Source File: AvroToRowSpec.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import com.memsql.spark.connector.dataframe.JsonValue import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.spark.sql.Row import test.util.Fixtures import collection.JavaConversions._ import java.nio.ByteBuffer import org.scalatest._ class AvroToRowSpec extends FlatSpec { "AvroToRow" should "create Spark SQL Rows from Avro objects" in { val parser: Schema.Parser = new Schema.Parser() val avroTestSchema: Schema = parser.parse(Fixtures.avroSchema) val record: GenericData.Record = new GenericData.Record(avroTestSchema) record.put("testBool", true) record.put("testDouble", 19.88) record.put("testFloat", 3.19f) record.put("testInt", 1123) record.put("testLong", 2147483648L) record.put("testNull", null) record.put("testString", "Conor") record.put("testUnion", 17) val row: Row = new AvroToRow().getRow(record) assert(row.getAs[Boolean](0)) assert(row.getAs[Double](1) == 19.88) assert(row.getAs[Float](2) == 3.19f) assert(row.getAs[Int](3) == 1123) assert(row.getAs[Long](4) == 2147483648L) assert(row.getAs[Null](5) == null) assert(row.getAs[String](6) == "Conor") assert(row.getAs[String](7) == "17") } }
Example 136
Source File: AvroRandomGeneratorSpec.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import org.scalatest._ import org.apache.avro.Schema import org.apache.avro.generic.GenericData import test.util.Fixtures class AvroRandomGeneratorSpec extends FlatSpec { "AvroRandomGenerator" should "create Avro objects with random values" in { val schema = new Schema.Parser().parse(Fixtures.avroSchema) val avroRecord:GenericData.Record = new AvroRandomGenerator(schema).next().asInstanceOf[GenericData.Record] assert(avroRecord.get("testBool").isInstanceOf[Boolean]) assert(avroRecord.get("testDouble").isInstanceOf[Double]) assert(avroRecord.get("testFloat").isInstanceOf[Float]) assert(avroRecord.get("testInt").isInstanceOf[Int]) assert(avroRecord.get("testLong").isInstanceOf[Long]) assert(avroRecord.get("testNull") == null) assert(avroRecord.get("testString").isInstanceOf[String]) assert(avroRecord.get("testUnion").isInstanceOf[Int]) } }
Example 137
Source File: AvroToSchemaSpec.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import com.memsql.spark.connector.dataframe.JsonType import org.apache.spark.sql.types._ import org.apache.avro.Schema import org.scalatest._ import test.util.Fixtures class AvroToSchemaSpec extends FlatSpec { "AvroToSchema" should "create a Spark SQL schema from an Avro schema" in { val parser = new Schema.Parser() val avroSchema = parser.parse(Fixtures.avroSchema) val sparkSchema = AvroToSchema.getSchema(avroSchema) val fields = sparkSchema.fields assert(fields.forall(field => field.nullable)) assert(fields(0).name == "testBool") assert(fields(0).dataType == BooleanType) assert(fields(1).name == "testDouble") assert(fields(1).dataType == DoubleType) assert(fields(2).name == "testFloat") assert(fields(2).dataType == FloatType) assert(fields(3).name == "testInt") assert(fields(3).dataType == IntegerType) assert(fields(4).name == "testLong") assert(fields(4).dataType == LongType) assert(fields(5).name == "testNull") assert(fields(5).dataType == NullType) assert(fields(6).name == "testString") assert(fields(6).dataType == StringType) assert(fields(7).name == "testUnion") assert(fields(7).dataType == StringType) } }
Example 138
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import java.io.File import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.apache.avro.generic.{ GenericDatumReader, GenericRecord} import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase } import org.apache.avro.Schema import org.apache.avro.file.{ DataFileReader, DataFileWriter } import org.specs2.mutable.Specification object SpecificTestUtil extends Specification { def write[T <: SpecificRecordBase](file: File, records: List[T]) = { val userDatumWriter = new SpecificDatumWriter[T] val dataFileWriter = new DataFileWriter[T](userDatumWriter) dataFileWriter.create(records.head.getSchema, file); records.foreach(record => dataFileWriter.append(record)) dataFileWriter.close(); } def read[T <: SpecificRecordBase](file: File, records: List[T]) = { val dummyRecord = new GenericDatumReader[GenericRecord] val schema = new DataFileReader(file, dummyRecord).getSchema val userDatumReader = new SpecificDatumReader[T](schema) val dataFileReader = new DataFileReader[T](file, userDatumReader) // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader. var record: T = null.asInstanceOf[T] var sameRecord: T = null.asInstanceOf[T] val recordIter = records.iterator while (dataFileReader.hasNext) { sameRecord = dataFileReader.next(sameRecord) record = recordIter.next } dataFileReader.close() sameRecord must ===(record) } def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = { val fileName = s"${records.head.getClass.getName}" val fileEnding = "avro" val file = File.createTempFile(fileName, fileEnding) file.deleteOnExit() write(file, records) read(file, records) } def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = { val schema = record.getSchema val writer = new SpecificDatumWriter[T](schema) val out = new java.io.ByteArrayOutputStream() val encoder = EncoderFactory.get().binaryEncoder(out, null) writer.write(record, encoder) encoder.flush val ba = out.toByteArray ba.size must ===(1) ba(0) must ===(0) out.close val reader = new SpecificDatumReader[T](schema) val decoder = DecoderFactory.get().binaryDecoder(ba, null) val decoded = reader.read(record, decoder) decoded must ===(record) } }
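The utility above depends on avrohugger-generated SpecificRecordBase classes. The same container-file round trip can be sketched with only generic records and the core Avro API; the driver object and inline schema are illustrative:

import java.io.File
import org.apache.avro.Schema
import org.apache.avro.file.{DataFileReader, DataFileWriter}
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}

object GenericFileRoundTripSketch extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Example","fields":[{"name":"name","type":"string"}]}""")

  val record = new GenericData.Record(schema)
  record.put("name", "avro")

  val file = File.createTempFile("generic-example", ".avro")
  file.deleteOnExit()

  // Write a container file, which embeds the schema alongside the data
  val writer = new DataFileWriter[GenericRecord](new GenericDatumWriter[GenericRecord](schema))
  writer.create(schema, file)
  writer.append(record)
  writer.close()

  // Read it back; the reader picks the schema up from the file itself
  val reader = new DataFileReader[GenericRecord](file, new GenericDatumReader[GenericRecord]())
  val readBack = reader.next()
  reader.close()
  assert(readBack.get("name").toString == "avro")
}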
Example 139
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import java.io.File import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.apache.avro.generic.{ GenericDatumReader, GenericRecord} import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase } import org.apache.avro.Schema import org.apache.avro.file.{ DataFileReader, DataFileWriter } import org.specs2.mutable.Specification object SpecificTestUtil extends Specification { def write[T <: SpecificRecordBase](file: File, records: List[T]) = { val userDatumWriter = new SpecificDatumWriter[T]() val dataFileWriter = new DataFileWriter[T](userDatumWriter) dataFileWriter.create(records.head.getSchema, file) records.foreach(record => dataFileWriter.append(record)) dataFileWriter.close() } def read[T <: SpecificRecordBase](file: File, records: List[T]) = { val dummyRecord = new GenericDatumReader[GenericRecord] val schema = new DataFileReader(file, dummyRecord).getSchema val userDatumReader = new SpecificDatumReader[T](schema) val dataFileReader = new DataFileReader[T](file, userDatumReader) // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader. var record: T = null.asInstanceOf[T] var sameRecord: T = null.asInstanceOf[T] val recordIter = records.iterator while (dataFileReader.hasNext) { sameRecord = dataFileReader.next(sameRecord) record = recordIter.next } dataFileReader.close() sameRecord must ===(record) } def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = { val fileName = s"${records.head.getClass.getName}" val fileEnding = "avro" val file = File.createTempFile(fileName, fileEnding) file.deleteOnExit() write(file, records) read(file, records) } def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = { val schema = record.getSchema val writer = new SpecificDatumWriter[T](schema) val out = new java.io.ByteArrayOutputStream() val encoder = EncoderFactory.get().binaryEncoder(out, null) writer.write(record, encoder) encoder.flush val ba = out.toByteArray ba.size must ===(1) ba(0) must ===(0) out.close val reader = new SpecificDatumReader[T](schema) val decoder = DecoderFactory.get().binaryDecoder(ba, null) val decoded = reader.read(record, decoder) decoded must ===(record) } }
Example 140
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import java.io.File import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.apache.avro.generic.{ GenericDatumReader, GenericRecord} import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase } import org.apache.avro.Schema import org.apache.avro.file.{ DataFileReader, DataFileWriter } import org.specs2.mutable.Specification object SpecificTestUtil extends Specification { def write[T <: SpecificRecordBase](file: File, records: List[T]) = { val userDatumWriter = new SpecificDatumWriter[T] val dataFileWriter = new DataFileWriter[T](userDatumWriter) dataFileWriter.create(records.head.getSchema, file); records.foreach(record => dataFileWriter.append(record)) dataFileWriter.close(); } def read[T <: SpecificRecordBase](file: File, records: List[T]) = { val dummyRecord = new GenericDatumReader[GenericRecord] val schema = new DataFileReader(file, dummyRecord).getSchema val userDatumReader = new SpecificDatumReader[T](schema) val dataFileReader = new DataFileReader[T](file, userDatumReader) // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader. var record: T = null.asInstanceOf[T] var sameRecord: T = null.asInstanceOf[T] val recordIter = records.iterator while (dataFileReader.hasNext) { sameRecord = dataFileReader.next(sameRecord) record = recordIter.next } dataFileReader.close() sameRecord.equals(record) } def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = { val fileName = s"${records.head.getClass.getName}" val fileEnding = "avro" val file = File.createTempFile(fileName, fileEnding) file.deleteOnExit() write(file, records) read(file, records) } def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = { val schema = record.getSchema val writer = new SpecificDatumWriter[T](schema) val out = new java.io.ByteArrayOutputStream() val encoder = EncoderFactory.get().binaryEncoder(out, null) writer.write(record, encoder) encoder.flush val ba = out.toByteArray ba.size must ===(1) ba(0) must ===(0) out.close val reader = new SpecificDatumReader[T](schema) val decoder = DecoderFactory.get().binaryDecoder(ba, null) val decoded = reader.read(record, decoder) decoded must ===(record) } }
Example 141
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import java.io.File import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.apache.avro.generic.{ GenericDatumReader, GenericRecord} import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase } import org.apache.avro.Schema import org.apache.avro.file.{ DataFileReader, DataFileWriter } import org.specs2.mutable.Specification object SpecificTestUtil extends Specification { def write[T <: SpecificRecordBase](file: File, records: List[T]) = { val userDatumWriter = new SpecificDatumWriter[T] val dataFileWriter = new DataFileWriter[T](userDatumWriter) dataFileWriter.create(records.head.getSchema, file); records.foreach(record => dataFileWriter.append(record)) dataFileWriter.close(); } def read[T <: SpecificRecordBase](file: File, records: List[T]) = { val dummyRecord = new GenericDatumReader[GenericRecord] val schema = new DataFileReader(file, dummyRecord).getSchema val userDatumReader = new SpecificDatumReader[T](schema) val dataFileReader = new DataFileReader[T](file, userDatumReader) // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader. var record: T = null.asInstanceOf[T] var sameRecord: T = null.asInstanceOf[T] val recordIter = records.iterator while (dataFileReader.hasNext) { sameRecord = dataFileReader.next(sameRecord) record = recordIter.next } dataFileReader.close() sameRecord must ===(record) } def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = { val fileName = s"${records.head.getClass.getName}" val fileEnding = "avro" val file = File.createTempFile(fileName, fileEnding) file.deleteOnExit() write(file, records) read(file, records) } def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = { val schema = record.getSchema val writer = new SpecificDatumWriter[T](schema) val out = new java.io.ByteArrayOutputStream() val encoder = EncoderFactory.get().binaryEncoder(out, null) writer.write(record, encoder) encoder.flush val ba = out.toByteArray ba.size must ===(1) ba(0) must ===(0) out.close val reader = new SpecificDatumReader[T](schema) val decoder = DecoderFactory.get().binaryDecoder(ba, null) val decoded = reader.read(record, decoder) decoded must ===(record) } }
Example 142
Source File: DefaultRowReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.avro import java.nio.charset.Charset import org.apache.avro.Schema import org.apache.avro.generic.{GenericData, GenericDatumReader} import org.apache.avro.io.{BinaryDecoder, DecoderFactory} import SchemaConverter._ import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowReader} import ml.combust.mleap.core.types.StructType import ml.combust.mleap.runtime.frame.{ArrayRow, Row} import scala.util.Try class DefaultRowReader(override val schema: StructType) extends RowReader { val valueConverter = ValueConverter() lazy val readers = schema.fields.map(_.dataType).map(valueConverter.avroToMleap) val avroSchema = schema: Schema val datumReader = new GenericDatumReader[GenericData.Record](avroSchema) var decoder: BinaryDecoder = null var record = new GenericData.Record(avroSchema) override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[Row] = Try { decoder = DecoderFactory.get().binaryDecoder(bytes, decoder) record = datumReader.read(record, decoder) val row = ArrayRow(new Array[Any](schema.fields.length)) for(i <- schema.fields.indices) { row.set(i, readers(i)(record.get(i))) } row } }
Example 143
Source File: DefaultFrameWriter.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.avro import java.io.ByteArrayOutputStream import java.nio.charset.Charset import org.apache.avro.Schema import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.{GenericData, GenericDatumWriter} import SchemaConverter._ import ml.combust.mleap.runtime.frame.LeapFrame import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameWriter} import resource._ import scala.util.{Failure, Try} class DefaultFrameWriter[LF <: LeapFrame[LF]](frame: LF) extends FrameWriter { val valueConverter = ValueConverter() override def toBytes(charset: Charset = BuiltinFormats.charset): Try[Array[Byte]] = { (for(out <- managed(new ByteArrayOutputStream())) yield { val writers = frame.schema.fields.map(_.dataType).map(valueConverter.mleapToAvro) val avroSchema = frame.schema: Schema val record = new GenericData.Record(avroSchema) val datumWriter = new GenericDatumWriter[GenericData.Record](avroSchema) val writer = new DataFileWriter[GenericData.Record](datumWriter) writer.create(avroSchema, out) for(row <- frame.collect()) { var i = 0 for(writer <- writers) { record.put(i, writer(row.getRaw(i))) i = i + 1 } Try(writer.append(record)) match { case Failure(error) => error.printStackTrace() case _ => } } writer.close() out.toByteArray }).tried } }
Example 144
Source File: DefaultRowWriter.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.avro import java.io.ByteArrayOutputStream import java.nio.charset.Charset import org.apache.avro.Schema import org.apache.avro.generic.{GenericData, GenericDatumWriter} import org.apache.avro.io.{BinaryEncoder, EncoderFactory} import SchemaConverter._ import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowWriter} import ml.combust.mleap.core.types.StructType import ml.combust.mleap.runtime.frame.Row import resource._ import scala.util.Try class DefaultRowWriter(override val schema: StructType) extends RowWriter { val valueConverter = ValueConverter() lazy val writers = schema.fields.map(_.dataType).map(valueConverter.mleapToAvro) val avroSchema = schema: Schema val datumWriter = new GenericDatumWriter[GenericData.Record](avroSchema) var encoder: BinaryEncoder = null var record = new GenericData.Record(avroSchema) override def toBytes(row: Row, charset: Charset = BuiltinFormats.charset): Try[Array[Byte]] = { (for(out <- managed(new ByteArrayOutputStream(1024))) yield { encoder = EncoderFactory.get().binaryEncoder(out, encoder) var i = 0 for(writer <- writers) { record.put(i, writer(row.getRaw(i))) i = i + 1 } datumWriter.write(record, encoder) encoder.flush() out.toByteArray }).tried } }
Example 145
Source File: ParquetWriterTask.scala From gearpump-examples with Apache License 2.0 | 5 votes |
package io.gearpump.examples.kafka_hdfs_pipeline import org.apache.avro.Schema import io.gearpump.Message import io.gearpump.cluster.UserConfig import io.gearpump.examples.kafka_hdfs_pipeline.ParquetWriterTask._ import io.gearpump.streaming.task.{StartTime, Task, TaskContext} import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.parquet.avro.AvroParquetWriter import scala.util.{Failure, Success, Try} class ParquetWriterTask(taskContext : TaskContext, config: UserConfig) extends Task(taskContext, config) { val outputFileName = taskContext.appName + ".parquet" val absolutePath = Option(getHdfs + config.getString(PARQUET_OUTPUT_DIRECTORY).getOrElse("/parquet") + "/" + outputFileName).map(deleteFile(_)).get val outputPath = new Path(absolutePath) var parquetWriter = new AvroParquetWriter[SpaceShuttleRecord](outputPath, SpaceShuttleRecord.SCHEMA$) def getYarnConf = new YarnConfiguration def getFs = FileSystem.get(getYarnConf) def getHdfs = new Path(getFs.getHomeDirectory, "gearpump") private def deleteFile(fileName: String): String = { val file = new Path(fileName) getFs.exists(file) match { case true => getFs.delete(file,false) case false => } fileName } override def onStart(startTime: StartTime): Unit = { LOG.info(s"ParquetWriter.onStart $absolutePath") } override def onNext(msg: Message): Unit = { Try({ parquetWriter.write(msg.msg.asInstanceOf[SpaceShuttleRecord]) }) match { case Success(ok) => case Failure(throwable) => LOG.error(s"failed ${throwable.getMessage}") } } override def onStop(): Unit = { LOG.info("ParquetWriter.onStop") parquetWriter.close() } } object ParquetWriterTask { val PARQUET_OUTPUT_DIRECTORY = "parquet.output.directory" val PARQUET_WRITER = "parquet.writer" }
Example 146
Source File: ParquetWriterTaskSpec.scala From gearpump-examples with Apache License 2.0 | 5 votes |
package io.gearpump.examples.kafka_hdfs_pipeline import akka.actor.ActorSystem import org.apache.avro.Schema import io.gearpump.Message import io.gearpump.cluster.UserConfig import io.gearpump.streaming.MockUtil import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.parquet.avro.{AvroParquetReader, AvroParquetWriter} import org.apache.parquet.hadoop.ParquetReader import org.apache.parquet.hadoop.api.ReadSupport import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.prop.PropertyChecks import org.scalatest.{BeforeAndAfterAll, Matchers, PropSpec} class ParquetWriterTaskSpec extends PropSpec with PropertyChecks with Matchers with BeforeAndAfterAll { implicit var system: ActorSystem = ActorSystem("PipeLineSpec") val context = MockUtil.mockTaskContext val appName = "KafkaHdfsPipeLine" when(context.appName).thenReturn(appName) val fs = FileSystem.get(new YarnConfiguration) val homeDir = fs.getHomeDirectory.toUri.getPath val parquetDir = new Path(homeDir, "gearpump") + "/parquet/" val parquetPath = parquetDir + appName + ".parquet" val parquetCrc = parquetDir + "." + appName + ".parquet.crc" val parquetWriter = Mockito.mock(classOf[AvroParquetWriter[SpaceShuttleRecord]]) val anomaly = 0.252 val now = System.currentTimeMillis val userConfig = UserConfig.empty.withString(ParquetWriterTask.PARQUET_OUTPUT_DIRECTORY, "/parquet") override def afterAll(): Unit = { List(parquetPath, parquetCrc, parquetDir).foreach(new java.io.File(_).delete) system.shutdown() } property("ParquetWriterTask should initialize with local parquet file opened for writing") { val parquetWriterTask = new ParquetWriterTask(context, userConfig) val path = parquetWriterTask.absolutePath.stripPrefix("file:") assert(parquetPath.equals(path)) parquetWriterTask.onStop } property("ParquetWriterTask should write records to a parquet file") { val message = Message(SpaceShuttleRecord(now, anomaly), now) val parquetWriterTask = new ParquetWriterTask(context, userConfig) parquetWriterTask.parquetWriter = parquetWriter parquetWriterTask.onNext(message) verify(parquetWriterTask.parquetWriter).write(message.msg.asInstanceOf[SpaceShuttleRecord]) parquetWriterTask.onStop } property("ParquetWriterTask should have verifiable written record") { val message = Message(SpaceShuttleRecord(now, anomaly), now) val parquetWriterTask = new ParquetWriterTask(context, userConfig) parquetWriterTask.onNext(message) parquetWriterTask.onStop val reader = new AvroParquetReader[SpaceShuttleRecord](new Path(parquetPath)) val record = reader.read() assert(message.msg.asInstanceOf[SpaceShuttleRecord].anomaly == record.anomaly) assert(message.msg.asInstanceOf[SpaceShuttleRecord].ts == record.ts) } }
Example 147
Source File: LineItem.scala From scavro with Apache License 2.0 | 5 votes |
package org.oedura.scavrodemo.model import org.apache.avro.Schema import org.oedura.scavro.{AvroReader, AvroSerializeable, AvroMetadata} import org.oedura.scavrodemo.idl.{LineItem => JLineItem} case class LineItem(name: String, price: Double, quantity: Int) extends AvroSerializeable { type J = JLineItem override def toAvro: JLineItem = new JLineItem(name, price.toFloat, quantity) } object LineItem { implicit def reader = new AvroReader[LineItem] { override type J = JLineItem } implicit val metadata: AvroMetadata[LineItem, JLineItem] = new AvroMetadata[LineItem, JLineItem] { override val avroClass: Class[JLineItem] = classOf[JLineItem] override val schema: Schema = JLineItem.getClassSchema override val fromAvro: (JLineItem) => LineItem = (j: JLineItem) => { LineItem(j.getName.toString, j.getPrice.doubleValue, j.getQuantity) } } }
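Conversion in both directions uses only what the example defines: toAvro builds the generated Java class and metadata.fromAvro maps it back. A short sketch (the price is narrowed to Float on the Java side, so a value that is exactly representable as a Float is used to keep the comparison honest):

import org.oedura.scavrodemo.model.LineItem
import org.oedura.scavrodemo.idl.{LineItem => JLineItem}

object LineItemSketch extends App {
  val item = LineItem("widget", 9.5, 3)

  // Scala case class -> generated Java Avro class
  val javaItem: JLineItem = item.toAvro

  // Generated Java class -> Scala case class
  val roundTripped: LineItem = LineItem.metadata.fromAvro(javaItem)
  assert(roundTripped == item)
}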
Example 148
Source File: Number.scala From scavro with Apache License 2.0 | 5 votes |
package org.oedura.scavro import org.apache.avro.Schema case class Number(name: String, value: Int) extends AvroSerializeable { type J = MockNumber override def toAvro: MockNumber = new MockNumber(name, value) } object Number { implicit def reader = new AvroReader[Number] { override type J = MockNumber } implicit val metadata = new AvroMetadata[Number, MockNumber] { override val avroClass = classOf[MockNumber] override val schema: Schema = MockNumber.getClassSchema override val fromAvro: (MockNumber) => Number = { mock => val name: String = mock.get(0).asInstanceOf[String] val value: Int = mock.get(1).asInstanceOf[Int] Number(name, value) } } }
Example 149
Source File: GenericAvroSerializerSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
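The fingerprint test above only shrinks the payload because the schema was registered on the SparkConf first. A minimal sketch of that setup; the driver object is illustrative:

import org.apache.avro.SchemaBuilder
import org.apache.spark.SparkConf

object RegisterAvroSchemaSketch extends App {
  val schema = SchemaBuilder.record("testRecord").fields()
    .requiredString("data")
    .endRecord()

  // Pre-registering the schema lets the Kryo-backed GenericAvroSerializer send a
  // fingerprint instead of the full schema text with every record
  val conf = new SparkConf()
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .registerAvroSchemas(schema)
}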
Example 150
Source File: ClassStore.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package stores import org.apache.avro.Schema import treehugger.forest.Symbol import java.util.concurrent.ConcurrentHashMap import scala.jdk.CollectionConverters._ class ClassStore { val generatedClasses: scala.collection.concurrent.Map[Schema, Symbol] = { new ConcurrentHashMap[Schema, Symbol]().asScala } def accept(schema: Schema, caseClassDef: Symbol) = { if (!generatedClasses.contains(schema)) { val _ = generatedClasses += schema -> caseClassDef } } }
Example 151
Source File: DefaultParamMatcher.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package matchers import avrohugger.matchers.custom.CustomDefaultParamMatcher import avrohugger.stores.ClassStore import avrohugger.types._ import org.apache.avro.Schema import org.apache.avro.Schema.Type import treehugger.forest._ import definitions._ import treehugger.forest import treehuggerDSL._ object DefaultParamMatcher { // for SpecificRecord def asDefaultParam( classStore: ClassStore, avroSchema: Schema, typeMatcher: TypeMatcher): Tree = { avroSchema.getType match { case Type.BOOLEAN => FALSE case Type.INT => LogicalType.foldLogicalTypes[Tree]( schema = avroSchema, default = LIT(0)) { case Date => CustomDefaultParamMatcher.checkCustomDateType( typeMatcher.avroScalaTypes.date) } case Type.LONG => LogicalType.foldLogicalTypes[Tree]( schema = avroSchema, default = LIT(0L)) { case TimestampMillis => CustomDefaultParamMatcher.checkCustomTimestampMillisType( typeMatcher.avroScalaTypes.timestampMillis) } case Type.FLOAT => LIT(0F) case Type.DOUBLE => LIT(0D) case Type.STRING => LogicalType.foldLogicalTypes[Tree]( schema = avroSchema, default = LIT("")) { case UUID => REF("java.util.UUID.randomUUID") } case Type.NULL => NULL case Type.FIXED => sys.error("the FIXED datatype is not yet supported") case Type.ENUM => CustomDefaultParamMatcher.checkCustomEnumType(typeMatcher.avroScalaTypes.enum) case Type.BYTES => CustomDefaultParamMatcher.checkCustomDecimalType( decimalType = typeMatcher.avroScalaTypes.decimal, schema = avroSchema, default = NULL) case Type.RECORD => NEW(classStore.generatedClasses(avroSchema)) case Type.UNION => NONE case Type.ARRAY => CustomDefaultParamMatcher.checkCustomArrayType(typeMatcher.avroScalaTypes.array) DOT "empty" case Type.MAP => MAKE_MAP(LIT("") ANY_-> asDefaultParam(classStore, avroSchema.getValueType, typeMatcher)) } } }
Example 152
Source File: CustomDefaultParamMatcher.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package matchers package custom import avrohugger.matchers.custom.CustomUtils._ import avrohugger.types._ import treehugger.forest._ import definitions._ import treehuggerDSL._ import org.apache.avro.Schema object CustomDefaultParamMatcher { def checkCustomArrayType(arrayType: AvroScalaArrayType) = { arrayType match { case ScalaArray => ArrayClass case ScalaList => ListClass case ScalaSeq => SeqClass case ScalaVector => VectorClass } } def checkCustomEnumType(enumType: AvroScalaEnumType) = { enumType match { case JavaEnum => NULL // TODO Take first enum value? case ScalaEnumeration => NULL // TODO Take first enum value? case ScalaCaseObjectEnum => NULL // TODO Take first enum value? case EnumAsScalaString => LIT("") } } def checkCustomDateType(dateType: AvroScalaDateType) = dateType match { case JavaSqlDate => NEW(REF("java.sql.Date"), LIT(0L)) case JavaTimeLocalDate => REF("java.time.LocalDate.now") } def checkCustomTimestampMillisType(timestampMillisType: AvroScalaTimestampMillisType) = timestampMillisType match { case JavaSqlTimestamp => NEW(REF("java.sql.Timestamp"), LIT(0L)) case JavaTimeInstant => REF("java.time.Instant.now") } def checkCustomDecimalType(decimalType: AvroScalaDecimalType, schema: Schema, default: => Tree, decimalValue: => Option[String] = None) = { val decimalValueRef = REF("scala.math.BigDecimal") APPLY decimalValue.map(LIT(_)).getOrElse(LIT(0)) LogicalType.foldLogicalTypes[Tree]( schema = schema, default = default) { case Decimal(precision, scale) => decimalType match { case ScalaBigDecimal(_) => decimalValueRef case ScalaBigDecimalWithPrecision(_) => decimalTagged(precision, scale) APPLY decimalValueRef } } } }
Example 153
Source File: CustomTypeMatcher.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package matchers package custom import avrohugger.matchers.custom.CustomUtils._ import avrohugger.stores.ClassStore import avrohugger.types._ import org.apache.avro.Schema import treehugger.forest._ import treehuggerDSL._ import definitions._ object CustomTypeMatcher { def checkCustomArrayType(arrayType: AvroScalaArrayType) = arrayType match { case ScalaArray => TYPE_ARRAY(_) case ScalaList => TYPE_LIST(_) case ScalaSeq => TYPE_SEQ(_) case ScalaVector => TYPE_VECTOR(_) } def checkCustomEnumType( enumType: AvroScalaEnumType, classStore: ClassStore, schema: Schema) = enumType match { case JavaEnum => classStore.generatedClasses(schema) case ScalaEnumeration => classStore.generatedClasses(schema) case ScalaCaseObjectEnum => classStore.generatedClasses(schema) case EnumAsScalaString => StringClass } def checkCustomNumberType(numberType: AvroScalaNumberType) = numberType match { case ScalaDouble => DoubleClass case ScalaFloat => FloatClass case ScalaLong => LongClass case ScalaInt => IntClass } def checkCustomDateType(dateType: AvroScalaDateType) = dateType match { case JavaTimeLocalDate => RootClass.newClass(nme.createNameType("java.time.LocalDate")) case JavaSqlDate => RootClass.newClass(nme.createNameType("java.sql.Date")) } def checkCustomTimestampMillisType(timestampType: AvroScalaTimestampMillisType) = timestampType match { case JavaSqlTimestamp => RootClass.newClass(nme.createNameType("java.sql.Timestamp")) case JavaTimeInstant => RootClass.newClass(nme.createNameType("java.time.Instant")) } def checkCustomDecimalType(decimalType: AvroScalaDecimalType, schema: Schema) = LogicalType.foldLogicalTypes( schema = schema, default = TYPE_ARRAY(ByteClass)) { case Decimal(precision, scale) => decimalType match { case ScalaBigDecimal(_) => BigDecimalClass case ScalaBigDecimalWithPrecision(_) => decimalTaggedType(precision, scale) } } }
Example 154
Source File: LogicalAvroScalaTypes.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package types import org.apache.avro.Schema sealed trait AvroScalaDecimalType extends Product with Serializable case class ScalaBigDecimal(maybeRoundingMode: Option[BigDecimal.RoundingMode.Value]) extends AvroScalaDecimalType case class ScalaBigDecimalWithPrecision(maybeRoundingMode: Option[BigDecimal.RoundingMode.Value]) extends AvroScalaDecimalType sealed trait AvroScalaDateType extends Product with Serializable case object JavaSqlDate extends AvroScalaDateType case object JavaTimeLocalDate extends AvroScalaDateType sealed trait AvroScalaTimestampMillisType extends Product with Serializable case object JavaSqlTimestamp extends AvroScalaTimestampMillisType case object JavaTimeInstant extends AvroScalaTimestampMillisType sealed trait AvroUuidType extends Product with Serializable case object JavaUuid extends AvroUuidType sealed abstract class LogicalType(name: String) case class Decimal(precision: Int, scale: Int) extends LogicalType("decimal") case object Date extends LogicalType("date") case object TimestampMillis extends LogicalType("timestamp-millis") case object UUID extends LogicalType("uuid") object LogicalType { def apply(logicalType: org.apache.avro.LogicalType): Option[LogicalType] = logicalType match { case d: org.apache.avro.LogicalTypes.Decimal => Some(Decimal(d.getPrecision, d.getScale)) case _: org.apache.avro.LogicalTypes.Date => Some(Date) case _: org.apache.avro.LogicalTypes.TimestampMillis => Some(TimestampMillis) case _ if logicalType.getName == "uuid" => Some(UUID) case _ => None } def foldLogicalTypes[A](schema: Schema, default: => A)(cases : PartialFunction[LogicalType, A]): A = Option(schema.getLogicalType) match { case Some(tpe) => LogicalType(tpe).flatMap(cases.lift(_)).getOrElse(default) case _ => default } }
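These helpers sit on top of Avro's own logical-type support. A sketch of exercising them directly against a schema carrying a decimal logical type; the driver object is hypothetical and assumes the avrohugger.types package is on the classpath:

import org.apache.avro.{LogicalTypes, Schema}
import avrohugger.types.{Decimal, LogicalType}

object LogicalTypeSketch extends App {
  // Attach decimal(9,2) to a bytes schema, the way Avro itself represents it
  val decimalSchema: Schema =
    LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES))

  // foldLogicalTypes dispatches on the recognised logical type...
  val described = LogicalType.foldLogicalTypes[String](decimalSchema, "no logical type") {
    case Decimal(p, s) => s"decimal($p,$s)"
  }
  assert(described == "decimal(9,2)")

  // ...and falls back to the default when the schema has none
  val plain = LogicalType.foldLogicalTypes[String](Schema.create(Schema.Type.BYTES), "no logical type") {
    case Decimal(p, s) => s"decimal($p,$s)"
  }
  assert(plain == "no logical type")
}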
Example 155
Source File: SpecificImporter.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package specific import avrohugger.format.abstractions.Importer import avrohugger.input.DependencyInspector._ import avrohugger.input.NestedSchemaExtractor._ import avrohugger.matchers.TypeMatcher import avrohugger.stores.SchemaStore import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Type.RECORD import treehugger.forest._ import definitions._ import treehuggerDSL._ import scala.collection.JavaConverters._ object SpecificImporter extends Importer { def getImports( schemaOrProtocol: Either[Schema, Protocol], currentNamespace: Option[String], schemaStore: SchemaStore, typeMatcher: TypeMatcher): List[Import] = { val switchAnnotSymbol = RootClass.newClass("scala.annotation.switch") val switchImport = IMPORT(switchAnnotSymbol) val topLevelSchemas = getTopLevelSchemas(schemaOrProtocol, schemaStore, typeMatcher) val recordSchemas = getRecordSchemas(topLevelSchemas) val enumSchemas = getEnumSchemas(topLevelSchemas) val deps = getUserDefinedImports(recordSchemas ++ enumSchemas, currentNamespace, typeMatcher) schemaOrProtocol match { case Left(schema) => { if (schema.getType == RECORD) switchImport :: deps else deps } case Right(protocol) => { val types = protocol.getTypes.asScala.toList val messages = protocol.getMessages.asScala.toMap if (messages.isEmpty) switchImport :: deps // for ADT else List.empty // for RPC } } } }
Example 156
Source File: SpecificSchemahugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package specific package avrohuggers import format.abstractions.avrohuggers.Schemahugger import trees.{ SpecificCaseClassTree, SpecificObjectTree } import matchers.TypeMatcher import stores.{ClassStore, SchemaStore} import org.apache.avro.Schema import treehugger.forest.Tree object SpecificSchemahugger extends Schemahugger { def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], schema: Schema, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] = { val caseClassDef = SpecificCaseClassTree.toCaseClassDef( classStore, namespace, schema, typeMatcher, maybeBaseTrait, maybeFlags, restrictedFields) val companionDef = SpecificObjectTree.toCaseCompanionDef( schema, maybeFlags, schemaStore, typeMatcher) List(caseClassDef, companionDef) } }
Example 157
Source File: SpecificProtocolhugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package specific package avrohuggers import format.abstractions.avrohuggers.Protocolhugger import generators.ScalaDocGenerator import trees.{ SpecificObjectTree, SpecificTraitTree } import matchers.TypeMatcher import stores.{ClassStore, SchemaStore} import types.ScalaADT import org.apache.avro.{ Protocol, Schema } import treehugger.forest._ import definitions._ import treehuggerDSL._ import scala.collection.JavaConverters._ object SpecificProtocolhugger extends Protocolhugger { def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], protocol: Protocol, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] = { val name: String = protocol.getName val messages = protocol.getMessages.asScala.toMap val maybeProtocolDoc = Option(protocol.getDoc) if (messages.isEmpty) { val localSubTypes = getLocalSubtypes(protocol) // protocols with more than 1 schema defined (Java Enums don't count) and // without messages are generated as ADTs val localNonEnums = localSubTypes.filterNot(isEnum) if (localNonEnums.length > 1 && typeMatcher.avroScalaTypes.protocol == types.ScalaADT) { val maybeNewBaseTrait = Some(name) val maybeNewFlags = Some(List(Flags.FINAL.toLong)) val sealedTraitDef = SpecificTraitTree.toADTRootDef(protocol) val subTypeDefs = localNonEnums.flatMap(schema => { SpecificSchemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, maybeNewBaseTrait, maybeNewFlags, restrictedFields) }) sealedTraitDef +: subTypeDefs } // if only one Scala type is defined, then don't generate sealed trait else { // no sealed trait tree, but could still need a protocol doc at top val docTrees = { Option(protocol.getDoc) match { case Some(doc) => List(ScalaDocGenerator.docToScalaDoc(Right(protocol), EmptyTree)) case None => List.empty } } docTrees ::: localNonEnums.flatMap(schema => { SpecificSchemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, maybeBaseTrait, maybeFlags, restrictedFields) }) } } else { val rpcTraitDef = SpecificTraitTree.toRPCTraitDef( classStore, namespace, protocol, typeMatcher) val companionDef = SpecificObjectTree.toTraitCompanionDef(protocol) List(rpcTraitDef, companionDef) } } }
Example 158
Source File: SpecificObjectTree.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package specific package trees import generators.ScalaDocGenerator import matchers.TypeMatcher import stores.SchemaStore import org.apache.avro.{Protocol, Schema} import treehugger.forest._ import definitions._ import org.apache.avro.Schema.Type import treehuggerDSL._ import scala.collection.JavaConverters._ // only companions, so no doc generation is required here object SpecificObjectTree { // Companion to case classes def toCaseCompanionDef( schema: Schema, maybeFlags: Option[List[Long]], schemaStore: SchemaStore, typeMatcher: TypeMatcher) = { val ParserClass = RootClass.newClass("org.apache.avro.Schema.Parser") val objectDef = maybeFlags match { case Some(flags) => OBJECTDEF(schema.getName).withFlags(flags:_*) case None => OBJECTDEF(schema.getName) } val schemaDef = VAL(REF("SCHEMA$")) := { (NEW(ParserClass)) APPLY(Nil) DOT "parse" APPLY(LIT(schema.toString)) } val DecimalConversion = RootClass.newClass("org.apache.avro.Conversions.DecimalConversion") val decimalConversionDef = VAL(REF("decimalConversion")) := NEW(DecimalConversion) def schemaContainsDecimal(schema: Schema): Boolean = { def getNestedSchemas(s: Schema): List[Schema] = s.getType match { case Schema.Type.ARRAY => getNestedSchemas(s.getElementType) case Schema.Type.MAP => getNestedSchemas(s.getValueType) case Schema.Type.UNION => s.getTypes.asScala.toList.flatMap(getNestedSchemas) case _ => List(s) } val topLevelSchemas = SpecificImporter.getTopLevelSchemas(Left(schema), schemaStore, typeMatcher) val recordSchemas = SpecificImporter.getRecordSchemas(topLevelSchemas).filter(s => s.getType == Schema.Type.RECORD) val fieldSchemas = recordSchemas.flatMap(_.getFields.asScala.map(_.schema())) fieldSchemas.flatMap(getNestedSchemas).exists(s => Option(s.getLogicalType()) match { case Some(logicalType) => logicalType.getName == "decimal" case None => false }) } // companion object definition if (schemaContainsDecimal(schema)) objectDef := BLOCK(schemaDef, decimalConversionDef) else objectDef := BLOCK(schemaDef) } // union acts as a blackbox, fields are not seen on root level, unpack is required private def collectUnionFields(sc: Schema): Iterable[Schema] = { sc.getTypes.asScala.toList } // Companion to traits that have messages def toTraitCompanionDef(protocol: Protocol) = { val ProtocolClass = RootClass.newClass("org.apache.avro.Protocol") // companion object definition OBJECTDEF(protocol.getName) := BLOCK( VAL("PROTOCOL", ProtocolClass).withFlags(Flags.FINAL) := { REF(ProtocolClass) DOT "parse" APPLY(LIT(protocol.toString)) } ) } }
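For reference, the decimal detection above can be reduced to a plain Avro traversal over a single record's fields. The simplified helper below is illustrative only and, unlike the SchemaStore-backed version, does not recurse into nested records; it just shows the array/map/union unwrapping at the core of schemaContainsDecimal:

import org.apache.avro.Schema
import scala.collection.JavaConverters._

object DecimalDetection {
  // collect the leaf schemas reachable through arrays, maps and unions
  private def leaves(s: Schema): List[Schema] = s.getType match {
    case Schema.Type.ARRAY => leaves(s.getElementType)
    case Schema.Type.MAP   => leaves(s.getValueType)
    case Schema.Type.UNION => s.getTypes.asScala.toList.flatMap(leaves)
    case _                 => List(s)
  }

  // true if any field of the record carries the "decimal" logical type
  def recordUsesDecimal(record: Schema): Boolean =
    record.getType == Schema.Type.RECORD &&
      record.getFields.asScala
        .map(_.schema())
        .flatMap(leaves)
        .exists(s => Option(s.getLogicalType).exists(_.getName == "decimal"))
}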
Example 159
Source File: SpecificScalaTreehugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package specific import format.abstractions.ScalaTreehugger import avrohuggers.{ SpecificProtocolhugger, SpecificSchemahugger } import matchers.TypeMatcher import stores.{ ClassStore, SchemaStore } import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Field import org.apache.avro.Schema.Type.RECORD import treehugger.forest._ import definitions._ import treehuggerDSL._ object SpecificScalaTreehugger extends ScalaTreehugger { val schemahugger = SpecificSchemahugger val protocolhugger = SpecificProtocolhugger val importer = SpecificImporter // SpecificCompiler can't return a tree for Java enums, so return // a String here for a consistent api vis a vis *ToFile and *ToStrings def asScalaCodeString( classStore: ClassStore, namespace: Option[String], schemaOrProtocol: Either[Schema, Protocol], typeMatcher: TypeMatcher, schemaStore: SchemaStore, restrictedFields: Boolean): String = { // imports in case a field type is from a different namespace val imports: List[Import] = importer.getImports( schemaOrProtocol, namespace, schemaStore, typeMatcher) val topLevelDefs: List[Tree] = schemaOrProtocol match { case Left(schema) => schemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, None, None, restrictedFields ) case Right(protocol) => protocolhugger.toTrees( schemaStore, classStore, namespace, protocol, typeMatcher, None, None, restrictedFields ) } // wrap the definitions in a block with a comment and a package val tree = { val blockContent = imports ++ topLevelDefs if (namespace.isDefined) BLOCK(blockContent:_*).inPackage(namespace.get) else BLOCK(blockContent:_*).withoutPackage }.withDoc("MACHINE-GENERATED FROM AVRO SCHEMA. DO NOT EDIT DIRECTLY") val codeString = treeToString(tree) codeString } }
Example 160
Source File: Protocolhugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package abstractions package avrohuggers import stores.{ClassStore, SchemaStore} import matchers.TypeMatcher import org.apache.avro.{ Protocol, Schema } import treehugger.forest.Tree import scala.collection.JavaConverters._ trait Protocolhugger { def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], protocol: Protocol, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] def getLocalSubtypes(protocol: Protocol): List[Schema] = { val protocolNS = protocol.getNamespace val types = protocol.getTypes.asScala.toList def isTopLevelNamespace(schema: Schema) = schema.getNamespace == protocolNS types.filter(isTopLevelNamespace) } def isEnum(schema: Schema) = schema.getType == Schema.Type.ENUM }
Example 161
Source File: Schemahugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package abstractions package avrohuggers import stores.{ClassStore, SchemaStore} import matchers.TypeMatcher import org.apache.avro.Schema import treehugger.forest.Tree trait Schemahugger { def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], schema: Schema, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] }
Example 162
Source File: StandardScalaTreehugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package standard import format.abstractions.ScalaTreehugger import avrohuggers.{ StandardProtocolhugger, StandardSchemahugger } import matchers.TypeMatcher import stores.{ ClassStore, SchemaStore } import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Field import org.apache.avro.Schema.Type.{ RECORD } import treehugger.forest._ import definitions._ import treehuggerDSL._ object StandardScalaTreehugger extends ScalaTreehugger { val schemahugger = StandardSchemahugger val protocolhugger = StandardProtocolhugger val importer = StandardImporter def asScalaCodeString( classStore: ClassStore, namespace: Option[String], schemaOrProtocol: Either[Schema, Protocol], typeMatcher: TypeMatcher, schemaStore: SchemaStore, restrictedFields: Boolean): String = { val imports = importer.getImports( schemaOrProtocol, namespace, schemaStore, typeMatcher) val topLevelDefs: List[Tree] = schemaOrProtocol match { case Left(schema) => schemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, None, None, restrictedFields ) case Right(protocol) => protocolhugger.toTrees( schemaStore, classStore, namespace, protocol, typeMatcher, None, None, restrictedFields ) } // wrap the imports and class definition in a block with comment and package val tree = { val blockContent = imports ++ topLevelDefs if (namespace.isDefined) BLOCK(blockContent).inPackage(namespace.get) else BLOCK(blockContent:_*).withoutPackage }.withDoc("MACHINE-GENERATED FROM AVRO SCHEMA. DO NOT EDIT DIRECTLY") // SpecificCompiler can't return a tree for Java enums, so return // a string here for a consistent api vis a vis *ToFile and *ToStrings treeToString(tree) } }
Example 163
Source File: StandardProtocolhugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package standard package avrohuggers import generators.ScalaDocGenerator import trees.StandardTraitTree import matchers.TypeMatcher import stores.{ClassStore, SchemaStore} import types._ import org.apache.avro.{ Protocol, Schema } import treehugger.forest._ import definitions._ import treehuggerDSL._ import format.abstractions.avrohuggers.Protocolhugger object StandardProtocolhugger extends Protocolhugger { def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], protocol: Protocol, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] = { val name: String = protocol.getName val localSubTypes = getLocalSubtypes(protocol) val adtSubTypes = typeMatcher.avroScalaTypes.enum match { case JavaEnum => localSubTypes.filterNot(isEnum) case ScalaCaseObjectEnum => localSubTypes case ScalaEnumeration => localSubTypes case EnumAsScalaString => localSubTypes.filterNot(isEnum) } if (adtSubTypes.length > 1 && typeMatcher.avroScalaTypes.protocol == types.ScalaADT) { val maybeNewBaseTrait = Some(name) val maybeNewFlags = Some(List(Flags.FINAL.toLong)) val traitDef = StandardTraitTree.toADTRootDef(protocol, typeMatcher) traitDef +: adtSubTypes.flatMap(schema => { StandardSchemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, maybeNewBaseTrait, maybeNewFlags, restrictedFields) }) } // if only one Scala type is defined, then don't generate sealed trait else { // no sealed trait tree, but could still need a top-level doc val docTrees = { Option(protocol.getDoc) match { case Some(doc) => List(ScalaDocGenerator.docToScalaDoc(Right(protocol), EmptyTree)) case None => List.empty } } docTrees ::: localSubTypes.flatMap(schema => { StandardSchemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, maybeBaseTrait, maybeFlags, restrictedFields) }) } } }
Example 164
Source File: StandardSchemahugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package standard package avrohuggers import format.abstractions.avrohuggers.Schemahugger import trees.{ StandardCaseClassTree, StandardObjectTree, StandardTraitTree } import matchers.TypeMatcher import stores.{ClassStore, SchemaStore} import types._ import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Type.{ ENUM, RECORD } import treehugger.forest._ import definitions._ import treehuggerDSL._ object StandardSchemahugger extends Schemahugger { def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], schema: Schema, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] = { // as case class definition schema.getType match { case RECORD => val classDef = StandardCaseClassTree.toCaseClassDef( classStore, namespace, schema, typeMatcher, maybeBaseTrait, maybeFlags, restrictedFields) val companionDef = StandardObjectTree.toCaseCompanionDef( schema, maybeFlags) typeMatcher.avroScalaTypes.record match { case ScalaCaseClass => List(classDef) case ScalaCaseClassWithSchema => List(classDef, companionDef) } case ENUM => typeMatcher.avroScalaTypes.enum match { case JavaEnum => List.empty case ScalaCaseObjectEnum => StandardTraitTree.toCaseObjectEnumDef(schema, maybeBaseTrait) case ScalaEnumeration => val objectDef = StandardObjectTree.toScalaEnumDef( classStore, schema, maybeBaseTrait, maybeFlags) List(objectDef) case EnumAsScalaString => List.empty } case _ => sys.error("Only RECORD or ENUM can be toplevel definitions") } } }
Example 165
Source File: StandardTraitTree.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package standard package trees import generators.ScalaDocGenerator import matchers.TypeMatcher import types._ import treehugger.forest._ import definitions._ import treehuggerDSL._ import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Type.{ ENUM, RECORD } import scala.collection.JavaConverters._ object StandardTraitTree { def toADTRootDef(protocol: Protocol, typeMatcher: TypeMatcher) = { def isEnum(schema: Schema) = schema.getType == ENUM val sealedTraitTree = TRAITDEF(protocol.getName).withFlags(Flags.SEALED) val adtRootTree = { val adtSubTypes = typeMatcher.avroScalaTypes.enum match { case JavaEnum => protocol.getTypes.asScala.toList.filterNot(isEnum) case ScalaCaseObjectEnum => protocol.getTypes.asScala.toList case ScalaEnumeration => protocol.getTypes.asScala.toList case EnumAsScalaString => protocol.getTypes.asScala.filterNot(isEnum) } if (adtSubTypes.forall(schema => schema.getType == RECORD)) { sealedTraitTree .withParents("Product") .withParents("Serializable") } else sealedTraitTree } val treeWithScalaDoc = ScalaDocGenerator.docToScalaDoc( Right(protocol), adtRootTree) treeWithScalaDoc } def toCaseObjectEnumDef(schema: Schema, maybeBaseTrait: Option[String]): List[Tree] = { val adtRootTree: Tree = maybeBaseTrait match { case Some(baseTrait) => TRAITDEF(schema.getName).withFlags(Flags.SEALED).withParents(baseTrait) case None => TRAITDEF(schema.getName).withFlags(Flags.SEALED) } val adtSubTypes: List[Tree] = schema.getEnumSymbols.asScala .map(enumSymbol => enumSymbol.toString) .map(enumSymbolString => { (CASEOBJECTDEF(enumSymbolString).withParents(schema.getName): Tree) }).toList val objectTree = OBJECTDEF(schema.getName) := Block(adtSubTypes:_*) val adtRootTreeWithScalaDoc: Tree = ScalaDocGenerator.docToScalaDoc( Left(schema), adtRootTree) List(adtRootTreeWithScalaDoc, objectTree) } }
Example 166
Source File: StandardObjectTree.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package standard package trees import generators.ScalaDocGenerator import stores.ClassStore import treehugger.forest._ import definitions._ import treehuggerDSL._ import org.apache.avro.Schema import scala.collection.JavaConverters._ object StandardObjectTree { def toCaseCompanionDef(schema: Schema, maybeFlags: Option[List[Long]]) = { val ParserClass = RootClass.newClass("org.apache.avro.Schema.Parser") val objectDef = maybeFlags match { case Some(flags) => OBJECTDEF(schema.getName).withFlags(flags:_*) case None => OBJECTDEF(schema.getName) } // companion object definition objectDef := BLOCK( VAL(REF("SCHEMA$")) := { (NEW(ParserClass)) APPLY(Nil) DOT "parse" APPLY(LIT(schema.toString)) } ) } def toScalaEnumDef( classStore: ClassStore, schema: Schema, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]]) = { val objectDef = (maybeBaseTrait, maybeFlags) match { case (Some(baseTrait), Some(flags)) => OBJECTDEF(schema.getName) .withFlags(flags:_*) .withParents("Enumeration") .withParents(baseTrait) case (Some(baseTrait), None) => OBJECTDEF(schema.getName) .withParents("Enumeration") .withParents(baseTrait) case (None, Some(flags)) => OBJECTDEF(schema.getName) .withFlags(flags:_*) .withParents("Enumeration") case (None, None) => OBJECTDEF(schema.getName) .withParents("Enumeration") } val objectTree = objectDef := BLOCK( TYPEVAR(schema.getName) := REF("Value"), VAL(schema.getEnumSymbols.asScala.mkString(", ")) := REF("Value") ) val treeWithScalaDoc = ScalaDocGenerator.docToScalaDoc( Left(schema), objectTree) treeWithScalaDoc } }
Example 167
Source File: StandardJavaTreehugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger
package format
package standard

import stores.ClassStore
import format.abstractions.JavaTreehugger
import org.apache.avro.Schema

import scala.collection.JavaConverters._

object StandardJavaTreehugger extends JavaTreehugger {

  val wrapRegEx = """(.{1,75})\s""".r

  def wrapLine(s: String) = wrapRegEx.replaceAllIn(s, m => m.group(1) + "\n * ")

  // body assumed: the excerpt was truncated here, so the javadoc wrapper is
  // rebuilt from wrapLine's " * " continuation prefix
  def javaDoc(docString: String): String = s"/**\n * ${wrapLine(docString)}\n */"

  // signature assumed from the JavaTreehugger abstraction; only the ENUM
  // template and the error case survived in the excerpt
  def asJavaCodeString(
    classStore: ClassStore,
    namespace: Option[String],
    schema: Schema): String = {
    schema.getType match {
      case Schema.Type.ENUM =>
        s"""
        |${namespace.orElse(Option(schema.getNamespace)).fold("")(n => s"package $n;")}
        |
        |${Option(schema.getDoc).fold("")(javaDoc)}
        |public enum ${schema.getName} {
        |  ${schema.getEnumSymbols.asScala.mkString(", ")} ;
        |}""".stripMargin
      case _ => sys.error("Currently ENUM is the only supported Java type.")
    }
  }
}
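To see what the ENUM branch renders, the template can be exercised by hand with an enum schema built through the plain Avro API. The standalone sketch below repeats the interpolation directly rather than going through avrohugger's ClassStore plumbing, so it only assumes the standard Avro Schema.createEnum call:

import java.util.Arrays
import org.apache.avro.Schema
import scala.collection.JavaConverters._

object JavaEnumTemplateExample extends App {
  // equivalent to {"type": "enum", "name": "Suit", "namespace": "example.proto", ...}
  val suit: Schema = Schema.createEnum(
    "Suit", "Card suits", "example.proto",
    Arrays.asList("SPADES", "HEARTS", "DIAMONDS", "CLUBS"))

  // the same shape of Java source the ENUM template interpolates
  val javaSource =
    s"""|package ${suit.getNamespace};
        |
        |public enum ${suit.getName} {
        |  ${suit.getEnumSymbols.asScala.mkString(", ")} ;
        |}""".stripMargin

  println(javaSource)
}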
Example 168
Source File: ScavroNamespaceRenamer.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package scavro import avrohugger.matchers.TypeMatcher import avrohugger.matchers.custom.CustomNamespaceMatcher import org.apache.avro.{ Schema, Protocol } object ScavroNamespaceRenamer { // By default, Scavro generates Scala classes in packages that are the same // as the Java package with `model` appended. // TypeMatcher is here because it holds the custom namespace map def renameNamespace( maybeNamespace: Option[String], schemaOrProtocol: Either[Schema, Protocol], typeMatcher: TypeMatcher): Option[String] = { val scavroModelDefaultPackage: String = typeMatcher.customNamespaces .get("SCAVRO_DEFAULT_PACKAGE$") .getOrElse("model") val someScavroModelDefaultNamespace = maybeNamespace match { case Some(ns) => Some(ns + "." + scavroModelDefaultPackage) case None => sys.error("Scavro requires a namespace because Java " + "classes cannot be imported from the default package") } val scavroModelNamespace = { val ns = schemaOrProtocol match { case Left(schema) => Option(schema.getNamespace) case Right(protocol) => Option(protocol.getNamespace) } ns match { case Some(schemaNS) => { CustomNamespaceMatcher.checkCustomNamespace( ns, typeMatcher, maybeDefaultNamespace = someScavroModelDefaultNamespace) } case None => someScavroModelDefaultNamespace } } scavroModelNamespace } }
Example 169
Source File: ScavroSchemahugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package scavro package avrohuggers import format.abstractions.avrohuggers.Schemahugger import trees.{ ScavroCaseClassTree, ScavroObjectTree, ScavroTraitTree } import matchers.TypeMatcher import stores.{ClassStore, SchemaStore} import types._ import org.apache.avro.Schema import org.apache.avro.Schema.Type.{ ENUM, RECORD } import treehugger.forest._ import definitions._ import treehuggerDSL._ object ScavroSchemahugger extends Schemahugger{ def toTrees( schemaStore: SchemaStore, classStore: ClassStore, namespace: Option[String], schema: Schema, typeMatcher: TypeMatcher, maybeBaseTrait: Option[String], maybeFlags: Option[List[Long]], restrictedFields: Boolean): List[Tree] = { val ScalaClass = RootClass.newClass(schema.getName) val JavaClass = RootClass.newClass("J" + schema.getName) schema.getType match { case RECORD => val caseClassDef = ScavroCaseClassTree.toCaseClassDef( classStore, namespace, schema, ScalaClass, JavaClass, typeMatcher, maybeBaseTrait, maybeFlags, restrictedFields) val companionDef = ScavroObjectTree.toCompanionDef( classStore, schema, ScalaClass, JavaClass, typeMatcher, maybeFlags) List(caseClassDef, companionDef) case ENUM => typeMatcher.avroScalaTypes.enum match { case JavaEnum => List.empty case ScalaCaseObjectEnum => ScavroTraitTree.toCaseObjectEnumDef(schema, maybeBaseTrait) case ScalaEnumeration => val objectDef = ScavroObjectTree.toScalaEnumDef( classStore, schema, maybeBaseTrait, maybeFlags) List(objectDef) case EnumAsScalaString => List.empty } case _ => sys.error("Only RECORD and ENUM can be top-level definitions") } } }
Example 170
Source File: ScavroMethodRenamer.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger
package format
package scavro

import org.apache.avro.Schema
import org.apache.avro.Schema.Field
import org.apache.avro.compiler.specific.SpecificCompiler
import org.apache.avro.specific.SpecificData

import scala.collection.JavaConverters._

object ScavroMethodRenamer {

  // assumed definition: the original excerpt omitted this val; it is rebuilt
  // to mirror Avro's SpecificCompiler accessor/mutator reserved words
  val ACCESSOR_MUTATOR_RESERVED_WORDS: java.util.Set[String] =
    (Set("class", "schema", "classSchema") ++ SpecificData.RESERVED_WORDS.asScala).asJava

  val ERROR_RESERVED_WORDS: java.util.Set[String] =
    (Set("message", "cause") ++ ACCESSOR_MUTATOR_RESERVED_WORDS.asScala).asJava

  // signature assumed (the excerpt only contained the body); the logic follows
  // Avro's SpecificCompiler.generateMethodName
  def generateMethodName(
    schema: Schema,
    field: Field,
    prefix: String,
    postfix: String): String = {

    // Check for the special case in which the schema defines two fields whose
    // names are identical except for the case of the first character:
    val firstChar: Char = field.name().charAt(0)
    val conflictingFieldName: String = (if (Character.isLowerCase(firstChar))
      Character.toUpperCase(firstChar) else Character.toLowerCase(firstChar)) +
      (if (field.name().length() > 1) field.name().substring(1) else "")
    val fieldNameConflict: Boolean =
      Option(schema.getField(conflictingFieldName)).isDefined

    val methodBuilder: StringBuilder = new StringBuilder(prefix)
    val fieldName: String = SpecificCompiler.mangle(
      field.name(),
      if (schema.isError()) ERROR_RESERVED_WORDS else ACCESSOR_MUTATOR_RESERVED_WORDS,
      true)

    // camel-case the mangled field name: underscores upper-case the next character
    var nextCharToUpper: Boolean = true
    (0 until fieldName.length).foreach(ii => {
      if (fieldName.charAt(ii) == '_') {
        nextCharToUpper = true
      }
      else if (nextCharToUpper) {
        methodBuilder.append(Character.toUpperCase(fieldName.charAt(ii)))
        nextCharToUpper = false
      }
      else {
        methodBuilder.append(fieldName.charAt(ii))
      }
    })
    methodBuilder.append(postfix)

    // If there is a field name conflict append $0 or $1
    if (fieldNameConflict) {
      if (methodBuilder.charAt(methodBuilder.length() - 1) != '$') {
        methodBuilder.append('$')
      }
      methodBuilder.append(if (Character.isLowerCase(firstChar)) '0' else '1')
    }
    methodBuilder.toString()
  }
}
Example 171
Source File: ScavroTraitTree.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package scavro package trees import generators.ScalaDocGenerator import matchers.TypeMatcher import types._ import treehugger.forest._ import definitions._ import treehuggerDSL._ import org.apache.avro.Schema.Type.{ ENUM, RECORD } import org.apache.avro.{ Protocol, Schema } import scala.collection.JavaConverters._ object ScavroTraitTree { def toADTRootDef(protocol: Protocol, typeMatcher: TypeMatcher) = { def isEnum(schema: Schema) = schema.getType == ENUM val sealedTraitTree = TRAITDEF(protocol.getName).withFlags(Flags.SEALED) val adtRootTree = { val adtSubTypes = typeMatcher.avroScalaTypes.enum match { case JavaEnum => protocol.getTypes.asScala.toList.filterNot(isEnum) case ScalaCaseObjectEnum => protocol.getTypes.asScala.toList case ScalaEnumeration => protocol.getTypes.asScala.toList case EnumAsScalaString => protocol.getTypes.asScala.filterNot(isEnum) } if (adtSubTypes.forall(schema => schema.getType == RECORD)) { sealedTraitTree .withParents("AvroSerializeable") .withParents("Product") .withParents("Serializable") } else sealedTraitTree } val treeWithScalaDoc = ScalaDocGenerator.docToScalaDoc( Right(protocol), adtRootTree) treeWithScalaDoc } def toCaseObjectEnumDef(schema: Schema, maybeBaseTrait: Option[String]): List[Tree] = { val adtRootTree: Tree = maybeBaseTrait match { case Some(baseTrait) => TRAITDEF(schema.getName).withFlags(Flags.SEALED).withParents(baseTrait) case None => TRAITDEF(schema.getName).withFlags(Flags.SEALED) } val adtSubTypes: List[Tree] = schema.getEnumSymbols.asScala .map(enumSymbol => enumSymbol.toString) .map(enumSymbolString => { (CASEOBJECTDEF(enumSymbolString).withParents(schema.getName): Tree) }).toList val objectTree = OBJECTDEF(schema.getName) := Block(adtSubTypes:_*) val adtRootTreeWithScalaDoc: Tree = ScalaDocGenerator.docToScalaDoc( Left(schema), adtRootTree) List(adtRootTreeWithScalaDoc, objectTree) } }
Example 172
Source File: ScalaConverter.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package scavro package converters import matchers.TypeMatcher import types._ import treehugger.forest._ import definitions._ import treehuggerDSL._ import org.apache.avro.Schema import scala.language.postfixOps import scala.collection.JavaConverters._ class ScalaConverter(typeMatcher: TypeMatcher) { def convertFromJava( schema: Schema, tree: Tree, fieldPath: List[String] = List.empty): Tree = { schema.getType match { case Schema.Type.ENUM => typeMatcher.avroScalaTypes.enum match { case EnumAsScalaString => tree TOSTRING case JavaEnum | ScalaEnumeration | ScalaCaseObjectEnum => { val conversionCases = schema.getEnumSymbols.asScala.map(enumSymbol => { CASE(REF("J" + schema.getName) DOT(enumSymbol)) ==> (REF(schema.getName) DOT(enumSymbol)) }) tree MATCH(conversionCases) } } case Schema.Type.RECORD => { REF(schema.getName).DOT("metadata").DOT("fromAvro").APPLY(tree) } case Schema.Type.UNION => { val types = schema.getTypes.asScala // check if it's the kind of union that we support (i.e. nullable fields) if (types.length != 2 || !types.map(x => x.getType).contains(Schema.Type.NULL) || types.filterNot(x => x.getType == Schema.Type.NULL).length != 1) { sys.error("Unions beyond nullable fields are not supported") } // the union represents a nullable field, the kind of union supported in avrohugger else { val typeParamSchema = types.find(x => x.getType != Schema.Type.NULL).get val nullConversion = CASE(NULL) ==> NONE val someConversion = CASE(WILDCARD) ==> SOME(convertFromJava(typeParamSchema, tree, fieldPath)) val conversionCases = List(nullConversion, someConversion) tree MATCH(conversionCases:_*) } } case Schema.Type.NULL => NULL case Schema.Type.STRING => tree TOSTRING case Schema.Type.INT => tree DOT "toInt" case Schema.Type.FLOAT => tree DOT "toFloat" case Schema.Type.DOUBLE => tree DOT "toDouble" case Schema.Type.LONG => tree DOT "toLong" case Schema.Type.ARRAY => { val seqArgs = SEQARG(tree DOT "asScala") val collection = typeMatcher.avroScalaTypes.array match { case ScalaArray => ARRAY(seqArgs) case ScalaList => LIST(seqArgs) case ScalaSeq => SEQ(seqArgs) case ScalaVector => VECTOR(seqArgs) } collection MAP(LAMBDA(PARAM("x")) ==> BLOCK(convertFromJava(schema.getElementType, REF("x"), fieldPath))) } case Schema.Type.MAP => { val JavaMap = RootClass.newClass("java.util.Map[_,_]") val resultExpr = { BLOCK( REF("scala.collection.JavaConverters.mapAsScalaMapConverter") .APPLY(REF("map")) .DOT("asScala") .DOT("toMap") .MAP(LAMBDA(PARAM("kvp")) ==> BLOCK( VAL("key") := REF("kvp._1").DOT("toString"), VAL("value") := REF("kvp._2"), PAREN(REF("key"), convertFromJava(schema.getValueType, REF("value"), fieldPath))) ) ) } val mapConversion = CASE(ID("map") withType(JavaMap)) ==> resultExpr tree MATCH(mapConversion) } case Schema.Type.FIXED => sys.error("the FIXED datatype is not yet supported") case Schema.Type.BYTES => { val JavaBuffer = RootClass.newClass("java.nio.ByteBuffer") tree MATCH CASE(ID("buffer") withType(JavaBuffer)) ==> Block( REF("buffer") DOT "array" APPLY(Nil) ) } case _ => tree } } }
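The UNION branch above only supports nullable fields, i.e. a two-branch union where one branch is NULL. That check can be reproduced with the plain Avro API; the helper name below is illustrative:

import java.util.Arrays
import org.apache.avro.Schema
import scala.collection.JavaConverters._

object NullableUnionExample extends App {
  // ["null", "int"] -- the kind of union avrohugger maps to Option[Int]
  val nullableInt: Schema = Schema.createUnion(
    Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.INT)))

  // mirrors the check in ScalaConverter: exactly two branches, one of them NULL
  def nonNullBranch(union: Schema): Option[Schema] = {
    val branches = union.getTypes.asScala
    if (branches.length == 2 && branches.exists(_.getType == Schema.Type.NULL))
      branches.find(_.getType != Schema.Type.NULL)
    else None
  }

  println(nonNullBranch(nullableInt).map(_.getType)) // Some(INT)
}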
Example 173
Source File: ScavroScalaTreehugger.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package format package scavro import format.abstractions.ScalaTreehugger import avrohuggers.{ ScavroProtocolhugger, ScavroSchemahugger } import input.reflectivecompilation.schemagen._ import matchers.TypeMatcher import stores.{ ClassStore, SchemaStore } import org.apache.avro.{ Protocol, Schema } import treehugger.forest._ import definitions._ import treehuggerDSL._ object ScavroScalaTreehugger extends ScalaTreehugger { val schemahugger = ScavroSchemahugger val protocolhugger = ScavroProtocolhugger val importer = ScavroImporter // SpecificCompiler can't return a tree for Java enums, so return // a String here for a consistent api vis a vis *ToFile and *ToStrings def asScalaCodeString( classStore: ClassStore, namespace: Option[String], schemaOrProtocol: Either[Schema, Protocol], typeMatcher: TypeMatcher, schemaStore: SchemaStore, restrictedFields: Boolean): String = { val imports: List[Import] = importer.getImports( schemaOrProtocol, namespace, schemaStore, typeMatcher) val topLevelDefs: List[Tree] = schemaOrProtocol match { case Left(schema) => schemahugger.toTrees( schemaStore, classStore, namespace, schema, typeMatcher, None, None, restrictedFields ) case Right(protocol) => protocolhugger.toTrees( schemaStore, classStore, namespace, protocol, typeMatcher, None, None, restrictedFields ) } // wrap the imports and classdef in a block with a comment and a package val tree = { val blockContent = imports ++ topLevelDefs if (namespace.isDefined) BLOCK(blockContent).inPackage(namespace.get) else BLOCK(blockContent:_*).withoutPackage }.withDoc("MACHINE-GENERATED FROM AVRO SCHEMA. DO NOT EDIT DIRECTLY") treeToString(tree) } }
Example 174
Source File: NestedSchemaExtractor.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package input import avrohugger.matchers.TypeMatcher import stores.SchemaStore import types.EnumAsScalaString import org.apache.avro.Schema import org.apache.avro.Schema.Type.{ARRAY, ENUM, MAP, RECORD, UNION} import scala.collection.JavaConverters._ object NestedSchemaExtractor { // if a record is found, extract nested RECORDs and ENUMS (i.e. top-level types) def getNestedSchemas( schema: Schema, schemaStore: SchemaStore, typeMatcher: TypeMatcher): List[Schema] = { def extract( schema: Schema, fieldPath: List[String] = List.empty): List[Schema] = { schema.getType match { case RECORD => val fields: List[Schema.Field] = schema.getFields.asScala.toList val fieldSchemas: List[Schema] = fields.map(field => field.schema) def flattenSchema(fieldSchema: Schema): List[Schema] = { fieldSchema.getType match { case ARRAY => flattenSchema(fieldSchema.getElementType) case MAP => flattenSchema(fieldSchema.getValueType) case RECORD => { // if the field schema is one that has already been stored, use that one if (schemaStore.schemas.contains(fieldSchema.getFullName)) List() // if we've already seen this schema (recursive schemas) don't traverse further else if (fieldPath.contains(fieldSchema.getFullName)) List() else fieldSchema :: extract(fieldSchema, fieldSchema.getFullName :: fieldPath) } case UNION => fieldSchema.getTypes.asScala.toList.flatMap(x => flattenSchema(x)) case ENUM => { // if the field schema is one that has already been stored, use that one if (schemaStore.schemas.contains(fieldSchema.getFullName)) List() else List(fieldSchema) } case _ => List(fieldSchema) } } val flatSchemas = fieldSchemas.flatMap(fieldSchema => flattenSchema(fieldSchema)) def topLevelTypes(schema: Schema) = { if (typeMatcher.avroScalaTypes.enum == EnumAsScalaString) schema.getType == RECORD else (schema.getType == RECORD | schema.getType == ENUM) } val nestedTopLevelSchemas = flatSchemas.filter(topLevelTypes) nestedTopLevelSchemas case ENUM => List(schema) case _ => Nil } } schema::extract(schema) } }
Example 175
Source File: DependencyInspector.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package input import org.apache.avro.Schema import scala.collection.JavaConverters._ object DependencyInspector { import Schema.Type._ def getReferredNamespace(schema: Schema): Option[String] = schema.getType match { case ARRAY => getReferredNamespace(schema.getElementType) case UNION => schema.getTypes.asScala.find( innerType => innerType.getType != NULL ) flatMap getReferredNamespace case MAP => getReferredNamespace(schema.getValueType) case RECORD | ENUM => Option(schema.getNamespace) case _ => None } def getReferredTypeName(schema: Schema): String = schema.getType match { case ARRAY => getReferredTypeName(schema.getElementType) case UNION => schema.getTypes.asScala.find( innerType => innerType.getType != NULL ).map( getReferredTypeName ).getOrElse("") case MAP => getReferredTypeName(schema.getValueType) case _ => schema.getName } }
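A short sketch of DependencyInspector in action, parsing a record whose array field refers to a record in another namespace (the schema JSON is made up for illustration):

import org.apache.avro.Schema
import avrohugger.input.DependencyInspector

object DependencyInspectorExample extends App {
  val schemaJson =
    """{
      |  "type": "record", "name": "Order", "namespace": "example.shop",
      |  "fields": [
      |    {"name": "items", "type": {"type": "array", "items":
      |      {"type": "record", "name": "Item", "namespace": "example.catalog",
      |       "fields": [{"name": "sku", "type": "string"}]}}}
      |  ]
      |}""".stripMargin

  val order = new Schema.Parser().parse(schemaJson)
  val itemsField = order.getField("items").schema()

  // the inspector unwraps the array and reports the referred record
  println(DependencyInspector.getReferredNamespace(itemsField)) // Some(example.catalog)
  println(DependencyInspector.getReferredTypeName(itemsField))  // Item
}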
Example 176
Source File: IdlImportParser.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger
package input
package parsers

import org.apache.avro.{ Protocol, Schema }

import java.io.File
import scala.util.matching.Regex.Match

object IdlImportParser {

  def stripComments(fileContents: String): String = {
    val multiLinePattern = """/\*.*\*/""".r
    val singleLinePattern = """//.*$""".r
    val noSingleLines = singleLinePattern.replaceAllIn(fileContents, "")
    val commentFree = multiLinePattern.replaceAllIn(noSingleLines, "")
    commentFree
  }

  def getImportedFiles(infile: File, classLoader: ClassLoader): List[File] = {
    def readFile(file: File, attempt: Int = 1): String = {
      val maxTries = 3
      try {
        val source = scala.io.Source.fromFile(file)
        val fileContents: String = stripComments(source.mkString)
        source.close
        // if file is empty, try again, it was there when we read idl
        if (fileContents.isEmpty && (attempt < maxTries)) readFile(file, attempt + 1)
        else fileContents
      } catch { // if file is not found, try again, it was there when we read idl
        case e: java.io.FileNotFoundException => {
          if (attempt < maxTries) readFile(file, attempt + 1)
          else sys.error("File not found: " + file)
        }
      }
    }
    val path = infile.getParent + "/"
    val contents = readFile(infile)
    val avdlPattern = """import[ \t]+idl[ \t]+"([^"]*\.avdl)"[ \t]*;""".r
    val avprPattern = """import[ \t]+protocol[ \t]+"([^"]*\.avpr)"[ \t]*;""".r
    val avscPattern = """import[ \t]+schema[ \t]+"([^"]*\.avsc)"[ \t]*;""".r
    val idlMatches = avdlPattern.findAllIn(contents).matchData.toList
    val protocolMatches = avprPattern.findAllIn(contents).matchData.toList
    val schemaMatches = avscPattern.findAllIn(contents).matchData.toList
    val importMatches = idlMatches ::: protocolMatches ::: schemaMatches

    val (localImports, nonLocalMatches): (List[File], List[Match]) =
      importMatches.foldLeft((List.empty[File], List.empty[Match])) { case ((ai, am), m) =>
        val f = new File(path + m.group(1))
        if (f.exists) (ai :+ f, am)
        else (ai, am :+ m)
      }

    val classpathImports: List[File] = nonLocalMatches.map(m => {
      Option(classLoader.getResource(m.group(1))).map(resource => {
        new File(resource.getFile)
      })
    }).flatMap(_.toList).filter(file => file.exists)

    val importedFiles = classpathImports ++ localImports
    importedFiles
  }
}
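stripComments is the only helper here that is easy to exercise in isolation. A small sketch of what it strips from a single-line IDL string (the sample input is made up):

import avrohugger.input.parsers.IdlImportParser

object StripCommentsExample extends App {
  val idl = """protocol Demo { /* block comment */ string ping(); } // trailing comment"""

  // both the same-line block comment and the trailing line comment are removed
  println(IdlImportParser.stripComments(idl))
  // prints: protocol Demo {  string ping(); }
}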
Example 177
Source File: StringInputParser.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package input package parsers import reflectivecompilation.{ PackageSplitter, Toolbox } import stores.{ SchemaStore, TypecheckDependencyStore } import org.apache.avro.Protocol import org.apache.avro.Schema import org.apache.avro.Schema.Parser import org.apache.avro.SchemaParseException import org.apache.avro.compiler.idl.Idl import org.apache.avro.compiler.idl.ParseException import scala.collection.JavaConverters._ import java.nio.charset.Charset import java.io.FileNotFoundException // tries schema first, then protocol, then idl, then for case class defs class StringInputParser { lazy val schemaParser = new Parser() lazy val typecheckDependencyStore = new TypecheckDependencyStore def getSchemaOrProtocols( inputString: String, schemaStore: SchemaStore): List[Either[Schema, Protocol]] = { def trySchema(str: String): List[Either[Schema, Protocol]] = { try { List(Left(schemaParser.parse(str))) } catch { case notSchema: SchemaParseException => tryProtocol(str) case unknown: Throwable => sys.error("Unexpected exception: " + unknown) } } def tryProtocol(str: String): List[Either[Schema, Protocol]] = { try { List(Right(Protocol.parse(str))) } catch { case notProtocol: SchemaParseException => tryIDL(str) case unknown: Throwable => sys.error("Unexpected exception: " + unknown) } } def tryIDL(str: String): List[Either[Schema, Protocol]] = { try { val bytes = str.getBytes(Charset.forName("UTF-8")) val inStream = new java.io.ByteArrayInputStream(bytes) val idlParser = new Idl(inStream) val protocol = idlParser.CompilationUnit() List(Right(protocol)) } catch { case e: ParseException => { if (e.getMessage.contains("FileNotFoundException")) { sys.error("Imports not supported in String IDLs, only avdl files.") } else tryCaseClass(str, schemaStore) } case unknown: Throwable => sys.error("Unexpected exception: " + unknown) } } def tryCaseClass( str: String, schemaStore: SchemaStore): List[Either[Schema, Protocol]] = { val compilationUnits = PackageSplitter.getCompilationUnits(str) val scalaDocs = ScalaDocParser.getScalaDocs(compilationUnits) val trees = compilationUnits.map(src => Toolbox.toolBox.parse(src)) val treesZippedWithDocs = trees.zip(scalaDocs) val schemas = treesZippedWithDocs.flatMap(treeAndDocs => { val tree = treeAndDocs._1 val docs = treeAndDocs._2 TreeInputParser.parse(tree, docs, schemaStore, typecheckDependencyStore) }) schemas.map(schema => Left(schema)) } // tries schema first, then protocol, then idl, then for case class defs val schemaOrProtocols = trySchema(inputString) schemaOrProtocols } }
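The fallback chain above can be illustrated without avrohugger's stores by using the same Avro entry points directly: try Schema.Parser first and fall back to Protocol.parse when the input turns out to be a protocol. The sample JSON strings below are made up; this sketch stops at the protocol step and skips the IDL and case-class stages:

import org.apache.avro.{ Protocol, Schema, SchemaParseException }

object ParseCascadeExample extends App {
  def parse(input: String): Either[Schema, Protocol] =
    try Left(new Schema.Parser().parse(input))
    catch { case _: SchemaParseException => Right(Protocol.parse(input)) }

  val schemaJson =
    """{"type": "record", "name": "Ping", "fields": [{"name": "msg", "type": "string"}]}"""
  val protocolJson =
    """{"protocol": "Demo", "namespace": "example", "types": [], "messages": {}}"""

  println(parse(schemaJson))   // Left(...record Ping...)
  println(parse(protocolJson)) // Right(...protocol Demo...)
}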
Example 178
Source File: RecordSchemaGenerator.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package input package reflectivecompilation package schemagen import parsers.ScalaDocParser import stores.{ SchemaStore, TypecheckDependencyStore } import org.apache.avro.Schema.Field import org.apache.avro.Schema import java.util.{ Arrays => JArrays } import scala.reflect.runtime.universe._ import scala.reflect.runtime.currentMirror import scala.collection.JavaConverters._ object RecordSchemaGenerator { def generateSchema( className: String, namespace: Option[Name], fields: List[ValDef], maybeScalaDoc: Option[String], schemaStore: SchemaStore, typecheckDependencyStore: TypecheckDependencyStore): Schema = { // Can't seem to typecheck packaged classes, so splice-in unpackaged versions // and later the FieldSchemaGenerator's type matcher must be passed the field's // namespace explicitly. def typeCheck(t: Tree) = { val dependencies = typecheckDependencyStore.knownClasses.values.toList Toolbox.toolBox.typeCheck(q"..$dependencies; {type T = $t}") match { case x @ Block(classDefs, Block(List(TypeDef(mods, name, tparams, rhs)), const)) => rhs.tpe case _ => t.tpe // if there are no fields, then no dependencies either } } def toAvroFieldSchema(valDef: ValDef) = { val (referredNamespace, fieldType) = valDef.tpt match { case tq"$ns.$typeName" => (Some(newTermName(ns.toString)), tq"$typeName") case t => (namespace, t) } val maybeFieldDoc = ScalaDocParser.fieldDocsMap(maybeScalaDoc).get(valDef.name.toString) new FieldSchemaGenerator().toAvroField( referredNamespace, valDef.name, typeCheck(fieldType), valDef.rhs, maybeFieldDoc, schemaStore ) } // conversion from Option to String/null is for compatibility with Apache Avro val ns = namespace match { case Some(n) => n.toString case None => null } val avroFields = fields.map(valDef => { toAvroFieldSchema(valDef) }) // conversion from Option to String/null is for compatibility with Apache Avro val recordDoc = ScalaDocParser.getTopLevelDoc(maybeScalaDoc) val avroSchema = Schema.createRecord(className, recordDoc, ns, false) avroSchema.setFields(JArrays.asList(avroFields.toArray:_*)) schemaStore.accept(avroSchema) avroSchema } }
Example 179
Source File: EnumSchemaGenerator.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package input package reflectivecompilation package schemagen import parsers.ScalaDocParser import stores.SchemaStore import org.apache.avro.Schema import java.util.{Arrays => JArrays} import scala.reflect.runtime.universe.Name import scala.collection.JavaConverters._ object EnumSchemaGenerator { def generateSchema( className: String, namespace: Option[Name], values: List[Name], maybeScalaDoc: Option[String], schemaStore: SchemaStore): Schema = { // conversion from Option to String/null is for compatibility with Apache Avro val ns = namespace match { case Some(n) => n.toString case None => null } val vals = JArrays.asList(values.map(value => value.toString).toArray:_*) val doc = ScalaDocParser.getTopLevelDoc(maybeScalaDoc) val avroSchema = Schema.createEnum(className, doc, ns, vals) schemaStore.accept(avroSchema) avroSchema } }
Example 180
Source File: FileGenerator.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package generators import avrohugger.format.abstractions.SourceFormat import avrohugger.input.DependencyInspector import avrohugger.input.NestedSchemaExtractor import avrohugger.input.reflectivecompilation.schemagen._ import avrohugger.input.parsers.{ FileInputParser, StringInputParser} import avrohugger.matchers.TypeMatcher import avrohugger.stores.{ ClassStore, SchemaStore } import java.io.{File, FileNotFoundException, IOException} import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Type.ENUM // Unable to overload this class' methods because outDir uses a default value private[avrohugger] object FileGenerator { def schemaToFile( schema: Schema, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val topNS: Option[String] = DependencyInspector.getReferredNamespace(schema) val topLevelSchemas: List[Schema] = NestedSchemaExtractor.getNestedSchemas(schema, schemaStore, typeMatcher) // most-nested classes processed first topLevelSchemas.reverse.distinct.foreach(schema => { // pass in the top-level schema's namespace if the nested schema has none val ns = DependencyInspector.getReferredNamespace(schema) orElse topNS format.compile(classStore, ns, Left(schema), outDir, schemaStore, typeMatcher, restrictedFields) }) } def protocolToFile( protocol: Protocol, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val ns = Option(protocol.getNamespace) format.compile(classStore, ns, Right(protocol), outDir, schemaStore, typeMatcher, restrictedFields) } def stringToFile( str: String, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, stringParser: StringInputParser, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val schemaOrProtocols = stringParser.getSchemaOrProtocols(str, schemaStore) schemaOrProtocols.foreach(schemaOrProtocol => { schemaOrProtocol match { case Left(schema) => { schemaToFile(schema, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } case Right(protocol) => { protocolToFile(protocol, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } } }) } def fileToFile( inFile: File, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, fileParser: FileInputParser, typeMatcher: TypeMatcher, classLoader: ClassLoader, restrictedFields: Boolean): Unit = { val schemaOrProtocols: List[Either[Schema, Protocol]] = fileParser.getSchemaOrProtocols(inFile, format, classStore, classLoader) schemaOrProtocols.foreach(schemaOrProtocol => schemaOrProtocol match { case Left(schema) => { schemaToFile(schema, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } case Right(protocol) => { protocolToFile(protocol, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } }) } }
Example 181
Source File: ScalaDocGenerator.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package generators import treehugger.forest._ import definitions._ import treehuggerDSL._ import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Field import org.apache.avro.Schema.Type.{ ENUM, RECORD } import scala.language.postfixOps import scala.collection.JavaConverters._ object ScalaDocGenerator { def docToScalaDoc( schemaOrProtocol: Either[Schema, Protocol], tree: Tree): Tree = { def aFieldHasDoc(schema: Schema): Boolean = { schema.getFields.asScala.exists(field => { val maybeFieldDoc = Option(field.doc) isDoc(maybeFieldDoc) }) } def topLevelHasDoc(schema: Schema): Boolean = { val maybeSchemaDoc = Option(schema.getDoc) isDoc(maybeSchemaDoc) } def isDoc(maybeDoc: Option[String]): Boolean = { maybeDoc match { case Some(doc) => true case None => false } } // Need arbitrary number of fields, so can't use DocTags, must return String def getFieldFauxDocTags(schema: Schema): List[String] = { val docStrings = schema.getFields.asScala.toList.map(field => { val fieldName = field.name val fieldDoc = Option(field.doc).getOrElse("") s"@param $fieldName $fieldDoc" }) docStrings } def wrapClassWithDoc(schema: Schema, tree: Tree, docs: List[String]) = { if (topLevelHasDoc(schema) || aFieldHasDoc(schema)) tree.withDoc(docs) else tree } def wrapEnumWithDoc(schema: Schema, tree: Tree, docs: List[String]) = { if (topLevelHasDoc(schema)) tree.withDoc(docs) else tree } def wrapTraitWithDoc(protocol: Protocol, tree: Tree, docs: List[String]) = { if (isDoc(Option(protocol.getDoc))) tree.withDoc(docs) else tree } val docStrings: List[String] = schemaOrProtocol match { case Left(schema) => Option(schema.getDoc).toList case Right(protocol) => Option(protocol.getDoc).toList } schemaOrProtocol match { case Left(schema) => schema.getType match { case RECORD => val paramDocs = getFieldFauxDocTags(schema) wrapClassWithDoc(schema, tree, docStrings:::paramDocs) case ENUM => wrapEnumWithDoc(schema, tree, docStrings) case _ => sys.error("Error generating ScalaDoc from Avro doc. Not ENUM/RECORD") } case Right(protocol) => wrapTraitWithDoc(protocol, tree, docStrings) } } }
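The @param lines above are derived straight from each field's Avro doc. A small sketch of the same derivation on a schema built with SchemaBuilder (only the standard Avro API is used; the object name is illustrative):

import org.apache.avro.{ Schema, SchemaBuilder }
import scala.collection.JavaConverters._

object FieldDocExample extends App {
  val user: Schema = SchemaBuilder.record("User").doc("A registered user.").fields()
    .name("name").doc("The user's full name.").`type`().stringType().noDefault()
    .name("age").`type`().intType().noDefault()
    .endRecord()

  // same shape as getFieldFauxDocTags: one "@param field doc" line per field
  val paramDocs = user.getFields.asScala.toList.map { field =>
    s"@param ${field.name} ${Option(field.doc).getOrElse("")}"
  }

  paramDocs.foreach(println)
  // @param name The user's full name.
  // @param age
}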
Example 182
Source File: EnumProtocol.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.idl.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import example.idl.{Card => JCard, Suit => JSuit} sealed trait EnumProtocol extends AvroSerializeable with Product with Serializable final object Suit extends Enumeration with EnumProtocol { type Suit = Value val SPADES, DIAMONDS, CLUBS, HEARTS = Value } final case class Card(suit: Suit.Value, number: Int) extends AvroSerializeable with EnumProtocol { type J = JCard override def toAvro: JCard = { new JCard(suit match { case Suit.SPADES => JSuit.SPADES case Suit.DIAMONDS => JSuit.DIAMONDS case Suit.CLUBS => JSuit.CLUBS case Suit.HEARTS => JSuit.HEARTS }, number) } } final object Card { implicit def reader = new AvroReader[Card] { override type J = JCard } implicit val metadata: AvroMetadata[Card, JCard] = new AvroMetadata[Card, JCard] { override val avroClass: Class[JCard] = classOf[JCard] override val schema: Schema = JCard.getClassSchema() override val fromAvro: (JCard) => Card = { (j: JCard) => Card(j.getSuit match { case JSuit.SPADES => Suit.SPADES case JSuit.DIAMONDS => Suit.DIAMONDS case JSuit.CLUBS => Suit.CLUBS case JSuit.HEARTS => Suit.HEARTS }, j.getNumber.toInt) } } }
Example 183
Source File: EnumProtocol.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.proto.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import example.proto.{Card => JCard, Suit => JSuit} sealed trait EnumProtocol extends AvroSerializeable with Product with Serializable final object Suit extends Enumeration with EnumProtocol { type Suit = Value val SPADES, HEARTS, DIAMONDS, CLUBS = Value } final case class Card(suit: Suit.Value, number: Int) extends AvroSerializeable with EnumProtocol { type J = JCard override def toAvro: JCard = { new JCard(suit match { case Suit.SPADES => JSuit.SPADES case Suit.HEARTS => JSuit.HEARTS case Suit.DIAMONDS => JSuit.DIAMONDS case Suit.CLUBS => JSuit.CLUBS }, number) } } final object Card { implicit def reader = new AvroReader[Card] { override type J = JCard } implicit val metadata: AvroMetadata[Card, JCard] = new AvroMetadata[Card, JCard] { override val avroClass: Class[JCard] = classOf[JCard] override val schema: Schema = JCard.getClassSchema() override val fromAvro: (JCard) => Card = { (j: JCard) => Card(j.getSuit match { case JSuit.SPADES => Suit.SPADES case JSuit.HEARTS => Suit.HEARTS case JSuit.DIAMONDS => Suit.DIAMONDS case JSuit.CLUBS => Suit.CLUBS }, j.getNumber.toInt) } } }
Example 184
Source File: Example5.scala From avrohugger with Apache License 2.0 | 5 votes |
package com.example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import com.example.{NoSpaces6 => JNoSpaces6, NoSpaces7 => JNoSpaces7} final case class NoSpaces6(comment_property1: String) extends AvroSerializeable { type J = JNoSpaces6 override def toAvro: JNoSpaces6 = { new JNoSpaces6(comment_property1) } } object NoSpaces6 { implicit def reader = new AvroReader[NoSpaces6] { override type J = JNoSpaces6 } implicit val metadata: AvroMetadata[NoSpaces6, JNoSpaces6] = new AvroMetadata[NoSpaces6, JNoSpaces6] { override val avroClass: Class[JNoSpaces6] = classOf[JNoSpaces6] override val schema: Schema = JNoSpaces6.getClassSchema() override val fromAvro: (JNoSpaces6) => NoSpaces6 = { (j: JNoSpaces6) => NoSpaces6(j.getCommentProperty1.toString) } } } final case class NoSpaces7(comment_property2: String) extends AvroSerializeable { type J = JNoSpaces7 override def toAvro: JNoSpaces7 = { new JNoSpaces7(comment_property2) } } object NoSpaces7 { implicit def reader = new AvroReader[NoSpaces7] { override type J = JNoSpaces7 } implicit val metadata: AvroMetadata[NoSpaces7, JNoSpaces7] = new AvroMetadata[NoSpaces7, JNoSpaces7] { override val avroClass: Class[JNoSpaces7] = classOf[JNoSpaces7] override val schema: Schema = JNoSpaces7.getClassSchema() override val fromAvro: (JNoSpaces7) => NoSpaces7 = { (j: JNoSpaces7) => NoSpaces7(j.getCommentProperty2.toString) } } }
Example 185
Source File: NoSpaces2.scala From avrohugger with Apache License 2.0 | 5 votes |
package com.example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import com.example.{NoSpaces2 => JNoSpaces2} final case class NoSpaces2(comment_property: String) extends AvroSerializeable { type J = JNoSpaces2 override def toAvro: JNoSpaces2 = { new JNoSpaces2(comment_property) } } object NoSpaces2 { implicit def reader = new AvroReader[NoSpaces2] { override type J = JNoSpaces2 } implicit val metadata: AvroMetadata[NoSpaces2, JNoSpaces2] = new AvroMetadata[NoSpaces2, JNoSpaces2] { override val avroClass: Class[JNoSpaces2] = classOf[JNoSpaces2] override val schema: Schema = JNoSpaces2.getClassSchema() override val fromAvro: (JNoSpaces2) => NoSpaces2 = { (j: JNoSpaces2) => NoSpaces2(j.getCommentProperty.toString) } } }
Example 186
Source File: NoSpaces3.scala From avrohugger with Apache License 2.0 | 5 votes |
package com.example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import com.example.{NoSpaces3 => JNoSpaces3} final case class NoSpaces3(comment_property: String) extends AvroSerializeable { type J = JNoSpaces3 override def toAvro: JNoSpaces3 = { new JNoSpaces3(comment_property) } } object NoSpaces3 { implicit def reader = new AvroReader[NoSpaces3] { override type J = JNoSpaces3 } implicit val metadata: AvroMetadata[NoSpaces3, JNoSpaces3] = new AvroMetadata[NoSpaces3, JNoSpaces3] { override val avroClass: Class[JNoSpaces3] = classOf[JNoSpaces3] override val schema: Schema = JNoSpaces3.getClassSchema() override val fromAvro: (JNoSpaces3) => NoSpaces3 = { (j: JNoSpaces3) => NoSpaces3(j.getCommentProperty.toString) } } }
Example 187
Source File: Example4.scala From avrohugger with Apache License 2.0 | 5 votes |
package com.example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import com.example.{NoSpaces4 => JNoSpaces4, NoSpaces5 => JNoSpaces5} sealed trait Example4 extends AvroSerializeable with Product with Serializable final case class NoSpaces4(comment_property1: String) extends AvroSerializeable with Example4 { type J = JNoSpaces4 override def toAvro: JNoSpaces4 = { new JNoSpaces4(comment_property1) } } final object NoSpaces4 { implicit def reader = new AvroReader[NoSpaces4] { override type J = JNoSpaces4 } implicit val metadata: AvroMetadata[NoSpaces4, JNoSpaces4] = new AvroMetadata[NoSpaces4, JNoSpaces4] { override val avroClass: Class[JNoSpaces4] = classOf[JNoSpaces4] override val schema: Schema = JNoSpaces4.getClassSchema() override val fromAvro: (JNoSpaces4) => NoSpaces4 = { (j: JNoSpaces4) => NoSpaces4(j.getCommentProperty1.toString) } } } final case class NoSpaces5(comment_property2: String) extends AvroSerializeable with Example4 { type J = JNoSpaces5 override def toAvro: JNoSpaces5 = { new JNoSpaces5(comment_property2) } } final object NoSpaces5 { implicit def reader = new AvroReader[NoSpaces5] { override type J = JNoSpaces5 } implicit val metadata: AvroMetadata[NoSpaces5, JNoSpaces5] = new AvroMetadata[NoSpaces5, JNoSpaces5] { override val avroClass: Class[JNoSpaces5] = classOf[JNoSpaces5] override val schema: Schema = JNoSpaces5.getClassSchema() override val fromAvro: (JNoSpaces5) => NoSpaces5 = { (j: JNoSpaces5) => NoSpaces5(j.getCommentProperty2.toString) } } }
Example 188
Source File: NoSpaces1.scala From avrohugger with Apache License 2.0 | 5 votes |
package com.example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import com.example.{NoSpaces1 => JNoSpaces1} final case class NoSpaces1(single_line_comment_property: String, multi_line_property: String) extends AvroSerializeable { type J = JNoSpaces1 override def toAvro: JNoSpaces1 = { new JNoSpaces1(single_line_comment_property, multi_line_property) } } object NoSpaces1 { implicit def reader = new AvroReader[NoSpaces1] { override type J = JNoSpaces1 } implicit val metadata: AvroMetadata[NoSpaces1, JNoSpaces1] = new AvroMetadata[NoSpaces1, JNoSpaces1] { override val avroClass: Class[JNoSpaces1] = classOf[JNoSpaces1] override val schema: Schema = JNoSpaces1.getClassSchema() override val fromAvro: (JNoSpaces1) => NoSpaces1 = { (j: JNoSpaces1) => NoSpaces1(j.getSingleLineCommentProperty.toString, j.getMultiLineProperty.toString) } } }
Example 189
Source File: Compass.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import example.{Compass => JCompass, Direction => JDirection} final case class Compass(direction: Direction.Value) extends AvroSerializeable { type J = JCompass override def toAvro: JCompass = { new JCompass(direction match { case Direction.NORTH => JDirection.NORTH case Direction.SOUTH => JDirection.SOUTH case Direction.EAST => JDirection.EAST case Direction.WEST => JDirection.WEST }) } } object Compass { implicit def reader = new AvroReader[Compass] { override type J = JCompass } implicit val metadata: AvroMetadata[Compass, JCompass] = new AvroMetadata[Compass, JCompass] { override val avroClass: Class[JCompass] = classOf[JCompass] override val schema: Schema = JCompass.getClassSchema() override val fromAvro: (JCompass) => Compass = { (j: JCompass) => Compass(j.getDirection match { case JDirection.NORTH => Direction.NORTH case JDirection.SOUTH => Direction.SOUTH case JDirection.EAST => Direction.EAST case JDirection.WEST => Direction.WEST }) } } }
Example 190
Source File: Level0.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.model import org.apache.avro.Schema import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable} import example.{Level0 => JLevel0, Level1 => JLevel1, Level2 => JLevel2} final case class Level0(level1: Level1) extends AvroSerializeable { type J = JLevel0 override def toAvro: JLevel0 = { new JLevel0(level1.toAvro) } } object Level0 { implicit def reader = new AvroReader[Level0] { override type J = JLevel0 } implicit val metadata: AvroMetadata[Level0, JLevel0] = new AvroMetadata[Level0, JLevel0] { override val avroClass: Class[JLevel0] = classOf[JLevel0] override val schema: Schema = JLevel0.getClassSchema() override val fromAvro: (JLevel0) => Level0 = { (j: JLevel0) => Level0(Level1.metadata.fromAvro(j.getLevel1)) } } }
Example 191
Source File: BinarySc.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.{BinarySc => JBinarySc}

final case class BinarySc(data: Array[Byte]) extends AvroSerializeable {
  type J = JBinarySc
  override def toAvro: JBinarySc = {
    new JBinarySc(java.nio.ByteBuffer.wrap(data))
  }
}

object BinarySc {
  implicit def reader = new AvroReader[BinarySc] {
    override type J = JBinarySc
  }
  implicit val metadata: AvroMetadata[BinarySc, JBinarySc] = new AvroMetadata[BinarySc, JBinarySc] {
    override val avroClass: Class[JBinarySc] = classOf[JBinarySc]
    override val schema: Schema = JBinarySc.getClassSchema()
    override val fromAvro: (JBinarySc) => BinarySc = { (j: JBinarySc) =>
      BinarySc(j.getData match {
        case (buffer: java.nio.ByteBuffer) => buffer.array()
      })
    }
  }
}
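Avro bytes fields travel as java.nio.ByteBuffer in the generated Java class, so BinarySc wraps the Array[Byte] on the way out and unwraps it on the way back. A minimal sketch, assuming the generated example.BinarySc Java class is available:

// Hypothetical round trip for a bytes field.
val record   = BinarySc("payload".getBytes("UTF-8"))
val javaRec  = record.toAvro                          // Array[Byte] wrapped in a ByteBuffer
val restored = BinarySc.metadata.fromAvro(javaRec)    // buffer.array() recovers the bytes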
Example 192
Source File: Level1.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.{Level1 => JLevel1, Level2 => JLevel2}

final case class Level1(level2: Level2) extends AvroSerializeable {
  type J = JLevel1
  override def toAvro: JLevel1 = {
    new JLevel1(level2.toAvro)
  }
}

object Level1 {
  implicit def reader = new AvroReader[Level1] {
    override type J = JLevel1
  }
  implicit val metadata: AvroMetadata[Level1, JLevel1] = new AvroMetadata[Level1, JLevel1] {
    override val avroClass: Class[JLevel1] = classOf[JLevel1]
    override val schema: Schema = JLevel1.getClassSchema()
    override val fromAvro: (JLevel1) => Level1 = { (j: JLevel1) =>
      Level1(Level2.metadata.fromAvro(j.getLevel2))
    }
  }
}
Example 193
Source File: User.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.{User => JUser}

final case class User(name: String, favorite_number: Option[Int], favorite_color: Option[String]) extends AvroSerializeable {
  type J = JUser
  override def toAvro: JUser = {
    new JUser(
      name,
      favorite_number match {
        case Some(x) => x
        case None => null
      },
      favorite_color match {
        case Some(x) => x
        case None => null
      })
  }
}

object User {
  implicit def reader = new AvroReader[User] {
    override type J = JUser
  }
  implicit val metadata: AvroMetadata[User, JUser] = new AvroMetadata[User, JUser] {
    override val avroClass: Class[JUser] = classOf[JUser]
    override val schema: Schema = JUser.getClassSchema()
    override val fromAvro: (JUser) => User = { (j: JUser) =>
      User(
        j.getName.toString,
        j.getFavoriteNumber match {
          case null => None
          case _ => Some(j.getFavoriteNumber.toInt)
        },
        j.getFavoriteColor match {
          case null => None
          case _ => Some(j.getFavoriteColor.toString)
        })
    }
  }
}
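The companion object exposes both directions of the conversion: toAvro on the case class and metadata.fromAvro for the reverse, with Option fields mapped to nullable Avro fields. A minimal round-trip sketch, assuming the avrohugger-generated example.User Java class is on the classpath:

// Hypothetical round trip: Scala case class -> generated Java class -> Scala again.
val scalaUser = User("Alice", Some(7), None)          // favorite_color becomes a null Avro field
val javaUser  = scalaUser.toAvro                      // example.User (the generated Java class)
val roundTrip = User.metadata.fromAvro(javaUser)      // back to User("Alice", Some(7), None)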
Example 194
Source File: ClashRecord.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.avro.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.avro.{ClashInner => JClashInner, ClashOuter => JClashOuter, ClashRecord => JClashRecord}

import scala.collection.JavaConverters._

final case class ClashRecord(some: Int, outer: ClashOuter, id: Int) extends AvroSerializeable {
  type J = JClashRecord
  override def toAvro: JClashRecord = {
    new JClashRecord(some, outer.toAvro, id)
  }
}

object ClashRecord {
  implicit def reader = new AvroReader[ClashRecord] {
    override type J = JClashRecord
  }
  implicit val metadata: AvroMetadata[ClashRecord, JClashRecord] = new AvroMetadata[ClashRecord, JClashRecord] {
    override val avroClass: Class[JClashRecord] = classOf[JClashRecord]
    override val schema: Schema = JClashRecord.getClassSchema()
    override val fromAvro: (JClashRecord) => ClashRecord = { (j: JClashRecord) =>
      ClashRecord(j.getSome.toInt, ClashOuter.metadata.fromAvro(j.getOuter), j.getId.toInt)
    }
  }
}
Example 195
Source File: ClashInner.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.avro.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.avro.{ClashInner => JClashInner}

final case class ClashInner(some: Option[Int], other: Option[Int], id: Option[Int]) extends AvroSerializeable {
  type J = JClashInner
  override def toAvro: JClashInner = {
    new JClashInner(
      some match {
        case Some(x) => x
        case None => null
      },
      other match {
        case Some(x) => x
        case None => null
      },
      id match {
        case Some(x) => x
        case None => null
      })
  }
}

object ClashInner {
  implicit def reader = new AvroReader[ClashInner] {
    override type J = JClashInner
  }
  implicit val metadata: AvroMetadata[ClashInner, JClashInner] = new AvroMetadata[ClashInner, JClashInner] {
    override val avroClass: Class[JClashInner] = classOf[JClashInner]
    override val schema: Schema = JClashInner.getClassSchema()
    override val fromAvro: (JClashInner) => ClashInner = { (j: JClashInner) =>
      ClashInner(
        j.getSome match {
          case null => None
          case _ => Some(j.getSome.toInt)
        },
        j.getOther match {
          case null => None
          case _ => Some(j.getOther.toInt)
        },
        j.getId match {
          case null => None
          case _ => Some(j.getId.toInt)
        })
    }
  }
}
Example 196
Source File: ClashOuter.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.avro.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.avro.{ClashInner => JClashInner, ClashOuter => JClashOuter}

import scala.collection.JavaConverters._

final case class ClashOuter(inner: Option[Array[Option[ClashInner]]]) extends AvroSerializeable {
  type J = JClashOuter
  override def toAvro: JClashOuter = {
    new JClashOuter(inner match {
      case Some(x) => {
        val array: java.util.List[JClashInner] = new java.util.ArrayList[JClashInner]
        x foreach { element =>
          array.add(element match {
            case Some(x) => x.toAvro
            case None => null
          })
        }
        array
      }
      case None => null
    })
  }
}

object ClashOuter {
  implicit def reader = new AvroReader[ClashOuter] {
    override type J = JClashOuter
  }
  implicit val metadata: AvroMetadata[ClashOuter, JClashOuter] = new AvroMetadata[ClashOuter, JClashOuter] {
    override val avroClass: Class[JClashOuter] = classOf[JClashOuter]
    override val schema: Schema = JClashOuter.getClassSchema()
    override val fromAvro: (JClashOuter) => ClashOuter = { (j: JClashOuter) =>
      ClashOuter(j.getInner match {
        case null => None
        case _ => Some(Array(j.getInner.asScala: _*) map { x =>
          x match {
            case null => None
            case _ => Some(ClashInner.metadata.fromAvro(x))
          }
        })
      })
    }
  }
}
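ClashOuter illustrates the nullable-collection case: a missing array maps to None and missing elements map to None entries, in both directions. A minimal sketch, assuming the generated example.avro Java classes are on the classpath (the field is an Array, so the round-tripped value should be compared element-wise rather than with ==):

// Hypothetical round trip: None entries become nulls in the java.util.List and back.
val outer    = ClashOuter(Some(Array(Some(ClashInner(Some(1), None, None)), None)))
val javaRec  = outer.toAvro
val restored = ClashOuter.metadata.fromAvro(javaRec)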
Example 197
Source File: Level2.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.{Level2 => JLevel2}

final case class Level2(name: String) extends AvroSerializeable {
  type J = JLevel2
  override def toAvro: JLevel2 = {
    new JLevel2(name)
  }
}

object Level2 {
  implicit def reader = new AvroReader[Level2] {
    override type J = JLevel2
  }
  implicit val metadata: AvroMetadata[Level2, JLevel2] = new AvroMetadata[Level2, JLevel2] {
    override val avroClass: Class[JLevel2] = classOf[JLevel2]
    override val schema: Schema = JLevel2.getClassSchema()
    override val fromAvro: (JLevel2) => Level2 = { (j: JLevel2) =>
      Level2(j.getName.toString)
    }
  }
}
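Level0, Level1, and Level2 (Examples 190, 192, and 197) form a nesting chain; conversion is delegated level by level through each companion's metadata. A minimal sketch, assuming all three generated Java classes are available:

// Hypothetical nested round trip: each metadata.fromAvro delegates to the next level down.
val nested   = Level0(Level1(Level2("leaf")))
val javaRec  = nested.toAvro                          // builds the nested JLevel0/JLevel1/JLevel2
val restored = Level0.metadata.fromAvro(javaRec)      // delegates to Level1 and Level2 on the way back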
Example 198
Source File: ArrayAsScalaSeq.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.idl.array.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.idl.array.{ArrayIdl => JArrayIdl}

import scala.collection.JavaConverters._

final case class ArrayIdl(data: Seq[Int]) extends AvroSerializeable {
  type J = JArrayIdl
  override def toAvro: JArrayIdl = {
    new JArrayIdl({
      val array: java.util.List[java.lang.Integer] = new java.util.ArrayList[java.lang.Integer]
      data foreach { element => array.add(element) }
      array
    })
  }
}

object ArrayIdl {
  implicit def reader = new AvroReader[ArrayIdl] {
    override type J = JArrayIdl
  }
  implicit val metadata: AvroMetadata[ArrayIdl, JArrayIdl] = new AvroMetadata[ArrayIdl, JArrayIdl] {
    override val avroClass: Class[JArrayIdl] = classOf[JArrayIdl]
    override val schema: Schema = JArrayIdl.getClassSchema()
    override val fromAvro: (JArrayIdl) => ArrayIdl = { (j: JArrayIdl) =>
      ArrayIdl(Seq(j.getData.asScala: _*) map { x => x.toInt })
    }
  }
}
Example 199
Source File: ArrayAsScalaList.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.idl.array.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.idl.array.{ArrayIdl => JArrayIdl}

import scala.collection.JavaConverters._

final case class ArrayIdl(data: List[Int]) extends AvroSerializeable {
  type J = JArrayIdl
  override def toAvro: JArrayIdl = {
    new JArrayIdl({
      val array: java.util.List[java.lang.Integer] = new java.util.ArrayList[java.lang.Integer]
      data foreach { element => array.add(element) }
      array
    })
  }
}

object ArrayIdl {
  implicit def reader = new AvroReader[ArrayIdl] {
    override type J = JArrayIdl
  }
  implicit val metadata: AvroMetadata[ArrayIdl, JArrayIdl] = new AvroMetadata[ArrayIdl, JArrayIdl] {
    override val avroClass: Class[JArrayIdl] = classOf[JArrayIdl]
    override val schema: Schema = JArrayIdl.getClassSchema()
    override val fromAvro: (JArrayIdl) => ArrayIdl = { (j: JArrayIdl) =>
      ArrayIdl(List(j.getData.asScala: _*) map { x => x.toInt })
    }
  }
}
Example 200
Source File: ArrayAsScalaVector.scala From avrohugger with Apache License 2.0 | 5 votes |
package example.idl.array.model

import org.apache.avro.Schema
import org.oedura.scavro.{AvroMetadata, AvroReader, AvroSerializeable}

import example.idl.array.{ArrayIdl => JArrayIdl}

import scala.collection.JavaConverters._

final case class ArrayIdl(data: Vector[Int]) extends AvroSerializeable {
  type J = JArrayIdl
  override def toAvro: JArrayIdl = {
    new JArrayIdl({
      val array: java.util.List[java.lang.Integer] = new java.util.ArrayList[java.lang.Integer]
      data foreach { element => array.add(element) }
      array
    })
  }
}

object ArrayIdl {
  implicit def reader = new AvroReader[ArrayIdl] {
    override type J = JArrayIdl
  }
  implicit val metadata: AvroMetadata[ArrayIdl, JArrayIdl] = new AvroMetadata[ArrayIdl, JArrayIdl] {
    override val avroClass: Class[JArrayIdl] = classOf[JArrayIdl]
    override val schema: Schema = JArrayIdl.getClassSchema()
    override val fromAvro: (JArrayIdl) => ArrayIdl = { (j: JArrayIdl) =>
      ArrayIdl(Vector(j.getData.asScala: _*) map { x => x.toInt })
    }
  }
}
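The three ArrayIdl variants above (Examples 198 to 200) differ only in the Scala collection chosen for the Avro array field (Seq, List, or Vector); the Java side is always a java.util.List of boxed integers. A minimal sketch for the Vector-backed variant, assuming the generated example.idl.array.ArrayIdl Java class is available:

// Hypothetical round trip; the Seq- and List-backed variants behave the same
// apart from the collection type of `data`.
val ints     = ArrayIdl(Vector(1, 2, 3))
val javaRec  = ints.toAvro                            // elements copied into a java.util.ArrayList
val restored = ArrayIdl.metadata.fromAvro(javaRec)    // copied back into a Vector[Int]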