org.apache.avro.generic.GenericDatumReader Scala Examples
The following examples show how to use org.apache.avro.generic.GenericDatumReader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: AvroTypeSpec.scala From shapeless-datatype with Apache License 2.0 | 5 votes |
package shapeless.datatype.avro

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.net.URI
import java.nio.ByteBuffer

import com.google.protobuf.ByteString
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.joda.time.Instant
import org.scalacheck.Prop.forAll
import org.scalacheck.ScalacheckShapeless._
import org.scalacheck._
import shapeless._
import shapeless.datatype.record._

import scala.reflect.runtime.universe._

/**
 * ScalaCheck properties checking that values converted to a `GenericRecord`, written in Avro
 * binary form, read back and converted again match the value they started from.
 */
object AvroTypeSpec extends Properties("AvroType") {
  import shapeless.datatype.test.Records._
  import shapeless.datatype.test.SerializableUtils._

  // Content-based comparison for arrays, delegating to java.util.Arrays.equals.
  implicit def compareByteArrays(x: Array[Byte], y: Array[Byte]) = java.util.Arrays.equals(x, y)
  implicit def compareIntArrays(x: Array[Int], y: Array[Int]) = java.util.Arrays.equals(x, y)

  /**
   * Converts `m` to a record, pushes the record through [[roundTripRecord]], converts it back and
   * reports whether the result matches `m` according to `RecordMatcher`.
   *
   * The `AvroType` and both conversion functions are passed through `ensureSerializable` before use.
   */
  def roundTrip[A: TypeTag, L <: HList](m: A)(implicit
    gen: LabelledGeneric.Aux[A, L],
    fromL: FromAvroRecord[L],
    toL: ToAvroRecord[L],
    mr: MatchRecord[L]
  ): Boolean = {
    val avroType = ensureSerializable(AvroType[A])

    val encodeFn: SerializableFunction[A, GenericRecord] =
      new SerializableFunction[A, GenericRecord] {
        override def apply(value: A): GenericRecord = avroType.toGenericRecord(value)
      }
    val decodeFn: SerializableFunction[GenericRecord, Option[A]] =
      new SerializableFunction[GenericRecord, Option[A]] {
        override def apply(record: GenericRecord): Option[A] = avroType.fromGenericRecord(record)
      }

    val toRecord = ensureSerializable(encodeFn)
    val fromRecord = ensureSerializable(decodeFn)

    val decoded = fromRecord(roundTripRecord(toRecord(m)))
    val matcher = RecordMatcher[A]
    decoded.exists(candidate => matcher(candidate, m))
  }

  /** Writes `r` with a binary encoder using its own schema, then reads the bytes back into a new record. */
  def roundTripRecord(r: GenericRecord): GenericRecord = {
    val schema = r.getSchema

    // Encode to bytes.
    val out = new ByteArrayOutputStream()
    val binaryEncoder = EncoderFactory.get().binaryEncoder(out, null)
    new GenericDatumWriter[GenericRecord](schema).write(r, binaryEncoder)
    binaryEncoder.flush()
    out.close()

    // Decode from the bytes just produced.
    val in = new ByteArrayInputStream(out.toByteArray)
    val binaryDecoder = DecoderFactory.get().binaryDecoder(in, null)
    new GenericDatumReader[GenericRecord](schema).read(null, binaryDecoder)
  }

  // Custom type mappings; declared before the properties that need them.
  implicit val byteStringAvroType = AvroType.at[ByteString](Schema.Type.BYTES)(
    v => ByteString.copyFrom(v.asInstanceOf[ByteBuffer]),
    v => ByteBuffer.wrap(v.toByteArray)
  )
  implicit val instantAvroType =
    AvroType.at[Instant](Schema.Type.LONG)(v => new Instant(v.asInstanceOf[Long]), _.getMillis)

  property("required") = forAll { value: Required => roundTrip(value) }
  property("optional") = forAll { value: Optional => roundTrip(value) }
  property("repeated") = forAll { value: Repeated => roundTrip(value) }
  property("mixed") = forAll { value: Mixed => roundTrip(value) }
  property("nested") = forAll { value: Nested => roundTrip(value) }
  property("seqs") = forAll { value: Seqs => roundTrip(value) }

  implicit val uriAvroType =
    AvroType.at[URI](Schema.Type.STRING)(v => URI.create(v.toString), _.toString)
  property("custom") = forAll { value: Custom => roundTrip(value) }
}
Example 2
Source File: AvroConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source

import java.io.File
import java.util.Collections

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import io.confluent.connect.avro.AvroData
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.apache.avro.{Schema => AvroSchema}
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.source.SourceRecord
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException

/**
 * Converts Avro-encoded payloads into Kafka Connect `SourceRecord`s.
 *
 * Schemas are loaded per source topic by [[initialize]]; both lookup maps are keyed by the
 * lower-cased source topic name.
 */
class AvroConverter extends Converter {
  private val avroData = new AvroData(8)
  private var sourceToSchemaMap: Map[String, AvroSchema] = Map.empty
  private var avroReadersMap: Map[String, GenericDatumReader[GenericRecord]] = Map.empty

  /**
   * Builds a `SourceRecord` for one message.
   *
   * @param kafkaTopic   topic the record is destined for
   * @param sourceTopic  topic the payload came from; matched case-insensitively against the configured schemas
   * @param messageId    used to build the default record key
   * @param bytes        Avro binary payload; `null` yields a record with a null value
   * @param keys         field paths (dot separated) whose values form the record key when the value is a `Struct`
   * @param keyDelimiter separator placed between the extracted key values
   * @throws ConfigException if no schema is configured for `sourceTopic`
   */
  override def convert(kafkaTopic: String,
                       sourceTopic: String,
                       messageId: String,
                       bytes: Array[Byte],
                       keys: Seq[String] = Seq.empty,
                       keyDelimiter: String = "."): SourceRecord = {
    // Both maps are populated with lower-cased keys, so every lookup must use the same form.
    val topicKey = sourceTopic.toLowerCase
    def notConfigured =
      new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $sourceTopic")

    Option(bytes) match {
      case None =>
        val schema = sourceToSchemaMap.getOrElse(topicKey, throw notConfigured)
        new SourceRecord(
          Collections.singletonMap(Converter.TopicKey, sourceTopic),
          null,
          kafkaTopic,
          avroData.toConnectSchema(schema),
          null)

      case Some(_) =>
        val reader = avroReadersMap.getOrElse(topicKey, throw notConfigured)
        val decoder = DecoderFactory.get().binaryDecoder(bytes, null)
        val record = reader.read(null, decoder)
        val schemaAndValue = avroData.toConnectData(sourceToSchemaMap(topicKey), record)
        val value = schemaAndValue.value()
        value match {
          case s: Struct if keys.nonEmpty =>
            // Fields that cannot be extracted (null) are left out of the composite key.
            val keysValue = keys.flatMap { key =>
              Option(KeyExtractor.extract(s, key.split('.').toVector)).map(_.toString)
            }.mkString(keyDelimiter)
            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              Schema.STRING_SCHEMA,
              keysValue,
              schemaAndValue.schema(),
              schemaAndValue.value())

          case _ =>
            new SourceRecord(
              Collections.singletonMap(Converter.TopicKey, sourceTopic),
              null,
              kafkaTopic,
              MsgKey.schema,
              MsgKey.getStruct(sourceTopic, messageId),
              schemaAndValue.schema(),
              schemaAndValue.value())
        }
    }
  }

  /** Loads the schemas named in `config` and creates one datum reader per source topic. */
  override def initialize(config: Map[String, String]): Unit = {
    sourceToSchemaMap = AvroConverter.getSchemas(config)
    avroReadersMap = sourceToSchemaMap.map { case (key, schema) =>
      key -> new GenericDatumReader[GenericRecord](schema)
    }
  }
}

object AvroConverter {
  val SCHEMA_CONFIG = "connect.source.converter.avro.schemas"

  /**
   * Parses the `SCHEMA_CONFIG` entry, a `;`-separated list of `source=schemaFilePath` pairs,
   * into a map from lower-cased, trimmed source name to the parsed Avro schema.
   *
   * @throws ConfigException if the entry is missing, an item is not a `source=path` pair,
   *                         the file does not exist, or the source name is blank
   */
  def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = {
    config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided"))
      .split(';')
      .filter(_.trim.nonEmpty)
      .map(_.split("="))
      .map {
        case Array(source, path) =>
          val file = new File(path)
          if (!file.exists()) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!")
          }
          val s = source.trim.toLowerCase()
          if (s.isEmpty) {
            throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path")
          }
          s -> new AvroSchema.Parser().parse(file)
        case _ =>
          // The message now describes the syntax this parser actually accepts.
          throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is SOURCE=AVRO_FILE[;SOURCE=AVRO_FILE]")
      }.toMap
  }
}
Example 3
Source File: AvroSerializer.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.serialization

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}

import com.sksamuel.avro4s.{RecordFormat, SchemaFor}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

/** Helpers for writing and reading Avro binary records, either as `GenericRecord`s or as case classes. */
object AvroSerializer {

  /** Converts `t` to a record with the implicit format and writes it to the implicit stream. */
  def write[T <: Product](t: T)(implicit os: OutputStream, formatter: RecordFormat[T], schemaFor: SchemaFor[T]): Unit = {
    val record = apply(t)
    write(record, schemaFor())
  }

  /** Binary-encodes `record` with `schema` into the implicit stream, flushing both the encoder and the stream. */
  def write(record: GenericRecord, schema: Schema)(implicit os: OutputStream): Unit = {
    val binaryEncoder = EncoderFactory.get().binaryEncoder(os, null)
    val datumWriter = new GenericDatumWriter[GenericRecord](schema)
    datumWriter.write(record, binaryEncoder)
    binaryEncoder.flush()
    os.flush()
  }

  /** Converts `t` to a record and returns its Avro binary encoding. */
  def getBytes[T <: Product](t: T)(implicit recordFormat: RecordFormat[T], schemaFor: SchemaFor[T]): Array[Byte] = {
    val record = recordFormat.to(t)
    getBytes(record, schemaFor())
  }

  /** Returns the Avro binary encoding of `record` under `schema`. */
  def getBytes(record: GenericRecord, schema: Schema): Array[Byte] = {
    val buffer = new ByteArrayOutputStream()
    write(record, schema)(buffer)
    buffer.toByteArray
  }

  /** Reads a single binary-encoded record of the given `schema` from `is`. */
  def read(is: InputStream, schema: Schema): GenericRecord = {
    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val binaryDecoder = DecoderFactory.get().binaryDecoder(is, null)
    datumReader.read(null, binaryDecoder)
  }

  /** Reads a single record from `is` and converts it to `T` with the implicit format. */
  def read[T <: Product](is: InputStream)(implicit schemaFor: SchemaFor[T], recordFormat: RecordFormat[T]): T = {
    val record = read(is, schemaFor())
    recordFormat.from(record)
  }

  /** Converts `t` to a `GenericRecord` using the implicit format. */
  def apply[T <: Product](t: T)(implicit formatter: RecordFormat[T]): GenericRecord =
    formatter.to(t)
}
Example 4
Source File: ProtobufUtilTest.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.util

import java.io.File
import java.nio.channels.Channels
import java.nio.file.Files

import com.spotify.scio.ScioContext
import com.spotify.scio.avro._
import com.spotify.scio.coders.Coder
import com.spotify.scio.proto.Track.TrackPB
import org.apache.avro.file.DataFileStream
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.beam.sdk.io.{FileSystems, LocalResources}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.jdk.CollectionConverters._

/**
 * Writes the same protobuf messages once through `ProtobufUtil.toAvro` + `saveAsAvroFile` and once
 * through `saveAsProtobufFile`, then checks both outputs hold the same elements and file metadata.
 */
class ProtobufUtilTest extends AnyFlatSpec with Matchers {

  "ProtobufUtil" should "convert Message -> GenericRecords that can be written and read" in {
    val sc = ScioContext()

    val dir = Files.createTempDirectory("protobuf-util-")
    val (path1, path2) = (new File(s"$dir/1"), new File(s"$dir/2"))
    path1.deleteOnExit()
    path2.deleteOnExit()
    dir.toFile.deleteOnExit()

    implicit val grCoder: Coder[GenericRecord] = ProtobufUtil.AvroMessageCoder

    val messages = sc
      .parallelize(1 to 10)
      .map(i => TrackPB.newBuilder().setTrackId(i.toString).build())

    messages
      .map(ProtobufUtil.toAvro[TrackPB])
      .saveAsAvroFile(
        path1.getPath,
        suffix = ".protobuf",
        metadata = ProtobufUtil.schemaMetadataOf[TrackPB],
        schema = ProtobufUtil.AvroMessageSchema,
        numShards = 1
      )

    val protoWriteTap = messages.saveAsProtobufFile(path2.getPath, numShards = 1)

    val result = sc.run().waitUntilDone()

    val (tapFromAvroWrite, tapFromProtoWrite) = (
      ObjectFileTap[TrackPB](ScioUtil.addPartSuffix(path1.getPath)),
      protoWriteTap.get(result)
    )

    tapFromAvroWrite.value.toList should contain theSameElementsAs tapFromProtoWrite.value.toList
    getMetadata(path1) should contain theSameElementsAs getMetadata(path2)
  }

  /**
   * Returns the metadata entries of the single Avro file inside `dir`.
   *
   * Fails the test when `dir` does not contain exactly one file. `listFiles()` returns null for a
   * path that is not a readable directory; that case is treated as "no files" so it is reported
   * through the same failure message instead of a NullPointerException.
   */
  private def getMetadata(dir: File): Map[String, AnyRef] = {
    val files = Option(dir.listFiles()).getOrElse(Array.empty[File])
    if (files.length != 1) {
      fail(s"Directory $dir should contain 1 Avro file. Instead, found ${files.toList}")
    }

    val dfs = new DataFileStream[GenericRecord](
      Channels.newInputStream(FileSystems.open(LocalResources.fromFile(files(0), false))),
      new GenericDatumReader[GenericRecord]
    )

    // The map is fully built before the stream (and its underlying channel) is closed.
    try dfs.getMetaKeys.asScala.map(k => k -> (dfs.getMetaString(k): AnyRef)).toMap
    finally dfs.close()
  }
}
Example 5
Source File: Sedes.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase

import java.io.ByteArrayInputStream

import org.apache.avro.Schema
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io._
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._

/** Serializer/deserializer pair converting a value to bytes and a byte range back to a value. */
trait Sedes {
  /** Encodes `value` as a byte array. */
  def serialize(value: Any): Array[Byte]

  /** Decodes the value stored in `bytes`, given the `start` and `end` positions of its range. */
  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
}

/** [[Sedes]] for `Double` values, using the HBase `Bytes` encoding. */
class DoubleSedes extends Sedes {

  /** Encodes `value`, which must be a `Double`, with `Bytes.toBytes`. */
  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])

  /**
   * Decodes the `Double` stored at offset `start`.
   *
   * Reads with `Bytes.toDouble` so the result mirrors what [[serialize]] wrote; decoding with
   * `Bytes.toLong` would hand back the raw bit pattern as a `Long`. `end` is not consulted.
   */
  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any =
    Bytes.toDouble(bytes, start)
}
Example 6
Source File: SparkAvroDecoder.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.spark.avro

import org.apache.log4j.Logger
import java.io.ByteArrayOutputStream
import scala.reflect.runtime.universe._
import org.apache.avro.generic.{ GenericDatumReader, GenericDatumWriter, GenericRecord }
import org.apache.avro.io.{ DecoderFactory, EncoderFactory }
import org.apache.spark.sql.{ Dataset, Encoder, Row }
import org.apache.spark.sql.catalyst.encoders.{ encoderFor, ExpressionEncoder, RowEncoder }
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types.StructType
import org.apache.avro.Schema
import cloudflow.spark.sql.SQLImplicits._

/** A key paired with an Avro-encoded payload, as produced by `SparkAvroEncoder.encodeWithKey`. */
case class EncodedKV(key: String, value: Array[Byte])

/**
 * Decodes Avro binary payloads into Spark `Row`s.
 *
 * @param avroSchema the Avro schema as a JSON string; it is parsed lazily on first use
 */
case class SparkAvroDecoder[T: Encoder: TypeTag](avroSchema: String) {
  val encoder: Encoder[T] = implicitly[Encoder[T]]
  val sqlSchema: StructType = encoder.schema
  val encoderForDataColumns: ExpressionEncoder[Row] = RowEncoder(sqlSchema)
  // The Avro-related members below are @transient lazy: they are rebuilt from `avroSchema` on first
  // access rather than being carried along when an instance is serialized.
  @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema)
  @transient lazy val rowConverter = SchemaConverters.createConverterToSQL(_avroSchema, sqlSchema)
  @transient lazy val datumReader = new GenericDatumReader[GenericRecord](_avroSchema)
  @transient lazy val decoder = DecoderFactory.get

  /** Reads one record from `bytes` with `datumReader` and converts it to a row via `rowConverter`. */
  def decode(bytes: Array[Byte]): Row = {
    val binaryDecoder = decoder.binaryDecoder(bytes, null)
    val record = datumReader.read(null, binaryDecoder)
    rowConverter(record).asInstanceOf[GenericRow]
  }
}

/**
 * Encodes Spark rows / datasets of `T` as Avro binary payloads.
 *
 * @param avroSchema the Avro schema as a JSON string; it is parsed lazily on first use
 */
case class SparkAvroEncoder[T: Encoder: TypeTag](avroSchema: String) {
  @transient lazy val log = Logger.getLogger(getClass.getName)

  val BufferSize = 5 * 1024 // initial capacity, in bytes, of the per-row output buffer (5 KiB)

  val encoder = implicitly[Encoder[T]]
  val sqlSchema = encoder.schema
  @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema)

  val recordName = "topLevelRecord" // ??? (original author's open question; passed to createConverterToAvro)
  val recordNamespace = "recordNamespace" // ??? (original author's open question; passed to createConverterToAvro)

  @transient lazy val converter = AvroConverter.createConverterToAvro(sqlSchema, recordName, recordNamespace)

  // Risk: This process is memory intensive. Might require thread-level buffers to optimize memory usage
  /** Converts `row` to a `GenericRecord` and returns its Avro binary encoding; allocates a new writer and buffer per call. */
  def rowToBytes(row: Row): Array[Byte] = {
    val genRecord = converter(row).asInstanceOf[GenericRecord]
    if (log.isDebugEnabled) log.debug(s"genRecord = $genRecord")
    val datumWriter = new GenericDatumWriter[GenericRecord](_avroSchema)
    val avroEncoder = EncoderFactory.get
    val byteArrOS = new ByteArrayOutputStream(BufferSize)
    val binaryEncoder = avroEncoder.binaryEncoder(byteArrOS, null)
    datumWriter.write(genRecord, binaryEncoder)
    // Flush before reading the buffer so that all encoded bytes have reached byteArrOS.
    binaryEncoder.flush()
    byteArrOS.toByteArray
  }

  /** Encodes every element of `dataset` by converting it to a DataFrame and mapping each row through `rowToBytes`. */
  def encode(dataset: Dataset[T]): Dataset[Array[Byte]] =
    dataset.toDF().mapPartitions(rows ⇒ rows.map(rowToBytes)).as[Array[Byte]]

  // Note to self: I'm not sure how heavy this chain of transformations is
  /**
   * Encodes every element of `dataset` together with the key computed by `keyFun`.
   *
   * Each value is turned into an internal row, then into an external `Row`, and finally into bytes.
   */
  def encodeWithKey(dataset: Dataset[T], keyFun: T ⇒ String): Dataset[EncodedKV] = {
    // Note: this local `encoder` shadows the class-level member of the same name.
    val encoder = encoderFor[T]
    implicit val rowEncoder = RowEncoder(encoder.schema).resolveAndBind()
    dataset.map { value ⇒
      val key = keyFun(value)
      val internalRow = encoder.toRow(value)
      val row = rowEncoder.fromRow(internalRow)
      val bytes = rowToBytes(row)
      EncodedKV(key, bytes)
    }
  }
}
Example 7
Source File: Decoding.scala From avro4s with Apache License 2.0 | 5 votes |
package benchmarks

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.util.Collections

import benchmarks.record._
import com.sksamuel.avro4s._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.util.ByteBufferInputStream
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

/** Shared fixtures and helpers for the decoding benchmarks. */
object Decoding extends BenchmarkHelpers {

  /** Per-thread benchmark state: pre-encoded inputs plus the decoder/reader pairs under test. */
  @State(Scope.Thread)
  class Setup {
    // Input for the generated-class benchmark, produced by the generated record's own toByteBuffer.
    val avroBytes = {
      import benchmarks.record.generated.AttributeValue._
      import benchmarks.record.generated._
      new RecordWithUnionAndTypeField(new ValidInt(255, t)).toByteBuffer
    }

    // Input for the avro4s benchmarks, produced by `encode` below.
    val avro4sBytes = encode(RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t)))

    // Decoder/reader built with the hand-written AttributeValue codec and its schema in implicit scope.
    val (handrolledDecoder, handrolledReader) = {
      import benchmarks.handrolled_codecs._
      implicit val codec: Codec[AttributeValue[Int]] = AttributeValueCodec[Int]
      implicit val schemaFor: SchemaFor[AttributeValue[Int]] = SchemaFor[AttributeValue[Int]](codec.schema)
      val recordSchemaFor = SchemaFor[RecordWithUnionAndTypeField]
      val decoder = Decoder[RecordWithUnionAndTypeField].withSchema(recordSchemaFor)
      val reader = new GenericDatumReader[GenericRecord](recordSchemaFor.schema)
      (decoder, reader)
    }

    // Decoder/reader built from whatever Decoder instance is implicitly available here.
    val (avro4sDecoder, avro4sReader) = {
      val decoder = Decoder[RecordWithUnionAndTypeField]
      val reader = new GenericDatumReader[GenericRecord](decoder.schema)
      (decoder, reader)
    }
  }

  /** Encodes `value` to a record with the implicit `Encoder` and returns its Avro binary form wrapped in a `ByteBuffer`. */
  def encode[T: Encoder: SchemaFor](value: T): ByteBuffer = {
    val outputStream = new ByteArrayOutputStream(512)
    val encoder = Encoder[T]
    val schema = AvroSchema[T]
    val record = encoder.encode(value).asInstanceOf[GenericRecord]
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null)
    writer.write(record, enc)
    ByteBuffer.wrap(outputStream.toByteArray)
  }
}

/** JMH benchmarks comparing decoding through the generated record class and through avro4s decoders. */
class Decoding extends CommonParams with BenchmarkHelpers {

  import Decoding._

  /**
   * Reads one `GenericRecord` from `bytes` with `reader`, then decodes it with `decoder`.
   *
   * Works on `bytes.duplicate`, so the position of the caller's buffer is left untouched.
   */
  def decode[T](bytes: ByteBuffer, decoder: Decoder[T], reader: GenericDatumReader[GenericRecord]): T = {
    val dec =
      DecoderFactory.get().binaryDecoder(new ByteBufferInputStream(Collections.singletonList(bytes.duplicate)), null)
    val record = reader.read(null, dec)
    decoder.decode(record)
  }

  /** Decodes via the generated record's `fromByteBuffer`. */
  @Benchmark
  def avroSpecificRecord(setup: Setup, blackhole: Blackhole) = {
    import benchmarks.record.generated._
    blackhole.consume(RecordWithUnionAndTypeField.fromByteBuffer(setup.avroBytes.duplicate))
  }

  /** Decodes with the decoder/reader pair built from the hand-written codec. */
  @Benchmark
  def avro4sHandrolled(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(decode(setup.avro4sBytes, setup.handrolledDecoder, setup.handrolledReader))

  /** Decodes with the decoder/reader pair built from the implicitly resolved decoder. */
  @Benchmark
  def avro4sGenerated(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(decode(setup.avro4sBytes, setup.avro4sDecoder, setup.avro4sReader))
}
Example 8
Source File: OutputStreamTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.streams.output

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s._
import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

/**
 * Test helpers that write a value with one of the three `AvroOutputStream` formats
 * (data, binary, json) and read the first record back with the plain Avro API.
 */
trait OutputStreamTest extends AnyFunSuite with Matchers {

  /** Reads the first record of the Avro data file held in `out`. */
  def readData[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readData(out.toByteArray)

  /** Reads the first record of the Avro data file contained in `bytes`. */
  def readData[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val recordReader = new GenericDatumReader[GenericRecord](AvroSchema[T])
    val fileReader = new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes), recordReader)
    fileReader.next
  }

  /** Writes `t` in the Avro data-file format and returns the closed stream's buffer. */
  def writeData[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val buffer = new ByteArrayOutputStream
    val sink = AvroOutputStream.data[T].to(buffer).build()
    sink.write(t)
    sink.close()
    buffer
  }

  /** Reads one binary-encoded record from `out`. */
  def readBinary[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readBinary(out.toByteArray)

  /** Reads one binary-encoded record from `bytes`. */
  def readBinary[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val recordReader = new GenericDatumReader[GenericRecord](AvroSchema[T])
    val binaryDecoder = DecoderFactory.get().binaryDecoder(new SeekableByteArrayInput(bytes), null)
    recordReader.read(null, binaryDecoder)
  }

  /** Writes `t` in the Avro binary format and returns the closed stream's buffer. */
  def writeBinary[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val buffer = new ByteArrayOutputStream
    val sink = AvroOutputStream.binary[T].to(buffer).build()
    sink.write(t)
    sink.close()
    buffer
  }

  /** Reads one JSON-encoded record from `out`. */
  def readJson[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readJson(out.toByteArray)

  /** Reads one JSON-encoded record from `bytes`. */
  def readJson[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val schema = AvroSchema[T]
    val recordReader = new GenericDatumReader[GenericRecord](schema)
    val jsonDecoder = DecoderFactory.get().jsonDecoder(schema, new SeekableByteArrayInput(bytes))
    recordReader.read(null, jsonDecoder)
  }

  /** Writes `t` in the Avro JSON format and returns the closed stream's buffer. */
  def writeJson[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val buffer = new ByteArrayOutputStream
    val sink = AvroOutputStream.json[T].to(buffer).build()
    sink.write(t)
    sink.close()
    buffer
  }

  /** Round-trips `t` through the data, binary and json formats in that order, applying `fn` to each record read back. */
  def writeRead[T: Encoder : SchemaFor](t: T)(fn: GenericRecord => Any): Unit = {
    fn(readData(writeData(t)))
    fn(readBinary(writeBinary(t)))
    fn(readJson(writeJson(t)))
  }
}
Example 9
Source File: GithubIssue235.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{Decoder, Encoder, RecordFormat, SchemaFor}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

case class Label(value: String) extends AnyVal
case class Value[A](label: Label, value: A)

sealed trait OneOrTwo[A]
case class One[A](value: Value[A]) extends OneOrTwo[A]
case class Two[A](first: Value[A], second: Value[A]) extends OneOrTwo[A]
case class OneOrTwoWrapper[A](t: OneOrTwo[A])

object Bug {

  /**
   * Encodes `a` to Avro binary and decodes it again, requiring that the decoded value equals `a`.
   *
   * The same schema, taken from `schemaFor`, is used for writing and for reading.
   */
  def apply[T <: Product](a: T)(
    implicit schemaFor: SchemaFor[T],
    encoder: Encoder[T],
    decoder: Decoder[T]
  ): Unit = {
    val recordFormat = RecordFormat[T]
    val avroSchema = schemaFor.schema

    // Write phase: value -> record -> bytes.
    val sink = new ByteArrayOutputStream()
    val binaryEncoder = EncoderFactory.get().binaryEncoder(sink, null)
    new GenericDatumWriter[GenericRecord](avroSchema).write(recordFormat.to(a), binaryEncoder)
    binaryEncoder.flush()

    // Read phase: bytes -> record -> value.
    val binaryDecoder = DecoderFactory.get().binaryDecoder(sink.toByteArray, null)
    val decoded = new GenericDatumReader[GenericRecord](avroSchema).read(null, binaryDecoder)

    require(recordFormat.from(decoded) == a)
  }
}

class GithubIssue235 extends AnyFunSuite with Matchers {
  test("Broken typeclass derivation upgrading from 1.9.0 to 2.0.1 #235") {
    val wrapper = OneOrTwoWrapper(One(Value(Label("lbl"), "foo")))
    Bug(wrapper)
  }
}
Example 10
Source File: GithubIssue191.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema}
import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

final case class SN(value: String) extends AnyVal
final case class SimpleUser(name: String, sn: Option[SN])

class GithubIssue191 extends AnyFunSuite with Matchers {

  test("writing out AnyVal in an option") {
    implicit val schema = AvroSchema[SimpleUser]

    // Write a single user in the Avro data-file format.
    val buffer = new ByteArrayOutputStream
    val sink = AvroOutputStream.data[SimpleUser].to(buffer).build()
    sink.write(SimpleUser("Tom", Some(SN("123"))))
    sink.close()

    // Read every record back through the plain Avro reader and keep the first one.
    val recordReader = new GenericDatumReader[GenericRecord](schema)
    val fileReader =
      new DataFileReader[GenericRecord](new SeekableByteArrayInput(buffer.toByteArray), recordReader)
    val records = Iterator.continually(fileReader).takeWhile(_.hasNext).map(_.next).toList
    val first = records.head

    first.getSchema shouldBe schema
    first.get("name") shouldBe new Utf8("Tom")
    first.get("sn") shouldBe new Utf8("123")
  }
}
Example 11
Source File: DefaultAwareDatumReader.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s

import org.apache.avro.generic.GenericDatumReader
import org.apache.avro.io.ResolvingDecoder
import org.apache.avro.{AvroTypeException, Schema}

/**
 * A `GenericDatumReader` backed by `DefaultAwareGenericData` that falls back to a field's
 * default value when reading that field fails with an `AvroTypeException`.
 */
class DefaultAwareDatumReader[T](writer: Schema, reader: Schema)
  extends GenericDatumReader[T](writer, reader, new DefaultAwareGenericData) {

  /**
   * Delegates to the parent implementation. If that throws an `AvroTypeException` and the field
   * has a non-null default, the default is stored in the record instead; with a null default the
   * exception propagates unchanged.
   */
  override def readField(r: scala.Any,
                         f: Schema.Field,
                         oldDatum: scala.Any,
                         in: ResolvingDecoder,
                         state: scala.Any): Unit =
    try super.readField(r, f, oldDatum, in, state)
    catch {
      // The guard leaves the exception unhandled (and therefore rethrown) when there is no default.
      case _: AvroTypeException if f.defaultVal != null =>
        getData.setField(r, f.name, f.pos, f.defaultVal)
    }
}

object DefaultAwareDatumReader {
  /** Creates a reader that uses `writerSchema` as both the writer and the reader schema. */
  def apply[T](writerSchema: Schema): DefaultAwareDatumReader[T] =
    new DefaultAwareDatumReader[T](writerSchema, writerSchema)
}
Example 12
Source File: AvroIO.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io import java.io.{File, InputStream, OutputStream} import java.nio.ByteBuffer import java.nio.channels.SeekableByteChannel import com.google.common.io.ByteStreams import org.apache.avro.Schema import org.apache.avro.file.{DataFileReader, DataFileWriter, SeekableByteArrayInput, SeekableInput} import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DatumReader, DatumWriter} import org.apache.avro.reflect.{ReflectDatumReader, ReflectDatumWriter} import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter, SpecificRecord} import org.apache.beam.sdk.io.FileSystems import org.apache.beam.sdk.io.fs.MatchResult.Metadata import scala.jdk.CollectionConverters._ import scala.reflect.ClassTag def writeToOutputStream[T: ClassTag](data: Iterable[T], schema: Schema, os: OutputStream): Unit = { val fileWriter = new DataFileWriter(createDatumWriter[T]).create(schema, os) data.foreach(fileWriter.append) fileWriter.close() } def getAvroSchemaFromFile(path: String): Schema = { require(FileStorage(path).exists, s"File `$path` does not exist!") val files = FileStorage(path).listFiles.filter(_.resourceId.getFilename.endsWith(".avro")) require(files.nonEmpty, s"File `$path` does not contain avro files") val reader = new GenericDatumReader[GenericRecord]() val dfr = new DataFileReader[GenericRecord](AvroIO.getAvroSeekableInput(files.head), reader) dfr.getSchema } private def getAvroSeekableInput(meta: Metadata): SeekableInput = new SeekableInput { require(meta.isReadSeekEfficient) private val in = FileSystems.open(meta.resourceId()).asInstanceOf[SeekableByteChannel] override def read(b: Array[Byte], off: Int, len: Int): Int = in.read(ByteBuffer.wrap(b, off, len)) override def tell(): Long = in.position() override def length(): Long = in.size() override def seek(p: Long): Unit = in.position(p) override def close(): Unit = in.close() } }
Example 13
Source File: AvroDecoder.scala From cuesheet with Apache License 2.0 | 5 votes |
package com.kakao.cuesheet.convert

import java.util.Arrays.copyOfRange

import kafka.serializer.Decoder
import kafka.utils.VerifiableProperties
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}

/**
 * Base for Kafka decoders that parse Avro payloads.
 *
 * The schema is parsed from the `Avro.SCHEMA` property; `Avro.SKIP_BYTES` (default 0) gives the
 * number of leading bytes dropped from each message before decoding.
 */
sealed trait AvroDecoder[T] extends Decoder[T] {

  def props: VerifiableProperties

  protected val schema = new Schema.Parser().parse(props.getString(Avro.SCHEMA))
  protected val skipBytes = props.getInt(Avro.SKIP_BYTES, 0)

  protected val reader = new GenericDatumReader[GenericRecord](schema)
  protected val decoder = Avro.recordDecoder(reader)

  /** Returns `bytes` without its first `size` bytes, or an empty array when nothing would remain. */
  private def skip(bytes: Array[Byte], size: Int): Array[Byte] =
    if (bytes.length - size > 0) copyOfRange(bytes, size, bytes.length)
    else new Array[Byte](0)

  /** Drops the configured prefix (if any) from `bytes` and decodes the rest into a record. */
  def parse(bytes: Array[Byte]): GenericRecord = {
    val payload = skipBytes match {
      case 0 => bytes
      case n => skip(bytes, n)
    }
    decoder(payload)
  }
}

/** Decodes messages to `GenericRecord`. */
class AvroRecordDecoder(val props: VerifiableProperties) extends AvroDecoder[GenericRecord] {
  override def fromBytes(bytes: Array[Byte]): GenericRecord = parse(bytes)
}

/** Decodes messages and converts the record with `Avro.toMap`. */
class AvroMapDecoder(val props: VerifiableProperties) extends AvroDecoder[Map[String, Any]] {
  override def fromBytes(bytes: Array[Byte]): Map[String, Any] = {
    val record = parse(bytes)
    Avro.toMap(record)
  }
}

/** Decodes messages and converts the record with `Avro.toJson`. */
class AvroJsonDecoder(val props: VerifiableProperties) extends AvroDecoder[String] {
  override def fromBytes(bytes: Array[Byte]): String = {
    val record = parse(bytes)
    Avro.toJson(record)
  }
}
Example 14
Source File: AvroSEBasicTest.scala From akka-serialization-test with Apache License 2.0 | 5 votes |
package com.github.dnvriend.serializer.avro4s

import com.github.dnvriend.TestSpec
import com.github.dnvriend.domain.BookStore.{ ChangedBookV1, ChangedBookV2, ChangedBookV3, ChangedBookV4 }
import com.github.dnvriend.serializer.avro.{ BookSerializerV1, BookSerializerV2, BookSerializerV3 }
import com.sksamuel.avro4s.{ AvroSchema, RecordFormat }
import org.apache.avro.Schema
import org.apache.avro.file.SeekableByteArrayInput
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.io.DecoderFactory

// SE stands for Schema Evolution
class AvroSEBasicTest extends TestSpec {

  /** Reads one binary-encoded record of the given `schema` from `bytes`. */
  @Override
  def fromBytes(bytes: Array[Byte], schema: Schema): GenericRecord = {
    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val binaryDecoder = DecoderFactory.get().binaryDecoder(bytes, null)
    datumReader.read(null, binaryDecoder)
  }

  val title = "Moby-Dick; or, The Whale"
  val year = 1851
  val editor = "Scala Books"

  "AvroSEBasicTest" should "deserialize old class with renamed field" in {
    // in this case, two different serializers can be used
    val original = ChangedBookV1(title, year)
    val encoded: Array[Byte] = new BookSerializerV1().toBinary(original)

    val v2Serializer = new BookSerializerV2
    v2Serializer.fromBinary(encoded) should matchPattern {
      case ChangedBookV2(`title`, `year`) =>
    }
  }

  it should "deserialize old class without new field" in {
    val original = ChangedBookV2(title, year)
    val encoded: Array[Byte] = new BookSerializerV2().toBinary(original)
    val input = new SeekableByteArrayInput(encoded)

    // Written with the V2 schema, read with the V3 schema.
    val writerSchema = AvroSchema[ChangedBookV2]
    val readerSchema = AvroSchema[ChangedBookV3]
    val datumReader = new GenericDatumReader[GenericRecord](writerSchema, readerSchema)
    val binaryDecoder = DecoderFactory.get().binaryDecoder(input, null)
    val decoded: GenericRecord = datumReader.read(null, binaryDecoder)

    val book = RecordFormat[ChangedBookV3].from(decoded)
    book should matchPattern {
      case ChangedBookV3(`title`, `year`, "") =>
    }
  }

  it should "deserialize old class with dropped field" in {
    val original = ChangedBookV3(title, year, editor)
    val encoded: Array[Byte] = new BookSerializerV3().toBinary(original)
    val input = new SeekableByteArrayInput(encoded)

    // Written with the V3 schema, read with the V4 schema.
    val writerSchema = AvroSchema[ChangedBookV3]
    val readerSchema = AvroSchema[ChangedBookV4]
    val datumReader = new GenericDatumReader[GenericRecord](writerSchema, readerSchema)
    val binaryDecoder = DecoderFactory.get().binaryDecoder(input, null)
    val decoded: GenericRecord = datumReader.read(null, binaryDecoder)

    val book = RecordFormat[ChangedBookV4].from(decoded)
    book should matchPattern {
      case ChangedBookV4(`title`, `editor`) =>
    }
  }
}
Example 15
Source File: DefaultFrameReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.avro

import java.nio.charset.Charset

import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericData, GenericDatumReader}
import SchemaConverter._
import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.runtime.frame.{ArrayRow, DefaultLeapFrame, Row}

import scala.collection.mutable
import scala.util.Try

/** Reads a `DefaultLeapFrame` from the bytes of an Avro data file. */
class DefaultFrameReader extends FrameReader {
  val valueConverter = ValueConverter()

  /**
   * Parses `bytes` as an Avro data file and converts every record into a row.
   *
   * The frame schema is derived from the schema embedded in the file. `charset` is not consulted.
   *
   * @return the frame, or a `Failure` carrying whatever exception was thrown while reading
   */
  override def fromBytes(bytes: Array[Byte],
                         charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] = Try {
    val datumReader = new GenericDatumReader[GenericData.Record]()
    val reader = new DataFileReader[GenericData.Record](new SeekableByteArrayInput(bytes), datumReader)

    try {
      val avroSchema = reader.getSchema
      val schema = avroSchema: StructType
      val readers = schema.fields.map(_.dataType).map(valueConverter.avroToMleap)

      // A single record instance is handed back to the reader for reuse on every iteration.
      var record = new GenericData.Record(avroSchema)
      // A builder gives amortised O(1) appends; `:+` on a mutable.Seq copies the whole collection each time.
      val rows = Vector.newBuilder[Row]

      // A failing hasNext is treated as end of input, as before.
      while (Try(reader.hasNext).getOrElse(false)) {
        record = reader.next(record)
        val row = ArrayRow(new Array[Any](schema.fields.length))
        for (i <- schema.fields.indices) {
          row.set(i, readers(i)(record.get(i)))
        }
        rows += row
      }

      DefaultLeapFrame(schema, rows.result())
    } finally {
      // Release the file reader whether or not conversion succeeded.
      reader.close()
    }
  }
}
Example 16
Source File: DefaultRowReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.avro

import java.nio.charset.Charset

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumReader}
import org.apache.avro.io.{BinaryDecoder, DecoderFactory}
import SchemaConverter._
import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.runtime.frame.{ArrayRow, Row}

import scala.util.Try

/** Decodes single binary-encoded Avro records into rows for a fixed schema.
  *
  * The decoder and the record are kept in mutable fields and handed back to Avro
  * for reuse on every call to [[fromBytes]].
  */
class DefaultRowReader(override val schema: StructType) extends RowReader {
  val valueConverter = ValueConverter()

  // One converter per schema field, in field order.
  lazy val readers = schema.fields.map(_.dataType).map(valueConverter.avroToMleap)

  // The expected type triggers the implicit MLeap -> Avro schema conversion.
  val avroSchema: Schema = schema
  val datumReader = new GenericDatumReader[GenericData.Record](avroSchema)

  var decoder: BinaryDecoder = null
  var record = new GenericData.Record(avroSchema)

  /** Decodes one record from `bytes` and converts each of its fields.
    *
    * @param bytes   binary Avro encoding of a single record
    * @param charset not used by this implementation
    * @return the converted row, or a `Failure` carrying the exception thrown while decoding
    */
  override def fromBytes(bytes: Array[Byte],
                         charset: Charset = BuiltinFormats.charset): Try[Row] = Try {
    decoder = DecoderFactory.get().binaryDecoder(bytes, decoder)
    record = datumReader.read(record, decoder)

    val fieldCount = schema.fields.length
    val row = ArrayRow(new Array[Any](fieldCount))
    schema.fields.indices.foreach { idx =>
      row.set(idx, readers(idx)(record.get(idx)))
    }
    row
  }
}
Example 17
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

/** Helpers that round-trip specific records through Avro files and binary encoding. */
object SpecificTestUtil extends Specification {

  /** Writes `records` to `file` as an Avro data file.
    *
    * The schema is taken from `records.head`, so `records` must not be empty.
    * The writer is closed even if creating the file or appending a record throws.
    */
  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      dataFileWriter.close()
    }
  }

  /** Reads `file` back and compares the last record read with the last expected record visited.
    *
    * Both readers opened here are closed before the comparison is made.
    */
  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    // Open the file generically only to learn the schema it was written with, then release it.
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
    val schema = try schemaReader.getSchema finally schemaReader.close()

    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        // The previously read instance is handed back to Avro for reuse.
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }
    sameRecord must ===(record)
  }

  /** Writes `records` to a temporary file (deleted on JVM exit) and checks them with [[read]]. */
  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  /** Binary-encodes `record`, expects the encoding to be the single byte 0, and decodes it again. */
  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    // NOTE(review): `record` is passed as the reuse instance, so `decoded` may be the same
    // object as `record` - confirm the comparison below is not vacuous.
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 18
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._
import org.specs2.mutable.Specification
import java.io.File
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

import DefaultEnum._

/** Round-trips a `DefaultTest()` built with no arguments through an Avro data file. */
class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      // Open the file generically only to learn its schema, then release it.
      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
      val schema = try schemaReader.getSchema finally schemaReader.close()

      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      // Close the reader before asserting so a failed expectation cannot leave the file open.
      val sameRecord = try dataFileReader.next() finally dataFileReader.close()

      sameRecord.suit === SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === Vector(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}
Example 19
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

/** Helpers that round-trip specific records through Avro files and binary encoding. */
object SpecificTestUtil extends Specification {

  /** Writes `records` to `file` as an Avro data file.
    *
    * The schema is taken from `records.head`, so `records` must not be empty.
    * The writer is closed even if creating the file or appending a record throws.
    */
  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      dataFileWriter.close()
    }
  }

  /** Reads `file` back and reports whether the last record read equals the last expected
    * record visited.
    *
    * Both readers opened here are closed before the comparison is made.
    *
    * @return `true` when the two records are equal (including when both are still `null`
    *         because the file held no records)
    */
  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    // Open the file generically only to learn the schema it was written with, then release it.
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
    val schema = try schemaReader.getSchema finally schemaReader.close()

    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        // The previously read instance is handed back to Avro for reuse.
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }
    // `==` is null-safe; `sameRecord.equals(record)` threw when no record had been read.
    sameRecord == record
  }

  /** Writes `records` to a temporary file (deleted on JVM exit) and checks them with [[read]]. */
  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  /** Binary-encodes `record`, expects the encoding to be the single byte 0, and decodes it again. */
  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    // NOTE(review): `record` is passed as the reuse instance, so `decoded` may be the same
    // object as `record` - confirm the comparison below is not vacuous.
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 20
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._
import org.specs2.mutable.Specification
import java.io.File
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

import DefaultEnum._

/** Round-trips a `DefaultTest()` built with no arguments through an Avro data file. */
class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      // Open the file generically only to learn its schema, then release it.
      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
      val schema = try schemaReader.getSchema finally schemaReader.close()

      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      // Close the reader before asserting so a failed expectation cannot leave the file open.
      val sameRecord = try dataFileReader.next() finally dataFileReader.close()

      sameRecord.suit === SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === List(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}
Example 21
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

/** Helpers that round-trip specific records through Avro files and binary encoding. */
object SpecificTestUtil extends Specification {

  /** Writes `records` to `file` as an Avro data file.
    *
    * The schema is taken from `records.head`, so `records` must not be empty.
    * The writer is closed even if creating the file or appending a record throws.
    */
  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]()
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      dataFileWriter.close()
    }
  }

  /** Reads `file` back and compares the last record read with the last expected record visited.
    *
    * Both readers opened here are closed before the comparison is made.
    */
  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    // Open the file generically only to learn the schema it was written with, then release it.
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
    val schema = try schemaReader.getSchema finally schemaReader.close()

    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        // The previously read instance is handed back to Avro for reuse.
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }
    sameRecord must ===(record)
  }

  /** Writes `records` to a temporary file (deleted on JVM exit) and checks them with [[read]]. */
  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  /** Binary-encodes `record`, expects the encoding to be the single byte 0, and decodes it again. */
  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    // NOTE(review): `record` is passed as the reuse instance, so `decoded` may be the same
    // object as `record` - confirm the comparison below is not vacuous.
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 22
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._
import org.specs2.mutable.Specification
import java.io.File
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

import DefaultEnum._

/** Round-trips a `DefaultTest()` built with no arguments through an Avro data file. */
class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      // Open the file generically only to learn its schema, then release it.
      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
      val schema = try schemaReader.getSchema finally schemaReader.close()

      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      // Close the reader before asserting so a failed expectation cannot leave the file open.
      val sameRecord = try dataFileReader.next() finally dataFileReader.close()

      sameRecord.suit === SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === List(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}
Example 23
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

/** Helpers that round-trip specific records through Avro files and binary encoding. */
object SpecificTestUtil extends Specification {

  /** Writes `records` to `file` as an Avro data file.
    *
    * The schema is taken from `records.head`, so `records` must not be empty.
    * The writer is closed even if creating the file or appending a record throws.
    */
  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      dataFileWriter.close()
    }
  }

  /** Reads `file` back and compares the last record read with the last expected record visited.
    *
    * Both readers opened here are closed before the comparison is made.
    */
  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    // Open the file generically only to learn the schema it was written with, then release it.
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
    val schema = try schemaReader.getSchema finally schemaReader.close()

    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        // The previously read instance is handed back to Avro for reuse.
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }
    sameRecord must ===(record)
  }

  /** Writes `records` to a temporary file (deleted on JVM exit) and checks them with [[read]]. */
  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  /** Binary-encodes `record`, expects the encoding to be the single byte 0, and decodes it again. */
  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    // NOTE(review): `record` is passed as the reuse instance, so `decoded` may be the same
    // object as `record` - confirm the comparison below is not vacuous.
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 24
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._
import org.specs2.mutable.Specification
import java.io.File
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord}
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

/** Round-trips a `DefaultTest()` built with no arguments through an Avro data file. */
class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      // Open the file generically only to learn its schema, then release it.
      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schemaReader = new DataFileReader[GenericRecord](file, dummyRecord)
      val schema = try schemaReader.getSchema finally schemaReader.close()

      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      // Close the reader before asserting so a failed expectation cannot leave the file open.
      val sameRecord = try dataFileReader.next() finally dataFileReader.close()

      sameRecord.suit === "SPADES"
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === Array(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}
Example 25
Source File: StringToGenericRecord.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.convert

import java.util.UUID

import org.apache.avro.{LogicalTypes, Schema}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import cats.implicits._
import org.apache.avro.util.Utf8

import scala.util.{Failure, Success, Try}

/** Syntax for turning a JSON string into an Avro `GenericRecord` for a given schema. */
object StringToGenericRecord {

  /** Raised under strict validation when the payload names fields the schema does not. */
  final case class ValidationExtraFieldsError(fields: Set[String])
      extends RuntimeException(
        s"Extra fields ${fields.mkString(",")} found with Strict Validation Strategy"
      )

  /** Raised when a value does not satisfy the logical type declared for its field. */
  final case class InvalidLogicalTypeError(expected: String, received: AnyRef)
      extends RuntimeException(
        s"Invalid logical type. Expected $expected but received $received"
      )

  implicit class ConvertToGenericRecord(s: String) {

    /** True when `s` is accepted by `UUID.fromString`. */
    private def isUuidValid(s: String): Boolean =
      Try(UUID.fromString(s)).isSuccess

    /** Walks `record`, descending into nested records, and fails on the first string whose
      * field schema declares the uuid logical type but whose text is not a valid UUID.
      */
    private def checkLogicalTypes(record: GenericRecord): Try[Unit] = {
      import collection.JavaConverters._

      // True when the schema carries a logical type with the same name as Avro's uuid type.
      def declaresUuid(fieldSchema: Schema): Boolean =
        Option(fieldSchema.getLogicalType).exists(_.getName == LogicalTypes.uuid.getName)

      def checkAll(avroField: AnyRef, fieldSchema: Option[Schema]): Try[Unit] =
        avroField match {
          case nested: GenericRecord =>
            val nestedFields = nested.getSchema.getFields.asScala.toList
            nestedFields.traverse(f => checkAll(nested.get(f.name), f.schema.some)).void
          case text: Utf8 if fieldSchema.exists(declaresUuid) =>
            val raw = text.toString
            if (isUuidValid(raw)) Success(())
            else Failure(InvalidLogicalTypeError("UUID", raw))
          case _ =>
            Success(())
        }

      val topLevelFields = record.getSchema.getFields.asScala.toList
      topLevelFields.traverse(f => checkAll(record.get(f.name), f.schema.some)).void
    }

    /** Collects the key names of every JSON object in the payload. Each name is the key
      * prefixed with the key of its immediate parent object (no prefix at the top level).
      */
    private def getAllPayloadFieldNames: Set[String] = {
      import spray.json._

      def loop(cur: JsValue, extraName: Option[String]): Set[String] =
        cur match {
          case JsObject(members) =>
            val parentPrefix = extraName.getOrElse("")
            members.flatMap { case (key, value) =>
              loop(value, Some(key)) + (parentPrefix + key)
            }.toSet
          case _ =>
            Set.empty
        }

      loop(s.parseJson, None)
    }

    /** Collects the field names of `schema` and of every record-typed field schema below it,
      * using the same parent-prefix naming as [[getAllPayloadFieldNames]].
      */
    private def getAllSchemaFieldNames(schema: Schema): Set[String] = {
      import collection.JavaConverters._

      def loop(sch: Schema, extraName: Option[String]): Set[String] =
        if (sch.getType == Schema.Type.RECORD) {
          val parentPrefix = extraName.getOrElse("")
          sch.getFields.asScala.toSet.flatMap { field: Schema.Field =>
            loop(field.schema, field.name.some) ++ Set(parentPrefix + field.name)
          }
        } else Set.empty

      loop(schema, None)
    }

    /** Decodes the wrapped JSON string against `schema`.
      *
      * @param schema              schema the JSON is decoded with
      * @param useStrictValidation when true, fail with [[ValidationExtraFieldsError]] if the
      *                            payload contains field names the schema does not
      * @return the decoded record once the logical-type check has also passed, otherwise
      *         a `Failure` with the first error encountered
      */
    def toGenericRecord(schema: Schema, useStrictValidation: Boolean): Try[GenericRecord] = {
      val decoded = Try {
        if (useStrictValidation) {
          val unexpected = getAllPayloadFieldNames.diff(getAllSchemaFieldNames(schema))
          if (unexpected.nonEmpty) throw ValidationExtraFieldsError(unexpected)
        }
        val jsonDecoder = new DecoderFactory().jsonDecoder(schema, s)
        new GenericDatumReader[GenericRecord](schema).read(null, jsonDecoder)
      }
      decoded.flatTap(checkLogicalTypes)
    }
  }
}