org.apache.avro.generic.GenericDatumWriter Scala Examples
The following examples show how to use org.apache.avro.generic.GenericDatumWriter.
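As a quick orientation before the project examples, here is a minimal sketch of the core GenericDatumWriter pattern that most of them build on: parse a Schema, fill a GenericData.Record, and push it through a BinaryEncoder. The schema and field names below are made up purely for illustration.

import java.io.ByteArrayOutputStream

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

object GenericDatumWriterSketch extends App {
  // Hypothetical schema, used only to illustrate the write path.
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"User","fields":[
      |  {"name":"name","type":"string"},
      |  {"name":"age","type":"int"}
      |]}""".stripMargin)

  val record: GenericRecord = new GenericData.Record(schema)
  record.put("name", "Alice")
  record.put("age", 42)

  val out = new ByteArrayOutputStream()
  val writer = new GenericDatumWriter[GenericRecord](schema)
  val encoder = EncoderFactory.get().binaryEncoder(out, null)

  writer.write(record, encoder)   // serialize the record as Avro binary
  encoder.flush()

  val bytes = out.toByteArray     // raw Avro bytes, no data-file container header
}

The project examples that follow wrap this same pattern in data-file containers (DataFileWriter), serialization frameworks (avro4s, MLeap), and benchmarks.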
Example 1
Source File: DefaultFrameWriter.scala From mleap with Apache License 2.0
package ml.combust.mleap.avro

import java.io.ByteArrayOutputStream
import java.nio.charset.Charset

import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{GenericData, GenericDatumWriter}
import SchemaConverter._
import ml.combust.mleap.runtime.frame.LeapFrame
import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameWriter}
import resource._

import scala.util.{Failure, Try}

class DefaultFrameWriter[LF <: LeapFrame[LF]](frame: LF) extends FrameWriter {
  val valueConverter = ValueConverter()

  override def toBytes(charset: Charset = BuiltinFormats.charset): Try[Array[Byte]] = {
    (for(out <- managed(new ByteArrayOutputStream())) yield {
      val writers = frame.schema.fields.map(_.dataType).map(valueConverter.mleapToAvro)
      val avroSchema = frame.schema: Schema
      val record = new GenericData.Record(avroSchema)
      val datumWriter = new GenericDatumWriter[GenericData.Record](avroSchema)
      val writer = new DataFileWriter[GenericData.Record](datumWriter)
      writer.create(avroSchema, out)

      for(row <- frame.collect()) {
        var i = 0
        for(writer <- writers) {
          record.put(i, writer(row.getRaw(i)))
          i = i + 1
        }

        Try(writer.append(record)) match {
          case Failure(error) => error.printStackTrace()
          case _ =>
        }
      }

      writer.close()
      out.toByteArray
    }).tried
  }
}
Example 2
Source File: DefaultRowWriter.scala From mleap with Apache License 2.0
package ml.combust.mleap.avro

import java.io.ByteArrayOutputStream
import java.nio.charset.Charset

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumWriter}
import org.apache.avro.io.{BinaryEncoder, EncoderFactory}
import SchemaConverter._
import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowWriter}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.runtime.frame.Row
import resource._

import scala.util.Try

class DefaultRowWriter(override val schema: StructType) extends RowWriter {
  val valueConverter = ValueConverter()
  lazy val writers = schema.fields.map(_.dataType).map(valueConverter.mleapToAvro)
  val avroSchema = schema: Schema
  val datumWriter = new GenericDatumWriter[GenericData.Record](avroSchema)
  var encoder: BinaryEncoder = null
  var record = new GenericData.Record(avroSchema)

  override def toBytes(row: Row, charset: Charset = BuiltinFormats.charset): Try[Array[Byte]] = {
    (for(out <- managed(new ByteArrayOutputStream(1024))) yield {
      encoder = EncoderFactory.get().binaryEncoder(out, encoder)

      var i = 0
      for(writer <- writers) {
        record.put(i, writer(row.getRaw(i)))
        i = i + 1
      }
      datumWriter.write(record, encoder)
      encoder.flush()

      out.toByteArray
    }).tried
  }
}
Example 3
Source File: AvroTypeSpec.scala From shapeless-datatype with Apache License 2.0
package shapeless.datatype.avro

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.net.URI
import java.nio.ByteBuffer

import com.google.protobuf.ByteString
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.joda.time.Instant
import org.scalacheck.Prop.forAll
import org.scalacheck.ScalacheckShapeless._
import org.scalacheck._
import shapeless._
import shapeless.datatype.record._

import scala.reflect.runtime.universe._

object AvroTypeSpec extends Properties("AvroType") {
  import shapeless.datatype.test.Records._
  import shapeless.datatype.test.SerializableUtils._

  implicit def compareByteArrays(x: Array[Byte], y: Array[Byte]) = java.util.Arrays.equals(x, y)
  implicit def compareIntArrays(x: Array[Int], y: Array[Int]) = java.util.Arrays.equals(x, y)

  def roundTrip[A: TypeTag, L <: HList](m: A)(implicit
    gen: LabelledGeneric.Aux[A, L],
    fromL: FromAvroRecord[L],
    toL: ToAvroRecord[L],
    mr: MatchRecord[L]
  ): Boolean = {
    val t = ensureSerializable(AvroType[A])
    val f1: SerializableFunction[A, GenericRecord] =
      new SerializableFunction[A, GenericRecord] {
        override def apply(m: A): GenericRecord = t.toGenericRecord(m)
      }
    val f2: SerializableFunction[GenericRecord, Option[A]] =
      new SerializableFunction[GenericRecord, Option[A]] {
        override def apply(m: GenericRecord): Option[A] = t.fromGenericRecord(m)
      }
    val toFn = ensureSerializable(f1)
    val fromFn = ensureSerializable(f2)
    val copy = fromFn(roundTripRecord(toFn(m)))
    val rm = RecordMatcher[A]
    copy.exists(rm(_, m))
  }

  def roundTripRecord(r: GenericRecord): GenericRecord = {
    val writer = new GenericDatumWriter[GenericRecord](r.getSchema)
    val baos = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(baos, null)
    writer.write(r, encoder)
    encoder.flush()
    baos.close()
    val bytes = baos.toByteArray

    val reader = new GenericDatumReader[GenericRecord](r.getSchema)
    val bais = new ByteArrayInputStream(bytes)
    val decoder = DecoderFactory.get().binaryDecoder(bais, null)
    reader.read(null, decoder)
  }

  implicit val byteStringAvroType = AvroType.at[ByteString](Schema.Type.BYTES)(
    v => ByteString.copyFrom(v.asInstanceOf[ByteBuffer]),
    v => ByteBuffer.wrap(v.toByteArray)
  )
  implicit val instantAvroType =
    AvroType.at[Instant](Schema.Type.LONG)(v => new Instant(v.asInstanceOf[Long]), _.getMillis)

  property("required") = forAll { m: Required => roundTrip(m) }
  property("optional") = forAll { m: Optional => roundTrip(m) }
  property("repeated") = forAll { m: Repeated => roundTrip(m) }
  property("mixed") = forAll { m: Mixed => roundTrip(m) }
  property("nested") = forAll { m: Nested => roundTrip(m) }
  property("seqs") = forAll { m: Seqs => roundTrip(m) }

  implicit val uriAvroType =
    AvroType.at[URI](Schema.Type.STRING)(v => URI.create(v.toString), _.toString)
  property("custom") = forAll { m: Custom => roundTrip(m) }
}
Example 4
Source File: AvroIO.scala From ratatool with Apache License 2.0
package com.spotify.ratatool.io

import java.io.{File, InputStream, OutputStream}
import java.nio.ByteBuffer
import java.nio.channels.SeekableByteChannel

import com.google.common.io.ByteStreams
import org.apache.avro.Schema
import org.apache.avro.file.{DataFileReader, DataFileWriter, SeekableByteArrayInput, SeekableInput}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DatumReader, DatumWriter}
import org.apache.avro.reflect.{ReflectDatumReader, ReflectDatumWriter}
import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter, SpecificRecord}
import org.apache.beam.sdk.io.FileSystems
import org.apache.beam.sdk.io.fs.MatchResult.Metadata

import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag

object AvroIO {
  // Only an excerpt of the object is shown here; helpers such as createDatumWriter and
  // FileStorage come from the rest of the ratatool project.

  def writeToOutputStream[T: ClassTag](data: Iterable[T],
                                       schema: Schema,
                                       os: OutputStream): Unit = {
    val fileWriter = new DataFileWriter(createDatumWriter[T]).create(schema, os)
    data.foreach(fileWriter.append)
    fileWriter.close()
  }

  def getAvroSchemaFromFile(path: String): Schema = {
    require(FileStorage(path).exists, s"File `$path` does not exist!")
    val files = FileStorage(path).listFiles.filter(_.resourceId.getFilename.endsWith(".avro"))
    require(files.nonEmpty, s"File `$path` does not contain avro files")
    val reader = new GenericDatumReader[GenericRecord]()
    val dfr = new DataFileReader[GenericRecord](AvroIO.getAvroSeekableInput(files.head), reader)
    dfr.getSchema
  }

  private def getAvroSeekableInput(meta: Metadata): SeekableInput = new SeekableInput {
    require(meta.isReadSeekEfficient)
    private val in = FileSystems.open(meta.resourceId()).asInstanceOf[SeekableByteChannel]
    override def read(b: Array[Byte], off: Int, len: Int): Int =
      in.read(ByteBuffer.wrap(b, off, len))
    override def tell(): Long = in.position()
    override def length(): Long = in.size()
    override def seek(p: Long): Unit = in.position(p)
    override def close(): Unit = in.close()
  }
}
Example 5
Source File: AvroDataOutputStream.scala From avro4s with Apache License 2.0
package com.sksamuel.avro4s

import java.io.OutputStream

import org.apache.avro.Schema
import org.apache.avro.file.{CodecFactory, DataFileWriter}
import org.apache.avro.generic.{GenericDatumWriter, GenericRecord}

case class AvroDataOutputStream[T](os: OutputStream, codec: CodecFactory)
                                  (implicit encoder: Encoder[T]) extends AvroOutputStream[T] {

  val resolved = encoder.resolveEncoder()

  val (writer, writeFn) = resolved.schema.getType match {
    case Schema.Type.DOUBLE | Schema.Type.LONG | Schema.Type.BOOLEAN |
         Schema.Type.STRING | Schema.Type.INT | Schema.Type.FLOAT =>
      val datumWriter = new GenericDatumWriter[T](resolved.schema)
      val dataFileWriter = new DataFileWriter[T](datumWriter)
      dataFileWriter.setCodec(codec)
      dataFileWriter.create(resolved.schema, os)
      (dataFileWriter, (t: T) => dataFileWriter.append(t))
    case _ =>
      val datumWriter = new GenericDatumWriter[GenericRecord](resolved.schema)
      val dataFileWriter = new DataFileWriter[GenericRecord](datumWriter)
      dataFileWriter.setCodec(codec)
      dataFileWriter.create(resolved.schema, os)
      (dataFileWriter, (t: T) => {
        val record = resolved.encode(t).asInstanceOf[GenericRecord]
        dataFileWriter.append(record)
      })
  }

  override def close(): Unit = {
    flush()
    writer.close()
  }

  override def write(t: T): Unit = {
    writeFn(t)
  }

  override def flush(): Unit = writer.flush()

  override def fSync(): Unit = writer.fSync()
}
Example 6
Source File: GithubIssue235.scala From avro4s with Apache License 2.0
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{Decoder, Encoder, RecordFormat, SchemaFor}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

case class Label(value: String) extends AnyVal
case class Value[A](label: Label, value: A)

sealed trait OneOrTwo[A]
case class One[A](value: Value[A]) extends OneOrTwo[A]
case class Two[A](first: Value[A], second: Value[A]) extends OneOrTwo[A]

case class OneOrTwoWrapper[A](t: OneOrTwo[A])

object Bug {

  def apply[T <: Product](a: T)(
    implicit schemaFor: SchemaFor[T],
    encoder: Encoder[T],
    decoder: Decoder[T]
  ): Unit = {

    val format = RecordFormat[T]
    val schema = schemaFor.schema

    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val datumWriter = new GenericDatumWriter[GenericRecord](schema)

    val stream = new ByteArrayOutputStream()
    val bEncoder = EncoderFactory.get().binaryEncoder(stream, null)

    datumWriter.write(format.to(a), bEncoder)
    bEncoder.flush()

    val bytes = stream.toByteArray
    val bDecoder = DecoderFactory.get().binaryDecoder(bytes, null)
    val record = datumReader.read(null, bDecoder)
    require(format.from(record) == a)
  }
}

class GithubIssue235 extends AnyFunSuite with Matchers {

  test("Broken typeclass derivation upgrading from 1.9.0 to 2.0.1 #235") {
    val o = OneOrTwoWrapper(One(Value(Label("lbl"), "foo")))
    Bug(o)
  }
}
Example 7
Source File: Encoding.scala From avro4s with Apache License 2.0
package benchmarks

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer

import benchmarks.record._
import com.sksamuel.avro4s._
import org.apache.avro.generic.{GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

object Encoding extends BenchmarkHelpers {

  @State(Scope.Thread)
  class Setup {
    val record = RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t))

    val specificRecord = {
      import benchmarks.record.generated.AttributeValue._
      import benchmarks.record.generated._
      new RecordWithUnionAndTypeField(new ValidInt(255, t))
    }

    val (avro4sEncoder, avro4sWriter) = {
      val schema = AvroSchema[RecordWithUnionAndTypeField]
      val encoder = Encoder[RecordWithUnionAndTypeField]
      val writer = new GenericDatumWriter[GenericRecord](schema)
      (encoder, writer)
    }

    val (handrolledEncoder, handrolledWriter) = {
      import benchmarks.handrolled_codecs._
      implicit val codec: AttributeValueCodec[Int] = AttributeValueCodec[Int]
      implicit val schemaForValid = codec.schemaForValid
      val schema = AvroSchema[RecordWithUnionAndTypeField]
      val encoder = Encoder[RecordWithUnionAndTypeField]
      val writer = new GenericDatumWriter[GenericRecord](schema)
      (encoder, writer)
    }
  }
}

class Encoding extends CommonParams with BenchmarkHelpers {
  import Encoding._

  def encode[T](value: T, encoder: Encoder[T], writer: GenericDatumWriter[GenericRecord]): ByteBuffer = {
    val outputStream = new ByteArrayOutputStream(512)
    val record = encoder.encode(value).asInstanceOf[GenericRecord]
    val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null)
    writer.write(record, enc)
    ByteBuffer.wrap(outputStream.toByteArray)
  }

  @Benchmark
  def avroSpecificRecord(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(setup.specificRecord.toByteBuffer)

  @Benchmark
  def avro4sGenerated(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(encode(setup.record, setup.avro4sEncoder, setup.avro4sWriter))

  @Benchmark
  def avro4sHandrolled(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(encode(setup.record, setup.handrolledEncoder, setup.handrolledWriter))
}
Example 8
Source File: Decoding.scala From avro4s with Apache License 2.0
package benchmarks

import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.util.Collections

import benchmarks.record._
import com.sksamuel.avro4s._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.util.ByteBufferInputStream
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole

object Decoding extends BenchmarkHelpers {

  @State(Scope.Thread)
  class Setup {
    val avroBytes = {
      import benchmarks.record.generated.AttributeValue._
      import benchmarks.record.generated._
      new RecordWithUnionAndTypeField(new ValidInt(255, t)).toByteBuffer
    }

    val avro4sBytes = encode(RecordWithUnionAndTypeField(AttributeValue.Valid[Int](255, t)))

    val (handrolledDecoder, handrolledReader) = {
      import benchmarks.handrolled_codecs._
      implicit val codec: Codec[AttributeValue[Int]] = AttributeValueCodec[Int]
      implicit val schemaFor: SchemaFor[AttributeValue[Int]] = SchemaFor[AttributeValue[Int]](codec.schema)
      val recordSchemaFor = SchemaFor[RecordWithUnionAndTypeField]
      val decoder = Decoder[RecordWithUnionAndTypeField].withSchema(recordSchemaFor)
      val reader = new GenericDatumReader[GenericRecord](recordSchemaFor.schema)
      (decoder, reader)
    }

    val (avro4sDecoder, avro4sReader) = {
      val decoder = Decoder[RecordWithUnionAndTypeField]
      val reader = new GenericDatumReader[GenericRecord](decoder.schema)
      (decoder, reader)
    }
  }

  def encode[T: Encoder: SchemaFor](value: T): ByteBuffer = {
    val outputStream = new ByteArrayOutputStream(512)
    val encoder = Encoder[T]
    val schema = AvroSchema[T]
    val record = encoder.encode(value).asInstanceOf[GenericRecord]
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val enc = EncoderFactory.get().directBinaryEncoder(outputStream, null)
    writer.write(record, enc)
    ByteBuffer.wrap(outputStream.toByteArray)
  }
}

class Decoding extends CommonParams with BenchmarkHelpers {
  import Decoding._

  def decode[T](bytes: ByteBuffer, decoder: Decoder[T], reader: GenericDatumReader[GenericRecord]): T = {
    val dec =
      DecoderFactory.get().binaryDecoder(new ByteBufferInputStream(Collections.singletonList(bytes.duplicate)), null)
    val record = reader.read(null, dec)
    decoder.decode(record)
  }

  @Benchmark
  def avroSpecificRecord(setup: Setup, blackhole: Blackhole) = {
    import benchmarks.record.generated._
    blackhole.consume(RecordWithUnionAndTypeField.fromByteBuffer(setup.avroBytes.duplicate))
  }

  @Benchmark
  def avro4sHandrolled(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(decode(setup.avro4sBytes, setup.handrolledDecoder, setup.handrolledReader))

  @Benchmark
  def avro4sGenerated(setup: Setup, blackhole: Blackhole) =
    blackhole.consume(decode(setup.avro4sBytes, setup.avro4sDecoder, setup.avro4sReader))
}
Example 9
Source File: SparkAvroDecoder.scala From cloudflow with Apache License 2.0
package cloudflow.spark.avro

import org.apache.log4j.Logger

import java.io.ByteArrayOutputStream

import scala.reflect.runtime.universe._

import org.apache.avro.generic.{ GenericDatumReader, GenericDatumWriter, GenericRecord }
import org.apache.avro.io.{ DecoderFactory, EncoderFactory }
import org.apache.spark.sql.{ Dataset, Encoder, Row }
import org.apache.spark.sql.catalyst.encoders.{ encoderFor, ExpressionEncoder, RowEncoder }
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types.StructType
import org.apache.avro.Schema

import cloudflow.spark.sql.SQLImplicits._

case class EncodedKV(key: String, value: Array[Byte])

case class SparkAvroDecoder[T: Encoder: TypeTag](avroSchema: String) {

  val encoder: Encoder[T] = implicitly[Encoder[T]]
  val sqlSchema: StructType = encoder.schema
  val encoderForDataColumns: ExpressionEncoder[Row] = RowEncoder(sqlSchema)
  @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema)
  @transient lazy val rowConverter = SchemaConverters.createConverterToSQL(_avroSchema, sqlSchema)
  @transient lazy val datumReader = new GenericDatumReader[GenericRecord](_avroSchema)
  @transient lazy val decoder = DecoderFactory.get

  def decode(bytes: Array[Byte]): Row = {
    val binaryDecoder = decoder.binaryDecoder(bytes, null)
    val record = datumReader.read(null, binaryDecoder)
    rowConverter(record).asInstanceOf[GenericRow]
  }
}

case class SparkAvroEncoder[T: Encoder: TypeTag](avroSchema: String) {

  @transient lazy val log = Logger.getLogger(getClass.getName)

  val BufferSize = 5 * 1024 // 5 Kb

  val encoder = implicitly[Encoder[T]]
  val sqlSchema = encoder.schema
  @transient lazy val _avroSchema = new Schema.Parser().parse(avroSchema)

  val recordName = "topLevelRecord" // ???
  val recordNamespace = "recordNamespace" // ???
  @transient lazy val converter = AvroConverter.createConverterToAvro(sqlSchema, recordName, recordNamespace)

  // Risk: This process is memory intensive. Might require thread-level buffers to optimize memory usage
  def rowToBytes(row: Row): Array[Byte] = {
    val genRecord = converter(row).asInstanceOf[GenericRecord]
    if (log.isDebugEnabled) log.debug(s"genRecord = $genRecord")
    val datumWriter = new GenericDatumWriter[GenericRecord](_avroSchema)
    val avroEncoder = EncoderFactory.get
    val byteArrOS = new ByteArrayOutputStream(BufferSize)
    val binaryEncoder = avroEncoder.binaryEncoder(byteArrOS, null)
    datumWriter.write(genRecord, binaryEncoder)
    binaryEncoder.flush()
    byteArrOS.toByteArray
  }

  def encode(dataset: Dataset[T]): Dataset[Array[Byte]] =
    dataset.toDF().mapPartitions(rows ⇒ rows.map(rowToBytes)).as[Array[Byte]]

  // Note to self: I'm not sure how heavy this chain of transformations is
  def encodeWithKey(dataset: Dataset[T], keyFun: T ⇒ String): Dataset[EncodedKV] = {
    val encoder = encoderFor[T]
    implicit val rowEncoder = RowEncoder(encoder.schema).resolveAndBind()
    dataset.map { value ⇒
      val key = keyFun(value)
      val internalRow = encoder.toRow(value)
      val row = rowEncoder.fromRow(internalRow)
      val bytes = rowToBytes(row)
      EncodedKV(key, bytes)
    }
  }
}
Example 10
Source File: AvroSerializer.scala From stream-reactor with Apache License 2.0
package com.datamountaineer.streamreactor.connect.bloomberg.avro

import java.io.ByteArrayOutputStream

import com.datamountaineer.streamreactor.connect.bloomberg.BloombergData
import com.datamountaineer.streamreactor.connect.bloomberg.avro.AvroSchemaGenerator._
import org.apache.avro.Schema
import org.apache.avro.generic.GenericData.Record
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory

import scala.collection.JavaConverters._

object AvroSerializer {

  private def recursive(record: GenericData.Record, schema: Schema, fieldName: String, value: Any): Unit = {
    value match {
      case _: Boolean => record.put(fieldName, value)
      case _: Int => record.put(fieldName, value)
      case _: Long => record.put(fieldName, value)
      case _: Double => record.put(fieldName, value)
      case _: Char => record.put(fieldName, value)
      case _: Float => record.put(fieldName, value)
      case _: String => record.put(fieldName, value)
      case list: java.util.List[_] =>
        val tmpSchema = schema.getField(fieldName).schema()
        val itemSchema = if (tmpSchema.getType == Schema.Type.UNION) tmpSchema.getTypes.get(1) else tmpSchema
        require(itemSchema.getType == Schema.Type.ARRAY)
        //we might have a record not a primitive
        if (itemSchema.getElementType.getType == Schema.Type.RECORD) {
          val items = new GenericData.Array[GenericData.Record](list.size(), itemSchema)
          list.asScala.foreach { i =>
            //only map is allowed
            val m = i.asInstanceOf[java.util.Map[String, Any]]
            items.add(m.toAvroRecord(itemSchema.getElementType))
          }
          record.put(fieldName, items)
        } else {
          val items = new GenericData.Array[Any](list.size(), itemSchema)
          items.addAll(list)
          record.put(fieldName, items)
        }
      case map: java.util.LinkedHashMap[String @unchecked, _] =>
        //record schema
        val fieldSchema = schema.getField(fieldName).schema()
        val nestedSchema = if (fieldSchema.getType == Schema.Type.UNION) fieldSchema.getTypes.get(1) else fieldSchema
        val nestedRecord = new Record(nestedSchema)
        map.entrySet().asScala.foreach(e =>
          recursive(nestedRecord, nestedSchema, e.getKey, e.getValue))
        record.put(fieldName, nestedRecord)
    }
  }
}
Example 11
Source File: Sedes.scala From shc with Apache License 2.0
package org.apache.spark.sql.execution.datasources.hbase

import java.io.ByteArrayInputStream

import org.apache.avro.Schema
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io._
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._

trait Sedes {
  def serialize(value: Any): Array[Byte]
  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
}

class DoubleSedes extends Sedes {
  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])

  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
    Bytes.toLong(bytes, start)
  }
}
Example 12
Source File: AvroSerializer.scala From kafka-connect-common with Apache License 2.0
package com.datamountaineer.streamreactor.connect.serialization

import java.io.{ByteArrayOutputStream, InputStream, OutputStream}

import com.sksamuel.avro4s.{RecordFormat, SchemaFor}
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}

object AvroSerializer {

  def write[T <: Product](t: T)(implicit os: OutputStream, formatter: RecordFormat[T], schemaFor: SchemaFor[T]): Unit =
    write(apply(t), schemaFor())

  def write(record: GenericRecord, schema: Schema)(implicit os: OutputStream) = {
    val writer = new GenericDatumWriter[GenericRecord](schema)
    val encoder = EncoderFactory.get().binaryEncoder(os, null)
    writer.write(record, encoder)
    encoder.flush()
    os.flush()
  }

  def getBytes[T <: Product](t: T)(implicit recordFormat: RecordFormat[T], schemaFor: SchemaFor[T]): Array[Byte] =
    getBytes(recordFormat.to(t), schemaFor())

  def getBytes(record: GenericRecord, schema: Schema): Array[Byte] = {
    implicit val output = new ByteArrayOutputStream()
    write(record, schema)
    output.toByteArray
  }

  def read(is: InputStream, schema: Schema): GenericRecord = {
    val reader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().binaryDecoder(is, null)
    reader.read(null, decoder)
  }

  def read[T <: Product](is: InputStream)(implicit schemaFor: SchemaFor[T], recordFormat: RecordFormat[T]): T =
    recordFormat.from(read(is, schemaFor()))

  def apply[T <: Product](t: T)(implicit formatter: RecordFormat[T]): GenericRecord = formatter.to(t)
}