org.apache.avro.file.DataFileReader Scala Examples
The following examples show how to use org.apache.avro.file.DataFileReader.
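All of the examples share the same core pattern: construct a DatumReader (generic, specific, or reflect), wrap it in a DataFileReader, iterate with hasNext/next, and close the reader. Below is a minimal, self-contained sketch of that pattern; the users.avro file name and the MinimalRead object are illustrative, not taken from any of the projects below.

import java.io.File

import org.apache.avro.file.DataFileReader
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}

object MinimalRead {
  def main(args: Array[String]): Unit = {
    // No schema is passed here: the reader uses the writer schema
    // embedded in the Avro container file's header.
    val datumReader = new GenericDatumReader[GenericRecord]()
    val dataFileReader = new DataFileReader[GenericRecord](new File("users.avro"), datumReader)
    try {
      var record: GenericRecord = null
      while (dataFileReader.hasNext) {
        // Passing the previous record back in reuses its allocation --
        // the same next(sameRecord) idiom several examples below use.
        record = dataFileReader.next(record)
        println(record)
      }
    } finally {
      dataFileReader.close()
    }
  }
}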
Example 1
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0
import test._
import org.specs2.mutable.Specification

import java.io.File

import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

import DefaultEnum._

class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schema = new DataFileReader(file, dummyRecord).getSchema
      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      val sameRecord = dataFileReader.next

      sameRecord.suit === SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === Vector(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}
Example 2
Source File: RollingFileWriterSuite.scala From iep-apps with Apache License 2.0
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import com.netflix.atlas.core.model.Datapoint
import com.netflix.spectator.api.NoopRegistry
import org.apache.avro.file.DataFileReader
import org.apache.avro.specific.SpecificDatumReader
import org.scalatest.BeforeAndAfter
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite

import scala.collection.mutable.ListBuffer

class RollingFileWriterSuite extends AnyFunSuite with BeforeAndAfter with BeforeAndAfterAll {

  private val outputDir = "./target/unitTestAvroOutput"
  private val registry = new NoopRegistry

  before {
    listFilesSorted(outputDir).foreach(_.delete()) // Clean up leftover files if any exist
    Files.createDirectories(Paths.get(outputDir))
  }

  after {
    listFilesSorted(outputDir).foreach(_.delete())
    Files.deleteIfExists(Paths.get(outputDir))
  }

  // Write 3 datapoints: the first two are written to file 1, the writer rolls over,
  // and the third is written to file 2.
  test("avro writer rollover by max records") {
    val rollingConf = RollingConfig(2, 12000, 12000)
    val hourStart = 3600000
    val hourEnd = 7200000
    val writer =
      new RollingFileWriter(s"$outputDir/prefix", rollingConf, hourStart, hourEnd, registry)
    writer.initialize()
    createData(hourStart, 0, 1, 2).foreach(writer.write)
    writer.write(Datapoint(Map.empty, hourEnd, 3)) // out of range, should be ignored
    writer.close()

    // Check number of files
    val files = listFilesSorted(outputDir)
    assert(files.size == 2)

    // Check file 1 records
    val file1 = files.head
    assert(file1.getName.endsWith(".0000-0001"))
    val dpArray1 = readAvro(file1)
    assert(dpArray1.size == 2)
    assert(dpArray1(0).getValue == 0)
    assert(dpArray1(0).getTags.get("node") == "0")
    assert(dpArray1(1).getValue == 1)
    assert(dpArray1(1).getTags.get("node") == "1")

    // Check file 2 records
    val file2 = files.last
    assert(file2.getName.endsWith(".0002-0002"))
    val dpArray2 = readAvro(file2)
    assert(dpArray2.size == 1)
    assert(dpArray2(0).getValue == 2)
    assert(dpArray2(0).getTags.get("node") == "2")
  }

  private def createData(startTime: Long, values: Double*): List[Datapoint] = {
    values.toList.zipWithIndex.map {
      case (v, i) =>
        val tags = Map(
          "name" -> "cpu",
          "node" -> s"$i"
        )
        Datapoint(tags, startTime + i * 1000, v, 60000)
    }
  }

  private def listFilesSorted(dir: String): List[File] = {
    val d = new File(dir)
    if (!d.exists()) {
      Nil
    } else {
      d.listFiles().filter(_.isFile).toList.sortBy(_.getName)
    }
  }

  private def readAvro(file: File): Array[AvroDatapoint] = {
    val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint])
    val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader)
    val dpListBuf = ListBuffer.empty[AvroDatapoint]
    try {
      while (dataFileReader.hasNext) {
        dpListBuf.addOne(dataFileReader.next)
      }
    } finally {
      dataFileReader.close()
    }
    dpListBuf.toArray
  }
}
Example 3
Source File: AvroTest.scala From iep-apps with Apache License 2.0
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import org.apache.avro.file.DataFileReader
import org.apache.avro.specific.SpecificDatumReader

// Read metadata for all avro files in the given directory
object AvroTest {

  def main(args: Array[String]): Unit = {
    val dir = args(0)
    Files
      .walk(Paths.get(dir))
      .filter(path => Files.isRegularFile(path))
      .forEach(p => readFile(p.toFile))
  }

  private def readFile(file: File): Unit = {
    println(s"##### Reading file: $file")
    var count = 0
    val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint])
    val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader)
    while (dataFileReader.hasNext) {
      dataFileReader.next()
      count += 1
    }
    println(s"  blockCount = ${dataFileReader.getBlockCount}")
    println(s"  blockSize = ${dataFileReader.getBlockSize}")
    println(s"  numRecords = $count")
    dataFileReader.close()
    println()
  }
}
Example 4
Source File: OutputStreamTest.scala From avro4s with Apache License 2.0
package com.sksamuel.avro4s.streams.output

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s._
import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.io.DecoderFactory
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

trait OutputStreamTest extends AnyFunSuite with Matchers {

  def readData[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readData(out.toByteArray)

  def readData[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val datumReader = new GenericDatumReader[GenericRecord](AvroSchema[T])
    val dataFileReader =
      new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes), datumReader)
    dataFileReader.next
  }

  def writeData[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.data[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def readBinary[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readBinary(out.toByteArray)

  def readBinary[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val datumReader = new GenericDatumReader[GenericRecord](AvroSchema[T])
    val decoder = DecoderFactory.get().binaryDecoder(new SeekableByteArrayInput(bytes), null)
    datumReader.read(null, decoder)
  }

  def writeBinary[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.binary[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def readJson[T: SchemaFor](out: ByteArrayOutputStream): GenericRecord = readJson(out.toByteArray)

  def readJson[T: SchemaFor](bytes: Array[Byte]): GenericRecord = {
    val schema = AvroSchema[T]
    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val decoder = DecoderFactory.get().jsonDecoder(schema, new SeekableByteArrayInput(bytes))
    datumReader.read(null, decoder)
  }

  def writeJson[T: Encoder : SchemaFor](t: T): ByteArrayOutputStream = {
    val out = new ByteArrayOutputStream
    val avro = AvroOutputStream.json[T].to(out).build()
    avro.write(t)
    avro.close()
    out
  }

  def writeRead[T: Encoder : SchemaFor](t: T)(fn: GenericRecord => Any): Unit = {
    {
      val out = writeData(t)
      val record = readData(out)
      fn(record)
    }
    {
      val out = writeBinary(t)
      val record = readBinary(out)
      fn(record)
    }
    {
      val out = writeJson(t)
      val record = readJson(out)
      fn(record)
    }
  }
}
Example 5
Source File: GithubIssue191.scala From avro4s with Apache License 2.0
package com.sksamuel.avro4s.github

import java.io.ByteArrayOutputStream

import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema}
import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

final case class SN(value: String) extends AnyVal
final case class SimpleUser(name: String, sn: Option[SN])

class GithubIssue191 extends AnyFunSuite with Matchers {

  test("writing out AnyVal in an option") {
    implicit val schema = AvroSchema[SimpleUser]
    val bytes = new ByteArrayOutputStream
    val out = AvroOutputStream.data[SimpleUser].to(bytes).build()
    out.write(SimpleUser("Tom", Some(SN("123"))))
    out.close()

    val datumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader =
      new DataFileReader[GenericRecord](new SeekableByteArrayInput(bytes.toByteArray), datumReader)
    val record = new Iterator[GenericRecord] {
      override def hasNext: Boolean = dataFileReader.hasNext
      override def next(): GenericRecord = dataFileReader.next
    }.toList.head
    record.getSchema shouldBe schema
    record.get("name") shouldBe new Utf8("Tom")
    record.get("sn") shouldBe new Utf8("123")
  }
}
Example 6
Source File: AvroIO.scala From ratatool with Apache License 2.0
package com.spotify.ratatool.io

import java.io.{File, InputStream, OutputStream}
import java.nio.ByteBuffer
import java.nio.channels.SeekableByteChannel

import com.google.common.io.ByteStreams
import org.apache.avro.Schema
import org.apache.avro.file.{DataFileReader, DataFileWriter, SeekableByteArrayInput, SeekableInput}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DatumReader, DatumWriter}
import org.apache.avro.reflect.{ReflectDatumReader, ReflectDatumWriter}
import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter, SpecificRecord}
import org.apache.beam.sdk.io.FileSystems
import org.apache.beam.sdk.io.fs.MatchResult.Metadata

import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag

// Excerpt from the AvroIO helper object; createDatumWriter and FileStorage
// are defined elsewhere in the original source file.
object AvroIO {

  def writeToOutputStream[T: ClassTag](data: Iterable[T], schema: Schema, os: OutputStream): Unit = {
    val fileWriter = new DataFileWriter(createDatumWriter[T]).create(schema, os)
    data.foreach(fileWriter.append)
    fileWriter.close()
  }

  def getAvroSchemaFromFile(path: String): Schema = {
    require(FileStorage(path).exists, s"File `$path` does not exist!")
    val files = FileStorage(path).listFiles.filter(_.resourceId.getFilename.endsWith(".avro"))
    require(files.nonEmpty, s"File `$path` does not contain avro files")
    val reader = new GenericDatumReader[GenericRecord]()
    val dfr = new DataFileReader[GenericRecord](AvroIO.getAvroSeekableInput(files.head), reader)
    dfr.getSchema
  }

  private def getAvroSeekableInput(meta: Metadata): SeekableInput = new SeekableInput {
    require(meta.isReadSeekEfficient)
    private val in = FileSystems.open(meta.resourceId()).asInstanceOf[SeekableByteChannel]

    override def read(b: Array[Byte], off: Int, len: Int): Int =
      in.read(ByteBuffer.wrap(b, off, len))

    override def tell(): Long = in.position()

    override def length(): Long = in.size()

    override def seek(p: Long): Unit = in.position(p)

    override def close(): Unit = in.close()
  }
}
Example 7
Source File: DefaultFrameReader.scala From mleap with Apache License 2.0
package ml.combust.mleap.avro

import java.nio.charset.Charset

import org.apache.avro.file.{DataFileReader, SeekableByteArrayInput}
import org.apache.avro.generic.{GenericData, GenericDatumReader}
import SchemaConverter._
import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameReader}
import ml.combust.mleap.core.types.StructType
import ml.combust.mleap.runtime.frame.{ArrayRow, DefaultLeapFrame, Row}

import scala.collection.mutable
import scala.util.Try

class DefaultFrameReader extends FrameReader {
  val valueConverter = ValueConverter()

  override def fromBytes(bytes: Array[Byte],
                         charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] = Try {
    val datumReader = new GenericDatumReader[GenericData.Record]()
    val reader = new DataFileReader[GenericData.Record](new SeekableByteArrayInput(bytes), datumReader)
    val avroSchema = reader.getSchema
    val schema = avroSchema: StructType // implicit conversion from SchemaConverter

    val readers = schema.fields.map(_.dataType).map(valueConverter.avroToMleap)

    var record = new GenericData.Record(avroSchema)
    var rows = mutable.Seq[Row]()
    while (Try(reader.hasNext).getOrElse(false)) {
      record = reader.next(record)
      val row = ArrayRow(new Array[Any](schema.fields.length))
      for (i <- schema.fields.indices) {
        row.set(i, readers(i)(record.get(i)))
      }
      rows :+= row
    }

    DefaultLeapFrame(schema, rows)
  }
}
Example 8
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

import org.specs2.mutable.Specification

object StandardTestUtil extends Specification {

  def write(file: File, records: List[GenericRecord]) = {
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read(file: File, records: List[GenericRecord]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the GenericDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord.equals(record)
  }

  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }
}
Example 9
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

import org.specs2.mutable.Specification

object StandardTestUtil extends Specification {

  def write(file: File, records: List[GenericRecord]) = {
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read(file: File, records: List[GenericRecord]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the GenericDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }
}
Example 10
Source File: StandardDefaultValuesSpec.scala From sbt-avrohugger with Apache License 2.0
import test._
import org.specs2.mutable.Specification
import com.sksamuel.avro4s.RecordFormat

import java.io.File

import shapeless.Inl

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

class StandardDefaultValuesSpec extends Specification {
  skipAll

  "A case class with default values" should {
    "deserialize correctly" in {
      val format = RecordFormat[DefaultTest]
      val record = DefaultTest()
      val avro = format.to(record)
      val sameRecord = format.from(avro)

      sameRecord.suit === DefaultEnum.SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      // sameRecord.optionStringValue === Some("default")
      // sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === List(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
      // sameRecord.defaultEither === Left(2)
      // sameRecord.defaultCoproduct === Inl(3)
    }
  }
}
Example 11
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

object SpecificTestUtil extends Specification {

  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 12
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

object SpecificTestUtil extends Specification {

  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord.equals(record)
  }

  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 13
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0
import test._
import org.specs2.mutable.Specification

import java.io.File

import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

import DefaultEnum._

class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schema = new DataFileReader(file, dummyRecord).getSchema
      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      val sameRecord = dataFileReader.next

      sameRecord.suit === SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === List(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}
Example 14
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

object SpecificTestUtil extends Specification {

  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]()
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
Example 15
Source File: SpecificDefautValuesSpec.scala From sbt-avrohugger with Apache License 2.0
import test._
import org.specs2.mutable.Specification

import java.io.File

import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.file.DataFileReader

class SpecificDefaultValuesSpec extends Specification {

  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()
      val records = List(record)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      SpecificTestUtil.write(file, records)

      val dummyRecord = new GenericDatumReader[GenericRecord]
      val schema = new DataFileReader(file, dummyRecord).getSchema
      val userDatumReader = new SpecificDatumReader[DefaultTest](schema)
      val dataFileReader = new DataFileReader[DefaultTest](file, userDatumReader)
      val sameRecord = dataFileReader.next

      sameRecord.suit === "SPADES"
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      sameRecord.optionStringValue === Some("default")
      sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === Array(1,3,4,5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
    }
  }
}