org.apache.avro.file.DataFileWriter Scala Examples
The following examples show how to use org.apache.avro.file.DataFileWriter.
The original project and source file are noted above each example.
Example 1
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

import org.specs2.mutable.Specification

object StandardTestUtil extends Specification {

  def write(file: File, records: List[GenericRecord]) = {
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read(file: File, records: List[GenericRecord]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

}
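For context, here is a minimal sketch of how this utility might be exercised. The schema string and field values are hypothetical, not part of the original test suite; only verifyWriteAndRead comes from the code above.

import org.apache.avro.Schema
import org.apache.avro.generic.{ GenericData, GenericRecord }

object StandardTestUtilDemo extends App {
  // Hypothetical one-field schema, for illustration only.
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}"""
  )
  val alice: GenericRecord = new GenericData.Record(schema)
  alice.put("name", "Alice")
  val bob: GenericRecord = new GenericData.Record(schema)
  bob.put("name", "Bob")

  // Round-trips both records through a temporary Avro file and
  // asserts the last record read equals the last record written.
  StandardTestUtil.verifyWriteAndRead(List(alice, bob))
}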
Example 2
Source File: AvroDataOutputStream.scala From avro4s with Apache License 2.0
package com.sksamuel.avro4s

import java.io.OutputStream

import org.apache.avro.Schema
import org.apache.avro.file.{CodecFactory, DataFileWriter}
import org.apache.avro.generic.{GenericDatumWriter, GenericRecord}

case class AvroDataOutputStream[T](os: OutputStream, codec: CodecFactory)
                                  (implicit encoder: Encoder[T]) extends AvroOutputStream[T] {

  val resolved = encoder.resolveEncoder()

  val (writer, writeFn) = resolved.schema.getType match {
    // Primitive schemas can be appended directly; everything else is
    // encoded to a GenericRecord first.
    case Schema.Type.DOUBLE | Schema.Type.LONG | Schema.Type.BOOLEAN |
         Schema.Type.STRING | Schema.Type.INT | Schema.Type.FLOAT =>
      val datumWriter = new GenericDatumWriter[T](resolved.schema)
      val dataFileWriter = new DataFileWriter[T](datumWriter)
      dataFileWriter.setCodec(codec)
      dataFileWriter.create(resolved.schema, os)
      (dataFileWriter, (t: T) => dataFileWriter.append(t))
    case _ =>
      val datumWriter = new GenericDatumWriter[GenericRecord](resolved.schema)
      val dataFileWriter = new DataFileWriter[GenericRecord](datumWriter)
      dataFileWriter.setCodec(codec)
      dataFileWriter.create(resolved.schema, os)
      (dataFileWriter, (t: T) => {
        val record = resolved.encode(t).asInstanceOf[GenericRecord]
        dataFileWriter.append(record)
      })
  }

  override def close(): Unit = {
    flush()
    writer.close()
  }

  override def write(t: T): Unit = writeFn(t)

  override def flush(): Unit = writer.flush()

  override def fSync(): Unit = writer.fSync()
}
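The pivot of this class is DataFileWriter.setCodec. A standalone sketch of the same idea with the plain Avro API (no avro4s; the schema here is an assumption for illustration):

import java.io.ByteArrayOutputStream

import org.apache.avro.Schema
import org.apache.avro.file.{ CodecFactory, DataFileWriter }
import org.apache.avro.generic.{ GenericData, GenericDatumWriter, GenericRecord }

object CodecDemo extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Event","fields":[{"name":"id","type":"long"}]}"""
  )
  val out = new ByteArrayOutputStream()
  val writer = new DataFileWriter[GenericRecord](new GenericDatumWriter[GenericRecord](schema))
  writer.setCodec(CodecFactory.deflateCodec(6)) // codec must be set before create()
  writer.create(schema, out)

  val record = new GenericData.Record(schema)
  record.put("id", 42L)
  writer.append(record)
  writer.close()

  println(s"wrote ${out.size} bytes with deflate compression")
}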
Example 3
Source File: AvroIO.scala From ratatool with Apache License 2.0
package com.spotify.ratatool.io

import java.io.{File, InputStream, OutputStream}
import java.nio.ByteBuffer
import java.nio.channels.SeekableByteChannel

import com.google.common.io.ByteStreams
import org.apache.avro.Schema
import org.apache.avro.file.{DataFileReader, DataFileWriter, SeekableByteArrayInput, SeekableInput}
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DatumReader, DatumWriter}
import org.apache.avro.reflect.{ReflectDatumReader, ReflectDatumWriter}
import org.apache.avro.specific.{SpecificDatumReader, SpecificDatumWriter, SpecificRecord}
import org.apache.beam.sdk.io.FileSystems
import org.apache.beam.sdk.io.fs.MatchResult.Metadata

import scala.jdk.CollectionConverters._
import scala.reflect.ClassTag

// Excerpt from object AvroIO; `createDatumWriter` and `FileStorage`
// are defined elsewhere in the original file.
object AvroIO {

  def writeToOutputStream[T: ClassTag](data: Iterable[T],
                                       schema: Schema,
                                       os: OutputStream): Unit = {
    val fileWriter = new DataFileWriter(createDatumWriter[T]).create(schema, os)
    data.foreach(fileWriter.append)
    fileWriter.close()
  }

  def getAvroSchemaFromFile(path: String): Schema = {
    require(FileStorage(path).exists, s"File `$path` does not exist!")
    val files = FileStorage(path).listFiles.filter(_.resourceId.getFilename.endsWith(".avro"))
    require(files.nonEmpty, s"File `$path` does not contain avro files")
    val reader = new GenericDatumReader[GenericRecord]()
    val dfr = new DataFileReader[GenericRecord](AvroIO.getAvroSeekableInput(files.head), reader)
    dfr.getSchema
  }

  private def getAvroSeekableInput(meta: Metadata): SeekableInput = new SeekableInput {
    require(meta.isReadSeekEfficient)
    private val in = FileSystems.open(meta.resourceId()).asInstanceOf[SeekableByteChannel]

    override def read(b: Array[Byte], off: Int, len: Int): Int =
      in.read(ByteBuffer.wrap(b, off, len))

    override def tell(): Long = in.position()

    override def length(): Long = in.size()

    override def seek(p: Long): Unit = in.position(p)

    override def close(): Unit = in.close()
  }
}
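SeekableByteArrayInput, imported above, is the in-memory counterpart of the channel-backed SeekableInput built by getAvroSeekableInput. A small sketch (assuming avroBytes holds a complete Avro data file, e.g. one produced by writeToOutputStream) that recovers the writer schema the same way getAvroSchemaFromFile does:

import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, SeekableByteArrayInput }
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }

def schemaOf(avroBytes: Array[Byte]): Schema = {
  val input = new SeekableByteArrayInput(avroBytes)
  val reader = new DataFileReader[GenericRecord](input, new GenericDatumReader[GenericRecord]())
  try reader.getSchema
  finally reader.close()
}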
Example 4
Source File: DefaultFrameWriter.scala From mleap with Apache License 2.0
package ml.combust.mleap.avro

import java.io.ByteArrayOutputStream
import java.nio.charset.Charset

import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{GenericData, GenericDatumWriter}
import SchemaConverter._
import ml.combust.mleap.runtime.frame.LeapFrame
import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameWriter}
import resource._

import scala.util.{Failure, Try}

class DefaultFrameWriter[LF <: LeapFrame[LF]](frame: LF) extends FrameWriter {
  val valueConverter = ValueConverter()

  override def toBytes(charset: Charset = BuiltinFormats.charset): Try[Array[Byte]] = {
    (for (out <- managed(new ByteArrayOutputStream())) yield {
      val writers = frame.schema.fields.map(_.dataType).map(valueConverter.mleapToAvro)
      val avroSchema = frame.schema: Schema
      val record = new GenericData.Record(avroSchema)
      val datumWriter = new GenericDatumWriter[GenericData.Record](avroSchema)
      val writer = new DataFileWriter[GenericData.Record](datumWriter)
      writer.create(avroSchema, out)

      for (row <- frame.collect()) {
        var i = 0
        for (writer <- writers) {
          record.put(i, writer(row.getRaw(i)))
          i = i + 1
        }
        Try(writer.append(record)) match {
          case Failure(error) => error.printStackTrace()
          case _ =>
        }
      }

      writer.close()
      out.toByteArray
    }).tried
  }
}
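Note that a single GenericData.Record is reused for every row: DataFileWriter.append serializes the datum immediately, so repopulating the record afterwards is safe. A minimal sketch of that reuse pattern with a hypothetical schema:

import java.io.ByteArrayOutputStream

import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{ GenericData, GenericDatumWriter }

object RecordReuseDemo extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Row","fields":[{"name":"i","type":"int"}]}"""
  )
  val out = new ByteArrayOutputStream()
  val writer = new DataFileWriter[GenericData.Record](
    new GenericDatumWriter[GenericData.Record](schema))
  writer.create(schema, out)

  // One mutable record, repopulated per row, as in DefaultFrameWriter.
  val record = new GenericData.Record(schema)
  for (i <- 0 until 3) {
    record.put(0, i)
    writer.append(record) // serialized immediately, so reuse is safe
  }
  writer.close()
}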
Example 5
Source File: StandardDefaultValuesSpec.scala From sbt-avrohugger with Apache License 2.0
import test._

import org.specs2.mutable.Specification
import com.sksamuel.avro4s.RecordFormat
import java.io.File
import shapeless.Inl

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

class StandardDefaultValuesSpec extends Specification {
  skipAll

  "A case class with default values" should {
    "deserialize correctly" in {
      val format = RecordFormat[DefaultTest]
      val record = DefaultTest()
      val avro = format.to(record)
      val sameRecord = format.from(avro)
      sameRecord.suit === DefaultEnum.SPADES
      sameRecord.number === 0
      sameRecord.str === "str"
      sameRecord.optionString === None
      // sameRecord.optionStringValue === Some("default")
      // sameRecord.embedded === Embedded(1)
      sameRecord.defaultArray === List(1, 3, 4, 5)
      sameRecord.optionalEnum === None
      sameRecord.defaultMap === Map("Hello" -> "world", "Merry" -> "Christmas")
      sameRecord.byt === "\u00FF".getBytes
      // sameRecord.defaultEither === Left(2)
      // sameRecord.defaultCoproduct === Inl(3)
    }
  }
}
Example 6
Source File: SpecificTestUtil.scala From sbt-avrohugger with Apache License 2.0
package test

import java.io.File

import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import org.apache.avro.generic.{ GenericDatumReader, GenericRecord }
import org.apache.avro.specific.{ SpecificDatumReader, SpecificDatumWriter, SpecificRecordBase }
import org.apache.avro.Schema
import org.apache.avro.file.{ DataFileReader, DataFileWriter }

import org.specs2.mutable.Specification

object SpecificTestUtil extends Specification {

  def write[T <: SpecificRecordBase](file: File, records: List[T]) = {
    val userDatumWriter = new SpecificDatumWriter[T]
    val dataFileWriter = new DataFileWriter[T](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file)
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close()
  }

  def read[T <: SpecificRecordBase](file: File, records: List[T]) = {
    // Open the file once with a generic reader just to discover the schema.
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new SpecificDatumReader[T](schema)
    val dataFileReader = new DataFileReader[T](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the SpecificDatumReader.
    var record: T = null.asInstanceOf[T]
    var sameRecord: T = null.asInstanceOf[T]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead[T <: SpecificRecordBase](records: List[T]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

  def verifyEncodeDecode[T <: SpecificRecordBase](record: T) = {
    val schema = record.getSchema
    val writer = new SpecificDatumWriter[T](schema)
    val out = new java.io.ByteArrayOutputStream()
    val encoder = EncoderFactory.get().binaryEncoder(out, null)
    writer.write(record, encoder)
    encoder.flush()
    val ba = out.toByteArray
    // These assertions assume the record's binary encoding is exactly
    // one zero byte (see the sketch below).
    ba.size must ===(1)
    ba(0) must ===(0)
    out.close()
    val reader = new SpecificDatumReader[T](schema)
    val decoder = DecoderFactory.get().binaryDecoder(ba, null)
    val decoded = reader.read(record, decoder)
    decoded must ===(record)
  }
}
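The one-byte assertions in verifyEncodeDecode only hold for a record whose entire binary encoding is a single zero byte, e.g. a record with one enum field set to its first symbol: Avro writes an enum as the zigzag-varint of its symbol index, and index 0 encodes as 0x00. A standalone sketch (the Card/Suit schema is hypothetical) showing why:

import java.io.ByteArrayOutputStream

import org.apache.avro.Schema
import org.apache.avro.generic.{ GenericData, GenericDatumWriter, GenericRecord }
import org.apache.avro.io.EncoderFactory

object SingleByteDemo extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Card","fields":[
      |  {"name":"suit","type":{"type":"enum","name":"Suit","symbols":["SPADES","HEARTS"]}}
      |]}""".stripMargin
  )
  val record: GenericRecord = new GenericData.Record(schema)
  val suitSchema = schema.getField("suit").schema()
  record.put("suit", new GenericData.EnumSymbol(suitSchema, "SPADES"))

  val out = new ByteArrayOutputStream()
  val encoder = EncoderFactory.get().binaryEncoder(out, null)
  new GenericDatumWriter[GenericRecord](schema).write(record, encoder)
  encoder.flush()
  println(out.toByteArray.toList) // List(0): one field, symbol index 0
}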
Example 7
Source File: AvroWriter.scala From eel-sdk with Apache License 2.0
package io.eels.component.avro

import java.io.OutputStream
import java.util.concurrent.atomic.AtomicInteger

import io.eels.Row
import io.eels.schema.StructType
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic
import org.apache.avro.generic.GenericRecord

class AvroWriter(structType: StructType, out: OutputStream) {

  private val schema = AvroSchemaFns.toAvroSchema(structType)
  private val datumWriter = new generic.GenericDatumWriter[GenericRecord](schema)
  private val dataFileWriter = new DataFileWriter[GenericRecord](datumWriter)
  private val serializer = new RowSerializer(schema)
  private val _records = new AtomicInteger(0)

  dataFileWriter.create(schema, out)

  def write(row: Row): Unit = {
    val record = serializer.serialize(row)
    dataFileWriter.append(record)
    _records.incrementAndGet()
  }

  def records: Int = _records.get()

  def close(): Unit = {
    dataFileWriter.flush()
    dataFileWriter.close()
  }
}
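What the wrapper adds on top of DataFileWriter is mainly the record counter; the rest is create/append/flush/close pass-through. The same pattern, independent of eel's Row and StructType types (a minimal sketch, not eel's actual API):

import java.io.OutputStream
import java.util.concurrent.atomic.AtomicInteger

import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{ GenericDatumWriter, GenericRecord }

class CountingAvroWriter(schema: Schema, out: OutputStream) {
  private val writer =
    new DataFileWriter[GenericRecord](new GenericDatumWriter[GenericRecord](schema))
  private val count = new AtomicInteger(0)
  writer.create(schema, out)

  def write(record: GenericRecord): Unit = {
    writer.append(record)
    count.incrementAndGet()
  }

  def records: Int = count.get()

  def close(): Unit = {
    writer.flush()
    writer.close()
  }
}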