org.apache.avro.generic.IndexedRecord Scala Examples
The following examples show how to use org.apache.avro.generic.IndexedRecord.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: ToTableRow.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.bigquery

import com.spotify.scio.extra.bigquery.AvroConverters.AvroConversionException

import java.math.{BigDecimal => JBigDecimal}
import java.nio.ByteBuffer
import java.util

import com.spotify.scio.bigquery.TableRow
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericFixed, IndexedRecord}
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.BaseEncoding
import org.joda.time.format.DateTimeFormat
import org.joda.time.{DateTime, LocalDate, LocalTime}

import scala.jdk.CollectionConverters._

/**
 * Converts individual Avro field values into values that can be stored in a [[TableRow]].
 *
 * Byte-like values (`GenericFixed`, `ByteBuffer`) are rendered as strings; the encoding is
 * selected by the `bigquery.bytes.encoder` property of the field schema (`BASE64` or `HEX`),
 * and base64 is used when the property is absent.
 */
private[bigquery] trait ToTableRow {
  private lazy val encodingPropName: String = "bigquery.bytes.encoder"
  private lazy val base64Encoding: BaseEncoding = BaseEncoding.base64()
  private lazy val hexEncoding: BaseEncoding = BaseEncoding.base16()

  // YYYY-[M]M-[D]D
  private[this] val localDateFormatter =
    DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC()

  // YYYY-[M]M-[D]D[( |T)[H]H:[M]M:[S]S[.DDDDDD]]
  private[this] val localTimeFormatter =
    DateTimeFormat.forPattern("HH:mm:ss.SSSSSS")

  // YYYY-[M]M-[D]D[( |T)[H]H:[M]M:[S]S[.DDDDDD]][time zone]
  private[this] val timestampFormatter =
    DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS")

  /**
   * Converts a single field value to its table-row representation.
   *
   * @param fieldValue the value read from a record
   * @param field the Avro field the value belongs to; its schema is consulted for the byte
   *              encoding property
   * @return a string, number, boolean, list or nested [[TableRow]], depending on the runtime
   *         type of `fieldValue`
   * @throws AvroConversionException if the runtime type of `fieldValue` is not handled, or if
   *                                 `fieldValue` is `null`
   */
  private[bigquery] def toTableRowField(fieldValue: Any, field: Schema.Field): Any =
    fieldValue match {
      case x: CharSequence          => x.toString
      case x: Enum[_]               => x.name()
      // Must precede the generic Number case: BigDecimal is rendered as a string.
      case x: JBigDecimal           => x.toString
      case x: Number                => x
      case x: Boolean               => x
      case x: GenericFixed          => encodeByteArray(x.bytes(), field.schema())
      case x: ByteBuffer            => encodeByteArray(toByteArray(x), field.schema())
      case x: util.Map[_, _]        => toTableRowFromMap(x.asScala, field)
      case x: java.lang.Iterable[_] => toTableRowFromIterable(x.asScala, field)
      case x: IndexedRecord         => AvroConverters.toTableRow(x)
      case x: LocalDate             => localDateFormatter.print(x)
      case x: LocalTime             => localTimeFormatter.print(x)
      case x: DateTime              => timestampFormatter.print(x)
      case null =>
        // Type patterns never match null; report it explicitly instead of failing with a
        // NullPointerException on `fieldValue.getClass` below.
        throw AvroConversionException("ToTableRow conversion failed: could not match null")
      case _ =>
        throw AvroConversionException(
          s"ToTableRow conversion failed: could not match ${fieldValue.getClass}"
        )
    }

  /**
   * Converts every element of `iterable` with [[toTableRowField]] and returns them as a Java
   * list.
   *
   * @throws AvroConversionException if an element is itself a Scala `Iterable` or `Map`
   */
  private def toTableRowFromIterable(iterable: Iterable[Any], field: Schema.Field): util.List[_] =
    iterable.map {
      // NOTE(review): this guard tests for Scala collection types; elements coming out of a
      // java.lang.Iterable are presumably Java collections, so confirm whether it can trigger.
      case item @ (_: Iterable[_] | _: Map[_, _]) =>
        throw AvroConversionException(
          s"ToTableRow conversion failed for item $item: " +
            s"iterable and map types not supported"
        )
      case item => toTableRowField(item, field)
    }.toList.asJava

  /**
   * Converts each `(key, value)` entry of `map` into a [[TableRow]] with a `key` and a `value`
   * column, converting both sides with [[toTableRowField]].
   */
  private def toTableRowFromMap(map: Iterable[Any], field: Schema.Field): util.List[_] =
    map.map { case (k, v) =>
      new TableRow()
        .set("key", toTableRowField(k, field))
        .set("value", toTableRowField(v, field))
    }.toList.asJava

  /**
   * Encodes `bytes` as text using the encoding named by the `bigquery.bytes.encoder` property of
   * `fieldSchema`.
   *
   * @throws AvroConversionException if the property is set to anything other than `BASE64` or
   *                                 `HEX`
   */
  private def encodeByteArray(bytes: Array[Byte], fieldSchema: Schema): String =
    Option(fieldSchema.getProp(encodingPropName)) match {
      case Some("BASE64") | None => base64Encoding.encode(bytes)
      case Some("HEX")           => hexEncoding.encode(bytes)
      case Some(encoding)        => throw AvroConversionException(s"Unsupported encoding $encoding")
    }

  /**
   * Copies the bytes of `buffer` from index 0 up to its limit into a new array. A read-only
   * view is used so the position of the caller's buffer is left untouched.
   */
  private def toByteArray(buffer: ByteBuffer): Array[Byte] = {
    val copy = buffer.asReadOnlyBuffer
    val bytes = new Array[Byte](copy.limit())
    copy.rewind()
    copy.get(bytes)
    bytes
  }
}
Example 2
Source File: SCollectionSyntax.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.bigquery.syntax

import com.google.api.services.bigquery.model.TableReference
import com.spotify.scio.annotations.experimental
import com.spotify.scio.bigquery.BigQueryTable.WriteParam
import com.spotify.scio.bigquery.{BigQueryTable, Table, TableRow}
import com.spotify.scio.io.ClosedTap
import com.spotify.scio.util.ScioUtil
import com.spotify.scio.values.SCollection
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.{CreateDisposition, WriteDisposition}

import scala.reflect.ClassTag
import scala.util.control.NonFatal

/** Implicit enrichment that adds BigQuery output methods to collections of Avro records. */
trait SCollectionSyntax {
  implicit def toAvroToBigQuerySCollection[T <: IndexedRecord: ClassTag](
    data: SCollection[T]
  ): AvroToBigQuerySCollectionOps[T] = new AvroToBigQuerySCollectionOps[T](data)
}

/** Operations for writing an `SCollection` of Avro records to BigQuery. */
final class AvroToBigQuerySCollectionOps[T <: IndexedRecord: ClassTag](
  private val self: SCollection[T]
) extends Serializable {
  import com.spotify.scio.extra.bigquery.AvroConverters._

  /**
   * Converts each record to a [[TableRow]] and writes the result to `table`.
   *
   * @param table destination table
   * @param avroSchema schema used to derive the table schema; when `null`, the schema is
   *                   obtained by reflectively calling the static `getClassSchema` method of
   *                   the record class
   * @param writeDisposition passed through to the write parameters
   * @param createDisposition passed through to the write parameters
   * @param tableDescription passed through to the write parameters
   * @throws AvroConversionException if no schema is supplied and it cannot be obtained from
   *                                 the record class
   */
  @experimental
  def saveAvroAsBigQuery(
    table: TableReference,
    avroSchema: Schema = null,
    writeDisposition: WriteDisposition = null,
    createDisposition: CreateDisposition = null,
    tableDescription: String = null
  ): ClosedTap[TableRow] = {
    val schema: Schema = Option(avroSchema).getOrElse(schemaFromRecordClass())
    val params =
      WriteParam(toTableSchema(schema), writeDisposition, createDisposition, tableDescription)
    self
      .map(toTableRow(_))
      .write(BigQueryTable(Table.Ref(table)))(params)
  }

  /**
   * Looks up the schema of `T` via its static `getClassSchema` method.
   *
   * Any reflective failure (missing method, non-static method, wrong return type, ...) is
   * reported as an [[AvroConversionException]] carrying the original failure as its cause,
   * rather than leaking the raw reflection exception.
   */
  private def schemaFromRecordClass(): Schema = {
    val cls = ScioUtil.classOf[T]
    if (classOf[IndexedRecord] isAssignableFrom cls) {
      try cls.getMethod("getClassSchema").invoke(null).asInstanceOf[Schema]
      catch {
        case NonFatal(e) =>
          throw AvroConversionException("Could not invoke $SCHEMA on provided Avro type", e)
      }
    } else {
      throw AvroConversionException("Could not invoke $SCHEMA on provided Avro type")
    }
  }
}
Example 3
Source File: AvroConverters.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.bigquery

import com.google.api.services.bigquery.model.TableSchema
import com.spotify.scio.annotations.experimental
import com.spotify.scio.bigquery.TableRow
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord

import scala.jdk.CollectionConverters._

/** Entry points for turning Avro records and schemas into their BigQuery counterparts. */
object AvroConverters extends ToTableRow with ToTableSchema {

  /**
   * Builds a [[TableRow]] from an Avro record.
   *
   * Every field of the record's schema whose value is non-null is converted with
   * `toTableRowField` and stored under the field's name; fields holding `null` are left out
   * of the row.
   */
  @experimental
  def toTableRow[T <: IndexedRecord](record: T): TableRow = {
    val result = new TableRow
    for {
      avroField <- record.getSchema.getFields.asScala
      value <- Option(record.get(avroField.pos))
    } result.set(avroField.name, toTableRowField(value, avroField))
    result
  }

  /**
   * Builds a [[TableSchema]] whose fields are those returned by `getFieldSchemas` for
   * `avroSchema`.
   */
  @experimental
  def toTableSchema(avroSchema: Schema): TableSchema =
    new TableSchema().setFields(getFieldSchemas(avroSchema).asJava)

  /** Raised when an Avro value or schema cannot be converted. */
  final case class AvroConversionException(
    private val message: String,
    private val cause: Throwable = null
  ) extends Exception(message, cause)
}
Example 4
Source File: AvroInstances.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.schemas.instances

import com.spotify.scio.schemas.{RawRecord, Schema}
import org.apache.avro.specific.SpecificRecord
import org.apache.avro.generic.{GenericRecord, IndexedRecord}
import org.apache.beam.sdk.schemas.utils.AvroUtils
import org.apache.beam.sdk.schemas.{AvroRecordSchema, Schema => BSchema}
import org.apache.beam.sdk.transforms.SerializableFunction
import org.apache.beam.sdk.values.{Row, TypeDescriptor}

import scala.jdk.CollectionConverters._
import scala.reflect.{classTag, ClassTag}

/** Schema instances for Avro record types. */
trait AvroInstances {

  /**
   * Derives a [[Schema]] for a specific record type `T`.
   *
   * The Beam schema and the row-to-record function come from `AvroRecordSchema`; the
   * record-to-row direction goes through `AvroInstances.recordtoRow` instead (see the TODO
   * below).
   */
  implicit def avroSchema[T <: SpecificRecord: ClassTag]: Schema[T] = {
    // TODO: broken because of a bug upstream https://issues.apache.org/jira/browse/BEAM-6742
    // RawRecord[T](new AvroRecordSchema())
    import org.apache.avro.reflect.ReflectData
    val rc = classTag[T].runtimeClass.asInstanceOf[Class[T]]
    val provider = new AvroRecordSchema()
    val td = TypeDescriptor.of(rc)
    val schema = provider.schemaFor(td)
    val avroSchema =
      new AvroInstances.SerializableSchema(ReflectData.get().getSchema(td.getRawType))

    def fromRow = provider.fromRowFunction(td)

    val toRow: SerializableFunction[T, Row] =
      new SerializableFunction[T, Row] {
        def apply(t: T): Row =
          AvroInstances.recordtoRow(schema, avroSchema, t)
      }
    RawRecord[T](schema, fromRow, toRow)
  }

  /**
   * Builds a [[Schema]] for `GenericRecord`s described by the given Avro schema.
   *
   * @param schema Avro schema of the records; it is converted to a Beam schema with
   *               `AvroUtils.toBeamSchema`
   */
  def fromAvroSchema(schema: org.apache.avro.Schema): Schema[GenericRecord] = {
    val beamSchema = AvroUtils.toBeamSchema(schema)
    val avroSchema = new AvroInstances.SerializableSchema(schema)

    val toRow = new SerializableFunction[GenericRecord, Row] {
      def apply(t: GenericRecord): Row =
        AvroInstances.recordtoRow[GenericRecord](beamSchema, avroSchema, t)
    }

    val fromRow = new SerializableFunction[Row, GenericRecord] {
      def apply(t: Row): GenericRecord =
        AvroUtils.toGenericRecord(t, avroSchema.get)
    }

    RawRecord[GenericRecord](beamSchema, fromRow, toRow)
  }
}

object AvroInstances {

  /**
   * Serializable holder for an Avro schema.
   *
   * Only the JSON string form of the schema is serialized (the constructor argument is
   * `@transient`). The parsed schema is cached in a transient lazy value, so it is parsed at
   * most once per instance and rebuilt on first use after deserialization, instead of being
   * re-parsed on every call to `get`.
   */
  private class SerializableSchema(@transient private val schema: org.apache.avro.Schema)
      extends Serializable {
    private[this] val stringSchema = schema.toString

    // `get` is called once per converted record, so cache the parse result.
    @transient private lazy val parsedSchema: org.apache.avro.Schema =
      new org.apache.avro.Schema.Parser().parse(stringSchema)

    /** Returns the schema parsed from its serialized string form. */
    def get: org.apache.avro.Schema = parsedSchema
  }

  // Workaround BEAM-6742
  /**
   * Converts an Avro record to a Beam [[Row]].
   *
   * Beam fields and Avro fields are paired positionally; the value at each position of `t` is
   * converted with `AvroUtils.convertAvroFieldStrict` and appended to the row.
   *
   * @param schema Beam schema of the resulting row
   * @param avroSchema Avro schema whose fields supply the source field schemas
   * @param t record to convert
   */
  private def recordtoRow[T <: IndexedRecord](
    schema: BSchema,
    avroSchema: SerializableSchema,
    t: T
  ): Row = {
    val row = Row.withSchema(schema)
    schema.getFields.asScala.zip(avroSchema.get.getFields.asScala).zipWithIndex.foreach {
      case ((f, a), i) =>
        val value = t.get(i)
        val v = AvroUtils.convertAvroFieldStrict(value, a.schema, f.getType)
        row.addValue(v)
    }
    row.build()
  }
}