org.apache.hadoop.hbase.io.ImmutableBytesWritable Scala Examples
The following examples show how to use org.apache.hadoop.hbase.io.ImmutableBytesWritable, the Hadoop Writable wrapper that HBase uses for row keys when tables are read or written through the MapReduce input/output formats (and, by extension, Spark's newAPIHadoopRDD and saveAsNewAPIHadoopDataset).
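Before the project examples, here is a minimal, self-contained sketch of what ImmutableBytesWritable itself does: it wraps a byte array (typically a row key) so it can be used as a Hadoop key type. The names and values below are invented for illustration.

import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes

object ImmutableBytesWritableSketch {
  def main(args: Array[String]): Unit = {
    // Wrap a row key so it can serve as the key of a Hadoop/Spark key-value pair.
    val key = new ImmutableBytesWritable(Bytes.toBytes("row-001"))

    // get() returns the backing array; honor offset/length when reading it.
    val asString = Bytes.toString(key.get, key.getOffset, key.getLength)

    // copyBytes() returns a defensive copy of exactly the wrapped bytes.
    val copy: Array[Byte] = key.copyBytes()

    println(s"$asString has ${copy.length} bytes")
  }
}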
Example 1
Source File: HBase.scala From AI with Apache License 2.0 | 6 votes |
package com.bigchange.hbase

import com.bigchange.util.HBaseUtil._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Result, _}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.spark.SparkContext

// Note: the enclosing class declaration is elided in this listing; the methods
// below belong to it, as does the hBaseConfiguration field they use.

  def existRowKey(row: String, table: Table): Boolean = {
    val get = new Get(row.getBytes())
    val result = table.get(get)
    if (result.isEmpty) {
      warn("hbase table don't have this data,execute insert")
      return false
    }
    true
  }

  def getConfiguration = if (hBaseConfiguration == null) {
    warn("hbase setDefaultConfiguration....")
    setDefaultConfiguration
  } else hBaseConfiguration

  def setDefaultConfiguration = {
    hBaseConfiguration = HBaseConfiguration.create
    // Settings needed for local testing; on a cluster they are picked up
    // automatically from the corresponding configuration files.
    hBaseConfiguration.set("fs.defaultFS", "hdfs://ns1")                        // nameservice path
    hBaseConfiguration.set("dfs.nameservices", "ns1")
    hBaseConfiguration.set("dfs.ha.namenodes.ns1", "nn1,nn2")                   // namenode list
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn1", "server3:9000")  // namenode RPC address
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn2", "server4:9000")  // namenode RPC address
    // implementation class for automatic namenode failover
    hBaseConfiguration.set("dfs.client.failover.proxy.provider.ns1",
      "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
    hBaseConfiguration.set("hbase.rootdir", "hdfs://ns1/hbase")
    hBaseConfiguration.set("hbase.zookeeper.quorum", "server0,server1,server2")
    hBaseConfiguration.set("hbase.zookeeper.property.clientPort", "2181")
    hBaseConfiguration
  }
}
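The excerpt only builds the Configuration. Below is a hedged sketch of how such a configuration is typically combined with TableInputFormat to obtain an RDD of (ImmutableBytesWritable, Result) pairs; the SparkContext and table name are placeholders, not part of the original project.

import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext

// Hypothetical read path reusing getConfiguration from the excerpt above.
def scanTable(sc: SparkContext): Unit = {
  val conf = getConfiguration
  conf.set(TableInputFormat.INPUT_TABLE, "my_table")  // placeholder table name

  val rdd = sc.newAPIHadoopRDD(
    conf,
    classOf[TableInputFormat],
    classOf[ImmutableBytesWritable],
    classOf[Result])

  // The key of each pair is the row key wrapped in an ImmutableBytesWritable.
  rdd.map { case (key, _) => Bytes.toString(key.get, key.getOffset, key.getLength) }
    .take(10)
    .foreach(println)
}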
Example 2
Source File: HbRddWriter.scala From hbrdd with Apache License 2.0 | 5 votes |
package top.spoofer.hbrdd.hbsupport

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.spark.rdd.RDD
import top.spoofer.hbrdd.config.HbRddConfig
import top.spoofer.hbrdd.unit.HbRddFormatsWriter
import top.spoofer.hbrdd._
import HbRddWritPuter._

trait HbRddWriter {
  type TsValue[A] = (Long, A) // (ts, A)
  val LATEST_TIMESTAMP = Long.MaxValue

  // createJob and convert2Writable used below are defined elsewhere in the
  // library and are not shown in this excerpt.
  final class SingleFamilyRDDWriter[A](
    val rdd: RDD[(String, Map[String, A])],
    val put: HbRddPuter[A]
  ) extends HbRddWritCommon[A] with Serializable {

    def put2Hbase(tableName: String, family: String)(implicit config: HbRddConfig) = {
      val job = createJob(tableName, config.getHbaseConfig)

      rdd.flatMap({ case (rowId, data) => convert2Writable(rowId, Map(family -> data), put) })
        .saveAsNewAPIHadoopDataset(job.getConfiguration)
    }
  }
}
Example 3
Source File: HBaseReaders.scala From cuesheet with Apache License 2.0 | 5 votes |
package com.kakao.cuesheet.convert

import com.kakao.mango.util.Conversions._
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.JavaConversions._

trait HBaseReaders {
  val sc: SparkContext

  // hbaseTableBinary, which scans the table into
  // (rowkey, ((family, qualifier), (timestamp, value))) byte-array tuples,
  // is defined elsewhere in this trait and omitted from this excerpt.

  def hbaseTable(quorum: String, table: String): RDD[(String, ((String, String), (Long, String)))] = {
    hbaseTableBinary(quorum, table).map { case (rowkey, ((family, qualifier), (timestamp, value))) =>
      (rowkey.string, ((family.string, qualifier.string), (timestamp, value.string)))
    }
  }

  def hbaseColumnBinary(quorum: String, table: String,
                        family: Array[Byte], qualifier: Array[Byte]): RDD[(Array[Byte], (Long, Array[Byte]))] = {
    hbaseTableBinary(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family.sameElements(f) && qualifier.sameElements(q) => (rowkey, cell)
    }
  }

  def hbaseColumn(quorum: String, table: String,
                  family: String, qualifier: String): RDD[(String, (Long, String))] = {
    hbaseTable(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family == f && qualifier == q => (rowkey, cell)
    }
  }
}
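A hypothetical driver mixing in this trait might look like the sketch below. The quorum, table, family, and qualifier are placeholders, and hbaseTableBinary is assumed to be provided by the full trait.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

// Hypothetical usage of HBaseReaders; all names are placeholders.
object ReadColumnJob extends HBaseReaders {
  val sc = new SparkContext(new SparkConf().setAppName("read-column"))

  def main(args: Array[String]): Unit = {
    val cells: RDD[(String, (Long, String))] =
      hbaseColumn("zk1,zk2,zk3", "metrics", "d", "value")

    cells.take(10).foreach { case (rowkey, (ts, value)) =>
      println(s"$rowkey @ $ts = $value")
    }
  }
}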
Example 4
Source File: HBaseSimpleRDD.scala From spark-hbase-connector with Apache License 2.0 | 5 votes |
package it.nerdammer.spark.hbase

import it.nerdammer.spark.hbase.conversion.FieldReader
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.{NewHadoopRDD, RDD}
import org.apache.spark.{Partition, TaskContext}

import scala.reflect.ClassTag

class HBaseSimpleRDD[R: ClassTag](hadoopHBase: NewHadoopRDD[ImmutableBytesWritable, Result],
                                  builder: HBaseReaderBuilder[R],
                                  saltingLength: Int = 0)
                                 (implicit mapper: FieldReader[R], saltingProvider: SaltingProviderFactory[String])
  extends RDD[R](hadoopHBase) {

  override def getPartitions: Array[Partition] = firstParent[(ImmutableBytesWritable, Result)].partitions

  override def compute(split: Partition, context: TaskContext) = {
    // val cleanConversion = sc.clean ---> next version
    firstParent[(ImmutableBytesWritable, Result)].iterator(split, context)
      .map(e => conversion(e._1, e._2))
  }

  def conversion(key: ImmutableBytesWritable, row: Result) = {
    val columnNames = HBaseUtils.chosenColumns(builder.columns, mapper.columns)

    val columnNamesFC = HBaseUtils.columnsWithFamily(builder.columnFamily, columnNames)

    val columns = columnNamesFC
      .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2)))
      .map(t => if (row.containsColumn(t._1, t._2)) Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)).array) else None)
      .toList

    mapper.map(Some(key.get.drop(saltingLength)) :: columns)
  }
}
Example 5
Source File: GraphX.scala From unicorn with Apache License 2.0 | 5 votes |
package unicorn.narwhal.graph

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import unicorn.bigtable.hbase.HBaseTable
import unicorn.json._
import unicorn.unibase.graph.{ReadOnlyGraph, GraphSerializer, GraphVertexColumnFamily, GraphOutEdgeColumnFamily}

// Note: the enclosing graph class is elided in this listing; `name` below is
// the table name field of that class.

  def graphx(sc: SparkContext): org.apache.spark.graphx.Graph[JsObject, (String, JsValue)] = {
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, name)
    conf.setInt(TableInputFormat.SCAN_CACHEDROWS, 500)
    conf.setBoolean(TableInputFormat.SCAN_CACHEBLOCKS, false)
    conf.set(TableInputFormat.SCAN_COLUMNS, s"$GraphVertexColumnFamily $GraphOutEdgeColumnFamily")

    val rdd = sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    val rows = rdd.mapPartitions { it =>
      val serializer = new GraphSerializer()
      it.map { tuple =>
        val row = HBaseTable.getRow(tuple._2)
        serializer.deserializeVertex(row)
      }
    }

    val vertices = rows.map { vertex => (vertex.id, vertex.properties) }

    val edges = rows.flatMap { vertex =>
      vertex.edges.map { edge =>
        org.apache.spark.graphx.Edge(edge.from, edge.to, (edge.label, edge.properties))
      }
    }

    org.apache.spark.graphx.Graph(vertices, edges)
  }
}
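As a hypothetical follow-up, the graph returned by graphx(sc) can be inspected with standard GraphX operations only; the helper below is a sketch and not part of the unicorn project.

import org.apache.spark.graphx.Graph
import unicorn.json.{JsObject, JsValue}

// Summarize the graph produced by graphx(sc) above.
def summarize(g: Graph[JsObject, (String, JsValue)]): Unit = {
  println(s"vertices = ${g.vertices.count()}, edges = ${g.edges.count()}")

  // Ten most-connected vertices by out-degree.
  g.outDegrees
    .sortBy({ case (_, degree) => degree }, ascending = false)
    .take(10)
    .foreach { case (vertexId, degree) => println(s"$vertexId -> $degree") }
}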
Example 6
Source File: Phoenix.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase.types

import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.phoenix.query.QueryConstants
import org.apache.phoenix.schema._
import org.apache.phoenix.schema.RowKeySchema.RowKeySchemaBuilder
import org.apache.phoenix.schema.types._
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.hbase._
import org.apache.spark.sql.types._

class Phoenix(f: Option[Field] = None) extends SHCDataType {
  private var schema: RowKeySchema = null

  def fromBytes(src: HBaseType): Any = {
    if (f.isDefined) {
      mapToPhoenixTypeInstance(f.get.dt).toObject(src)
    } else {
      throw new UnsupportedOperationException(
        "Phoenix coder: without field metadata, 'fromBytes' conversion can not be supported")
    }
  }

  def toBytes(input: Any): Array[Byte] = {
    input match {
      case data: Boolean => PBoolean.INSTANCE.toBytes(data)
      case data: Byte => PTinyint.INSTANCE.toBytes(data)
      case data: Array[Byte] => PVarbinary.INSTANCE.toBytes(data)
      case data: Double => PDouble.INSTANCE.toBytes(data)
      case data: Float => PFloat.INSTANCE.toBytes(data)
      case data: Int => PInteger.INSTANCE.toBytes(data)
      case data: Long => PLong.INSTANCE.toBytes(data)
      case data: Short => PSmallint.INSTANCE.toBytes(data)
      case data: String => PVarchar.INSTANCE.toBytes(data)
      case _ => throw new UnsupportedOperationException(s"unsupported data type $input")
    }
  }

  override def isRowKeySupported(): Boolean = true

  override def isCompositeKeySupported(): Boolean = true

  override def decodeCompositeRowKey(row: Array[Byte], keyFields: Seq[Field]): Map[Field, Any] = {
    if (schema == null) schema = buildSchema(keyFields)
    val ptr: ImmutableBytesWritable = new ImmutableBytesWritable
    val maxOffest = schema.iterator(row, 0, row.length, ptr)
    var ret = Map.empty[Field, Any]
    for (i <- 0 until schema.getFieldCount) {
      if (schema.next(ptr, i, maxOffest) != null) {
        val value = mapToPhoenixTypeInstance(keyFields(i).dt)
          .toObject(ptr, schema.getField(i).getDataType, SortOrder.getDefault)
        ret += ((keyFields(i), value))
      }
    }
    ret
  }

  override def encodeCompositeRowKey(rkIdxedFields: Seq[(Int, Field)], row: Row): Seq[Array[Byte]] = {
    rkIdxedFields.map { case (x, y) =>
      var ret = toBytes(row(x))
      // the last dimension of composite key does not need SEPARATOR
      if (y.length == -1 && x < rkIdxedFields.size - 1) ret ++= QueryConstants.SEPARATOR_BYTE_ARRAY
      ret
    }
  }

  private def buildSchema(keyFields: Seq[Field]): RowKeySchema = {
    val builder: RowKeySchemaBuilder = new RowKeySchemaBuilder(keyFields.length)
    keyFields.foreach { x =>
      builder.addField(buildPDatum(x.dt), false, SortOrder.getDefault)
    }
    builder.build
  }

  private def mapToPhoenixTypeInstance(input: DataType): PDataType[_] = {
    input match {
      case BooleanType => PBoolean.INSTANCE
      case ByteType => PTinyint.INSTANCE
      case DoubleType => PDouble.INSTANCE
      case IntegerType => PInteger.INSTANCE
      case FloatType => PFloat.INSTANCE
      case LongType => PLong.INSTANCE
      case ShortType => PSmallint.INSTANCE
      case StringType => PVarchar.INSTANCE
      case BinaryType => PVarbinary.INSTANCE
      case _ => throw new UnsupportedOperationException(s"unsupported data type $input")
    }
  }

  private def buildPDatum(input: DataType): PDatum = new PDatum {
    override def getScale: Integer = null
    override def isNullable: Boolean = false
    override def getDataType: PDataType[_] = mapToPhoenixTypeInstance(input)
    override def getMaxLength: Integer = null
    override def getSortOrder: SortOrder = SortOrder.getDefault
  }
}
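decodeCompositeRowKey above uses a single ImmutableBytesWritable as a reusable pointer that is advanced across the fields of the row key instead of copying each field. Below is a minimal stand-alone illustration of that idiom; the key layout and values are invented and are not Phoenix's actual encoding.

import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes

object CompositeKeyPointerSketch {
  def main(args: Array[String]): Unit = {
    // Invented two-field key: "region-42" + 0x00 separator + "user-7".
    val rowKey = Bytes.toBytes("region-42") ++ Array(0.toByte) ++ Bytes.toBytes("user-7")

    val ptr = new ImmutableBytesWritable
    ptr.set(rowKey, 0, 9)                   // point at the first field
    val first = Bytes.toString(ptr.get, ptr.getOffset, ptr.getLength)   // "region-42"

    ptr.set(rowKey, 10, rowKey.length - 10) // advance past the separator to the second field
    val second = Bytes.toString(ptr.get, ptr.getOffset, ptr.getLength)  // "user-7"

    println(s"$first / $second")
  }
}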
Example 7
Source File: HBaseUtils.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util

import com.timeyang.common.config.BaseConf
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.hadoop.mapreduce.Job

// Note: the enclosing HBaseUtils object is elided in this listing, including
// the newConf() helper used below.

  def createHbaseOutputJob(tableName: String): Job = {
    val conf = HBaseUtils.newConf()
    conf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    job
  }
}
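A hedged sketch of how the returned Job is typically used from Spark follows: rows are converted to (ImmutableBytesWritable, Put) pairs and written with saveAsNewAPIHadoopDataset. The table name, column family, qualifier, and RDD contents are placeholders, not part of the original project.

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD

// Hypothetical write path built on createHbaseOutputJob; all names are placeholders.
def writeEvents(events: RDD[(String, String)]): Unit = {
  val job = HBaseUtils.createHbaseOutputJob("events")

  val puts = events.map { case (rowKey, value) =>
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes("d"), Bytes.toBytes("value"), Bytes.toBytes(value))
    (new ImmutableBytesWritable(Bytes.toBytes(rowKey)), put)
  }

  puts.saveAsNewAPIHadoopDataset(job.getConfiguration)
}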