org.apache.hadoop.hbase.client.Result Scala Examples
The following examples show how to use org.apache.hadoop.hbase.client.Result.
Each example notes the project and source file it was taken from.
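Before the project examples, here is a minimal, self-contained sketch of the core Result accessors the examples below rely on (getRow, getValue, isEmpty, rawCells/listCells). The table name "example_table", column family "cf" and qualifier "q" are placeholders, and the snippet assumes a running HBase cluster reachable through the default configuration.

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get, Result}
import org.apache.hadoop.hbase.util.Bytes

object ResultBasics {
  def main(args: Array[String]): Unit = {
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    val table = connection.getTable(TableName.valueOf("example_table")) // placeholder table name
    try {
      val result: Result = table.get(new Get(Bytes.toBytes("row1")))
      if (!result.isEmpty) {
        // Single-cell lookup by column family and qualifier
        val value = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q"))
        println(Bytes.toString(result.getRow) + " -> " + Bytes.toString(value))
        // Iterate over every cell in the row
        result.rawCells().foreach { cell =>
          println(Bytes.toString(CellUtil.cloneQualifier(cell)) + " = " +
            Bytes.toString(CellUtil.cloneValue(cell)))
        }
      }
    } finally {
      table.close()
      connection.close()
    }
  }
}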
Example 1
Source File: HBase.scala, from the AI project (Apache License 2.0)
package com.bigchange.hbase

import com.bigchange.util.HBaseUtil._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Result, _}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.spark.SparkContext

  // Checks whether the given row key already exists in the table.
  def existRowKey(row: String, table: Table): Boolean = {
    val get = new Get(row.getBytes())
    val result = table.get(get)
    if (result.isEmpty) {
      warn("hbase table does not contain this row key, executing insert")
      return false
    }
    true
  }

  def getConfiguration = if (hBaseConfiguration == null) {
    warn("hbase setDefaultConfiguration....")
    setDefaultConfiguration
  } else hBaseConfiguration

  def setDefaultConfiguration = {
    hBaseConfiguration = HBaseConfiguration.create
    // Settings required for local testing; on a cluster they are picked up
    // automatically from the configuration files on the classpath.
    hBaseConfiguration.set("fs.defaultFS", "hdfs://ns1")       // nameservice path
    hBaseConfiguration.set("dfs.nameservices", "ns1")
    hBaseConfiguration.set("dfs.ha.namenodes.ns1", "nn1,nn2")  // namenode list
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn1", "server3:9000") // namenode RPC address
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn2", "server4:9000") // namenode RPC address
    // Implementation class used for automatic namenode failover
    hBaseConfiguration.set("dfs.client.failover.proxy.provider.ns1",
      "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
    hBaseConfiguration.set("hbase.rootdir", "hdfs://ns1/hbase")
    hBaseConfiguration.set("hbase.zookeeper.quorum", "server0,server1,server2")
    hBaseConfiguration.set("hbase.zookeeper.property.clientPort", "2181")
    hBaseConfiguration
  }
} // end of enclosing class (the class declaration and the hBaseConfiguration field are elided in this excerpt)
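The excerpt imports TableInputFormat and ImmutableBytesWritable but does not show how the table is actually read into Spark. A minimal sketch of how such a configuration is typically fed into a Spark job is shown below; the method name and the returned row-key mapping are illustrative, not part of the project.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

def readRowKeys(sc: SparkContext, conf: Configuration, table: String): RDD[String] = {
  // Tell TableInputFormat which table to scan
  conf.set(TableInputFormat.INPUT_TABLE, table)
  // Each record is (row key, Result); here we only keep the row key as a String
  sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    .map { case (_, result) => Bytes.toString(result.getRow) }
}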
Example 2
Source File: HbasePublisher.scala, from the eel-sdk project (Apache License 2.0)
package io.eels.component.hbase

import java.util
import java.util.concurrent.atomic.AtomicBoolean

import com.sksamuel.exts.io.Using
import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.datastream.{Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Result, Scan}

import scala.collection.mutable.ArrayBuffer

class HbasePublisher(connection: Connection,
                     schema: StructType,
                     namespace: String,
                     tableName: String,
                     bufferSize: Int,
                     maxRows: Long,
                     scanner: Scan,
                     implicit val serializer: HbaseSerializer)
  extends Publisher[Seq[Row]] with Timed with Using {

  private val table = connection.getTable(TableName.valueOf(namespace, tableName))

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    try {
      using(new CloseableIterator) { rowIter =>
        val running = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(running))
        // Emit rows in batches of bufferSize; stop early if the subscription is cancelled.
        val buffer = new ArrayBuffer[Row](bufferSize)
        while (rowIter.hasNext && running.get()) {
          buffer append rowIter.next()
          if (buffer.size == bufferSize) {
            subscriber.next(buffer.toVector)
            buffer.clear()
          }
        }
        if (buffer.nonEmpty) subscriber.next(buffer.toVector)
        subscriber.completed()
      }
    } catch {
      case t: Throwable => subscriber.error(t)
    }
  }

  class CloseableIterator extends Iterator[Row] with AutoCloseable {
    private val resultScanner = table.getScanner(scanner)
    private val resultScannerIter = resultScanner.iterator()
    private var rowCount = 0
    private var iter: Iterator[Row] = Iterator.empty

    override def hasNext: Boolean = rowCount < maxRows && iter.hasNext || {
      if (rowCount < maxRows && resultScannerIter.hasNext) {
        iter = HBaseResultsIterator(schema, resultScannerIter)
        iter.hasNext
      } else false
    }

    override def next(): Row = {
      rowCount += 1
      iter.next()
    }

    override def close(): Unit = {
      resultScanner.close()
    }
  }

  case class HBaseResultsIterator(schema: StructType, resultIter: util.Iterator[Result])(implicit serializer: HbaseSerializer) extends Iterator[Row] {
    override def hasNext: Boolean = resultIter.hasNext

    override def next(): Row = {
      val resultRow = resultIter.next()
      val values = schema.fields.map { field =>
        if (!field.key) {
          // Non-key fields are read from the cell value in the field's configured column family.
          val value = resultRow.getValue(field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'")).getBytes, field.name.getBytes)
          if (value != null) serializer.fromBytes(value, field.name, field.dataType) else null
        } else serializer.fromBytes(resultRow.getRow, field.name, field.dataType)
      }
      Row(schema, values)
    }
  }
}
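The scanner passed to HbasePublisher is a plain HBase Scan. A minimal sketch of how a caller might configure one is shown below; the column family "cf" and the caching value are assumptions, not values from the project.

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Bytes

// Restrict the scan to one column family and fetch more rows per RPC round-trip.
val scan = new Scan()
scan.addFamily(Bytes.toBytes("cf")) // placeholder column family
scan.setCaching(1000)               // rows returned per RPC
// The scan would then be handed to HbasePublisher together with a Connection,
// a StructType schema and an HbaseSerializer.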
Example 3
Source File: HBaseReaders.scala, from the cuesheet project (Apache License 2.0)
package com.kakao.cuesheet.convert

import com.kakao.mango.util.Conversions._
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.JavaConversions._

trait HBaseReaders {
  val sc: SparkContext

  // Reads the whole table as (rowkey, ((family, qualifier), (timestamp, value))), decoding every field as a String.
  // hbaseTableBinary, the byte-level reader it delegates to, is defined elsewhere in this trait and not shown in this excerpt.
  def hbaseTable(quorum: String, table: String): RDD[(String, ((String, String), (Long, String)))] = {
    hbaseTableBinary(quorum, table).map {
      case (rowkey, ((family, qualifier), (timestamp, value))) =>
        (rowkey.string, ((family.string, qualifier.string), (timestamp, value.string)))
    }
  }

  def hbaseColumnBinary(quorum: String, table: String, family: Array[Byte], qualifier: Array[Byte]): RDD[(Array[Byte], (Long, Array[Byte]))] = {
    hbaseTableBinary(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family.sameElements(f) && qualifier.sameElements(q) => (rowkey, cell)
    }
  }

  def hbaseColumn(quorum: String, table: String, family: String, qualifier: String): RDD[(String, (Long, String))] = {
    hbaseTable(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family == f && qualifier == q => (rowkey, cell)
    }
  }
}
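Since the excerpt omits hbaseTableBinary, here is a minimal sketch of what such a byte-level reader typically looks like. It is not the cuesheet implementation: it scans the table through TableInputFormat and flattens every cell of each Result into the (rowkey, ((family, qualifier), (timestamp, value))) shape the methods above expect.

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

def hbaseTableBinarySketch(sc: SparkContext, quorum: String, table: String)
    : RDD[(Array[Byte], ((Array[Byte], Array[Byte]), (Long, Array[Byte])))] = {
  val conf = HBaseConfiguration.create()
  conf.set("hbase.zookeeper.quorum", quorum)
  conf.set(TableInputFormat.INPUT_TABLE, table)
  sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    .flatMap { case (_, result) =>
      // One output record per cell of the row
      result.rawCells().map { cell =>
        (CellUtil.cloneRow(cell),
          ((CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell)),
            (cell.getTimestamp, CellUtil.cloneValue(cell))))
      }
    }
}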
Example 4
Source File: HBaseSimpleRDD.scala, from the spark-hbase-connector project (Apache License 2.0)
package it.nerdammer.spark.hbase

import it.nerdammer.spark.hbase.conversion.FieldReader
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.{NewHadoopRDD, RDD}
import org.apache.spark.{Partition, TaskContext}

import scala.reflect.ClassTag

class HBaseSimpleRDD[R: ClassTag](hadoopHBase: NewHadoopRDD[ImmutableBytesWritable, Result],
                                  builder: HBaseReaderBuilder[R],
                                  saltingLength: Int = 0)
                                 (implicit mapper: FieldReader[R], saltingProvider: SaltingProviderFactory[String])
  extends RDD[R](hadoopHBase) {

  override def getPartitions: Array[Partition] = firstParent[(ImmutableBytesWritable, Result)].partitions

  override def compute(split: Partition, context: TaskContext) = {
    // val cleanConversion = sc.clean ---> next version
    firstParent[(ImmutableBytesWritable, Result)].iterator(split, context)
      .map(e => conversion(e._1, e._2))
  }

  def conversion(key: ImmutableBytesWritable, row: Result) = {
    // Resolve the (family, qualifier) pairs requested by the builder, then read the
    // latest cell for each; columns missing from the Result are mapped to None.
    val columnNames = HBaseUtils.chosenColumns(builder.columns, mapper.columns)
    val columnNamesFC = HBaseUtils.columnsWithFamily(builder.columnFamily, columnNames)
    val columns = columnNamesFC
      .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2)))
      .map(t => if (row.containsColumn(t._1, t._2)) Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)).array) else None)
      .toList
    mapper.map(Some(key.get.drop(saltingLength)) :: columns)
  }
}
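Note the two design choices visible in conversion: the first saltingLength bytes are stripped from the row key (key.get.drop(saltingLength)) before it is handed to the FieldReader, and columns absent from the Result become None rather than raising an error, so the reader can treat them as missing values.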
Example 5
Source File: HBaseBulkGetExample.scala, from the SparkOnHBase project (Apache License 2.0)
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.spark.SparkConf

object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      // RDD of row keys to fetch: RDD[Array[Byte]]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {
          // Assumes every requested row exists; listCells() returns null for an empty Result.
          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
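The result transformation decodes the "counter" qualifier with Bytes.toLong, so it assumes that column was written as an 8-byte long. A minimal sketch of a Put that would satisfy this assumption is shown below; the table name "myTable" and the column family "c" are placeholders.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes

val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
val table = connection.getTable(TableName.valueOf("myTable")) // placeholder table name
val put = new Put(Bytes.toBytes("1"))
put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("counter"), Bytes.toBytes(1L)) // long-encoded cell
put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))  // string-encoded cell
table.put(put)
table.close()
connection.close()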
Example 6
Source File: HBaseBulkGetExample.scala, from the SparkOnHBase project (Apache License 2.0)
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.{Result, Get}
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.spark.{SparkContext, SparkConf}

object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext,
        TableName.valueOf(tableName),
        2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {
          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
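Examples 5 and 6 do the same work; they differ only in entry point. Example 5 calls bulkGet directly on HBaseContext, while Example 6 imports HBaseRDDFunctions._ so the same operation is available as hbaseBulkGet on the RDD itself, with the HBaseContext passed as an argument.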
Example 7
Source File: GraphX.scala, from the unicorn project (Apache License 2.0)
package unicorn.narwhal.graph

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext

import unicorn.bigtable.hbase.HBaseTable
import unicorn.json._
import unicorn.unibase.graph.{ReadOnlyGraph, GraphSerializer, GraphVertexColumnFamily, GraphOutEdgeColumnFamily}

  // Builds a GraphX graph from the vertex and out-edge column families of the graph's HBase table.
  // ("name" is the table name field of the enclosing graph class, which is elided in this excerpt.)
  def graphx(sc: SparkContext): org.apache.spark.graphx.Graph[JsObject, (String, JsValue)] = {
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, name)
    conf.setInt(TableInputFormat.SCAN_CACHEDROWS, 500)
    conf.setBoolean(TableInputFormat.SCAN_CACHEBLOCKS, false)
    conf.set(TableInputFormat.SCAN_COLUMNS, s"$GraphVertexColumnFamily $GraphOutEdgeColumnFamily")

    val rdd = sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    val rows = rdd.mapPartitions { it =>
      val serializer = new GraphSerializer()
      it.map { tuple =>
        val row = HBaseTable.getRow(tuple._2)
        serializer.deserializeVertex(row)
      }
    }

    val vertices = rows.map { vertex => (vertex.id, vertex.properties) }

    val edges = rows.flatMap { vertex =>
      vertex.edges.map { edge =>
        org.apache.spark.graphx.Edge(edge.from, edge.to, (edge.label, edge.properties))
      }
    }

    org.apache.spark.graphx.Graph(vertices, edges)
  }
} // end of enclosing class (declaration elided in this excerpt)
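Once built, the returned value is an ordinary GraphX Graph[JsObject, (String, JsValue)], so the usual GraphX operators apply. A minimal usage sketch is below; the value g, standing in for an instance of the enclosing graph class, is an assumption.

val graph = g.graphx(sc) // g: instance of the enclosing graph class (assumed)
// Rank vertices with PageRank and print the ten highest-ranked vertex ids
val ranks = graph.pageRank(0.001).vertices
ranks.sortBy(-_._2).take(10).foreach { case (vertexId, rank) =>
  println(s"$vertexId -> $rank")
}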
Example 8
Source File: HBaseBulkGetExample.scala, from the hbase-connectors project (Apache License 2.0)
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {
          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
Example 9
Source File: HBaseBulkGetExample.scala, from the hbase-connectors project (Apache License 2.0)
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext,
        TableName.valueOf(tableName),
        2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {
          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
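Examples 8 and 9 are the hbase-connectors counterparts of Examples 5 and 6: the logic is identical, with the imports reorganized and the objects annotated @InterfaceAudience.Private to mark them as internal to the project.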