org.apache.hadoop.hbase.CellUtil Scala Examples
The following examples show how to use org.apache.hadoop.hbase.CellUtil.
Each example notes the project and source file it was taken from.
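Before the project examples, here is a minimal sketch of the CellUtil calls they all rely on: cloning the family, qualifier, and value out of a Cell returned by a Get. The table name, column family, and row key below are placeholders chosen for illustration, not names from the examples that follow.

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes

object CellUtilSketch {
  def main(args: Array[String]): Unit = {
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    // "my_table" and "row1" are illustrative placeholders
    val table = connection.getTable(TableName.valueOf("my_table"))
    try {
      val result = table.get(new Get(Bytes.toBytes("row1")))
      for (cell <- result.rawCells()) {
        // CellUtil.cloneXxx copies the relevant slice of the cell's backing buffer into a fresh byte array
        val family    = Bytes.toString(CellUtil.cloneFamily(cell))
        val qualifier = Bytes.toString(CellUtil.cloneQualifier(cell))
        val value     = Bytes.toString(CellUtil.cloneValue(cell))
        println(s"$family:$qualifier = $value")
      }
    } finally {
      table.close()
      connection.close()
    }
  }
}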
Example 1
Source File: SparkApplicationTester.scala From TopNotch with Apache License 2.0
package com.bfm.topnotch

import org.scalatest.OneInstancePerTest
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.{HConnection, HTableInterface, Put}
import org.apache.spark._
import org.apache.spark.sql.SparkSession
import org.scalamock.scalatest.MockFactory
import org.scalatest.FlatSpec
import com.typesafe.scalalogging.StrictLogging

/**
 * This class handles some of the boilerplate of testing SparkApplications with HBase writers
 */
abstract class SparkApplicationTester extends FlatSpec with OneInstancePerTest with MockFactory with StrictLogging
  with SharedSparkContext {
  protected val hconn = mock[HConnection]

  lazy val spark = SparkSession
    .builder()
    .appName(getClass.getName)
    .master("local")
    .config("spark.sql.shuffle.partitions", "4")
    // setting this to false to emulate HiveQL's case insensitivity for column names
    .config("spark.sql.caseSensitive", "false")
    .getOrCreate()

  /**
   * Verify that the next HTable will receive the correct puts. Call this once per HTable that is supposed to be
   * created and written to.
   * Note: all HBase tests for a SparkApplication object must be run sequentially in order to keep track of the
   * HTableInterface mocks.
   * @param tests The test's expected name for the HTable and expected values for the Put objects placed in the HTable
   * @param acceptAnyPut Tells the mock to accept any put value. This is useful for tests that use the mock but do not
   *                     test what is put inside it.
   */
  def setHBaseMock(tests: HTableParams, acceptAnyPut: Boolean = false): Unit = {
    val table = mock[HTableInterface]
    inSequence {
      (hconn.getTable(_: String)).expects(tests.tableName).returning(table)
      inAnyOrder {
        for (correctPut <- tests.puts) {
          if (acceptAnyPut) {
            (table.put(_: Put)).expects(*)
          } else {
            (table.put(_: Put)).expects(where { (actualPut: Put) =>
              val actualValue = CellUtil.cloneValue(
                actualPut.get(correctPut.columnFamily, correctPut.columnQualifier).get(0))
              correctPut.valueTest(actualValue)
              // just return true; if there are issues, the value test will throw an exception
              true
            })
          }
        }
      }
      (table.close _).expects().returns()
    }
  }

  /**
   * Make the next HTable accept anything. This is useful when testing something that needs an HBase table,
   * but the specific test isn't testing the HBase functionality.
   *
   * @param tableName the name of the table that will be accessed.
   */
  def allowAnyHBaseActions(tableName: String): Unit = {
    setHBaseMock(new HTableParams(tableName, Seq(null)), true)
  }

  /**
   * The set of parameters defining what values should be used to create the HTable
   * @param tableName The name of the table the test expects to be created
   * @param puts The list of parameters for the puts that the test expects to be placed in the table
   */
  case class HTableParams(
    tableName: String,
    puts: Seq[HPutParams]
  )

  /**
   * The list of values that the test expects to be in a put.
   * @param row The name of the row to put into HBase
   * @param columnFamily The cell's column family
   * @param columnQualifier The cell's column qualifier
   * @param correctString A string representing the correct value or an error message
   * @param valueTest The method for checking if the value in the cell is correct, as the actual and intended values
   *                  in a cell may be equal even if they don't have the same representation as an array of bytes.
   *                  This should throw an exception on failure, using a call like shouldBe.
   */
  case class HPutParams(
    row: Array[Byte],
    columnFamily: Array[Byte],
    columnQualifier: Array[Byte],
    correctString: String,
    valueTest: Array[Byte] => Unit
  )
}
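A hypothetical test built on this helper might look like the sketch below. The class name, table name, column family, qualifier, and expected value are invented for illustration, and the ScalaTest Matchers trait is mixed in so that shouldBe is available inside valueTest.

// Hypothetical usage of setHBaseMock; names and values are illustrative only.
class MyWriterSpec extends SparkApplicationTester with org.scalatest.Matchers {
  "MyWriter" should "write the computed value to HBase" in {
    setHBaseMock(HTableParams(
      tableName = "metrics",
      puts = Seq(HPutParams(
        row = "row1".getBytes,
        columnFamily = "cf".getBytes,
        columnQualifier = "count".getBytes,
        correctString = "42",
        valueTest = actual => new String(actual) shouldBe "42"
      ))
    ))
    // ... run the SparkApplication under test here, which writes through hconn ...
  }
}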
Example 2
Source File: HBaseSimpleRDD.scala From spark-hbase-connector with Apache License 2.0
package it.nerdammer.spark.hbase

import it.nerdammer.spark.hbase.conversion.FieldReader
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.{NewHadoopRDD, RDD}
import org.apache.spark.{Partition, TaskContext}

import scala.reflect.ClassTag

class HBaseSimpleRDD[R: ClassTag](hadoopHBase: NewHadoopRDD[ImmutableBytesWritable, Result],
                                  builder: HBaseReaderBuilder[R],
                                  saltingLength: Int = 0)
                                 (implicit mapper: FieldReader[R],
                                  saltingProvider: SaltingProviderFactory[String])
  extends RDD[R](hadoopHBase) {

  override def getPartitions: Array[Partition] = firstParent[(ImmutableBytesWritable, Result)].partitions

  override def compute(split: Partition, context: TaskContext) = {
    // val cleanConversion = sc.clean ---> next version
    firstParent[(ImmutableBytesWritable, Result)].iterator(split, context)
      .map(e => conversion(e._1, e._2))
  }

  def conversion(key: ImmutableBytesWritable, row: Result) = {
    val columnNames = HBaseUtils.chosenColumns(builder.columns, mapper.columns)

    val columnNamesFC = HBaseUtils.columnsWithFamily(builder.columnFamily, columnNames)

    val columns = columnNamesFC
      .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2)))
      .map(t => if (row.containsColumn(t._1, t._2)) Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)).array) else None)
      .toList

    mapper.map(Some(key.get.drop(saltingLength)) :: columns)
  }
}
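The conversion method above wraps each requested column in an Option so that missing columns become None rather than a null value. A standalone sketch of that pattern, independent of the connector's builder classes, could look like this; the family and qualifier parameters are illustrative.

import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

// Sketch of the same pattern: return Some(value) only if the row actually contains the column.
def readColumn(row: Result, family: String, qualifier: String): Option[Array[Byte]] = {
  val (f, q) = (Bytes.toBytes(family), Bytes.toBytes(qualifier))
  if (row.containsColumn(f, q))
    Some(CellUtil.cloneValue(row.getColumnLatestCell(f, q)))
  else
    None
}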
Example 3
Source File: HBaseBulkGetExample.scala From SparkOnHBase with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.spark.SparkConf

object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
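The result-to-string closure in this example is shared, essentially verbatim, by the bulk-get examples that follow. Factored into a named function it might read as below; the special handling of the "counter" qualifier, whose value is stored as an 8-byte long, is taken straight from the example.

import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

// The same conversion as the closure above, as a standalone function reusable across the bulk-get examples.
def resultToString(result: Result): String = {
  val b = new StringBuilder
  b.append(Bytes.toString(result.getRow) + ":")
  val it = result.listCells().iterator()
  while (it.hasNext) {
    val cell = it.next()
    val q = Bytes.toString(CellUtil.cloneQualifier(cell))
    val v =
      if (q == "counter") Bytes.toLong(CellUtil.cloneValue(cell)).toString // counters are stored as 8-byte longs
      else Bytes.toString(CellUtil.cloneValue(cell))
    b.append("(" + q + "," + v + ")")
  }
  b.toString()
}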
Example 4
Source File: HBaseBulkGetExample.scala From SparkOnHBase with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.{Result, Get}
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.spark.{SparkContext, SparkConf}

object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
Example 5
Source File: HBaseBulkGetExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
Example 6
Source File: HBaseBulkGetExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
}
Example 7
Source File: HbaseReaderHelper.scala From stream-reactor with Apache License 2.0
package com.datamountaineer.streamreactor.connect.hbase.writers

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}

import scala.collection.JavaConverters._

object HbaseReaderHelper {
  def createConnection: Connection = {
    ConnectionFactory.createConnection(HBaseConfiguration.create())
  }

  def getAllRecords(tableName: String, columnFamily: String)(implicit connection: Connection): List[HbaseRowData] = {
    HbaseHelper.withTable(TableName.valueOf(tableName)) { tbl =>
      val scan = new Scan()
      scan.addFamily(columnFamily.fromString())
      val scanner = tbl.getScanner(scan)
      scanner.asScala.map { rs =>
        val cells = rs.rawCells().map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) -> CellUtil.cloneValue(cell)
        }.toMap
        HbaseRowData(rs.getRow, cells)
      }.toList
    }
  }
}

case class HbaseRowData(key: Array[Byte], cells: Map[String, Array[Byte]])
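A hypothetical caller could use the helper roughly as follows. The table name "people", the column family "d", and the assumption that the cell values were written as UTF-8 strings are illustrative only.

import org.apache.hadoop.hbase.client.Connection
import org.apache.hadoop.hbase.util.Bytes

object HbaseReaderHelperUsage {
  def main(args: Array[String]): Unit = {
    // Hypothetical usage; "people" and "d" are placeholder table / column-family names.
    implicit val connection: Connection = HbaseReaderHelper.createConnection
    try {
      HbaseReaderHelper.getAllRecords("people", "d").foreach { row =>
        val key = Bytes.toString(row.key)
        // Assumes the values were written as UTF-8 strings; adjust the decoding to the real schema.
        val cells = row.cells.map { case (qualifier, value) => qualifier -> Bytes.toString(value) }
        println(s"$key -> $cells")
      }
    } finally {
      connection.close()
    }
  }
}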