org.apache.hadoop.hbase.CellUtil Scala Examples

The following examples show how to use org.apache.hadoop.hbase.CellUtil. Each example notes the project and source file it was taken from.
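
For orientation, here is a minimal sketch (not taken from any of the projects below) of the most common CellUtil calls: cloning the family, qualifier, and value out of each Cell in a Result. It assumes the cells hold string-encoded values.

import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

object CellUtilSketch {
  // Render every cell of a Result as "family:qualifier=value", assuming string-encoded values.
  def describe(result: Result): Seq[String] =
    result.rawCells().map { cell =>
      val family    = Bytes.toString(CellUtil.cloneFamily(cell))
      val qualifier = Bytes.toString(CellUtil.cloneQualifier(cell))
      val value     = Bytes.toString(CellUtil.cloneValue(cell))
      s"$family:$qualifier=$value"
    }.toSeq
}
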
Example 1
Source File: SparkApplicationTester.scala    From TopNotch   with Apache License 2.0
package com.bfm.topnotch

import org.scalatest.OneInstancePerTest
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.{HConnection, HTableInterface, Put}
import org.apache.spark._
import org.apache.spark.sql.SparkSession
import org.scalamock.scalatest.MockFactory
import org.scalatest.FlatSpec
import com.typesafe.scalalogging.StrictLogging

/**
 * This class handles some of the boilerplate of testing SparkApplications with HBase writers
 */
abstract class SparkApplicationTester extends FlatSpec with OneInstancePerTest with MockFactory with StrictLogging
  with SharedSparkContext {
  protected val hconn = mock[HConnection]
  lazy val spark = SparkSession
    .builder()
    .appName(getClass.getName)
    .master("local")
    .config("spark.sql.shuffle.partitions", "4")
    //setting this to false to emulate HiveQL's case insensitivity for column names
    .config("spark.sql.caseSensitive", "false")
    .getOrCreate()

  /**
   * Verify that the next HTable will receive the correct puts. Call this once per HTable that is supposed to be created and written to.
   * Note: All HBase tests for a SparkApplication object must be run sequentially in order for us to keep track of HTableInterface mocks
   * @param tests The test's expected name for the HTable and expected values for the Put objects placed in the HTable
   * @param acceptAnyPut Tells the mock to accept any put value. This is useful for tests that use the mock but do not
   *                     test what is put inside it.
   */
  def setHBaseMock(tests: HTableParams, acceptAnyPut: Boolean = false): Unit = {
    val table = mock[HTableInterface]
    inSequence {
      (hconn.getTable(_: String)).expects(tests.tableName).returning(table)
      inAnyOrder {
        for (correctPut <- tests.puts) {
          if (acceptAnyPut) {
            (table.put(_: Put)).expects(*)
          }
          else {
            (table.put(_: Put)).expects(where {
              (actualPut: Put) =>
                val actualValue = CellUtil.cloneValue(actualPut.get(correctPut.columnFamily, correctPut.columnQualifier).get(0))
                correctPut.valueTest(actualValue)
                // just return true; if something is wrong, the value test above will already have thrown an exception
                true
            })
          }
        }
      }
      (table.close _).expects().returns()
    }
  }

  /**
    * Make the next HTable accept anything. This is useful when a test needs an HBase table but the specific test
    * isn't exercising the HBase functionality itself.
    *
    * @param tableName the name of the table that will be accessed.
    */
  def allowAnyHBaseActions(tableName: String): Unit = {
    setHBaseMock(new HTableParams(tableName, Seq(null)), true)
  }

  /**
   * The set of parameters defining what values should be used to create the HTable
   * @param tableName The name of the table the test expects to be created
   * @param puts The list of parameters for the puts that the test expects to be placed in the table
   */
  case class HTableParams(
                           tableName: String,
                           puts: Seq[HPutParams]
                           )

  /**
   * The list of values that the test expects to be in a put.
   * @param row The name of the row to put into HBase
   * @param columnFamily The cell's column family
   * @param columnQualifier The cell's column qualifier
   * @param correctString A string representing the correct value or an error message
   * @param valueTest The method for checking whether the value in the cell is correct. This is a function because the
   *                  actual and intended values in a cell may be equal even if their byte-array representations differ.
   *                  This should throw an exception on failure, using a call like shouldBe.
   */
  case class HPutParams(
                         row: Array[Byte],
                         columnFamily: Array[Byte],
                         columnQualifier: Array[Byte],
                         correctString: String,
                         valueTest: Array[Byte] => Unit
                         )
} 
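
As a usage illustration, a test built on SparkApplicationTester might set up the HBase mock as in the sketch below. The spec name, table name, column family, qualifier, and expected value are all hypothetical, and the code under test is assumed to write through the mocked hconn.

import com.bfm.topnotch.SparkApplicationTester
import org.apache.hadoop.hbase.util.Bytes
import org.scalatest.Matchers

class ExampleWriterSpec extends SparkApplicationTester with Matchers {
  "the writer under test" should "put the expected value into HBase" in {
    // hypothetical table, column family, qualifier, and expected value
    setHBaseMock(HTableParams("example_table", Seq(
      HPutParams(
        row = Bytes.toBytes("row1"),
        columnFamily = Bytes.toBytes("cf"),
        columnQualifier = Bytes.toBytes("total"),
        correctString = "42",
        valueTest = actual => Bytes.toString(actual) shouldBe "42"))))

    // ... run the code under test here, writing through hconn ...
  }
}
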
Example 2
Source File: HBaseSimpleRDD.scala    From spark-hbase-connector   with Apache License 2.0
package it.nerdammer.spark.hbase

import it.nerdammer.spark.hbase.conversion.FieldReader
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.{NewHadoopRDD, RDD}
import org.apache.spark.{Partition, TaskContext}

import scala.reflect.ClassTag

class HBaseSimpleRDD[R: ClassTag](hadoopHBase: NewHadoopRDD[ImmutableBytesWritable, Result], builder: HBaseReaderBuilder[R], saltingLength: Int = 0)
                       (implicit mapper: FieldReader[R], saltingProvider: SaltingProviderFactory[String]) extends RDD[R](hadoopHBase) {

  override def getPartitions: Array[Partition] = firstParent[(ImmutableBytesWritable, Result)].partitions

  override def compute(split: Partition, context: TaskContext) = {
    // val cleanConversion = sc.clean ---> next version
    firstParent[(ImmutableBytesWritable, Result)].iterator(split, context)
      .map(e => conversion(e._1, e._2))
  }

  def conversion(key: ImmutableBytesWritable, row: Result) = {

    val columnNames = HBaseUtils.chosenColumns(builder.columns, mapper.columns)

    val columnNamesFC = HBaseUtils.columnsWithFamily(builder.columnFamily, columnNames)

    val columns = columnNamesFC
      .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2)))
      .map(t => if(row.containsColumn(t._1, t._2)) Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)).array) else None)
      .toList

    mapper.map(Some(key.get.drop(saltingLength)) :: columns)
  }
} 
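
The essence of the conversion above can be shown without the connector's FieldReader machinery. The following sketch (an illustration, not part of the project) extracts the latest value of each requested column from a Result, mirroring the containsColumn / getColumnLatestCell / cloneValue pattern used in HBaseSimpleRDD.

import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

object LatestCellSketch {
  // For each (family, qualifier) pair, return the latest cell's value if the column is present.
  def latestValues(row: Result, columns: Seq[(String, String)]): List[Option[Array[Byte]]] =
    columns.map { case (family, qualifier) =>
      val (f, q) = (Bytes.toBytes(family), Bytes.toBytes(qualifier))
      if (row.containsColumn(f, q)) Some(CellUtil.cloneValue(row.getColumnLatestCell(f, q)))
      else None
    }.toList
}
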
Example 3
Source File: HBaseBulkGetExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.spark.SparkConf


object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
} 
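
Examples 3 through 6 repeat the same per-result formatting loop. As an aside, that loop can be factored into a small helper like the sketch below; following the examples, a cell whose qualifier is "counter" is decoded as a long and every other cell as a string.

import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.JavaConverters._

object ResultFormatter {
  // Format a Result as "rowKey:(qualifier,value)(qualifier,value)...".
  def format(result: Result): String = {
    val cells = result.listCells().asScala.map { cell =>
      val q = Bytes.toString(CellUtil.cloneQualifier(cell))
      val v =
        if (q == "counter") Bytes.toLong(CellUtil.cloneValue(cell)).toString
        else Bytes.toString(CellUtil.cloneValue(cell))
      s"($q,$v)"
    }
    Bytes.toString(result.getRow) + ":" + cells.mkString
  }
}
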
Example 4
Source File: HBaseBulkGetExample.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.{Result, Get}
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.spark.{SparkContext, SparkConf}


object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
} 
Example 5
Source File: HBaseBulkGetExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
} 
Example 6
Source File: HBaseBulkGetExample.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience


@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {

      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {

          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))

    } finally {
      sc.stop()
    }
  }
} 
Example 7
Source File: HbaseReaderHelper.scala    From stream-reactor   with Apache License 2.0
package com.datamountaineer.streamreactor.connect.hbase.writers

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}

import scala.collection.JavaConverters._

object HbaseReaderHelper {
  def createConnection: Connection = {
    ConnectionFactory.createConnection(HBaseConfiguration.create())
  }

  def getAllRecords(tableName: String, columnFamily: String)(implicit connection: Connection): List[HbaseRowData] = {
    HbaseHelper.withTable(TableName.valueOf(tableName)) { tbl =>
      val scan = new Scan()
      scan.addFamily(columnFamily.fromString())
      val scanner = tbl.getScanner(scan)
      scanner.asScala.map { rs =>
        val cells = rs.rawCells().map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) -> CellUtil.cloneValue(cell)
        }.toMap
        HbaseRowData(rs.getRow, cells)
      }.toList
    }
  }

}

case class HbaseRowData(key: Array[Byte], cells: Map[String, Array[Byte]])
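
A minimal caller of HbaseReaderHelper might look like the sketch below. The table and column family names are placeholders, and the cell values are assumed to be string-encoded.

import com.datamountaineer.streamreactor.connect.hbase.writers.{HbaseReaderHelper, HbaseRowData}
import org.apache.hadoop.hbase.client.Connection
import org.apache.hadoop.hbase.util.Bytes

object HbaseReaderHelperUsage {
  def main(args: Array[String]): Unit = {
    implicit val connection: Connection = HbaseReaderHelper.createConnection
    try {
      // hypothetical table and column family names
      val rows: List[HbaseRowData] = HbaseReaderHelper.getAllRecords("example_table", "cf")
      rows.foreach { row =>
        val decoded = row.cells.map { case (qualifier, value) => qualifier -> Bytes.toString(value) }
        println(Bytes.toString(row.key) + " -> " + decoded)
      }
    } finally {
      connection.close()
    }
  }
}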