org.apache.hadoop.hbase.util.Bytes Scala Examples
The following examples show how to use org.apache.hadoop.hbase.util.Bytes.
Each example is taken from an open-source project; the source file, project name, and license are noted above each example.
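Before the full examples, here is a minimal sketch of the round-trip conversions that Bytes provides (the object name and values are illustrative only):

// Minimal sketch: encoding values to byte arrays and back with org.apache.hadoop.hbase.util.Bytes.
import org.apache.hadoop.hbase.util.Bytes

object BytesRoundTrip extends App {
  val rowKey: Array[Byte] = Bytes.toBytes("row-001") // String -> bytes
  val counter: Array[Byte] = Bytes.toBytes(42L)      // Long -> bytes

  println(Bytes.toString(rowKey)) // "row-001"
  println(Bytes.toLong(counter))  // 42

  // Byte-wise lexicographic comparison, the same ordering HBase uses for row keys
  println(Bytes.compareTo(Bytes.toBytes("a"), Bytes.toBytes("b")) < 0) // true
}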
Example 1
Source File: HBaseLocalClient.scala From gimel with Apache License 2.0
package com.paypal.gimel.hbase.utilities

import java.io.File

import scala.collection.mutable.ArrayBuffer

import com.google.common.io.Files
import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.sql.util._
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import com.paypal.gimel.common.catalog.Field
import com.paypal.gimel.hbase.DataSet

class HBaseLocalClient extends FunSuite with Matchers with BeforeAndAfterAll {

  var sparkSession: SparkSession = _
  var dataSet: DataSet = _
  val hbaseTestingUtility = new HBaseTestingUtility()
  val tableName = "test_table"
  val cfs = Array("personal", "professional")
  val columns = Array("id", "name", "age", "address", "company", "designation", "salary")
  val fields = columns.map(col => new Field(col))

  val metrics = ArrayBuffer.empty[(String, QueryExecution, Long)]

  protected override def beforeAll(): Unit = {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    hbaseTestingUtility.startMiniCluster()
    SparkHBaseConf.conf = hbaseTestingUtility.getConfiguration
    createTable(tableName, cfs)
    val conf = new SparkConf
    conf.set(SparkHBaseConf.testConf, "true")
    sparkSession = SparkSession.builder()
      .master("local")
      .appName("HBase Test")
      .config(conf)
      .getOrCreate()
    val listener = new QueryExecutionListener {
      // Only test successful case here, so no need to implement `onFailure`
      override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {}

      override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = {
        metrics += ((funcName, qe, duration))
      }
    }
    sparkSession.listenerManager.register(listener)
    sparkSession.sparkContext.setLogLevel("ERROR")
    dataSet = new DataSet(sparkSession)
  }

  protected override def afterAll(): Unit = {
    hbaseTestingUtility.shutdownMiniCluster()
    sparkSession.close()
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      hbaseTestingUtility.deleteTable(TableName.valueOf(tName))
    } catch {
      case _: Throwable => println("No table = " + name + " found")
    }
    hbaseTestingUtility.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }

  // Mocks data for testing
  def mockDataInDataFrame(numberOfRows: Int): DataFrame = {
    def stringed(n: Int) =
      s"""{"id": "$n","name": "MAC-$n", "address": "MAC-${n + 1}", "age": "${n + 1}", "company": "MAC-$n", "designation": "MAC-$n", "salary": "${n * 10000}" }"""
    val texts: Seq[String] = (1 to numberOfRows).map { x => stringed(x) }
    val rdd: RDD[String] = sparkSession.sparkContext.parallelize(texts)
    val dataFrame: DataFrame = sparkSession.read.json(rdd)
    dataFrame
  }
}
Example 2
Source File: HBaseBulkGetExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {
          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))
    } finally {
      sc.stop()
    }
  }
}
Example 3
Source File: HBaseForeachPartitionExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseForeachPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseForeachPartitionExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseForeachPartition(hbaseContext,
        (it, connection) => {
          val m = connection.getBufferedMutator(TableName.valueOf(tableName))

          it.foreach(r => {
            val put = new Put(r._1)
            r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
            m.mutate(put)
          })
          m.flush()
          m.close()
        })
    } finally {
      sc.stop()
    }
  }
}
Example 4
Source File: HBaseBulkDeleteExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkDeleteExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkDeleteExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName)
    val sc = new SparkContext(sparkConf)
    try {
      //[Array[Byte]]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5")
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseBulkDelete(hbaseContext, TableName.valueOf(tableName),
        putRecord => new Delete(putRecord),
        4)
    } finally {
      sc.stop()
    }
  }
}
Example 5
Source File: HBaseBulkPutExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily} are missing arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 6
Source File: HBaseBulkGetExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkGetExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkGetExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = hbaseContext.bulkGet[Array[Byte], String](
        TableName.valueOf(tableName),
        2,
        rdd,
        record => {
          System.out.println("making Get")
          new Get(record)
        },
        (result: Result) => {
          val it = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (it.hasNext) {
            val cell = it.next()
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        })

      getRdd.collect().foreach(v => println(v))
    } finally {
      sc.stop()
    }
  }
}
Example 7
Source File: HBaseStreamingBulkPutExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily} are missing arguments")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseStreamingBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
}
Example 8
Source File: HBaseBulkPutExampleFromFile.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutExampleFromFile {
  def main(args: Array[String]) {
    if (args.length < 3) {
      println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)
    val inputFile = args(2)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " +
      tableName + " " + columnFamily + " " + inputFile)
    val sc = new SparkContext(sparkConf)

    try {
      var rdd = sc.hadoopFile(
        inputFile,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]).map(v => {
        System.out.println("reading-" + v._2.toString)
        v._2.toString
      })

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.bulkPut[String](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          System.out.println("hbase-" + putRecord)
          val put = new Put(Bytes.toBytes("Value- " + putRecord))
          put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"),
            Bytes.toBytes(putRecord.length()))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 9
Source File: HBaseBulkDeleteExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkDeleteExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkDeleteExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName)
    val sc = new SparkContext(sparkConf)
    try {
      //[Array[Byte]]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5")
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkDelete[Array[Byte]](rdd,
        TableName.valueOf(tableName),
        putRecord => new Delete(putRecord),
        4)
    } finally {
      sc.stop()
    }
  }
}
Example 10
Source File: HBaseBulkPutExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseBulkPutExample {tableName} {columnFamily} are missing arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 11
Source File: HBaseDistributedScanExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseDistributedScanExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseDistributedScanExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val scan = new Scan()
      scan.setCaching(100)

      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)

      getRdd.foreach(v => println(Bytes.toString(v._1.get())))

      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length)
    } finally {
      sc.stop()
    }
  }
}
Example 12
Source File: HBaseBulkPutTimestampExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkPutTimestampExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing arguments")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("6"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("7"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("8"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("9"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("10"),
          Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))))

      val conf = HBaseConfiguration.create()

      val timeStamp = System.currentTimeMillis()

      val hbaseContext = new HBaseContext(sc, conf)
      hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
        TableName.valueOf(tableName),
        (putRecord) => {
          val put = new Put(putRecord._1)
          putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2,
            timeStamp, putValue._3))
          put
        })
    } finally {
      sc.stop()
    }
  }
}
Example 13
Source File: ReadFilter.scala From hbase-rdd-examples with Apache License 2.0
package unicredit.example

import org.apache.hadoop.hbase.filter.PrefixFilter
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}
import unicredit.spark.hbase._

object ReadFilter extends App {
  val name = "Example of read from HBase table"

  lazy val sparkConf = new SparkConf().setAppName(name)
  lazy val sc = new SparkContext(sparkConf)
  implicit val config = HBaseConfig() // Assumes hbase-site.xml is on classpath

  val columns = Map(
    "cf1" -> Set("col1", "col2"),
    "cf2" -> Set("col3")
  )
  val filter = new PrefixFilter(Bytes.toBytes("abc"))

  sc.hbase[String]("test-table", columns, filter)
    .map({ case (k, v) =>
      val cf1 = v("cf1")
      val col1 = cf1("col1")
      val col2 = cf1("col2")
      val col3 = v("cf2")("col3")

      List(k, col1, col2, col3) mkString "\t"
    })
    .saveAsTextFile("test-output")
}
Example 14
Source File: HBaseMapPartitionExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseMapPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseMapPartitionExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseMapPartitionExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val getRdd = rdd.hbaseMapPartitions[String](hbaseContext, (it, connection) => {
        val table = connection.getTable(TableName.valueOf(tableName))
        it.map { r =>
          //batching would be faster. This is just an example
          val result = table.get(new Get(r))

          val cellIt = result.listCells().iterator()
          val b = new StringBuilder

          b.append(Bytes.toString(result.getRow) + ":")

          while (cellIt.hasNext) {
            val cell = cellIt.next()
            // Clone qualifier and value; the raw backing arrays contain more than this cell
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        }
      })

      getRdd.collect().foreach(v => println(v))
    } finally {
      sc.stop()
    }
  }
}
Example 15
Source File: HBasePut.scala From gimel with Apache License 2.0
package com.paypal.gimel.hbase.utilities

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.{DataFrame, SparkSession}

import com.paypal.gimel.hbase.conf.{HbaseClientConfiguration, HbaseConfigs}
import com.paypal.gimel.logger.Logger

object HBasePut {
  def apply(sparkSession: SparkSession): HBasePut = new HBasePut(sparkSession)
}

class HBasePut(sparkSession: SparkSession) {
  val logger = Logger()
  lazy val hbaseUtilities = HBaseUtilities(sparkSession)

  def putRows(hbaseTable: String, dataFrame: DataFrame, rowKeyColumn: String,
              columns: Array[String], cfColsMap: Map[String, String]) {
    try {
      // Configure And Connect
      val conf = HBaseConfiguration.create()
      val cnxn = ConnectionFactory.createConnection(conf)
      // Create Connection to HBase table
      val tbl = cnxn.getTable(TableName.valueOf(hbaseTable))
      val rows = dataFrame.rdd.map { row =>
        (row.getAs(rowKeyColumn).toString,
          columns.map(eachCol => (cfColsMap.getOrElse(eachCol, ""), eachCol,
            row.getAs(eachCol).asInstanceOf[String]))
        )
      }.collect()
      // Performing put operation on each row of dataframe
      rows.foreach { row =>
        val putRow: Put = new Put(Bytes.toBytes(row._1.asInstanceOf[String]))
        row._2.foreach(x =>
          if (x._2 != rowKeyColumn) putRow.addColumn(Bytes.toBytes(x._1), Bytes.toBytes(x._2), Bytes.toBytes(x._3)))
        tbl.put(putRow)
      }
      tbl.close()
    } catch {
      case ex: Throwable =>
        ex.printStackTrace()
        throw ex
    }
  }
}
Example 16
Source File: HogConfig.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.util

import java.security.MessageDigest

import org.apache.hadoop.hbase.util.Bytes
import javax.xml.bind.DatatypeConverter
import math._
import com.typesafe.config.Config
import scala.collection.mutable.HashSet

object HogConfig {

  def get(config: Config, key: String, valueType: String, default: Any): Any = {
    if (config == null) return default

    try {
      val value = config.getString(key)

      if (value.isEmpty()) return default // Return default value

      println(f"Configuration: $key => $value")

      if (valueType.equals("Int"))
        value.toInt
      else if (valueType.equals("Double"))
        value.toDouble
      else if (valueType.equals("Long"))
        value.toLong
      else if (valueType.equals("Set(Int)")) {
        val patternSet = "Set\\(".r
        val patternSetEnd = "\\)".r

        if (value.equals("Set()")) return Set()

        return (patternSetEnd replaceAllIn ((patternSet replaceAllIn (value, "")), ""))
          .split(",").map({ x => x.toInt }).toSet
      } else if (valueType.equals("Set(String)")) {
        val patternSet = "Set\\(".r
        val patternSetEnd = "\\)".r

        if (value.equals("Set()")) return Set()

        return (patternSetEnd replaceAllIn ((patternSet replaceAllIn (value, "")), ""))
          .split(",").map({ x => println(x.toString.trim()); x.toString.trim() }).toSet
      } else
        default // Create type first
    } catch {
      case t: Throwable =>
        t.printStackTrace()
        println(f"Problem parsing $key . Check if it is ok. Using default value")
        return default
    }
  }

  def getInt(config: Config, key: String, default: Any): Int = {
    get(config, key, "Int", default).asInstanceOf[Int]
  }

  def getLong(config: Config, key: String, default: Any): Long = {
    get(config, key, "Long", default).asInstanceOf[Long]
  }

  def getDouble(config: Config, key: String, default: Any): Double = {
    // Cast to Double (the original cast to Long was a bug for a Double-returning method)
    get(config, key, "Double", default).asInstanceOf[Double]
  }

  def getSetInt(config: Config, key: String, default: Any): Set[Int] = {
    get(config, key, "Set(Int)", default).asInstanceOf[Set[Int]]
  }

  def getSetString(config: Config, key: String, default: Any): Set[String] = {
    get(config, key, "Set(String)", default).asInstanceOf[Set[String]]
  }
}
Example 17
Source File: HogGeograph.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.util

import java.security.MessageDigest

import org.apache.hadoop.hbase.util.Bytes
import javax.xml.bind.DatatypeConverter
import math._

object HogGeograph {

  val R = 6372.8 //radius in km

  def haversineDistance(lat1: Double, lon1: Double, lat2: Double, lon2: Double): Double = {
    val dLat = (lat2 - lat1).toRadians
    val dLon = (lon2 - lon1).toRadians

    val a = pow(sin(dLat / 2), 2) + pow(sin(dLon / 2), 2) * cos(lat1.toRadians) * cos(lat2.toRadians)
    val c = 2 * asin(sqrt(a))
    R * c
  }

  def haversineDistanceFromStrings(coords1: String, coords2: String): Double = {
    try {
      val coordsDouble1 = coords1.split(",").map({ x => x.toDouble })
      val coordsDouble2 = coords2.split(",").map({ x => x.toDouble })

      haversineDistance(coordsDouble1(0), coordsDouble1(1), coordsDouble2(0), coordsDouble2(1))
    } catch {
      case t: Throwable =>
        // t.printStackTrace()
        // Return a large distance
        999999999D
    }
  }
}
Example 18
Source File: HogEvent.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.event

import java.util.HashMap
import java.util.Map

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.hogzilla.hbase.HogHBaseRDD
import org.hogzilla.util.HogFlow
import java.net.InetAddress

class HogEvent(flow: HogFlow) {
  var sensorid: Int = 0
  var signature_id: Double = 0
  var priorityid: Int = 0
  var text: String = ""
  var data: Map[String, String] = new HashMap()
  var ports: String = ""
  var title: String = ""
  var username: String = ""
  var coords: String = ""

  def formatIPtoBytes(ip: String): Array[Byte] = {
    try {
      // Eca! Snorby doesn't support IPv6 yet. See https://github.com/Snorby/snorby/issues/65
      if (ip.contains(":"))
        InetAddress.getByName("255.255.6.6").getAddress
      else
        InetAddress.getByName(ip).getAddress
    } catch {
      case t: Throwable =>
        // Bogus address!
        InetAddress.getByName("255.255.1.1").getAddress
    }
  }

  def alert() {
    val put = new Put(Bytes.toBytes(flow.get("flow:id")))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("note"), Bytes.toBytes(text))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip"), formatIPtoBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip"), formatIPtoBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip_str"), Bytes.toBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip_str"), Bytes.toBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("signature_id"), Bytes.toBytes("%.0f".format(signature_id)))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("time"), Bytes.toBytes(System.currentTimeMillis))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("ports"), Bytes.toBytes(ports))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("title"), Bytes.toBytes(title))

    if (!username.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("username"), Bytes.toBytes(username))
    if (!coords.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("coords"), Bytes.toBytes(coords))

    HogHBaseRDD.hogzilla_events.put(put)

    //println(f"ALERT: $text%100s\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
  }
}
Example 19
Source File: HogSignature.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.event

import org.hogzilla.hbase.HogHBaseRDD
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Put

case class HogSignature(signature_class: Int, signature_name: String, signature_priority: Int,
                        signature_revision: Int, signature_id: Double, signature_group_id: Int) {
  //Example: 3,"HZ: Suspicious DNS flow identified by K-Means clustering",2,1,826000001,826

  def saveHBase(): HogSignature = {
    val get = new Get(Bytes.toBytes("%.0f".format(signature_id)))

    if (!HogHBaseRDD.hogzilla_sensor.exists(get)) {
      val put = new Put(Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("id"), Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("class"), Bytes.toBytes(signature_class.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("name"), Bytes.toBytes(signature_name))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("priority"), Bytes.toBytes(signature_priority.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("revision"), Bytes.toBytes(signature_revision.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("group_id"), Bytes.toBytes(signature_group_id.toString()))
      HogHBaseRDD.hogzilla_signatures.put(put)
    }

    this
  }
}
Example 20
Source File: HogHBaseReputation.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.hbase

import scala.math.random
import java.lang.Math

import org.apache.spark._
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionModel, LinearRegressionWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.rdd.RDD
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.BinaryComparator
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.CompareFilter
import java.util.ArrayList

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.Filter
import scala.collection.mutable.HashSet
import org.apache.hadoop.hbase.client.Put

object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    val filters: ArrayList[Filter] = new ArrayList();

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false);

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false);

    filters.add(colValFilter1);
    filters.add(colValFilter2);

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()
    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }

  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }
}
Example 21
Source File: HogHBaseCluster.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.hbase

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.Delete
import org.hogzilla.cluster.HogClusterMember

object HogHBaseCluster {

  def formatClusterTitle(clusterCentroid: List[(Long, Double)], clusterIdx: Int): String = {
    val mainTitle = "Group " + clusterIdx.toString + " - " +
      clusterCentroid
        .filter({ case (port, rate) => rate > 4.999 })
        .map({ case (port, rate) => port.toString() + ":" + "%.0f".format(rate) + "%" }).mkString(", ")

    val onePercentList = clusterCentroid
      .filter({ case (port, rate) => .9999 < rate & rate < 5 })

    if (onePercentList.size > 0) {
      mainTitle + ", " + onePercentList.map({ case (port, rate) => port.toString() }).mkString("(", ", ", ")" + "> 1%")
    } else {
      mainTitle
    }
  }

  def deleteCluster(clusterIdx: Int) = {
    val del = new Delete(Bytes.toBytes(clusterIdx.toString))
    HogHBaseRDD.hogzilla_clusters.delete(del)
  }

  def deleteClusterMember(memberIP: String) = {
    val del = new Delete(Bytes.toBytes(memberIP))
    HogHBaseRDD.hogzilla_cluster_members.delete(del)
  }

  def saveCluster(clusterIdx: Int, clusterCentroid: List[(Long, Double)], clusterSize: Long,
                  members: Array[String]) = {
    val memberString = members.mkString(",")

    val put = new Put(Bytes.toBytes(clusterIdx.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes(formatClusterTitle(clusterCentroid, clusterIdx)))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("size"), Bytes.toBytes(clusterSize.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("centroid"), Bytes.toBytes(clusterCentroid.mkString("[", ",", "]")))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("members"), Bytes.toBytes(memberString))

    HogHBaseRDD.hogzilla_clusters.put(put)
  }

  def saveClusterMember(clusterMember: HogClusterMember) = {
    val put = new Put(Bytes.toBytes(clusterMember.memberIP.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes(clusterMember.formatTitle))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("size"), Bytes.toBytes(clusterMember.clusterSize.toString))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("centroid"), Bytes.toBytes(clusterMember.centroid.mkString("[", ",", "]")))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("idx"), Bytes.toBytes(clusterMember.clusterIdx.toString))
    put.add(Bytes.toBytes("cluster"), Bytes.toBytes("description"), Bytes.toBytes(formatClusterTitle(clusterMember.centroid, clusterMember.clusterIdx)))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("ports"), Bytes.toBytes("TCP: " + clusterMember.ports.mkString("", " ", "")))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("frequencies"), Bytes.toBytes("TCP: " +
      clusterMember.frequency_vector
        .filter({ case (port, freq) => clusterMember.ports.contains(port) })
        .map({ case (port, freq) => port.toString + "=" + "%.0f".format(freq) + "%" })
        .mkString("", " ", "")
    ))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("ip"), Bytes.toBytes(clusterMember.memberIP))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("distance"), Bytes.toBytes("%.2f".format(clusterMember.distance)))

    HogHBaseRDD.hogzilla_cluster_members.put(put)
  }
}
Example 22
Source File: BytesUtilsSuite.scala From Spark-SQL-on-HBase with Apache License 2.0
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Logging
import org.apache.spark.sql.hbase.types.HBaseBytesType
import org.apache.spark.sql.hbase.util.BinaryBytesUtils
import org.apache.spark.sql.types._
import org.scalatest.{BeforeAndAfterAll, FunSuite}

class BytesUtilsSuite extends FunSuite with BeforeAndAfterAll with Logging {
  test("Bytes Ordering Test") {
    val s = Seq(-257, -256, -255, -129, -128, -127, -64, -16, -4, -1,
      0, 1, 4, 16, 64, 127, 128, 129, 255, 256, 257)
    val result = s.map(i => (i, BinaryBytesUtils.create(IntegerType).toBytes(i)))
      .sortWith((f, s) =>
        HBaseBytesType.ordering.gt(
          f._2.asInstanceOf[HBaseBytesType.InternalType],
          s._2.asInstanceOf[HBaseBytesType.InternalType]))
    assert(result.map(a => a._1) == s.sorted.reverse)
  }

  def compare(a: Array[Byte], b: Array[Byte]): Int = {
    val length = Math.min(a.length, b.length)
    var result: Int = 0
    for (i <- 0 to length - 1) {
      val diff: Int = (a(i) & 0xff).asInstanceOf[Byte] - (b(i) & 0xff).asInstanceOf[Byte]
      if (diff != 0) {
        result = diff
      }
    }
    result
  }

  test("Bytes Utility Test") {
    assert(BinaryBytesUtils.toBoolean(BinaryBytesUtils.create(BooleanType)
      .toBytes(input = true), 0) === true)
    assert(BinaryBytesUtils.toBoolean(BinaryBytesUtils.create(BooleanType)
      .toBytes(input = false), 0) === false)

    assert(BinaryBytesUtils.toDouble(BinaryBytesUtils.create(DoubleType).toBytes(12.34d), 0) === 12.34d)
    assert(BinaryBytesUtils.toDouble(BinaryBytesUtils.create(DoubleType).toBytes(-12.34d), 0) === -12.34d)

    assert(BinaryBytesUtils.toFloat(BinaryBytesUtils.create(FloatType).toBytes(12.34f), 0) === 12.34f)
    assert(BinaryBytesUtils.toFloat(BinaryBytesUtils.create(FloatType).toBytes(-12.34f), 0) === -12.34f)

    assert(BinaryBytesUtils.toInt(BinaryBytesUtils.create(IntegerType).toBytes(12), 0) === 12)
    assert(BinaryBytesUtils.toInt(BinaryBytesUtils.create(IntegerType).toBytes(-12), 0) === -12)

    assert(BinaryBytesUtils.toLong(BinaryBytesUtils.create(LongType).toBytes(1234l), 0) === 1234l)
    assert(BinaryBytesUtils.toLong(BinaryBytesUtils.create(LongType).toBytes(-1234l), 0) === -1234l)

    assert(BinaryBytesUtils.toShort(BinaryBytesUtils.create(ShortType)
      .toBytes(12.asInstanceOf[Short]), 0) === 12)
    assert(BinaryBytesUtils.toShort(BinaryBytesUtils.create(ShortType)
      .toBytes(-12.asInstanceOf[Short]), 0) === -12)

    assert(BinaryBytesUtils.toUTF8String(BinaryBytesUtils.create(StringType).toBytes("abc"), 0, 3)
      === UTF8String("abc"))
    assert(BinaryBytesUtils.toUTF8String(BinaryBytesUtils.create(StringType).toBytes(""), 0, 0) === UTF8String(""))

    assert(BinaryBytesUtils.toByte(BinaryBytesUtils.create(ByteType)
      .toBytes(5.asInstanceOf[Byte]), 0) === 5)
    assert(BinaryBytesUtils.toByte(BinaryBytesUtils.create(ByteType)
      .toBytes(-5.asInstanceOf[Byte]), 0) === -5)

    assert(compare(BinaryBytesUtils.create(IntegerType).toBytes(128),
      BinaryBytesUtils.create(IntegerType).toBytes(-128)) > 0)
  }

  test("byte array plus one") {
    var byteArray = Array[Byte](0x01.toByte, 127.toByte)
    assert(Bytes.compareTo(BinaryBytesUtils.addOne(byteArray), Array[Byte](0x01.toByte, 0x80.toByte)) == 0)

    byteArray = Array[Byte](0xff.toByte, 0xff.toByte)
    assert(BinaryBytesUtils.addOne(byteArray) == null)

    byteArray = Array[Byte](0x02.toByte, 0xff.toByte)
    assert(Bytes.compareTo(BinaryBytesUtils.addOne(byteArray), Array[Byte](0x03.toByte, 0x00.toByte)) == 0)
  }

  test("float comparison") {
    val f1 = BinaryBytesUtils.create(FloatType).toBytes(-1.23f)
    val f2 = BinaryBytesUtils.create(FloatType).toBytes(100f)
    assert(Bytes.compareTo(f1, f2) < 0)
  }
}
Example 23
Source File: HBasePartitioner.scala From Spark-SQL-on-HBase with Apache License 2.0
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner
import org.apache.spark.util.CollectionsUtils

object HBasePartitioner {
  implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
    def compare(a: HBaseRawType, b: HBaseRawType) = Bytes.compareTo(a, b)
  }
}

class HBasePartitioner(var splitKeys: Array[HBaseRawType]) extends Partitioner {
  import HBasePartitioner.HBaseRawOrdering

  type t = HBaseRawType

  lazy private val len = splitKeys.length

  // For pre-split table splitKeys(0) = bytes[0], to remove it,
  // otherwise partition 0 always be empty and
  // we will miss the last region's date when bulk load
  lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail

  def numPartitions = if (len == 0) 1 else len

  @transient private lazy val binarySearch: ((Array[t], t) => Int) = CollectionsUtils.makeBinarySearch[t]

  def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[t]
    var partition = 0
    if (len <= 128 && len > 0) {
      // If we have less than 128 partitions naive search
      val ordering = implicitly[Ordering[t]]
      while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) {
        partition += 1
      }
    } else {
      // Determine which binary search method to use only once.
      partition = binarySearch(realSplitKeys, k)
      // binarySearch either returns the match location or -[insertion point]-1
      if (partition < 0) {
        partition = -partition - 1
      }
      if (partition > realSplitKeys.length) {
        partition = realSplitKeys.length
      }
    }
    partition
  }

  override def equals(other: Any): Boolean = other match {
    case r: HBasePartitioner =>
      r.splitKeys.sameElements(splitKeys)
    case _ =>
      false
  }

  override def hashCode(): Int = {
    val prime = 31
    var result = 1
    var i = 0
    while (i < splitKeys.length) {
      result = prime * result + splitKeys(i).hashCode
      i += 1
    }
    result = prime * result
    result
  }
}
Example 24
Source File: HBasePartitioner.scala From Heracles with Apache License 2.0
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner
import org.apache.spark.util.CollectionsUtils

object HBasePartitioner {
  implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
    def compare(a: HBaseRawType, b: HBaseRawType) = Bytes.compareTo(a, b)
  }
}

class HBasePartitioner(val splitKeys: Array[HBaseRawType]) extends Partitioner {
  import HBasePartitioner.HBaseRawOrdering

  type t = HBaseRawType

  lazy private val len = splitKeys.length

  // For pre-split table splitKeys(0) = bytes[0], to remove it,
  // otherwise partition 0 always be empty and
  // we will miss the last region's date when bulk load
  lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail

  override def numPartitions = if (len == 0) 1 else len

  @transient private lazy val binarySearch: ((Array[t], t) => Int) = CollectionsUtils.makeBinarySearch[t]

  override def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[t]
    var partition = 0
    if (len <= 128 && len > 0) {
      // If we have less than 128 partitions naive search
      val ordering = implicitly[Ordering[t]]
      while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) {
        partition += 1
      }
    } else {
      // Determine which binary search method to use only once.
      partition = binarySearch(realSplitKeys, k)
      // binarySearch either returns the match location or -[insertion point]-1
      if (partition < 0) {
        partition = -partition - 1
      }
      if (partition > realSplitKeys.length) {
        partition = realSplitKeys.length
      }
    }
    partition
  }

  override def equals(other: Any): Boolean = other match {
    case r: HBasePartitioner =>
      r.splitKeys.sameElements(splitKeys)
    case _ =>
      false
  }

  override def hashCode(): Int = {
    val prime = 31
    var result = 1
    var i = 0
    while (i < splitKeys.length) {
      result = prime * result + splitKeys(i).hashCode
      i += 1
    }
    result = prime * result
    result
  }
}
Example 25
Source File: ByteArrayComparable.scala From SparkOnHBase with Apache License 2.0
package org.apache.hadoop.hbase.spark

import org.apache.hadoop.hbase.util.Bytes

class ByteArrayComparable(val bytes: Array[Byte], val offset: Int = 0, var length: Int = -1)
  extends Comparable[ByteArrayComparable] {

  if (length == -1) {
    length = bytes.length
  }

  override def compareTo(o: ByteArrayComparable): Int = {
    Bytes.compareTo(bytes, offset, length, o.bytes, o.offset, o.length)
  }

  override def hashCode(): Int = {
    Bytes.hashCode(bytes, offset, length)
  }

  override def equals(obj: Any): Boolean = {
    obj match {
      case b: ByteArrayComparable =>
        Bytes.equals(bytes, offset, length, b.bytes, b.offset, b.length)
      case _ =>
        false
    }
  }
}
Example 26
Source File: HbaseReaderHelper.scala From stream-reactor with Apache License 2.0
package com.datamountaineer.streamreactor.connect.hbase.writers

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}

import scala.collection.JavaConverters._

object HbaseReaderHelper {
  def createConnection: Connection = {
    ConnectionFactory.createConnection(HBaseConfiguration.create())
  }

  def getAllRecords(tableName: String, columnFamily: String)(implicit connection: Connection): List[HbaseRowData] = {
    HbaseHelper.withTable(TableName.valueOf(tableName)) { tbl =>
      val scan = new Scan()
      scan.addFamily(columnFamily.fromString())
      val scanner = tbl.getScanner(scan)
      scanner.asScala.map { rs =>
        val cells = rs.rawCells().map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) -> CellUtil.cloneValue(cell)
        }.toMap
        HbaseRowData(rs.getRow, cells)
      }.toList
    }
  }
}

case class HbaseRowData(key: Array[Byte], cells: Map[String, Array[Byte]])
Example 27
Source File: HBaseServiceLayer.scala From Taxi360 with Apache License 2.0
package com.cloudera.sa.taxi360.server.hbase

import javax.ws.rs._
import javax.ws.rs.core.MediaType

import com.cloudera.sa.taxi360.model.NyTaxiYellowTrip
import com.cloudera.sa.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

@Path("rest")
class HBaseServiceLayer {

  @GET
  @Path("hello")
  @Produces(Array(MediaType.TEXT_PLAIN))
  def hello(): String = {
    "Hello World"
  }

  @GET
  @Path("vender/{venderId}/timeline")
  @Produces(Array(MediaType.APPLICATION_JSON))
  def getTripTimeLine(@PathParam("venderId") venderId: String,
                      @QueryParam("startTime") startTime: String = Long.MinValue.toString,
                      @QueryParam("endTime") endTime: String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = {

    val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName))

    val st = if (startTime == null) {
      Long.MinValue.toString
    } else {
      startTime
    }
    val et = if (endTime == null) {
      Long.MaxValue.toString
    } else {
      endTime
    }

    val scan = new Scan()
    val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts)
    println("startRowKey:" + Bytes.toString(startRowKey))
    scan.setStartRow(startRowKey)
    val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts)
    println("endRowKey:" + Bytes.toString(endRowKey))
    scan.setStopRow(endRowKey)

    val scannerIt = table.getScanner(scan).iterator()

    val tripList = new mutable.MutableList[NyTaxiYellowTrip]

    while (scannerIt.hasNext) {
      val result = scannerIt.next()
      tripList += TaxiTripHBaseHelper.convertToTaxiTrip(result)
      println("Found a trip:" + TaxiTripHBaseHelper.convertToTaxiTrip(result))
    }

    println("tripList.size:" + tripList.size)

    tripList.toArray
  }
}
Example 28
Source File: CreateSaltedTable.scala From Taxi360 with Apache License 2.0
package com.cloudera.sa.taxi360.setup.hbase

import java.io.File

import org.apache.commons.lang.StringUtils
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.io.compress.Compression
import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

object CreateSaltedTable {
  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>")
      // Exit early instead of falling through to args(0) with no arguments
      return
    }
    val tableName = args(0)
    val columnFamilyName = args(1)
    val regionCount = args(2).toInt
    val numOfSalts = args(3).toInt
    val hbaseConfigFolder = args(4)

    val conf = HBaseConfiguration.create()

    conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL)

    val connection = ConnectionFactory.createConnection(conf)

    val admin = connection.getAdmin

    val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName))

    val columnDescriptor = new HColumnDescriptor(columnFamilyName)

    columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY)
    columnDescriptor.setBlocksize(64 * 1024)
    columnDescriptor.setBloomFilterType(BloomType.ROW)

    tableDescriptor.addFamily(columnDescriptor)

    tableDescriptor.setMaxFileSize(Long.MaxValue)
    tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName)

    val splitKeys = new mutable.MutableList[Array[Byte]]
    for (i <- 0 to regionCount) {
      val regionSplitStr = StringUtils.leftPad((i * (numOfSalts / regionCount)).toString, 4, "0")
      splitKeys += Bytes.toBytes(regionSplitStr)
    }
    admin.createTable(tableDescriptor, splitKeys.toArray)
  }
}
Example 29
Source File: Test.scala From shc with Apache License 2.0
package org.apache.spark.sql

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog
import org.apache.spark.sql.types.BinaryType

object Test {
  def main(args: Array[String]) {
    val a: Array[Byte] = Array.fill(10)(Byte.MinValue)
    val b = Bytes.toBytes("row003")
    System.arraycopy(b, 0, a, 0, b.length)
    val c = Bytes.toBytes(Int.MinValue)
    System.arraycopy(c, 0, a, b.length, c.length)
    val len = a.indexOf(HBaseTableCatalog.delimiter, 0)
    val s1 = Bytes.toString(a, 0, 6)
    val s2 = Bytes.toString(a, 0, len)

    val l = Array.fill(8)(Byte.MaxValue)
    Bytes.putDouble(l, 0, Double.MinValue)
    val m = Array.fill(8)(Byte.MaxValue)
    Bytes.putDouble(m, 0, -20.0)
    val n = Array.fill(8)(Byte.MaxValue)
    Bytes.putDouble(n, 0, 0.0)
    val o = Array.fill(8)(Byte.MaxValue)
    Bytes.putDouble(o, 0, 20.0)
    val p = Array.fill(8)(Byte.MaxValue)
    Bytes.putDouble(p, 0, Double.MaxValue)

    val c1 = BinaryType.ordering.compare(l, m)
    val c2 = BinaryType.ordering.compare(m, n)
    val c3 = BinaryType.ordering.compare(n, o)
    val c4 = BinaryType.ordering.compare(o, p)

    val p1 = Array.fill(10)(0: Byte)
    Bytes.putBytes(p1, 0, Bytes.toBytes("row010"), 0, 6)
    val p2 = Array.fill(10)(-1: Byte)
    Bytes.putBytes(p2, 0, Bytes.toBytes("row010"), 0, 6)
    val p3 = Array.fill(10)(Byte.MaxValue)
    Bytes.putBytes(p3, 0, Bytes.toBytes("row010"), 0, 6)
    Bytes.putInt(p3, 6, 10)
    val p4 = Bytes.compareTo(p1, p3)
    val p5 = Bytes.compareTo(p2, p3)

    val z = Array.fill(4)(Byte.MinValue)
    Bytes.putInt(z, 0, -1)
    val z1 = Array.fill(4)(Byte.MinValue)
    Bytes.putInt(z1, 0, -2147483648)
    val z2 = Bytes.compareTo(z, z1)

    val t = Array.fill(4)(-1: Byte)
    println(Bytes.toInt(t))

    val s = Bytes.toBytes(1.4.asInstanceOf[Float])
    println(Bytes.toInt(s))
    println(Bytes.toFloat(s))
    val w = Bytes.toBytes(-1.4.asInstanceOf[Float])
    println(Bytes.toInt(w))
    println(Bytes.toFloat(w))

    val buffer1 = Bytes.toBytes(-1.0f)
    val b1 = Bytes.toInt(buffer1)
    var buffer = Array.fill(4)(-1: Byte)
    var buffer2 = Bytes.toBytes(-1.0f)
    var buffer3 = java.lang.Float.floatToIntBits(-1.0f)
    val b3 = Bytes.toBytes(buffer3)
    val out = Bytes.toInt(buffer1) ^ Integer.MIN_VALUE
    buffer2 = Bytes.toBytes(out)
    var i: Int = java.lang.Float.floatToIntBits(-1.0f)
    i = (i ^ ((i >> Integer.SIZE - 1) | Integer.MIN_VALUE)) + 1
    Bytes.putInt(buffer, 0, i)

    val mn = Bytes.toBytes(-0.0f)
    println(Bytes.toFloat(mn))
    println(Float.MinPositiveValue)
    println(s"a")
  }
}
Example 30
package org.apache.spark.sql

import java.io.File

import com.google.common.io.Files
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseTestingUtility}
import org.apache.hadoop.hbase.client.{Scan, Put, ConnectionFactory, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.sql.types.UTF8String
import org.apache.spark.{SparkContext, SparkConf, Logging}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

import scala.collection.JavaConverters._

class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {
  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }

  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] def tableName = "table1"

  private[spark] def columnFamilies: Array[String] = Array.tabulate(9) { x => s"cf$x" }
  var table: Table = null
  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  SparkHBaseConf.conf = htu.getConfiguration
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  def catalog = s"""{
                    |"table":{"namespace":"default", "name":"table1"},
                    |"rowkey":"key",
                    |"columns":{
                    |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
                    |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
                    |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
                    |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
                    |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
                    |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
                    |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
                    |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
                    |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
                    |}
                    |}""".stripMargin

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")
  }

  override def afterAll() {
    try {
      table.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shuting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case _ => logError("teardown error")
    }
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      htu.deleteTable(TableName.valueOf(tName))
    } catch {
      case _ => logInfo(" - no table " + name + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }

  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      case _ => logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
}
Example 31
Source File: Utils.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase import java.util import java.util.Comparator import org.apache.avro.generic.GenericRecord import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.catalyst.expressions.MutableRow import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.SparkSqlSerializer import org.apache.spark.sql.types._ import scala.collection.mutable.ArrayBuffer import scala.math.Ordering object Utils { def setRowCol( row: MutableRow, field: (Field, Int), src: HBaseType, offset: Int, length: Int): Unit = { val index = field._2 val f = field._1 if (f.sedes.isDefined) { // If we already have sedes defined , use it. val m = f.sedes.get.deserialize(src, offset, length) row.update(index, m) } else if (f.exeSchema.isDefined) { // println("avro schema is defined to do deserialization") // If we have avro schema defined, use it to get record, and then covert them to catalyst data type val m = AvroSedes.deserialize(src, f.exeSchema.get) // println(m) val n = f.avroToCatalyst.map(_(m)) row.update(index, n.get) } else { // Fall back to atomic type f.dt match { case BooleanType => row.setBoolean(index, toBoolean(src, offset)) case ByteType => row.setByte(index, src(offset)) case DoubleType => row.setDouble(index, Bytes.toDouble(src, offset)) case FloatType => row.setFloat(index, Bytes.toFloat(src, offset)) case IntegerType => row.setInt(index, Bytes.toInt(src, offset)) case LongType => row.setLong(index, Bytes.toLong(src, offset)) case ShortType => row.setShort(index, Bytes.toShort(src, offset)) case StringType => row.update(index, toUTF8String(src, offset, length)) case BinaryType => val newArray = new Array[Byte](length) System.arraycopy(src, offset, newArray, 0, length) row.update(index, newArray) case _ => row.update(index, SparkSqlSerializer.deserialize[Any](src)) //TODO } } } // convert input to data type def toBytes(input: Any, field: Field): Array[Byte] = { if (field.sedes.isDefined) { field.sedes.get.serialize(input) } else if (field.schema.isDefined) { // Here we assume the top level type is structType val record = field.catalystToAvro(input) AvroSedes.serialize(record, field.schema.get) } else { input match { case data: Boolean => Bytes.toBytes(data) case data: Byte => Array(data) case data: Array[Byte] => data case data: Double => Bytes.toBytes(data) case data: Float => Bytes.toBytes(data) case data: Int => Bytes.toBytes(data) case data: Long => Bytes.toBytes(data) case data: Short => Bytes.toBytes(data) case data: UTF8String => data.getBytes case data: String => Bytes.toBytes(data) //Bytes.toBytes(input.asInstanceOf[String])//input.asInstanceOf[UTF8String].getBytes case _ => throw new Exception(s"unsupported data type ${field.dt}") //TODO } } } def toBoolean(input: HBaseType, offset: Int): Boolean = { input(offset) != 0 } def toUTF8String(input: HBaseType, offset: Int, length: Int): UTF8String = { UTF8String(input.slice(offset, offset + length)) } }
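The atomic-type branches in setRowCol and toBytes above are the two directions of the same Bytes round trip; a small sketch of that symmetry, using only the Bytes API and no Spark row types (values are illustrative):

import org.apache.hadoop.hbase.util.Bytes

// Encode with Bytes.toBytes, decode with the matching Bytes.toXxx reader.
val i = Bytes.toBytes(42)
val d = Bytes.toBytes(42.5d)
val s = Bytes.toBytes("forty-two")

assert(Bytes.toInt(i) == 42)
assert(Bytes.toDouble(d) == 42.5d)
assert(Bytes.toString(s) == "forty-two")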
Example 32
Source File: Sedes.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase import java.io.ByteArrayInputStream import org.apache.avro.Schema import org.apache.avro.Schema.Type._ import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io._ import org.apache.commons.io.output.ByteArrayOutputStream import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.types._ trait Sedes { def serialize(value: Any): Array[Byte] def deserialize(bytes: Array[Byte], start: Int, end: Int): Any } class DoubleSedes extends Sedes { override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double]) override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = { Bytes.toDouble(bytes, start) } }
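A hypothetical IntSedes written against the same trait shows the intended symmetry between serialize and deserialize (this class is not part of the shc sources):

import org.apache.hadoop.hbase.util.Bytes

class IntSedes extends Sedes {
  override def serialize(value: Any): Array[Byte] =
    Bytes.toBytes(value.asInstanceOf[Int])

  // An Int always occupies Bytes.SIZEOF_INT bytes, so only the start offset matters.
  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any =
    Bytes.toInt(bytes, start)
}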
Example 33
Source File: HBaseTestSuite.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.io.File import scala.collection.JavaConverters._ import com.google.common.io.Files import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility} import org.apache.spark.sql.execution.datasources.hbase.Logging import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class HBaseTestSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { private[spark] var htu = HBaseTestingUtility.createLocalHTU() private[spark] var tableName: Array[Byte] = Bytes.toBytes("t1") private[spark] var columnFamily: Array[Byte] = Bytes.toBytes("cf0") private[spark] var columnFamilies: Array[Array[Byte]] = Array(Bytes.toBytes("cf0"), Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3"), Bytes.toBytes("cf4")) var table: Table = null // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.cleanupTestDir htu.startMiniZKCluster htu.startMiniHBaseCluster(1, 4) logInfo(" - minicluster started") println(" - minicluster started") try { htu.deleteTable(TableName.valueOf(tableName)) //htu.createTable(TableName.valueOf(tableName), columnFamily, 2, Bytes.toBytes("abc"), Bytes.toBytes("xyz"), 2) } catch { case _ : Throwable => logInfo(" - no table " + Bytes.toString(tableName) + " found") } setupTable() } override def afterAll() { try { table.close() println("shutdown") htu.deleteTable(TableName.valueOf(tableName)) logInfo("shuting down minicluster") htu.shutdownMiniHBaseCluster htu.shutdownMiniZKCluster logInfo(" - minicluster shut down") htu.cleanupTestDir } catch { case _ : Throwable => logError("teardown error") } } def setupTable() { val config = htu.getConfiguration htu.createMultiRegionTable(TableName.valueOf(tableName), columnFamilies) println("create htable t1") val connection = ConnectionFactory.createConnection(config) val r = connection.getRegionLocator(TableName.valueOf("t1")) table = connection.getTable(TableName.valueOf("t1")) val regionLocations = r.getAllRegionLocations.asScala.toSeq println(s"$regionLocations size: ${regionLocations.size}") (0 until 100).foreach { x => var put = new Put(Bytes.toBytes(s"row$x")) (0 until 5).foreach { y => put.addColumn(columnFamilies(y), Bytes.toBytes(s"c$y"), Bytes.toBytes(s"value $x $y")) } table.put(put) } } }
Example 35
package org.apache.spark.sql import org.apache.spark.sql.execution.datasources.hbase.Logging import java.io.File import com.google.common.io.Files import org.apache.hadoop.hbase.client.Table import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf import org.apache.spark.{SparkContext, SparkConf} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args: _*)) } } var spark: SparkSession = null var sc: SparkContext = null var sqlContext: SQLContext = null var df: DataFrame = null private[spark] var htu = new HBaseTestingUtility private[spark] def tableName = "table1" private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"} var table: Table = null val conf = new SparkConf conf.set(SparkHBaseConf.testConf, "true") // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) def defineCatalog(tName: String) = s"""{ |"table":{"namespace":"default", "name":"$tName"}, |"rowkey":"key", |"columns":{ |"col0":{"cf":"rowkey", "col":"key", "type":"string"}, |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"}, |"col2":{"cf":"cf2", "col":"col2", "type":"double"}, |"col3":{"cf":"cf3", "col":"col3", "type":"float"}, |"col4":{"cf":"cf4", "col":"col4", "type":"int"}, |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"}, |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"}, |"col7":{"cf":"cf7", "col":"col7", "type":"string"}, |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"} |} |}""".stripMargin @deprecated(since = "04.12.2017(dd/mm/year)", message = "use `defineCatalog` instead") def catalog = defineCatalog(tableName) override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.startMiniCluster SparkHBaseConf.conf = htu.getConfiguration logInfo(" - minicluster started") println(" - minicluster started") spark = SparkSession.builder() .master("local") .appName("HBaseTest") .config(conf) .getOrCreate() sqlContext = spark.sqlContext sc = spark.sparkContext } override def afterAll() { htu.shutdownMiniCluster() spark.stop() } def createTable(name: String, cfs: Array[String]) { val tName = Bytes.toBytes(name) val bcfs = cfs.map(Bytes.toBytes(_)) try { htu.deleteTable(TableName.valueOf(tName)) } catch { case _ : Throwable => logInfo(" - no table " + name + " found") } htu.createMultiRegionTable(TableName.valueOf(tName), bcfs) } def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) { try { htu.deleteTable(TableName.valueOf(name)) } catch { case _ : Throwable => logInfo(" - no table " + Bytes.toString(name) + " found") } htu.createMultiRegionTable(TableName.valueOf(name), cfs) } }
Example 36
Source File: AvroRecordRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import com.datamountaineer.streamreactor.connect.hbase.avro.AvroRecordFieldExtractorMapFn import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class AvroRecordRowKeyBuilderTest extends AnyWordSpec with Matchers with MockitoSugar { val schema: Schema = new Schema.Parser().parse(PersonAvroSchema.schema) "AvroRecordRowKeyBuilder" should { "extract the values from the avro record and create the key" in { val keys = Seq("firstName", "lastName", "age") val rowKeyBuilder = new AvroRecordRowKeyBuilderBytes(AvroRecordFieldExtractorMapFn(schema, keys), keys) val sinkRecord = mock[SinkRecord] val firstName = "Jack" val lastName = "Smith" val age = 29 val record = new GenericRecord { val values: Map[String, AnyRef] = Map("firstName" -> firstName, "lastName" -> lastName, "age" -> Int.box(age)) override def get(key: String): AnyRef = values(key) override def put(key: String, v: scala.Any): Unit = sys.error("not supported") override def get(i: Int): AnyRef = sys.error("not supported") override def put(i: Int, v: scala.Any): Unit = sys.error("not supported") override def getSchema: Schema = sys.error("not supported") } val expectedValue = Bytes.add( Array( firstName.fromString(), rowKeyBuilder.delimBytes, lastName.fromString(), rowKeyBuilder.delimBytes, age.fromInt())) rowKeyBuilder.build(sinkRecord, record) shouldBe expectedValue } } }
Example 37
Source File: StructFieldsRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class StructFieldsRowKeyBuilderTest extends AnyWordSpec with Matchers { "StructFieldsRowKeyBuilder" should { "raise an exception if the field is not present in the struct" in { intercept[IllegalArgumentException] { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) //val field = Field("threshold", "threshold", false) StructFieldsRowKeyBuilderBytes(List("threshold")).build(sinkRecord, null) } } "create the row key based on one single field in the struct" in { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) //val field = Field("firstName", "firstName", true) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StructFieldsRowKeyBuilderBytes(List("firstName")).build(sinkRecord, null) shouldBe "Alex".fromString } "create the row key based on more than one field in the struct" in { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) //val field = Field("firstName", "firstName", true) //val field2 = Field("age", "age", true) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StructFieldsRowKeyBuilderBytes(List("firstName", "age")).build(sinkRecord, null) shouldBe Bytes.add("Alex".fromString(), "\n".fromString(), 30.fromInt()) } } }
Example 38
Source File: ColumnFamilyQualifierMapKeyWrapper.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.util.Bytes class ColumnFamilyQualifierMapKeyWrapper(val columnFamily:Array[Byte], val columnFamilyOffSet:Int, val columnFamilyLength:Int, val qualifier:Array[Byte], val qualifierOffSet:Int, val qualifierLength:Int) extends Serializable{ override def equals(other:Any): Boolean = { val otherWrapper = other.asInstanceOf[ColumnFamilyQualifierMapKeyWrapper] Bytes.compareTo(columnFamily, columnFamilyOffSet, columnFamilyLength, otherWrapper.columnFamily, otherWrapper.columnFamilyOffSet, otherWrapper.columnFamilyLength) == 0 && Bytes.compareTo(qualifier, qualifierOffSet, qualifierLength, otherWrapper.qualifier, otherWrapper.qualifierOffSet, otherWrapper.qualifierLength) == 0 } override def hashCode():Int = { Bytes.hashCode(columnFamily, columnFamilyOffSet, columnFamilyLength) + Bytes.hashCode(qualifier, qualifierOffSet, qualifierLength) } def cloneColumnFamily():Array[Byte] = { val resultArray = new Array[Byte](columnFamilyLength) System.arraycopy(columnFamily, columnFamilyOffSet, resultArray, 0, columnFamilyLength) resultArray } def cloneQualifier():Array[Byte] = { val resultArray = new Array[Byte](qualifierLength) System.arraycopy(qualifier, qualifierOffSet, resultArray, 0, qualifierLength) resultArray } }
Example 39
Source File: GenericRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class GenericRowKeyBuilderTest extends AnyWordSpec with Matchers { "GenericRowKeyBuilder" should { "use the topic, partition and offset to make the key" in { val topic = "sometopic" val partition = 2 val offset = 1243L val sinkRecord = new SinkRecord(topic, partition, Schema.INT32_SCHEMA, 345, Schema.STRING_SCHEMA, "", offset) val keyBuilder = new GenericRowKeyBuilderBytes() val expected = Bytes.add(Array(topic.fromString(), keyBuilder.delimiterBytes, partition.fromString(), keyBuilder.delimiterBytes, offset.fromString())) keyBuilder.build(sinkRecord, Nil) shouldBe expected } } }
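The fromString/fromInt helpers above come from the connector's BytesHelper; the same topic|partition|offset layout can be sketched with plain Bytes calls (the "|" delimiter below is illustrative, the builder uses its own delimiterBytes):

import org.apache.hadoop.hbase.util.Bytes

val delimiter = Bytes.toBytes("|")
val rowKey = Bytes.add(Array(
  Bytes.toBytes("sometopic"), delimiter,
  Bytes.toBytes("2"), delimiter,
  Bytes.toBytes("1243")))

assert(Bytes.toString(rowKey) == "sometopic|2|1243")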
Example 40
Source File: AvroRecordFieldExtractorMapFnTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase.avro import java.nio.file.Paths import org.apache.avro.Schema import org.apache.hadoop.hbase.util.Bytes import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class AvroRecordFieldExtractorMapFnTest extends AnyWordSpec with Matchers { val schema: Schema = new Schema.Parser().parse(Paths.get(getClass.getResource("/person.avsc").toURI).toFile) "AvroRecordFieldExtractorMapFn" should { "raise an exception if the given field does not exist in the schema" in { intercept[IllegalArgumentException] { AvroRecordFieldExtractorMapFn(schema, Seq("wrongField")) } } "raise an exception if the given field is not a primitive" in { intercept[IllegalArgumentException] { AvroRecordFieldExtractorMapFn(schema, Seq("address")) } } "create the mappings for all the given fields" in { val mappings = AvroRecordFieldExtractorMapFn(schema, Seq("firstName", "age")) val fnFirstName = mappings("firstName") val firstName = "Beaky" fnFirstName(firstName) shouldBe Bytes.toBytes(firstName) val fnAge = mappings("age") val age = 31 fnAge(age) shouldBe Bytes.toBytes(age) intercept[ClassCastException] { fnAge(12.4) } } } }
Example 41
Source File: HBaseCatalogSuite.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.spark.datasources.{DataTypeParserWrapper, DoubleSerDes, HBaseTableCatalog} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.types._ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class HBaseCatalogSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { val map = s"""MAP<int, struct<varchar:string>>""" val array = s"""array<struct<tinYint:tinyint>>""" val arrayMap = s"""MAp<int, ARRAY<double>>""" val catalog = s"""{ |"table":{"namespace":"default", "name":"htable"}, |"rowkey":"key1:key2", |"columns":{ |"col1":{"cf":"rowkey", "col":"key1", "type":"string"}, |"col2":{"cf":"rowkey", "col":"key2", "type":"double"}, |"col3":{"cf":"cf1", "col":"col2", "type":"binary"}, |"col4":{"cf":"cf1", "col":"col3", "type":"timestamp"}, |"col5":{"cf":"cf1", "col":"col4", "type":"double", "serdes":"${classOf[DoubleSerDes].getName}"}, |"col6":{"cf":"cf1", "col":"col5", "type":"$map"}, |"col7":{"cf":"cf1", "col":"col6", "type":"$array"}, |"col8":{"cf":"cf1", "col":"col7", "type":"$arrayMap"}, |"col9":{"cf":"cf1", "col":"col8", "type":"date"}, |"col10":{"cf":"cf1", "col":"col9", "type":"timestamp"} |} |}""".stripMargin val parameters = Map(HBaseTableCatalog.tableCatalog->catalog) val t = HBaseTableCatalog(parameters) def checkDataType(dataTypeString: String, expectedDataType: DataType): Unit = { test(s"parse ${dataTypeString.replace("\n", "")}") { assert(DataTypeParserWrapper.parse(dataTypeString) === expectedDataType) } } test("basic") { assert(t.getField("col1").isRowKey == true) assert(t.getPrimaryKey == "key1") assert(t.getField("col3").dt == BinaryType) assert(t.getField("col4").dt == TimestampType) assert(t.getField("col5").dt == DoubleType) assert(t.getField("col5").serdes != None) assert(t.getField("col4").serdes == None) assert(t.getField("col1").isRowKey) assert(t.getField("col2").isRowKey) assert(!t.getField("col3").isRowKey) assert(t.getField("col2").length == Bytes.SIZEOF_DOUBLE) assert(t.getField("col1").length == -1) assert(t.getField("col8").length == -1) assert(t.getField("col9").dt == DateType) assert(t.getField("col10").dt == TimestampType) } checkDataType( map, t.getField("col6").dt ) checkDataType( array, t.getField("col7").dt ) checkDataType( arrayMap, t.getField("col8").dt ) test("convert") { val m = Map("hbase.columns.mapping" -> "KEY_FIELD STRING :key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,", "hbase.table" -> "t1") val map = HBaseTableCatalog.convert(m) val json = map.get(HBaseTableCatalog.tableCatalog).get val parameters = Map(HBaseTableCatalog.tableCatalog->json) val t = HBaseTableCatalog(parameters) assert(t.getField("KEY_FIELD").isRowKey) assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt) assert(!t.getField("A_FIELD").isRowKey) assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt) assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt) } test("compatibility") { val m = Map("hbase.columns.mapping" -> "KEY_FIELD STRING :key, A_FIELD STRING c:a, B_FIELD DOUBLE c:b, C_FIELD BINARY c:c,", "hbase.table" -> "t1") val t = HBaseTableCatalog(m) assert(t.getField("KEY_FIELD").isRowKey) assert(DataTypeParserWrapper.parse("STRING") === t.getField("A_FIELD").dt) assert(!t.getField("A_FIELD").isRowKey) assert(DataTypeParserWrapper.parse("DOUBLE") === t.getField("B_FIELD").dt) assert(DataTypeParserWrapper.parse("BINARY") === t.getField("C_FIELD").dt) } }
Example 42
Source File: ByteArrayComparable.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes @InterfaceAudience.Public class ByteArrayComparable(val bytes:Array[Byte], val offset:Int = 0, var length:Int = -1) extends Comparable[ByteArrayComparable] { if (length == -1) { length = bytes.length } override def compareTo(o: ByteArrayComparable): Int = { Bytes.compareTo(bytes, offset, length, o.bytes, o.offset, o.length) } override def hashCode(): Int = { Bytes.hashCode(bytes, offset, length) } override def equals (obj: Any): Boolean = { obj match { case b: ByteArrayComparable => Bytes.equals(bytes, offset, length, b.bytes, b.offset, b.length) case _ => false } } }
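A quick usage sketch: the offset/length fields let two comparables share one backing array without copying (the values below are illustrative):

import org.apache.hadoop.hbase.util.Bytes

val backing = Bytes.toBytes("rowArowB")
val a = new ByteArrayComparable(backing, 0, 4)  // view over "rowA"
val b = new ByteArrayComparable(backing, 4, 4)  // view over "rowB"

assert(a.compareTo(b) < 0)
assert(a.equals(new ByteArrayComparable(Bytes.toBytes("rowA"))))
assert(a.hashCode == new ByteArrayComparable(Bytes.toBytes("rowA")).hashCode)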
Example 43
Source File: ColumnFamilyQualifierMapKeyWrapper.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes @InterfaceAudience.Public class ColumnFamilyQualifierMapKeyWrapper(val columnFamily:Array[Byte], val columnFamilyOffSet:Int, val columnFamilyLength:Int, val qualifier:Array[Byte], val qualifierOffSet:Int, val qualifierLength:Int) extends Serializable{ override def equals(other:Any): Boolean = { val otherWrapper = other.asInstanceOf[ColumnFamilyQualifierMapKeyWrapper] Bytes.compareTo(columnFamily, columnFamilyOffSet, columnFamilyLength, otherWrapper.columnFamily, otherWrapper.columnFamilyOffSet, otherWrapper.columnFamilyLength) == 0 && Bytes.compareTo(qualifier, qualifierOffSet, qualifierLength, otherWrapper.qualifier, otherWrapper.qualifierOffSet, otherWrapper.qualifierLength) == 0 } override def hashCode():Int = { Bytes.hashCode(columnFamily, columnFamilyOffSet, columnFamilyLength) + Bytes.hashCode(qualifier, qualifierOffSet, qualifierLength) } def cloneColumnFamily():Array[Byte] = { val resultArray = new Array[Byte](columnFamilyLength) System.arraycopy(columnFamily, columnFamilyOffSet, resultArray, 0, columnFamilyLength) resultArray } def cloneQualifier():Array[Byte] = { val resultArray = new Array[Byte](qualifierLength) System.arraycopy(qualifier, qualifierOffSet, resultArray, 0, qualifierLength) resultArray } }
Example 44
Source File: ByteArrayWrapper.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import java.io.Serializable import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes @InterfaceAudience.Public class ByteArrayWrapper (var value:Array[Byte]) extends Comparable[ByteArrayWrapper] with Serializable { override def compareTo(valueOther: ByteArrayWrapper): Int = { Bytes.compareTo(value,valueOther.value) } override def equals(o2: Any): Boolean = { o2 match { case wrapper: ByteArrayWrapper => Bytes.equals(value, wrapper.value) case _ => false } } override def hashCode():Int = { Bytes.hashCode(value) } }
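The wrapper exists because raw Array[Byte] keys use reference equality and identity hash codes; a short sketch of why that matters when byte arrays are used as map keys:

import org.apache.hadoop.hbase.util.Bytes
import scala.collection.mutable

// Two identical encodings are still different array instances...
assert(Bytes.toBytes("cf1") != Bytes.toBytes("cf1"))  // reference inequality
// ...but wrap them and they behave as one logical key.
val counts = mutable.Map.empty[ByteArrayWrapper, Int]
counts(new ByteArrayWrapper(Bytes.toBytes("cf1"))) = 1
counts(new ByteArrayWrapper(Bytes.toBytes("cf1"))) = 2
assert(counts.size == 1 && counts(new ByteArrayWrapper(Bytes.toBytes("cf1"))) == 2)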
Example 45
Source File: BulkLoadPartitioner.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import java.util import java.util.Comparator import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.Partitioner @InterfaceAudience.Public class BulkLoadPartitioner(startKeys:Array[Array[Byte]]) extends Partitioner { // when table not exist, startKeys = Byte[0][] override def numPartitions: Int = if (startKeys.length == 0) 1 else startKeys.length override def getPartition(key: Any): Int = { val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { Bytes.compareTo(o1, o2) } } val rowKey:Array[Byte] = key match { case qualifier: KeyFamilyQualifier => qualifier.rowKey case wrapper: ByteArrayWrapper => wrapper.value case _ => key.asInstanceOf[Array[Byte]] } var partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) if (partition < 0) partition = partition * -1 + -2 if (partition < 0) partition = 0 partition } }
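A sketch of how getPartition maps row keys onto regions, assuming three regions whose start keys are the empty key, "row050" and "row100" (the key values are illustrative):

import org.apache.hadoop.hbase.util.Bytes

val startKeys: Array[Array[Byte]] =
  Array(Array.empty[Byte], Bytes.toBytes("row050"), Bytes.toBytes("row100"))
val partitioner = new BulkLoadPartitioner(startKeys)

assert(partitioner.numPartitions == 3)
assert(partitioner.getPartition(Bytes.toBytes("row010")) == 0)  // before first split
assert(partitioner.getPartition(Bytes.toBytes("row050")) == 1)  // exact start key
assert(partitioner.getPartition(Bytes.toBytes("row075")) == 1)  // between splits
assert(partitioner.getPartition(Bytes.toBytes("row999")) == 2)  // after last split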
Example 46
Source File: package.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import org.apache.hadoop.hbase.util.Bytes import scala.math.Ordering // TODO: add @InterfaceAudience.Private if https://issues.scala-lang.org/browse/SI-3600 is resolved package object hbase { type HBaseType = Array[Byte] def bytesMin = new Array[Byte](0) def bytesMax = null val ByteMax = -1.asInstanceOf[Byte] val ByteMin = 0.asInstanceOf[Byte] val ord: Ordering[HBaseType] = new Ordering[HBaseType] { def compare(x: Array[Byte], y: Array[Byte]): Int = { return Bytes.compareTo(x, y) } } //Do not use BinaryType.ordering implicit val order: Ordering[HBaseType] = ord }
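Because the package object publishes `order` as an implicit Ordering[HBaseType], plain Array[Byte] row keys can be sorted directly; a small sketch:

import org.apache.hadoop.hbase.spark.hbase._
import org.apache.hadoop.hbase.util.Bytes

val keys: Seq[HBaseType] =
  Seq(Bytes.toBytes("row2"), Bytes.toBytes("row10"), Bytes.toBytes("row1"))

// Lexicographic byte order, not numeric order: "row1" < "row10" < "row2".
val sorted = keys.sorted
assert(sorted.map(Bytes.toString) == Seq("row1", "row10", "row2"))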
Example 47
Source File: Utils.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.datasources import java.sql.{Date, Timestamp} import org.apache.hadoop.hbase.spark.AvroSerdes import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private object Utils { def hbaseFieldToScalaType( f: Field, src: Array[Byte], offset: Int, length: Int): Any = { if (f.exeSchema.isDefined) { // If we have avro schema defined, use it to get record, and then convert them to catalyst data type val m = AvroSerdes.deserialize(src, f.exeSchema.get) val n = f.avroToCatalyst.map(_(m)) n.get } else { // Fall back to atomic type f.dt match { case BooleanType => src(offset) != 0 case ByteType => src(offset) case ShortType => Bytes.toShort(src, offset) case IntegerType => Bytes.toInt(src, offset) case LongType => Bytes.toLong(src, offset) case FloatType => Bytes.toFloat(src, offset) case DoubleType => Bytes.toDouble(src, offset) case DateType => new Date(Bytes.toLong(src, offset)) case TimestampType => new Timestamp(Bytes.toLong(src, offset)) case StringType => UTF8String.fromBytes(src, offset, length) case BinaryType => val newArray = new Array[Byte](length) System.arraycopy(src, offset, newArray, 0, length) newArray // TODO: SparkSqlSerializer.deserialize[Any](src) case _ => throw new Exception(s"unsupported data type ${f.dt}") } } } // convert input to data type def toBytes(input: Any, field: Field): Array[Byte] = { if (field.schema.isDefined) { // Here we assume the top level type is structType val record = field.catalystToAvro(input) AvroSerdes.serialize(record, field.schema.get) } else { field.dt match { case BooleanType => Bytes.toBytes(input.asInstanceOf[Boolean]) case ByteType => Array(input.asInstanceOf[Number].byteValue) case ShortType => Bytes.toBytes(input.asInstanceOf[Number].shortValue) case IntegerType => Bytes.toBytes(input.asInstanceOf[Number].intValue) case LongType => Bytes.toBytes(input.asInstanceOf[Number].longValue) case FloatType => Bytes.toBytes(input.asInstanceOf[Number].floatValue) case DoubleType => Bytes.toBytes(input.asInstanceOf[Number].doubleValue) case DateType | TimestampType => Bytes.toBytes(input.asInstanceOf[java.util.Date].getTime) case StringType => Bytes.toBytes(input.toString) case BinaryType => input.asInstanceOf[Array[Byte]] case _ => throw new Exception(s"unsupported data type ${field.dt}") } } } }
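The DateType and TimestampType branches above store both types as epoch milliseconds; the round trip, isolated to the Bytes calls involved:

import java.sql.Timestamp
import org.apache.hadoop.hbase.util.Bytes

val ts = new Timestamp(1500000000000L)
val encoded = Bytes.toBytes(ts.getTime)             // what toBytes() writes
val decoded = new Timestamp(Bytes.toLong(encoded))  // what hbaseFieldToScalaType reads back

assert(decoded == ts)
assert(encoded.length == Bytes.SIZEOF_LONG)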
Example 48
Source File: SerDes.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.datasources import org.apache.hadoop.hbase.util.Bytes import org.apache.yetus.audience.InterfaceAudience // TODO: This is not really used in code. @InterfaceAudience.Public trait SerDes { def serialize(value: Any): Array[Byte] def deserialize(bytes: Array[Byte], start: Int, end: Int): Any } // TODO: This is not really used in code. @InterfaceAudience.Private class DoubleSerDes extends SerDes { override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double]) override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = { Bytes.toDouble(bytes, start) } }
Example 49
Source File: KeyFamilyQualifier.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import java.io.Serializable import org.apache.yetus.audience.InterfaceAudience; import org.apache.hadoop.hbase.util.Bytes @InterfaceAudience.Public class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte]) extends Comparable[KeyFamilyQualifier] with Serializable { override def compareTo(o: KeyFamilyQualifier): Int = { var result = Bytes.compareTo(rowKey, o.rowKey) if (result == 0) { result = Bytes.compareTo(family, o.family) if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) } result } override def toString: String = { Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) } }
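A usage sketch of the ordering: keys compare by row key first, then family, then qualifier, which is the order HFile writers expect cells in during a bulk load:

import org.apache.hadoop.hbase.util.Bytes

val a = new KeyFamilyQualifier(Bytes.toBytes("row1"), Bytes.toBytes("cf"), Bytes.toBytes("a"))
val b = new KeyFamilyQualifier(Bytes.toBytes("row1"), Bytes.toBytes("cf"), Bytes.toBytes("b"))
val c = new KeyFamilyQualifier(Bytes.toBytes("row2"), Bytes.toBytes("cf"), Bytes.toBytes("a"))

assert(a.compareTo(b) < 0)  // same row and family, qualifier decides
assert(b.compareTo(c) < 0)  // row key decides first
println(a)                  // prints "row1:cf:a"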
Example 50
Source File: IndexEdgeDeserializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.indexedge.wide import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.{Label, LabelMeta, ServiceColumn} import org.apache.s2graph.core.storage.serde.StorageDeserializable._ import org.apache.s2graph.core.storage._ import org.apache.s2graph.core.storage.serde.Deserializable import org.apache.s2graph.core.types._ class IndexEdgeDeserializable(graph: S2GraphLike, bytesToLongFunc: (Array[Byte], Int) => Long = bytesToLong) extends Deserializable[S2EdgeLike] { type QualifierRaw = (Array[(LabelMeta, InnerValLike)], VertexId, Byte, Boolean, Int) type ValueRaw = (Array[(LabelMeta, InnerValLike)], Int) val builder = graph.elementBuilder override def fromKeyValues[T: CanSKeyValue](_kvs: Seq[T], cacheElementOpt: Option[S2EdgeLike]): Option[S2EdgeLike] = { try { assert(_kvs.size == 1) // val kvs = _kvs.map { kv => implicitly[CanSKeyValue[T]].toSKeyValue(kv) } val kv = implicitly[CanSKeyValue[T]].toSKeyValue(_kvs.head) val version = kv.timestamp var pos = 0 val (srcVertexId, srcIdLen) = SourceVertexId.fromBytes(kv.row, pos, kv.row.length, HBaseType.DEFAULT_VERSION) pos += srcIdLen val labelWithDir = LabelWithDirection(Bytes.toInt(kv.row, pos, 4)) pos += 4 val (labelIdxSeq, isInverted) = bytesToLabelIndexSeqWithIsInverted(kv.row, pos) pos += 1 if (isInverted) None else { val label = Label.findById(labelWithDir.labelId) val schemaVer = label.schemaVersion val srcVertex = builder.newVertex(srcVertexId, version) //TODO: var tsVal = version if (kv.qualifier.isEmpty) { val degreeVal = bytesToLongFunc(kv.value, 0) val tgtVertexId = VertexId(ServiceColumn.Default, InnerVal.withStr("0", schemaVer)) val tgtVertex = builder.newVertex(tgtVertexId, version) val edge = builder.newEdge(srcVertex, tgtVertex, label, labelWithDir.dir, GraphUtil.defaultOpByte, version, S2Edge.EmptyState) edge.propertyInner(LabelMeta.timestamp.name, version, version) edge.propertyInner(LabelMeta.degree.name, degreeVal, version) edge.tgtVertex = builder.newVertex(tgtVertexId, version) edge.setOp(GraphUtil.defaultOpByte) edge.setTsInnerValOpt(Option(InnerVal.withLong(tsVal, schemaVer))) Option(edge) } else { pos = 0 val (idxPropsRaw, endAt) = bytesToProps(kv.qualifier, pos, schemaVer) pos = endAt val (tgtVertexIdRaw, tgtVertexIdLen) = if (endAt == kv.qualifier.length) { (HBaseType.defaultTgtVertexId, 0) } else { TargetVertexId.fromBytes(kv.qualifier, endAt, kv.qualifier.length, schemaVer) } pos += tgtVertexIdLen val op = if (kv.qualifier.length == pos) GraphUtil.defaultOpByte else kv.qualifier(kv.qualifier.length-1) val tgtVertex = builder.newVertex(tgtVertexIdRaw, version) val edge = builder.newEdge(srcVertex, tgtVertex, label, labelWithDir.dir, GraphUtil.defaultOpByte, version, S2Edge.EmptyState) val index = label.indicesMap.getOrElse(labelIdxSeq, throw new RuntimeException(s"invalid index seq: ${label.id.get}, ${labelIdxSeq}")) if (edge.checkProperty(LabelMeta.to.name)) { val vId = edge.property(LabelMeta.to.name).asInstanceOf[S2Property[_]].innerValWithTs val tgtVertex = builder.newVertex(TargetVertexId(ServiceColumn.Default, vId.innerVal), version) edge.setTgtVertex(tgtVertex) } edge.propertyInner(LabelMeta.timestamp.name, tsVal, version) edge.setOp(op) edge.setTsInnerValOpt(Option(InnerVal.withLong(tsVal, schemaVer))) Option(edge) } } } catch { case e: Exception => None } } }
Example 51
Source File: HBasePartitioner.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.serializer.JavaSerializer import org.apache.spark.util.{CollectionsUtils, Utils} import org.apache.spark.{Partitioner, SparkEnv} object HBasePartitioner { implicit object HBaseRawOrdering extends Ordering[HBaseRawType] { def compare(a: HBaseRawType, b: HBaseRawType) = Bytes.compareTo(a, b) } } class HBasePartitioner (var splitKeys: Array[HBaseRawType]) extends Partitioner { import HBasePartitioner.HBaseRawOrdering type t = HBaseRawType lazy private val len = splitKeys.length // For a pre-split table splitKeys(0) is bytes[0]; remove it, // otherwise partition 0 would always be empty and // we would miss the last region's data when bulk loading lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail def numPartitions = if (len == 0) 1 else len @transient private val binarySearch: ((Array[t], t) => Int) = CollectionsUtils.makeBinarySearch[t] def getPartition(key: Any): Int = { val k = key.asInstanceOf[t] var partition = 0 if (len <= 128 && len > 0) { // If we have fewer than 128 partitions, use a naive linear search val ordering = implicitly[Ordering[t]] while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) { partition += 1 } } else { // Determine which binary search method to use only once. partition = binarySearch(realSplitKeys, k) // binarySearch either returns the match location or -[insertion point]-1 if (partition < 0) { partition = -partition - 1 } if (partition > realSplitKeys.length) { partition = realSplitKeys.length } } partition } override def equals(other: Any): Boolean = other match { case r: HBasePartitioner => r.splitKeys.sameElements(splitKeys) case _ => false } override def hashCode(): Int = { val prime = 31 var result = 1 var i = 0 while (i < splitKeys.length) { result = prime * result + splitKeys(i).hashCode i += 1 } result = prime * result result } }
Example 52
Source File: HBaseServiceLayer.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.server.hbase import javax.ws.rs._ import javax.ws.rs.core.MediaType import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTrip import com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable @Path("rest") class HBaseServiceLayer { @GET @Path("hello") @Produces(Array(MediaType.TEXT_PLAIN)) def hello(): String = { "Hello World" } @GET @Path("vender/{venderId}/timeline") @Produces(Array(MediaType.APPLICATION_JSON)) def getTripTimeLine (@PathParam("venderId") venderId:String, @QueryParam("startTime") startTime:String = Long.MinValue.toString, @QueryParam("endTime") endTime:String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = { val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName)) val st = if (startTime == null) { Long.MinValue.toString } else { startTime } val et = if (endTime == null) { Long.MaxValue.toString } else { endTime } val scan = new Scan() val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts) println("startRowKey:" + Bytes.toString(startRowKey)) scan.setStartRow(startRowKey) val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts) println("endRowKey:" + Bytes.toString(endRowKey)) scan.setStopRow(endRowKey) val scannerIt = table.getScanner(scan).iterator() val tripList = new mutable.MutableList[NyTaxiYellowTrip] while(scannerIt.hasNext) { val result = scannerIt.next() tripList += TaxiTripHBaseHelper.convertToTaxiTrip(result) println("Found a trip:" + TaxiTripHBaseHelper.convertToTaxiTrip(result)) } println("tripList.size:" + tripList.size) tripList.toArray } }
Example 53
Source File: CreateSaltedTable.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.setup.hbase import java.io.File import org.apache.commons.lang.StringUtils import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName} import org.apache.hadoop.hbase.client.ConnectionFactory import org.apache.hadoop.hbase.io.compress.Compression import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable object CreateSaltedTable { def main(args:Array[String]): Unit = { if (args.length == 0) { println("<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>") } val tableName = args(0) val columnFamilyName = args(1) val regionCount = args(2).toInt val numOfSalts = args(3).toInt val hbaseConfigFolder = args(4) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val connection = ConnectionFactory.createConnection(conf) val admin = connection.getAdmin val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)) val columnDescriptor = new HColumnDescriptor(columnFamilyName) columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY) columnDescriptor.setBlocksize(64 * 1024) columnDescriptor.setBloomFilterType(BloomType.ROW) tableDescriptor.addFamily(columnDescriptor) tableDescriptor.setMaxFileSize(Long.MaxValue) tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName) val splitKeys = new mutable.MutableList[Array[Byte]] for (i <- 0 to regionCount) { val regionSplitStr = StringUtils.leftPad((i*(numOfSalts/regionCount)).toString, 4, "0") splitKeys += Bytes.toBytes(regionSplitStr) } admin.createTable(tableDescriptor, splitKeys.toArray) } }
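The table above is pre-split on zero-padded salt strings; writers then have to prefix each natural key with a matching salt. A hypothetical helper for that step (the project's real scheme lives in TaxiTripHBaseHelper, so the hash choice here is an assumption):

import org.apache.commons.lang.StringUtils
import org.apache.hadoop.hbase.util.Bytes

// Hypothetical: derive the salt from a hash of the natural key, zero-padded to 4 chars
// so it sorts the same way as the split keys created above.
def saltedRowKey(naturalKey: String, numOfSalts: Int): Array[Byte] = {
  val salt = StringUtils.leftPad(((naturalKey.hashCode & Int.MaxValue) % numOfSalts).toString, 4, "0")
  Bytes.add(Bytes.toBytes(salt), Bytes.toBytes(naturalKey))
}

println(Bytes.toString(saltedRowKey("vendor-42", 10000)))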
Example 54
Source File: BytesUtilV1.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.core.v1 import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.counter.core.TimedQualifier.IntervalUnit import org.apache.s2graph.counter.core.{TimedQualifier, ExactQualifier, ExactKeyTrait, BytesUtil} import org.apache.s2graph.counter.models.Counter.ItemType import org.apache.s2graph.counter.util.Hashes import scala.collection.mutable.ArrayBuffer object BytesUtilV1 extends BytesUtil { // ExactKey: [hash(2b)][policy(4b)][item(variable)] val BUCKET_BYTE_SIZE = Bytes.SIZEOF_SHORT val POLICY_ID_SIZE = Bytes.SIZEOF_INT val INTERVAL_SIZE = Bytes.SIZEOF_BYTE val TIMESTAMP_SIZE = Bytes.SIZEOF_LONG val TIMED_QUALIFIER_SIZE = INTERVAL_SIZE + TIMESTAMP_SIZE override def getRowKeyPrefix(id: Int): Array[Byte] = { Bytes.toBytes(id) } override def toBytes(key: ExactKeyTrait): Array[Byte] = { val buff = new ArrayBuffer[Byte] // hash key (2 bytes) buff ++= Bytes.toBytes(Hashes.murmur3(key.itemKey)).take(BUCKET_BYTE_SIZE) buff ++= getRowKeyPrefix(key.policyId) buff ++= { key.itemType match { case ItemType.INT => Bytes.toBytes(key.itemKey.toInt) case ItemType.LONG => Bytes.toBytes(key.itemKey.toLong) case ItemType.STRING | ItemType.BLOB => Bytes.toBytes(key.itemKey) } } buff.toArray } override def toBytes(eq: ExactQualifier): Array[Byte] = { toBytes(eq.tq) ++ eq.dimension.getBytes } override def toBytes(tq: TimedQualifier): Array[Byte] = { Bytes.toBytes(tq.q.toString) ++ Bytes.toBytes(tq.ts) } override def toExactQualifier(bytes: Array[Byte]): ExactQualifier = { // qualifier layout: interval, ts, dimension, in that order val tq = toTimedQualifier(bytes) val dimension = Bytes.toString(bytes, TIMED_QUALIFIER_SIZE, bytes.length - TIMED_QUALIFIER_SIZE) ExactQualifier(tq, dimension) } override def toTimedQualifier(bytes: Array[Byte]): TimedQualifier = { val interval = Bytes.toString(bytes, 0, INTERVAL_SIZE) val ts = Bytes.toLong(bytes, INTERVAL_SIZE) TimedQualifier(IntervalUnit.withName(interval), ts) } }
Example 55
Source File: Hashes.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.util import org.apache.hadoop.hbase.util.Bytes import scala.util.hashing.MurmurHash3 object Hashes { def sha1(s: String): String = { val md = java.security.MessageDigest.getInstance("SHA-1") Bytes.toHex(md.digest(s.getBytes("UTF-8"))) } private def positiveHash(h: Int): Int = { if (h < 0) -1 * (h + 1) else h } def murmur3(s: String): Int = { val hash = MurmurHash3.stringHash(s) positiveHash(hash) } }
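Both helpers are pure functions over the input string, so they are convenient for deriving stable bucket prefixes; a short usage sketch (the bucket count is illustrative):

import org.apache.s2graph.counter.util.Hashes

println(Hashes.sha1("item-123"))         // 40-character hex digest (via Bytes.toHex)
println(Hashes.murmur3("item-123"))      // non-negative Int
println(Hashes.murmur3("item-123") % 64) // e.g. one of 64 buckets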
Example 56
Source File: DistributedScanner.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.helper import java.util import java.util.Comparator import com.google.common.primitives.SignedBytes import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes object DistributedScanner { val BUCKET_BYTE_SIZE = Bytes.SIZEOF_BYTE def getRealRowKey(result: Result): Array[Byte] = { result.getRow.drop(BUCKET_BYTE_SIZE) } } class DistributedScanner(table: Table, scan: Scan) extends AbstractClientScanner { import DistributedScanner._ private val BYTE_MAX = BigInt(256) private[helper] val scanners = { for { i <- 0 until BYTE_MAX.pow(BUCKET_BYTE_SIZE).toInt } yield { val bucketBytes: Array[Byte] = Bytes.toBytes(i).takeRight(BUCKET_BYTE_SIZE) val newScan = new Scan(scan).setStartRow(bucketBytes ++ scan.getStartRow).setStopRow(bucketBytes ++ scan.getStopRow) table.getScanner(newScan) } } val resultCache = new util.TreeMap[Result, java.util.Iterator[Result]](new Comparator[Result] { val comparator = SignedBytes.lexicographicalComparator() override def compare(o1: Result, o2: Result): Int = { comparator.compare(getRealRowKey(o1), getRealRowKey(o2)) } }) lazy val initialized = { val iterators = scanners.map(_.iterator()).filter(_.hasNext) iterators.foreach { it => resultCache.put(it.next(), it) } iterators.nonEmpty } override def next(): Result = { if (initialized) { Option(resultCache.pollFirstEntry()).map { entry => val it = entry.getValue if (it.hasNext) { // fill cache resultCache.put(it.next(), it) } entry.getKey }.orNull } else { null } } override def close(): Unit = { for { scanner <- scanners } { scanner.close() } } override def renewLease(): Boolean = true }
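A sketch of the fan-out the scanner performs: one sub-scan per possible one-byte bucket prefix, with the prefix prepended to the caller's start/stop rows and stripped again by getRealRowKey:

import org.apache.hadoop.hbase.util.Bytes
import org.apache.s2graph.counter.helper.DistributedScanner

val bucketPrefixes: Seq[Array[Byte]] =
  (0 until 256).map(i => Bytes.toBytes(i).takeRight(DistributedScanner.BUCKET_BYTE_SIZE))

assert(bucketPrefixes.size == 256)
assert(bucketPrefixes.forall(_.length == 1))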
Example 57
Source File: BulkLoadPartitioner.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.loader.spark import java.util import java.util.Comparator import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.Partitioner class BulkLoadPartitioner(startKeys:Array[Array[Byte]]) extends Partitioner { override def numPartitions: Int = startKeys.length override def getPartition(key: Any): Int = { val rowKey:Array[Byte] = key match { case qualifier: KeyFamilyQualifier => qualifier.rowKey case _ => key.asInstanceOf[Array[Byte]] } val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { Bytes.compareTo(o1, o2) } } val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) if (partition < 0) partition * -1 + -2 else partition } }
Example 58
Source File: KeyFamilyQualifier.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.loader.spark import java.io.Serializable import org.apache.hadoop.hbase.util.Bytes class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte]) extends Comparable[KeyFamilyQualifier] with Serializable { override def compareTo(o: KeyFamilyQualifier): Int = { var result = Bytes.compareTo(rowKey, o.rowKey) if (result == 0) { result = Bytes.compareTo(family, o.family) if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) } result } override def toString: String = { Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) } }
Example 59
Source File: StorageSerializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.schema.{ColumnMeta, LabelMeta} import org.apache.s2graph.core.storage.SKeyValue import org.apache.s2graph.core.types.{InnerValLike, InnerValLikeWithTs} object StorageSerializable { def propsToBytes(props: Seq[(LabelMeta, InnerValLike)]): Array[Byte] = { val len = props.length assert(len < Byte.MaxValue) var bytes = Array.fill(1)(len.toByte) for ((_, v) <- props) bytes = Bytes.add(bytes, v.bytes) bytes } def vertexPropsToBytes(props: Seq[(ColumnMeta, Array[Byte])]): Array[Byte] = { val len = props.length assert(len < Byte.MaxValue) var bytes = Array.fill(1)(len.toByte) for ((k, v) <- props) bytes = Bytes.add(bytes, Bytes.toBytes(k.seq.toInt), v) bytes } def propsToKeyValues(props: Seq[(LabelMeta, InnerValLike)]): Array[Byte] = { val len = props.length assert(len < Byte.MaxValue) var bytes = Array.fill(1)(len.toByte) for ((k, v) <- props) bytes = Bytes.add(bytes, Array.fill(1)(k.seq), v.bytes) bytes } def propsToKeyValuesWithTs(props: Seq[(LabelMeta, InnerValLikeWithTs)]): Array[Byte] = { val len = props.length assert(len < Byte.MaxValue) var bytes = Array.fill(1)(len.toByte) for ((k, v) <- props) bytes = Bytes.add(bytes, Array.fill(1)(k.seq), v.bytes) bytes } def labelOrderSeqWithIsInverted(labelOrderSeq: Byte, isInverted: Boolean): Array[Byte] = { assert(labelOrderSeq < (1 << 6)) val byte = labelOrderSeq << 1 | (if (isInverted) 1 else 0) Array.fill(1)(byte.toByte) } def intToBytes(value: Int): Array[Byte] = Bytes.toBytes(value) def longToBytes(value: Long): Array[Byte] = Bytes.toBytes(value) } trait StorageSerializable[E] { val cf = Serializable.edgeCf def table: Array[Byte] def ts: Long def toRowKey: Array[Byte] def toQualifier: Array[Byte] def toValue: Array[Byte] def toKeyValues: Seq[SKeyValue] = { val row = toRowKey val qualifier = toQualifier val value = toValue val kv = SKeyValue(table, row, cf, qualifier, value, ts) // logger.debug(s"[SER]: ${kv.toLogString}}") Seq(kv) } }
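The propsToBytes helper above builds a one-byte element count followed by the concatenated value bytes, leaving it to the reader to know how to delimit each value. The same layout with plain byte arrays (the values are illustrative):

import org.apache.hadoop.hbase.util.Bytes

val values = Seq(Bytes.toBytes(7), Bytes.toBytes(9L))
var encoded: Array[Byte] = Array(values.length.toByte)  // [count: 1 byte][values...]
for (v <- values) encoded = Bytes.add(encoded, v)

assert(encoded.length == 1 + Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG)
assert(Bytes.toInt(encoded, 1) == 7)
assert(Bytes.toLong(encoded, 1 + Bytes.SIZEOF_INT) == 9L)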
Example 60
Source File: VertexSerializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.vertex.tall import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.{S2Vertex, S2VertexLike} import org.apache.s2graph.core.storage.SKeyValue import org.apache.s2graph.core.storage.serde.Serializable import org.apache.s2graph.core.storage.serde.StorageSerializable._ import scala.collection.JavaConverters._ case class VertexSerializable(vertex: S2VertexLike, intToBytes: Int => Array[Byte] = intToBytes) extends Serializable[S2VertexLike] { override val table = vertex.hbaseTableName.getBytes override val ts = vertex.ts override val cf = Serializable.vertexCf override def toRowKey: Array[Byte] = vertex.id.bytes override def toQualifier: Array[Byte] = Array.empty[Byte] override def toValue: Array[Byte] = { val props = (vertex.props.asScala ++ vertex.defaultProps.asScala).toSeq.map { case (_, v) => v.columnMeta -> v.innerVal.bytes } vertexPropsToBytes(props) } override def toKeyValues: Seq[SKeyValue] = { // val row = toRowKey // val qualifier = toQualifier // val value = toValue // Seq( // SKeyValue(vertex.hbaseTableName.getBytes, row, cf, qualifier, value, vertex.ts) // ) (vertex.props.asScala ++ vertex.defaultProps.asScala).toSeq.map { case (_, v) => val row = Bytes.add(vertex.id.bytes, Array.fill(1)(v.columnMeta.seq)) val qualifier = Array.empty[Byte] val value = v.innerVal.bytes SKeyValue(vertex.hbaseTableName.getBytes, row, cf, qualifier, value, vertex.ts) } } }
Example 61
Source File: IndexEdgeSerializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.indexedge.tall import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.schema.LabelMeta import org.apache.s2graph.core.types.VertexId import org.apache.s2graph.core.{GraphUtil, IndexEdge} import org.apache.s2graph.core.storage.serde.StorageSerializable._ import org.apache.s2graph.core.storage.serde.Serializable class IndexEdgeSerializable(indexEdge: IndexEdge, longToBytes: Long => Array[Byte] = longToBytes) extends Serializable[IndexEdge] { override def ts = indexEdge.version override def table = indexEdge.label.hbaseTableName.getBytes("UTF-8") def idxPropsMap = indexEdge.orders.toMap def idxPropsBytes = propsToBytes(indexEdge.orders) override def toRowKey: Array[Byte] = { val srcIdBytes = VertexId.toSourceVertexId(indexEdge.srcVertex.id).bytes val labelWithDirBytes = indexEdge.labelWithDir.bytes val labelIndexSeqWithIsInvertedBytes = labelOrderSeqWithIsInverted(indexEdge.labelIndexSeq, isInverted = false) val row = Bytes.add(srcIdBytes, labelWithDirBytes, labelIndexSeqWithIsInvertedBytes) // logger.error(s"${row.toList}\n${srcIdBytes.toList}\n${labelWithDirBytes.toList}\n${labelIndexSeqWithIsInvertedBytes.toList}") if (indexEdge.degreeEdge) row else { val qualifier = idxPropsMap.get(LabelMeta.to) match { case None => Bytes.add(idxPropsBytes, VertexId.toTargetVertexId(indexEdge.tgtVertex.id).bytes) case Some(vId) => idxPropsBytes } val opByte = if (indexEdge.op == GraphUtil.operations("incrementCount")) indexEdge.op else GraphUtil.defaultOpByte Bytes.add(row, qualifier, Array.fill(1)(opByte)) } } override def toQualifier: Array[Byte] = Array.empty[Byte] override def toValue: Array[Byte] = if (indexEdge.degreeEdge) longToBytes(indexEdge.property(LabelMeta.degree).innerVal.toString().toLong) else if (indexEdge.op == GraphUtil.operations("incrementCount")) longToBytes(indexEdge.property(LabelMeta.count).innerVal.toString().toLong) else propsToKeyValues(indexEdge.metas.toSeq) }
Example 62
Source File: BytesUtilsSuite.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase import org.apache.spark.Logging import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.types._ import org.apache.spark.sql.hbase.types.HBaseBytesType import org.apache.spark.sql.hbase.util.BytesUtils import org.scalatest.{BeforeAndAfterAll, FunSuite} class BytesUtilsSuite extends FunSuite with BeforeAndAfterAll with Logging { test("Bytes Ordering Test") { val s = Seq(-257, -256, -255, -129, -128, -127, -64, -16, -4, -1, 0, 1, 4, 16, 64, 127, 128, 129, 255, 256, 257) val result = s.map(i => (i, BytesUtils.create(IntegerType).toBytes(i))) .sortWith((f, s) => HBaseBytesType.ordering.gt( f._2.asInstanceOf[HBaseBytesType.InternalType], s._2.asInstanceOf[HBaseBytesType.InternalType])) assert(result.map(a => a._1) == s.sorted.reverse) } def compare(a: Array[Byte], b: Array[Byte]): Int = { val length = Math.min(a.length, b.length) var result: Int = 0 for (i <- 0 to length - 1) { val diff: Int = (a(i) & 0xff).asInstanceOf[Byte] - (b(i) & 0xff).asInstanceOf[Byte] if (diff != 0) { result = diff } } result } test("Bytes Utility Test") { assert(BytesUtils.toBoolean(BytesUtils.create(BooleanType) .toBytes(input = true), 0) === true) assert(BytesUtils.toBoolean(BytesUtils.create(BooleanType) .toBytes(input = false), 0) === false) assert(BytesUtils.toDouble(BytesUtils.create(DoubleType).toBytes(12.34d), 0) === 12.34d) assert(BytesUtils.toDouble(BytesUtils.create(DoubleType).toBytes(-12.34d), 0) === -12.34d) assert(BytesUtils.toFloat(BytesUtils.create(FloatType).toBytes(12.34f), 0) === 12.34f) assert(BytesUtils.toFloat(BytesUtils.create(FloatType).toBytes(-12.34f), 0) === -12.34f) assert(BytesUtils.toInt(BytesUtils.create(IntegerType).toBytes(12), 0) === 12) assert(BytesUtils.toInt(BytesUtils.create(IntegerType).toBytes(-12), 0) === -12) assert(BytesUtils.toLong(BytesUtils.create(LongType).toBytes(1234l), 0) === 1234l) assert(BytesUtils.toLong(BytesUtils.create(LongType).toBytes(-1234l), 0) === -1234l) assert(BytesUtils.toShort(BytesUtils.create(ShortType) .toBytes(12.asInstanceOf[Short]), 0) === 12) assert(BytesUtils.toShort(BytesUtils.create(ShortType) .toBytes(-12.asInstanceOf[Short]), 0) === -12) assert(BytesUtils.toUTF8String(BytesUtils.create(StringType).toBytes("abc"), 0, 3) === UTF8String("abc")) assert(BytesUtils.toUTF8String(BytesUtils.create(StringType).toBytes(""), 0, 0) === UTF8String("")) assert(BytesUtils.toByte(BytesUtils.create(ByteType) .toBytes(5.asInstanceOf[Byte]), 0) === 5) assert(BytesUtils.toByte(BytesUtils.create(ByteType) .toBytes(-5.asInstanceOf[Byte]), 0) === -5) assert(compare(BytesUtils.create(IntegerType).toBytes(128), BytesUtils.create(IntegerType).toBytes(-128)) > 0) } test("byte array plus one") { var byteArray = Array[Byte](0x01.toByte, 127.toByte) assert(Bytes.compareTo(BytesUtils.addOne(byteArray), Array[Byte](0x01.toByte, 0x80.toByte)) == 0) byteArray = Array[Byte](0xff.toByte, 0xff.toByte) assert(BytesUtils.addOne(byteArray) == null) byteArray = Array[Byte](0x02.toByte, 0xff.toByte) assert(Bytes.compareTo(BytesUtils.addOne(byteArray), Array[Byte](0x03.toByte, 0x00.toByte)) == 0) } test("float comparison") { val f1 = BytesUtils.create(FloatType).toBytes(-1.23f) val f2 = BytesUtils.create(FloatType).toBytes(100f) assert(Bytes.compareTo(f1, f2) < 0) } }
Example 63
Source File: IndexEdgeSerializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.indexedge.wide import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.schema.LabelMeta import org.apache.s2graph.core.types.VertexId import org.apache.s2graph.core.{GraphUtil, IndexEdge} import org.apache.s2graph.core.storage.serde.StorageSerializable._ import org.apache.s2graph.core.storage.serde.Serializable class IndexEdgeSerializable(indexEdge: IndexEdge, longToBytes: Long => Array[Byte] = longToBytes) extends Serializable[IndexEdge] { override def ts = indexEdge.version override def table = indexEdge.label.hbaseTableName.getBytes("UTF-8") def idxPropsMap = indexEdge.orders.toMap def idxPropsBytes = propsToBytes(indexEdge.orders) override def toRowKey: Array[Byte] = { val srcIdBytes = VertexId.toSourceVertexId(indexEdge.srcVertex.id).bytes val labelWithDirBytes = indexEdge.labelWithDir.bytes val labelIndexSeqWithIsInvertedBytes = labelOrderSeqWithIsInverted(indexEdge.labelIndexSeq, isInverted = false) Bytes.add(srcIdBytes, labelWithDirBytes, labelIndexSeqWithIsInvertedBytes) } override def toQualifier: Array[Byte] = { val tgtIdBytes = VertexId.toTargetVertexId(indexEdge.tgtVertex.id).bytes if (indexEdge.degreeEdge) Array.empty[Byte] else { if (indexEdge.op == GraphUtil.operations("incrementCount")) { Bytes.add(idxPropsBytes, tgtIdBytes, Array.fill(1)(indexEdge.op)) } else { idxPropsMap.get(LabelMeta.to) match { case None => Bytes.add(idxPropsBytes, tgtIdBytes) case Some(vId) => idxPropsBytes } } } } override def toValue: Array[Byte] = if (indexEdge.degreeEdge) longToBytes(indexEdge.property(LabelMeta.degree).innerVal.toString().toLong) else if (indexEdge.op == GraphUtil.operations("incrementCount")) longToBytes(indexEdge.property(LabelMeta.count).innerVal.toString().toLong) else propsToKeyValues(indexEdge.metas.toSeq) }
Example 64
Source File: SnapshotEdgeDeserializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.snapshotedge.tall import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.schema.{Label, LabelMeta, ServiceColumn} import org.apache.s2graph.core.storage.serde.StorageDeserializable._ import org.apache.s2graph.core.storage.CanSKeyValue import org.apache.s2graph.core.types._ import org.apache.s2graph.core._ import org.apache.s2graph.core.storage.serde.Deserializable import org.apache.s2graph.core.utils.logger class SnapshotEdgeDeserializable(graph: S2GraphLike) extends Deserializable[SnapshotEdge] { val builder = graph.elementBuilder def statusCodeWithOp(byte: Byte): (Byte, Byte) = { val statusCode = byte >> 4 val op = byte & ((1 << 4) - 1) (statusCode.toByte, op.toByte) } override def fromKeyValues[T: CanSKeyValue](_kvs: Seq[T], cacheElementOpt: Option[SnapshotEdge]): Option[SnapshotEdge] = { try { val kvs = _kvs.map { kv => implicitly[CanSKeyValue[T]].toSKeyValue(kv) } assert(kvs.size == 1) val kv = kvs.head val version = kv.timestamp var pos = 0 val (srcVertexId, srcIdLen) = SourceVertexId.fromBytes(kv.row, pos, kv.row.length, HBaseType.DEFAULT_VERSION) pos += srcIdLen val isTallSchema = pos + 5 != kv.row.length var tgtVertexId = TargetVertexId(ServiceColumn.Default, srcVertexId.innerId) if (isTallSchema) { val (tgtId, tgtBytesLen) = InnerVal.fromBytes(kv.row, pos, kv.row.length, HBaseType.DEFAULT_VERSION) tgtVertexId = TargetVertexId(ServiceColumn.Default, tgtId) pos += tgtBytesLen } val labelWithDir = LabelWithDirection(Bytes.toInt(kv.row, pos, 4)) pos += 4 val (labelIdxSeq, isInverted) = bytesToLabelIndexSeqWithIsInverted(kv.row, pos) pos += 1 if (!isInverted) None else { val label = Label.findById(labelWithDir.labelId) val schemaVer = label.schemaVersion // val srcVertexId = SourceVertexId(ServiceColumn.Default, srcIdAndTgtId.srcInnerId) // val tgtVertexId = SourceVertexId(ServiceColumn.Default, tgtId.tgtInnerId) var pos = 0 val (statusCode, op) = statusCodeWithOp(kv.value(pos)) pos += 1 val (props, endAt) = bytesToKeyValuesWithTs(kv.value, pos, schemaVer, label) val kvsMap = props.toMap val tsInnerVal = kvsMap(LabelMeta.timestamp).innerVal val ts = tsInnerVal.toString.toLong pos = endAt val _pendingEdgeOpt = if (pos == kv.value.length) None else { val (pendingEdgeStatusCode, pendingEdgeOp) = statusCodeWithOp(kv.value(pos)) pos += 1 // val versionNum = Bytes.toLong(kv.value, pos, 8) // pos += 8 val (pendingEdgeProps, endAt) = bytesToKeyValuesWithTs(kv.value, pos, schemaVer, label) pos = endAt val lockTs = Option(Bytes.toLong(kv.value, pos, 8)) val pendingEdge = builder.newEdge( builder.newVertex(srcVertexId, version), builder.newVertex(tgtVertexId, version), label, labelWithDir.dir, pendingEdgeOp, version, pendingEdgeProps.toMap, statusCode = pendingEdgeStatusCode, lockTs = lockTs, tsInnerValOpt = Option(tsInnerVal)) Option(pendingEdge) } val snapshotEdge = builder.newSnapshotEdge( builder.newVertex(srcVertexId, ts), builder.newVertex(tgtVertexId, ts), label, labelWithDir.dir, op, version, props.toMap, statusCode = statusCode, pendingEdgeOpt = _pendingEdgeOpt, lockTs = None, tsInnerValOpt = Option(tsInnerVal)) Option(snapshotEdge) } } catch { case e: Exception => logger.error("#" * 100, e) None } } }
Example 65
Source File: SnapshotEdgeSerializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.snapshotedge.tall import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.{S2Edge, SnapshotEdge} import org.apache.s2graph.core.schema.LabelIndex import org.apache.s2graph.core.storage.serde._ import org.apache.s2graph.core.storage.serde.StorageSerializable._ import org.apache.s2graph.core.types.SourceAndTargetVertexIdPair class SnapshotEdgeSerializable(snapshotEdge: SnapshotEdge) extends Serializable[SnapshotEdge] { override def ts = snapshotEdge.version override def table = snapshotEdge.label.hbaseTableName.getBytes("UTF-8") def statusCodeWithOp(statusCode: Byte, op: Byte): Array[Byte] = { val byte = (((statusCode << 4) | op).toByte) Array.fill(1)(byte.toByte) } def valueBytes() = Bytes.add(statusCodeWithOp(snapshotEdge.statusCode, snapshotEdge.op), snapshotEdge.propsToKeyValuesWithTs) override def toRowKey: Array[Byte] = { val srcIdAndTgtIdBytes = SourceAndTargetVertexIdPair(snapshotEdge.srcVertex.innerId, snapshotEdge.tgtVertex.innerId).bytes val labelWithDirBytes = snapshotEdge.labelWithDir.bytes val labelIndexSeqWithIsInvertedBytes = labelOrderSeqWithIsInverted(LabelIndex.DefaultSeq, isInverted = true) Bytes.add(srcIdAndTgtIdBytes, labelWithDirBytes, labelIndexSeqWithIsInvertedBytes) } override def toQualifier: Array[Byte] = Array.empty[Byte] override def toValue: Array[Byte] = snapshotEdge.pendingEdgeOpt match { case None => valueBytes() case Some(pendingEdge) => val opBytes = statusCodeWithOp(pendingEdge.getStatusCode(), pendingEdge.getOp()) val versionBytes = Array.empty[Byte] val propsBytes = S2Edge.serializePropsWithTs(pendingEdge) val lockBytes = Bytes.toBytes(pendingEdge.getLockTs().get) Bytes.add(Bytes.add(valueBytes(), opBytes, versionBytes), Bytes.add(propsBytes, lockBytes)) } }
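The serializer above and the matching deserializer pack a status code and an operation code into a single byte: the high nibble carries the status, the low nibble the op. A stand-alone round-trip sketch; masking with 0x0F on unpack is a defensive addition, since the listings' plain signed shift only stays non-negative while status codes remain below 8.

object StatusOpByteSketch {
  // Pack status into the high nibble and op into the low nibble of one byte.
  def pack(statusCode: Byte, op: Byte): Byte =
    (((statusCode & 0x0F) << 4) | (op & 0x0F)).toByte

  // Reverse the packing; the masks keep both values in 0..15 even for negative bytes.
  def unpack(b: Byte): (Byte, Byte) =
    (((b >> 4) & 0x0F).toByte, (b & 0x0F).toByte)

  def main(args: Array[String]): Unit = {
    for (status <- 0 to 15; op <- 0 to 15) {
      val (s, o) = unpack(pack(status.toByte, op.toByte))
      assert(s == status && o == op)
    }
    println("pack/unpack round-trips")
  }
}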
Example 66
Source File: SnapshotEdgeDeserializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.snapshotedge.wide import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.schema.{Label, LabelMeta} import org.apache.s2graph.core.storage.serde.StorageDeserializable._ import org.apache.s2graph.core.storage.CanSKeyValue import org.apache.s2graph.core.types.{HBaseType, LabelWithDirection, SourceVertexId, TargetVertexId} import org.apache.s2graph.core._ import org.apache.s2graph.core.storage.serde.Deserializable class SnapshotEdgeDeserializable(graph: S2GraphLike) extends Deserializable[SnapshotEdge] { val builder = graph.elementBuilder def statusCodeWithOp(byte: Byte): (Byte, Byte) = { val statusCode = byte >> 4 val op = byte & ((1 << 4) - 1) (statusCode.toByte, op.toByte) } override def fromKeyValues[T: CanSKeyValue](_kvs: Seq[T], cacheElementOpt: Option[SnapshotEdge]): Option[SnapshotEdge] = { try { val kvs = _kvs.map { kv => implicitly[CanSKeyValue[T]].toSKeyValue(kv) } assert(kvs.size == 1) val kv = kvs.head val version = kv.timestamp var pos = 0 val (srcVertexId, srcIdLen) = SourceVertexId.fromBytes(kv.row, pos, kv.row.length, HBaseType.DEFAULT_VERSION) pos += srcIdLen val labelWithDir = LabelWithDirection(Bytes.toInt(kv.row, pos, 4)) pos += 4 val (labelIdxSeq, isInverted) = bytesToLabelIndexSeqWithIsInverted(kv.row, pos) pos += 1 if (!isInverted) None else { val label = Label.findById(labelWithDir.labelId) val schemaVer = label.schemaVersion val srcVertex = builder.newVertex(srcVertexId, version) val (tgtVertexId, _) = TargetVertexId.fromBytes(kv.qualifier, 0, kv.qualifier.length, schemaVer) var pos = 0 val (statusCode, op) = statusCodeWithOp(kv.value(pos)) pos += 1 val (props, endAt) = bytesToKeyValuesWithTs(kv.value, pos, schemaVer, label) val kvsMap = props.toMap val tsInnerVal = kvsMap(LabelMeta.timestamp).innerVal val ts = tsInnerVal.toString.toLong pos = endAt val _pendingEdgeOpt = if (pos == kv.value.length) None else { val (pendingEdgeStatusCode, pendingEdgeOp) = statusCodeWithOp(kv.value(pos)) pos += 1 // val versionNum = Bytes.toLong(kv.value, pos, 8) // pos += 8 val (pendingEdgeProps, endAt) = bytesToKeyValuesWithTs(kv.value, pos, schemaVer, label) pos = endAt val lockTs = Option(Bytes.toLong(kv.value, pos, 8)) val pendingEdge = builder.newEdge( builder.newVertex(srcVertexId, version), builder.newVertex(tgtVertexId, version), label, labelWithDir.dir, pendingEdgeOp, version, pendingEdgeProps.toMap, statusCode = pendingEdgeStatusCode, lockTs = lockTs, tsInnerValOpt = Option(tsInnerVal)) Option(pendingEdge) } val snapshotEdge = builder.newSnapshotEdge( builder.newVertex(srcVertexId, ts), builder.newVertex(tgtVertexId, ts), label, labelWithDir.dir, op, version, props.toMap, statusCode = statusCode, pendingEdgeOpt = _pendingEdgeOpt, lockTs = None, tsInnerValOpt = Option(tsInnerVal)) Option(snapshotEdge) } } catch { case e: Exception => None } } }
Example 67
Source File: SnapshotEdgeSerializable.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.serde.snapshotedge.wide import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.{S2Edge, SnapshotEdge} import org.apache.s2graph.core.schema.LabelIndex import org.apache.s2graph.core.storage.serde.Serializable import org.apache.s2graph.core.storage.serde.StorageSerializable._ import org.apache.s2graph.core.types.VertexId class SnapshotEdgeSerializable(snapshotEdge: SnapshotEdge) extends Serializable[SnapshotEdge] { override def ts = snapshotEdge.version override def table = snapshotEdge.label.hbaseTableName.getBytes("UTF-8") def statusCodeWithOp(statusCode: Byte, op: Byte): Array[Byte] = { val byte = (((statusCode << 4) | op).toByte) Array.fill(1)(byte.toByte) } def valueBytes() = Bytes.add(statusCodeWithOp(snapshotEdge.statusCode, snapshotEdge.op), snapshotEdge.propsToKeyValuesWithTs) override def toRowKey: Array[Byte] = { val srcIdBytes = VertexId.toSourceVertexId(snapshotEdge.srcVertex.id).bytes val labelWithDirBytes = snapshotEdge.labelWithDir.bytes val labelIndexSeqWithIsInvertedBytes = labelOrderSeqWithIsInverted(LabelIndex.DefaultSeq, isInverted = true) Bytes.add(srcIdBytes, labelWithDirBytes, labelIndexSeqWithIsInvertedBytes) } override def toQualifier: Array[Byte] = VertexId.toTargetVertexId(snapshotEdge.tgtVertex.id).bytes override def toValue: Array[Byte] = snapshotEdge.pendingEdgeOpt match { case None => valueBytes() case Some(pendingEdge) => val opBytes = statusCodeWithOp(pendingEdge.getStatusCode(), pendingEdge.getOp()) val versionBytes = Array.empty[Byte] val propsBytes = S2Edge.serializePropsWithTs(pendingEdge) val lockBytes = Bytes.toBytes(pendingEdge.getLockTs().get) Bytes.add(Bytes.add(valueBytes(), opBytes, versionBytes), Bytes.add(propsBytes, lockBytes)) } }
Example 68
Source File: SKeyValue.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage import java.nio.charset.StandardCharsets import org.apache.hadoop.hbase.util.Bytes import org.hbase.async.KeyValue object SKeyValue { // val SnapshotEdgeCf = "s".getBytes(StandardCharsets.UTF_8) val EdgeCf = "e".getBytes(StandardCharsets.UTF_8) val VertexCf = "v".getBytes(StandardCharsets.UTF_8) val Put = 1 val Delete = 2 val Increment = 3 val Default = Put } case class SKeyValue(table: Array[Byte], row: Array[Byte], cf: Array[Byte], qualifier: Array[Byte], value: Array[Byte], timestamp: Long, operation: Int = SKeyValue.Default, durability: Boolean = true) { def toLogString = { Map("table" -> Bytes.toString(table), "row" -> row.toList, "cf" -> Bytes.toString(cf), "qualifier" -> qualifier.toList, "value" -> value.toList, "timestamp" -> timestamp, "operation" -> operation, "durability" -> durability).toString() } override def toString(): String = toLogString def toKeyValue: KeyValue = new KeyValue(row, cf, qualifier, timestamp, value) } trait CanSKeyValue[T] { def toSKeyValue(from: T): SKeyValue } object CanSKeyValue { def instance[T](f: T => SKeyValue): CanSKeyValue[T] = new CanSKeyValue[T] { override def toSKeyValue(from: T): SKeyValue = f.apply(from) } // For asyncbase KeyValues implicit val asyncKeyValue = instance[KeyValue] { kv => SKeyValue(Array.empty[Byte], kv.key(), kv.family(), kv.qualifier(), kv.value(), kv.timestamp()) } implicit val hbaseKeyValue = instance[org.apache.hadoop.hbase.KeyValue] { kv => SKeyValue(Array.empty[Byte], kv.getRow, kv.getFamily, kv.getQualifier, kv.getValue, kv.getTimestamp) } // For asyncbase KeyValues implicit val sKeyValue = instance[SKeyValue](identity) // For hbase KeyValues }
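CanSKeyValue is a small typeclass: each storage backend supplies an implicit conversion from its native key-value type into SKeyValue, and generic code pulls the instance in through a context bound, exactly as the deserializers do with implicitly[CanSKeyValue[T]].toSKeyValue(kv). A hedged sketch of that call pattern; it assumes the s2graph classes above are on the classpath, and normalize is a made-up helper name.

import org.apache.hadoop.hbase.util.Bytes
import org.apache.s2graph.core.storage.{CanSKeyValue, SKeyValue}

object CanSKeyValueSketch {
  // Works for any T with a CanSKeyValue instance in implicit scope:
  // org.hbase.async.KeyValue, org.apache.hadoop.hbase.KeyValue, or SKeyValue itself.
  def normalize[T: CanSKeyValue](kvs: Seq[T]): Seq[SKeyValue] =
    kvs.map(kv => implicitly[CanSKeyValue[T]].toSKeyValue(kv))

  def main(args: Array[String]): Unit = {
    val kv = SKeyValue(Array.empty[Byte], Bytes.toBytes("row1"), SKeyValue.VertexCf,
      Bytes.toBytes("q"), Bytes.toBytes(1L), System.currentTimeMillis())
    // The identity instance for SKeyValue makes this a no-op conversion.
    normalize(Seq(kv)).foreach(v => println(v.toLogString))
  }
}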
Example 69
Source File: AsynchbaseEdgeFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.hbase import java.util import com.stumbleupon.async.Deferred import com.typesafe.config.Config import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.Label import org.apache.s2graph.core.storage.serde.Serializable import org.apache.s2graph.core.storage.{CanSKeyValue, StorageIO, StorageSerDe} import org.apache.s2graph.core.types.{HBaseType, VertexId} import org.apache.s2graph.core.utils.{CanDefer, DeferCache, Extensions, logger} import org.hbase.async._ import scala.concurrent.{ExecutionContext, Future} class AsynchbaseEdgeFetcher(val graph: S2GraphLike, val config: Config, val client: HBaseClient, val serDe: StorageSerDe, val io: StorageIO) extends EdgeFetcher { import AsynchbaseStorage._ import CanDefer._ import Extensions.DeferOps import scala.collection.JavaConverters._ val edge = graph.elementBuilder.toRequestEdge(queryRequest, parentEdges) val request = buildRequest(client, serDe, queryRequest, edge) val (intervalMaxBytes, intervalMinBytes) = queryParam.buildInterval(Option(edge)) val requestCacheKey = Bytes.add(toCacheKeyBytes(request), intervalMaxBytes, intervalMinBytes) if (cacheTTL <= 0) fetchInner(request) else { val cacheKeyBytes = Bytes.add(queryRequest.query.queryOption.cacheKeyBytes, requestCacheKey) // val cacheKeyBytes = toCacheKeyBytes(request) val cacheKey = queryParam.toCacheKey(cacheKeyBytes) futureCache.getOrElseUpdate(cacheKey, cacheTTL)(fetchInner(request)) } } }
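The listing above elides the enclosing method declaration, which is why the val statements appear to sit at class scope; what they actually illustrate is a cache-keyed fetch path in which Bytes.add splices the query's cache-key bytes together with per-request interval bytes. A self-contained sketch of that composite-key idea; the cache and key names here are illustrative and are not the s2graph DeferCache API.

import org.apache.hadoop.hbase.util.Bytes
import scala.collection.mutable

object CompositeCacheKeySketch {
  // Byte arrays use reference equality, so wrap them before using them as map keys.
  final case class CacheKey(bytes: Array[Byte]) {
    override def hashCode(): Int = Bytes.hashCode(bytes)
    override def equals(o: Any): Boolean = o match {
      case CacheKey(other) => Bytes.equals(bytes, other)
      case _ => false
    }
  }

  def main(args: Array[String]): Unit = {
    val queryKeyBytes    = Bytes.toBytes("query-option-hash")   // stand-in for queryOption.cacheKeyBytes
    val requestKeyBytes  = Bytes.toBytes("serialized-request")  // stand-in for toCacheKeyBytes(request)
    val intervalMaxBytes = Bytes.toBytes(100L)
    val intervalMinBytes = Bytes.toBytes(0L)

    val requestCacheKey = Bytes.add(requestKeyBytes, intervalMaxBytes, intervalMinBytes)
    val cacheKey = CacheKey(Bytes.add(queryKeyBytes, requestCacheKey))

    val cache = mutable.Map.empty[CacheKey, String]
    cache.getOrElseUpdate(cacheKey, { println("cache miss, fetching"); "result" })
    cache.getOrElseUpdate(cacheKey, { println("should not fetch again"); "result" })
    println(cache.size)  // 1
  }
}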
Example 70
Source File: RocksVertexFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.rocks import com.typesafe.config.Config import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.ServiceColumn import org.apache.s2graph.core.storage.rocks.RocksStorage.{qualifier, table} import org.apache.s2graph.core.storage.{SKeyValue, StorageIO, StorageSerDe} import org.apache.s2graph.core.types.HBaseType import org.rocksdb.RocksDB import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} class RocksVertexFetcher(val graph: S2GraphLike, val config: Config, val db: RocksDB, val vdb: RocksDB, val serDe: StorageSerDe, val io: StorageIO) extends VertexFetcher { private def fetchKeyValues(queryRequest: QueryRequest, vertex: S2VertexLike)(implicit ec: ExecutionContext): Future[Seq[SKeyValue]] = { val rpc = RocksStorage.buildRequest(queryRequest, vertex) RocksStorage.fetchKeyValues(vdb, db, rpc) } override def fetchVertices(vertexQueryParam: VertexQueryParam)(implicit ec: ExecutionContext): Future[Seq[S2VertexLike]] = { def fromResult(kvs: Seq[SKeyValue], version: String): Seq[S2VertexLike] = { if (kvs.isEmpty) Nil else serDe.vertexDeserializer(version).fromKeyValues(kvs, None).toSeq.filter(vertexQueryParam.where.get.filter) } val vertices = vertexQueryParam.vertexIds.map(vId => graph.elementBuilder.newVertex(vId)) val futures = vertices.map { vertex => val queryParam = QueryParam.Empty val q = Query.toQuery(Seq(vertex), Seq(queryParam)) val queryRequest = QueryRequest(q, stepIdx = -1, vertex, queryParam) fetchKeyValues(queryRequest, vertex).map { kvs => fromResult(kvs, vertex.serviceColumn.schemaVersion) } recoverWith { case ex: Throwable => Future.successful(Nil) } } Future.sequence(futures).map(_.flatten) } override def fetchVerticesAll()(implicit ec: ExecutionContext) = { import scala.collection.mutable val vertices = new ArrayBuffer[S2VertexLike]() ServiceColumn.findAll().groupBy(_.service.hTableName).toSeq.foreach { case (hTableName, columns) => val distinctColumns = columns.toSet val iter = vdb.newIterator() val buffer = mutable.ListBuffer.empty[SKeyValue] var oldVertexIdBytes = Array.empty[Byte] var minusPos = 0 try { iter.seekToFirst() while (iter.isValid) { val row = iter.key() if (!Bytes.equals(oldVertexIdBytes, 0, oldVertexIdBytes.length - minusPos, row, 0, row.length - 1)) { if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } oldVertexIdBytes = row minusPos = 1 buffer.clear() } val kv = SKeyValue(table, iter.key(), SKeyValue.VertexCf, qualifier, iter.value(), System.currentTimeMillis()) buffer += kv iter.next() } if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } } finally { iter.close() } } Future.successful(vertices) } }
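fetchVerticesAll above groups consecutive RocksDB rows into one vertex by comparing the previous row's id prefix against the current row with the offset/length overload of Bytes.equals. A tiny sketch of that prefix comparison; the key layout below is a stand-in, as real s2graph vertex keys are encoded differently.

import org.apache.hadoop.hbase.util.Bytes

object PrefixGroupingSketch {
  // True when `row` starts with the same bytes as `prev`, up to prev.length - dropFromPrev.
  def samePrefix(prev: Array[Byte], row: Array[Byte], dropFromPrev: Int): Boolean =
    prev.nonEmpty &&
      Bytes.equals(prev, 0, prev.length - dropFromPrev, row, 0, prev.length - dropFromPrev)

  def main(args: Array[String]): Unit = {
    val v1q1 = Bytes.add(Bytes.toBytes("vertex-1"), Bytes.toBytes("p1"))
    val v1q2 = Bytes.add(Bytes.toBytes("vertex-1"), Bytes.toBytes("p2"))
    val v2q1 = Bytes.add(Bytes.toBytes("vertex-2"), Bytes.toBytes("p1"))

    println(samePrefix(v1q1, v1q2, dropFromPrev = 2))  // true: same vertex id, keep buffering
    println(samePrefix(v1q2, v2q1, dropFromPrev = 2))  // false: new vertex, flush the buffer
  }
}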
Example 71
Source File: LabelWithDirection.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.types import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.GraphUtil object LabelWithDirection { import HBaseType._ def apply(compositeInt: Int): LabelWithDirection = { // logger.debug(s"CompositeInt: $compositeInt") val dir = compositeInt & ((1 << bitsForDir) - 1) val labelId = compositeInt >> bitsForDir LabelWithDirection(labelId, dir) } def labelOrderSeqWithIsInverted(labelOrderSeq: Byte, isInverted: Boolean): Array[Byte] = { assert(labelOrderSeq < (1 << 6)) val byte = labelOrderSeq << 1 | (if (isInverted) 1 else 0) Array.fill(1)(byte.toByte) } def bytesToLabelIndexSeqWithIsInverted(bytes: Array[Byte], offset: Int): (Byte, Boolean) = { val byte = bytes(offset) val isInverted = if ((byte & 1) != 0) true else false val labelOrderSeq = byte >> 1 (labelOrderSeq.toByte, isInverted) } } case class LabelWithDirection(labelId: Int, dir: Int) extends HBaseSerializable { import HBaseType._ assert(dir < (1 << bitsForDir)) assert(labelId < (Int.MaxValue >> bitsForDir)) lazy val labelBits = labelId << bitsForDir lazy val compositeInt = labelBits | dir def bytes = { Bytes.toBytes(compositeInt) } lazy val dirToggled = LabelWithDirection(labelId, GraphUtil.toggleDir(dir)) def updateDir(newDir: Int) = LabelWithDirection(labelId, newDir) def isDirected = dir == 0 || dir == 1 override def hashCode(): Int = compositeInt override def equals(other: Any): Boolean = { other match { case o: LabelWithDirection => hashCode == o.hashCode() case _ => false } } }
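LabelWithDirection packs a label id and a direction into one int (direction in the low bits, label id shifted above it) and serializes it with Bytes.toBytes(Int); the companion apply(compositeInt) reverses the split. A round-trip sketch that assumes 2 direction bits for bitsForDir, since the actual constant lives in HBaseType.

import org.apache.hadoop.hbase.util.Bytes

object LabelWithDirectionSketch {
  val bitsForDir = 2  // assumption: stands in for HBaseType.bitsForDir

  def pack(labelId: Int, dir: Int): Array[Byte] =
    Bytes.toBytes((labelId << bitsForDir) | dir)

  def unpack(bytes: Array[Byte]): (Int, Int) = {
    val composite = Bytes.toInt(bytes)
    val dir = composite & ((1 << bitsForDir) - 1)
    val labelId = composite >> bitsForDir
    (labelId, dir)
  }

  def main(args: Array[String]): Unit = {
    val encoded = pack(labelId = 42, dir = 1)
    println(encoded.length)   // 4 bytes, as Bytes.toBytes(Int) always produces
    println(unpack(encoded))  // (42,1)
  }
}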
Example 72
Source File: S2GraphCellReader.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.serde.reader import org.apache.hadoop.hbase.Cell import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core.storage.SKeyValue import org.apache.s2graph.core.types.HBaseType import org.apache.s2graph.core.{GraphElement, S2Graph} import org.apache.s2graph.s2jobs.serde.GraphElementReadable class S2GraphCellReader(elementType: String) extends GraphElementReadable[Seq[Cell]]{ override def read(s2: S2Graph)(cells: Seq[Cell]): Seq[GraphElement] = { val schemaVer = HBaseType.DEFAULT_VERSION val kvs = cells.map { cell => new SKeyValue(Array.empty[Byte], cell.getRow, cell.getFamily, cell.getQualifier, cell.getValue, cell.getTimestamp, SKeyValue.Default) } elementType.toLowerCase match { case "vertex" | "v" => s2.defaultStorage.serDe.vertexDeserializer(schemaVer) .fromKeyValues(kvs, None).map(_.asInstanceOf[GraphElement]).toSeq case "indexedge" | "ie" => kvs.flatMap { kv => s2.defaultStorage.serDe.indexEdgeDeserializer(schemaVer) .fromKeyValues(Seq(kv), None).map(_.asInstanceOf[GraphElement]) } case "snapshotedge" | "se" => kvs.flatMap { kv => s2.defaultStorage.serDe.snapshotEdgeDeserializer(schemaVer) .fromKeyValues(Seq(kv), None).map(_.asInstanceOf[GraphElement]) } case _ => throw new IllegalArgumentException(s"$elementType is not supported.") } } }
Example 73
Source File: BulkLoadPartitioner.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.spark import java.util import java.util.Comparator import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.Partitioner class BulkLoadPartitioner(startKeys:Array[Array[Byte]]) extends Partitioner { override def numPartitions: Int = startKeys.length override def getPartition(key: Any): Int = { val rowKey:Array[Byte] = key match { case qualifier: KeyFamilyQualifier => qualifier.rowKey case _ => key.asInstanceOf[Array[Byte]] } val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { Bytes.compareTo(o1, o2) } } val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) if (partition < 0) partition * -1 + -2 else partition } }
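getPartition above leans on java.util.Arrays.binarySearch over the sorted region start keys: an exact match maps directly to its index, and a miss returns -(insertionPoint) - 1, which the partition * -1 + -2 arithmetic turns back into "the region whose start key precedes the row". A worked, self-contained sketch of that lookup.

import java.util
import java.util.Comparator
import org.apache.hadoop.hbase.util.Bytes

object StartKeyLookupSketch {
  val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] {
    override def compare(o1: Array[Byte], o2: Array[Byte]): Int = Bytes.compareTo(o1, o2)
  }

  def partitionFor(startKeys: Array[Array[Byte]], rowKey: Array[Byte]): Int = {
    val idx = util.Arrays.binarySearch(startKeys, rowKey, comparator)
    if (idx < 0) idx * -1 + -2 else idx  // -(insertionPoint) - 1  becomes  insertionPoint - 1
  }

  def main(args: Array[String]): Unit = {
    // Three regions starting at "", "g", "p" (sorted, as HBase reports them).
    val startKeys = Array(Array.empty[Byte], Bytes.toBytes("g"), Bytes.toBytes("p"))
    println(partitionFor(startKeys, Bytes.toBytes("apple")))   // 0
    println(partitionFor(startKeys, Bytes.toBytes("g")))       // 1 (exact match)
    println(partitionFor(startKeys, Bytes.toBytes("monkey")))  // 1
    println(partitionFor(startKeys, Bytes.toBytes("zebra")))   // 2
  }
}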
Example 74
Source File: HBaseBulkDeleteExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Delete import org.apache.spark.SparkConf object HBaseBulkDeleteExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkDeletesExample {tableName} ") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) val sc = new SparkContext(sparkConf) try { //[Array[Byte]] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkDelete[Array[Byte]](rdd, TableName.valueOf(tableName), putRecord => new Delete(putRecord), 4) } finally { sc.stop() } } }
Example 75
Source File: ByteArrayWrapper.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import java.io.Serializable import org.apache.hadoop.hbase.util.Bytes class ByteArrayWrapper (var value:Array[Byte]) extends Comparable[ByteArrayWrapper] with Serializable { override def compareTo(valueOther: ByteArrayWrapper): Int = { Bytes.compareTo(value,valueOther.value) } override def equals(o2: Any): Boolean = { o2 match { case wrapper: ByteArrayWrapper => Bytes.equals(value, wrapper.value) case _ => false } } override def hashCode():Int = { Bytes.hashCode(value) } }
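The wrapper exists because Array[Byte] compares by reference on the JVM, so two equal-content keys hash to different buckets and never collide in a shuffle or a map; delegating to Bytes.equals and Bytes.hashCode restores value semantics. A short sketch of the difference; the wrapper below mirrors the class above rather than importing it, to stay self-contained.

import org.apache.hadoop.hbase.util.Bytes

object ByteArrayKeySketch {
  class Wrapped(val value: Array[Byte]) {
    override def equals(o: Any): Boolean = o match {
      case w: Wrapped => Bytes.equals(value, w.value)
      case _          => false
    }
    override def hashCode(): Int = Bytes.hashCode(value)
  }

  def main(args: Array[String]): Unit = {
    val a = Bytes.toBytes("row-1")
    val b = Bytes.toBytes("row-1")

    println(a == b)                                              // false: arrays compare by reference
    println(Map(a -> 1).contains(b))                             // false for the same reason
    println(new Wrapped(a) == new Wrapped(b))                    // true: content comparison
    println(Map(new Wrapped(a) -> 1).contains(new Wrapped(b)))   // true
  }
}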
Example 76
Source File: BulkLoadPartitioner.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import java.util import java.util.Comparator import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.Partitioner class BulkLoadPartitioner(startKeys:Array[Array[Byte]]) extends Partitioner { override def numPartitions: Int = startKeys.length override def getPartition(key: Any): Int = { val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] { override def compare(o1: Array[Byte], o2: Array[Byte]): Int = { Bytes.compareTo(o1, o2) } } val rowKey:Array[Byte] = key match { case qualifier: KeyFamilyQualifier => qualifier.rowKey case wrapper: ByteArrayWrapper => wrapper.value case _ => key.asInstanceOf[Array[Byte]] } val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator) if (partition < 0) partition * -1 + -2 else partition } }
Example 77
Source File: KeyFamilyQualifier.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark import java.io.Serializable import org.apache.hadoop.hbase.util.Bytes class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte]) extends Comparable[KeyFamilyQualifier] with Serializable { override def compareTo(o: KeyFamilyQualifier): Int = { var result = Bytes.compareTo(rowKey, o.rowKey) if (result == 0) { result = Bytes.compareTo(family, o.family) if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) } result } override def toString: String = { Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) } }
Example 78
Source File: HBaseMapPartitionExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Get import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.{SparkContext, SparkConf} object HBaseMapPartitionExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = rdd.hbaseMapPartitions[String](hbaseContext, (it, connection) => { val table = connection.getTable(TableName.valueOf(tableName)) it.map{r => //batching would be faster. This is just an example val result = table.get(new Get(r)) val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(cell.getQualifierArray) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(cell.getValueArray) + ")") } else { b.append("(" + q + "," + Bytes.toString(cell.getValueArray) + ")") } } b.toString() } }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
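One caution about the per-cell decoding in this example: cell.getQualifierArray and cell.getValueArray return the cell's entire backing byte array and ignore the cell's offset and length, so the printed qualifier and value can come out garbled on real data. The bulk-get examples elsewhere on this page use CellUtil.cloneQualifier and CellUtil.cloneValue, which copy exactly the relevant slice; a safer version of the decode loop would look like this sketch.

import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.util.Bytes

object SafeCellDecodeSketch {
  // Renders one Result the way the example intends, but via CellUtil copies.
  def render(result: Result): String = {
    val b = new StringBuilder
    b.append(Bytes.toString(result.getRow) + ":")
    val it = result.listCells().iterator()
    while (it.hasNext) {
      val cell = it.next()
      val q = Bytes.toString(CellUtil.cloneQualifier(cell))
      if (q == "counter") b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
      else b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
    }
    b.toString()
  }
}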
Example 79
Source File: HBaseBulkGetExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.{Result, Get} import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration} import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.spark.{SparkContext, SparkConf} object HBaseBulkGetExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2, record => { System.out.println("making Get") new Get(record) }, (result: Result) => { val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(CellUtil.cloneQualifier(cell)) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") } else { b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") } } b.toString() }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
Example 80
Source File: HBaseForeachPartitionExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.{SparkContext, SparkConf} object HBaseForeachPartitionExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseForeachPartition(hbaseContext, (it, connection) => { val m = connection.getBufferedMutator(TableName.valueOf(tableName)) it.foreach(r => { val put = new Put(r._1) r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) m.mutate(put) }) m.flush() m.close() }) } finally { sc.stop() } } }
Example 81
Source File: HBaseBulkDeleteExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Delete import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.{SparkContext, SparkConf} object HBaseBulkDeleteExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkDeletesExample {tableName} ") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) val sc = new SparkContext(sparkConf) try { //[Array[Byte]] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseBulkDelete(hbaseContext, TableName.valueOf(tableName), putRecord => new Delete(putRecord), 4) } finally { sc.stop() } } }
Example 82
Source File: HBaseBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.spark.{SparkConf, SparkContext} object HBaseBulkPutExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) put }) } finally { sc.stop() } } }
Example 83
Source File: HBaseBulkGetExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Get import org.apache.hadoop.hbase.client.Result import org.apache.spark.SparkConf object HBaseBulkGetExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = hbaseContext.bulkGet[Array[Byte], String]( TableName.valueOf(tableName), 2, rdd, record => { System.out.println("making Get") new Get(record) }, (result: Result) => { val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(CellUtil.cloneQualifier(cell)) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") } else { b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") } } b.toString() }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
Example 84
Source File: HBaseBulkPutExampleFromFile.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.io.LongWritable import org.apache.hadoop.io.Text import org.apache.spark.SparkConf object HBaseBulkPutExampleFromFile { def main(args: Array[String]) { if (args.length < 3) { println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile}") return } val tableName = args(0) val columnFamily = args(1) val inputFile = args(2) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " + tableName + " " + columnFamily + " " + inputFile) val sc = new SparkContext(sparkConf) try { var rdd = sc.hadoopFile( inputFile, classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).map(v => { System.out.println("reading-" + v._2.toString) v._2.toString }) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[String](rdd, TableName.valueOf(tableName), (putRecord) => { System.out.println("hbase-" + putRecord) val put = new Put(Bytes.toBytes("Value- " + putRecord)) put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length())) put }); } finally { sc.stop() } } }
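Note that this example writes Bytes.toBytes(putRecord.length()), a 4-byte big-endian int, into column c:1, so reading it back needs Bytes.toInt rather than Bytes.toString. A small read-back sketch; the row-key and column names follow the example above, while the connection setup uses the standard HBase 1.x client API and is an assumption about the target cluster version.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes

object ReadBackLengthSketch {
  def main(args: Array[String]): Unit = {
    val tableName = args(0)
    val line = args(1)  // one of the lines previously bulk-loaded

    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    try {
      val table = connection.getTable(TableName.valueOf(tableName))
      val result = table.get(new Get(Bytes.toBytes("Value- " + line)))
      val raw = result.getValue(Bytes.toBytes("c"), Bytes.toBytes("1"))
      if (raw != null) println(s"stored length = ${Bytes.toInt(raw)}")
      else println("row not found")
    } finally {
      connection.close()
    }
  }
}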
Example 85
Source File: KeyFamilyQualifier.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.spark import java.io.Serializable import org.apache.hadoop.hbase.util.Bytes class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte]) extends Comparable[KeyFamilyQualifier] with Serializable { override def compareTo(o: KeyFamilyQualifier): Int = { var result = Bytes.compareTo(rowKey, o.rowKey) if (result == 0) { result = Bytes.compareTo(family, o.family) if (result == 0) result = Bytes.compareTo(qualifier, o.qualifier) } result } override def toString: String = { Bytes.toString(rowKey) + ":" + Bytes.toString(family) + ":" + Bytes.toString(qualifier) } }
Example 86
Source File: HBaseBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.spark.SparkConf object HBaseBulkPutExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) put }); } finally { sc.stop() } } }
Example 87
Source File: HBaseDistributedScanExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Scan import org.apache.spark.SparkConf object HBaseDistributedScanExample { def main(args: Array[String]) { if (args.length < 1) { println("GenerateGraphs {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName ) val sc = new SparkContext(sparkConf) try { val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val scan = new Scan() scan.setCaching(100) val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan) getRdd.foreach(v => println(Bytes.toString(v._1.get()))) println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length); //.collect().foreach(v => println(Bytes.toString(v._1.get()))) } finally { sc.stop() } } }
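hbaseContext.hbaseRDD(tableName, scan) yields an RDD of (ImmutableBytesWritable, Result) pairs, which the example only uses to print row keys. A short sketch of a more typical follow-up transformation that copies each row out into serializable values; further column decoding is omitted because it depends on the table's schema.

import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD

object ScanDecodeSketch {
  // Turns the raw scan output into plain (rowKey, cellCount) pairs.
  def toRowSummaries(scanned: RDD[(ImmutableBytesWritable, Result)]): RDD[(String, Int)] =
    scanned.map { case (key, result) =>
      (Bytes.toString(key.copyBytes()), result.rawCells().length)
    }
}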
Example 88
Source File: HBaseBulkPutTimestampExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.spark.SparkConf object HBaseBulkPutTimestampExample { def main(args: Array[String]) { if (args.length < 2) { System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { val rdd = sc.parallelize(Array( (Bytes.toBytes("6"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("7"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("8"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("9"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("10"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) val conf = HBaseConfiguration.create() val timeStamp = System.currentTimeMillis() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3)) put }) } finally { sc.stop() } } }
Example 89
Source File: HBaseSimpleRDD.scala From spark-hbase-connector with Apache License 2.0 | 5 votes |
package it.nerdammer.spark.hbase import it.nerdammer.spark.hbase.conversion.FieldReader import org.apache.hadoop.hbase.CellUtil import org.apache.hadoop.hbase.client.Result import org.apache.hadoop.hbase.io.ImmutableBytesWritable import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.rdd.{NewHadoopRDD, RDD} import org.apache.spark.{Partition, TaskContext} import scala.reflect.ClassTag class HBaseSimpleRDD[R: ClassTag](hadoopHBase: NewHadoopRDD[ImmutableBytesWritable, Result], builder: HBaseReaderBuilder[R], saltingLength: Int = 0) (implicit mapper: FieldReader[R], saltingProvider: SaltingProviderFactory[String]) extends RDD[R](hadoopHBase) { override def getPartitions: Array[Partition] = firstParent[(ImmutableBytesWritable, Result)].partitions override def compute(split: Partition, context: TaskContext) = { // val cleanConversion = sc.clean ---> next version firstParent[(ImmutableBytesWritable, Result)].iterator(split, context) .map(e => conversion(e._1, e._2)) } def conversion(key: ImmutableBytesWritable, row: Result) = { val columnNames = HBaseUtils.chosenColumns(builder.columns, mapper.columns) val columnNamesFC = HBaseUtils.columnsWithFamily(builder.columnFamily, columnNames) val columns = columnNamesFC .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2))) .map(t => if(row.containsColumn(t._1, t._2)) Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)).array) else None) .toList mapper.map(Some(key.get.drop(saltingLength)) :: columns) } }
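The key.get.drop(saltingLength) call in conversion strips a salt prefix that the connector prepends to row keys so sequential writes spread across regions. A generic sketch of the salt/unsalt round trip; the modulo-bucket salt below is purely illustrative, and the connector's own SaltingProvider may compute its prefix differently.

import org.apache.hadoop.hbase.util.Bytes

object SaltingSketch {
  val buckets = 16

  // Prefix each logical key with a one-character bucket id derived from its hash.
  def salt(rowKey: Array[Byte]): Array[Byte] = {
    val bucket = (Bytes.hashCode(rowKey) & Int.MaxValue) % buckets
    Bytes.add(Bytes.toBytes(f"$bucket%x"), rowKey)  // 1-char hex salt, so saltingLength = 1
  }

  def unsalt(salted: Array[Byte], saltingLength: Int = 1): Array[Byte] =
    salted.drop(saltingLength)

  def main(args: Array[String]): Unit = {
    val key = Bytes.toBytes("user-42")
    val salted = salt(key)
    println(Bytes.toString(salted))             // e.g. "3user-42"
    println(Bytes.equals(unsalt(salted), key))  // true
  }
}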
Example 90
Source File: HBaseSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.external.hbase import akka.actor.ActorSystem import org.apache.gearpump.Message import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.external.hbase.HBaseSink.{HBaseWriter, HBaseWriterFactory} import org.apache.gearpump.streaming.MockUtil import org.apache.gearpump.streaming.task.TaskContext import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} class HBaseSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { property("HBaseSink should invoke HBaseWriter for writing message to HBase") { val hbaseWriter = mock[HBaseWriter] val hbaseWriterFactory = mock[HBaseWriterFactory] implicit val system: ActorSystem = MockUtil.system val userConfig = UserConfig.empty val tableName = "hbase" when(hbaseWriterFactory.getHBaseWriter(userConfig, tableName)) .thenReturn(hbaseWriter) val hbaseSink = new HBaseSink(userConfig, tableName, hbaseWriterFactory) hbaseSink.open(MockUtil.mockTaskContext) forAll(Gen.alphaStr) { (value: String) => val message = Message(value) hbaseSink.write(message) verify(hbaseWriter, atLeastOnce()).put(value) } hbaseSink.close() verify(hbaseWriter).close() } property("HBaseWriter should insert a row successfully") { val table = mock[Table] val config = mock[Configuration] val connection = mock[Connection] val taskContext = mock[TaskContext] val map = Map[String, String]("HBASESINK" -> "hbasesink", "TABLE_NAME" -> "hbase.table.name", "COLUMN_FAMILY" -> "hbase.table.column.family", "COLUMN_NAME" -> "hbase.table.column.name", "HBASE_USER" -> "hbase.user", "GEARPUMP_KERBEROS_PRINCIPAL" -> "gearpump.kerberos.principal", "GEARPUMP_KEYTAB_FILE" -> "gearpump.keytab.file" ) val userConfig = new UserConfig(map) val tableName = "hbase" val row = "row" val group = "group" val name = "name" val value = "3.0" when(connection.getTable(TableName.valueOf(tableName))).thenReturn(table) val put = new Put(Bytes.toBytes(row)) put.addColumn(Bytes.toBytes(group), Bytes.toBytes(name), Bytes.toBytes(value)) val hbaseWriter = new HBaseWriter(connection, tableName) hbaseWriter.insert(Bytes.toBytes(row), Bytes.toBytes(group), Bytes.toBytes(name), Bytes.toBytes(value)) verify(table).put(MockUtil.argMatch[Put](_.getRow sameElements Bytes.toBytes(row))) } }
Example 91
Source File: HBaseUtil.scala From sprue with Apache License 2.0 | 5 votes |
package com.cloudera.sprue import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.util.Bytes object HBaseUtil { val columnFamily: String = "cf1" def insertIncomingDataIntoHBase(patient: Patient): Put = { if (patient.getPatientId == null) { return null } else { val put = new Put(Bytes.toBytes(patient.getPatientId)) if (patient.getPatientId != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("patientId"), Bytes.toBytes(patient.getPatientId)) if (patient.getLocation != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("location"), Bytes.toBytes(patient.getLocation)) if ((patient.getEvaluationDate : java.lang.Long) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evaluationDate"), Bytes.toBytes(patient.getEvaluationDate)) if ((patient.getTemperature : java.lang.Float) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("temperature"), Bytes.toBytes(patient.getTemperature)) if ((patient.getWbc : java.lang.Integer) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("wbc"), Bytes.toBytes(patient.getWbc)) if ((patient.getHeartRate : java.lang.Integer) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("heartRate"), Bytes.toBytes(patient.getHeartRate)) if ((patient.getRespiratoryRate : java.lang.Integer) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("respiratoryRate"), Bytes.toBytes(patient.getRespiratoryRate)) if ((patient.getSbp : java.lang.Integer) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sbp"), Bytes.toBytes(patient.getSbp)) if ((patient.getHypotension : java.lang.Integer) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("hypotension"), Bytes.toBytes(patient.getHypotension)) if (patient.getInfectionFlag != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("infectionFlag"), Bytes.toBytes(patient.getInfectionFlag)) if ((patient.getOrganFailCount : java.lang.Integer) != null) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("organFailCount"), Bytes.toBytes(patient.getOrganFailCount)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evalFinished"), Bytes.toBytes("N")) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("recordUpdatedTime"), Bytes.toBytes(System.currentTimeMillis)) } } def insertEvaluatedDataIntoHBase(patient: Patient): Put = { if (patient.getPatientId == null) { return null } else { val put = new Put(Bytes.toBytes(patient.getPatientId)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("evalFinished"), Bytes.toBytes("Y")) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sirsCounter"), Bytes.toBytes(patient.getSirsCounter)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sirsFlag"), Bytes.toBytes(patient.getSirsFlag)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("sepsisFlag"), Bytes.toBytes(patient.getSepsisFlag)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("severeSepsisFlag"), Bytes.toBytes(patient.getSevereSepsisFlag)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("septicShockFlag"), Bytes.toBytes(patient.getSepticShockFlag)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("organDysfunctionFlag"), Bytes.toBytes(patient.getOrganDysfunctionSyndrome)) put.add(Bytes.toBytes(columnFamily), Bytes.toBytes("systemEvalTime"), Bytes.toBytes(System.currentTimeMillis)) put } } }
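The long chains of if (... != null) put.add(...) above can be collapsed with a small helper that only adds a column when an encoded value is present. This is a refactoring sketch against the same pre-1.0 Put.add(family, qualifier, value) API the listing uses, not a drop-in replacement for the class; the column names are examples.

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes

object PutHelperSketch {
  val columnFamily: Array[Byte] = Bytes.toBytes("cf1")

  // Adds the column only when the caller could produce a value.
  def addIfPresent(put: Put, qualifier: String, value: Option[Array[Byte]]): Put = {
    value.foreach(v => put.add(columnFamily, Bytes.toBytes(qualifier), v))
    put
  }

  def main(args: Array[String]): Unit = {
    val put = new Put(Bytes.toBytes("patient-1"))
    addIfPresent(put, "location", Some(Bytes.toBytes("ICU-3")))
    addIfPresent(put, "temperature", Option(38.2f).map(v => Bytes.toBytes(v)))
    addIfPresent(put, "wbc", None)  // simply skipped
    println(put.size())             // 2 columns queued
  }
}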
Example 92
Source File: L6-14HBase.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.mapreduce.TableOutputFormat import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.io.Text import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD.rddToPairRDDFunctions import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions import org.json4s.DefaultFormats import org.json4s.jvalue2extractable import org.json4s.jvalue2monadic import org.json4s.native.JsonMethods.parse import org.json4s.string2JsonInput object HBaseSinkApp { def main(args: Array[String]) { if (args.length != 5) { System.err.println( "Usage: HBaseSinkApp <appname> <hbaseMaster> <tableName> <columnFamilyName> <columnName>") System.exit(1) } val Seq(appName, hbaseMaster, tableName, columnFamilyName, columnName) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val batchInterval = 10 val windowSize = 20 val slideInterval = 10 val ssc = new StreamingContext(conf, Seconds(batchInterval)) HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env", interval = batchInterval) .flatMap(rec => { implicit val formats = DefaultFormats val query = parse(rec) \ "query" ((query \ "results" \ "quote").children) .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat)) }) .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval)) .foreachRDD(rdd => { val hbaseConf = HBaseConfiguration.create() hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, tableName) hbaseConf.set("hbase.master", hbaseMaster) val jobConf = new Configuration(hbaseConf) jobConf.set("mapreduce.job.outputformat.class", classOf[TableOutputFormat[Text]].getName) rdd.map(rec => { val put = new Put(rec._1.getBytes) put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval))) (rec._1, put) }).saveAsNewAPIHadoopDataset(jobConf) }) ssc.start() ssc.awaitTermination() } }
Example 93
Source File: L6-16SparkHBase.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions import org.json4s.DefaultFormats import org.json4s.jvalue2extractable import org.json4s.jvalue2monadic import org.json4s.native.JsonMethods.parse import org.json4s.string2JsonInput object SparkHBaseBulkPutApp { def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: SparkHBaseBulkPutApp <appname> <tableName> <columnFamilyName> <columnName>") System.exit(1) } val Seq(appName, tableName, columnFamilyName, columnName) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val batchInterval = 10 val windowSize = 20 val slideInterval = 10 val ssc = new StreamingContext(conf, Seconds(batchInterval)) val hbaseConf = HBaseConfiguration.create() val hContext = new HBaseContext(ssc.sparkContext, hbaseConf) val windowed = HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env", interval = batchInterval) .flatMap(rec => { implicit val formats = DefaultFormats val query = parse(rec) \ "query" ((query \ "results" \ "quote").children) .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat)) }) .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval)) hContext.streamBulkPut[(String, Float)](windowed, TableName.valueOf(tableName), rec => { val put = new Put(rec._1.getBytes) put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval))) put }) ssc.start() ssc.awaitTermination() } }
Example 94
Source File: HbRddReaders.scala From hbrdd with Apache License 2.0 | 5 votes |
package top.spoofer.hbrdd.unit import org.apache.hadoop.hbase.util.Bytes import org.json4s._ trait HbRddReaders { implicit val hbBooleanReader = new HbRddFormatsReader[Boolean] { def formatsRead(readData: Array[Byte]): Boolean = Bytes.toBoolean(readData) } implicit val hbByteArrayReader = new HbRddFormatsReader[Array[Byte]] { def formatsRead(readData: Array[Byte]): Array[Byte] = readData } implicit val hbShortReader = new HbRddFormatsReader[Short] { def formatsRead(readData: Array[Byte]): Short = Bytes.toShort(readData) } implicit val hbIntReader = new HbRddFormatsReader[Int] { def formatsRead(readData: Array[Byte]): Int = Bytes.toInt(readData) } implicit val hbFloatReader = new HbRddFormatsReader[Float] { def formatsRead(readData: Array[Byte]): Float = Bytes.toFloat(readData) } implicit val hbDoubleReader = new HbRddFormatsReader[Double] { def formatsRead(readData: Array[Byte]): Double = Bytes.toDouble(readData) } implicit val hbLongReader = new HbRddFormatsReader[Long] { def formatsRead(readData: Array[Byte]): Long = Bytes.toLong(readData) } implicit val hbStringReader = new HbRddFormatsReader[String] { def formatsRead(readData: Array[Byte]): String = Bytes.toString(readData) } implicit val hbJsonReader = new HbRddFormatsReader[JValue] { import org.json4s.jackson.JsonMethods._ def formatsRead(readData: Array[Byte]): JValue = parse(Bytes.toString(readData)) } }
Example 95
Source File: HbRddWriters.scala From hbrdd with Apache License 2.0 | 5 votes |
package top.spoofer.hbrdd.unit import org.apache.hadoop.hbase.util.Bytes import org.json4s._ trait HbRddWriters { implicit val hbBooleanWriter = new HbRddFormatsWriter[Boolean] { def formatsWrite(writeData: Boolean): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbArrayWriter = new HbRddFormatsWriter[Array[Byte]] { def formatsWrite(writeData: Array[Byte]): Array[Byte] = writeData } implicit val hbShortWriter = new HbRddFormatsWriter[Short] { def formatsWrite(writeData: Short): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbIntWriter = new HbRddFormatsWriter[Int] { def formatsWrite(writeData: Int): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbFloatWriter = new HbRddFormatsWriter[Float] { def formatsWrite(writeData: Float): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbDoubleWrite = new HbRddFormatsWriter[Double] { def formatsWrite(writeData: Double): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbLongWrite = new HbRddFormatsWriter[Long] { def formatsWrite(writeData: Long): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbStringWrite = new HbRddFormatsWriter[String] { def formatsWrite(writeData: String): Array[Byte] = Bytes.toBytes(writeData) } implicit val hbJsonWrite = new HbRddFormatsWriter[JValue] { import org.json4s.jackson.JsonMethods._ def formatsWrite(writeData: JValue): Array[Byte] = Bytes.toBytes(compact(writeData)) } }
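The reader and writer typeclasses in the last two examples are thin, symmetric wrappers over Bytes: whatever HbRddFormatsWriter encodes, the matching HbRddFormatsReader must decode with the corresponding Bytes.toX call. A plain round-trip sketch of that symmetry using Bytes directly, with no hbrdd classes required.

import org.apache.hadoop.hbase.util.Bytes

object BytesRoundTripSketch {
  def main(args: Array[String]): Unit = {
    // Each pair mirrors one writer/reader instance from the examples above.
    assert(Bytes.toInt(Bytes.toBytes(42)) == 42)
    assert(Bytes.toLong(Bytes.toBytes(42L)) == 42L)
    assert(Bytes.toDouble(Bytes.toBytes(3.14d)) == 3.14d)
    assert(Bytes.toBoolean(Bytes.toBytes(true)))
    assert(Bytes.toString(Bytes.toBytes("hello")) == "hello")

    // Mixing encodings is the classic failure mode: an int written as a string
    // cannot be read back with Bytes.toInt, and vice versa.
    println(Bytes.toStringBinary(Bytes.toBytes(42)))    // \x00\x00\x00*
    println(Bytes.toStringBinary(Bytes.toBytes("42")))  // 42
  }
}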
Example 96
Source File: HbasePredicate.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hbase

import io.eels._
import io.eels.schema.{DataType, StructType}
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.JavaConversions._

// These are simply marker predicates used for pattern matching
case class ContainsPredicate(name: String, value: Any) extends NamedPredicate(name) {
  override def eval(row: Row): Boolean = true
}

case class RegexPredicate(name: String, value: Any) extends NamedPredicate(name) {
  override def eval(row: Row): Boolean = true
}

case class StartsWithPredicate(name: String, value: Any) extends NamedPredicate(name) {
  override def eval(row: Row): Boolean = true
}

case class NotEqualsPredicate(name: String, value: Any) extends NamedPredicate(name) {
  override def eval(row: Row): Boolean = row.get(name) != value
}

object HbasePredicate {
  private val ByteComparableClazz = classOf[BinaryComparator]
  private val StringComparableClazz = classOf[SubstringComparator]
  private val RegexStringComparableClazz = classOf[RegexStringComparator]
  private val BinaryPrefixComparableClazz = classOf[BinaryPrefixComparator]

  def apply(pred: Predicate)(implicit schema: StructType, serializer: HbaseSerializer): FilterList = pred match {
    case EqualsPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.EQUAL, value, ByteComparableClazz))
    case NotEqualsPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.NOT_EQUAL, value, ByteComparableClazz))
    case ContainsPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.EQUAL, value, StringComparableClazz))
    case StartsWithPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.EQUAL, value, BinaryPrefixComparableClazz))
    case RegexPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.EQUAL, value, RegexStringComparableClazz))
    case GtPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.GREATER, value, ByteComparableClazz))
    case GtePredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.GREATER_OR_EQUAL, value, ByteComparableClazz))
    case LtPredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.LESS, value, ByteComparableClazz))
    case LtePredicate(name, value) => new FilterList(hbaseFiler(name, CompareOp.LESS_OR_EQUAL, value, ByteComparableClazz))
    case AndPredicate(predicates: Seq[Predicate]) => new FilterList(FilterList.Operator.MUST_PASS_ALL, predicates.map(apply).flatMap(_.getFilters))
    case OrPredicate(predicates: Seq[Predicate]) => new FilterList(FilterList.Operator.MUST_PASS_ONE, predicates.map(apply).flatMap(_.getFilters))
    case _@predicateType => sys.error(s"Predicate type '${predicateType.getClass.getSimpleName}' is not supported!")
  }

  def hbaseFiler[T](name: String, compareOp: CompareOp, value: Any, comparableClass: Class[T])
                   (implicit schema: StructType, serializer: HbaseSerializer): Filter = {
    val field = schema.fields.find(_.name == name)
      .getOrElse(sys.error(s"Field '$name' in the predicate is not defined in the EEL schema"))
    if (field.key) {
      new RowFilter(compareOp, hbaseComparator(comparableClass, name, field.dataType, value))
    } else {
      new SingleColumnValueFilter(
        Bytes.toBytes(field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'"))),
        Bytes.toBytes(name),
        compareOp,
        hbaseComparator(comparableClass, name, field.dataType, value))
    }
  }

  def hbaseComparator[T](comparableClass: Class[T], name: String, dataType: DataType, value: Any)
                        (implicit schema: StructType, serializer: HbaseSerializer): ByteArrayComparable = (comparableClass, value) match {
    case (ByteComparableClazz, _) => new BinaryComparator(serializer.toBytes(value, name, dataType))
    case (RegexStringComparableClazz, stringValue: String) => new RegexStringComparator(stringValue)
    case (StringComparableClazz, stringValue: String) => new SubstringComparator(stringValue)
    case (BinaryPrefixComparableClazz, _) => new BinaryPrefixComparator(serializer.toBytes(value, name, dataType))
  }

  // Shorthand predicate names
  def or(left: Predicate, right: Predicate) = OrPredicate(Seq(left, right))
  def and(left: Predicate, right: Predicate) = AndPredicate(Seq(left, right))
  def equals(name: String, value: Any) = EqualsPredicate(name, value)
  def notEquals(name: String, value: Any) = NotEqualsPredicate(name, value)
  def gt(name: String, value: Any) = GtPredicate(name, value)
  def gte(name: String, value: Any) = GtePredicate(name, value)
  def lt(name: String, value: Any) = LtPredicate(name, value)
  def lte(name: String, value: Any) = LtePredicate(name, value)
  def regex(name: String, value: Any) = RegexPredicate(name, value)
  def contains(name: String, value: Any) = ContainsPredicate(name, value)
  def startsWith(name: String, value: Any) = StartsWithPredicate(name, value)
}
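A small composition sketch using the shorthand constructors at the bottom of the object; the field names are placeholders, and converting the resulting predicate into an HBase FilterList via HbasePredicate(pred) additionally requires an implicit eel StructType and HbaseSerializer in scope, which are project-specific and omitted here:

import io.eels.component.hbase.HbasePredicate._

object PredicateSketch {
  // Rows where salary >= 50000 and the name column contains "MAC", or where age < 21
  val pred = or(
    and(gte("salary", 50000), contains("name", "MAC")),
    lt("age", 21)
  )

  // With an implicit StructType and HbaseSerializer in scope this would become a FilterList:
  //   val filters = HbasePredicate(pred)
}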
Example 97
Source File: HBaseStreamingBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 4 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.SparkConf

object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily}")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
}
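Each non-empty socket line becomes a Put whose row key is the line itself, carrying a single cell c:foo = "bar". A minimal verification sketch for those rows, assuming an HBase 1.x+ client on the classpath; the object name is a placeholder and the table name is passed as an argument, matching the streaming example:

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.JavaConverters._

object ScanStreamedRows {
  def main(args: Array[String]): Unit = {
    val tableName = args(0) // same table the streaming example writes to
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    try {
      val table = connection.getTable(TableName.valueOf(tableName))
      // Restrict the scan to the single column written by the example: family "c", qualifier "foo"
      val scan = new Scan().addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"))
      val scanner = table.getScanner(scan)
      try {
        scanner.iterator().asScala.foreach { result =>
          val value = result.getValue(Bytes.toBytes("c"), Bytes.toBytes("foo"))
          println(Bytes.toString(result.getRow) + " -> " + Bytes.toString(value))
        }
      } finally {
        scanner.close()
        table.close()
      }
    } finally {
      connection.close()
    }
  }
}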