org.apache.hadoop.hbase.TableName Scala Examples
The following examples show how to use org.apache.hadoop.hbase.TableName in Scala.
Each example is taken from an open-source project; the source file, originating project, and license are noted above each listing.
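Before the project examples, here is a minimal standalone sketch of the TableName API itself; the table and namespace names ("users", "analytics", "events") are placeholders chosen for illustration. TableName.valueOf validates the name, and the resulting objects compare equal by namespace and qualifier, which is why the examples below can construct a TableName wherever one is needed.

import org.apache.hadoop.hbase.TableName

object TableNameBasics {
  def main(args: Array[String]): Unit = {
    // Table in the default namespace; "users" is just a placeholder name.
    val simple: TableName = TableName.valueOf("users")
    println(simple.getNameAsString)       // users
    println(simple.getNamespaceAsString)  // default

    // Fully qualified name in an explicit namespace.
    val qualified = TableName.valueOf("analytics", "events")
    println(qualified.getNameAsString)    // analytics:events

    // The same qualified form can be written as a single colon-separated string.
    val parsed = TableName.valueOf("analytics:events")
    println(parsed == qualified)          // true
  }
}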
Example 1
Source File: HBase.scala From AI with Apache License 2.0 | 6 votes |
package com.bigchange.hbase

import com.bigchange.util.HBaseUtil._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Result, _}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.spark.SparkContext

// Excerpt from class HBase: the enclosing class declaration and its
// hBaseConfiguration field are elided in this listing.

def existRowKey(row: String, table: Table): Boolean = {
  val get = new Get(row.getBytes())
  val result = table.get(get)
  if (result.isEmpty) {
    warn("HBase table does not contain this row key; an insert will be executed")
    return false
  }
  true
}

def getConfiguration = if (hBaseConfiguration == null) {
  warn("hbase setDefaultConfiguration....")
  setDefaultConfiguration
} else hBaseConfiguration

def setDefaultConfiguration = {
  hBaseConfiguration = HBaseConfiguration.create
  // Settings required for local testing; on a cluster they are picked up
  // automatically from the corresponding configuration files.
  hBaseConfiguration.set("fs.defaultFS", "hdfs://ns1")                        // path of the nameservice
  hBaseConfiguration.set("dfs.nameservices", "ns1")
  hBaseConfiguration.set("dfs.ha.namenodes.ns1", "nn1,nn2")                   // namenodes of the nameservice
  hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn1", "server3:9000")  // namenode RPC address
  hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn2", "server4:9000")  // namenode RPC address
  // Implementation class used for automatic namenode failover
  hBaseConfiguration.set("dfs.client.failover.proxy.provider.ns1",
    "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
  hBaseConfiguration.set("hbase.rootdir", "hdfs://ns1/hbase")
  hBaseConfiguration.set("hbase.zookeeper.quorum", "server0,server1,server2")
  hBaseConfiguration.set("hbase.zookeeper.property.clientPort", "2181")
  hBaseConfiguration
}
} // closes the enclosing (elided) class HBase
Example 2
Source File: HBaseForeachPartitionExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseForeachPartitionExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseForeachPartitionExample {tableName} {columnFamily} are missing an arguments") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseForeachPartitionExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseForeachPartition(hbaseContext, (it, connection) => { val m = connection.getBufferedMutator(TableName.valueOf(tableName)) it.foreach(r => { val put = new Put(r._1) r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) m.mutate(put) }) m.flush() m.close() }) } finally { sc.stop() } } }
Example 3
Source File: HBasePut.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.hbase.utilities import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.hadoop.hbase.client.{ConnectionFactory, Put} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.{DataFrame, SparkSession} import com.paypal.gimel.hbase.conf.{HbaseClientConfiguration, HbaseConfigs} import com.paypal.gimel.logger.Logger object HBasePut { def apply(sparkSession: SparkSession): HBasePut = new HBasePut(sparkSession) } class HBasePut(sparkSession: SparkSession) { val logger = Logger() lazy val hbaseUtilities = HBaseUtilities(sparkSession) def putRows(hbaseTable: String, dataFrame: DataFrame, rowKeyColumn: String, columns: Array[String], cfColsMap: Map[String, String]) { try { // Configure And Connect val conf = HBaseConfiguration.create() val cnxn = ConnectionFactory.createConnection(conf) // Create Connection to HBase table val tbl = cnxn.getTable(TableName.valueOf(hbaseTable)) val rows = dataFrame.rdd.map { row => (row.getAs(rowKeyColumn).toString, columns.map(eachCol => (cfColsMap.getOrElse(eachCol, ""), eachCol, row.getAs(eachCol).asInstanceOf[String])) ) }.collect() // Performing put operation on each row of dataframe rows.foreach { row => val putRow: Put = new Put(Bytes.toBytes(row._1.asInstanceOf[String])) row._2.foreach(x => if (x._2 != rowKeyColumn) putRow.addColumn(Bytes.toBytes(x._1), Bytes.toBytes(x._2), Bytes.toBytes(x._3))) tbl.put(putRow) } tbl.close() } catch { case ex: Throwable => ex.printStackTrace() throw ex } } }
Example 4
Source File: HBaseLocalClient.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.hbase.utilities import java.io.File import scala.collection.mutable.ArrayBuffer import com.google.common.io.Files import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf import org.apache.spark.sql.util._ import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers} import com.paypal.gimel.common.catalog.Field import com.paypal.gimel.hbase.DataSet class HBaseLocalClient extends FunSuite with Matchers with BeforeAndAfterAll { var sparkSession : SparkSession = _ var dataSet: DataSet = _ val hbaseTestingUtility = new HBaseTestingUtility() val tableName = "test_table" val cfs = Array("personal", "professional") val columns = Array("id", "name", "age", "address", "company", "designation", "salary") val fields = columns.map(col => new Field(col)) val metrics = ArrayBuffer.empty[(String, QueryExecution, Long)] protected override def beforeAll(): Unit = { val tempDir: File = Files.createTempDir tempDir.deleteOnExit hbaseTestingUtility.startMiniCluster() SparkHBaseConf.conf = hbaseTestingUtility.getConfiguration createTable(tableName, cfs) val conf = new SparkConf conf.set(SparkHBaseConf.testConf, "true") sparkSession = SparkSession.builder() .master("local") .appName("HBase Test") .config(conf) .getOrCreate() val listener = new QueryExecutionListener { // Only test successful case here, so no need to implement `onFailure` override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {} override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = { metrics += ((funcName, qe, duration)) } } sparkSession.listenerManager.register(listener) sparkSession.sparkContext.setLogLevel("ERROR") dataSet = new DataSet(sparkSession) } protected override def afterAll(): Unit = { hbaseTestingUtility.shutdownMiniCluster() sparkSession.close() } def createTable(name: String, cfs: Array[String]) { val tName = Bytes.toBytes(name) val bcfs = cfs.map(Bytes.toBytes(_)) try { hbaseTestingUtility.deleteTable(TableName.valueOf(tName)) } catch { case _ : Throwable => println("No table = " + name + " found") } hbaseTestingUtility.createMultiRegionTable(TableName.valueOf(tName), bcfs) } // Mocks data for testing def mockDataInDataFrame(numberOfRows: Int): DataFrame = { def stringed(n: Int) = s"""{"id": "$n","name": "MAC-$n", "address": "MAC-${n + 1}", "age": "${n + 1}", "company": "MAC-$n", "designation": "MAC-$n", "salary": "${n * 10000}" }""" val texts: Seq[String] = (1 to numberOfRows).map { x => stringed(x) } val rdd: RDD[String] = sparkSession.sparkContext.parallelize(texts) val dataFrame: DataFrame = sparkSession.read.json(rdd) dataFrame } }
Example 5
Source File: HBaseBulkPutTimestampExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.spark.SparkConf import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseBulkPutTimestampExample { def main(args: Array[String]) { if (args.length < 2) { System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily} are missing an argument") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { val rdd = sc.parallelize(Array( (Bytes.toBytes("6"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("7"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("8"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("9"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("10"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) val conf = HBaseConfiguration.create() val timeStamp = System.currentTimeMillis() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3)) put }) } finally { sc.stop() } } }
Example 6
Source File: HBaseDistributedScanExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseDistributedScanExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseDistributedScanExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      val scan = new Scan()
      scan.setCaching(100)

      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)
      getRdd.foreach(v => println(Bytes.toString(v._1.get())))
      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length)
    } finally {
      sc.stop()
    }
  }
}
Example 7
Source File: HBaseBulkPutExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseBulkPutExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) put }); } finally { sc.stop() } } }
Example 8
Source File: HBaseBulkDeleteExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkDeleteExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkDeleteExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      // RDD of row keys (Array[Byte]) to delete
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5")
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      hbaseContext.bulkDelete[Array[Byte]](rdd,
        TableName.valueOf(tableName),
        putRecord => new Delete(putRecord),
        4)
    } finally {
      sc.stop()
    }
  }
}
Example 9
Source File: HBaseBulkPutExampleFromFile.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.hadoop.io.LongWritable import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.TextInputFormat import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseBulkPutExampleFromFile { def main(args: Array[String]) { if (args.length < 3) { println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile} are missing an argument") return } val tableName = args(0) val columnFamily = args(1) val inputFile = args(2) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " + tableName + " " + columnFamily + " " + inputFile) val sc = new SparkContext(sparkConf) try { var rdd = sc.hadoopFile( inputFile, classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).map(v => { System.out.println("reading-" + v._2.toString) v._2.toString }) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[String](rdd, TableName.valueOf(tableName), (putRecord) => { System.out.println("hbase-" + putRecord) val put = new Put(Bytes.toBytes("Value- " + putRecord)) put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length())) put }); } finally { sc.stop() } } }
Example 10
Source File: HBaseStreamingBulkPutExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseStreamingBulkPutExample { def main(args: Array[String]) { if (args.length < 4) { println("HBaseStreamingBulkPutExample " + "{host} {port} {tableName} {columnFamily} are missing an argument") return } val host = args(0) val port = args(1) val tableName = args(2) val columnFamily = args(3) val sparkConf = new SparkConf().setAppName("HBaseStreamingBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { val ssc = new StreamingContext(sc, Seconds(1)) val lines = ssc.socketTextStream(host, port.toInt) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.streamBulkPut[String](lines, TableName.valueOf(tableName), (putRecord) => { if (putRecord.length() > 0) { val put = new Put(Bytes.toBytes(putRecord)) put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar")) put } else { null } }) ssc.start() ssc.awaitTerminationOrTimeout(60000) } finally { sc.stop() } } }
Example 11
Source File: HBaseBulkGetExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.client.Get import org.apache.hadoop.hbase.client.Result import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.CellUtil import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseBulkGetExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName} missing an argument") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = hbaseContext.bulkGet[Array[Byte], String]( TableName.valueOf(tableName), 2, rdd, record => { System.out.println("making Get") new Get(record) }, (result: Result) => { val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(CellUtil.cloneQualifier(cell)) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") } else { b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") } } b.toString() }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
Example 12
Source File: HBaseBulkPutExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseBulkPutExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily} are missing an arguments") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) put }) } finally { sc.stop() } } }
Example 13
Source File: HBaseBulkDeleteExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseBulkDeleteExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseBulkDeleteExample {tableName} is missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      // RDD of row keys (Array[Byte]) to delete
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5")
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      rdd.hbaseBulkDelete(hbaseContext,
        TableName.valueOf(tableName),
        putRecord => new Delete(putRecord),
        4)
    } finally {
      sc.stop()
    }
  }
}
Example 14
Source File: Hdfs2HBase.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.hbase

import java.util

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.spark.{SparkConf, SparkContext}

object Hdfs2HBase {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("spark://spark01:7077")
      .setAppName(Hdfs2HBase.getClass.getName)
      .set("spark.jars", "target/wow-spark-1.0-SNAPSHOT.jar")
    val sparkContext = new SparkContext(conf)

    val userRDD = sparkContext.textFile("hdfs://spark01:9000/spark/users.dat", 2).map(_.split("::"))

    userRDD.foreachPartition(iter => {
      val configuration = HBaseConfiguration.create()
      // configuration.set("hbase.zookeeper.quorum", "spark01:2181,spark02:2181,spark03:2181")
      configuration.set("hbase.zookeeper.quorum", "spark01")
      configuration.set("hbase.zookeeper.property.clientPort", "2181")
      // Create the connection
      val connection = ConnectionFactory.createConnection(configuration)
      // Get the table object
      val person = connection.getTable(TableName.valueOf("users"))

      iter.foreach(p => {
        val arrayList = new util.ArrayList[Put]()
        val put = new Put(p(0).getBytes)
        arrayList.add(put.addColumn("f1".getBytes, "gender".getBytes, p(1).getBytes))
        arrayList.add(put.addColumn("f1".getBytes, "age".getBytes, p(2).getBytes))
        arrayList.add(put.addColumn("f2".getBytes, "position".getBytes, p(3).getBytes))
        arrayList.add(put.addColumn("f2".getBytes, "code".getBytes, p(4).getBytes))
        person.put(arrayList)
      })
    })
    sparkContext.stop()
  }
}
Example 15
Source File: HBaseBulkGetExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Get import org.apache.hadoop.hbase.client.Result import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.CellUtil import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseBulkGetExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName} is missing an argument") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2, record => { System.out.println("making Get") new Get(record) }, (result: Result) => { val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(CellUtil.cloneQualifier(cell)) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") } else { b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") } } b.toString() }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
Example 16
Source File: HBaseMapPartitionExample.scala From hbase-connectors with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Get import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.yetus.audience.InterfaceAudience @InterfaceAudience.Private object HBaseMapPartitionExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseMapPartitionExample {tableName} is missing an argument") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseMapPartitionExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = rdd.hbaseMapPartitions[String](hbaseContext, (it, connection) => { val table = connection.getTable(TableName.valueOf(tableName)) it.map{r => //batching would be faster. This is just an example val result = table.get(new Get(r)) val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(cell.getQualifierArray) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(cell.getValueArray) + ")") } else { b.append("(" + q + "," + Bytes.toString(cell.getValueArray) + ")") } } b.toString() } }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
Example 17
Source File: HbaseHelper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase

import com.typesafe.scalalogging.StrictLogging
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Table}

object HbaseHelper extends StrictLogging {

  // Runs `thunk` against the closeable resource and guarantees it is closed afterwards.
  def autoclose[C <: AutoCloseable, T](closeable: C)(thunk: C => T): T = {
    try {
      thunk(closeable)
    } finally {
      if (closeable != null) {
        closeable.close()
      }
    }
  }

  // Resolves the table from the implicit connection and closes it when `thunk` finishes.
  def withTable[T](tableName: TableName)(thunk: Table => T)(implicit connection: Connection): T = {
    autoclose(connection.getTable(tableName))(thunk)
  }
}
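As a usage note (not part of the stream-reactor source), the helper above might be exercised as in the sketch below; the connection setup and the "metrics", "d", and "row-1" names are placeholders for illustration.

import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes

object HbaseHelperUsageSketch {
  def main(args: Array[String]): Unit = {
    // The implicit connection required by withTable.
    implicit val connection: Connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    try {
      // "metrics", "d" and "row-1" are placeholder names.
      val value = HbaseHelper.withTable(TableName.valueOf("metrics")) { table =>
        val result = table.get(new Get(Bytes.toBytes("row-1")))
        Bytes.toString(result.getValue(Bytes.toBytes("d"), Bytes.toBytes("count")))
      }
      println(value)
    } finally {
      connection.close()
    }
  }
}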
Example 18
Source File: HbaseReaderHelper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase.writers

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}

import scala.collection.JavaConverters._

object HbaseReaderHelper {

  def createConnection: Connection = {
    ConnectionFactory.createConnection(HBaseConfiguration.create())
  }

  def getAllRecords(tableName: String, columnFamily: String)(implicit connection: Connection): List[HbaseRowData] = {
    HbaseHelper.withTable(TableName.valueOf(tableName)) { tbl =>
      val scan = new Scan()
      scan.addFamily(columnFamily.fromString())
      val scanner = tbl.getScanner(scan)
      scanner.asScala.map { rs =>
        val cells = rs.rawCells().map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) -> CellUtil.cloneValue(cell)
        }.toMap
        HbaseRowData(rs.getRow, cells)
      }.toList
    }
  }
}

case class HbaseRowData(key: Array[Byte], cells: Map[String, Array[Byte]])
Example 19
Source File: HbaseTableHelper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase

import org.apache.hadoop.hbase.client.Connection
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName}

object HbaseTableHelper {

  def createTable(tableName: String, columnFamily: String)(implicit connection: Connection): Unit = {
    createTable(TableName.valueOf(tableName), columnFamily)
  }

  def createTable(tableName: TableName, columnFamily: String)(implicit connection: Connection): Unit = {
    HbaseHelper.autoclose(connection.getAdmin) { admin =>
      if (admin.tableExists(tableName))
        throw new IllegalArgumentException(s"${tableName.getNameAsString}")

      val descriptor = new HTableDescriptor(tableName)
      val colFamDesc = new HColumnDescriptor(columnFamily)
      colFamDesc.setMaxVersions(1)
      descriptor.addFamily(colFamDesc)

      admin.createTable(descriptor)
    }
  }

  def deleteTable(tableName: TableName)(implicit connection: Connection): Unit = {
    HbaseHelper.autoclose(connection.getAdmin) { admin =>
      admin.disableTable(tableName)
      admin.deleteTable(tableName)
    }
  }

  def deleteTable(tableName: String)(implicit connection: Connection): Unit = {
    deleteTable(TableName.valueOf(tableName))
  }
}
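A side note: HTableDescriptor and HColumnDescriptor are deprecated in HBase 2.x. If this helper were ported to the 2.x builder API, createTable could look roughly like the sketch below; this is an illustration of the newer API, not code from the stream-reactor project.

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptorBuilder, Connection, TableDescriptorBuilder}
import org.apache.hadoop.hbase.util.Bytes

object HbaseTableHelper2x {

  def createTable(tableName: TableName, columnFamily: String)(implicit connection: Connection): Unit = {
    val admin = connection.getAdmin
    try {
      if (admin.tableExists(tableName))
        throw new IllegalArgumentException(s"${tableName.getNameAsString} already exists")

      // Build the column family and table descriptors with the 2.x builder API.
      val family = ColumnFamilyDescriptorBuilder
        .newBuilder(Bytes.toBytes(columnFamily))
        .setMaxVersions(1)
        .build()
      val table = TableDescriptorBuilder
        .newBuilder(tableName)
        .setColumnFamily(family)
        .build()

      admin.createTable(table)
    } finally {
      admin.close()
    }
  }
}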
Example 20
package org.apache.spark.sql import org.apache.spark.sql.execution.datasources.hbase.Logging import java.io.File import com.google.common.io.Files import org.apache.hadoop.hbase.client.Table import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf import org.apache.spark.{SparkContext, SparkConf} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args: _*)) } } var spark: SparkSession = null var sc: SparkContext = null var sqlContext: SQLContext = null var df: DataFrame = null private[spark] var htu = new HBaseTestingUtility private[spark] def tableName = "table1" private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"} var table: Table = null val conf = new SparkConf conf.set(SparkHBaseConf.testConf, "true") // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) def defineCatalog(tName: String) = s"""{ |"table":{"namespace":"default", "name":"$tName"}, |"rowkey":"key", |"columns":{ |"col0":{"cf":"rowkey", "col":"key", "type":"string"}, |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"}, |"col2":{"cf":"cf2", "col":"col2", "type":"double"}, |"col3":{"cf":"cf3", "col":"col3", "type":"float"}, |"col4":{"cf":"cf4", "col":"col4", "type":"int"}, |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"}, |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"}, |"col7":{"cf":"cf7", "col":"col7", "type":"string"}, |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"} |} |}""".stripMargin @deprecated(since = "04.12.2017(dd/mm/year)", message = "use `defineCatalog` instead") def catalog = defineCatalog(tableName) override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.startMiniCluster SparkHBaseConf.conf = htu.getConfiguration logInfo(" - minicluster started") println(" - minicluster started") spark = SparkSession.builder() .master("local") .appName("HBaseTest") .config(conf) .getOrCreate() sqlContext = spark.sqlContext sc = spark.sparkContext } override def afterAll() { htu.shutdownMiniCluster() spark.stop() } def createTable(name: String, cfs: Array[String]) { val tName = Bytes.toBytes(name) val bcfs = cfs.map(Bytes.toBytes(_)) try { htu.deleteTable(TableName.valueOf(tName)) } catch { case _ : Throwable => logInfo(" - no table " + name + " found") } htu.createMultiRegionTable(TableName.valueOf(tName), bcfs) } def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) { try { htu.deleteTable(TableName.valueOf(name)) } catch { case _ : Throwable => logInfo(" - no table " + Bytes.toString(name) + " found") } htu.createMultiRegionTable(TableName.valueOf(name), cfs) } }
Example 21
Source File: HBaseTestSuite.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.io.File import scala.collection.JavaConverters._ import com.google.common.io.Files import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility} import org.apache.spark.sql.execution.datasources.hbase.Logging import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class HBaseTestSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { private[spark] var htu = HBaseTestingUtility.createLocalHTU() private[spark] var tableName: Array[Byte] = Bytes.toBytes("t1") private[spark] var columnFamily: Array[Byte] = Bytes.toBytes("cf0") private[spark] var columnFamilies: Array[Array[Byte]] = Array(Bytes.toBytes("cf0"), Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3"), Bytes.toBytes("cf4")) var table: Table = null // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.cleanupTestDir htu.startMiniZKCluster htu.startMiniHBaseCluster(1, 4) logInfo(" - minicluster started") println(" - minicluster started") try { htu.deleteTable(TableName.valueOf(tableName)) //htu.createTable(TableName.valueOf(tableName), columnFamily, 2, Bytes.toBytes("abc"), Bytes.toBytes("xyz"), 2) } catch { case _ : Throwable => logInfo(" - no table " + Bytes.toString(tableName) + " found") } setupTable() } override def afterAll() { try { table.close() println("shutdown") htu.deleteTable(TableName.valueOf(tableName)) logInfo("shuting down minicluster") htu.shutdownMiniHBaseCluster htu.shutdownMiniZKCluster logInfo(" - minicluster shut down") htu.cleanupTestDir } catch { case _ : Throwable => logError("teardown error") } } def setupTable() { val config = htu.getConfiguration htu.createMultiRegionTable(TableName.valueOf(tableName), columnFamilies) println("create htable t1") val connection = ConnectionFactory.createConnection(config) val r = connection.getRegionLocator(TableName.valueOf("t1")) table = connection.getTable(TableName.valueOf("t1")) val regionLocations = r.getAllRegionLocations.asScala.toSeq println(s"$regionLocations size: ${regionLocations.size}") (0 until 100).foreach { x => var put = new Put(Bytes.toBytes(s"row$x")) (0 until 5).foreach { y => put.addColumn(columnFamilies(y), Bytes.toBytes(s"c$y"), Bytes.toBytes(s"value $x $y")) } table.put(put) } } }
Example 22
package org.apache.spark.sql import java.io.File import com.google.common.io.Files import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseTestingUtility} import org.apache.hadoop.hbase.client.{Scan, Put, ConnectionFactory, Table} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf import org.apache.spark.sql.types.UTF8String import org.apache.spark.{SparkContext, SparkConf, Logging} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.JavaConverters._ class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args: _*)) } } private[spark] var htu = HBaseTestingUtility.createLocalHTU() private[spark] def tableName = "table1" private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"} var table: Table = null val conf = new SparkConf conf.set(SparkHBaseConf.testConf, "true") SparkHBaseConf.conf = htu.getConfiguration // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) def catalog = s"""{ |"table":{"namespace":"default", "name":"table1"}, |"rowkey":"key", |"columns":{ |"col0":{"cf":"rowkey", "col":"key", "type":"string"}, |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"}, |"col2":{"cf":"cf2", "col":"col2", "type":"double"}, |"col3":{"cf":"cf3", "col":"col3", "type":"float"}, |"col4":{"cf":"cf4", "col":"col4", "type":"int"}, |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"}, |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"}, |"col7":{"cf":"cf7", "col":"col7", "type":"string"}, |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"} |} |}""".stripMargin override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.cleanupTestDir htu.startMiniZKCluster htu.startMiniHBaseCluster(1, 4) logInfo(" - minicluster started") println(" - minicluster started") } override def afterAll() { try { table.close() println("shutdown") htu.deleteTable(TableName.valueOf(tableName)) logInfo("shuting down minicluster") htu.shutdownMiniHBaseCluster htu.shutdownMiniZKCluster logInfo(" - minicluster shut down") htu.cleanupTestDir } catch { case _ => logError("teardown error") } } def createTable(name: String, cfs: Array[String]) { val tName = Bytes.toBytes(name) val bcfs = cfs.map(Bytes.toBytes(_)) try { htu.deleteTable(TableName.valueOf(tName)) } catch { case _ => logInfo(" - no table " + name + " found") } htu.createMultiRegionTable(TableName.valueOf(tName), bcfs) } def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) { try { htu.deleteTable(TableName.valueOf(name)) } catch { case _ => logInfo(" - no table " + Bytes.toString(name) + " found") } htu.createMultiRegionTable(TableName.valueOf(name), cfs) } }
Example 23
Source File: HBaseUtils.scala From bigdata-examples with Apache License 2.0 | 5 votes |
package com.timeyang.common.util

import com.timeyang.common.config.BaseConf
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.hadoop.mapreduce.Job

// Excerpt from object HBaseUtils: the enclosing object declaration and its other
// members (such as newConf()) are elided in this listing.

  // Builds a MapReduce Job configured to write Puts into the given HBase table
  // through TableOutputFormat.
  def createHbaseOutputJob(tableName: String): Job = {
    val conf = HBaseUtils.newConf()
    conf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    job
  }
}
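A job produced this way is normally passed to Spark's saveAsNewAPIHadoopDataset. The sketch below shows one plausible wiring; the table name "events", the column family "d", and the sample records are invented for illustration, and the job setup inlines what HBaseUtils.createHbaseOutputJob would otherwise provide (newConf() is not shown in the excerpt above).

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{SparkConf, SparkContext}

object HBaseOutputJobSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("HBaseOutputJobSketch"))
    try {
      // Equivalent of HBaseUtils.createHbaseOutputJob("events"), inlined here.
      val conf = HBaseConfiguration.create()
      conf.set(TableOutputFormat.OUTPUT_TABLE, "events")
      val job = Job.getInstance(conf)
      job.setOutputKeyClass(classOf[ImmutableBytesWritable])
      job.setOutputValueClass(classOf[Put])
      job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

      // Turn each record into (rowKey, Put) and write it through TableOutputFormat.
      val records = sc.parallelize(Seq(("row1", "a"), ("row2", "b")))
      val puts = records.map { case (key, value) =>
        val put = new Put(Bytes.toBytes(key))
        put.addColumn(Bytes.toBytes("d"), Bytes.toBytes("v"), Bytes.toBytes(value))
        (new ImmutableBytesWritable(Bytes.toBytes(key)), put)
      }
      puts.saveAsNewAPIHadoopDataset(job.getConfiguration)
    } finally {
      sc.stop()
    }
  }
}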
Example 24
Source File: CreateSaltedTable.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.setup.hbase import java.io.File import org.apache.commons.lang.StringUtils import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName} import org.apache.hadoop.hbase.client.ConnectionFactory import org.apache.hadoop.hbase.io.compress.Compression import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable object CreateSaltedTable { def main(args:Array[String]): Unit = { if (args.length == 0) { println("<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>") } val tableName = args(0) val columnFamilyName = args(1) val regionCount = args(2).toInt val numOfSalts = args(3).toInt val hbaseConfigFolder = args(4) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val connection = ConnectionFactory.createConnection(conf) val admin = connection.getAdmin val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)) val columnDescriptor = new HColumnDescriptor(columnFamilyName) columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY) columnDescriptor.setBlocksize(64 * 1024) columnDescriptor.setBloomFilterType(BloomType.ROW) tableDescriptor.addFamily(columnDescriptor) tableDescriptor.setMaxFileSize(Long.MaxValue) tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName) val splitKeys = new mutable.MutableList[Array[Byte]] for (i <- 0 to regionCount) { val regionSplitStr = StringUtils.leftPad((i*(numOfSalts/regionCount)).toString, 4, "0") splitKeys += Bytes.toBytes(regionSplitStr) } admin.createTable(tableDescriptor, splitKeys.toArray) } }
Example 25
Source File: HBaseServiceLayer.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.server.hbase import javax.ws.rs._ import javax.ws.rs.core.MediaType import com.cloudera.sa.taxi360.model.NyTaxiYellowTrip import com.cloudera.sa.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable @Path("rest") class HBaseServiceLayer { @GET @Path("hello") @Produces(Array(MediaType.TEXT_PLAIN)) def hello(): String = { "Hello World" } @GET @Path("vender/{venderId}/timeline") @Produces(Array(MediaType.APPLICATION_JSON)) def getTripTimeLine (@PathParam("venderId") venderId:String, @QueryParam("startTime") startTime:String = Long.MinValue.toString, @QueryParam("endTime") endTime:String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = { val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName)) val st = if (startTime == null) { Long.MinValue.toString } else { startTime } val et = if (endTime == null) { Long.MaxValue.toString } else { endTime } val scan = new Scan() val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts) println("startRowKey:" + Bytes.toString(startRowKey)) scan.setStartRow(startRowKey) val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts) println("endRowKey:" + Bytes.toString(endRowKey)) scan.setStopRow(endRowKey) val scannerIt = table.getScanner(scan).iterator() val tripList = new mutable.MutableList[NyTaxiYellowTrip] while(scannerIt.hasNext) { val result = scannerIt.next() tripList += TaxiTripHBaseHelper.convertToTaxiTrip(result) println("Found a trip:" + TaxiTripHBaseHelper.convertToTaxiTrip(result)) } println("tripList.size:" + tripList.size) tripList.toArray } }
Example 26
Source File: SparkStreamingTaxiTripToHBase.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.cloudera.sa.taxi360.streaming.ingestion.hbase import java.io.File import com.cloudera.sa.taxi360.model.NyTaxiYellowTripBuilder import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._ import kafka.serializer.StringDecoder import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.solr.common.cloud.ZooKeeperException import org.apache.spark.streaming.kafka.KafkaUtils import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.{SparkConf, SparkContext} object SparkStreamingTaxiTripToHBase { def main(args: Array[String]): Unit = { println("Java Version:" + System.getProperty("java.version")) println("Java Home:" + System.getProperties().getProperty("java.home")) val v:ZooKeeperException = null if (args.length == 0) { println("Args: <KafkaBrokerList> " + "<kafkaTopicList> " + "<numberOfSeconds>" + "<runLocal>" + "<hbaseTable>" + "<numOfSalts>" + "<checkpointDir>" + "<hbaseConfigFolder>") return } val kafkaBrokerList = args(0) val kafkaTopicList = args(1) val numberOfSeconds = args(2).toInt val runLocal = args(3).equals("l") val tableName = args(4) val numOfSalts = args(5).toInt val checkpointFolder = args(6) val hbaseConfigFolder = args(7) println("kafkaBrokerList:" + kafkaBrokerList) println("kafkaTopicList:" + kafkaTopicList) println("numberOfSeconds:" + numberOfSeconds) println("runLocal:" + runLocal) println("tableName:" + tableName) println("numOfSalts:" + numOfSalts) val sc:SparkContext = if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") new SparkContext("local[2]", "TableStatsSinglePathMain", sparkConfig) } else { val sparkConf = new SparkConf().setAppName("Spark Streaming Ingestion to HBase") new SparkContext(sparkConf) } val ssc = new StreamingContext(sc, Seconds(numberOfSeconds)) val topicsSet = kafkaTopicList.split(",").toSet val kafkaParams = Map[String, String]("metadata.broker.list" -> kafkaBrokerList) val messageStream = KafkaUtils. createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val hbaseContext = new HBaseContext(sc, conf) val tripDStream = messageStream.map(r => { (r._1, r._2.split(",")) }).filter(r => r._2.size > 3).map(r => { (r._1, NyTaxiYellowTripBuilder.build(r._2)) }) tripDStream.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), taxi => { TaxiTripHBaseHelper.generatePut(taxi._2, numOfSalts) }) ssc.checkpoint(checkpointFolder) ssc.start() ssc.awaitTermination() } }
Example 27
Source File: HBaseDistributedScanExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Scan import org.apache.spark.SparkConf object HBaseDistributedScanExample { def main(args: Array[String]) { if (args.length < 1) { println("GenerateGraphs {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName ) val sc = new SparkContext(sparkConf) try { val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val scan = new Scan() scan.setCaching(100) val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan) getRdd.foreach(v => println(Bytes.toString(v._1.get()))) println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length); //.collect().foreach(v => println(Bytes.toString(v._1.get()))) } finally { sc.stop() } } }
Example 28
Source File: CreateSaltedTable.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.setup.hbase import java.io.File import org.apache.commons.lang.StringUtils import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName} import org.apache.hadoop.hbase.client.ConnectionFactory import org.apache.hadoop.hbase.io.compress.Compression import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable object CreateSaltedTable { def main(args:Array[String]): Unit = { if (args.length == 0) { println("<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>") } val tableName = args(0) val columnFamilyName = args(1) val regionCount = args(2).toInt val numOfSalts = args(3).toInt val hbaseConfigFolder = args(4) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val connection = ConnectionFactory.createConnection(conf) val admin = connection.getAdmin val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)) val columnDescriptor = new HColumnDescriptor(columnFamilyName) columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY) columnDescriptor.setBlocksize(64 * 1024) columnDescriptor.setBloomFilterType(BloomType.ROW) tableDescriptor.addFamily(columnDescriptor) tableDescriptor.setMaxFileSize(Long.MaxValue) tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName) val splitKeys = new mutable.MutableList[Array[Byte]] for (i <- 0 to regionCount) { val regionSplitStr = StringUtils.leftPad((i*(numOfSalts/regionCount)).toString, 4, "0") splitKeys += Bytes.toBytes(regionSplitStr) } admin.createTable(tableDescriptor, splitKeys.toArray) } }
Example 29
Source File: HBaseServiceLayer.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.server.hbase import javax.ws.rs._ import javax.ws.rs.core.MediaType import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTrip import com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable @Path("rest") class HBaseServiceLayer { @GET @Path("hello") @Produces(Array(MediaType.TEXT_PLAIN)) def hello(): String = { "Hello World" } @GET @Path("vender/{venderId}/timeline") @Produces(Array(MediaType.APPLICATION_JSON)) def getTripTimeLine (@PathParam("venderId") venderId:String, @QueryParam("startTime") startTime:String = Long.MinValue.toString, @QueryParam("endTime") endTime:String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = { val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName)) val st = if (startTime == null) { Long.MinValue.toString } else { startTime } val et = if (endTime == null) { Long.MaxValue.toString } else { endTime } val scan = new Scan() val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts) println("startRowKey:" + Bytes.toString(startRowKey)) scan.setStartRow(startRowKey) val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts) println("endRowKey:" + Bytes.toString(endRowKey)) scan.setStopRow(endRowKey) val scannerIt = table.getScanner(scan).iterator() val tripList = new mutable.MutableList[NyTaxiYellowTrip] while(scannerIt.hasNext) { val result = scannerIt.next() tripList += TaxiTripHBaseHelper.convertToTaxiTrip(result) println("Found a trip:" + TaxiTripHBaseHelper.convertToTaxiTrip(result)) } println("tripList.size:" + tripList.size) tripList.toArray } }
Example 30
Source File: SparkStreamingTaxiTripToHBase.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase import java.io.File import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTripBuilder import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._ import kafka.serializer.StringDecoder import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.solr.common.cloud.ZooKeeperException import org.apache.spark.streaming.kafka.KafkaUtils import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.{SparkConf, SparkContext} object SparkStreamingTaxiTripToHBase { def main(args: Array[String]): Unit = { println("Java Version:" + System.getProperty("java.version")) println("Java Home:" + System.getProperties().getProperty("java.home")) val v:ZooKeeperException = null if (args.length == 0) { println("Args: <KafkaBrokerList> " + "<kafkaTopicList> " + "<numberOfSeconds>" + "<runLocal>" + "<hbaseTable>" + "<numOfSalts>" + "<checkpointDir>" + "<hbaseConfigFolder>") return } val kafkaBrokerList = args(0) val kafkaTopicList = args(1) val numberOfSeconds = args(2).toInt val runLocal = args(3).equals("l") val tableName = args(4) val numOfSalts = args(5).toInt val checkpointFolder = args(6) val hbaseConfigFolder = args(7) println("kafkaBrokerList:" + kafkaBrokerList) println("kafkaTopicList:" + kafkaTopicList) println("numberOfSeconds:" + numberOfSeconds) println("runLocal:" + runLocal) println("tableName:" + tableName) println("numOfSalts:" + numOfSalts) val sc:SparkContext = if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") new SparkContext("local[2]", "TableStatsSinglePathMain", sparkConfig) } else { val sparkConf = new SparkConf().setAppName("Spark Streaming Ingestion to HBase") new SparkContext(sparkConf) } val ssc = new StreamingContext(sc, Seconds(numberOfSeconds)) val topicsSet = kafkaTopicList.split(",").toSet val kafkaParams = Map[String, String]("metadata.broker.list" -> kafkaBrokerList) val messageStream = KafkaUtils. createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val hbaseContext = new HBaseContext(sc, conf) val tripDStream = messageStream.map(r => { (r._1, r._2.split(",")) }).filter(r => r._2.size > 3).map(r => { (r._1, NyTaxiYellowTripBuilder.build(r._2)) }) tripDStream.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), taxi => { TaxiTripHBaseHelper.generatePut(taxi._2, numOfSalts) }) ssc.checkpoint(checkpointFolder) ssc.start() ssc.awaitTermination() } }
Example 31
Source File: HbaseSinkWriter.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hbase import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} import com.sksamuel.exts.Logging import io.eels.schema.StructType import io.eels.{Row, SinkWriter} import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client.{BufferedMutator, _} object HbaseSinkWriter extends Logging { def apply(namespace: String, table: String, numberOfWriters: AtomicInteger, schema: StructType, maxKeyValueSize: Option[Int], writeBufferSize: Option[Long], writeRowBatchSize: Int, serializer: HbaseSerializer, connection: Connection): Seq[HbaseSinkWriter] = { val tableName = TableName.valueOf(namespace, table) private val fieldsWithIndex = schema.fields.zipWithIndex override def write(row: Row): Unit = { if ((rowCounter.incrementAndGet() % writeRowBatchSize) == 0) mutator.flush() val rowKey = serializer.toBytes(row.values(rowKeyIndex), keyField.name, keyField.dataType) val put = new Put(rowKey) for ((field, index) <- fieldsWithIndex) { if (index != rowKeyIndex && row.values(index) != null) { val cf = field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'")).getBytes val col = field.name.getBytes() put.addColumn(cf, col, serializer.toBytes(row.values(index), field.name, field.dataType)) } } mutator.mutate(put) } override def close(): Unit = { mutator.flush() if (numberOfWriters.decrementAndGet() == 0) mutator.close() } }
Example 32
Source File: HbasePublisher.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hbase import java.util import java.util.concurrent.atomic.AtomicBoolean import com.sksamuel.exts.io.Using import com.sksamuel.exts.metrics.Timed import io.eels.Row import io.eels.datastream.{Publisher, Subscriber, Subscription} import io.eels.schema.StructType import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client.{Connection, Result, Scan} import scala.collection.mutable.ArrayBuffer class HbasePublisher(connection: Connection, schema: StructType, namespace: String, tableName: String, bufferSize: Int, maxRows: Long, scanner: Scan, implicit val serializer: HbaseSerializer) extends Publisher[Seq[Row]] with Timed with Using { private val table = connection.getTable(TableName.valueOf(namespace, tableName)) override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = { try { using(new CloseableIterator) { rowIter => val running = new AtomicBoolean(true) subscriber.subscribed(Subscription.fromRunning(running)) val buffer = new ArrayBuffer[Row](bufferSize) while (rowIter.hasNext && running.get()) { buffer append rowIter.next() if (buffer.size == bufferSize) { subscriber.next(buffer.toVector) buffer.clear() } } if (buffer.nonEmpty) subscriber.next(buffer.toVector) subscriber.completed() } } catch { case t: Throwable => subscriber.error(t) } } class CloseableIterator extends Iterator[Row] with AutoCloseable { private val resultScanner = table.getScanner(scanner) private val resultScannerIter = resultScanner.iterator() private var rowCount = 0 private var iter: Iterator[Row] = Iterator.empty override def hasNext: Boolean = rowCount < maxRows && iter.hasNext || { if (rowCount < maxRows && resultScannerIter.hasNext) { iter = HBaseResultsIterator(schema, resultScannerIter) iter.hasNext } else false } override def next(): Row = { rowCount += 1 iter.next() } override def close(): Unit = { resultScanner.close() } } case class HBaseResultsIterator(schema: StructType, resultIter: util.Iterator[Result])(implicit serializer: HbaseSerializer) extends Iterator[Row] { override def hasNext: Boolean = resultIter.hasNext override def next(): Row = { val resultRow = resultIter.next() val values = schema.fields.map { field => if (!field.key) { val value = resultRow.getValue(field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'")).getBytes, field.name.getBytes) if (value != null) serializer.fromBytes(value, field.name, field.dataType) else null } else serializer.fromBytes(resultRow.getRow, field.name, field.dataType) } Row(schema, values) } } }
Example 33
Source File: CustomedHBaseResources.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase import scala.language.implicitConversions import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client._ case class CustomedRegionResource(relation: HBaseRelationTrait) extends ReferencedResource { // INLINE: SmartConnection is private[hbase], so we make a fake one here var connection: SmartConnection = _ var rl: RegionLocator = _ override def init(): Unit = { connection = HBaseConnectionCache.getConnection(relation.hbaseConf) rl = connection.getRegionLocator( TableName.valueOf(relation.catalog.namespace, relation.catalog.name)) } override def destroy(): Unit = { if (rl != null) { rl.close() rl = null } if (connection != null) { connection.close() connection = null } } val regions = releaseOnException { val keys = rl.getStartEndKeys keys.getFirst .zip(keys.getSecond) .zipWithIndex .map( x => CustomedHBaseRegion( x._2, Some(x._1._1), Some(x._1._2), Some(rl.getRegionLocation(x._1._1).getHostname))) } } case class CustomedTableResource(relation: HBaseRelationTrait) extends ReferencedResource { var connection: SmartConnection = _ var table: Table = _ override def init(): Unit = { connection = HBaseConnectionCache.getConnection(relation.hbaseConf) table = connection.getTable(TableName.valueOf(relation.catalog.namespace, relation.catalog.name)) } override def destroy(): Unit = { if (table != null) { table.close() table = null } if (connection != null) { connection.close() connection = null } } def get(list: java.util.List[org.apache.hadoop.hbase.client.Get]): CustomedGetResource = releaseOnException { CustomedGetResource(this, table.get(list)) } def getScanner(scan: Scan): CustomedScanResource = releaseOnException { CustomedScanResource(this, table.getScanner(scan)) } } case class CustomedScanResource(tbr: CustomedTableResource, rs: ResultScanner) extends Resource { def release() { rs.close() tbr.release() } } case class CustomedGetResource(tbr: CustomedTableResource, rs: Array[Result]) extends Resource { def release() { tbr.release() } }
Example 34
Source File: L6-16SparkHBase.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions import org.json4s.DefaultFormats import org.json4s.jvalue2extractable import org.json4s.jvalue2monadic import org.json4s.native.JsonMethods.parse import org.json4s.string2JsonInput object SparkHBaseBulkPutApp { def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: SparkHBaseBulkPutApp <appname> <tableName> <columnFamilyName> <columnName>") System.exit(1) } val Seq(appName, tableName, columnFamilyName, columnName) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val batchInterval = 10 val windowSize = 20 val slideInterval = 10 val ssc = new StreamingContext(conf, Seconds(batchInterval)) val hbaseConf = HBaseConfiguration.create() val hContext = new HBaseContext(ssc.sparkContext, hbaseConf) val windowed = HttpUtils.createStream(ssc, url = "https://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22IBM,GOOG,MSFT,AAPL,FB,ORCL,YHOO,TWTR,LNKD,INTC%22)%0A%09%09&format=json&diagnostics=true&env=http%3A%2F%2Fdatatables.org%2Falltables.env", interval = batchInterval) .flatMap(rec => { implicit val formats = DefaultFormats val query = parse(rec) \ "query" ((query \ "results" \ "quote").children) .map(rec => ((rec \ "symbol").extract[String], (rec \ "LastTradePriceOnly").extract[String].toFloat)) }) .reduceByKeyAndWindow((x: Float, y: Float) => (x + y), Seconds(windowSize), Seconds(slideInterval)) hContext.streamBulkPut[(String, Float)](windowed, TableName.valueOf(tableName), rec => { val put = new Put(rec._1.getBytes) put.addColumn(columnFamilyName.getBytes, columnName.getBytes, Bytes.toBytes(rec._2 / (windowSize / batchInterval))) put }) ssc.start() ssc.awaitTermination() } }
Example 35
Source File: HBaseSinkSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.external.hbase import akka.actor.ActorSystem import org.apache.gearpump.Message import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.external.hbase.HBaseSink.{HBaseWriter, HBaseWriterFactory} import org.apache.gearpump.streaming.MockUtil import org.apache.gearpump.streaming.task.TaskContext import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes import org.mockito.Mockito._ import org.scalacheck.Gen import org.scalatest.mock.MockitoSugar import org.scalatest.prop.PropertyChecks import org.scalatest.{Matchers, PropSpec} class HBaseSinkSpec extends PropSpec with PropertyChecks with Matchers with MockitoSugar { property("HBaseSink should invoke HBaseWriter for writing message to HBase") { val hbaseWriter = mock[HBaseWriter] val hbaseWriterFactory = mock[HBaseWriterFactory] implicit val system: ActorSystem = MockUtil.system val userConfig = UserConfig.empty val tableName = "hbase" when(hbaseWriterFactory.getHBaseWriter(userConfig, tableName)) .thenReturn(hbaseWriter) val hbaseSink = new HBaseSink(userConfig, tableName, hbaseWriterFactory) hbaseSink.open(MockUtil.mockTaskContext) forAll(Gen.alphaStr) { (value: String) => val message = Message(value) hbaseSink.write(message) verify(hbaseWriter, atLeastOnce()).put(value) } hbaseSink.close() verify(hbaseWriter).close() } property("HBaseWriter should insert a row successfully") { val table = mock[Table] val config = mock[Configuration] val connection = mock[Connection] val taskContext = mock[TaskContext] val map = Map[String, String]("HBASESINK" -> "hbasesink", "TABLE_NAME" -> "hbase.table.name", "COLUMN_FAMILY" -> "hbase.table.column.family", "COLUMN_NAME" -> "hbase.table.column.name", "HBASE_USER" -> "hbase.user", "GEARPUMP_KERBEROS_PRINCIPAL" -> "gearpump.kerberos.principal", "GEARPUMP_KEYTAB_FILE" -> "gearpump.keytab.file" ) val userConfig = new UserConfig(map) val tableName = "hbase" val row = "row" val group = "group" val name = "name" val value = "3.0" when(connection.getTable(TableName.valueOf(tableName))).thenReturn(table) val put = new Put(Bytes.toBytes(row)) put.addColumn(Bytes.toBytes(group), Bytes.toBytes(name), Bytes.toBytes(value)) val hbaseWriter = new HBaseWriter(connection, tableName) hbaseWriter.insert(Bytes.toBytes(row), Bytes.toBytes(group), Bytes.toBytes(name), Bytes.toBytes(value)) verify(table).put(MockUtil.argMatch[Put](_.getRow sameElements Bytes.toBytes(row))) } }
Example 36
Source File: HBaseTest.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples

import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark._

object HBaseTest {
  def main(args: Array[String]) {
    // Guard against a missing table name before it is read below.
    if (args.length < 1) {
      System.err.println("Usage: HBaseTest <table_name>")
      System.exit(1)
    }

    val sparkConf = new SparkConf().setAppName("HBaseTest")
    val sc = new SparkContext(sparkConf)
    val conf = HBaseConfiguration.create()

    // Other options for configuring scan behavior are available. More information available at
    // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html
    conf.set(TableInputFormat.INPUT_TABLE, args(0))

    // Initialize hBase table if necessary
    val admin = new HBaseAdmin(conf)
    if (!admin.isTableAvailable(args(0))) {
      val tableDesc = new HTableDescriptor(TableName.valueOf(args(0)))
      admin.createTable(tableDesc)
    }

    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    hBaseRDD.count()

    sc.stop()
    admin.close()
  }
}
Example 37
Source File: IntegrationUtils.scala From spark-hbase-connector with Apache License 2.0 | 5 votes |
package com.user.integration import it.nerdammer.spark.hbase.HBaseSparkConf import org.apache.hadoop.hbase.client.HBaseAdmin import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName} import org.apache.spark.{SparkConf, SparkContext} object IntegrationUtils extends Serializable { @transient lazy val sparkContext: SparkContext = { val sparkConf = new SparkConf() sparkConf.set("spark.master", "local") sparkConf.set("spark.driver.allowMultipleContexts", "true") sparkConf.setAppName("test") new SparkContext(sparkConf) } def createTable(table: String, columnFamily: String): Unit = createTable(table, Seq(columnFamily)) def createTable(table: String, columnFamilies: Seq[String]): Unit = { val conf = HBaseSparkConf() val admin = new HBaseAdmin(conf.createHadoopBaseConfig()) val tableDesc = new HTableDescriptor(TableName.valueOf(table)) columnFamilies.foreach(cf => { tableDesc.addFamily(new HColumnDescriptor(cf)) }) admin.createTable(tableDesc) } def dropTable(table: String) = { val conf = HBaseSparkConf() val admin = new HBaseAdmin(conf.createHadoopBaseConfig()) admin.disableTable(table) admin.deleteTable(table) } def pad(str: String, size: Int): String = if(str.length>=size) str else pad("0" + str, size) }
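A minimal usage sketch for these helpers around a test body; the table and column-family names here are placeholders, not part of the original project.

import com.user.integration.IntegrationUtils

object IntegrationUtilsUsage {
  def main(args: Array[String]): Unit = {
    val table = "test_table" // placeholder name
    IntegrationUtils.createTable(table, Seq("cf1", "cf2"))
    try {
      // Run the test body against the freshly created table.
      val sc = IntegrationUtils.sparkContext
      println(IntegrationUtils.pad("42", 5)) // prints "00042"
    } finally {
      IntegrationUtils.dropTable(table)
    }
  }
}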
Example 38
Source File: HBaseBulkPutTimestampExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.spark.SparkConf object HBaseBulkPutTimestampExample { def main(args: Array[String]) { if (args.length < 2) { System.out.println("HBaseBulkPutTimestampExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutTimestampExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { val rdd = sc.parallelize(Array( (Bytes.toBytes("6"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("7"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("8"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("9"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("10"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))))) val conf = HBaseConfiguration.create() val timeStamp = System.currentTimeMillis() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, timeStamp, putValue._3)) put }) } finally { sc.stop() } } }
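To confirm the explicit timestamp was applied, one option is to read a row back and print each cell's timestamp. A small sketch, assuming the same table and column family that were passed to the example above:

import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.util.Bytes

object CheckCellTimestamps {
  def main(args: Array[String]): Unit = {
    val Array(tableName, columnFamily) = args
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    val table = connection.getTable(TableName.valueOf(tableName))
    try {
      // Row "6" is one of the rows written by the bulk put above.
      val get = new Get(Bytes.toBytes("6"))
      get.addFamily(Bytes.toBytes(columnFamily))
      val result = table.get(get)
      result.rawCells().foreach { cell =>
        println(Bytes.toString(CellUtil.cloneQualifier(cell)) + " @ " + cell.getTimestamp)
      }
    } finally {
      table.close()
      connection.close()
    }
  }
}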
Example 39
Source File: HogHBaseReputation.scala From hogzilla with GNU General Public License v2.0 | 5 votes |
package org.hogzilla.hbase import scala.math.random import java.lang.Math import org.apache.spark._ import org.apache.hadoop.hbase.client.HBaseAdmin import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName} import org.apache.hadoop.hbase.mapreduce.TableInputFormat import org.apache.spark.mllib.regression.{LabeledPoint,LinearRegressionModel,LinearRegressionWithSGD} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.rdd.RDD import org.apache.hadoop.hbase.client.HTable import org.apache.hadoop.hbase.filter.SingleColumnValueFilter import org.apache.hadoop.hbase.filter.BinaryComparator import org.apache.hadoop.hbase.filter.FilterList import org.apache.hadoop.hbase.filter.CompareFilter import java.util.ArrayList import org.apache.hadoop.hbase.client.Scan import org.apache.hadoop.hbase.filter.Filter import scala.collection.mutable.HashSet import org.apache.hadoop.hbase.client.Put object HogHBaseReputation { // Ex: MX, whitelist def getReputationList(listName:String, listType:String):Set[String] = { val list = new HashSet[String] val filters: ArrayList[Filter] = new ArrayList(); val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType))) colValFilter1.setFilterIfMissing(false); val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"), CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName))) colValFilter2.setFilterIfMissing(false); filters.add(colValFilter1); filters.add(colValFilter2); val filterList = new FilterList( FilterList.Operator.MUST_PASS_ALL, filters); val scan = new Scan() scan.setFilter(filterList) val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator() while(it.hasNext()) { list.add( Bytes.toString(it.next().getValue(Bytes.toBytes("rep"),Bytes.toBytes("ip"))) ) } list.toSet } def saveReputationList(listName:String, listType:String, ip:String) = { val put = new Put(Bytes.toBytes(ip+"-"+listName+"-"+listType)) put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType)) put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName)) put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip)) HogHBaseRDD.hogzilla_reputation.put(put) } }
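A minimal usage sketch, assuming the hogzilla_reputation table behind HogHBaseRDD already exists and is reachable from the client; the list name, list type, and IP are placeholder values.

import org.hogzilla.hbase.HogHBaseReputation

object ReputationListUsage {
  def main(args: Array[String]): Unit = {
    // Record an IP on a whitelist, then read the whole list back.
    HogHBaseReputation.saveReputationList("MX", "whitelist", "10.1.1.15")
    val whitelisted: Set[String] = HogHBaseReputation.getReputationList("MX", "whitelist")
    println(s"whitelisted IPs: ${whitelisted.mkString(", ")}")
  }
}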
Example 40
Source File: HBaseBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.spark.SparkConf object HBaseBulkPutExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) put }); } finally { sc.stop() } } }
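One quick way to check that the five puts landed is a full scan of the target table. A small verification sketch, standalone and using only the standard client API:

import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.JavaConverters._

object VerifyBulkPut {
  def main(args: Array[String]): Unit = {
    val tableName = args(0)
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    val table = connection.getTable(TableName.valueOf(tableName))
    try {
      val scanner = table.getScanner(new Scan())
      scanner.asScala.foreach { result =>
        val cells = result.listCells().asScala.map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) + "=" + Bytes.toString(CellUtil.cloneValue(cell))
        }
        println(Bytes.toString(result.getRow) + " -> " + cells.mkString(", "))
      }
      scanner.close()
    } finally {
      table.close()
      connection.close()
    }
  }
}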
Example 41
Source File: HBaseBulkDeleteExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Delete import org.apache.spark.SparkConf object HBaseBulkDeleteExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkDeletesExample {tableName} ") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) val sc = new SparkContext(sparkConf) try { //[Array[Byte]] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkDelete[Array[Byte]](rdd, TableName.valueOf(tableName), putRecord => new Delete(putRecord), 4) } finally { sc.stop() } } }
Example 42
Source File: HBaseBulkPutExampleFromFile.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.io.LongWritable import org.apache.hadoop.io.Text import org.apache.spark.SparkConf object HBaseBulkPutExampleFromFile { def main(args: Array[String]) { if (args.length < 3) { println("HBaseBulkPutExampleFromFile {tableName} {columnFamily} {inputFile}") return } val tableName = args(0) val columnFamily = args(1) val inputFile = args(2) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExampleFromFile " + tableName + " " + columnFamily + " " + inputFile) val sc = new SparkContext(sparkConf) try { var rdd = sc.hadoopFile( inputFile, classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).map(v => { System.out.println("reading-" + v._2.toString) v._2.toString }) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) hbaseContext.bulkPut[String](rdd, TableName.valueOf(tableName), (putRecord) => { System.out.println("hbase-" + putRecord) val put = new Put(Bytes.toBytes("Value- " + putRecord)) put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("1"), Bytes.toBytes(putRecord.length())) put }); } finally { sc.stop() } } }
Example 43
Source File: HBaseBulkGetExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.spark.SparkContext import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration} import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.client.Get import org.apache.hadoop.hbase.client.Result import org.apache.spark.SparkConf object HBaseBulkGetExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = hbaseContext.bulkGet[Array[Byte], String]( TableName.valueOf(tableName), 2, rdd, record => { System.out.println("making Get") new Get(record) }, (result: Result) => { val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(CellUtil.cloneQualifier(cell)) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") } else { b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") } } b.toString() }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
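The Bytes.toLong branch above only triggers for a qualifier named "counter" holding a long-encoded value. A small seeding sketch that writes such rows first; the column family name "c" is an assumption, since the bulk get does not name one.

import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.util.Bytes

object SeedBulkGetRows {
  def main(args: Array[String]): Unit = {
    val tableName = args(0)
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    val table = connection.getTable(TableName.valueOf(tableName))
    try {
      // Rows "1" through "7" are the keys the bulk get example looks up.
      (1 to 7).foreach { i =>
        val put = new Put(Bytes.toBytes(i.toString))
        put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("counter"), Bytes.toBytes(i.toLong))
        table.put(put)
      }
    } finally {
      table.close()
      connection.close()
    }
  }
}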
Example 44
Source File: HBaseBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Put import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.spark.{SparkConf, SparkContext} object HBaseBulkPutExample { def main(args: Array[String]) { if (args.length < 2) { println("HBaseBulkPutExample {tableName} {columnFamily}") return } val tableName = args(0) val columnFamily = args(1) val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily) val sc = new SparkContext(sparkConf) try { //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])] val rdd = sc.parallelize(Array( (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))), (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))), (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))), (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))), (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5")))) )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), (putRecord) => { val put = new Put(putRecord._1) putRecord._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3)) put }) } finally { sc.stop() } } }
Example 45
Source File: HBaseBulkDeleteExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.Delete import org.apache.hadoop.hbase.{TableName, HBaseConfiguration} import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.{SparkContext, SparkConf} object HBaseBulkDeleteExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkDeletesExample {tableName} ") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkDeleteExample " + tableName) val sc = new SparkContext(sparkConf) try { //[Array[Byte]] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5") )) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) rdd.hbaseBulkDelete(hbaseContext, TableName.valueOf(tableName), putRecord => new Delete(putRecord), 4) } finally { sc.stop() } } }
Example 46
Source File: HBaseForeachPartitionExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkContext, SparkConf}

object HBaseForeachPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 2) {
      println("HBaseForeachPartitionExample {tableName} {columnFamily}")
      return
    }

    val tableName = args(0)
    val columnFamily = args(1)

    val sparkConf = new SparkConf().setAppName("HBaseForeachPartitionExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])]
      val rdd = sc.parallelize(Array(
        (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
        (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
        (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
        (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
        (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
      ))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      // Open one BufferedMutator per partition and write every record in that partition.
      rdd.hbaseForeachPartition(hbaseContext, (it, connection) => {
        val m = connection.getBufferedMutator(TableName.valueOf(tableName))
        it.foreach(r => {
          val put = new Put(r._1)
          r._2.foreach((putValue) => put.addColumn(putValue._1, putValue._2, putValue._3))
          m.mutate(put)
        })
        m.flush()
        m.close()
      })
    } finally {
      sc.stop()
    }
  }
}
Example 47
Source File: HBaseBulkGetExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd import org.apache.hadoop.hbase.client.{Result, Get} import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration} import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._ import org.apache.spark.{SparkContext, SparkConf} object HBaseBulkGetExample { def main(args: Array[String]) { if (args.length < 1) { println("HBaseBulkGetExample {tableName}") return } val tableName = args(0) val sparkConf = new SparkConf().setAppName("HBaseBulkGetExample " + tableName) val sc = new SparkContext(sparkConf) try { //[(Array[Byte])] val rdd = sc.parallelize(Array( Bytes.toBytes("1"), Bytes.toBytes("2"), Bytes.toBytes("3"), Bytes.toBytes("4"), Bytes.toBytes("5"), Bytes.toBytes("6"), Bytes.toBytes("7"))) val conf = HBaseConfiguration.create() val hbaseContext = new HBaseContext(sc, conf) val getRdd = rdd.hbaseBulkGet[String](hbaseContext, TableName.valueOf(tableName), 2, record => { System.out.println("making Get") new Get(record) }, (result: Result) => { val it = result.listCells().iterator() val b = new StringBuilder b.append(Bytes.toString(result.getRow) + ":") while (it.hasNext) { val cell = it.next() val q = Bytes.toString(CellUtil.cloneQualifier(cell)) if (q.equals("counter")) { b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")") } else { b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")") } } b.toString() }) getRdd.collect().foreach(v => println(v)) } finally { sc.stop() } } }
Example 48
Source File: HBaseMapPartitionExample.scala From SparkOnHBase with Apache License 2.0 | 5 votes |
package org.apache.hadoop.hbase.spark.example.rdd

import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.{CellUtil, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseRDDFunctions._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkContext, SparkConf}

object HBaseMapPartitionExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseMapPartitionExample {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseMapPartitionExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      //[(Array[Byte])]
      val rdd = sc.parallelize(Array(
        Bytes.toBytes("1"),
        Bytes.toBytes("2"),
        Bytes.toBytes("3"),
        Bytes.toBytes("4"),
        Bytes.toBytes("5"),
        Bytes.toBytes("6"),
        Bytes.toBytes("7")))

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      // Share one Table instance per partition and issue a Get per record.
      val getRdd = rdd.hbaseMapPartitions[String](hbaseContext, (it, connection) => {
        val table = connection.getTable(TableName.valueOf(tableName))
        it.map { r =>
          //batching would be faster. This is just an example
          val result = table.get(new Get(r))
          val cellIter = result.listCells().iterator()
          val b = new StringBuilder
          b.append(Bytes.toString(result.getRow) + ":")
          while (cellIter.hasNext) {
            val cell = cellIter.next()
            // CellUtil.cloneQualifier/cloneValue copy just the qualifier and value bytes;
            // getQualifierArray/getValueArray return the whole backing array and would
            // produce garbled strings here.
            val q = Bytes.toString(CellUtil.cloneQualifier(cell))
            if (q.equals("counter")) {
              b.append("(" + q + "," + Bytes.toLong(CellUtil.cloneValue(cell)) + ")")
            } else {
              b.append("(" + q + "," + Bytes.toString(CellUtil.cloneValue(cell)) + ")")
            }
          }
          b.toString()
        }
      })

      getRdd.collect().foreach(v => println(v))
    } finally {
      sc.stop()
    }
  }
}
Example 49
Source File: HBaseTest.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import org.apache.hadoop.hbase.client.HBaseAdmin import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName} import org.apache.hadoop.hbase.mapreduce.TableInputFormat import org.apache.spark._ object HBaseTest { def main(args: Array[String]) { val sparkConf = new SparkConf().setAppName("HBaseTest") val sc = new SparkContext(sparkConf) // please ensure HBASE_CONF_DIR is on classpath of spark driver // e.g: set it through spark.driver.extraClassPath property // in spark-defaults.conf or through --driver-class-path // command line option of spark-submit val conf = HBaseConfiguration.create() if (args.length < 1) { System.err.println("Usage: HBaseTest <table_name>") System.exit(1) } // Other options for configuring scan behavior are available. More information available at // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html conf.set(TableInputFormat.INPUT_TABLE, args(0)) // Initialize hBase table if necessary val admin = new HBaseAdmin(conf) if (!admin.isTableAvailable(args(0))) { val tableDesc = new HTableDescriptor(TableName.valueOf(args(0))) admin.createTable(tableDesc) } val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result]) hBaseRDD.count() sc.stop() admin.close() } } // scalastyle:on println
Example 50
Source File: HBaseStreamingBulkPutExample.scala From SparkOnHBase with Apache License 2.0 | 4 votes |
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.SparkConf

object HBaseStreamingBulkPutExample {
  def main(args: Array[String]) {
    if (args.length < 4) {
      println("HBaseStreamingBulkPutExample " +
        "{host} {port} {tableName} {columnFamily}")
      return
    }

    val host = args(0)
    val port = args(1)
    val tableName = args(2)
    val columnFamily = args(3)

    val sparkConf = new SparkConf().setAppName("HBaseStreamingBulkPutExample " +
      tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    try {
      val ssc = new StreamingContext(sc, Seconds(1))

      val lines = ssc.socketTextStream(host, port.toInt)

      val conf = HBaseConfiguration.create()
      val hbaseContext = new HBaseContext(sc, conf)

      // Every non-empty line becomes a row keyed by the line itself,
      // with a single constant cell in family "c".
      hbaseContext.streamBulkPut[String](lines,
        TableName.valueOf(tableName),
        (putRecord) => {
          if (putRecord.length() > 0) {
            val put = new Put(Bytes.toBytes(putRecord))
            put.addColumn(Bytes.toBytes("c"), Bytes.toBytes("foo"), Bytes.toBytes("bar"))
            put
          } else {
            null
          }
        })
      ssc.start()
      ssc.awaitTerminationOrTimeout(60000)
    } finally {
      sc.stop()
    }
  }
}
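To drive socketTextStream locally you need something listening on the given port that writes newline-terminated text. A minimal feeder sketch using only the JDK; it accepts one client (the Spark receiver) and emits a line per second.

import java.io.PrintWriter
import java.net.ServerSocket

object SocketLineFeeder {
  def main(args: Array[String]): Unit = {
    val port = args(0).toInt
    val server = new ServerSocket(port)
    val socket = server.accept() // Spark's socketTextStream connects here
    val out = new PrintWriter(socket.getOutputStream, true)
    try {
      (1 to 60).foreach { i =>
        out.println(s"row-$i")
        Thread.sleep(1000)
      }
    } finally {
      out.close()
      socket.close()
      server.close()
    }
  }
}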