org.apache.hadoop.hbase.client.Scan Scala Examples
The following examples show how to use org.apache.hadoop.hbase.client.Scan.
Each example is drawn from an open-source project; the source file, project, and license are noted above the code.
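Before the project examples, here is a minimal, self-contained sketch of a typical Scan: bound the scan with start and stop row keys, project a single column, and iterate the ResultScanner. The table name "example_table", column family "cf", and qualifier "col1" are placeholder names, not taken from any project below, and the calls assume a reasonably recent (1.4+/2.x) HBase client.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.JavaConverters._

object ScanSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical table and column names, used only for illustration
    val connection = ConnectionFactory.createConnection(HBaseConfiguration.create())
    val table = connection.getTable(TableName.valueOf("example_table"))
    try {
      val scan = new Scan()
        .withStartRow(Bytes.toBytes("row-000"))                // inclusive start of the key range
        .withStopRow(Bytes.toBytes("row-999"))                 // exclusive end of the key range
        .addColumn(Bytes.toBytes("cf"), Bytes.toBytes("col1")) // project a single column
      scan.setCaching(100)                                     // rows fetched per RPC

      val scanner = table.getScanner(scan)
      try {
        scanner.asScala.foreach { result =>
          val key = Bytes.toString(result.getRow)
          val value = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("col1")))
          println(s"$key -> $value")
        }
      } finally scanner.close()
    } finally {
      table.close()
      connection.close()
    }
  }
}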
Example 1
Source File: HBaseServiceLayer.scala From Taxi360 with Apache License 2.0
package com.hadooparchitecturebook.taxi360.server.hbase

import javax.ws.rs._
import javax.ws.rs.core.MediaType

import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTrip
import com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

@Path("rest")
class HBaseServiceLayer {

  @GET
  @Path("hello")
  @Produces(Array(MediaType.TEXT_PLAIN))
  def hello(): String = {
    "Hello World"
  }

  @GET
  @Path("vender/{venderId}/timeline")
  @Produces(Array(MediaType.APPLICATION_JSON))
  def getTripTimeLine(@PathParam("venderId") venderId: String,
                      @QueryParam("startTime") startTime: String = Long.MinValue.toString,
                      @QueryParam("endTime") endTime: String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = {

    val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName))

    val st = if (startTime == null) Long.MinValue.toString else startTime
    val et = if (endTime == null) Long.MaxValue.toString else endTime

    // Bound the scan with salted row keys built from the vendor id and time window
    val scan = new Scan()
    val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts)
    println("startRowKey:" + Bytes.toString(startRowKey))
    scan.setStartRow(startRowKey)

    val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts)
    println("endRowKey:" + Bytes.toString(endRowKey))
    scan.setStopRow(endRowKey)

    // Materialize every matching row as a NyTaxiYellowTrip
    val scannerIt = table.getScanner(scan).iterator()
    val tripList = new mutable.MutableList[NyTaxiYellowTrip]

    while (scannerIt.hasNext) {
      val result = scannerIt.next()
      tripList += TaxiTripHBaseHelper.convertToTaxiTrip(result)
      println("Found a trip:" + TaxiTripHBaseHelper.convertToTaxiTrip(result))
    }

    println("tripList.size:" + tripList.size)

    tripList.toArray
  }
}
Example 2
Source File: HbaseScanner.scala From eel-sdk with Apache License 2.0
package io.eels.component.hbase

import io.eels.component.hbase.HbaseScanner.copy
import io.eels.schema.StructType
import org.apache.hadoop.hbase.client.Scan

object HbaseScanner {

  def apply(schema: StructType, hbaseSource: HbaseSource)(implicit serializer: HbaseSerializer): Scan = {
    val scan = new Scan
    val keyField = schema.fields.find(_.key).getOrElse(sys.error("HBase requires a single column to be defined as a key"))

    // Apply each optional scan setting only when it has been configured on the source
    hbaseSource.cacheBlocks.filterNot(_ == None).map(scan.setCacheBlocks)
    hbaseSource.caching.filterNot(_ == None).map(scan.setCaching)
    hbaseSource.batch.filterNot(_ == None).map(scan.setBatch)
    hbaseSource.startKey.filterNot(_ == None).map(startKey => scan.withStartRow(copy(serializer.toBytes(startKey, keyField.name, keyField.dataType))))
    hbaseSource.stopKey.map { key =>
      val stopKey = copy(serializer.toBytes(key, keyField.name, keyField.dataType))
      // If the stop key is marked as inclusive then increment the last byte by one - not fully tested
      if (hbaseSource.stopKeyInclusive) {
        val lastByteIncremented = (stopKey.last.toShort + 1).toByte
        stopKey(stopKey.length - 1) = if (lastByteIncremented > stopKey.last) lastByteIncremented else stopKey.last
      }
      scan.withStopRow(stopKey)
    }
    hbaseSource.consistency.filterNot(_ == None).map(scan.setConsistency)
    hbaseSource.isolationLevel.filterNot(_ == None).map(scan.setIsolationLevel)
    hbaseSource.timeRange.filterNot(_ == None).map(t => scan.setTimeRange(t._1, t._2))
    hbaseSource.timeStamp.filterNot(_ == None).map(scan.setTimeStamp)
    hbaseSource.maxVersions.filterNot(_ == None).map(scan.setMaxVersions)
    hbaseSource.maxResultsPerColumnFamily.filterNot(_ == None).map(scan.setMaxResultsPerColumnFamily)
    hbaseSource.rowOffsetPerColumnFamily.filterNot(_ == None).map(scan.setRowOffsetPerColumnFamily)
    hbaseSource.maxResultSize.filterNot(_ == None).map(scan.setMaxResultSize)
    hbaseSource.reverseScan.filterNot(_ == None).map(scan.setReversed)
    hbaseSource.allowPartialResults.filterNot(_ == None).map(scan.setAllowPartialResults)
    hbaseSource.loadColumnFamiliesOnDemand.filterNot(_ == None).map(scan.setLoadColumnFamiliesOnDemand)
    hbaseSource.returnDeletedRows.filterNot(_ == None).map(scan.setRaw)
    hbaseSource.identifier.filterNot(_ == None).map(scan.setId)
    hbaseSource.rowPrefixFilter.filterNot(_ == None).map(scan.setRowPrefixFilter)

    // Set up predicate push-downs
    hbaseSource.filterList.filterNot(_ == None).map(scan.setFilter)

    // Set up column projection schema
    schema.fields
      .filter(!_.key)
      .foreach(f => scan.addColumn(f.columnFamily.get.getBytes, f.name.getBytes))

    scan
  }

  // Defensive copy of a key buffer so the serializer's array is never mutated in place
  private def copy(sourceArray: Array[Byte]): Array[Byte] = {
    val bufferCopy = new Array[Byte](sourceArray.length)
    System.arraycopy(sourceArray, 0, bufferCopy, 0, bufferCopy.length)
    bufferCopy
  }
}
Example 3
Source File: HbasePublisher.scala From eel-sdk with Apache License 2.0
package io.eels.component.hbase

import java.util
import java.util.concurrent.atomic.AtomicBoolean

import com.sksamuel.exts.io.Using
import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.datastream.{Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Result, Scan}

import scala.collection.mutable.ArrayBuffer

class HbasePublisher(connection: Connection,
                     schema: StructType,
                     namespace: String,
                     tableName: String,
                     bufferSize: Int,
                     maxRows: Long,
                     scanner: Scan,
                     implicit val serializer: HbaseSerializer) extends Publisher[Seq[Row]] with Timed with Using {

  private val table = connection.getTable(TableName.valueOf(namespace, tableName))

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    try {
      using(new CloseableIterator) { rowIter =>
        val running = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(running))
        val buffer = new ArrayBuffer[Row](bufferSize)
        while (rowIter.hasNext && running.get()) {
          buffer append rowIter.next()
          if (buffer.size == bufferSize) {
            subscriber.next(buffer.toVector)
            buffer.clear()
          }
        }
        if (buffer.nonEmpty) subscriber.next(buffer.toVector)
        subscriber.completed()
      }
    } catch {
      case t: Throwable => subscriber.error(t)
    }
  }

  class CloseableIterator extends Iterator[Row] with AutoCloseable {
    private val resultScanner = table.getScanner(scanner)
    private val resultScannerIter = resultScanner.iterator()
    private var rowCount = 0
    private var iter: Iterator[Row] = Iterator.empty

    override def hasNext: Boolean = rowCount < maxRows && iter.hasNext || {
      if (rowCount < maxRows && resultScannerIter.hasNext) {
        iter = HBaseResultsIterator(schema, resultScannerIter)
        iter.hasNext
      } else false
    }

    override def next(): Row = {
      rowCount += 1
      iter.next()
    }

    override def close(): Unit = {
      resultScanner.close()
    }
  }

  case class HBaseResultsIterator(schema: StructType, resultIter: util.Iterator[Result])(implicit serializer: HbaseSerializer) extends Iterator[Row] {
    override def hasNext: Boolean = resultIter.hasNext

    override def next(): Row = {
      val resultRow = resultIter.next()
      val values = schema.fields.map { field =>
        if (!field.key) {
          val value = resultRow.getValue(field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'")).getBytes, field.name.getBytes)
          if (value != null) serializer.fromBytes(value, field.name, field.dataType) else null
        } else serializer.fromBytes(resultRow.getRow, field.name, field.dataType)
      }
      Row(schema, values)
    }
  }
}
Example 4
Source File: HBaseDistributedScanExample.scala From SparkOnHBase with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.{TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Scan
import org.apache.spark.SparkConf

object HBaseDistributedScanExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("GenerateGraphs {tableName}")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val scan = new Scan()
      scan.setCaching(100)

      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)

      getRdd.foreach(v => println(Bytes.toString(v._1.get())))

      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length) //.collect().foreach(v => println(Bytes.toString(v._1.get())))
    } finally {
      sc.stop()
    }
  }
}
Example 5
Source File: HogHBaseReputation.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.hbase

import scala.math.random
import java.lang.Math
import org.apache.spark._
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionModel, LinearRegressionWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.rdd.RDD
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.BinaryComparator
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.CompareFilter
import java.util.ArrayList
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.Filter
import scala.collection.mutable.HashSet
import org.apache.hadoop.hbase.client.Put

object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    val filters: ArrayList[Filter] = new ArrayList()

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false)

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false)

    filters.add(colValFilter1)
    filters.add(colValFilter2)

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters)

    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()
    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }

  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }
}
Example 6
Source File: HBaseDistributedScanExample.scala From hbase-connectors with Apache License 2.0
package org.apache.hadoop.hbase.spark.example.hbasecontext

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.yetus.audience.InterfaceAudience

@InterfaceAudience.Private
object HBaseDistributedScanExample {
  def main(args: Array[String]) {
    if (args.length < 1) {
      println("HBaseDistributedScanExample {tableName} missing an argument")
      return
    }

    val tableName = args(0)

    val sparkConf = new SparkConf().setAppName("HBaseDistributedScanExample " + tableName)
    val sc = new SparkContext(sparkConf)

    try {
      val conf = HBaseConfiguration.create()

      val hbaseContext = new HBaseContext(sc, conf)

      val scan = new Scan()
      scan.setCaching(100)

      val getRdd = hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)

      getRdd.foreach(v => println(Bytes.toString(v._1.get())))

      println("Length: " + getRdd.map(r => r._1.copyBytes()).collect().length)
    } finally {
      sc.stop()
    }
  }
}
Example 7
Source File: HbaseReaderHelper.scala From stream-reactor with Apache License 2.0
package com.datamountaineer.streamreactor.connect.hbase.writers

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import com.datamountaineer.streamreactor.connect.hbase.HbaseHelper
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, TableName}

import scala.collection.JavaConverters._

object HbaseReaderHelper {
  def createConnection: Connection = {
    ConnectionFactory.createConnection(HBaseConfiguration.create())
  }

  def getAllRecords(tableName: String, columnFamily: String)(implicit connection: Connection): List[HbaseRowData] = {
    HbaseHelper.withTable(TableName.valueOf(tableName)) { tbl =>
      val scan = new Scan()
      scan.addFamily(columnFamily.fromString())
      val scanner = tbl.getScanner(scan)
      scanner.asScala.map { rs =>
        val cells = rs.rawCells().map { cell =>
          Bytes.toString(CellUtil.cloneQualifier(cell)) -> CellUtil.cloneValue(cell)
        }.toMap
        HbaseRowData(rs.getRow, cells)
      }.toList
    }
  }
}

case class HbaseRowData(key: Array[Byte], cells: Map[String, Array[Byte]])
Example 8
package org.apache.spark.sql

import java.io.File

import com.google.common.io.Files
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseTestingUtility}
import org.apache.hadoop.hbase.client.{Scan, Put, ConnectionFactory, Table}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.sql.types.UTF8String
import org.apache.spark.{SparkContext, SparkConf, Logging}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

import scala.collection.JavaConverters._

class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {

  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }

  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] def tableName = "table1"
  private[spark] def columnFamilies: Array[String] = Array.tabulate(9) { x => s"cf$x" }

  var table: Table = null

  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  SparkHBaseConf.conf = htu.getConfiguration
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  def catalog = s"""{
                   |"table":{"namespace":"default", "name":"table1"},
                   |"rowkey":"key",
                   |"columns":{
                   |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
                   |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
                   |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
                   |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
                   |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
                   |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
                   |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
                   |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
                   |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
                   |}
                   |}""".stripMargin

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")
  }

  override def afterAll() {
    try {
      table.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shutting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case _ => logError("teardown error")
    }
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      htu.deleteTable(TableName.valueOf(tName))
    } catch {
      case _ => logInfo(" - no table " + name + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }

  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      case _ => logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
}
Example 9
Source File: HBaseServiceLayer.scala From Taxi360 with Apache License 2.0
package com.cloudera.sa.taxi360.server.hbase

import javax.ws.rs._
import javax.ws.rs.core.MediaType

import com.cloudera.sa.taxi360.model.NyTaxiYellowTrip
import com.cloudera.sa.taxi360.streaming.ingestion.hbase.TaxiTripHBaseHelper
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Scan}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

@Path("rest")
class HBaseServiceLayer {

  @GET
  @Path("hello")
  @Produces(Array(MediaType.TEXT_PLAIN))
  def hello(): String = {
    "Hello World"
  }

  @GET
  @Path("vender/{venderId}/timeline")
  @Produces(Array(MediaType.APPLICATION_JSON))
  def getTripTimeLine(@PathParam("venderId") venderId: String,
                      @QueryParam("startTime") startTime: String = Long.MinValue.toString,
                      @QueryParam("endTime") endTime: String = Long.MaxValue.toString): Array[NyTaxiYellowTrip] = {

    val table = HBaseGlobalValues.connection.getTable(TableName.valueOf(HBaseGlobalValues.appEventTableName))

    val st = if (startTime == null) Long.MinValue.toString else startTime
    val et = if (endTime == null) Long.MaxValue.toString else endTime

    // Bound the scan with salted row keys built from the vendor id and time window
    val scan = new Scan()
    val startRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, st.toLong, HBaseGlobalValues.numberOfSalts)
    println("startRowKey:" + Bytes.toString(startRowKey))
    scan.setStartRow(startRowKey)

    val endRowKey = TaxiTripHBaseHelper.generateRowKey(venderId, et.toLong, HBaseGlobalValues.numberOfSalts)
    println("endRowKey:" + Bytes.toString(endRowKey))
    scan.setStopRow(endRowKey)

    // Materialize every matching row as a NyTaxiYellowTrip
    val scannerIt = table.getScanner(scan).iterator()
    val tripList = new mutable.MutableList[NyTaxiYellowTrip]

    while (scannerIt.hasNext) {
      val result = scannerIt.next()
      tripList += TaxiTripHBaseHelper.convertToTaxiTrip(result)
      println("Found a trip:" + TaxiTripHBaseHelper.convertToTaxiTrip(result))
    }

    println("tripList.size:" + tripList.size)

    tripList.toArray
  }
}