org.apache.hadoop.hbase.mapreduce.TableInputFormat Scala Examples
The following examples show how to use org.apache.hadoop.hbase.mapreduce.TableInputFormat.
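All of the examples below follow the same basic pattern: put the table name (and optionally a serialized Scan) into an HBase Configuration, then hand TableInputFormat to SparkContext.newAPIHadoopRDD to get an RDD of (ImmutableBytesWritable, Result) pairs. A minimal, self-contained sketch of that pattern; the quorum, table, and column family names are placeholders, not taken from any example below:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object TableInputFormatSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("TableInputFormatSketch"))

    // Placeholder connection and scan settings; adjust for your cluster.
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "zk1,zk2,zk3")
    conf.set(TableInputFormat.INPUT_TABLE, "my_table")
    conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf")

    // Each record is the row key plus the full Result for that row.
    val rdd = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])

    val rowKeys = rdd.map { case (key, _) => Bytes.toString(key.copyBytes()) }
    println(rowKeys.count())

    sc.stop()
  }
}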
Example 1
Source File: HBase.scala From AI with Apache License 2.0
package com.bigchange.hbase

import com.bigchange.util.HBaseUtil._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Result, _}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.spark.SparkContext

  def existRowKey(row: String, table: Table): Boolean = {
    val get = new Get(row.getBytes())
    val result = table.get(get)

    if (result.isEmpty) {
      warn("hbase table doesn't have this row, executing insert")
      return false
    }

    true
  }

  def getConfiguration = if (hBaseConfiguration == null) {
    warn("hbase setDefaultConfiguration....")
    setDefaultConfiguration
  } else hBaseConfiguration

  def setDefaultConfiguration = {
    hBaseConfiguration = HBaseConfiguration.create

    // Settings needed for local testing; on a cluster they are picked up
    // automatically from the corresponding configuration files.
    hBaseConfiguration.set("fs.defaultFS", "hdfs://ns1")                       // nameservice path
    hBaseConfiguration.set("dfs.nameservices", "ns1")
    hBaseConfiguration.set("dfs.ha.namenodes.ns1", "nn1,nn2")                  // namenode list
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn1", "server3:9000") // namenode RPC address
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn2", "server4:9000") // namenode RPC address
    // Implementation class for automatic namenode failover
    hBaseConfiguration.set("dfs.client.failover.proxy.provider.ns1",
      "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")

    hBaseConfiguration.set("hbase.rootdir", "hdfs://ns1/hbase")
    hBaseConfiguration.set("hbase.zookeeper.quorum", "server0,server1,server2")
    hBaseConfiguration.set("hbase.zookeeper.property.clientPort", "2181")

    hBaseConfiguration
  }
}
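The ProtobufUtil, ClientProtos, and Base64 imports above suggest that the part of this class elided from the excerpt serializes a Scan into the job configuration before reading through TableInputFormat. A hedged sketch of that common idiom; the helper name, table name, and column family are assumptions, not the project's actual code:

import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.Base64

// Serialize a Scan so TableInputFormat can pick it up from the configuration.
def convertScanToString(scan: Scan): String =
  Base64.encodeBytes(ProtobufUtil.toScan(scan).toByteArray)

// Hypothetical usage: restrict the Spark job to a single column family.
val scan = new Scan()
scan.addFamily("cf".getBytes("UTF-8"))
val conf = getConfiguration
conf.set(TableInputFormat.INPUT_TABLE, "some_table")
conf.set(TableInputFormat.SCAN, convertScanToString(scan))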
Example 2
Source File: HBaseReaders.scala From cuesheet with Apache License 2.0
package com.kakao.cuesheet.convert

import com.kakao.mango.util.Conversions._
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.JavaConversions._

trait HBaseReaders {
  val sc: SparkContext

  def hbaseTable(quorum: String, table: String): RDD[(String, ((String, String), (Long, String)))] = {
    hbaseTableBinary(quorum, table).map {
      case (rowkey, ((family, qualifier), (timestamp, value))) =>
        (rowkey.string, ((family.string, qualifier.string), (timestamp, value.string)))
    }
  }

  def hbaseColumnBinary(quorum: String, table: String, family: Array[Byte], qualifier: Array[Byte]): RDD[(Array[Byte], (Long, Array[Byte]))] = {
    hbaseTableBinary(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family.sameElements(f) && qualifier.sameElements(q) => (rowkey, cell)
    }
  }

  def hbaseColumn(quorum: String, table: String, family: String, qualifier: String): RDD[(String, (Long, String))] = {
    hbaseTable(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family == f && qualifier == q => (rowkey, cell)
    }
  }
}
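The trait relies on hbaseTableBinary, which is not part of this excerpt. One plausible way to back it with TableInputFormat, emitting one record per cell keyed by row key, is sketched below; this is an assumption about its shape, not the cuesheet project's actual implementation:

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.rdd.RDD

// Sketch: one output record per cell, keyed by row key (uses the trait's `sc`).
def hbaseTableBinary(quorum: String, table: String): RDD[(Array[Byte], ((Array[Byte], Array[Byte]), (Long, Array[Byte])))] = {
  val conf = HBaseConfiguration.create()
  conf.set("hbase.zookeeper.quorum", quorum)
  conf.set(TableInputFormat.INPUT_TABLE, table)

  sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], classOf[Result])
    .flatMap { case (rowkey, result) =>
      result.rawCells().map { cell =>
        (rowkey.copyBytes(),
          ((CellUtil.cloneFamily(cell), CellUtil.cloneQualifier(cell)),
           (cell.getTimestamp, CellUtil.cloneValue(cell))))
      }
    }
}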
Example 3
Source File: HBaseTest.scala From iolap with Apache License 2.0
package org.apache.spark.examples

import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark._

object HBaseTest {
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("HBaseTest")
    val sc = new SparkContext(sparkConf)

    val conf = HBaseConfiguration.create()
    // Other options for configuring scan behavior are available. More information available at
    // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html
    conf.set(TableInputFormat.INPUT_TABLE, args(0))

    // Initialize hBase table if necessary
    val admin = new HBaseAdmin(conf)
    if (!admin.isTableAvailable(args(0))) {
      val tableDesc = new HTableDescriptor(TableName.valueOf(args(0)))
      admin.createTable(tableDesc)
    }

    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    hBaseRDD.count()

    sc.stop()
  }
}
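Beyond counting rows, each Result can be unpacked into column values. A small hypothetical follow-up to the example above; the column family "cf" and qualifier "col" are placeholders:

import org.apache.hadoop.hbase.util.Bytes

// Pull one column out of every row; missing cells come back as null.
val values = hBaseRDD.map { case (key, result) =>
  val cell = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("col"))
  (Bytes.toString(key.copyBytes()), Option(cell).map(b => Bytes.toString(b)).orNull)
}
values.take(10).foreach(println)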
Example 4
Source File: HBaseTest.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark._

object HBaseTest {
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("HBaseTest")
    val sc = new SparkContext(sparkConf)

    // please ensure HBASE_CONF_DIR is on classpath of spark driver
    // e.g: set it through spark.driver.extraClassPath property
    // in spark-defaults.conf or through --driver-class-path
    // command line option of spark-submit
    val conf = HBaseConfiguration.create()

    if (args.length < 1) {
      System.err.println("Usage: HBaseTest <table_name>")
      System.exit(1)
    }

    // Other options for configuring scan behavior are available. More information available at
    // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html
    conf.set(TableInputFormat.INPUT_TABLE, args(0))

    // Initialize hBase table if necessary
    val admin = new HBaseAdmin(conf)
    if (!admin.isTableAvailable(args(0))) {
      val tableDesc = new HTableDescriptor(TableName.valueOf(args(0)))
      admin.createTable(tableDesc)
    }

    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    hBaseRDD.count()

    sc.stop()
    admin.close()
  }
}
// scalastyle:on println
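Instantiating HBaseAdmin directly, as above, is deprecated from HBase 1.0 onwards. If the target cluster runs a newer client, the same table check can be written against the Connection API; a sketch under that assumption (the column family name is a placeholder):

import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory

// Connection-managed Admin instead of the deprecated HBaseAdmin constructor.
val connection = ConnectionFactory.createConnection(conf)
val admin = connection.getAdmin
if (!admin.tableExists(TableName.valueOf(args(0)))) {
  val tableDesc = new HTableDescriptor(TableName.valueOf(args(0)))
  tableDesc.addFamily(new HColumnDescriptor("cf"))
  admin.createTable(tableDesc)
}
admin.close()
connection.close()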
Example 5
Source File: HogHBaseReputation.scala From hogzilla with GNU General Public License v2.0
package org.hogzilla.hbase

import scala.math.random
import java.lang.Math
import org.apache.spark._
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionModel, LinearRegressionWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.rdd.RDD
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.BinaryComparator
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.CompareFilter
import java.util.ArrayList
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.Filter
import scala.collection.mutable.HashSet
import org.apache.hadoop.hbase.client.Put

object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    val filters: ArrayList[Filter] = new ArrayList()

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false)

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false)

    filters.add(colValFilter1)
    filters.add(colValFilter2)

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters)

    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()
    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }

  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }
}
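A short hypothetical usage of the two helpers above; the list name, list type, and IP address are made up:

// Record one IP on a whitelist, then read the list back.
HogHBaseReputation.saveReputationList("whitelist", "MX", "192.0.2.10")
val whitelisted: Set[String] = HogHBaseReputation.getReputationList("whitelist", "MX")
println(whitelisted.contains("192.0.2.10"))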
Example 6
Source File: GraphX.scala From unicorn with Apache License 2.0
package unicorn.narwhal.graph

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import unicorn.bigtable.hbase.HBaseTable
import unicorn.json._
import unicorn.unibase.graph.{ReadOnlyGraph, GraphSerializer, GraphVertexColumnFamily, GraphOutEdgeColumnFamily}

  def graphx(sc: SparkContext): org.apache.spark.graphx.Graph[JsObject, (String, JsValue)] = {
    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, name)
    conf.setInt(TableInputFormat.SCAN_CACHEDROWS, 500)
    conf.setBoolean(TableInputFormat.SCAN_CACHEBLOCKS, false)
    conf.set(TableInputFormat.SCAN_COLUMNS, s"$GraphVertexColumnFamily $GraphOutEdgeColumnFamily")

    val rdd = sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    val rows = rdd.mapPartitions { it =>
      val serializer = new GraphSerializer()
      it.map { tuple =>
        val row = HBaseTable.getRow(tuple._2)
        serializer.deserializeVertex(row)
      }
    }

    val vertices = rows.map { vertex => (vertex.id, vertex.properties) }

    val edges = rows.flatMap { vertex =>
      vertex.edges.map { edge =>
        org.apache.spark.graphx.Edge(edge.from, edge.to, (edge.label, edge.properties))
      }
    }

    org.apache.spark.graphx.Graph(vertices, edges)
  }
}
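The method above belongs to a graph class that the excerpt cuts off, which is where `name` comes from. Assuming an instance of that class is available as `g`, a hypothetical usage looks like this:

// Build the GraphX graph and run a standard computation on it.
val graph = g.graphx(sc)
println(s"${graph.vertices.count()} vertices, ${graph.edges.count()} edges")
val ranks = graph.pageRank(0.001).vertices   // PageRank with tolerance 0.001
ranks.take(10).foreach(println)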