org.apache.hadoop.hbase.io.ImmutableBytesWritable Scala Examples

The following examples show how to use org.apache.hadoop.hbase.io.ImmutableBytesWritable. Each example notes the project and source file it was taken from.
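Across these examples, ImmutableBytesWritable is used as the key type when HBase tables are read or written through Spark's Hadoop input/output formats; it simply wraps the row key bytes. As a minimal sketch of the common read pattern (the object and parameter names below are illustrative, not taken from any of the projects):

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object ImmutableBytesWritableReadSketch {
  // Returns the raw (row key, row) pairs of an HBase table as an RDD.
  def readTable(sc: SparkContext, quorum: String, table: String): RDD[(ImmutableBytesWritable, Result)] = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", quorum)
    conf.set(TableInputFormat.INPUT_TABLE, table)
    sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable], // key: the row key bytes
      classOf[Result]                  // value: the full row
    )
  }
}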
Example 1
Source File: HBase.scala    From AI    with Apache License 2.0
package com.bigchange.hbase

import com.bigchange.util.HBaseUtil._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Result, _}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.spark.SparkContext


// The enclosing object is not shown in the original excerpt; a minimal wrapper is assumed here.
object HBase {

  private var hBaseConfiguration: Configuration = _

  def existRowKey(row: String, table: Table): Boolean = {

    val get = new Get(row.getBytes())
    val result = table.get(get)

    if (result.isEmpty) {
      warn("hbase table don't have this data,execute insert")
      return false
    }

    true

  }

  def getConfiguration = if(hBaseConfiguration == null) {
      warn("hbase setDefaultConfiguration....")
      setDefaultConfiguration
    } else  hBaseConfiguration

  def setDefaultConfiguration = {

    hBaseConfiguration = HBaseConfiguration.create

    // Settings needed for local testing; on a cluster they are picked up from the corresponding configuration files
    hBaseConfiguration.set("fs.defaultFS", "hdfs://ns1")  // HDFS nameservice path
    hBaseConfiguration.set("dfs.nameservices", "ns1")
    hBaseConfiguration.set("dfs.ha.namenodes.ns1", "nn1,nn2")  // NameNodes of the nameservice
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn1", "server3:9000")  // NameNode RPC address
    hBaseConfiguration.set("dfs.namenode.rpc-address.ns1.nn2", "server4:9000")  // NameNode RPC address
    // Implementation class for automatic NameNode failover
    hBaseConfiguration.set("dfs.client.failover.proxy.provider.ns1", "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
    hBaseConfiguration.set("hbase.rootdir", "hdfs://ns1/hbase")
    hBaseConfiguration.set("hbase.zookeeper.quorum", "server0,server1,server2")
    hBaseConfiguration.set("hbase.zookeeper.property.clientPort", "2181")

    hBaseConfiguration

  }

} 
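One plausible way to use the helpers above (assuming the wrapper object shown in the excerpt) is to open a Connection from the returned configuration and pass a Table to existRowKey; the table name and row key below are placeholders, and ConnectionFactory and TableName are covered by the imports at the top of the example:

  val conf = HBase.getConfiguration
  val connection = ConnectionFactory.createConnection(conf)
  val table = connection.getTable(TableName.valueOf("user_profile")) // placeholder table name
  val exists = HBase.existRowKey("row-001", table)                   // placeholder row key
  table.close()
  connection.close()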
Example 2
Source File: HbRddWriter.scala    From hbrdd    with Apache License 2.0
package top.spoofer.hbrdd.hbsupport

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.spark.rdd.RDD
import top.spoofer.hbrdd.config.HbRddConfig
import top.spoofer.hbrdd.unit.HbRddFormatsWriter
import top.spoofer.hbrdd._
import HbRddWritPuter._

trait HbRddWriter {
  type TsValue[A] = (Long, A) // (ts, A)
  val LATEST_TIMESTAMP = Long.MaxValue
}

final class SingleFamilyRDDWriter[A](
    val rdd: RDD[(String, Map[String, A])],
    val put: HbRddPuter[A]
) extends HbRddWritCommon[A] with Serializable {
  def put2Hbase(tableName: String, family: String)(implicit config: HbRddConfig) = {
    val job = createJob(tableName, config.getHbaseConfig)
    rdd.flatMap({ case (rowId, data) => convert2Writable(rowId, Map(family -> data), put) })
      .saveAsNewAPIHadoopDataset(job.getConfiguration)
  }
} 
Example 3
Source File: HBaseReaders.scala    From cuesheet    with Apache License 2.0
package com.kakao.cuesheet.convert

import com.kakao.mango.util.Conversions._
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.JavaConversions._

trait HBaseReaders {
  val sc: SparkContext

  
  // hbaseTableBinary (defined in this trait, omitted from this excerpt) scans the table and yields
  // (rowkey, ((family, qualifier), (timestamp, value))) with all fields except the timestamp as raw bytes.
  def hbaseTable(quorum: String, table: String): RDD[(String, ((String, String), (Long, String)))] = {
    hbaseTableBinary(quorum, table).map {
      case (rowkey, ((family, qualifier), (timestamp, value))) =>
        (rowkey.string, ((family.string, qualifier.string), (timestamp, value.string)))
    }
  }

  def hbaseColumnBinary(quorum: String, table: String, family: Array[Byte], qualifier: Array[Byte]): RDD[(Array[Byte], (Long, Array[Byte]))] = {
    hbaseTableBinary(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family.sameElements(f) && qualifier.sameElements(q) => (rowkey, cell)
    }
  }

  def hbaseColumn(quorum: String, table: String, family: String, qualifier: String): RDD[(String, (Long, String))] = {
    hbaseTable(quorum, table).collect {
      case (rowkey, ((f, q), cell)) if family == f && qualifier == q => (rowkey, cell)
    }
  }
} 
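Since HBaseReaders only requires a SparkContext, it can be instantiated anonymously. A hedged usage sketch, where the existing SparkContext, quorum, table, family and qualifier are placeholders:

  val readers = new HBaseReaders {
    val sc: SparkContext = sparkContext // an existing SparkContext, assumed to be in scope
  }
  // All cells of family "cf" and qualifier "name", decoded as strings and keyed by row key
  val names: RDD[(String, (Long, String))] =
    readers.hbaseColumn("zk1,zk2,zk3", "users", "cf", "name")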
Example 4
Source File: HBaseSimpleRDD.scala    From spark-hbase-connector    with Apache License 2.0
package it.nerdammer.spark.hbase

import it.nerdammer.spark.hbase.conversion.FieldReader
import org.apache.hadoop.hbase.CellUtil
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.{NewHadoopRDD, RDD}
import org.apache.spark.{Partition, TaskContext}

import scala.reflect.ClassTag

class HBaseSimpleRDD[R: ClassTag](hadoopHBase: NewHadoopRDD[ImmutableBytesWritable, Result], builder: HBaseReaderBuilder[R], saltingLength: Int = 0)
                       (implicit mapper: FieldReader[R], saltingProvider: SaltingProviderFactory[String]) extends RDD[R](hadoopHBase) {

  override def getPartitions: Array[Partition] = firstParent[(ImmutableBytesWritable, Result)].partitions

  override def compute(split: Partition, context: TaskContext) = {
    // val cleanConversion = sc.clean ---> next version
    firstParent[(ImmutableBytesWritable, Result)].iterator(split, context)
      .map(e => conversion(e._1, e._2))
  }

  def conversion(key: ImmutableBytesWritable, row: Result) = {

    val columnNames = HBaseUtils.chosenColumns(builder.columns, mapper.columns)

    val columnNamesFC = HBaseUtils.columnsWithFamily(builder.columnFamily, columnNames)

    val columns = columnNamesFC
      .map(t => (Bytes.toBytes(t._1), Bytes.toBytes(t._2)))
      .map(t => if(row.containsColumn(t._1, t._2)) Some(CellUtil.cloneValue(row.getColumnLatestCell(t._1, t._2)).array) else None)
      .toList

    mapper.map(Some(key.get.drop(saltingLength)) :: columns)
  }
} 
Example 5
Source File: GraphX.scala    From unicorn    with Apache License 2.0
package unicorn.narwhal.graph

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.SparkContext

import unicorn.bigtable.hbase.HBaseTable
import unicorn.json._
import unicorn.unibase.graph.{ReadOnlyGraph, GraphSerializer, GraphVertexColumnFamily, GraphOutEdgeColumnFamily}


// The enclosing class is not shown in the original excerpt; a minimal wrapper is assumed here,
// where `name` is the name of the underlying HBase table.
class GraphX(val name: String) {

  def graphx(sc: SparkContext): org.apache.spark.graphx.Graph[JsObject, (String, JsValue)] = {

    val conf = HBaseConfiguration.create()
    conf.set(TableInputFormat.INPUT_TABLE, name)
    conf.setInt(TableInputFormat.SCAN_CACHEDROWS, 500)
    conf.setBoolean(TableInputFormat.SCAN_CACHEBLOCKS, false)
    conf.set(TableInputFormat.SCAN_COLUMNS, s"$GraphVertexColumnFamily $GraphOutEdgeColumnFamily")

    val rdd = sc.newAPIHadoopRDD(
      conf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    val rows = rdd.mapPartitions { it =>
      val serializer = new GraphSerializer()
      it.map { tuple =>
        val row = HBaseTable.getRow(tuple._2)
        serializer.deserializeVertex(row)
      }
    }

    val vertices = rows.map { vertex =>
      (vertex.id, vertex.properties)
    }

    val edges = rows.flatMap { vertex =>
      vertex.edges.map { edge =>
        org.apache.spark.graphx.Edge(edge.from, edge.to, (edge.label, edge.properties))
      }
    }

    org.apache.spark.graphx.Graph(vertices, edges)
  }
} 
Example 6
Source File: Phoenix.scala    From shc    with Apache License 2.0
package org.apache.spark.sql.execution.datasources.hbase.types

import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.phoenix.query.QueryConstants
import org.apache.phoenix.schema._
import org.apache.phoenix.schema.RowKeySchema.RowKeySchemaBuilder
import org.apache.phoenix.schema.types._
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.hbase._
import org.apache.spark.sql.types._

class Phoenix(f:Option[Field] = None) extends SHCDataType {
  private var schema: RowKeySchema = null

  def fromBytes(src: HBaseType): Any = {
    if (f.isDefined) {
      mapToPhoenixTypeInstance(f.get.dt).toObject(src)
    } else {
      throw new UnsupportedOperationException(
        "Phoenix coder: without field metadata, 'fromBytes' conversion can not be supported")
    }
  }

  def toBytes(input: Any): Array[Byte] = {
    input match {
      case data: Boolean => PBoolean.INSTANCE.toBytes(data)
      case data: Byte => PTinyint.INSTANCE.toBytes(data)
      case data: Array[Byte] => PVarbinary.INSTANCE.toBytes(data)
      case data: Double => PDouble.INSTANCE.toBytes(data)
      case data: Float => PFloat.INSTANCE.toBytes(data)
      case data: Int => PInteger.INSTANCE.toBytes(data)
      case data: Long => PLong.INSTANCE.toBytes(data)
      case data: Short => PSmallint.INSTANCE.toBytes(data)
      case data: String => PVarchar.INSTANCE.toBytes(data)
      case _ => throw new UnsupportedOperationException(s"unsupported data type $input")
    }
  }

  override def isRowKeySupported(): Boolean = true

  override def isCompositeKeySupported(): Boolean = true

  override def decodeCompositeRowKey(row: Array[Byte], keyFields: Seq[Field]): Map[Field, Any] = {
    if (schema == null) schema = buildSchema(keyFields)
    val ptr: ImmutableBytesWritable = new ImmutableBytesWritable
    val maxOffset = schema.iterator(row, 0, row.length, ptr)
    var ret = Map.empty[Field, Any]
    for (i <- 0 until schema.getFieldCount) {
      if (schema.next(ptr, i, maxOffset) != null) {
        val value = mapToPhoenixTypeInstance(keyFields(i).dt)
          .toObject(ptr, schema.getField(i).getDataType, SortOrder.getDefault)
        ret += ((keyFields(i), value))
      }
    }
    ret
  }

  override def encodeCompositeRowKey(rkIdxedFields: Seq[(Int, Field)], row: Row): Seq[Array[Byte]] = {
    rkIdxedFields.map { case (x, y) =>
      var ret = toBytes(row(x))
      // the last dimension of composite key does not need SEPARATOR
      if (y.length == -1 && x < rkIdxedFields.size - 1)
        ret ++= QueryConstants.SEPARATOR_BYTE_ARRAY
      ret
    }
  }

  private def buildSchema(keyFields: Seq[Field]): RowKeySchema = {
    val builder: RowKeySchemaBuilder = new RowKeySchemaBuilder(keyFields.length)
    keyFields.foreach{ x =>
      builder.addField(buildPDatum(x.dt), false, SortOrder.getDefault)
    }
    builder.build
  }

  private def mapToPhoenixTypeInstance(input: DataType): PDataType[_] = {
    input match {
      case BooleanType => PBoolean.INSTANCE
      case ByteType => PTinyint.INSTANCE
      case DoubleType => PDouble.INSTANCE
      case IntegerType => PInteger.INSTANCE
      case FloatType => PFloat.INSTANCE
      case LongType => PLong.INSTANCE
      case ShortType => PSmallint.INSTANCE
      case StringType => PVarchar.INSTANCE
      case BinaryType => PVarbinary.INSTANCE
      case _ => throw new UnsupportedOperationException(s"unsupported data type $input")
    }
  }

  private def buildPDatum(input: DataType): PDatum = new PDatum {
    override def getScale: Integer = null
    override def isNullable: Boolean = false
    override def getDataType: PDataType[_] = mapToPhoenixTypeInstance(input)
    override def getMaxLength: Integer = null
    override def getSortOrder: SortOrder = SortOrder.getDefault
  }
} 
Example 7
Source File: HBaseUtils.scala    From bigdata-examples    with Apache License 2.0
package com.timeyang.common.util

import com.timeyang.common.config.BaseConf
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos
import org.apache.hadoop.hbase.util.Base64
import org.apache.hadoop.mapreduce.Job


// The enclosing object is not shown in full in the original excerpt; newConf(), which builds the
// HBase Configuration for the cluster, is omitted here.
object HBaseUtils {

  def createHbaseOutputJob(tableName: String): Job = {
    val conf = HBaseUtils.newConf()
    conf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
    val job = Job.getInstance(conf)
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Put])
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    job
  }

}
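The Job built by createHbaseOutputJob is typically consumed by saveAsNewAPIHadoopDataset on an RDD of (ImmutableBytesWritable, Put) pairs, mirroring Example 2. A hedged sketch, where the events RDD, table name, family and qualifier are placeholders:

  import org.apache.hadoop.hbase.util.Bytes

  // events: RDD[(String, String)] of (rowKey, value) pairs, assumed to exist already
  val puts = events.map { case (rowKey, value) =>
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("v"), Bytes.toBytes(value))
    (new ImmutableBytesWritable(Bytes.toBytes(rowKey)), put)
  }
  puts.saveAsNewAPIHadoopDataset(HBaseUtils.createHbaseOutputJob("events").getConfiguration)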