java.util.ArrayList Scala Examples
The following examples show how to use java.util.ArrayList from Scala.
You can vote up the examples you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example.
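Before the project examples, here is a minimal sketch of creating a java.util.ArrayList from Scala and converting between it and Scala collections; it assumes nothing beyond the Scala and Java standard libraries.

import java.util.ArrayList
import scala.collection.JavaConverters._

val names = new ArrayList[String]()
names.add("alice")
names.add("bob")

// View the Java list as a Scala collection, and go back the other way
val asScalaSeq: Seq[String] = names.asScala
val backToJava: java.util.List[String] = Seq("carol").asJava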
Example 1
Source File: DruidQueryExecutionMetric.scala From spark-druid-olap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sparklinedata.execution.metrics

import java.util.{ArrayList, Collections}

import org.apache.spark.util.AccumulatorV2
import org.sparklinedata.druid.metadata.{DruidQueryExecutionView, DruidQueryHistory}

class DruidQueryExecutionMetric extends
  AccumulatorV2[DruidQueryExecutionView, java.util.List[DruidQueryExecutionView]] {

  import scala.collection.JavaConverters._

  private val _list: java.util.List[DruidQueryExecutionView] =
    Collections.synchronizedList(new ArrayList[DruidQueryExecutionView]())

  private def getList: java.util.List[DruidQueryExecutionView] = {
    if (isAtDriverSide) DruidQueryHistory.getHistory.asJava else _list
  }

  override def isZero: Boolean = {
    _list.isEmpty
  }

  override def copy(): DruidQueryExecutionMetric = {
    val newAcc = new DruidQueryExecutionMetric
    newAcc._list.addAll(_list)
    newAcc
  }

  override def reset(): Unit = {
    _list.clear()
  }

  override def add(v: DruidQueryExecutionView): Unit = {
    if (isAtDriverSide) DruidQueryHistory.add(v) else _list.add(v)
  }

  private def addAll(v: java.util.List[DruidQueryExecutionView]): Unit = {
    v.asScala.foreach(add(_))
  }

  override def merge(
      other: AccumulatorV2[DruidQueryExecutionView, java.util.List[DruidQueryExecutionView]]): Unit =
    other match {
      case o: DruidQueryExecutionMetric => addAll(o._list)
      case _ =>
        throw new UnsupportedOperationException(
          s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
    }

  override def value = _list.synchronized {
    java.util.Collections.unmodifiableList(getList)
  }

  private[spark] def setValue(newValue: java.util.List[DruidQueryExecutionView]): Unit = {
    reset()
    addAll(newValue)
  }
}
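The accumulator above only takes effect once it is registered with a SparkContext. A minimal usage sketch, assuming a live SparkContext named sc and an already constructed DruidQueryExecutionView value named view (its constructor is not shown in this example):

val metric = new DruidQueryExecutionMetric()
sc.register(metric, "druidQueryExecutions")   // standard AccumulatorV2 registration

// On executors: record an execution; on the driver: read the merged, read-only history
metric.add(view)
val history: java.util.List[DruidQueryExecutionView] = metric.value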
Example 2
Source File: RddToDataFrame.scala From spark-sframe with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.turi

import org.graphlab.create.GraphLabUtil
import org.apache.spark.sql.{SQLContext, Row, DataFrame}
import org.apache.spark.rdd.RDD
import scala.collection.JavaConversions._
import org.apache.spark.sql.types._
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.ArrayBuffer
import scala.collection.immutable.Map
import java.util.HashMap
import java.util.ArrayList
import java.util.{Date, GregorianCalendar}
import java.sql.Date

object EvaluateRDD {

  def inferSchema(obj: Any): DataType = {
    if (obj.isInstanceOf[Int]) {
      IntegerType
    } else if (obj.isInstanceOf[String]) {
      StringType
    } else if (obj.isInstanceOf[Double]) {
      DoubleType
    } else if (obj.isInstanceOf[Long]) {
      LongType
    } else if (obj.isInstanceOf[Float]) {
      FloatType
    } else if (obj.isInstanceOf[Map[_, _]]) {
      MapType(inferSchema(obj.asInstanceOf[Map[_, _]].head._1), inferSchema(obj.asInstanceOf[Map[_, _]].head._2))
    } else if (obj.isInstanceOf[java.util.HashMap[_, _]]) {
      MapType(inferSchema(obj.asInstanceOf[java.util.HashMap[_, _]].head._1), inferSchema(obj.asInstanceOf[java.util.HashMap[_, _]].head._2))
    } else if (obj.isInstanceOf[Array[_]]) {
      ArrayType(inferSchema(obj.asInstanceOf[Array[_]](0)))
    } else if (obj.isInstanceOf[java.util.ArrayList[_]]) {
      ArrayType(inferSchema(obj.asInstanceOf[java.util.ArrayList[_]](0)))
    } else if (obj.isInstanceOf[java.util.GregorianCalendar]) {
      TimestampType
    } else if (obj.isInstanceOf[java.util.Date] || obj.isInstanceOf[java.sql.Date]) {
      DateType
    } else {
      StringType
    }
  }

  def toScala(obj: Any): Any = {
    if (obj.isInstanceOf[java.util.HashMap[_, _]]) {
      val jmap = obj.asInstanceOf[java.util.HashMap[_, _]]
      jmap.map { case (k, v) => toScala(k) -> toScala(v) }.toMap
    } else if (obj.isInstanceOf[java.util.ArrayList[_]]) {
      val buf = ArrayBuffer[Any]()
      val jArray = obj.asInstanceOf[java.util.ArrayList[_]]
      for (item <- jArray) {
        buf += toScala(item)
      }
      buf.toArray
    } else if (obj.isInstanceOf[java.util.GregorianCalendar]) {
      new java.sql.Timestamp(obj.asInstanceOf[java.util.GregorianCalendar].getTime().getTime())
    } else {
      obj
    }
  }

  def toSparkDataFrame(sqlContext: SQLContext, rdd: RDD[java.util.HashMap[String, _]]): DataFrame = {
    val scalaRDD = rdd.map(l => toScala(l))
    val rowRDD = scalaRDD.map(l => Row.fromSeq(l.asInstanceOf[Map[_, _]].values.toList))

    var sample_data: java.util.HashMap[String, _] = rdd.take(1)(0)

    var schema_list: ListBuffer[StructField] = new ListBuffer[StructField]()
    for ((name, v) <- sample_data) {
      schema_list.append(StructField(name, inferSchema(v)))
    }
    sqlContext.createDataFrame(rowRDD, StructType(schema_list))
  }
}
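A quick sketch exercising the two helpers above on a plain java.util.ArrayList; neither call needs a SparkContext, so this is easy to try in a REPL.

import java.util.ArrayList

val jList = new ArrayList[String]()
jList.add("a")
jList.add("b")

EvaluateRDD.inferSchema(jList)   // yields ArrayType(StringType, true)
EvaluateRDD.toScala(jList)       // yields Array("a", "b") as an Array[Any]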
Example 3
Source File: TestUtils.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import java.nio.ByteBuffer
import java.util.{ArrayList, HashMap}

import scala.util.Random

object TestUtils {
  def generateRandomByteBuffer(rand: Random, size: Int): ByteBuffer = {
    val bb = ByteBuffer.allocate(size)
    val arrayOfBytes = new Array[Byte](size)
    rand.nextBytes(arrayOfBytes)
    bb.put(arrayOfBytes)
  }

  def generateRandomMap(rand: Random, size: Int): java.util.Map[String, Int] = {
    val jMap = new HashMap[String, Int]()
    for (i <- 0 until size) {
      jMap.put(rand.nextString(5), i)
    }
    jMap
  }

  def generateRandomArray(rand: Random, size: Int): ArrayList[Boolean] = {
    val vec = new ArrayList[Boolean]()
    for (i <- 0 until size) {
      vec.add(rand.nextBoolean())
    }
    vec
  }
}
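A quick usage sketch of the helpers above (the seed value is arbitrary):

import scala.collection.JavaConverters._
import scala.util.Random

val rand = new Random(42L)
val flags: java.util.ArrayList[Boolean] = TestUtils.generateRandomArray(rand, 10)
val trueCount = flags.asScala.count(identity)   // iterate the Java list from the Scala side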
Example 4
Source File: EventManager.scala From Mycat-spider with Apache License 2.0 | 5 votes |
package turbo.crawler.power

import java.util.ArrayList
import java.util.Hashtable
import java.util.concurrent.Callable
import java.util.concurrent.FutureTask
import java.util.concurrent.ScheduledThreadPoolExecutor

import turbo.crawler.Lifecycle
import turbo.crawler.Logable
import turbo.crawler.StringAdapter
import java.util.Collections

/**
 * Event manager
 * @author mclaren
 */
object EventManager extends Lifecycle with Logable with StringAdapter with MessageDriven {

  /**
   * Thread pool
   */
  private val exec = new ScheduledThreadPoolExecutor(sysprop("fetch.threads", "100").toInt)

  /**
   * Event handlers
   */
  private val handlers = new Hashtable[String, java.util.List[Evt => Unit]]()

  /**
   * Read a JVM system property, falling back to a default
   */
  private def sysprop(key: String, default: String) = {
    var matched = System.getProperty(key)
    if (isNotEmpty(matched)) matched else default
  }

  /**
   * Shut the system down
   */
  override def shutdown = {
    try {
      while (true) {
        if (exec.getActiveCount == 0) {
          exec.shutdown()
          throw new RuntimeException()
        }
      }
    } catch {
      case e: Exception => logger.info("Fetch completed and shutdown concurrent fetchers.")
    }
  }

  /**
   * Register an event listener with the system
   */
  def attachEvent(eventId: String, handler: Evt => Unit): Unit = {
    handlers.synchronized {
      var hds = handlers.get(eventId)
      if (hds == null) hds = new ArrayList[Evt => Unit]()
      hds.add(handler)
      handlers.put(eventId, hds)
    }
  }

  /**
   * Dispatch an event to its handlers
   */
  override def fireEvent(evt: Evt): Unit = {
    if (handlers.containsKey(evt.eventId))
      new WrapList[Evt => Unit](handlers.get(evt.eventId)).foreach(fd => dispatchEventConcurrently(evt, fd))
    else
      logger.error("No handlers for event " + evt)
  }

  /**
   * Dispatch an event concurrently
   */
  private def dispatchEventConcurrently(evt: Evt, f: Evt => Unit) = {
    var task = new FutureTask[Unit](new Callable[Unit]() {
      def call: Unit = f(evt)
    })
    this.exec.submit(task)
  }

  /**
   * Wrap a Java list with a Scala-style foreach
   */
  private class WrapList[T](list: java.util.List[T]) {
    def foreach(f: T => Unit) = for (i <- 0 to list.size() - 1) f(list.get(i))
  }
}
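A minimal sketch of registering a handler with the manager above, assuming Evt (referenced but not defined in this file) exposes an eventId field:

EventManager.attachEvent("page.fetched", (evt: Evt) => {
  // Runs on the ScheduledThreadPoolExecutor when a matching event is fired
  println("handled " + evt.eventId)
})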
Example 5
Source File: FetchController.scala From Mycat-spider with Apache License 2.0 | 5 votes |
package turbo.crawler

import java.util.ArrayList
import java.util.Hashtable

/**
 * Fetch controller
 * @author mclaren
 */
object FetchController {

  private val CACHE = new Hashtable[String, Int]()

  private val MAX = 2

  def ensureReady(url: String) = {
    CACHE.synchronized {
      var count = CACHE.get(url)
      if (count >= MAX) throw new ResourceHasAlreadyBeenFetchedException(url)
    }
  }

  def recordUrl(url: String) = CACHE.synchronized(
    if (CACHE.containsKey(url)) CACHE.put(url, CACHE.get(url) + 1) else CACHE.put(url, 1)
  )
}
Example 6
Source File: StanfordBaseWrapperPlugin.scala From recogito2 with Apache License 2.0 | 5 votes |
package org.pelagios.recogito.plugins.ner.stanford

import java.util.{ArrayList, Properties}
import edu.stanford.nlp.ling.CoreAnnotations
import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP}
import edu.stanford.nlp.util.StringUtils
import org.pelagios.recogito.sdk.PluginEnvironment
import org.pelagios.recogito.sdk.ner._
import scala.collection.JavaConverters._
import org.slf4j.LoggerFactory

case class StanfordEntity(chars: String, entityTag: String, charOffset: Int)

abstract class StanfordBaseWrapperPlugin(
  lang: String,
  config: String,
  description: String
) extends NERPlugin {

  private val logger = LoggerFactory.getLogger(this.getClass)

  private lazy val pipeline = {
    logger.info("Initializing NER pipeline")
    val pipeline = new StanfordCoreNLP(props)
    logger.info("Pipeline initialized")
    pipeline
  }

  private def toEntityType(entityTag: String) = entityTag match {
    case "LOCATION" | "CITY" | "COUNTRY" | "STATE_OR_PROVINCE" | "NATIONALITY" => Some(EntityType.LOCATION)
    case "PERSON" => Some(EntityType.PERSON)
    case "DATE" => Some(EntityType.DATE)
    case _ => None
  }

  private lazy val props = {
    val props = StringUtils.argsToProperties(Seq("-props", config): _*)
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner")
    props
  }

  override val getName = "Stanford CoreNLP"

  override val getDescription = description

  override val getOrganization = "Stanford NLP Group"

  override val getVersion = "3.9.1"

  override val getSupportedLanguages = Seq(lang).asJava

  override def parse(text: String, env: PluginEnvironment) = {
    val document = new CoreDocument(text)
    pipeline.annotate(document)

    val entities = document.tokens().asScala.foldLeft(Seq.empty[StanfordEntity]) { (result, token) =>
      val entityTag = token.get(classOf[CoreAnnotations.NamedEntityTagAnnotation])
      val chars = token.get(classOf[CoreAnnotations.TextAnnotation])
      val charOffset = token.beginPosition

      result.headOption match {
        case Some(previousEntity) if previousEntity.entityTag == entityTag =>
          // Append to previous phrase if entity tag is the same
          StanfordEntity(previousEntity.chars + " " + chars, entityTag, previousEntity.charOffset) +: result.tail

        case _ =>
          // Either this is the first token (result.headOption == None), or a new phrase
          StanfordEntity(chars, entityTag, charOffset) +: result
      }
    }

    // StanfordCoreNLP.clearAnnotatorPool

    entities.withFilter(_.entityTag != "O")
      .flatMap(e => toEntityType(e.entityTag).map(etype => new Entity(e.chars, etype, e.charOffset))).asJava
  }
}
Example 7
Source File: Public.scala From recogito2 with Apache License 2.0 | 5 votes |
package services.generated

import java.util.ArrayList
import java.util.Arrays
import java.util.List

import javax.annotation.Generated

import org.jooq.Sequence
import org.jooq.Table
import org.jooq.impl.SchemaImpl

import services.generated.tables.AuthorityFile
import services.generated.tables.Document
import services.generated.tables.DocumentFilepart
import services.generated.tables.DocumentPreferences
import services.generated.tables.FeatureToggle
import services.generated.tables.Folder
import services.generated.tables.FolderAssociation
import services.generated.tables.ServiceAnnouncement
import services.generated.tables.SharingPolicy
import services.generated.tables.Similarity
import services.generated.tables.Task
import services.generated.tables.Upload
import services.generated.tables.UploadFilepart
import services.generated.tables.User
import services.generated.tables.UserRole

object Public {

  @Generated(
    value = Array(
      "http://www.jooq.org",
      "jOOQ version:3.7.2"
    ),
    comments = "This class is generated by jOOQ"
  )
  class Public extends SchemaImpl("public") {

    override def getSequences: List[Sequence[_]] = {
      val result = new ArrayList[Sequence[_]]
      result.addAll(getSequences0)
      result
    }

    private def getSequences0(): List[Sequence[_]] = {
      return Arrays.asList[Sequence[_]](
        Sequences.FEATURE_TOGGLE_ID_SEQ,
        Sequences.SHARING_POLICY_ID_SEQ,
        Sequences.UPLOAD_ID_SEQ,
        Sequences.USER_ROLE_ID_SEQ)
    }

    override def getTables: List[Table[_]] = {
      val result = new ArrayList[Table[_]]
      result.addAll(getTables0)
      result
    }

    private def getTables0(): List[Table[_]] = {
      return Arrays.asList[Table[_]](
        AuthorityFile.AUTHORITY_FILE,
        Document.DOCUMENT,
        DocumentFilepart.DOCUMENT_FILEPART,
        DocumentPreferences.DOCUMENT_PREFERENCES,
        FeatureToggle.FEATURE_TOGGLE,
        Folder.FOLDER,
        FolderAssociation.FOLDER_ASSOCIATION,
        ServiceAnnouncement.SERVICE_ANNOUNCEMENT,
        SharingPolicy.SHARING_POLICY,
        Similarity.SIMILARITY,
        Task.TASK,
        Upload.UPLOAD,
        UploadFilepart.UPLOAD_FILEPART,
        User.USER,
        UserRole.USER_ROLE)
    }
  }
}
Example 8
Source File: HogHBaseReputation.scala From hogzilla with GNU General Public License v2.0 | 5 votes |
package org.hogzilla.hbase

import scala.math.random
import java.lang.Math

import org.apache.spark._
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionModel, LinearRegressionWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.rdd.RDD
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.BinaryComparator
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.CompareFilter
import java.util.ArrayList
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.Filter
import scala.collection.mutable.HashSet
import org.apache.hadoop.hbase.client.Put

object HogHBaseReputation {

  // Ex: MX, whitelist
  def getReputationList(listName: String, listType: String): Set[String] = {
    val list = new HashSet[String]

    val filters: ArrayList[Filter] = new ArrayList()

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false)

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
      CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false)

    filters.add(colValFilter1)
    filters.add(colValFilter2)

    val filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters)

    val scan = new Scan()
    scan.setFilter(filterList)

    val it = HogHBaseRDD.hogzilla_reputation.getScanner(scan).iterator()
    while (it.hasNext()) {
      list.add(Bytes.toString(it.next().getValue(Bytes.toBytes("rep"), Bytes.toBytes("ip"))))
    }

    list.toSet
  }

  def saveReputationList(listName: String, listType: String, ip: String) = {
    val put = new Put(Bytes.toBytes(ip + "-" + listName + "-" + listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }
}
Example 9
Source File: JavaTestUtils.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import scala.reflect.ClassTag

import java.util.{List => JList}
import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext}
import org.apache.spark.streaming._
import java.util.ArrayList
import collection.JavaConversions._
import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.dstream.DStream

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] =
      implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    val out = new ArrayList[JList[JList[V]]]()
    res.map { entry =>
      val lists = entry.map(new ArrayList[V](_))
      out.append(new ArrayList[JList[V]](lists))
    }
    out
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 10
Source File: LogFile.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.operation

import java.io.{BufferedReader, File, FileInputStream, FileNotFoundException, FileOutputStream, InputStreamReader, IOException, PrintStream}
import java.util.ArrayList

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.hadoop.io.IOUtils
import org.apache.kyuubi.Logging
import org.apache.spark.sql.Row

import yaooqinn.kyuubi.KyuubiSQLException

class LogFile private (
    file: File,
    private var reader: Option[BufferedReader],
    writer: PrintStream,
    @volatile private var isRemoved: Boolean = false) extends Logging {

  def this(file: File) = {
    this(file, LogFile.createReader(file, isRemoved = false), new PrintStream(new FileOutputStream(file)))
  }

  private def resetReader(): Unit = {
    reader.foreach(IOUtils.closeStream)
    reader = None
  }

  private def readResults(nLines: Long): Seq[Row] = {
    reader = reader.orElse(LogFile.createReader(file, isRemoved))

    val logs = new ArrayList[Row]()
    reader.foreach { r =>
      var i = 1
      try {
        var line: String = r.readLine()
        while ((i < nLines || nLines <= 0) && line != null) {
          logs.add(Row(line))
          line = r.readLine()
          i += 1
        }
      } catch {
        case e: FileNotFoundException =>
          val operationHandle = file.getName
          val path = file.getAbsolutePath
          val msg = if (isRemoved) {
            s"Operation[$operationHandle] has been closed and the log file $path has been removed"
          } else {
            s"Operation[$operationHandle] Log file $path is not found"
          }
          throw new KyuubiSQLException(msg, e)
      }
    }
    logs.asScala
  }

  def write(msg: String): Unit = {
    writer.print(msg)
  }

  def close(): Unit = synchronized {
    try {
      reader.foreach(_.close())
      writer.close()
      if (!isRemoved) {
        FileUtils.forceDelete(file)
        isRemoved = true
      }
    } catch {
      case e: IOException =>
        error(s"Failed to remove corresponding log file of operation: ${file.getName}", e)
    }
  }
}

object LogFile {

  def createReader(file: File, isRemoved: Boolean): Option[BufferedReader] = try {
    Option(new BufferedReader(new InputStreamReader(new FileInputStream(file))))
  } catch {
    case e: FileNotFoundException =>
      val operationHandle = file.getName
      val path = file.getAbsolutePath
      val msg = if (isRemoved) {
        s"Operation[$operationHandle] has been closed and the log file $path has been removed"
      } else {
        s"Operation[$operationHandle] Log file $path is not found"
      }
      throw new KyuubiSQLException(msg, e)
  }
}
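A minimal sketch of driving the class above, assuming a writable temporary directory:

import java.io.File

val logFile = new LogFile(File.createTempFile("kyuubi-operation", ".log"))
logFile.write("query started\n")
logFile.write("query finished\n")
logFile.close()   // closes both streams and deletes the underlying file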
Example 11
Source File: JavaTestUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import scala.reflect.ClassTag

import java.util.{List => JList}
import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStreamLike, JavaDStream, JavaStreamingContext}
import org.apache.spark.streaming._
import java.util.ArrayList
import collection.JavaConversions._
import org.apache.spark.api.java.JavaRDDLike
import org.apache.spark.streaming.dstream.DStream

  def runStreamsWithPartitions[V](ssc: JavaStreamingContext, numBatches: Int,
      numExpectedOutput: Int): JList[JList[JList[V]]] = {
    implicit val cm: ClassTag[V] =
      implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]]
    val res = runStreamsWithPartitions[V](ssc.ssc, numBatches, numExpectedOutput)
    val out = new ArrayList[JList[JList[V]]]()
    res.map { entry =>
      val lists = entry.map(new ArrayList[V](_))
      out.append(new ArrayList[JList[V]](lists))
    }
    out
  }
}

object JavaTestUtils extends JavaTestBase {
  override def maxWaitTimeMillis = 20000
}

object JavaCheckpointTestUtils extends JavaTestBase {
  override def actuallyWait = true
}
Example 12
Source File: AddUUIDTest.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle

import java.net.InetAddress
import java.util.ArrayList

import cn.piflow.Runner
import cn.piflow.conf.bean.FlowBean
import cn.piflow.conf.util.{FileUtil, OptionUtil}
import cn.piflow.util.{PropertyUtil, ServerIpUtil}
import org.apache.spark.sql.SparkSession
import org.h2.tools.Server
import org.junit.Test

import scala.util.parsing.json.JSON

class AddUUIDTest {

  @Test
  def testFlow(): Unit = {

    // parse flow json
    val file = "src/main/resources/flow/common/uuid.json"
    val flowJsonStr = FileUtil.fileReader(file)
    val map = OptionUtil.getAny(JSON.parseFull(flowJsonStr)).asInstanceOf[Map[String, Any]]
    println(map)

    // create flow
    val flowBean = FlowBean(map)
    val flow = flowBean.constructFlow()

    val ip = InetAddress.getLocalHost.getHostAddress
    cn.piflow.util.FileUtil.writeFile("server.ip=" + ip, ServerIpUtil.getServerIpFile())
    val h2Server = Server.createTcpServer("-tcp", "-tcpAllowOthers", "-tcpPort", "50001").start()

    // execute flow
    val spark = SparkSession.builder()
      .master("local[12]")
      .appName("hive")
      .config("spark.driver.memory", "4g")
      .config("spark.executor.memory", "8g")
      .config("spark.cores.max", "8")
      .config("hive.metastore.uris", PropertyUtil.getPropertyValue("hive.metastore.uris"))
      .enableHiveSupport()
      .getOrCreate()

    val process = Runner.create()
      .bind(classOf[SparkSession].getName, spark)
      .bind("checkpoint.path", "")
      .bind("debug.path", "")
      .start(flow)

    process.awaitTermination()
    val pid = process.pid()
    println(pid + "!!!!!!!!!!!!!!!!!!!!!")
    spark.close()
  }
}
Example 13
Source File: SequoiadbWriter.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.io

  def save(it: Iterator[Row], schema: StructType): Unit = {
    try {
      ds = Option(new SequoiadbDatasource(
        config[List[String]](SequoiadbConfig.Host),
        config[String](SequoiadbConfig.Username),
        config[String](SequoiadbConfig.Password),
        ConnectionUtil.initConfigOptions,
        ConnectionUtil.initSequoiadbOptions))

      // pickup a connection
      connection = Option(ds.get.getConnection)

      // locate collection
      val cl = connection.get.getCollectionSpace(
        config[String](SequoiadbConfig.CollectionSpace)).getCollection(
        config[String](SequoiadbConfig.Collection))

      LOG.info("bulksize = " + config[String](SequoiadbConfig.BulkSize))

      // loop through it and perform batch insert
      // batch size is defined in SequoiadbConfig.BulkSize
      val list: ArrayList[BSONObject] = new ArrayList[BSONObject]()
      while (it.hasNext) {
        val record = it.next
        val bsonrecord = SequoiadbRowConverter.rowAsDBObject(record, schema)
        list.add(bsonrecord)
        if (list.size >= config[String](SequoiadbConfig.BulkSize).toInt) {
          cl.bulkInsert(list, 0)
          list.clear
        }
      }

      // insert rest of the record if there's any
      if (list.size > 0) {
        cl.bulkInsert(list, 0)
        list.clear
      }
    } catch {
      case ex: Exception => throw SequoiadbException(ex.getMessage, ex)
    } finally {
      ds.fold(ifEmpty = ()) { connectionpool =>
        connection.fold(ifEmpty = ()) { conn =>
          connectionpool.close(conn)
        }
        connectionpool.close
      } // ds.fold(ifEmpty=())
    } // finally
  } // def save(it: Iterator[BSONObject]): Unit =
Example 14
Source File: CreateKuduTable.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.aggregates

import java.util
import java.util.ArrayList

import org.kududb.ColumnSchema.ColumnSchemaBuilder
import org.kududb.client.{CreateTableOptions, KuduClient}
import org.kududb.{ColumnSchema, Schema, Type}

object CreateGamerAggregatesKuduTable {
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      println("{kuduMaster} {tableName} {numberOfBuckets}")
      return
    }

    val kuduMaster = args(0)
    val tableName = args(1)
    val numberOfBuckets = args(2).toInt

    val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build()
    val columnList = new util.ArrayList[ColumnSchema]()

    columnList.add(new ColumnSchemaBuilder("gamer_id", Type.STRING).key(true).build())
    columnList.add(new ColumnSchemaBuilder("last_time_played", Type.INT64).key(false).build())
    columnList.add(new ColumnSchemaBuilder("games_played", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("games_won", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("oks", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("deaths", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("damage_given", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("damage_taken", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("max_oks_in_one_game", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("max_deaths_in_one_game", Type.INT32).key(false).build())
    val schema = new Schema(columnList)

    if (kuduClient.tableExists(tableName)) {
      println("Deleting Table")
      kuduClient.deleteTable(tableName)
    }

    val builder = new CreateTableOptions()
    val hashColumnList = new ArrayList[String]
    hashColumnList.add("gamer_id")
    builder.addHashPartitions(hashColumnList, numberOfBuckets)

    println("Creating Table")
    kuduClient.createTable(tableName, schema, builder)
    println("Created Table")

    kuduClient.shutdown()
  }
}
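The entry point above expects a Kudu master address, a table name, and a hash-bucket count. A hypothetical invocation with placeholder values:

CreateGamerAggregatesKuduTable.main(Array("kudu-master.example.com:7051", "gamer_aggregates", "4"))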
Example 15
Source File: CreateKuduTable.scala From SparkOnKudu with Apache License 2.0 | 5 votes |
package org.kududb.spark.demo.gamer.cdc

import java.util
import java.util.ArrayList

import org.kududb.{Schema, Type, ColumnSchema}
import org.kududb.ColumnSchema.ColumnSchemaBuilder
import org.kududb.client.{CreateTableOptions, KuduClient}

object CreateGamerCDCKuduTable {
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      println("{kuduMaster} {tableName} {numberOfBuckets}")
      return
    }

    val kuduMaster = args(0)
    val tableName = args(1)
    val numberOfBuckets = args(2).toInt

    val kuduClient = new KuduClient.KuduClientBuilder(kuduMaster).build()
    val columnList = new util.ArrayList[ColumnSchema]()

    columnList.add(new ColumnSchemaBuilder("gamer_id", Type.STRING).key(true).build())
    columnList.add(new ColumnSchemaBuilder("eff_to", Type.STRING).key(true).build())
    columnList.add(new ColumnSchemaBuilder("eff_from", Type.STRING).key(false).build())
    columnList.add(new ColumnSchemaBuilder("last_time_played", Type.INT64).key(false).build())
    columnList.add(new ColumnSchemaBuilder("games_played", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("games_won", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("oks", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("deaths", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("damage_given", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("damage_taken", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("max_oks_in_one_game", Type.INT32).key(false).build())
    columnList.add(new ColumnSchemaBuilder("max_deaths_in_one_game", Type.INT32).key(false).build())
    val schema = new Schema(columnList)

    if (kuduClient.tableExists(tableName)) {
      println("Deleting Table")
      kuduClient.deleteTable(tableName)
    }

    val builder = new CreateTableOptions()
    val hashColumnList = new ArrayList[String]
    hashColumnList.add("gamer_id")
    builder.addHashPartitions(hashColumnList, numberOfBuckets)

    println("Creating Table")
    kuduClient.createTable(tableName, schema, builder)
    println("Created Table")

    kuduClient.shutdown()
  }
}
Example 16
Source File: TileSieve.scala From Electrodynamics with GNU Lesser General Public License v3.0 | 5 votes |
package com.calclavia.edx.basic.process.sifting

import java.util.ArrayList

import cpw.mods.fml.relauncher.{Side, SideOnly}
import edx.core.Reference
import edx.core.resource.content.ItemRubble
import io.netty.buffer.ByteBuf
import net.minecraft.block.Block
import net.minecraft.block.material.Material
import net.minecraft.entity.player.EntityPlayer
import net.minecraft.item.ItemStack
import net.minecraft.util.ResourceLocation
import net.minecraftforge.client.IItemRenderer.ItemRenderType
import net.minecraftforge.client.model.AdvancedModelLoader
import org.lwjgl.opengl.GL11
import resonantengine.api.item.ISimpleItemRenderer
import resonantengine.core.network.discriminator.PacketType
import resonantengine.lib.content.prefab.TInventory
import resonantengine.lib.modcontent.block.ResonantTile
import resonantengine.lib.render.{RenderItemOverlayUtility, RenderUtility}
import resonantengine.lib.transform.region.Cuboid
import resonantengine.lib.transform.vector.Vector3
import resonantengine.lib.utility.inventory.InventoryUtility
import resonantengine.lib.wrapper.ByteBufWrapper._
import resonantengine.prefab.block.itemblock.ItemBlockSaved
import resonantengine.prefab.network.{TPacketReceiver, TPacketSender}

object TileSieve {
  val model = AdvancedModelLoader.loadModel(new ResourceLocation(Reference.domain, Reference.modelPath + "sieve.tcn"))
}

class TileSieve extends ResonantTile(Material.wood) with TInventory with TPacketSender with TPacketReceiver with ISimpleItemRenderer {

  // Constructor
  setTextureName("material_wood_top")
  bounds = new Cuboid(0, 0, 0, 1, 0.25, 1)
  normalRender = false
  isOpaqueCube = false
  itemBlock = classOf[ItemSieve]

  override def canUpdate: Boolean = false

  override def getSizeInventory: Int = 1

  override def use(player: EntityPlayer, hitSide: Int, hit: Vector3): Boolean = {
    val currentStack = player.inventory.getCurrentItem

    if (currentStack != null && currentStack.getItem.isInstanceOf[ItemRubble]) {
      interactCurrentItem(this, 0, player)
    }
    else {
      extractItem(this, 0, player)
    }

    return true
  }

  override def isItemValidForSlot(i: Int, itemStack: ItemStack): Boolean = itemStack.getItem.isInstanceOf[ItemRubble]

  override def renderInventoryItem(renderType: ItemRenderType, itemStack: ItemStack, data: AnyRef*): Unit = {
  }

  override def write(buf: ByteBuf, id: Int) {
    super.write(buf, id)
    buf <<<< writeToNBT
  }

  override def read(buf: ByteBuf, id: Int, packetType: PacketType) {
    super.read(buf, id, packetType)
    buf >>>> readFromNBT
  }
}
Example 17
Source File: TilePlasma.scala From Electrodynamics with GNU Lesser General Public License v3.0 | 5 votes |
package com.calclavia.edx.quantum.machine.plasma

import java.util.ArrayList

import net.minecraft.block.material.Material
import net.minecraft.entity.Entity
import net.minecraft.init.Blocks
import net.minecraft.item.ItemStack
import net.minecraft.tileentity.TileEntity
import net.minecraft.util.DamageSource
import net.minecraft.world.IBlockAccess
import net.minecraftforge.common.MinecraftForge
import net.minecraftforge.common.util.ForgeDirection
import resonantengine.api.event.PlasmaEvent
import resonantengine.lib.grid.thermal.GridThermal
import resonantengine.lib.mod.config.Config
import resonantengine.lib.modcontent.block.ResonantTile
import resonantengine.lib.transform.vector.Vector3

object TilePlasma {
  @Config var plasmaMaxTemperature: Int = 1000000
}

class TilePlasma extends ResonantTile(Material.lava) {

  private var temperature: Double = TilePlasma.plasmaMaxTemperature

  // Constructor
  textureName = "plasma"
  isOpaqueCube = false

  override def getLightValue(access: IBlockAccess): Int = {
    return 7
  }

  override def isSolid(access: IBlockAccess, side: Int): Boolean = {
    return false
  }

  override def getDrops(metadata: Int, fortune: Int): ArrayList[ItemStack] = {
    return new ArrayList[ItemStack]
  }

  override def getRenderBlockPass: Int = {
    return 1
  }

  override def collide(entity: Entity) {
    entity.attackEntityFrom(DamageSource.inFire, 100)
  }

  override def update {
    super.update

    GridThermal.addHeat(position, ((temperature - GridThermal.getTemperature(position)) * 0.1f).asInstanceOf[Float])

    if (ticks % 20 == 0) {
      temperature /= 1.5

      if (temperature <= TilePlasma.plasmaMaxTemperature / 10) {
        worldObj.setBlock(xCoord, yCoord, zCoord, Blocks.fire, 0, 3)
        return
      }

      for (i <- 0 to 6) {
        if (worldObj.rand.nextFloat < 0.4) {
          val diDian: Vector3 = position
          diDian.add(ForgeDirection.getOrientation(i))

          val tileEntity: TileEntity = diDian.getTileEntity(worldObj)
          if (!(tileEntity.isInstanceOf[TilePlasma])) {
            MinecraftForge.EVENT_BUS.post(new PlasmaEvent.SpawnPlasmaEvent(worldObj, diDian.xi, diDian.yi, diDian.zi, temperature.asInstanceOf[Int]))
          }
        }
      }
    }
  }

  def setTemperature(newTemperature: Int) {
    temperature = newTemperature
  }
}
Example 18
Source File: TSDBUpdater.scala From sprue with Apache License 2.0 | 5 votes |
package com.cloudera.sprue

import java.io._

import org.apache.commons._
import org.apache.http._
import org.apache.http.client._
import org.apache.http.client.methods.HttpPost
import java.util.ArrayList
import org.apache.http.client.entity.UrlEncodedFormEntity
import com.google.gson.Gson
import java.util.HashMap
import java.lang.reflect.Type
import com.google.gson.reflect.TypeToken
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.DefaultHttpClient
import org.apache.spark.sql.Row

case class MetricsTags(state: String)
case class OpenTSDBMessageElement(metric: String, timestamp: Long, value: Long, tags: MetricsTags)

object TSDBUpdater {
  val client = new DefaultHttpClient()
  // val client = HttpClientBuilder.create.build
}

class TSDBUpdater(url: String) extends Serializable {

  def loadPatientStats(row: Row) {

    val metricList = new ArrayList[OpenTSDBMessageElement]()
    val jmap = new MetricsTags(row.getString(0))
    val evalTimestamp = row.getLong(1)

    val sirsMetric = new OpenTSDBMessageElement("sirs", evalTimestamp, row.getLong(2), jmap)
    metricList.add(sirsMetric)

    val sepsisMetric = new OpenTSDBMessageElement("sepsis", evalTimestamp, row.getLong(3), jmap)
    metricList.add(sepsisMetric)

    val severeSepsisMetric = new OpenTSDBMessageElement("severeSepsis", evalTimestamp, row.getLong(4), jmap)
    metricList.add(severeSepsisMetric)

    val septicShockMetric = new OpenTSDBMessageElement("septicShock", evalTimestamp, row.getLong(5), jmap)
    metricList.add(septicShockMetric)

    val organMetric = new OpenTSDBMessageElement("organDysfunctionSyndrome", evalTimestamp, row.getLong(6), jmap)
    metricList.add(organMetric)

    val metricsAsJson = new Gson().toJson(metricList)

    val post = new HttpPost(url)
    post.setHeader("Content-type", "application/json")
    post.setEntity(new StringEntity(metricsAsJson))

    val response = TSDBUpdater.client.execute(post)
    // println("response =====" + response.toString())
  }
}
Example 19
Source File: TestConnector.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.connector.test

import java.util.ArrayList
import java.util.concurrent.LinkedBlockingQueue

import scala.concurrent.Future
import scala.concurrent.duration._
import scala.collection.JavaConverters._

import org.apache.kafka.clients.producer.RecordMetadata
import org.apache.kafka.common.TopicPartition

import common.StreamLogging

import org.apache.openwhisk.common.Counter
import org.apache.openwhisk.core.connector.Message
import org.apache.openwhisk.core.connector.MessageConsumer
import org.apache.openwhisk.core.connector.MessageProducer

class TestConnector(topic: String, override val maxPeek: Int, allowMoreThanMax: Boolean)
    extends MessageConsumer
    with StreamLogging {

  override def peek(duration: FiniteDuration, retry: Int = 0) = {
    val msgs = new ArrayList[Message]
    queue.synchronized {
      queue.drainTo(msgs, if (allowMoreThanMax) Int.MaxValue else maxPeek)
      msgs.asScala map { m =>
        offset += 1
        (topic, -1, offset, m.serialize.getBytes)
      }
    }
  }

  override def commit(retry: Int = 0) = {
    if (throwCommitException) {
      throw new Exception("commit failed")
    } else {
      // nothing to do
    }
  }

  def occupancy = queue.size

  def send(msg: Message): Future[RecordMetadata] = {
    producer.send(topic, msg)
  }

  def send(msgs: Seq[Message]): Future[RecordMetadata] = {
    import scala.language.reflectiveCalls
    producer.sendBulk(topic, msgs)
  }

  def close() = {
    closed = true
    producer.close()
  }

  private val producer = new MessageProducer {
    def send(topic: String, msg: Message, retry: Int = 0): Future[RecordMetadata] = {
      queue.synchronized {
        if (queue.offer(msg)) {
          logging.info(this, s"put: $msg")
          Future.successful(new RecordMetadata(new TopicPartition(topic, 0), 0, queue.size, -1, Long.box(-1L), -1, -1))
        } else {
          logging.error(this, s"put failed: $msg")
          Future.failed(new IllegalStateException("failed to write msg"))
        }
      }
    }

    def sendBulk(topic: String, msgs: Seq[Message]): Future[RecordMetadata] = {
      queue.synchronized {
        if (queue.addAll(msgs.asJava)) {
          logging.info(this, s"put: ${msgs.length} messages")
          Future.successful(new RecordMetadata(new TopicPartition(topic, 0), 0, queue.size, -1, Long.box(-1L), -1, -1))
        } else {
          logging.error(this, s"put failed: ${msgs.length} messages")
          Future.failed(new IllegalStateException("failed to write msg"))
        }
      }
    }

    def close() = {}

    def sentCount() = counter.next()

    val counter = new Counter()
  }

  var throwCommitException = false
  private val queue = new LinkedBlockingQueue[Message]()
  @volatile private var closed = false
  private var offset = -1L
}
Example 20
Source File: KuduService.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.logcollector

import java.util.{ArrayList, Collections}

import com.typesafe.scalalogging.LazyLogging
import io.phdata.pulse.common.domain.TimeseriesEvent
import org.apache.kudu.client.SessionConfiguration.FlushMode
import org.apache.kudu.client.{CreateTableOptions, KuduClient, KuduException, KuduTable}
import org.apache.kudu.{ColumnSchema, Schema, Type}

import scala.collection.concurrent

object TimeseriesEventColumns {
  val TIMESTAMP = "ts"
  val KEY = "key"
  val TAG = "tag"
  val VALUE = "value"
}

  private[logcollector] def getOrCreateTable(tableName: String): KuduTable =
    KerberosContext.runPrivileged {
      if (tableCache.contains(tableName)) {
        tableCache(tableName)
      } else if (!client.tableExists(tableName)) {
        logger.info(s"Kudu table not found: $tableName")
        val columns = new ArrayList[ColumnSchema]
        columns.add(
          new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.TIMESTAMP, Type.UNIXTIME_MICROS)
            .key(true)
            .build)
        columns.add(
          new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.KEY, Type.STRING)
            .key(true)
            .build)
        columns.add(
          new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.TAG, Type.STRING)
            .key(true)
            .build)
        columns.add(
          new ColumnSchema.ColumnSchemaBuilder(TimeseriesEventColumns.VALUE, Type.DOUBLE)
            .key(false)
            .build)
        val schema = new Schema(columns)

        val opts = new CreateTableOptions()
          .setRangePartitionColumns(Collections.singletonList(TimeseriesEventColumns.TIMESTAMP))
          .addHashPartitions(Collections.singletonList(TimeseriesEventColumns.KEY), 4)

        val table = client.createTable(tableName, schema, opts)
        tableCache.put(tableName, table)
        logger.info(s"Created Kudu table $tableName")
        table
      } else {
        val table = client.openTable(tableName)
        tableCache.put(tableName, table)
        table
      }
    }
}
Example 21
Source File: ContextIdParserImpl.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.gateway.ujes.route.contextservice

import java.util
import java.util.{ArrayList, List}

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.cs.common.entity.source.{ContextIDParser, HAContextID}
import com.webank.wedatasphere.linkis.cs.common.utils.CSHighAvailableUtils
import com.webank.wedatasphere.linkis.rpc.instancealias.InstanceAliasManager
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.stereotype.Component

@Component
class ContextIdParserImpl extends ContextIDParser with Logging {

  @Autowired
  var instanceAliasManager: InstanceAliasManager = _

  override def parse(contextId: String): util.List[String] = {
    if (CSHighAvailableUtils.checkHAIDBasicFormat(contextId)) {
      val instances = new util.ArrayList[String](2)
      val haContextID = CSHighAvailableUtils.decodeHAID(contextId)
      if (instanceAliasManager.isInstanceAliasValid(haContextID.getInstance)) {
        instances.add(instanceAliasManager.getInstanceByAlias(haContextID.getInstance).getInstance)
      } else {
        error(s"parse HAID instance invalid. haIDKey : " + contextId)
      }
      if (instanceAliasManager.isInstanceAliasValid(haContextID.getBackupInstance)) {
        instances.add(instanceAliasManager.getInstanceByAlias(haContextID.getBackupInstance).getInstance)
      } else {
        error("parse HAID backupInstance invalid. haIDKey : " + contextId)
      }
      instances
    } else {
      new util.ArrayList[String](0)
    }
  }

  private def isNumberic(s: String): Boolean = {
    s.toCharArray foreach { c =>
      if (c < 48 || c > 57) return false
    }
    true
  }
}
Example 22
Source File: RiakPythonHelper.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.util.python

import com.basho.riak.spark._
import com.basho.riak.spark.rdd.RiakRDD
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.api.java.JavaRDD
import com.basho.riak.spark.writer.WriteConf
import org.apache.spark.rdd.RDD
import java.util.ArrayList
import scala.collection.JavaConversions._

class RiakPythonHelper {

  implicit val pickling = new PicklingUtils()

  def riakBucket(jsc: JavaSparkContext, bucketName: String, bucketType: String): RiakRDD[(String, Any)] = {
    jsc.sc.riakBucket(bucketName, bucketType)
  }

  def saveToRiak(jrdd: JavaRDD[Array[Byte]], bucketName: String, bucketType: String) = {
    jrdd.rdd.unpickle().saveToRiak(bucketName, bucketType, WriteConf())
  }

  def query2iKeys[K](jsc: JavaSparkContext, bucketName: String, bucketType: String, index: String, keys: ArrayList[K]) =
    jsc.sc.riakBucket(bucketName, bucketType).query2iKeys(index, keys: _*)

  def queryBucketKeys(jsc: JavaSparkContext, bucketName: String, bucketType: String, keys: ArrayList[String]) =
    jsc.sc.riakBucket(bucketName, bucketType).queryBucketKeys(keys: _*)

  def partitionBy2iRanges[K](jsc: JavaSparkContext, bucketName: String, bucketType: String, index: String, ranges: ArrayList[ArrayList[K]]) = {
    val r = ranges.map(x => (x(0), x(1)))
    jsc.sc.riakBucket(bucketName, bucketType).partitionBy2iRanges(index, r: _*)
  }

  def partitionBy2iKeys[K](jsc: JavaSparkContext, bucketName: String, bucketType: String, index: String, keys: ArrayList[K]) =
    jsc.sc.riakBucket(bucketName, bucketType).partitionBy2iKeys(index, keys: _*)

  def pickleRows(rdd: RDD[_]): RDD[Array[Byte]] = rdd.pickle()

  def javaRDD(rdd: RDD[_]) = JavaRDD.fromRDD(rdd)
}
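A minimal sketch of calling the 2i range helper above from Scala, assuming a JavaSparkContext named jsc, a bucket "logs" of type "default", and a numeric secondary index named "ts" (all placeholders):

import java.util.ArrayList

val range = new ArrayList[Long]()
range.add(0L)
range.add(100L)

val ranges = new ArrayList[ArrayList[Long]]()
ranges.add(range)

val helper = new RiakPythonHelper()
val partitionedRdd = helper.partitionBy2iRanges(jsc, "logs", "default", "ts", ranges)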