org.apache.commons.lang3.StringUtils Scala Examples

The following examples show how to use org.apache.commons.lang3.StringUtils. You can go to the original project or source file by following the link above each example.
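Most of the examples below rely on the same handful of StringUtils helpers: null-safe emptiness checks, joining, padding, abbreviating, accent stripping and literal replacement. As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) that exercises those methods:

import org.apache.commons.lang3.StringUtils

object StringUtilsTour extends App {
  // Null-safe emptiness checks: isEmpty treats only null/"" as empty,
  // while isBlank also treats whitespace-only strings as blank.
  println(StringUtils.isEmpty(null))                            // true
  println(StringUtils.isBlank("   "))                           // true
  println(StringUtils.isNotBlank("spark"))                      // true

  // Joining, padding and abbreviating.
  println(StringUtils.join(Array[AnyRef]("a", "b", "c"), ", ")) // a, b, c
  println(StringUtils.leftPad("42", 5, '0'))                    // 00042
  println(StringUtils.abbreviate("a rather long string", 10))   // a rathe...

  // Accent stripping and literal (non-regex) replacement.
  println(StringUtils.stripAccents("café"))                     // cafe
  println(StringUtils.replace("a-b-c", "-", "/"))               // a/b/c
}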
Example 1
Source File: ClientConfiguration.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.storage.backends.cassandra.config.entities

import com.datastax.driver.core.ConsistencyLevel
import com.expedia.www.haystack.commons.retries.RetryOperation
import org.apache.commons.lang3.StringUtils



case class CassandraConfiguration(clientConfig: ClientConfiguration,
                                  consistencyLevel: ConsistencyLevel,
                                  retryConfig: RetryOperation.Config,
                                  consistencyLevelOnError: List[(Class[_], ConsistencyLevel)]) {
  def writeConsistencyLevel(error: Throwable): ConsistencyLevel = {
    if (error == null) {
      consistencyLevel
    } else {
      consistencyLevelOnError
        .find(errorClass => errorClass._1.isAssignableFrom(error.getClass))
        .map(_._2).getOrElse(writeConsistencyLevel(error.getCause))
    }
  }
} 
Example 2
Source File: LogListener.scala    From splash   with Apache License 2.0
package org.apache.spark.shuffle

import org.apache.commons.lang3.StringUtils
import org.apache.spark.internal.Logging
import org.testng.{ITestResult, TestListenerAdapter}

class LogListener extends TestListenerAdapter with Logging {
  override def onTestStart(tr: ITestResult): Unit = {
    super.onTestStart(tr)
    logInfo(s"--- ${tr.getName}${getParams(tr)} --- test start.")
  }

  override def onTestFailure(tr: ITestResult): Unit = {
    logError(s"--- ${tr.getName}${getParams(tr)} --- failed, took ${getSeconds(tr)}s.")
    val params = tr.getParameters
    if (params.nonEmpty) {
      logError(s"test parameters: $params.")
    }
    logError("detail:", tr.getThrowable)
  }

  override def onTestSkipped(tr: ITestResult): Unit = {
    logWarning(s"--- ${tr.getName}${getParams(tr)} --- skipped, took ${getSeconds(tr)}s.")
  }

  override def onTestSuccess(tr: ITestResult): Unit = {
    logInfo(s"--- ${tr.getName}${getParams(tr)} --- passed, took ${getSeconds(tr)}s.")
  }

  private def getParams(tr: ITestResult): String = {
    val params = tr.getParameters
    if (params.nonEmpty) {
      s" [${StringUtils.join(params, ", ")}]"
    } else {
      ""
    }
  }

  private def getSeconds(tr: ITestResult) = (tr.getEndMillis - tr.getStartMillis).toDouble / 1000
} 
Example 3
Source File: Generator.scala    From donut   with MIT License
package report.donut

import org.apache.commons.lang3.StringUtils
import org.joda.time.DateTime
import org.joda.time.format.{DateTimeFormat, DateTimeFormatter}
import report.donut.gherkin.model._
import report.donut.log.Log
import report.donut.performance.PerformanceSupport
import report.donut.template.TemplateEngine
import report.donut.transformers.cucumber.{CucumberTransformer, Feature => CucumberFeature}

import scala.collection.mutable.ListBuffer
import scala.util.Try

object Generator extends Log with PerformanceSupport {

  val formatter: DateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd-HHmm")

  //this wrapper is currently used to help the java maven plugin
  def apply(resultSources: String,
            outputPath: String = "donut",
            filePrefix: String = "",
            dateTime: String,
            template: String = "default",
            countSkippedAsFailure: Boolean = false,
            countPendingAsFailure: Boolean = false,
            countUndefinedAsFailure: Boolean = false,
            countMissingAsFailure: Boolean = false,
            projectName: String,
            projectVersion: String,
            customAttributes: scala.collection.mutable.Map[String, String]): ReportConsole = {

    createReport(resultSources, outputPath, filePrefix, dateTime, template, countSkippedAsFailure, countPendingAsFailure,
      countUndefinedAsFailure, countMissingAsFailure, projectName, projectVersion, customAttributes.toMap) match {
      case Right(report) => ReportConsole(report)
      case Left(error) => throw DonutException(s"An error occurred while generating donut report. $error")
    }
  }

  private[donut] def createReport(resultSources: String,
                                  outputPath: String = "donut",
                                  filePrefix: String = "",
                                  datetime: String = formatter.print(DateTime.now),
                                  template: String = "default",
                                  countSkippedAsFailure: Boolean = false,
                                  countPendingAsFailure: Boolean = false,
                                  countUndefinedAsFailure: Boolean = false,
                                  countMissingAsFailure: Boolean = false,
                                  projectName: String,
                                  projectVersion: String,
                                  customAttributes: Map[String, String] = Map()): Either[String, Report] = {

    //Prepare objects
    val statusConf = StatusConfiguration(countSkippedAsFailure, countPendingAsFailure, countUndefinedAsFailure, countMissingAsFailure)
    val projectMetadata = ProjectMetadata(projectName, projectVersion, customAttributes)
    val reportStartedTimestamp = Try(formatter.parseDateTime(datetime)).getOrElse(DateTime.now)

    for {
      resultSourceList <- if (!StringUtils.isBlank(resultSources)) Right(resultSources.split(",").map(_.trim).toList).right else Left("Unable to extract the paths to the result sources. Please use this format:- cucumber:/my/path/cucumber-reports,cucumber:/my/other/path/adapted-reports").right
      features <- timed("step1", "Loaded result sources") {
        loadResultSources(resultSourceList, statusConf).right
      }
      report <- timed("step2", "Produced report") {
        Right(Report(features, reportStartedTimestamp, projectMetadata)).right
      }
      _ <- TemplateEngine(report, s"/templates/$template/index.html").renderToHTML(outputPath, filePrefix).right
    } yield report
  }

  
  def loadResultSources(resultSourceList: List[String], statusConf: StatusConfiguration): Either[String, List[Feature]] = {
    var features = new ListBuffer[CucumberFeature]
    for (resultSource <- resultSourceList) {
      val result = ResultLoader(resultSource).load
      if (result.isLeft) return Left(result.left.get)
      features ++= result.right.get
    }
    val donutFeatures = CucumberTransformer.transform(features.toList, statusConf).right.get
    Try(donutFeatures.toList).toEither(_.getMessage)
  }
}

case class DonutException(msg: String) extends Exception(msg)
Example 4
Source File: ResultLoader.scala    From donut   with MIT License
package report.donut

import java.io.File

import org.apache.commons.lang3.StringUtils
import org.json4s.{DefaultFormats, JValue}
import report.donut.gherkin.processors.JSONProcessor
import report.donut.transformers.cucumber.Feature

import scala.util.Try

trait ResultLoader {
  def load(): Either[String, List[Feature]]
}

object ResultLoader {

  private[donut] class CucumberResultLoader(sourceDir: File) extends ResultLoader {
    override def load(): Either[String, List[Feature]] = {
      if (!sourceDir.exists) {
        return Left(s"Source directory does not exist: $sourceDir")
      }

      val jsonValues = JSONProcessor.loadFrom(sourceDir) match {
        case Left(errors) => return Left(errors)
        case Right(r) => if (r.isEmpty) return Left("No files found of correct format") else Right(r)
      }

      Try(loadCukeFeatures(jsonValues.right.get)).toEither(_.getMessage)
    }

    private[donut] def loadCukeFeatures(json: List[JValue]) = {
      implicit val formats = DefaultFormats
      json.flatMap(f => f.extract[List[Feature]])
    }
  }

  def apply(resultSource: String): ResultLoader = {
    val pattern = "([a-zA-Z]{2,}):(.*)".r
    pattern.findFirstMatchIn(resultSource) match {
      case Some(m) => {
        val format = m.group(1)
        val sourcePath = m.group(2)
        if (StringUtils.isBlank(sourcePath)) {
          throw new DonutException("Please provide the source directory path.")
        }
        format match {
          case "cucumber" => new CucumberResultLoader(new File(sourcePath))
          case _ => throw DonutException(s"Unsupported result format: $format")
        }
      }
      case None => new CucumberResultLoader(new File(resultSource)) //Defaults to cucumber result format
    }
  }
} 
Example 5
Source File: ArticleValidator.scala    From scala-play-realworld-example-app   with MIT License
package articles.services

import commons.utils.RealWorldStringUtils
import commons.validations.PropertyViolation
import commons.validations.constraints._
import articles.models.{ArticleUpdate, NewArticle}
import org.apache.commons.lang3.StringUtils

import scala.concurrent.ExecutionContext

class ArticleValidator(implicit private val ec: ExecutionContext) {

  private val titleValidator = new TitleValidator
  private val descriptionValidator = new DescriptionValidator
  private val bodyValidator = new BodyValidator
  private val tagValidator = new TagValidator

  def validateNewArticle(newArticle: NewArticle): Seq[PropertyViolation] = {
    require(newArticle != null)

    validateTitle(newArticle.title) ++
      validateDescription(newArticle.description) ++
      validateBody(newArticle.body) ++
      validateTags(newArticle.tagList)
  }

  private def validateTags(tags: Seq[String]) = tags.flatMap(tagValidator.validate)

  def validateArticleUpdate(articleUpdate: ArticleUpdate): Seq[PropertyViolation] = {
    val titleViolations = articleUpdate.title.map(validateTitle).getOrElse(Seq.empty)
    val descriptionViolations = articleUpdate.description.map(validateDescription).getOrElse(Seq.empty)
    val bodyViolations = articleUpdate.body.map(validateBody).getOrElse(Seq.empty)

    titleViolations ++ descriptionViolations ++ bodyViolations
  }

  private def validateTitle(title: String) = titleValidator.validate(title)

  private def validateDescription(description: String) = descriptionValidator.validate(description)

  private def validateBody(body: String) = bodyValidator.validate(body)

  private class StringValidator(minLength: Int = 0, maxLength: Int = Int.MaxValue) {

    def validate(str: String): Seq[Violation] = {
      if (StringUtils.isBlank(str)) Seq(NotNullViolation)
      else if (str.length < minLength) Seq(MinLengthViolation(minLength))
      else if (str.length > maxLength) Seq(MaxLengthViolation(maxLength))
      else if (RealWorldStringUtils.startsWithWhiteSpace(str)
        || RealWorldStringUtils.endsWithWhiteSpace(str)) Seq(PrefixOrSuffixWithWhiteSpacesViolation)
      else Nil
    }
  }

  private class TitleValidator {
    private val maxLength = 255
    private val stringValidator = new StringValidator(maxLength = maxLength)

    def validate(title: String): Seq[PropertyViolation] = {
      stringValidator.validate(title)
        .map(PropertyViolation("title", _))
    }
  }

  private class DescriptionValidator {
    private val maxLength = 255
    private val stringValidator = new StringValidator(maxLength = maxLength)

    def validate(description: String): Seq[PropertyViolation] = {
      stringValidator.validate(description)
        .map(PropertyViolation("description", _))
    }
  }

  private class BodyValidator {
    private val stringValidator = new StringValidator

    def validate(body: String): Seq[PropertyViolation] = {
      stringValidator.validate(body)
        .map(PropertyViolation("body", _))
    }
  }

  private class TagValidator {
    private val maxLength = 255
    private val stringValidator = new StringValidator(maxLength = maxLength)

    def validate(tag: String): Seq[PropertyViolation] = {
      stringValidator.validate(tag)
        .map(PropertyViolation("tag", _))
    }
  }

} 
Example 6
Source File: CommentController.scala    From scala-play-realworld-example-app   with MIT License
package articles.controllers

import commons.exceptions.MissingModelException
import commons.services.ActionRunner
import articles.exceptions.AuthorMismatchException
import articles.models._
import articles.services.CommentService
import commons.controllers.RealWorldAbstractController
import org.apache.commons.lang3.StringUtils
import play.api.libs.json._
import play.api.mvc.{Action, AnyContent, ControllerComponents}
import users.controllers.{AuthenticatedActionBuilder, OptionallyAuthenticatedActionBuilder}

class CommentController(authenticatedAction: AuthenticatedActionBuilder,
                        optionallyAuthenticatedActionBuilder: OptionallyAuthenticatedActionBuilder,
                        actionRunner: ActionRunner,
                        commentService: CommentService,
                        components: ControllerComponents)
  extends RealWorldAbstractController(components) {

  def delete(id: CommentId): Action[AnyContent] = authenticatedAction.async { request =>

    actionRunner.runTransactionally(commentService.delete(id, request.user.userId))
      .map(_ => Ok)
      .recover({
        case _: AuthorMismatchException => Forbidden
        case _: MissingModelException => NotFound
      })
  }

  def findByArticleSlug(slug: String): Action[AnyContent] = optionallyAuthenticatedActionBuilder.async { request =>
    require(StringUtils.isNotBlank(slug))

    val maybeUserId = request.authenticatedUserOption.map(_.userId)
    actionRunner.runTransactionally(commentService.findByArticleSlug(slug, maybeUserId))
      .map(CommentList(_))
      .map(Json.toJson(_))
      .map(Ok(_))
      .recover({
        case _: MissingModelException => NotFound
      })
  }

  def create(slug: String): Action[_] = authenticatedAction.async(validateJson[NewCommentWrapper]) { request =>
    require(StringUtils.isNotBlank(slug))

    val newComment = request.body.comment
    val userId = request.user.userId

    actionRunner.runTransactionally(commentService.create(newComment, slug, userId)
      .map(CommentWrapper(_))
      .map(Json.toJson(_))
      .map(Ok(_)))
      .recover({
        case _: MissingModelException => NotFound
      })
  }

} 
Example 7
Source File: EnvHelper.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql

import org.apache.commons.lang3.StringUtils

import org.apache.carbondata.core.metadata.DatabaseLocationProvider


object EnvHelper {

  def isLegacy(sparkSession: SparkSession): Boolean = false

  def isPrivacy(sparkSession: SparkSession, isExternal: Boolean): Boolean = {
    (!isExternal) && isLegacy(sparkSession)
  }

  def setDefaultHeader(
      sparkSession: SparkSession,
      optionsFinal: java.util.Map[String, String]
  ): Unit = {
    if (isLegacy(sparkSession)) {
      val fileHeader = optionsFinal.get("fileheader")
      val header = optionsFinal.get("header")
      if (StringUtils.isEmpty(fileHeader) && StringUtils.isEmpty(header)) {
        optionsFinal.put("header", "false")
      }
    }
  }

  def isRetainData(sparkSession: SparkSession, retainData: Boolean): Boolean = {
    if (isLegacy(sparkSession)) {
      retainData
    } else {
      true
    }
  }

  def getDatabase(database: String): String = {
    DatabaseLocationProvider.get().provide(database)
  }
} 
Example 8
Source File: CarbonSparkStreamingFactory.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql

import org.apache.commons.lang3.StringUtils

import org.apache.carbondata.streaming.CarbonStreamException
import org.apache.carbondata.streaming.CarbonStreamSparkStreaming
import org.apache.carbondata.streaming.CarbonStreamSparkStreamingWriter


object CarbonSparkStreamingFactory {

  def getStreamSparkStreamingWriter(spark: SparkSession,
    dbNameStr: String,
    tableName: String): CarbonStreamSparkStreamingWriter =
    synchronized {
    val dbName = if (StringUtils.isEmpty(dbNameStr)) "default" else dbNameStr
    val key = dbName + "." + tableName
    if (CarbonStreamSparkStreaming.getTableMap.containsKey(key)) {
      CarbonStreamSparkStreaming.getTableMap.get(key)
    } else {
      if (StringUtils.isEmpty(tableName) || tableName.contains(" ")) {
        throw new CarbonStreamException("Table creation failed. " +
                                        "Table name must not be blank or " +
                                        "cannot contain blank space")
      }
      val carbonTable = CarbonEnv.getCarbonTable(Some(dbName),
        tableName)(spark)
      if (!carbonTable.isStreamingSink) {
        throw new CarbonStreamException(s"Table ${carbonTable.getDatabaseName}." +
                                        s"${carbonTable.getTableName} is not a streaming table")
      }
      val streamWriter = new CarbonStreamSparkStreamingWriter(spark,
        carbonTable, spark.sessionState.newHadoopConf())
      CarbonStreamSparkStreaming.getTableMap.put(key, streamWriter)
      streamWriter
    }
  }
} 
Example 9
Source File: NetezzaFilters.scala    From spark-netezza   with Apache License 2.0
package com.ibm.spark.netezza

import java.sql.{Date, Timestamp}

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.sources._


object NetezzaFilters {

  // Note: the original file's scaladoc and helpers such as quoteValue (which quotes
  // literal values for use in Netezza SQL) are omitted from this excerpt.
  def generateFilterExpr(f: Filter): Option[String] = {
    Option(f match {
      case EqualTo(attr, value) => s"$attr = ${quoteValue(value)}"
      case EqualNullSafe(attr, value) =>
        s"(NOT ($attr != ${quoteValue(value)} OR $attr IS NULL OR " +
          s"${quoteValue(value)} IS NULL) OR ($attr IS NULL AND ${quoteValue(value)} IS NULL))"
      case LessThan(attr, value) => s"$attr < ${quoteValue(value)}"
      case GreaterThan(attr, value) => s"$attr > ${quoteValue(value)}"
      case LessThanOrEqual(attr, value) => s"$attr <= ${quoteValue(value)}"
      case GreaterThanOrEqual(attr, value) => s"$attr >= ${quoteValue(value)}"
      case IsNull(attr) => s"$attr IS NULL"
      case IsNotNull(attr) => s"$attr IS NOT NULL"
      case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'"
      case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'"
      case StringContains(attr, value) => s"${attr} LIKE '%${value}%'"
      case In(attr, value) => s"$attr IN (${quoteValue(value)})"
      case Not(f) => generateFilterExpr(f).map(p => s"(NOT ($p))").getOrElse(null)
      case Or(f1, f2) =>
        val or = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (or.size == 2) {
          or.map(p => s"($p)").mkString(" OR ")
        } else {
          null
        }
      case And(f1, f2) =>
        val and = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (and.size == 2) {
          and.map(p => s"($p)").mkString(" AND ")
        } else {
          null
        }
      case _ => null
    })
  }
} 
Example 10
Source File: MetadataOperation.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.operation.metadata

import java.security.PrivilegedExceptionAction

import org.apache.commons.lang3.StringUtils
import org.apache.spark.KyuubiSparkUtil

import yaooqinn.kyuubi.KyuubiSQLException
import yaooqinn.kyuubi.cli.FetchOrientation
import yaooqinn.kyuubi.operation._
import yaooqinn.kyuubi.schema.{RowSet, RowSetBuilder}
import yaooqinn.kyuubi.session.KyuubiSession

abstract class MetadataOperation(session: KyuubiSession, opType: OperationType)
  extends AbstractOperation(session, opType) {

  setHasResultSet(true)

  override def cancel(): Unit = {
    setState(CANCELED)
    throw new UnsupportedOperationException("MetadataOperation.cancel()")
  }

  override def close(): Unit = {
    setState(CLOSED)
    cleanupOperationLog()
  }

  
  protected def convertSchemaPattern(pattern: String): String = {
    if (StringUtils.isEmpty(pattern)) {
      convertPattern("%", datanucleusFormat = true)
    } else {
      convertPattern(pattern, datanucleusFormat = true)
    }
  }

  private def convertPattern(pattern: String, datanucleusFormat: Boolean): String = {
    val wStr = if (datanucleusFormat) "*" else ".*"
    pattern
      .replaceAll("([^\\\\])%", "$1" + wStr)
      .replaceAll("\\\\%", "%")
      .replaceAll("^%", wStr)
      .replaceAll("([^\\\\])_", "$1.")
      .replaceAll("\\\\_", "_")
      .replaceAll("^_", ".")
  }

  protected def execute(block: => Unit): Unit = {
    setState(RUNNING)
    try {
      session.ugi.doAs(new PrivilegedExceptionAction[Unit] {
        override def run(): Unit = block
      })
      setState(FINISHED)
    } catch {
      case e: Exception =>
        setState(ERROR)
        throw new KyuubiSQLException(KyuubiSparkUtil.findCause(e))
    }
  }

  override def getNextRowSet(order: FetchOrientation, rowSetSize: Long): RowSet = {
    assertState(FINISHED)
    validateDefaultFetchOrientation(order)
    val taken = iter.take(rowSetSize.toInt)
    RowSetBuilder.create(getResultSetSchema, taken.toSeq, getProtocolVersion)
  }

} 
Example 11
Source File: HiveTokenCollector.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.session.security

import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.hive.ql.metadata.Hive
import org.apache.hadoop.io.Text
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.apache.hadoop.security.token.Token
import org.apache.kyuubi.Logging
import org.apache.spark.SparkConf

import yaooqinn.kyuubi.utils.KyuubiHadoopUtil
import yaooqinn.kyuubi.utils.KyuubiHiveUtil._

private[security] object HiveTokenCollector extends TokenCollector with Logging {

  override def obtainTokens(conf: SparkConf): Unit = {
    try {
      val c = hiveConf(conf)
      val principal = c.getTrimmed(METASTORE_PRINCIPAL)
      val uris = c.getTrimmed(URIS)
      require(StringUtils.isNotEmpty(principal), METASTORE_PRINCIPAL + " Undefined")
      require(StringUtils.isNotEmpty(uris), URIS + " Undefined")
      val currentUser = UserGroupInformation.getCurrentUser.getUserName
      val credentials = new Credentials()
      KyuubiHadoopUtil.doAsRealUser {
        val hive = Hive.get(c, true)
        info(s"Getting token from Hive Metastore for owner $currentUser via $principal")
        val tokenString = hive.getDelegationToken(currentUser, principal)
        val token = new Token[DelegationTokenIdentifier]
        token.decodeFromUrlString(tokenString)
        info(s"Got " + DelegationTokenIdentifier.stringifyToken(token))
        credentials.addToken(new Text("hive.metastore.delegation.token"), token)
      }
      UserGroupInformation.getCurrentUser.addCredentials(credentials)
    } catch {
      case NonFatal(e) =>
        error("Failed to get token from hive metatore service", e)
    } finally {
      Hive.closeCurrent()
    }
  }

  override def tokensRequired(conf: SparkConf): Boolean = {
    UserGroupInformation.isSecurityEnabled && StringUtils.isNotBlank(hiveConf(conf).get(URIS))
  }
} 
Example 12
Source File: LdapAuthenticationProviderImpl.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.auth

import java.util.Hashtable
import javax.naming.{Context, NamingException}
import javax.naming.directory.InitialDirContext
import javax.security.sasl.AuthenticationException

import org.apache.commons.lang3.StringUtils
import org.apache.spark.{KyuubiConf, SparkConf}

import yaooqinn.kyuubi.service.ServiceUtils

class LdapAuthenticationProviderImpl(conf: SparkConf) extends PasswdAuthenticationProvider {

  import KyuubiConf._

  
  override def authenticate(user: String, password: String): Unit = {
    if (StringUtils.isBlank(user)) {
      throw new AuthenticationException(s"Error validating LDAP user, user is null" +
        s" or contains blank space")
    }

    if (StringUtils.isBlank(password)) {
      throw new AuthenticationException(s"Error validating LDAP user, password is null" +
        s" or contains blank space")
    }

    val env = new Hashtable[String, Any]()
    env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory")
    env.put(Context.SECURITY_AUTHENTICATION, "simple")

    conf.getOption(AUTHENTICATION_LDAP_URL).foreach(env.put(Context.PROVIDER_URL, _))

    val domain = conf.get(AUTHENTICATION_LDAP_DOMAIN, "")
    val u = if (!hasDomain(user) && StringUtils.isNotBlank(domain)) {
      user + "@" + domain
    } else {
      user
    }

    val bindDn = conf.getOption(AUTHENTICATION_LDAP_BASEDN) match {
      case Some(dn) => "uid=" + u + "," + dn
      case _ => u
    }

    env.put(Context.SECURITY_PRINCIPAL, bindDn)
    env.put(Context.SECURITY_CREDENTIALS, password)

    try {
      val ctx = new InitialDirContext(env)
      ctx.close()
    } catch {
      case e: NamingException =>
        throw new AuthenticationException(s"Error validating LDAP user: $bindDn", e)
    }
  }

  private def hasDomain(userName: String): Boolean = ServiceUtils.indexOfDomainMatch(userName) > 0
} 
Example 13
Source File: MapreduceTransformation.scala    From schedoscope   with Apache License 2.0
package org.schedoscope.dsl.transformations

import java.net.URI

import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.{Job, MRJobConfig}
import org.schedoscope.Schedoscope
import org.schedoscope.dsl.View
import org.schedoscope.scheduler.driver.{DriverRunState, MapreduceDriver}
import org.schedoscope.scheduler.service.ViewTransformationStatus


case class MapreduceTransformation(v: View,
                                   createJob: (Map[String, Any]) => Job,
                                   cleanupAfterJob: (Job, MapreduceDriver, DriverRunState[MapreduceBaseTransformation]) => DriverRunState[MapreduceBaseTransformation] = (_, __, completionRunState) => completionRunState,
                                   dirsToDelete: List[String] = List(),
                                   deleteViewPath: Boolean = true) extends MapreduceBaseTransformation {

  lazy val job = createJob(configuration.toMap)

  var directoriesToDelete = dirsToDelete ++ (if (deleteViewPath) List(v.fullPath) else List())

  description = StringUtils.abbreviate(v.urlPath, 100)
}

trait MapreduceBaseTransformation extends Transformation {

  def name = "mapreduce"

  val cleanupAfterJob: (Job, MapreduceDriver, DriverRunState[MapreduceBaseTransformation]) => DriverRunState[MapreduceBaseTransformation]

  val v: View

  val job: Job

  var directoriesToDelete: List[String]

  override def fileResourcesToChecksum = {
    val jarName = try {
      job.getConfiguration().get(MRJobConfig.JAR).split("/").last
    } catch {
      case _: Throwable => null
    }

    Schedoscope.settings
      .getDriverSettings("mapreduce")
      .libJarsHdfs
      .filter(lj => jarName == null || lj.contains(jarName))
  }

  override def viewTransformationStatus = ViewTransformationStatus(
    name,
    Some(Map(
      "input" -> job.getConfiguration().get(FileInputFormat.INPUT_DIR),
      "output" -> job.getConfiguration().get(FileOutputFormat.OUTDIR))))

  def configure() {
    // if job jar hasn't been registered, add all mapreduce libjars
    // to distributed cache
    if (job.getConfiguration().get(MRJobConfig.JAR) == null) {
      fileResourcesToChecksum.foreach(r => {
        try {
          job.addCacheFile(new URI(r))
        } catch {
          case _: Throwable => Unit
        }
      })
    }
    configuration.foreach { case (k, v) => if (v == null) job.getConfiguration.unset(k) else job.getConfiguration.set(k, v.toString) }
  }
} 
Example 14
Source File: ServiceMetadataDocumentGenerator.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.writers.es

import java.time.Instant

import com.expedia.open.tracing.Span
import com.expedia.www.haystack.commons.metrics.MetricsSupport
import com.expedia.www.haystack.trace.commons.clients.es.document.ServiceMetadataDoc
import com.expedia.www.haystack.trace.commons.utils.SpanUtils
import com.expedia.www.haystack.trace.indexer.config.entities.ServiceMetadataWriteConfiguration
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable

class ServiceMetadataDocumentGenerator(config: ServiceMetadataWriteConfiguration) extends MetricsSupport {

  private var serviceMetadataMap = new mutable.HashMap[String, mutable.Set[String]]()
  private var allOperationCount: Int = 0
  private var lastFlushInstant = Instant.MIN

  private def shouldFlush: Boolean = {
    config.flushIntervalInSec == 0 || Instant.now().minusSeconds(config.flushIntervalInSec).isAfter(lastFlushInstant)
  }

  private def areStatementsReadyToBeExecuted(): Seq[ServiceMetadataDoc] = {
    if (serviceMetadataMap.nonEmpty && (shouldFlush || allOperationCount > config.flushOnMaxOperationCount)) {
      val statements = serviceMetadataMap.flatMap {
        case (serviceName, operationList) =>
          createServiceMetadataDoc(serviceName, operationList)
      }

      lastFlushInstant = Instant.now()
      serviceMetadataMap = new mutable.HashMap[String, mutable.Set[String]]()
      allOperationCount = 0
      statements.toSeq
    } else {
      Nil
    }
  }

  
  def createServiceMetadataDoc(serviceName: String, operationList: mutable.Set[String]): List[ServiceMetadataDoc] = {
    operationList.map(operationName => ServiceMetadataDoc(serviceName, operationName)).toList

  }
} 
Example 15
Source File: ContinuousTrigger.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution.streaming.continuous

import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.commons.lang3.StringUtils

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.streaming.{ProcessingTime, Trigger}
import org.apache.spark.unsafe.types.CalendarInterval


@InterfaceStability.Evolving
case class ContinuousTrigger(intervalMs: Long) extends Trigger {
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}

private[sql] object ContinuousTrigger {
  def apply(interval: String): ContinuousTrigger = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    if (cal.months > 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ContinuousTrigger(cal.microseconds / 1000)
  }

  def apply(interval: Duration): ContinuousTrigger = {
    ContinuousTrigger(interval.toMillis)
  }

  def create(interval: String): ContinuousTrigger = {
    apply(interval)
  }

  def create(interval: Long, unit: TimeUnit): ContinuousTrigger = {
    ContinuousTrigger(unit.toMillis(interval))
  }
} 
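ContinuousTrigger itself is private[sql], so application code normally reaches it through the public Trigger factory methods, which accept the same interval formats. A brief sketch of that public entry point (assuming Spark 2.3 or later):

import org.apache.spark.sql.streaming.Trigger

object ContinuousTriggerUsage {
  // Both calls resolve to a continuous trigger with a 1000 ms checkpoint interval.
  val fromString = Trigger.Continuous("1 second")
  val fromMillis = Trigger.Continuous(1000L)
}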
Example 16
Source File: DriverWrapper.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.deploy.worker

import java.io.File

import org.apache.commons.lang3.StringUtils

import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil, SparkSubmit}
import org.apache.spark.internal.Logging
import org.apache.spark.rpc.RpcEnv
import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils}


object DriverWrapper extends Logging {

  // Note: the listing strips the original scaladoc; main() parses the worker URL,
  // user jar and driver main class, then delegates to the user's main method.
  def main(args: Array[String]): Unit = {
    args.toList match {
      case workerUrl :: userJar :: mainClass :: extraArgs =>
        val conf = new SparkConf()
        val host: String = Utils.localHostName()
        val port: Int = sys.props.getOrElse("spark.driver.port", "0").toInt
        val rpcEnv = RpcEnv.create("Driver", host, port, conf, new SecurityManager(conf))
        logInfo(s"Driver address: ${rpcEnv.address}")
        rpcEnv.setupEndpoint("workerWatcher", new WorkerWatcher(rpcEnv, workerUrl))

        val currentLoader = Thread.currentThread.getContextClassLoader
        val userJarUrl = new File(userJar).toURI().toURL()
        val loader =
          if (sys.props.getOrElse("spark.driver.userClassPathFirst", "false").toBoolean) {
            new ChildFirstURLClassLoader(Array(userJarUrl), currentLoader)
          } else {
            new MutableURLClassLoader(Array(userJarUrl), currentLoader)
          }
        Thread.currentThread.setContextClassLoader(loader)
        setupDependencies(loader, userJar)

        // Delegate to supplied main class
        val clazz = Utils.classForName(mainClass)
        val mainMethod = clazz.getMethod("main", classOf[Array[String]])
        mainMethod.invoke(null, extraArgs.toArray[String])

        rpcEnv.shutdown()

      case _ =>
        // scalastyle:off println
        System.err.println("Usage: DriverWrapper <workerUrl> <userJar> <driverMainClass> [options]")
        // scalastyle:on println
        System.exit(-1)
    }
  }

  private def setupDependencies(loader: MutableURLClassLoader, userJar: String): Unit = {
    val sparkConf = new SparkConf()
    val secMgr = new SecurityManager(sparkConf)
    val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf)

    val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) =
      Seq(
        "spark.jars.excludes",
        "spark.jars.packages",
        "spark.jars.repositories",
        "spark.jars.ivy",
        "spark.jars.ivySettings"
      ).map(sys.props.get(_).orNull)

    val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(packagesExclusions,
      packages, repositories, ivyRepoPath, Option(ivySettingsPath))
    val jars = {
      val jarsProp = sys.props.get("spark.jars").orNull
      if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
        SparkSubmit.mergeFileLists(jarsProp, resolvedMavenCoordinates)
      } else {
        jarsProp
      }
    }
    val localJars = DependencyUtils.resolveAndDownloadJars(jars, userJar, sparkConf, hadoopConf,
      secMgr)
    DependencyUtils.addJarsToClassPath(localJars, loader)
  }
} 
Example 17
Source File: PostgreSqlMain.scala    From ingraph   with Eclipse Public License 1.0
package ingraph.compiler.sql

import java.sql.DriverManager

import ingraph.compiler.sql.Util.withResources
import org.apache.commons.lang3.StringUtils

object PostgreSqlMain extends App {

  // https://github.com/yandex-qatools/postgresql-embedded/tree/ea26f6945478da8e8b48e382f8869896da2fda30#howto
  withResources(new EmbeddedPostgresWrapper) { postgres =>
    withResources(DriverManager.getConnection(postgres.Url)) { conn =>
      withResources(conn.createStatement()) {
        _.execute("CREATE TABLE films (code char(5));")
      }
      withResources(conn.createStatement()) {
        _.execute("INSERT INTO films VALUES ('movie');")
      }

      withResources(conn.createStatement) { statement =>
        assert(statement.execute("SELECT * FROM films;"))
        assert(statement.getResultSet().next())

        val code = statement.getResultSet().getString("code")

        val separator = StringUtils.repeat('=', 42)
        println(separator)
        println(code)
        println(separator)

        assert(code == "movie")
      }
    }
  }
} 
Example 18
Source File: Queryable.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.sql.execution

import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.StructType


private[sql] trait Queryable {

  // Note: the listing omits the rest of this trait; the abstract schema member is
  // declared here because formatString below relies on it.
  def schema: StructType

  private[sql] def formatString (
      rows: Seq[Seq[String]],
      numRows: Int,
      hasMoreData : Boolean,
      truncate: Boolean = true): String = {
    val sb = new StringBuilder
    val numCols = schema.fieldNames.length

    // Initialise the width of each column to a minimum value of '3'
    val colWidths = Array.fill(numCols)(3)

    // Compute the width of each column
    for (row <- rows) {
      for ((cell, i) <- row.zipWithIndex) {
        colWidths(i) = math.max(colWidths(i), cell.length)
      }
    }

    // Create SeparateLine
    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

    // column names
    rows.head.zipWithIndex.map { case (cell, i) =>
      if (truncate) {
        StringUtils.leftPad(cell, colWidths(i))
      } else {
        StringUtils.rightPad(cell, colWidths(i))
      }
    }.addString(sb, "|", "|", "|\n")

    sb.append(sep)

    // data
    rows.tail.map {
      _.zipWithIndex.map { case (cell, i) =>
        if (truncate) {
          StringUtils.leftPad(cell.toString, colWidths(i))
        } else {
          StringUtils.rightPad(cell.toString, colWidths(i))
        }
      }.addString(sb, "|", "|", "|\n")
    }

    sb.append(sep)

    // For Data that has more than "numRows" records
    if (hasMoreData) {
      val rowsString = if (numRows == 1) "row" else "rows"
      sb.append(s"only showing top $numRows $rowsString\n")
    }

    sb.toString()
  }
} 
Example 19
Source File: DataFramePrettyPrinter.scala    From lighthouse   with Apache License 2.0
package be.dataminded.lighthouse.testing

import java.sql.Date

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.catalyst.util.DateTimeUtils

private[testing] object DataFramePrettyPrinter {

  def prettyPrintDataFrame(df: DataFrame, number: Int, truncate: Int = 20): String = {
    val numRows     = number.max(0)
    val takeResult  = df.take(numRows + 1)
    val hasMoreData = takeResult.length > numRows
    val data        = takeResult.take(numRows)

    val header = df.schema.fieldNames.toSeq

    def asReadableRows = {
      data.map { row =>
        row.toSeq.map { cell =>
          val str = cell match {
            case null                => "null"
            case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
            case array: Array[_]     => array.mkString("[", ", ", "]")
            case seq: Seq[_]         => seq.mkString("[", ", ", "]")
            case d: Date =>
              DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d))
            case _ => cell.toString
          }
          if (truncate > 0 && str.length > truncate) {
            // do not show ellipses for strings shorter than 4 characters.
            if (truncate < 4) str.substring(0, truncate)
            else str.substring(0, truncate - 3) + "..."
          } else {
            str
          }
        }: Seq[String]
      }
    }

    // For array values, replace Seq and Array with square brackets
    // For cells that are beyond `truncate` characters, replace it with the
    // first `truncate-3` and "..."
    val rows: Seq[Seq[String]] = header +: asReadableRows

    val sb = new StringBuilder

    // Initialise the width of each column to a minimum value of '3'
    val colWidths = Array.fill(header.length)(3)

    // Compute the width of each column
    for (row <- rows) {
      for ((cell, i) <- row.zipWithIndex) {
        colWidths(i) = math.max(colWidths(i), cell.length)
      }
    }

    // Create SeparateLine
    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

    // column names
    rows.head.zipWithIndex
      .map {
        case (cell, i) =>
          if (truncate > 0) {
            StringUtils.leftPad(cell, colWidths(i))
          } else {
            StringUtils.rightPad(cell, colWidths(i))
          }
      }
      .addString(sb, "|", "|", "|\n")

    sb.append(sep)

    // data
    rows.tail.map {
      _.zipWithIndex
        .map {
          case (cell, i) =>
            if (truncate > 0) {
              StringUtils.leftPad(cell.toString, colWidths(i))
            } else {
              StringUtils.rightPad(cell.toString, colWidths(i))
            }
        }
        .addString(sb, "|", "|", "|\n")
    }

    sb.append(sep)

    // For Data that has more than "numRows" records
    if (hasMoreData) {
      val rowsString = if (numRows == 1) "row" else "rows"
      sb.append(s"only showing top $numRows $rowsString\n")
    }

    sb.toString()
  }
} 
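DataFramePrettyPrinter is marked private[testing], so a caller has to sit in the same package. A minimal usage sketch (assuming Spark in local mode; the object and column names are made up for illustration):

package be.dataminded.lighthouse.testing

import org.apache.spark.sql.SparkSession

object PrettyPrintDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("pretty-print-demo").getOrCreate()
    import spark.implicits._

    val df = Seq(("alice", 1), ("bob", 2)).toDF("name", "count")
    // Renders an ASCII table with at most 20 rows, truncating cells longer than 20 characters.
    println(DataFramePrettyPrinter.prettyPrintDataFrame(df, number = 20))

    spark.stop()
  }
}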
Example 20
Source File: ZipkinTraceFilter.scala    From play-zipkin-tracing   with Apache License 2.0
package brave.play.filter

import javax.inject.Inject

import akka.stream.Materializer
import brave.play.ZipkinTraceServiceLike
import play.api.mvc.{Filter, Headers, RequestHeader, Result}
import play.api.routing.Router

import scala.concurrent.Future
import scala.util.Failure


class ZipkinTraceFilter @Inject() (tracer: ZipkinTraceServiceLike)(implicit val mat: Materializer) extends Filter {

  import tracer.executionContext
  private val reqHeaderToSpanName: RequestHeader => String = ZipkinTraceFilter.ParamAwareRequestNamer

  def apply(nextFilter: RequestHeader => Future[Result])(req: RequestHeader): Future[Result] = {
    val serverSpan = tracer.serverReceived(
      spanName = reqHeaderToSpanName(req),
      span = tracer.newSpan(req.headers)((headers, key) => headers.get(key))
    )
    val result = nextFilter(req.withHeaders(new Headers(
      (req.headers.toMap.mapValues(_.headOption getOrElse "") ++ tracer.toMap(serverSpan)).toSeq
    )))
    result.onComplete {
      case Failure(t) => tracer.serverSend(serverSpan, "failed" -> s"Finished with exception: ${t.getMessage}")
      case _ => tracer.serverSend(serverSpan)
    }
    result
  }
}

object ZipkinTraceFilter {
  val ParamAwareRequestNamer: RequestHeader => String = { reqHeader =>
    import org.apache.commons.lang3.StringUtils
    val pathPattern = StringUtils.replace(
      reqHeader.attrs.get(Router.Attrs.HandlerDef).map(_.path).getOrElse(reqHeader.path),
      "<[^/]+>", ""
    )
    s"${reqHeader.method} - $pathPattern"
  }
} 
Example 21
Source File: StringDeduplication.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet

import org.apache.commons.lang3.StringUtils
import org.apache.spark.rdd.RDD

import scalaz.Scalaz._

trait StringDeduplication extends Serializable {

  def deduplicateWithContext(rdd: RDD[(Long, String)]): RDD[(Long, String)]
  def deduplicate(rdd: RDD[String]): RDD[(String, String)]

  def initialize(rdd: RDD[String]) = {
    rdd map(s => (s, Map(s -> 1)))
  }

  def identityDedup = (rdd: RDD[(String, Map[String, Int])]) => {
    rdd reduceByKey(_ |+| _)
  }

  def getPreferredAlternative(rdd: RDD[(String, Map[String, Int])]) = {
    rdd flatMap { case (key, tf) =>
      val bestName = tf.toSeq.sortBy(_._2).reverse.head._1
      tf.keySet map(_ -> bestName)
    }
  }

  def stringDedup = (rdd: RDD[(String, Map[String, Int])], stopWords: Set[String]) => {
    rdd map { case (name, others) =>
      (clean(name, stopWords), others)
    } reduceByKey(_ |+| _)
  }

  private def clean(name: String, stopWords: Set[String]) = {
    StringUtils.stripAccents(name)
      .split("\\W+")
      .map(_.trim)
      .filter({ case part => !stopWords.contains(part.toLowerCase()) })
      .mkString(" ")
      .split("(?<=[a-z])(?=[A-Z])")
      .mkString(" ")
      .toLowerCase()
      .split("[^a-z]")
      .map(_.trim)
      .mkString(" ")
  }

} 
Example 22
Source File: EmptinessProfiler.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet.profilers.field

import io.gzet.profilers.Utils
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.Dataset

import scalaz.Scalaz._

case class EmptinessProfiler() {

  def profile(df: Dataset[Array[String]]): Dataset[EmptinessReport] = {

    import df.sparkSession.implicits._

    val features = Utils.buildColumns(df)

    features.map(f => (f.idx, StringUtils.isNotEmpty(f.value))).groupByKey({ case (column, isNotEmpty) =>
      (column, isNotEmpty)
    }).count().map({ case ((column, isNotEmpty), count) =>
      (column, Map(isNotEmpty -> count))
    }).groupByKey({ case (column, map) =>
      column
    }).reduceGroups({ (v1, v2) =>
      (v1._1, v1._2 |+| v2._2)
    }).map({ case (col, (_, map)) =>
      val emptiness = map.getOrElse(false, 0L) / (map.getOrElse(true, 0L) + map.getOrElse(false, 0L)).toDouble
      EmptinessReport(
        col,
        emptiness
      )
    })

  }

}

case class EmptinessReport(
                            field: Int,
                            metricValue: Double
                          ) 
Example 23
Source File: Index.scala    From Mastering-Spark-for-Data-Science   with MIT License
package controllers

import com.typesafe.config.ConfigFactory
import models.{Library, Songs}
import org.apache.commons.lang3.StringUtils
import play.api.Logger
import play.api.data.Form
import play.api.data.Forms._
import play.api.mvc._
import svc.{AnalyzerSvc, CassandraDao, SparkSvc}

object Index extends Controller {

  val config = ConfigFactory.load()
  val minTime = config.getInt("gzet.min.time")
  val maxTime = config.getInt("gzet.max.time")
  val cassandraHost = config.getString("cassandra.host")
  val cassandraPort = config.getInt("cassandra.port")
  val sampleSize = config.getDouble("gzet.sample.size")
  val minMatch = config.getDouble("gzet.min.match")

  val dao = new CassandraDao(cassandraHost, cassandraPort)
  val analyzer = new AnalyzerSvc()
  val spark = new SparkSvc()

  val indexForm: Form[Library] = Form(mapping("path" -> text)(Library.apply)(Library.unapply))

  def index = Action { implicit request =>
    val songs = Songs(dao.getSongs)
    Logger.info(s"Database is currently ${songs.songs.size} songs long")
    Ok(views.html.index(indexForm)(songs))
  }

  def submit = Action { implicit request =>
    indexForm.bindFromRequest.fold(
      errors =>
        Redirect(routes.Index.index()).flashing("error" -> s"Missing path"),
      index =>
        try {
          if(StringUtils.isNotEmpty(index.path)) {
            Logger.info("Dropping database")
            dao.dropSongs
            dao.dropPlaylist
            Logger.info("Submitting job")
            val jobId = spark.index(index.path)
            Redirect(routes.Index.index()).flashing("success" -> jobId)
          } else {
            Redirect(routes.Index.index()).flashing("error" -> s"Missing path")
          }
        } catch {
          case e: Exception =>
            Redirect(routes.Index.index()).flashing("error" -> e.getMessage)
        }
    )
  }
} 
Example 24
Source File: DataSourceV2StringFormat.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.datasources.v2

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.sources.v2.DataSourceV2
import org.apache.spark.util.Utils


trait DataSourceV2StringFormat {

  // Note: the listing strips this trait's scaladoc and its other abstract members;
  // they are restored here because metadataString below references them.
  def source: DataSourceV2

  def output: Seq[Attribute]

  def options: Map[String, String]

  def pushedFilters: Seq[Expression]

  private def sourceName: String = source match {
    case registered: DataSourceRegister => registered.shortName()
    // source.getClass.getSimpleName can cause Malformed class name error,
    // call safer `Utils.getSimpleName` instead
    case _ => Utils.getSimpleName(source.getClass)
  }

  def metadataString: String = {
    val entries = scala.collection.mutable.ArrayBuffer.empty[(String, String)]

    if (pushedFilters.nonEmpty) {
      entries += "Filters" -> pushedFilters.mkString("[", ", ", "]")
    }

    // TODO: we should only display some standard options like path, table, etc.
    if (options.nonEmpty) {
      entries += "Options" -> Utils.redact(options).map {
        case (k, v) => s"$k=$v"
      }.mkString("[", ",", "]")
    }

    val outputStr = Utils.truncatedString(output, "[", ", ", "]")

    val entriesStr = if (entries.nonEmpty) {
      Utils.truncatedString(entries.map {
        case (key, value) => key + ": " + StringUtils.abbreviate(value, 100)
      }, " (", ", ", ")")
    } else {
      ""
    }

    s"$sourceName$outputStr$entriesStr"
  }
} 
Example 25
Source File: TimeWindow.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

case class TimeWindow(
    timeColumn: Expression,
    windowDuration: Long,
    slideDuration: Long,
    startTime: Long) extends UnaryExpression
  with ImplicitCastInputTypes
  with Unevaluable
  with NonSQLExpression {

  //////////////////////////
  // SQL Constructors
  //////////////////////////

  def this(
      timeColumn: Expression,
      windowDuration: Expression,
      slideDuration: Expression,
      startTime: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), TimeWindow.parseExpression(startTime))
  }

  def this(timeColumn: Expression, windowDuration: Expression, slideDuration: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), 0)
  }

  def this(timeColumn: Expression, windowDuration: Expression) = {
    this(timeColumn, windowDuration, windowDuration)
  }

  override def child: Expression = timeColumn
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = new StructType()
    .add(StructField("start", TimestampType))
    .add(StructField("end", TimestampType))

  // This expression is replaced in the analyzer.
  override lazy val resolved = false
}

// Note: the companion object TimeWindow, which defines the parseExpression helper
// used by the constructors above, is omitted from this excerpt.

case class PreciseTimestamp(child: Expression) extends UnaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = LongType
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
    ev.copy(code = eval.code +
      s"""boolean ${ev.isNull} = ${eval.isNull};
         |${ctx.javaType(dataType)} ${ev.value} = ${eval.value};
       """.stripMargin)
  }
} 
Example 26
Source File: UPC.scala    From utils   with Apache License 2.0
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils

import scala.util.Try

object UPC {

  def isValid(input: String) = Try(standardize(input)).map(_ == input).toOption.getOrElse(false)

  

  def standardize(rawUpc: String) : String = {

    def standardizeRec(input: String) : String = {
      if (input.length < 12) {
        standardizeRec(leftPadZeroes(input, 12))
      } else if (input.length == 12) {
        val cDigit = calculateCheckDigit(input.substring(0, 11))
        if (input.last == cDigit + '0' && !isIsbn(input)) {
          input
        } else {
          val cDigit13 = calculateCheckDigit(leftPadZeroes(input, 13))
          input + cDigit13
        }
      } else if (input.length == 14) {
        val cDigit = calculateCheckDigit(input.substring(0, 13))
        if(input.last == cDigit + '0') {
          val gtinWithoutFirstAndCheckDigit = input.substring(1, 13)
          val upcCheckDigit = calculateCheckDigit(gtinWithoutFirstAndCheckDigit)
          standardizeRec(gtinWithoutFirstAndCheckDigit + upcCheckDigit)
        } else {
          fail("not a valid 14 digit UPC")
        }
      } else {
        input
      }
    }

    val cleanedUpc = verifyValidUpc(clean(rawUpc))

    if(isIsbn(rawUpc)) {
      leftPadZeroes(standardizeRec(cleanedUpc), 13)
    } else {
      leftPadZeroes(standardizeRec(cleanedUpc), 14)
    }


  }

  private def isIsbn(input: String) = {
    input.startsWith("978") || input.startsWith("979")
  }

  private def calculateCheckDigit(input: String) = {
    // We compute the odd and even positions from right to left because while computing check digit for EANs
    // the input length would be an even number. This makes the even and odd positions change. While for
    // input with odd length the even and odd positions are the same.
    // Reference - https://en.wikipedia.org/wiki/Check_digit#EAN_(GLN,_GTIN,_EAN_numbers_administered_by_GS1)

    val sumOddDigits = input.reverse.zipWithIndex
      .filter { case (digit, index) => (index + 1) % 2 != 0 }
      .map { case (digit, index) => digit - '0' }
      .sum

    val sumEvenDigits = input.reverse.zipWithIndex
      .filter { case (digit, index) => (index + 1) % 2 == 0 }
      .map { case (digit, index) => digit - '0' }
      .sum

    val checkDigitSum = sumOddDigits * 3 + sumEvenDigits

    if (checkDigitSum % 10 == 0) 0 else 10 - (checkDigitSum % 10)
  }

  private def leftPadZeroes(s: String, length: Int) = StringUtils.leftPad(s, length, '0')

  private def clean(input: String) = input.replaceAll("-", "")


  private def verifyValidUpc(input: String) = {
    if (StringUtils.isEmpty(input))
      fail(input + " is either null / empty")
    else if (!parseLong(input).exists(_ > 0))
      fail("NAN value - " + input)
    else if (input.length < 7 || input.length > 14)
      fail("Invalid UPC/EAN -" + input)
    else if (input.length == input.count(_ == '0'))
      fail("All Zero UPC not allowed. Invalid UPC/EAN - " + input)
    input
  }

  private def fail(message: String) = throw new IllegalArgumentException(message)

  private def parseLong(s: String) = Try(Some(s.toLong)).getOrElse(None)

} 
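A minimal usage sketch for the UPC object above (the sample code is made up for illustration; standardize throws IllegalArgumentException on junk input):

import com.indix.utils.core.UPC

object UpcExample {
  def main(args: Array[String]): Unit = {
    // standardize normalizes a raw code into a zero-padded 14-digit GTIN
    // (13 digits for ISBN-prefixed codes).
    val gtin = UPC.standardize("123456789012")
    println(gtin)

    // isValid is true only for codes that are already in standardized form.
    println(UPC.isValid(gtin))
  }
}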
Example 27
Source File: MPN.scala    From utils   with Apache License 2.0
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils
import org.apache.commons.lang3.text.WordUtils

import scala.io.Source

object MPN {
  // Some domain specific keywords known to be invalid
  val BlackListedMpns = Source.fromInputStream(getClass.getResourceAsStream("/BlacklistMPNs.txt")).getLines.toSet

  val StopChars = Set(' ', '-', '_', '.', '/')
  val TerminateChars = Set(',', '"', '*', '%', '{', '}', '#', '&', '\\')

  val MaxLen = 50
  val MinLen = 3

  // Does not consider one word strings as title-case phrase
  def isTitleCase(str: String): Boolean = {
    val words = str.split(' ').filter(_.nonEmpty)
    if (words.length < 2) false
    else words.forall(w => w == WordUtils.capitalizeFully(w))
  }

  def postProcessIdentifier(input: String): String = {
    val trimmedUpper = input.trim.toUpperCase
    trimmedUpper
  }

  // Check if identifier is valid, also return the identifier to process further if any
  def validateIdentifier(text: String): (Boolean, String) = {
    val input = if (text != null) text.trim() else text
    input match {
      case _ if StringUtils.isBlank(input) || input.length > MaxLen || input.length < MinLen => (false, "")
      case _ if input.count(c => TerminateChars.contains(c)) > 1 => (false, input)
      case _ if BlackListedMpns.contains(input.toLowerCase) => (false, "")
      case _ if isTitleCase(input) => (false, "")
      // Unicode strings yet to be handled
      case _ => (true, input)
    }
  }

  def isValidIdentifier(value: String): Boolean = validateIdentifier(value)._1

  def standardizeMPN(input: String): Option[String] = {
    val (isValid, identifier) = validateIdentifier(input)
    if (isValid) {
      Some(postProcessIdentifier(identifier))
    } else if (StringUtils.isBlank(identifier)) {
      None
    } else if (identifier.indexWhere(c => TerminateChars.contains(c)) > 0) {
      Some(postProcessIdentifier(identifier.substring(0, identifier.indexWhere(c => TerminateChars.contains(c)))))
    }
    else None
  }
} 
Example 28
Source File: ISBN.scala    From utils   with Apache License 2.0
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils

case class ISBN(isbn: String, isbn10: Option[String] = None) {
  override def toString = isbn
}

object ISBN {
  def apply(input: String): Option[ISBN] = {
    Some(input)
      .filter(!StringUtils.isEmpty(_))
      .map(clean)
      .flatMap {
        case x if isValidIsbn10(x) =>
          Some(new ISBN(isbn10to13(x), Some(x)))
        case x if isValidIsbn13(x) =>
          Some(new ISBN(x))
        case _ => None
      }
  }

  private def clean(input: String) = {
    input.replaceAll("[ -]", "")
  }

  private def calculateCheckDigit13(input: String) = {
    val inputWithoutChecksum = input.dropRight(1)

    val sum = inputWithoutChecksum.zipWithIndex.map{
      case (c, i) if i % 2 != 0 => (c - '0') * 3
      case (c, _) => c - '0'
    }.sum

    // second modulo so that a sum already divisible by 10 yields check digit 0
    ((10 - sum % 10) % 10 + '0').toChar
  }

  private def calculateCheckDigit10(input: String) = {
    val sum = input.dropRight(1).map(_ - '0').zip(10 to 2 by -1).foldLeft(0)((i: Int, tuple: (Int, Int)) => i + tuple._1 * tuple._2)
    val checkDigit = (11 - sum % 11) % 11
    if (checkDigit == 10) 'X' else (checkDigit + '0').toChar
  }

  private def isbn10to13(input: String) = {
    val withPrefix = "978" + input
    withPrefix.dropRight(1) + calculateCheckDigit13(withPrefix)
  }

  private def isValidIsbn13(input: String) = {
    input.length == 13 && input.matches("^97[89].+") && input.last == calculateCheckDigit13(input)
  }

  private def isValidIsbn10(input: String) = {
    input.length == 10 && input.last == calculateCheckDigit10(input)
  }

} 
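
A usage sketch of the apply method above, using the well-known "0-306-40615-2" ISBN-10 (the object name is hypothetical):

object IsbnExamples extends App {
  println(ISBN("0-306-40615-2"))  // Some(9780306406157): valid ISBN-10, upgraded to ISBN-13
  println(ISBN("978-0306406157")) // Some(9780306406157): already a valid ISBN-13
  println(ISBN("978-0306406158")) // None: the ISBN-13 check digit does not match
}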
Example 29
Source File: CustomAccumulator.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.liumm.transform

import org.apache.commons.lang3.StringUtils
import org.apache.spark.util.AccumulatorV2


class CustomAccumulator extends AccumulatorV2[String, String] {

  var result = "" // default value

  override def isZero: Boolean = {
    result == ""
  }

  override def copy(): AccumulatorV2[String, String] = {
    val customAccumulator = new CustomAccumulator()
    customAccumulator.result = this.result
    customAccumulator
  }

  override def reset(): Unit = {
    result = ""
  }

  override def add(v: String): Unit = {
    if (StringUtils.isNoneBlank(v)) {
      if (isZero) {
        result = v
      } else {
        result += "|" + v
      }
    }
  }

  override def merge(other: AccumulatorV2[String, String]): Unit = other match {
    case newAc: CustomAccumulator =>
      if (isZero) result = newAc.value
      else if (StringUtils.isNoneBlank(newAc.value)) result += "|" + newAc.value
    case _ =>
      throw new UnsupportedOperationException(
        s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}"
      )
  }

  override def value: String = {
    result
  }
} 
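
A usage sketch of the accumulator in a local Spark job (a minimal sketch, assuming spark-core is on the classpath; names are made up):

import org.apache.spark.{SparkConf, SparkContext}

object CustomAccumulatorExample extends App {
  val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("acc-demo"))

  val acc = new CustomAccumulator
  sc.register(acc, "customAcc")

  // Blank strings are ignored by add(); non-blank values are joined with "|".
  sc.parallelize(Seq("a", "", "b", "c")).foreach(acc.add)
  println(acc.value) // e.g. "a|b|c" (element order across partitions is not guaranteed)

  sc.stop()
}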
Example 30
Source File: StreamUtils.scala    From spark-redis   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.redislabs.provider.redis.util

import org.apache.commons.lang3.StringUtils
import redis.clients.jedis.{StreamEntryID, Jedis}


object StreamUtils extends Logging {

  val EntryIdEarliest = new StreamEntryID(0, 0)

  def createConsumerGroupIfNotExist(conn: Jedis, streamKey: String, groupName: String,
                                    offset: StreamEntryID): Unit = {
    try {
      conn.xgroupCreate(streamKey, groupName, offset, true)
    } catch {
      case e: Exception if StringUtils.contains(e.getMessage, "already exists") =>
        logInfo(s"Consumer group already exists: $groupName")
    }
  }

  def resetConsumerGroup(conn: Jedis, streamKey: String, groupName: String,
                         offset: StreamEntryID): Unit = {
    logInfo(s"Setting consumer group $groupName id to $offset")
    conn.xgroupSetID(streamKey, groupName, offset)
  }
} 
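
A usage sketch (assumes a Redis server on localhost:6379; the stream and group names are made up):

import redis.clients.jedis.Jedis

object StreamUtilsExample extends App {
  val conn = new Jedis("localhost", 6379)
  try {
    // Safe to call repeatedly: an "already exists" error is swallowed and logged.
    StreamUtils.createConsumerGroupIfNotExist(conn, "events-stream", "analytics-group",
      StreamUtils.EntryIdEarliest)
    // Rewind the group so the stream is re-consumed from the beginning.
    StreamUtils.resetConsumerGroup(conn, "events-stream", "analytics-group",
      StreamUtils.EntryIdEarliest)
  } finally {
    conn.close()
  }
}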
Example 31
Source File: RabbitRecordFactory.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.rabbit

import hydra.common.config.ConfigSupport
import hydra.core.ingest.HydraRequest
import hydra.core.transport.{
  AckStrategy,
  HydraRecord,
  RecordFactory,
  RecordMetadata
}
import hydra.rabbit.RabbitRecord.{
  DESTINATION_TYPE_EXCHANGE,
  DESTINATION_TYPE_QUEUE,
  HYDRA_RABBIT_EXCHANGE,
  HYDRA_RABBIT_QUEUE
}
import org.apache.commons.lang3.StringUtils

import scala.concurrent.{ExecutionContext, Future}

object RabbitRecordFactory
    extends RecordFactory[String, String]
    with ConfigSupport {

  override def build(
      request: HydraRequest
  )(implicit ec: ExecutionContext): Future[RabbitRecord] = {
    val props = Seq(
      request.metadataValue(HYDRA_RABBIT_EXCHANGE),
      request.metadataValue(HYDRA_RABBIT_QUEUE)
    ).flatten
    Future {
      require(
        props.length == 1,
        "A single parameter for exchange or queue is required"
      )
      val destination = request.metadataValue(HYDRA_RABBIT_EXCHANGE) match {
        case Some(exchange) => (exchange, DESTINATION_TYPE_EXCHANGE)
        case _ =>
          (
            request.metadataValue(HYDRA_RABBIT_QUEUE).get,
            DESTINATION_TYPE_QUEUE
          )
      }
      RabbitRecord(
        destination._1,
        destination._2,
        request.payload,
        request.ackStrategy
      )
    }
  }
}

case class RabbitRecord(
    destination: String,
    destinationType: String,
    payload: String,
    ackStrategy: AckStrategy
) extends HydraRecord[String, String] {

  override val key: String = StringUtils.EMPTY
}

object RabbitRecord {

  val HYDRA_RABBIT_EXCHANGE = "hydra-rabbit-exchange"

  val HYDRA_RABBIT_QUEUE = "hydra-rabbit-queue"

  val DESTINATION_TYPE_EXCHANGE = "exchange"

  val DESTINATION_TYPE_QUEUE = "queue"

}

case class RabbitRecordMetadata(
    timestamp: Long,
    id: Long,
    destination: String,
    destinationType: String,
    ackStrategy: AckStrategy
) extends RecordMetadata 
Example 32
Source File: JsonRecord.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.producer

import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
import hydra.core.transport.AckStrategy
import org.apache.commons.lang3.StringUtils


case class JsonRecord(
    destination: String,
    key: String,
    payload: JsonNode,
    ackStrategy: AckStrategy
) extends KafkaRecord[String, JsonNode]

object JsonRecord {
  val mapper = new ObjectMapper()

  def apply(
      topic: String,
      key: Option[String],
      obj: Any,
      ackStrategy: AckStrategy
  ): JsonRecord = {
    val payload = mapper.convertValue[JsonNode](obj, classOf[JsonNode])
    new JsonRecord(topic, key.orNull, payload, ackStrategy)
  }
} 
Example 33
Source File: StringRecord.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.producer

import hydra.core.transport.AckStrategy
import org.apache.commons.lang3.StringUtils


case class StringRecord(
    destination: String,
    key: String,
    payload: String,
    ackStrategy: AckStrategy
) extends KafkaRecord[String, String]

object StringRecord {

  def apply(
      topic: String,
      key: Option[String],
      payload: String,
      ackStrategy: AckStrategy
  ): StringRecord =
    new StringRecord(topic, key.orNull, payload, ackStrategy)
} 
Example 34
Source File: AvroRecord.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.producer

import com.pluralsight.hydra.avro.JsonConverter
import hydra.core.transport.AckStrategy
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.commons.lang3.StringUtils


case class AvroRecord(
    destination: String,
    schema: Schema,
    key: String,
    payload: GenericRecord,
    ackStrategy: AckStrategy
) extends KafkaRecord[String, GenericRecord]

object AvroRecord {

  def apply(
      destination: String,
      schema: Schema,
      key: Option[String],
      json: String,
      ackStrategy: AckStrategy,
      useStrictValidation: Boolean = false
  ): AvroRecord = {

    val payload: GenericRecord = {
      val converter: JsonConverter[GenericRecord] =
        new JsonConverter[GenericRecord](schema, useStrictValidation)
      converter.convert(json)
    }

    AvroRecord(destination, schema, key.orNull, payload, ackStrategy)
  }

  def apply(
      destination: String,
      schema: Schema,
      key: Option[String],
      record: GenericRecord,
      ackStrategy: AckStrategy
  ): AvroRecord = {
    AvroRecord(destination, schema, key.orNull, record, ackStrategy)
  }
} 
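
A sketch of the JSON-based apply overload above; the schema, topic and payload are made up, and the ack strategy is left as a parameter rather than assuming a particular AckStrategy value:

import org.apache.avro.Schema
import hydra.core.transport.AckStrategy

object AvroRecordExample {
  def buildUserRecord(ack: AckStrategy): AvroRecord = {
    val schema = new Schema.Parser().parse(
      """{"type":"record","name":"User","fields":[
        |  {"name":"id","type":"int"},
        |  {"name":"name","type":"string"}
        |]}""".stripMargin)

    // JsonConverter converts (and validates) the JSON payload against the schema.
    AvroRecord("user-topic", schema, Some("1"), """{"id":1,"name":"alice"}""", ack)
  }
}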
Example 35
Source File: TimeWindow.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

case class TimeWindow(
    timeColumn: Expression,
    windowDuration: Long,
    slideDuration: Long,
    startTime: Long) extends UnaryExpression
  with ImplicitCastInputTypes
  with Unevaluable
  with NonSQLExpression {

  //////////////////////////
  // SQL Constructors
  //////////////////////////

  def this(
      timeColumn: Expression,
      windowDuration: Expression,
      slideDuration: Expression,
      startTime: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), TimeWindow.parseExpression(startTime))
  }

  def this(timeColumn: Expression, windowDuration: Expression, slideDuration: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), 0)
  }

  def this(timeColumn: Expression, windowDuration: Expression) = {
    this(timeColumn, windowDuration, windowDuration)
  }

  override def child: Expression = timeColumn
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = new StructType()
    .add(StructField("start", TimestampType))
    .add(StructField("end", TimestampType))

  // This expression is replaced in the analyzer.
  override lazy val resolved = false

  // (remainder of TimeWindow, including the companion object that defines the
  //  parseExpression helper used by the auxiliary constructors above, is elided)
}

case class PreciseTimestampConversion(
    child: Expression,
    fromType: DataType,
    toType: DataType) extends UnaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] = Seq(fromType)
  override def dataType: DataType = toType
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
    ev.copy(code = eval.code +
      code"""boolean ${ev.isNull} = ${eval.isNull};
         |${CodeGenerator.javaType(dataType)} ${ev.value} = ${eval.value};
       """.stripMargin)
  }
  override def nullSafeEval(input: Any): Any = input
} 
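
TimeWindow is not constructed directly by users; it is the expression behind org.apache.spark.sql.functions.window. A typical query that produces it (a fragment, assuming a DataFrame named events with an eventTime timestamp column and a userId column):

import org.apache.spark.sql.functions.{col, count, window}

// window() wraps a TimeWindow expression, which the analyzer later rewrites
// (as the comment on `resolved` above indicates).
val counts = events
  .groupBy(window(col("eventTime"), "10 minutes", "5 minutes"), col("userId"))
  .agg(count("*").as("events"))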
Example 36
Source File: SinkRouteHandler.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.shabondi.sink

import java.time.{Duration => JDuration}
import java.util.concurrent.TimeUnit

import akka.actor.ActorSystem
import akka.http.scaladsl.model.{ContentTypes, HttpEntity, StatusCodes}
import akka.http.scaladsl.server.{ExceptionHandler, Route}
import com.typesafe.scalalogging.Logger
import oharastream.ohara.common.data.Row
import oharastream.ohara.common.util.Releasable
import oharastream.ohara.shabondi.common.{JsonSupport, RouteHandler, ShabondiUtils}
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable.ArrayBuffer
import scala.compat.java8.DurationConverters._
import scala.concurrent.ExecutionContextExecutor
import scala.concurrent.duration.Duration
import spray.json.DefaultJsonProtocol._
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._

private[shabondi] object SinkRouteHandler {
  def apply(config: SinkConfig)(implicit actorSystem: ActorSystem) =
    new SinkRouteHandler(config)
}

private[shabondi] class SinkRouteHandler(config: SinkConfig)(implicit actorSystem: ActorSystem) extends RouteHandler {
  implicit private val contextExecutor: ExecutionContextExecutor = actorSystem.dispatcher

  private val log              = Logger(classOf[SinkRouteHandler])
  private[sink] val dataGroups = SinkDataGroups(config)

  def scheduleFreeIdleGroups(interval: JDuration, idleTime: JDuration): Unit =
    actorSystem.scheduler.scheduleWithFixedDelay(Duration(1, TimeUnit.SECONDS), interval.toScala) { () =>
      {
        log.trace("scheduled free group, total group: {} ", dataGroups.size)
        dataGroups.freeIdleGroup(idleTime)
      }
    }

  private val exceptionHandler = ExceptionHandler {
    case ex: Throwable =>
      log.error(ex.getMessage, ex)
      complete((StatusCodes.InternalServerError, ex.getMessage))
  }

  private def fullyPollQueue(queue: RowQueue): Seq[Row] = {
    val buffer    = ArrayBuffer.empty[Row]
    var item: Row = queue.poll()
    while (item != null) {
      buffer += item
      item = queue.poll()
    }
    buffer.toSeq
  }

  private def apiUrl = ShabondiUtils.apiUrl

  def route(): Route = handleExceptions(exceptionHandler) {
    path("groups" / Segment) { groupId =>
      get {
        if (StringUtils.isAlphanumeric(groupId)) {
          val group  = dataGroups.createIfAbsent(groupId)
          val result = fullyPollQueue(group.queue).map(row => JsonSupport.toRowData(row))
          complete(result)
        } else {
          val entity =
            HttpEntity(ContentTypes.`text/plain(UTF-8)`, "Illegal group name; only alphanumeric characters are accepted.")
          complete(StatusCodes.NotAcceptable -> entity)
        }
      } ~ {
        complete(StatusCodes.MethodNotAllowed -> s"Unsupported method, please reference: $apiUrl")
      }
    } ~ {
      complete(StatusCodes.NotFound -> s"Please reference: $apiUrl")
    }
  }

  override def close(): Unit = {
    Releasable.close(dataGroups)
  }
} 
Example 37
Source File: ContinuousTrigger.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.continuous

import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.commons.lang3.StringUtils

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.streaming.{ProcessingTime, Trigger}
import org.apache.spark.unsafe.types.CalendarInterval


@InterfaceStability.Evolving
case class ContinuousTrigger(intervalMs: Long) extends Trigger {
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}

private[sql] object ContinuousTrigger {
  def apply(interval: String): ContinuousTrigger = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    if (cal.months > 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ContinuousTrigger(cal.microseconds / 1000)
  }

  def apply(interval: Duration): ContinuousTrigger = {
    ContinuousTrigger(interval.toMillis)
  }

  def create(interval: String): ContinuousTrigger = {
    apply(interval)
  }

  def create(interval: Long, unit: TimeUnit): ContinuousTrigger = {
    ContinuousTrigger(unit.toMillis(interval))
  }
} 
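
A usage sketch of the string parser above (the interval values are illustrative):

object ContinuousTriggerExamples extends App {
  println(ContinuousTrigger("1 second").intervalMs)           // 1000
  println(ContinuousTrigger("interval 2 minutes").intervalMs) // 120000
}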
Example 38
Source File: DefaultThriftServerNameRule.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.rule.basic

import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule
import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule._
import org.apache.commons.lang3.StringUtils
import com.enjoyyin.hive.proxy.jdbc.domain.ThriftServerName



object DefaultThriftServerNameRule extends ThriftServerNameRule {
  
  override def dealOrNot(params: Map[String, String]): ThriftServerName = {
    var thriftServerName = ""
    if(params != null && params.contains(THRIFT_CONNECTION_NAME)) {
      thriftServerName = params(THRIFT_CONNECTION_NAME)
    }
    var _username = params(USERNAME_NAME)
    if(StringUtils.isEmpty(thriftServerName)) {
      if(StringUtils.isNotEmpty(_username) && _username.indexOf("_") > 0) {
        val _arrays = _username.split("_")
        if(_arrays.length != 2) {
          throw new IllegalArgumentException(s"Illegal username ${_username}.")
        }
        thriftServerName = _arrays(0)
        _username = _arrays(1)
      } else {
        thriftServerName = _username
      }
      if(StringUtils.isEmpty(thriftServerName)) {      
        throw new NullPointerException(s"JDBC url must have $THRIFT_CONNECTION_NAME")
      }
    }
    ThriftServerName(thriftServerName, _username)
  }
  
  override def canDeal(params: Map[String, String]) = true
} 
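
A usage sketch of the rule above (the map keys come from ThriftServerNameRule; user and server names are made up):

import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule.{THRIFT_CONNECTION_NAME, USERNAME_NAME}

object ThriftServerNameExamples extends App {
  // An explicit connection name wins:
  println(DefaultThriftServerNameRule.dealOrNot(
    Map(THRIFT_CONNECTION_NAME -> "cluster2", USERNAME_NAME -> "alice")))
  // => ThriftServerName(cluster2,alice)

  // Otherwise a "server_user" style username is split into server and user:
  println(DefaultThriftServerNameRule.dealOrNot(Map(USERNAME_NAME -> "cluster1_alice")))
  // => ThriftServerName(cluster1,alice)
}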
Example 39
Source File: DefaultStatisticsDealRule.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.rule.basic

import com.enjoyyin.hive.proxy.jdbc.rule.StatisticsDealRule
import com.enjoyyin.hive.proxy.jdbc.thrift.EventInfo
import com.enjoyyin.hive.proxy.jdbc.thrift.Listener
import com.enjoyyin.hive.proxy.jdbc.util.Logging
import com.enjoyyin.hive.proxy.jdbc.util.Utils
import com.enjoyyin.hive.proxy.jdbc.util.ProxyConf._
import com.enjoyyin.hive.proxy.jdbc.thrift.ProxyServiceEvent
import com.enjoyyin.hive.proxy.jdbc.domain.DealRule._
import org.apache.commons.lang3.StringUtils


object DefaultStatisticsDealRule extends StatisticsDealRule with Logging {
  
  override def dealOrNot(eventInfo: EventInfo): Unit = {
    if(StringUtils.isEmpty(eventInfo.hql)) return
    val executeTime = eventInfo.finishedExecuteTime - eventInfo.startExecuteTime
    val waitTime = eventInfo.startExecuteTime - eventInfo.startWaitTime
    val msg = new StringBuilder
    msg ++= "Since the beginning waiting time is " ++ Utils.dateFormat(eventInfo.startWaitTime) ++ 
      ", I who come from (name=" ++ eventInfo.user.username ++ ", ip=" ++ eventInfo.user.ipAddress ++ "), have cost " ++ Utils.msDurationToString(waitTime) ++ " waiting for execute, and the beginning executing time is " ++
      Utils.dateFormat(eventInfo.startExecuteTime) ++ "; have cost " ++
      Utils.msDurationToString(executeTime) ++ " to be completed, when the finish time is " ++
      Utils.dateFormat(eventInfo.finishedExecuteTime) ++
      ", and the final state is " ++ eventInfo.state.toString ++ ", hql is: " ++ eventInfo.hql
    if(StringUtils.isNotEmpty(eventInfo.errorMsg)) {
      msg ++= ", and error message is: " ++ eventInfo.errorMsg
    }
    logInfo(msg.toString)
  }
}

class StatisticsDealListener extends Listener {
  
  def onChange(thriftServerName: String, event: ProxyServiceEvent[_]): Unit = {
    val eventInfo = event.getEventInfo
    eventInfo.foreach { e =>
      getHiveThriftProxyRule(thriftServerName).statisticsDeal match {
        case Default => DefaultStatisticsDealRule.dealOrNot(e)
        case UserDefine => getHiveThriftProxyRuleClass(thriftServerName).statisticsDealRule.dealOrNot(e)
        case _ =>
      }
    }
  }
  
} 
Example 40
Source File: DefaultLoginValidateRule.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.rule.basic

import com.enjoyyin.hive.proxy.jdbc.rule.LoginValidateRule
import com.enjoyyin.hive.proxy.jdbc.util.Logging
import com.enjoyyin.hive.proxy.jdbc.domain.User
import com.enjoyyin.hive.proxy.jdbc.thrift.ProxySession
import com.enjoyyin.hive.proxy.jdbc.util.ProxyConf._
import com.enjoyyin.hive.proxy.jdbc.domain.DealRule._
import org.apache.commons.lang3.StringUtils
import org.apache.hive.service.auth.PasswdAuthenticationProvider
import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule
import com.enjoyyin.hive.proxy.jdbc.util.Utils
import javax.security.sasl.AuthenticationException



object DefaultLoginValidateRule extends LoginValidateRule with PasswdAuthenticationProvider {
  
  private def validateByUser(user: String, password: String): Boolean = {
    val pwd = getPwdByUser(user)
    pwd.contains(password)
  }
  
  override def dealOrNot(user: User): Boolean = {
    val session = user.asInstanceOf[ProxySession]
    val username = session.username
    if(validateByUser(username, session.password)){
      return true
    }
    val thriftServerName = session.thriftServerName
    val userPwds = getHiveThriftServerInfo(thriftServerName).map(t => t.username -> t.password).toMap
    if(!userPwds.contains(username)) {
      return false
    } else if(StringUtils.isNotEmpty(userPwds(username)) &&
        userPwds(username) != session.password) {
      return false
    }
    true
  }
  
  override def Authenticate(user: String, password: String) : Unit = {
    val thriftServer = Utils.tryAndLogError(ThriftServerNameRule.getThriftServerName(null, user, null))
    if(thriftServer != null) {
      val thriftServerName = thriftServer.thriftServerName
      if(!getThriftNames.contains(thriftServerName)) {
        throw new AuthenticationException("Cannot redirect to correct thrift proxy!")
      }
      val session = new ProxySession(null, thriftServerName, 0,
        thriftServer.username, password, null)
      getHiveThriftProxyRule(thriftServerName).loginValidate match {
        case Default => if(!dealOrNot(session)) throw new AuthenticationException("Wrong username or password!")
        case UserDefine => 
          val canLogin = Utils.tryThrow(getHiveThriftProxyRuleClass(thriftServerName).loginValidateRule.dealOrNot(session))(
            new AuthenticationException("Login authentication has occured an error!", _))
          if(!canLogin) {
            throw new AuthenticationException("Wrong username or password!")
          }
        case _ =>
      }
    } else if(!validateByUser(user, password)){
      throw new AuthenticationException("Wrong username or password!")
    }
  }
  
} 
Example 41
Source File: HiveThriftServerInfo.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.domain

import com.enjoyyin.hive.proxy.jdbc.rule.UserAllowRule
import com.enjoyyin.hive.proxy.jdbc.rule.QueryDealRule
import com.enjoyyin.hive.proxy.jdbc.rule.LoginValidateRule
import org.apache.commons.lang3.StringUtils
import com.enjoyyin.hive.proxy.jdbc.rule.StatisticsDealRule
import com.enjoyyin.hive.proxy.jdbc.util.ProxyConf.MAX_EXECUTE_TIME_OF_OPERATION
import com.enjoyyin.hive.proxy.jdbc.util.Utils
import com.enjoyyin.hive.proxy.jdbc.rule.Balancer



object AllowRule extends Enumeration {
  type AllowRule = Value
  val Allow, UserDefine, Error, CancelPrev = Value
}
object DealRule extends Enumeration {
  type DealRule = Value
  val NONE, Default, UserDefine = Value
}
import AllowRule._
import DealRule._
case class HiveThriftServerInfo(thriftServerName: String, serverName: String, valid: Boolean, username: String,
                                password: String, host: String, port: Int, maxThread: Int, loginTimeout: Int = 5000) {
  def validate: Unit = {
    if(StringUtils.isEmpty(host) || !host.matches("[0-9\\.a-zA-Z\\-]{7,}")) {
      throw new IllegalArgumentException(s"Host ($host) of ${thriftServerName} is missing or invalid!")
    }
    if(StringUtils.isEmpty(username)) {
      throw new IllegalArgumentException(s"Username of ${thriftServerName} must not be empty!")
    }
    if(port <= 0) {
      throw new IllegalArgumentException(s"Port of ${thriftServerName} must be greater than 0!")
    }
    if(maxThread <= 0 || maxThread >= 50) {
      throw new IllegalArgumentException(s"maxThread of ${thriftServerName} must be within (0, 50)!")
    }
    if(loginTimeout < MAX_EXECUTE_TIME_OF_OPERATION) {
      throw new IllegalArgumentException(s"loginTimeout of ${thriftServerName} must be at least operation.execute.time.max (${Utils.msDurationToString(MAX_EXECUTE_TIME_OF_OPERATION)})!")
    }
  }
}
case class HiveThriftProxyRule(thriftServerName: String, allowMultiSessionsInIP: AllowRule = Allow,
    allowMultiSessionsInUser: AllowRule = Allow, loginValidate: DealRule = NONE,
    queryDeal: DealRule = Default, statisticsDeal: DealRule = Default, balancerRule: DealRule = Default)
 
case class HiveThriftProxyRuleClass(thriftServerName: String, mutiSessionsInIPRule: UserAllowRule = null,
    mutiSessionsInUserRule: UserAllowRule = null, statisticsDealRule: StatisticsDealRule = null,
    loginValidateRule: LoginValidateRule = null, queryDealRule: List[QueryDealRule] = List.empty[QueryDealRule],
    balancer: Balancer = null)
    
case class User(username: String, password: String, ipAddress: String)

case class UserHQL(user: User, hqlPriority: HQLPriority, executeHQLs: Map[String, Array[ExecuteHQLInfo]])

case class HQLPriority(hql: String, priority: Int)

case class ExecuteHQLInfo(username: String, ipAddress: String, hql: String)

case class ThriftServerName(thriftServerName: String, username: String) 
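
A quick sketch of what the validate method above enforces (all field values are made up; the object name is hypothetical):

object ThriftServerInfoExample extends App {
  val info = HiveThriftServerInfo(
    thriftServerName = "cluster1", serverName = "hive-1", valid = true,
    username = "hive", password = "", host = "10.0.0.21", port = 0, maxThread = 10)

  // Throws IllegalArgumentException("Port of cluster1 must be greater than 0!"):
  // the host and username checks pass, then the port check fails.
  info.validate
}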
Example 42
Source File: ExtractApplicationProperties.scala    From rug   with GNU General Public License v3.0 5 votes vote down vote up
package com.atomist.rug.kind.java

import com.atomist.tree.content.project.{ConfigValue, Configuration, SimpleConfigValue, SimpleConfiguration}
import com.atomist.source.FileArtifact
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable.ListBuffer
import scala.io.Source


class ExtractApplicationProperties(source: String) extends Function[FileArtifact, Configuration] {

  override def apply(f: FileArtifact): Configuration = {
    val isWhiteSpace: String => Boolean = line => StringUtils.isWhitespace(line)
    val isComment: String => Boolean = line => !isWhiteSpace(line) && line.dropWhile(c => c.isWhitespace).startsWith("#")
    val isContent: String => Boolean = line => !(isWhiteSpace(line) || isComment(line))

    trait State
    object InComment extends State
    object InBlanks extends State

    var state: State = InComment
    var comment = ""
    val configValues = new ListBuffer[ConfigValue]()

    // Strip # and whitespace from comments (respecting multiline comments)
    def extractComment(comment: String): String = {

      def toCommentContentLine(l: String) = {
        val r = l.dropWhile(c => c.isWhitespace || '#'.equals(c))
        r
      }

      val r = comment.lines.map(l => toCommentContentLine(l)).mkString("\n")
      r
    }

    // Return None if not a valid property line
    def parseContentLine(line: String): Option[ConfigValue] = {
      val stripped = line.dropWhile(c => c.isWhitespace)
      val idx = stripped.indexOf("=")
      if (idx == -1) {
        None
      }
      else {
        val (key, value) = stripped.splitAt(idx)
        val profile = ""
        Some(SimpleConfigValue(key, value.substring(1), source, profile, description = extractComment(comment)))
      }
    }

    def appendToComment(l: String): Unit = {
      if ("".equals(comment)) comment = l
      else comment = comment + "\n" + l
    }

    val lines = Source.fromString(f.content).getLines()
    for (line <- lines) {
      if (isContent(line)) {
        parseContentLine(line).foreach(cv => configValues.append(cv))
        comment = ""
      }
      else state match {
        case InBlanks if isComment(line) =>
          state = InComment
          appendToComment(line)
        case InComment if isComment(line) || isWhiteSpace(line) =>
          appendToComment(line)
        case InComment =>
          comment = ""
          state = InBlanks
        case _ =>
      }
    }
    new SimpleConfiguration(configValues)
  }
} 
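
A usage sketch; StringFileArtifact(name, content) is assumed to be the simple in-memory FileArtifact factory from com.atomist.source, and the properties content is made up:

import com.atomist.source.StringFileArtifact

object ExtractPropertiesExample extends App {
  val props =
    """# Server settings
      |server.port=8080
      |
      |# Datasource
      |spring.datasource.url=jdbc:h2:mem:test
      |""".stripMargin

  val config = new ExtractApplicationProperties(source = "application.properties")
    .apply(StringFileArtifact("application.properties", props))
  // config should hold server.port and spring.datasource.url, each with the
  // preceding comment captured as its description.
}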
Example 43
Source File: ServiceInfoLogger.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.standalone

import java.io.File

import org.apache.commons.lang3.StringUtils
import org.apache.openwhisk.standalone.ColorOutput.clr

import scala.io.AnsiColor

class ServiceInfoLogger(conf: Conf, services: Seq[ServiceContainer], workDir: File) extends AnsiColor {
  private val separator = "=" * 80

  def run(): Unit = {
    println(separator)
    println("Launched service details")
    println()
    services.foreach(logService)
    println()
    println(s"Local working directory - ${workDir.getAbsolutePath}")
    println(separator)
  }

  private def logService(s: ServiceContainer): Unit = {
    val msg = s"${portInfo(s.port)} ${s.description} (${clr(s.name, BOLD, conf.colorEnabled)})"
    println(msg)
  }

  private def portInfo(port: Int) = {
    val msg = StringUtils.center(port.toString, 7)
    s"[${clr(msg, GREEN, conf.colorEnabled)}]"
  }
} 
Example 44
Source File: RemoteDirectory.scala    From dependency   with MIT License 5 votes vote down vote up
package io.flow.dependency.api.lib

import io.flow.dependency.v0.models.{Credentials, CredentialsUndefinedType, UsernamePassword}
import org.htmlcleaner.HtmlCleaner
import org.apache.commons.codec.binary.Base64
import org.apache.commons.lang3.StringUtils
import org.apache.commons.text.StringEscapeUtils
import java.net.URL


import scala.util.{Failure, Success, Try}



object RemoteDirectory {

  case class Result(
    directories: Seq[String] = Nil,
    files: Seq[String] = Nil
  )

  def fetch(
    url: String,
    credentials: Option[Credentials] = None
  ) (
    filter: String => Boolean = { !_.startsWith(".") }
  ): Result = {
    val base = Result()
    val cleaner = new HtmlCleaner()

    val uc = (new URL(url)).openConnection()
    credentials.foreach {
      case UsernamePassword(username, password) =>
        val userpass = username + ":" + password.getOrElse("")
        val basicAuth = "Basic " + new String(new Base64().encode(userpass.getBytes()))
        uc.setRequestProperty("Authorization", basicAuth)
      case CredentialsUndefinedType(_) =>
        // No-op
    }

    Try(cleaner.clean(uc.getInputStream())) match {
      case Failure(_) => {
        base
      }
      case Success(rootNode) => {
        rootNode.getElementsByName("a", true).foldLeft(base) { case (result, elem) =>
          Option(elem.getAttributeByName("href")) match {
            case None => {
              result
            }
            case Some(_) => {
              val text = StringEscapeUtils.unescapeHtml4(elem.getText.toString)
              filter(StringUtils.stripEnd(text, "/")) match {
                case false => {
                  result
                }
                case true => {
                  text.endsWith("/") match {
                    case true => result.copy(directories = result.directories ++ Seq(text))
                    case false => result.copy(files = result.files ++ Seq(text))
                  }
                }
              }
            }
          }
        }
      }
    }
  }
} 
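
A usage sketch (the URL is illustrative; any HTML index page that exposes <a href="..."> links will do, and network access is required):

object RemoteDirectoryExample extends App {
  val result = RemoteDirectory.fetch("https://repo1.maven.org/maven2/org/apache/commons/")()
  result.directories.take(5).foreach(println) // e.g. commons-lang3/, commons-text/, ...
  println(s"files: ${result.files.size}, directories: ${result.directories.size}")
}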
Example 45
Source File: RemoteVersions.scala    From dependency   with MIT License 5 votes vote down vote up
package io.flow.dependency.api.lib

import io.flow.dependency.v0.models.Credentials
import io.flow.util.Version
import org.apache.commons.lang3.StringUtils

object RemoteVersions {

  def fetch(
    resolver: String,
    groupId: String,
    artifactId: String,
    credentials: Option[Credentials]
  ): Seq[ArtifactVersion] = {
    val versions = fetchUrl(
      joinUrl(resolver, groupId.replaceAll("\\.", "/")),
      artifactId,
      credentials
    ) match {
      case Nil => {
        fetchUrl(joinUrl(resolver, groupId), artifactId, credentials)
      }
      case results => {
        results
      }
    }
    versions.sortBy { _.tag }.reverse
  }

  private[this] def fetchUrl(
    url: String,
    artifactId: String,
    credentials: Option[Credentials]
  ): Seq[ArtifactVersion] = {
    val result = RemoteDirectory.fetch(url, credentials = credentials)(
      filter = { name => name == artifactId || name.startsWith(artifactId + "_") }
    )

    result.directories.flatMap { dir =>
      val thisUrl = joinUrl(url, dir)
      RemoteDirectory.fetch(thisUrl, credentials = credentials)().directories.map { d =>
        ArtifactVersion(
          tag = Version(StringUtils.stripEnd(d, "/")),
          crossBuildVersion = crossBuildVersion(dir)
        )
      }
    }
  }

  // e.g. "scala-csv_2.11/" => 2.11
  def crossBuildVersion(text: String): Option[Version] = {
    StringUtils.stripEnd(text, "/").split("_").toList match {
      case Nil => None
      case _ :: Nil => None
      case multiple => {
        // Check if we can successfully parse the version tag for a
        // major version. If so, we assume we have found a cross build
        // version.
        val tag = Version(multiple.last)
        tag.major match {
          case None => None
          case Some(_) => Some(tag)
        }
      }
    }
  }

  def makeUrls(
    resolver: String,
    groupId: String
  ): Seq[String] = {
    Seq(
      joinUrl(
        resolver, groupId.replaceAll("\\.", "/")
      ),
      joinUrl(resolver, groupId)
    )
  }

  def joinUrl(
    a: String,
    b: String
  ): String = {
    Seq(a, b).map ( StringUtils.stripEnd(_, "/") ).mkString("/")
  }
} 
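
The two pure helpers above can be exercised without any network access (the artifact and URL values are illustrative):

object RemoteVersionsExamples extends App {
  println(RemoteVersions.crossBuildVersion("scala-csv_2.11/")) // defined: "2.11" has a parsable major version
  println(RemoteVersions.crossBuildVersion("scala-csv/"))      // None: no "_<version>" suffix
  println(RemoteVersions.joinUrl("https://repo1.maven.org/maven2/", "io.flow"))
  // => https://repo1.maven.org/maven2/io.flow
}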
Example 46
Source File: BinaryVersionProvider.scala    From dependency   with MIT License 5 votes vote down vote up
package io.flow.dependency.api.lib

import io.flow.dependency.v0.models.BinaryType
import io.flow.log.RollbarLogger
import io.flow.util.Version
import org.apache.commons.lang3.StringUtils



trait BinaryVersionProvider {

  
  def versions(binary: BinaryType): Seq[Version]

}

@javax.inject.Singleton
case class DefaultBinaryVersionProvider @javax.inject.Inject()(
  logger: RollbarLogger
) extends BinaryVersionProvider {

  private[this] val ScalaUrl = "https://www.scala-lang.org/download/all.html"
  private[this] val SbtUrl = "https://flow.jfrog.io/flow/libs-release/org/scala-sbt/sbt/"

  override def versions(
    binary: BinaryType
  ) : Seq[Version] = {
    binary match {
      case BinaryType.Scala => {
        fetchScalaVersions()
      }
      case BinaryType.Sbt => {
        fetchSbtVersions()
      }
      case BinaryType.UNDEFINED(name) => {
        if (!name.startsWith("tst-")) {
          logger.withKeyValue("binary_name", name).warn(s"Do not know how to find versions for the programming binary")
        }
        Nil
      }
    }
  }

  def fetchScalaVersions(): Seq[Version] = {
    RemoteDirectory.fetch(ScalaUrl) { name =>
      name.toLowerCase.startsWith("scala ")
    }.files.flatMap { toVersion }
  }

  def fetchSbtVersions(): Seq[Version] = {
    RemoteDirectory.fetch(SbtUrl)().directories.flatMap { dir =>
      toVersion(StringUtils.stripEnd(dir, "/"))
    }
  }

  def toVersion(value: String): Option[Version] = {
    val tag = Version(
      StringUtils.stripStart(
        StringUtils.stripStart(value, "scala"),
        "Scala"
      ).trim
    )
    tag.major match {
      case None => None
      case Some(_) => Some(tag)
    }
  }

}