org.apache.commons.lang3.StringUtils Scala Examples

The following examples show how to use org.apache.commons.lang3.StringUtils. You can go to the original project or source file by following the link above each example.
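Most of the examples below rely on the same handful of StringUtils helpers: null-safe emptiness checks, joining, padding, abbreviating, accent stripping and literal replacement. As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the projects below) that exercises those methods:

import org.apache.commons.lang3.StringUtils

object StringUtilsTour extends App {
  // Null-safe emptiness checks: isEmpty treats only null/"" as empty,
  // while isBlank also treats whitespace-only strings as blank.
  println(StringUtils.isEmpty(null))                            // true
  println(StringUtils.isBlank("   "))                           // true
  println(StringUtils.isNotBlank("spark"))                      // true

  // Joining, padding and abbreviating.
  println(StringUtils.join(Array[AnyRef]("a", "b", "c"), ", ")) // a, b, c
  println(StringUtils.leftPad("42", 5, '0'))                    // 00042
  println(StringUtils.abbreviate("a rather long string", 10))   // a rathe...

  // Accent stripping and literal (non-regex) replacement.
  println(StringUtils.stripAccents("café"))                     // cafe
  println(StringUtils.replace("a-b-c", "-", "/"))               // a/b/c
}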
Example 1
Source File: ClientConfiguration.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.storage.backends.cassandra.config.entities

import com.datastax.driver.core.ConsistencyLevel
import com.expedia.www.haystack.commons.retries.RetryOperation
import org.apache.commons.lang3.StringUtils



case class CassandraConfiguration(clientConfig: ClientConfiguration,
                                  consistencyLevel: ConsistencyLevel,
                                  retryConfig: RetryOperation.Config,
                                  consistencyLevelOnError: List[(Class[_], ConsistencyLevel)]) {
  def writeConsistencyLevel(error: Throwable): ConsistencyLevel = {
    if (error == null) {
      consistencyLevel
    } else {
      consistencyLevelOnError
        .find(errorClass => errorClass._1.isAssignableFrom(error.getClass))
        .map(_._2).getOrElse(writeConsistencyLevel(error.getCause))
    }
  }
} 
Example 2
Source File: LogListener.scala    From splash   with Apache License 2.0
package org.apache.spark.shuffle

import org.apache.commons.lang3.StringUtils
import org.apache.spark.internal.Logging
import org.testng.{ITestResult, TestListenerAdapter}

class LogListener extends TestListenerAdapter with Logging {
  override def onTestStart(tr: ITestResult): Unit = {
    super.onTestStart(tr)
    logInfo(s"--- ${tr.getName}${getParams(tr)} --- test start.")
  }

  override def onTestFailure(tr: ITestResult): Unit = {
    logError(s"--- ${tr.getName}${getParams(tr)} --- failed, took ${getSeconds(tr)}s.")
    val params = tr.getParameters
    if (params.nonEmpty) {
      logError(s"test parameters: $params.")
    }
    logError("detail:", tr.getThrowable)
  }

  override def onTestSkipped(tr: ITestResult): Unit = {
    logWarning(s"--- ${tr.getName}${getParams(tr)} --- skipped, took ${getSeconds(tr)}s.")
  }

  override def onTestSuccess(tr: ITestResult): Unit = {
    logInfo(s"--- ${tr.getName}${getParams(tr)} --- passed, took ${getSeconds(tr)}s.")
  }

  private def getParams(tr: ITestResult): String = {
    val params = tr.getParameters
    if (params.nonEmpty) {
      s" [${StringUtils.join(params, ", ")}]"
    } else {
      ""
    }
  }

  private def getSeconds(tr: ITestResult) = (tr.getEndMillis - tr.getStartMillis).toDouble / 1000
} 
Example 3
Source File: Generator.scala    From donut   with MIT License
package report.donut

import org.apache.commons.lang3.StringUtils
import org.joda.time.DateTime
import org.joda.time.format.{DateTimeFormat, DateTimeFormatter}
import report.donut.gherkin.model._
import report.donut.log.Log
import report.donut.performance.PerformanceSupport
import report.donut.template.TemplateEngine
import report.donut.transformers.cucumber.{CucumberTransformer, Feature => CucumberFeature}

import scala.collection.mutable.ListBuffer
import scala.util.Try

object Generator extends Log with PerformanceSupport {

  val formatter: DateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd-HHmm")

  //this wrapper is currently used to help the java maven plugin
  def apply(resultSources: String,
            outputPath: String = "donut",
            filePrefix: String = "",
            dateTime: String,
            template: String = "default",
            countSkippedAsFailure: Boolean = false,
            countPendingAsFailure: Boolean = false,
            countUndefinedAsFailure: Boolean = false,
            countMissingAsFailure: Boolean = false,
            projectName: String,
            projectVersion: String,
            customAttributes: scala.collection.mutable.Map[String, String]): ReportConsole = {

    createReport(resultSources, outputPath, filePrefix, dateTime, template, countSkippedAsFailure, countPendingAsFailure,
      countUndefinedAsFailure, countMissingAsFailure, projectName, projectVersion, customAttributes.toMap) match {
      case Right(report) => ReportConsole(report)
      case Left(error) => throw DonutException(s"An error occurred while generating donut report. $error")
    }
  }

  private[donut] def createReport(resultSources: String,
                                  outputPath: String = "donut",
                                  filePrefix: String = "",
                                  datetime: String = formatter.print(DateTime.now),
                                  template: String = "default",
                                  countSkippedAsFailure: Boolean = false,
                                  countPendingAsFailure: Boolean = false,
                                  countUndefinedAsFailure: Boolean = false,
                                  countMissingAsFailure: Boolean = false,
                                  projectName: String,
                                  projectVersion: String,
                                  customAttributes: Map[String, String] = Map()): Either[String, Report] = {

    //Prepare objects
    val statusConf = StatusConfiguration(countSkippedAsFailure, countPendingAsFailure, countUndefinedAsFailure, countMissingAsFailure)
    val projectMetadata = ProjectMetadata(projectName, projectVersion, customAttributes)
    val reportStartedTimestamp = Try(formatter.parseDateTime(datetime)).getOrElse(DateTime.now)

    for {
      resultSourceList <- if (!StringUtils.isBlank(resultSources)) Right(resultSources.split(",").map(_.trim).toList).right else Left("Unable to extract the paths to the result sources. Please use this format:- cucumber:/my/path/cucumber-reports,cucumber:/my/other/path/adapted-reports").right
      features <- timed("step1", "Loaded result sources") {
        loadResultSources(resultSourceList, statusConf).right
      }
      report <- timed("step2", "Produced report") {
        Right(Report(features, reportStartedTimestamp, projectMetadata)).right
      }
      _ <- TemplateEngine(report, s"/templates/$template/index.html").renderToHTML(outputPath, filePrefix).right
    } yield report
  }

  
  def loadResultSources(resultSourceList: List[String], statusConf: StatusConfiguration): Either[String, List[Feature]] = {
    var features = new ListBuffer[CucumberFeature]
    for (resultSource <- resultSourceList) {
      val result = ResultLoader(resultSource).load
      if (result.isLeft) return Left(result.left.get)
      features ++= result.right.get
    }
    val donutFeatures = CucumberTransformer.transform(features.toList, statusConf).right.get
    Try(donutFeatures.toList).toEither(_.getMessage)
  }
}

case class DonutException(msg: String) extends Exception(msg)
Example 4
Source File: ResultLoader.scala    From donut   with MIT License
package report.donut

import java.io.File

import org.apache.commons.lang3.StringUtils
import org.json4s.{DefaultFormats, JValue}
import report.donut.gherkin.processors.JSONProcessor
import report.donut.transformers.cucumber.Feature

import scala.util.Try

trait ResultLoader {
  def load(): Either[String, List[Feature]]
}

object ResultLoader {

  private[donut] class CucumberResultLoader(sourceDir: File) extends ResultLoader {
    override def load(): Either[String, List[Feature]] = {
      if (!sourceDir.exists) {
        return Left(s"Source directory does not exist: $sourceDir")
      }

      val jsonValues = JSONProcessor.loadFrom(sourceDir) match {
        case Left(errors) => return Left(errors)
        case Right(r) => if (r.isEmpty) return Left("No files found of correct format") else Right(r)
      }

      Try(loadCukeFeatures(jsonValues.right.get)).toEither(_.getMessage)
    }

    private[donut] def loadCukeFeatures(json: List[JValue]) = {
      implicit val formats = DefaultFormats
      json.flatMap(f => f.extract[List[Feature]])
    }
  }

  def apply(resultSource: String): ResultLoader = {
    val pattern = "([a-zA-Z]{2,}):(.*)".r
    pattern.findFirstMatchIn(resultSource) match {
      case Some(m) => {
        val format = m.group(1)
        val sourcePath = m.group(2)
        if (StringUtils.isBlank(sourcePath)) {
          throw new DonutException("Please provide the source directory path.")
        }
        format match {
          case "cucumber" => new CucumberResultLoader(new File(sourcePath))
          case _ => throw DonutException(s"Unsupported result format: $format")
        }
      }
      case None => new CucumberResultLoader(new File(resultSource)) //Defaults to cucumber result format
    }
  }
} 
Example 5
Source File: ArticleValidator.scala    From scala-play-realworld-example-app   with MIT License
package articles.services

import commons.utils.RealWorldStringUtils
import commons.validations.PropertyViolation
import commons.validations.constraints._
import articles.models.{ArticleUpdate, NewArticle}
import org.apache.commons.lang3.StringUtils

import scala.concurrent.ExecutionContext

class ArticleValidator(implicit private val ec: ExecutionContext) {

  private val titleValidator = new TitleValidator
  private val descriptionValidator = new DescriptionValidator
  private val bodyValidator = new BodyValidator
  private val tagValidator = new TagValidator

  def validateNewArticle(newArticle: NewArticle): Seq[PropertyViolation] = {
    require(newArticle != null)

    validateTitle(newArticle.title) ++
      validateDescription(newArticle.description) ++
      validateBody(newArticle.body) ++
      validateTags(newArticle.tagList)
  }

  private def validateTags(tags: Seq[String]) = tags.flatMap(tagValidator.validate)

  def validateArticleUpdate(articleUpdate: ArticleUpdate): Seq[PropertyViolation] = {
    val titleViolations = articleUpdate.title.map(validateTitle).getOrElse(Seq.empty)
    val descriptionViolations = articleUpdate.description.map(validateDescription).getOrElse(Seq.empty)
    val bodyViolations = articleUpdate.body.map(validateBody).getOrElse(Seq.empty)

    titleViolations ++ descriptionViolations ++ bodyViolations
  }

  private def validateTitle(title: String) = titleValidator.validate(title)

  private def validateDescription(description: String) = descriptionValidator.validate(description)

  private def validateBody(body: String) = bodyValidator.validate(body)

  private class StringValidator(minLength: Int = 0, maxLength: Int = Int.MaxValue) {

    def validate(str: String): Seq[Violation] = {
      if (StringUtils.isBlank(str)) Seq(NotNullViolation)
      else if (str.length < minLength) Seq(MinLengthViolation(minLength))
      else if (str.length > maxLength) Seq(MaxLengthViolation(maxLength))
      else if (RealWorldStringUtils.startsWithWhiteSpace(str)
        || RealWorldStringUtils.endsWithWhiteSpace(str)) Seq(PrefixOrSuffixWithWhiteSpacesViolation)
      else Nil
    }
  }

  private class TitleValidator {
    private val maxLength = 255
    private val stringValidator = new StringValidator(maxLength = maxLength)

    def validate(title: String): Seq[PropertyViolation] = {
      stringValidator.validate(title)
        .map(PropertyViolation("title", _))
    }
  }

  private class DescriptionValidator {
    private val maxLength = 255
    private val stringValidator = new StringValidator(maxLength = maxLength)

    def validate(description: String): Seq[PropertyViolation] = {
      stringValidator.validate(description)
        .map(PropertyViolation("description", _))
    }
  }

  private class BodyValidator {
    private val stringValidator = new StringValidator

    def validate(body: String): Seq[PropertyViolation] = {
      stringValidator.validate(body)
        .map(PropertyViolation("body", _))
    }
  }

  private class TagValidator {
    private val maxLength = 255
    private val stringValidator = new StringValidator(maxLength = maxLength)

    def validate(tag: String): Seq[PropertyViolation] = {
      stringValidator.validate(tag)
        .map(PropertyViolation("tag", _))
    }
  }

} 
Example 6
Source File: CommentController.scala    From scala-play-realworld-example-app   with MIT License
package articles.controllers

import commons.exceptions.MissingModelException
import commons.services.ActionRunner
import articles.exceptions.AuthorMismatchException
import articles.models._
import articles.services.CommentService
import commons.controllers.RealWorldAbstractController
import org.apache.commons.lang3.StringUtils
import play.api.libs.json._
import play.api.mvc.{Action, AnyContent, ControllerComponents}
import users.controllers.{AuthenticatedActionBuilder, OptionallyAuthenticatedActionBuilder}

class CommentController(authenticatedAction: AuthenticatedActionBuilder,
                        optionallyAuthenticatedActionBuilder: OptionallyAuthenticatedActionBuilder,
                        actionRunner: ActionRunner,
                        commentService: CommentService,
                        components: ControllerComponents)
  extends RealWorldAbstractController(components) {

  def delete(id: CommentId): Action[AnyContent] = authenticatedAction.async { request =>

    actionRunner.runTransactionally(commentService.delete(id, request.user.userId))
      .map(_ => Ok)
      .recover({
        case _: AuthorMismatchException => Forbidden
        case _: MissingModelException => NotFound
      })
  }

  def findByArticleSlug(slug: String): Action[AnyContent] = optionallyAuthenticatedActionBuilder.async { request =>
    require(StringUtils.isNotBlank(slug))

    val maybeUserId = request.authenticatedUserOption.map(_.userId)
    actionRunner.runTransactionally(commentService.findByArticleSlug(slug, maybeUserId))
      .map(CommentList(_))
      .map(Json.toJson(_))
      .map(Ok(_))
      .recover({
        case _: MissingModelException => NotFound
      })
  }

  def create(slug: String): Action[_] = authenticatedAction.async(validateJson[NewCommentWrapper]) { request =>
    require(StringUtils.isNotBlank(slug))

    val newComment = request.body.comment
    val userId = request.user.userId

    actionRunner.runTransactionally(commentService.create(newComment, slug, userId)
      .map(CommentWrapper(_))
      .map(Json.toJson(_))
      .map(Ok(_)))
      .recover({
        case _: MissingModelException => NotFound
      })
  }

} 
Example 7
Source File: EnvHelper.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql

import org.apache.commons.lang3.StringUtils

import org.apache.carbondata.core.metadata.DatabaseLocationProvider


object EnvHelper {

  def isLegacy(sparkSession: SparkSession): Boolean = false

  def isPrivacy(sparkSession: SparkSession, isExternal: Boolean): Boolean = {
    (!isExternal) && isLegacy(sparkSession)
  }

  def setDefaultHeader(
      sparkSession: SparkSession,
      optionsFinal: java.util.Map[String, String]
  ): Unit = {
    if (isLegacy(sparkSession)) {
      val fileHeader = optionsFinal.get("fileheader")
      val header = optionsFinal.get("header")
      if (StringUtils.isEmpty(fileHeader) && StringUtils.isEmpty(header)) {
        optionsFinal.put("header", "false")
      }
    }
  }

  def isRetainData(sparkSession: SparkSession, retainData: Boolean): Boolean = {
    if (isLegacy(sparkSession)) {
      retainData
    } else {
      true
    }
  }

  def getDatabase(database: String): String = {
    DatabaseLocationProvider.get().provide(database)
  }
} 
Example 8
Source File: CarbonSparkStreamingFactory.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql

import org.apache.commons.lang3.StringUtils

import org.apache.carbondata.streaming.CarbonStreamException
import org.apache.carbondata.streaming.CarbonStreamSparkStreaming
import org.apache.carbondata.streaming.CarbonStreamSparkStreamingWriter


object CarbonSparkStreamingFactory {

  def getStreamSparkStreamingWriter(spark: SparkSession,
    dbNameStr: String,
    tableName: String): CarbonStreamSparkStreamingWriter =
    synchronized {
    val dbName = if (StringUtils.isEmpty(dbNameStr)) "default" else dbNameStr
    val key = dbName + "." + tableName
    if (CarbonStreamSparkStreaming.getTableMap.containsKey(key)) {
      CarbonStreamSparkStreaming.getTableMap.get(key)
    } else {
      if (StringUtils.isEmpty(tableName) || tableName.contains(" ")) {
        throw new CarbonStreamException("Table creation failed. " +
                                        "Table name must not be blank or " +
                                        "cannot contain blank space")
      }
      val carbonTable = CarbonEnv.getCarbonTable(Some(dbName),
        tableName)(spark)
      if (!carbonTable.isStreamingSink) {
        throw new CarbonStreamException(s"Table ${carbonTable.getDatabaseName}." +
                                        s"${carbonTable.getTableName} is not a streaming table")
      }
      val streamWriter = new CarbonStreamSparkStreamingWriter(spark,
        carbonTable, spark.sessionState.newHadoopConf())
      CarbonStreamSparkStreaming.getTableMap.put(key, streamWriter)
      streamWriter
    }
  }
} 
Example 9
Source File: NetezzaFilters.scala    From spark-netezza   with Apache License 2.0
package com.ibm.spark.netezza

import java.sql.{Date, Timestamp}

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.sources._


object NetezzaFilters {

  // Note: the original file's scaladoc and helpers such as quoteValue (which quotes
  // literal values for use in Netezza SQL) are omitted from this excerpt.
  def generateFilterExpr(f: Filter): Option[String] = {
    Option(f match {
      case EqualTo(attr, value) => s"$attr = ${quoteValue(value)}"
      case EqualNullSafe(attr, value) =>
        s"(NOT ($attr != ${quoteValue(value)} OR $attr IS NULL OR " +
          s"${quoteValue(value)} IS NULL) OR ($attr IS NULL AND ${quoteValue(value)} IS NULL))"
      case LessThan(attr, value) => s"$attr < ${quoteValue(value)}"
      case GreaterThan(attr, value) => s"$attr > ${quoteValue(value)}"
      case LessThanOrEqual(attr, value) => s"$attr <= ${quoteValue(value)}"
      case GreaterThanOrEqual(attr, value) => s"$attr >= ${quoteValue(value)}"
      case IsNull(attr) => s"$attr IS NULL"
      case IsNotNull(attr) => s"$attr IS NOT NULL"
      case StringStartsWith(attr, value) => s"${attr} LIKE '${value}%'"
      case StringEndsWith(attr, value) => s"${attr} LIKE '%${value}'"
      case StringContains(attr, value) => s"${attr} LIKE '%${value}%'"
      case In(attr, value) => s"$attr IN (${quoteValue(value)})"
      case Not(f) => generateFilterExpr(f).map(p => s"(NOT ($p))").getOrElse(null)
      case Or(f1, f2) =>
        val or = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (or.size == 2) {
          or.map(p => s"($p)").mkString(" OR ")
        } else {
          null
        }
      case And(f1, f2) =>
        val and = Seq(f1, f2).flatMap(generateFilterExpr(_))
        if (and.size == 2) {
          and.map(p => s"($p)").mkString(" AND ")
        } else {
          null
        }
      case _ => null
    })
  }
} 
Example 10
Source File: MetadataOperation.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.operation.metadata

import java.security.PrivilegedExceptionAction

import org.apache.commons.lang3.StringUtils
import org.apache.spark.KyuubiSparkUtil

import yaooqinn.kyuubi.KyuubiSQLException
import yaooqinn.kyuubi.cli.FetchOrientation
import yaooqinn.kyuubi.operation._
import yaooqinn.kyuubi.schema.{RowSet, RowSetBuilder}
import yaooqinn.kyuubi.session.KyuubiSession

abstract class MetadataOperation(session: KyuubiSession, opType: OperationType)
  extends AbstractOperation(session, opType) {

  setHasResultSet(true)

  override def cancel(): Unit = {
    setState(CANCELED)
    throw new UnsupportedOperationException("MetadataOperation.cancel()")
  }

  override def close(): Unit = {
    setState(CLOSED)
    cleanupOperationLog()
  }

  
  protected def convertSchemaPattern(pattern: String): String = {
    if (StringUtils.isEmpty(pattern)) {
      convertPattern("%", datanucleusFormat = true)
    } else {
      convertPattern(pattern, datanucleusFormat = true)
    }
  }

  private def convertPattern(pattern: String, datanucleusFormat: Boolean): String = {
    val wStr = if (datanucleusFormat) "*" else ".*"
    pattern
      .replaceAll("([^\\\\])%", "$1" + wStr)
      .replaceAll("\\\\%", "%")
      .replaceAll("^%", wStr)
      .replaceAll("([^\\\\])_", "$1.")
      .replaceAll("\\\\_", "_")
      .replaceAll("^_", ".")
  }

  protected def execute(block: => Unit): Unit = {
    setState(RUNNING)
    try {
      session.ugi.doAs(new PrivilegedExceptionAction[Unit] {
        override def run(): Unit = block
      })
      setState(FINISHED)
    } catch {
      case e: Exception =>
        setState(ERROR)
        throw new KyuubiSQLException(KyuubiSparkUtil.findCause(e))
    }
  }

  override def getNextRowSet(order: FetchOrientation, rowSetSize: Long): RowSet = {
    assertState(FINISHED)
    validateDefaultFetchOrientation(order)
    val taken = iter.take(rowSetSize.toInt)
    RowSetBuilder.create(getResultSetSchema, taken.toSeq, getProtocolVersion)
  }

} 
Example 11
Source File: HiveTokenCollector.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.session.security

import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.hive.ql.metadata.Hive
import org.apache.hadoop.io.Text
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.apache.hadoop.security.token.Token
import org.apache.kyuubi.Logging
import org.apache.spark.SparkConf

import yaooqinn.kyuubi.utils.KyuubiHadoopUtil
import yaooqinn.kyuubi.utils.KyuubiHiveUtil._

private[security] object HiveTokenCollector extends TokenCollector with Logging {

  override def obtainTokens(conf: SparkConf): Unit = {
    try {
      val c = hiveConf(conf)
      val principal = c.getTrimmed(METASTORE_PRINCIPAL)
      val uris = c.getTrimmed(URIS)
      require(StringUtils.isNotEmpty(principal), METASTORE_PRINCIPAL + " Undefined")
      require(StringUtils.isNotEmpty(uris), URIS + " Undefined")
      val currentUser = UserGroupInformation.getCurrentUser.getUserName
      val credentials = new Credentials()
      KyuubiHadoopUtil.doAsRealUser {
        val hive = Hive.get(c, true)
        info(s"Getting token from Hive Metastore for owner $currentUser via $principal")
        val tokenString = hive.getDelegationToken(currentUser, principal)
        val token = new Token[DelegationTokenIdentifier]
        token.decodeFromUrlString(tokenString)
        info(s"Got " + DelegationTokenIdentifier.stringifyToken(token))
        credentials.addToken(new Text("hive.metastore.delegation.token"), token)
      }
      UserGroupInformation.getCurrentUser.addCredentials(credentials)
    } catch {
      case NonFatal(e) =>
        error("Failed to get token from hive metatore service", e)
    } finally {
      Hive.closeCurrent()
    }
  }

  override def tokensRequired(conf: SparkConf): Boolean = {
    UserGroupInformation.isSecurityEnabled && StringUtils.isNotBlank(hiveConf(conf).get(URIS))
  }
} 
Example 12
Source File: LdapAuthenticationProviderImpl.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.auth

import java.util.Hashtable
import javax.naming.{Context, NamingException}
import javax.naming.directory.InitialDirContext
import javax.security.sasl.AuthenticationException

import org.apache.commons.lang3.StringUtils
import org.apache.spark.{KyuubiConf, SparkConf}

import yaooqinn.kyuubi.service.ServiceUtils

class LdapAuthenticationProviderImpl(conf: SparkConf) extends PasswdAuthenticationProvider {

  import KyuubiConf._

  
  override def authenticate(user: String, password: String): Unit = {
    if (StringUtils.isBlank(user)) {
      throw new AuthenticationException(s"Error validating LDAP user, user is null" +
        s" or contains blank space")
    }

    if (StringUtils.isBlank(password)) {
      throw new AuthenticationException(s"Error validating LDAP user, password is null" +
        s" or contains blank space")
    }

    val env = new Hashtable[String, Any]()
    env.put(Context.INITIAL_CONTEXT_FACTORY, "com.sun.jndi.ldap.LdapCtxFactory")
    env.put(Context.SECURITY_AUTHENTICATION, "simple")

    conf.getOption(AUTHENTICATION_LDAP_URL).foreach(env.put(Context.PROVIDER_URL, _))

    val domain = conf.get(AUTHENTICATION_LDAP_DOMAIN, "")
    val u = if (!hasDomain(user) && StringUtils.isNotBlank(domain)) {
      user + "@" + domain
    } else {
      user
    }

    val bindDn = conf.getOption(AUTHENTICATION_LDAP_BASEDN) match {
      case Some(dn) => "uid=" + u + "," + dn
      case _ => u
    }

    env.put(Context.SECURITY_PRINCIPAL, bindDn)
    env.put(Context.SECURITY_CREDENTIALS, password)

    try {
      val ctx = new InitialDirContext(env)
      ctx.close()
    } catch {
      case e: NamingException =>
        throw new AuthenticationException(s"Error validating LDAP user: $bindDn", e)
    }
  }

  private def hasDomain(userName: String): Boolean = ServiceUtils.indexOfDomainMatch(userName) > 0
} 
Example 13
Source File: MapreduceTransformation.scala    From schedoscope   with Apache License 2.0
package org.schedoscope.dsl.transformations

import java.net.URI

import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.{Job, MRJobConfig}
import org.schedoscope.Schedoscope
import org.schedoscope.dsl.View
import org.schedoscope.scheduler.driver.{DriverRunState, MapreduceDriver}
import org.schedoscope.scheduler.service.ViewTransformationStatus


case class MapreduceTransformation(v: View,
                                   createJob: (Map[String, Any]) => Job,
                                   cleanupAfterJob: (Job, MapreduceDriver, DriverRunState[MapreduceBaseTransformation]) => DriverRunState[MapreduceBaseTransformation] = (_, __, completionRunState) => completionRunState,
                                   dirsToDelete: List[String] = List(),
                                   deleteViewPath: Boolean = true) extends MapreduceBaseTransformation {

  lazy val job = createJob(configuration.toMap)

  var directoriesToDelete = dirsToDelete ++ (if (deleteViewPath) List(v.fullPath) else List())

  description = StringUtils.abbreviate(v.urlPath, 100)
}

trait MapreduceBaseTransformation extends Transformation {

  def name = "mapreduce"

  val cleanupAfterJob: (Job, MapreduceDriver, DriverRunState[MapreduceBaseTransformation]) => DriverRunState[MapreduceBaseTransformation]

  val v: View

  val job: Job

  var directoriesToDelete: List[String]

  override def fileResourcesToChecksum = {
    val jarName = try {
      job.getConfiguration().get(MRJobConfig.JAR).split("/").last
    } catch {
      case _: Throwable => null
    }

    Schedoscope.settings
      .getDriverSettings("mapreduce")
      .libJarsHdfs
      .filter(lj => jarName == null || lj.contains(jarName))
  }

  override def viewTransformationStatus = ViewTransformationStatus(
    name,
    Some(Map(
      "input" -> job.getConfiguration().get(FileInputFormat.INPUT_DIR),
      "output" -> job.getConfiguration().get(FileOutputFormat.OUTDIR))))

  def configure() {
    // if job jar hasn't been registered, add all mapreduce libjars
    // to distributed cache
    if (job.getConfiguration().get(MRJobConfig.JAR) == null) {
      fileResourcesToChecksum.foreach(r => {
        try {
          job.addCacheFile(new URI(r))
        } catch {
          case _: Throwable => Unit
        }
      })
    }
    configuration.foreach { case (k, v) => if (v == null) job.getConfiguration.unset(k) else job.getConfiguration.set(k, v.toString) }
  }
} 
Example 14
Source File: ServiceMetadataDocumentGenerator.scala    From haystack-traces   with Apache License 2.0
package com.expedia.www.haystack.trace.indexer.writers.es

import java.time.Instant

import com.expedia.open.tracing.Span
import com.expedia.www.haystack.commons.metrics.MetricsSupport
import com.expedia.www.haystack.trace.commons.clients.es.document.ServiceMetadataDoc
import com.expedia.www.haystack.trace.commons.utils.SpanUtils
import com.expedia.www.haystack.trace.indexer.config.entities.ServiceMetadataWriteConfiguration
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable

class ServiceMetadataDocumentGenerator(config: ServiceMetadataWriteConfiguration) extends MetricsSupport {

  private var serviceMetadataMap = new mutable.HashMap[String, mutable.Set[String]]()
  private var allOperationCount: Int = 0
  private var lastFlushInstant = Instant.MIN

  private def shouldFlush: Boolean = {
    config.flushIntervalInSec == 0 || Instant.now().minusSeconds(config.flushIntervalInSec).isAfter(lastFlushInstant)
  }

  private def areStatementsReadyToBeExecuted(): Seq[ServiceMetadataDoc] = {
    if (serviceMetadataMap.nonEmpty && (shouldFlush || allOperationCount > config.flushOnMaxOperationCount)) {
      val statements = serviceMetadataMap.flatMap {
        case (serviceName, operationList) =>
          createServiceMetadataDoc(serviceName, operationList)
      }

      lastFlushInstant = Instant.now()
      serviceMetadataMap = new mutable.HashMap[String, mutable.Set[String]]()
      allOperationCount = 0
      statements.toSeq
    } else {
      Nil
    }
  }

  
  def createServiceMetadataDoc(serviceName: String, operationList: mutable.Set[String]): List[ServiceMetadataDoc] = {
    operationList.map(operationName => ServiceMetadataDoc(serviceName, operationName)).toList

  }
} 
Example 15
Source File: ContinuousTrigger.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution.streaming.continuous

import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.commons.lang3.StringUtils

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.streaming.{ProcessingTime, Trigger}
import org.apache.spark.unsafe.types.CalendarInterval


@InterfaceStability.Evolving
case class ContinuousTrigger(intervalMs: Long) extends Trigger {
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}

private[sql] object ContinuousTrigger {
  def apply(interval: String): ContinuousTrigger = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    if (cal.months > 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ContinuousTrigger(cal.microseconds / 1000)
  }

  def apply(interval: Duration): ContinuousTrigger = {
    ContinuousTrigger(interval.toMillis)
  }

  def create(interval: String): ContinuousTrigger = {
    apply(interval)
  }

  def create(interval: Long, unit: TimeUnit): ContinuousTrigger = {
    ContinuousTrigger(unit.toMillis(interval))
  }
} 
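ContinuousTrigger itself is private[sql], so application code normally reaches it through the public Trigger factory methods, which accept the same interval formats. A brief sketch of that public entry point (assuming Spark 2.3 or later):

import org.apache.spark.sql.streaming.Trigger

object ContinuousTriggerUsage {
  // Both calls resolve to a continuous trigger with a 1000 ms checkpoint interval.
  val fromString = Trigger.Continuous("1 second")
  val fromMillis = Trigger.Continuous(1000L)
}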
Example 16
Source File: DriverWrapper.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.deploy.worker

import java.io.File

import org.apache.commons.lang3.StringUtils

import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil, SparkSubmit}
import org.apache.spark.internal.Logging
import org.apache.spark.rpc.RpcEnv
import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils}


object DriverWrapper extends Logging {

  // Note: the listing strips the original scaladoc; main() parses the worker URL,
  // user jar and driver main class, then delegates to the user's main method.
  def main(args: Array[String]): Unit = {
    args.toList match {
      case workerUrl :: userJar :: mainClass :: extraArgs =>
        val conf = new SparkConf()
        val host: String = Utils.localHostName()
        val port: Int = sys.props.getOrElse("spark.driver.port", "0").toInt
        val rpcEnv = RpcEnv.create("Driver", host, port, conf, new SecurityManager(conf))
        logInfo(s"Driver address: ${rpcEnv.address}")
        rpcEnv.setupEndpoint("workerWatcher", new WorkerWatcher(rpcEnv, workerUrl))

        val currentLoader = Thread.currentThread.getContextClassLoader
        val userJarUrl = new File(userJar).toURI().toURL()
        val loader =
          if (sys.props.getOrElse("spark.driver.userClassPathFirst", "false").toBoolean) {
            new ChildFirstURLClassLoader(Array(userJarUrl), currentLoader)
          } else {
            new MutableURLClassLoader(Array(userJarUrl), currentLoader)
          }
        Thread.currentThread.setContextClassLoader(loader)
        setupDependencies(loader, userJar)

        // Delegate to supplied main class
        val clazz = Utils.classForName(mainClass)
        val mainMethod = clazz.getMethod("main", classOf[Array[String]])
        mainMethod.invoke(null, extraArgs.toArray[String])

        rpcEnv.shutdown()

      case _ =>
        // scalastyle:off println
        System.err.println("Usage: DriverWrapper <workerUrl> <userJar> <driverMainClass> [options]")
        // scalastyle:on println
        System.exit(-1)
    }
  }

  private def setupDependencies(loader: MutableURLClassLoader, userJar: String): Unit = {
    val sparkConf = new SparkConf()
    val secMgr = new SecurityManager(sparkConf)
    val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf)

    val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) =
      Seq(
        "spark.jars.excludes",
        "spark.jars.packages",
        "spark.jars.repositories",
        "spark.jars.ivy",
        "spark.jars.ivySettings"
      ).map(sys.props.get(_).orNull)

    val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(packagesExclusions,
      packages, repositories, ivyRepoPath, Option(ivySettingsPath))
    val jars = {
      val jarsProp = sys.props.get("spark.jars").orNull
      if (!StringUtils.isBlank(resolvedMavenCoordinates)) {
        SparkSubmit.mergeFileLists(jarsProp, resolvedMavenCoordinates)
      } else {
        jarsProp
      }
    }
    val localJars = DependencyUtils.resolveAndDownloadJars(jars, userJar, sparkConf, hadoopConf,
      secMgr)
    DependencyUtils.addJarsToClassPath(localJars, loader)
  }
} 
Example 17
Source File: PostgreSqlMain.scala    From ingraph   with Eclipse Public License 1.0
package ingraph.compiler.sql

import java.sql.DriverManager

import ingraph.compiler.sql.Util.withResources
import org.apache.commons.lang3.StringUtils

object PostgreSqlMain extends App {

  // https://github.com/yandex-qatools/postgresql-embedded/tree/ea26f6945478da8e8b48e382f8869896da2fda30#howto
  withResources(new EmbeddedPostgresWrapper) { postgres =>
    withResources(DriverManager.getConnection(postgres.Url)) { conn =>
      withResources(conn.createStatement()) {
        _.execute("CREATE TABLE films (code char(5));")
      }
      withResources(conn.createStatement()) {
        _.execute("INSERT INTO films VALUES ('movie');")
      }

      withResources(conn.createStatement) { statement =>
        assert(statement.execute("SELECT * FROM films;"))
        assert(statement.getResultSet().next())

        val code = statement.getResultSet().getString("code")

        val separator = StringUtils.repeat('=', 42)
        println(separator)
        println(code)
        println(separator)

        assert(code == "movie")
      }
    }
  }
} 
Example 18
Source File: Queryable.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.sql.execution

import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.StructType


private[sql] trait Queryable {

  // Note: the listing omits the rest of this trait; the abstract schema member is
  // declared here because formatString below relies on it.
  def schema: StructType

  private[sql] def formatString (
      rows: Seq[Seq[String]],
      numRows: Int,
      hasMoreData : Boolean,
      truncate: Boolean = true): String = {
    val sb = new StringBuilder
    val numCols = schema.fieldNames.length

    // Initialise the width of each column to a minimum value of '3'
    val colWidths = Array.fill(numCols)(3)

    // Compute the width of each column
    for (row <- rows) {
      for ((cell, i) <- row.zipWithIndex) {
        colWidths(i) = math.max(colWidths(i), cell.length)
      }
    }

    // Create SeparateLine
    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

    // column names
    rows.head.zipWithIndex.map { case (cell, i) =>
      if (truncate) {
        StringUtils.leftPad(cell, colWidths(i))
      } else {
        StringUtils.rightPad(cell, colWidths(i))
      }
    }.addString(sb, "|", "|", "|\n")

    sb.append(sep)

    // data
    rows.tail.map {
      _.zipWithIndex.map { case (cell, i) =>
        if (truncate) {
          StringUtils.leftPad(cell.toString, colWidths(i))
        } else {
          StringUtils.rightPad(cell.toString, colWidths(i))
        }
      }.addString(sb, "|", "|", "|\n")
    }

    sb.append(sep)

    // For Data that has more than "numRows" records
    if (hasMoreData) {
      val rowsString = if (numRows == 1) "row" else "rows"
      sb.append(s"only showing top $numRows $rowsString\n")
    }

    sb.toString()
  }
} 
Example 19
Source File: DataFramePrettyPrinter.scala    From lighthouse   with Apache License 2.0
package be.dataminded.lighthouse.testing

import java.sql.Date

import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.catalyst.util.DateTimeUtils

private[testing] object DataFramePrettyPrinter {

  def prettyPrintDataFrame(df: DataFrame, number: Int, truncate: Int = 20): String = {
    val numRows     = number.max(0)
    val takeResult  = df.take(numRows + 1)
    val hasMoreData = takeResult.length > numRows
    val data        = takeResult.take(numRows)

    val header = df.schema.fieldNames.toSeq

    def asReadableRows = {
      data.map { row =>
        row.toSeq.map { cell =>
          val str = cell match {
            case null                => "null"
            case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]")
            case array: Array[_]     => array.mkString("[", ", ", "]")
            case seq: Seq[_]         => seq.mkString("[", ", ", "]")
            case d: Date =>
              DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d))
            case _ => cell.toString
          }
          if (truncate > 0 && str.length > truncate) {
            // do not show ellipses for strings shorter than 4 characters.
            if (truncate < 4) str.substring(0, truncate)
            else str.substring(0, truncate - 3) + "..."
          } else {
            str
          }
        }: Seq[String]
      }
    }

    // For array values, replace Seq and Array with square brackets
    // For cells that are beyond `truncate` characters, replace it with the
    // first `truncate-3` and "..."
    val rows: Seq[Seq[String]] = header +: asReadableRows

    val sb = new StringBuilder

    // Initialise the width of each column to a minimum value of '3'
    val colWidths = Array.fill(header.length)(3)

    // Compute the width of each column
    for (row <- rows) {
      for ((cell, i) <- row.zipWithIndex) {
        colWidths(i) = math.max(colWidths(i), cell.length)
      }
    }

    // Create SeparateLine
    val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString()

    // column names
    rows.head.zipWithIndex
      .map {
        case (cell, i) =>
          if (truncate > 0) {
            StringUtils.leftPad(cell, colWidths(i))
          } else {
            StringUtils.rightPad(cell, colWidths(i))
          }
      }
      .addString(sb, "|", "|", "|\n")

    sb.append(sep)

    // data
    rows.tail.map {
      _.zipWithIndex
        .map {
          case (cell, i) =>
            if (truncate > 0) {
              StringUtils.leftPad(cell.toString, colWidths(i))
            } else {
              StringUtils.rightPad(cell.toString, colWidths(i))
            }
        }
        .addString(sb, "|", "|", "|\n")
    }

    sb.append(sep)

    // For Data that has more than "numRows" records
    if (hasMoreData) {
      val rowsString = if (numRows == 1) "row" else "rows"
      sb.append(s"only showing top $numRows $rowsString\n")
    }

    sb.toString()
  }
} 
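DataFramePrettyPrinter is marked private[testing], so a caller has to sit in the same package. A minimal usage sketch (assuming Spark in local mode; the object and column names are made up for illustration):

package be.dataminded.lighthouse.testing

import org.apache.spark.sql.SparkSession

object PrettyPrintDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("pretty-print-demo").getOrCreate()
    import spark.implicits._

    val df = Seq(("alice", 1), ("bob", 2)).toDF("name", "count")
    // Renders an ASCII table with at most 20 rows, truncating cells longer than 20 characters.
    println(DataFramePrettyPrinter.prettyPrintDataFrame(df, number = 20))

    spark.stop()
  }
}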
Example 20
Source File: ZipkinTraceFilter.scala    From play-zipkin-tracing   with Apache License 2.0
package brave.play.filter

import javax.inject.Inject

import akka.stream.Materializer
import brave.play.ZipkinTraceServiceLike
import play.api.mvc.{Filter, Headers, RequestHeader, Result}
import play.api.routing.Router

import scala.concurrent.Future
import scala.util.Failure


class ZipkinTraceFilter @Inject() (tracer: ZipkinTraceServiceLike)(implicit val mat: Materializer) extends Filter {

  import tracer.executionContext
  private val reqHeaderToSpanName: RequestHeader => String = ZipkinTraceFilter.ParamAwareRequestNamer

  def apply(nextFilter: RequestHeader => Future[Result])(req: RequestHeader): Future[Result] = {
    val serverSpan = tracer.serverReceived(
      spanName = reqHeaderToSpanName(req),
      span = tracer.newSpan(req.headers)((headers, key) => headers.get(key))
    )
    val result = nextFilter(req.withHeaders(new Headers(
      (req.headers.toMap.mapValues(_.headOption getOrElse "") ++ tracer.toMap(serverSpan)).toSeq
    )))
    result.onComplete {
      case Failure(t) => tracer.serverSend(serverSpan, "failed" -> s"Finished with exception: ${t.getMessage}")
      case _ => tracer.serverSend(serverSpan)
    }
    result
  }
}

object ZipkinTraceFilter {
  val ParamAwareRequestNamer: RequestHeader => String = { reqHeader =>
    import org.apache.commons.lang3.StringUtils
    val pathPattern = StringUtils.replace(
      reqHeader.attrs.get(Router.Attrs.HandlerDef).map(_.path).getOrElse(reqHeader.path),
      "<[^/]+>", ""
    )
    s"${reqHeader.method} - $pathPattern"
  }
} 
Example 21
Source File: StringDeduplication.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet

import org.apache.commons.lang3.StringUtils
import org.apache.spark.rdd.RDD

import scalaz.Scalaz._

trait StringDeduplication extends Serializable {

  def deduplicateWithContext(rdd: RDD[(Long, String)]): RDD[(Long, String)]
  def deduplicate(rdd: RDD[String]): RDD[(String, String)]

  def initialize(rdd: RDD[String]) = {
    rdd map(s => (s, Map(s -> 1)))
  }

  def identityDedup = (rdd: RDD[(String, Map[String, Int])]) => {
    rdd reduceByKey(_ |+| _)
  }

  def getPreferredAlternative(rdd: RDD[(String, Map[String, Int])]) = {
    rdd flatMap { case (key, tf) =>
      val bestName = tf.toSeq.sortBy(_._2).reverse.head._1
      tf.keySet map(_ -> bestName)
    }
  }

  def stringDedup = (rdd: RDD[(String, Map[String, Int])], stopWords: Set[String]) => {
    rdd map { case (name, others) =>
      (clean(name, stopWords), others)
    } reduceByKey(_ |+| _)
  }

  private def clean(name: String, stopWords: Set[String]) = {
    StringUtils.stripAccents(name)
      .split("\\W+")
      .map(_.trim)
      .filter({ case part => !stopWords.contains(part.toLowerCase()) })
      .mkString(" ")
      .split("(?<=[a-z])(?=[A-Z])")
      .mkString(" ")
      .toLowerCase()
      .split("[^a-z]")
      .map(_.trim)
      .mkString(" ")
  }

} 
Example 22
Source File: EmptinessProfiler.scala    From Mastering-Spark-for-Data-Science   with MIT License
package io.gzet.profilers.field

import io.gzet.profilers.Utils
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.Dataset

import scalaz.Scalaz._

case class EmptinessProfiler() {

  def profile(df: Dataset[Array[String]]): Dataset[EmptinessReport] = {

    import df.sparkSession.implicits._

    val features = Utils.buildColumns(df)

    features.map(f => (f.idx, StringUtils.isNotEmpty(f.value))).groupByKey({ case (column, isNotEmpty) =>
      (column, isNotEmpty)
    }).count().map({ case ((column, isNotEmpty), count) =>
      (column, Map(isNotEmpty -> count))
    }).groupByKey({ case (column, map) =>
      column
    }).reduceGroups({ (v1, v2) =>
      (v1._1, v1._2 |+| v2._2)
    }).map({ case (col, (_, map)) =>
      val emptiness = map.getOrElse(false, 0L) / (map.getOrElse(true, 0L) + map.getOrElse(false, 0L)).toDouble
      EmptinessReport(
        col,
        emptiness
      )
    })

  }

}

case class EmptinessReport(
                            field: Int,
                            metricValue: Double
                          ) 
Example 23
Source File: Index.scala    From Mastering-Spark-for-Data-Science   with MIT License
package controllers

import com.typesafe.config.ConfigFactory
import models.{Library, Songs}
import org.apache.commons.lang3.StringUtils
import play.api.Logger
import play.api.data.Form
import play.api.data.Forms._
import play.api.mvc._
import svc.{AnalyzerSvc, CassandraDao, SparkSvc}

object Index extends Controller {

  val config = ConfigFactory.load()
  val minTime = config.getInt("gzet.min.time")
  val maxTime = config.getInt("gzet.max.time")
  val cassandraHost = config.getString("cassandra.host")
  val cassandraPort = config.getInt("cassandra.port")
  val sampleSize = config.getDouble("gzet.sample.size")
  val minMatch = config.getDouble("gzet.min.match")

  val dao = new CassandraDao(cassandraHost, cassandraPort)
  val analyzer = new AnalyzerSvc()
  val spark = new SparkSvc()

  val indexForm: Form[Library] = Form(mapping("path" -> text)(Library.apply)(Library.unapply))

  def index = Action { implicit request =>
    val songs = Songs(dao.getSongs)
    Logger.info(s"Database is currently ${songs.songs.size} songs long")
    Ok(views.html.index(indexForm)(songs))
  }

  def submit = Action { implicit request =>
    indexForm.bindFromRequest.fold(
      errors =>
        Redirect(routes.Index.index()).flashing("error" -> s"Missing path"),
      index =>
        try {
          if(StringUtils.isNotEmpty(index.path)) {
            Logger.info("Dropping database")
            dao.dropSongs
            dao.dropPlaylist
            Logger.info("Submitting job")
            val jobId = spark.index(index.path)
            Redirect(routes.Index.index()).flashing("success" -> jobId)
          } else {
            Redirect(routes.Index.index()).flashing("error" -> s"Missing path")
          }
        } catch {
          case e: Exception =>
            Redirect(routes.Index.index()).flashing("error" -> e.getMessage)
        }
    )
  }
} 
Example 24
Source File: DataSourceV2StringFormat.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.datasources.v2

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.sources.v2.DataSourceV2
import org.apache.spark.util.Utils


trait DataSourceV2StringFormat {

  // Note: the listing strips this trait's scaladoc and its other abstract members;
  // they are restored here because metadataString below references them.
  def source: DataSourceV2

  def output: Seq[Attribute]

  def options: Map[String, String]

  def pushedFilters: Seq[Expression]

  private def sourceName: String = source match {
    case registered: DataSourceRegister => registered.shortName()
    // source.getClass.getSimpleName can cause Malformed class name error,
    // call safer `Utils.getSimpleName` instead
    case _ => Utils.getSimpleName(source.getClass)
  }

  def metadataString: String = {
    val entries = scala.collection.mutable.ArrayBuffer.empty[(String, String)]

    if (pushedFilters.nonEmpty) {
      entries += "Filters" -> pushedFilters.mkString("[", ", ", "]")
    }

    // TODO: we should only display some standard options like path, table, etc.
    if (options.nonEmpty) {
      entries += "Options" -> Utils.redact(options).map {
        case (k, v) => s"$k=$v"
      }.mkString("[", ",", "]")
    }

    val outputStr = Utils.truncatedString(output, "[", ", ", "]")

    val entriesStr = if (entries.nonEmpty) {
      Utils.truncatedString(entries.map {
        case (key, value) => key + ": " + StringUtils.abbreviate(value, 100)
      }, " (", ", ", ")")
    } else {
      ""
    }

    s"$sourceName$outputStr$entriesStr"
  }
} 
Example 25
Source File: TimeWindow.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

case class TimeWindow(
    timeColumn: Expression,
    windowDuration: Long,
    slideDuration: Long,
    startTime: Long) extends UnaryExpression
  with ImplicitCastInputTypes
  with Unevaluable
  with NonSQLExpression {

  //////////////////////////
  // SQL Constructors
  //////////////////////////

  def this(
      timeColumn: Expression,
      windowDuration: Expression,
      slideDuration: Expression,
      startTime: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), TimeWindow.parseExpression(startTime))
  }

  def this(timeColumn: Expression, windowDuration: Expression, slideDuration: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), 0)
  }

  def this(timeColumn: Expression, windowDuration: Expression) = {
    this(timeColumn, windowDuration, windowDuration)
  }

  override def child: Expression = timeColumn
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = new StructType()
    .add(StructField("start", TimestampType))
    .add(StructField("end", TimestampType))

  // This expression is replaced in the analyzer.
  override lazy val resolved = false
}

// Note: the companion object TimeWindow, which defines the parseExpression helper
// used by the constructors above, is omitted from this excerpt.

case class PreciseTimestamp(child: Expression) extends UnaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = LongType
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
    ev.copy(code = eval.code +
      s"""boolean ${ev.isNull} = ${eval.isNull};
         |${ctx.javaType(dataType)} ${ev.value} = ${eval.value};
       """.stripMargin)
  }
} 
Example 26
Source File: UPC.scala    From utils   with Apache License 2.0
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils

import scala.util.Try

object UPC {

  def isValid(input: String) = Try(standardize(input)).map(_ == input).toOption.getOrElse(false)

  

  def standardize(rawUpc: String) : String = {

    def standardizeRec(input: String) : String = {
      if (input.length < 12) {
        standardizeRec(leftPadZeroes(input, 12))
      } else if (input.length == 12) {
        val cDigit = calculateCheckDigit(input.substring(0, 11))
        if (input.last == cDigit + '0' && !isIsbn(input)) {
          input
        } else {
          val cDigit13 = calculateCheckDigit(leftPadZeroes(input, 13))
          input + cDigit13
        }
      } else if (input.length == 14) {
        val cDigit = calculateCheckDigit(input.substring(0, 13))
        if(input.last == cDigit + '0') {
          val gtinWithoutFirstAndCheckDigit = input.substring(1, 13)
          val upcCheckDigit = calculateCheckDigit(gtinWithoutFirstAndCheckDigit)
          standardizeRec(gtinWithoutFirstAndCheckDigit + upcCheckDigit)
        } else {
          fail("not a valid 14 digit UPC")
        }
      } else {
        input
      }
    }

    val cleanedUpc = verifyValidUpc(clean(rawUpc))

    if(isIsbn(rawUpc)) {
      leftPadZeroes(standardizeRec(cleanedUpc), 13)
    } else {
      leftPadZeroes(standardizeRec(cleanedUpc), 14)
    }


  }

  private def isIsbn(input: String) = {
    input.startsWith("978") || input.startsWith("979")
  }

  private def calculateCheckDigit(input: String) = {
    // We compute the odd and even positions from right to left because while computing check digit for EANs
    // the input length would be an even number. This makes the even and odd positions change. While for
    // input with odd length the even and odd positions are the same.
    // Reference - https://en.wikipedia.org/wiki/Check_digit#EAN_(GLN,_GTIN,_EAN_numbers_administered_by_GS1)

    val sumOddDigits = input.reverse.zipWithIndex
      .filter { case (digit, index) => (index + 1) % 2 != 0 }
      .map { case (digit, index) => digit - '0' }
      .sum

    val sumEvenDigits = input.reverse.zipWithIndex
      .filter { case (digit, index) => (index + 1) % 2 == 0 }
      .map { case (digit, index) => digit - '0' }
      .sum

    val checkDigitSum = sumOddDigits * 3 + sumEvenDigits

    if (checkDigitSum % 10 == 0) 0 else 10 - (checkDigitSum % 10)
  }

  private def leftPadZeroes(s: String, length: Int) = StringUtils.leftPad(s, length, '0')

  private def clean(input: String) = input.replaceAll("-", "")


  private def verifyValidUpc(input: String) = {
    if (StringUtils.isEmpty(input))
      fail(input + " is either null / empty")
    else if (!parseLong(input).exists(_ > 0))
      fail("NAN value - " + input)
    else if (input.length < 7 || input.length > 14)
      fail("Invalid UPC/EAN -" + input)
    else if (input.length == input.count(_ == '0'))
      fail("All Zero UPC not allowed. Invalid UPC/EAN - " + input)
    input
  }

  private def fail(message: String) = throw new IllegalArgumentException(message)

  private def parseLong(s: String) = Try(Some(s.toLong)).getOrElse(None)

} 
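A minimal usage sketch for the UPC object above (the sample code is made up for illustration; standardize throws IllegalArgumentException on junk input):

import com.indix.utils.core.UPC

object UpcExample {
  def main(args: Array[String]): Unit = {
    // standardize normalizes a raw code into a zero-padded 14-digit GTIN
    // (13 digits for ISBN-prefixed codes).
    val gtin = UPC.standardize("123456789012")
    println(gtin)

    // isValid is true only for codes that are already in standardized form.
    println(UPC.isValid(gtin))
  }
}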
Example 27
Source File: MPN.scala    From utils   with Apache License 2.0
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils
import org.apache.commons.lang3.text.WordUtils

import scala.io.Source

object MPN {
  // Some domain specific keywords known to be invalid
  val BlackListedMpns = Source.fromInputStream(getClass.getResourceAsStream("/BlacklistMPNs.txt")).getLines.toSet

  val StopChars = Set(' ', '-', '_', '.', '/')
  val TerminateChars = Set(',', '"', '*', '%', '{', '}', '#', '&', '\\')

  val MaxLen = 50
  val MinLen = 3

  // Does not consider one word strings as title-case phrase
  def isTitleCase(str: String): Boolean = {
    val words = str.split(' ').filter(_.nonEmpty)
    if (words.length < 2) false
    else words.forall(w => w == WordUtils.capitalizeFully(w))
  }

  def postProcessIdentifier(input: String): String = {
    val trimmedUpper = input.trim.toUpperCase
    trimmedUpper
  }

  // Check if identifier is valid, also return the identifier to process further if any
  def validateIdentifier(text: String): (Boolean, String) = {
    val input = if (text != null) text.trim() else text
    input match {
      case _ if StringUtils.isBlank(input) || input.length > MaxLen || input.length < MinLen => (false, "")
      case _ if input.count(c => TerminateChars.contains(c)) > 1 => (false, input)
      case _ if BlackListedMpns.contains(input.toLowerCase) => (false, "")
      case _ if isTitleCase(input) => (false, "")
      // Unicode strings yet to be handled
      case _ => (true, input)
    }
  }

  def isValidIdentifier(value: String): Boolean = validateIdentifier(value)._1

  def standardizeMPN(input: String): Option[String] = {
    val (isValid, identifier) = validateIdentifier(input)
    if (isValid) {
      Some(postProcessIdentifier(identifier))
    } else if (StringUtils.isBlank(identifier)) {
      None
    } else if (identifier.indexWhere(c => TerminateChars.contains(c)) > 0) {
      Some(postProcessIdentifier(identifier.substring(0, identifier.indexWhere(c => TerminateChars.contains(c)))))
    }
    else None
  }
} 
Example 28
Source File: ISBN.scala    From utils   with Apache License 2.0
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils

case class ISBN(isbn: String, isbn10: Option[String] = None) {
  override def toString = isbn
}

object ISBN {
  def apply(input: String): Option[ISBN] = {
    Some(input)
      .filter(!StringUtils.isEmpty(_))
      .map(clean)
      .flatMap {
        case x if isValidIsbn10(x) =>
          Some(new ISBN(isbn10to13(x), Some(x)))
        case x if isValidIsbn13(x) =>
          Some(new ISBN(x))
        case _ => None
      }
  }

  private def clean(input: String) = {
    input.replaceAll("[ -]", "")
  }

  private def calculateCheckDigit13(input: String) = {
    val inputWithoutChecksum = input.dropRight(1)

    val sum = inputWithoutChecksum.zipWithIndex.map{
      case (c, i) if i % 2 != 0 => (c - '0') * 3
      case (c, _) => c - '0'
    }.sum

    // second modulo so that a sum already divisible by 10 yields check digit 0
    ((10 - sum % 10) % 10 + '0').toChar
  }

  private def calculateCheckDigit10(input: String) = {
    val sum = input.dropRight(1).map(_ - '0').zip(10 to 2 by -1).foldLeft(0)((i: Int, tuple: (Int, Int)) => i + tuple._1 * tuple._2)
    val checkDigit = (11 - sum % 11) % 11
    if (checkDigit == 10) 'X' else (checkDigit + '0').toChar
  }

  private def isbn10to13(input: String) = {
    val withPrefix = "978" + input
    withPrefix.dropRight(1) + calculateCheckDigit13(withPrefix)
  }

  private def isValidIsbn13(input: String) = {
    input.length == 13 && input.matches("^97[89].+") && input.last == calculateCheckDigit13(input)
  }

  private def isValidIsbn10(input: String) = {
    input.length == 10 && input.last == calculateCheckDigit10(input)
  }

} 
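
A usage sketch of the apply method above, using the well-known "0-306-40615-2" ISBN-10 (the object name is hypothetical):

object IsbnExamples extends App {
  println(ISBN("0-306-40615-2"))  // Some(9780306406157): valid ISBN-10, upgraded to ISBN-13
  println(ISBN("978-0306406157")) // Some(9780306406157): already a valid ISBN-13
  println(ISBN("978-0306406158")) // None: the ISBN-13 check digit does not match
}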
Example 29
Source File: CustomAccumulator.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.liumm.transform

import org.apache.commons.lang3.StringUtils
import org.apache.spark.util.AccumulatorV2


class CustomAccumulator extends AccumulatorV2[String, String] {

  var result = "" // default value

  override def isZero: Boolean = {
    result == ""
  }

  override def copy(): AccumulatorV2[String, String] = {
    val customAccumulator = new CustomAccumulator()
    customAccumulator.result = this.result
    customAccumulator
  }

  override def reset(): Unit = {
    result = ""
  }

  override def add(v: String): Unit = {
    if (StringUtils.isNoneBlank(v)) {
      if (isZero) {
        result = v
      } else {
        result += "|" + v
      }
    }
  }

  override def merge(other: AccumulatorV2[String, String]): Unit = other match {
    case newAc: CustomAccumulator =>
      if (isZero) result = newAc.value
      else if (StringUtils.isNoneBlank(newAc.value)) result += "|" + newAc.value
    case _ =>
      throw new UnsupportedOperationException(
        s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}"
      )
  }

  override def value: String = {
    result
  }
} 
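
A usage sketch of the accumulator in a local Spark job (a minimal sketch, assuming spark-core is on the classpath; names are made up):

import org.apache.spark.{SparkConf, SparkContext}

object CustomAccumulatorExample extends App {
  val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("acc-demo"))

  val acc = new CustomAccumulator
  sc.register(acc, "customAcc")

  // Blank strings are ignored by add(); non-blank values are joined with "|".
  sc.parallelize(Seq("a", "", "b", "c")).foreach(acc.add)
  println(acc.value) // e.g. "a|b|c" (element order across partitions is not guaranteed)

  sc.stop()
}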
Example 30
Source File: StreamUtils.scala    From spark-redis   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.redislabs.provider.redis.util

import org.apache.commons.lang3.StringUtils
import redis.clients.jedis.{StreamEntryID, Jedis}


object StreamUtils extends Logging {

  val EntryIdEarliest = new StreamEntryID(0, 0)

  def createConsumerGroupIfNotExist(conn: Jedis, streamKey: String, groupName: String,
                                    offset: StreamEntryID): Unit = {
    try {
      conn.xgroupCreate(streamKey, groupName, offset, true)
    } catch {
      case e: Exception if StringUtils.contains(e.getMessage, "already exists") =>
        logInfo(s"Consumer group already exists: $groupName")
    }
  }

  def resetConsumerGroup(conn: Jedis, streamKey: String, groupName: String,
                         offset: StreamEntryID): Unit = {
    logInfo(s"Setting consumer group $groupName id to $offset")
    conn.xgroupSetID(streamKey, groupName, offset)
  }
} 
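
A usage sketch (assumes a Redis server on localhost:6379; the stream and group names are made up):

import redis.clients.jedis.Jedis

object StreamUtilsExample extends App {
  val conn = new Jedis("localhost", 6379)
  try {
    // Safe to call repeatedly: an "already exists" error is swallowed and logged.
    StreamUtils.createConsumerGroupIfNotExist(conn, "events-stream", "analytics-group",
      StreamUtils.EntryIdEarliest)
    // Rewind the group so the stream is re-consumed from the beginning.
    StreamUtils.resetConsumerGroup(conn, "events-stream", "analytics-group",
      StreamUtils.EntryIdEarliest)
  } finally {
    conn.close()
  }
}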
Example 31
Source File: RabbitRecordFactory.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.rabbit

import hydra.common.config.ConfigSupport
import hydra.core.ingest.HydraRequest
import hydra.core.transport.{
  AckStrategy,
  HydraRecord,
  RecordFactory,
  RecordMetadata
}
import hydra.rabbit.RabbitRecord.{
  DESTINATION_TYPE_EXCHANGE,
  DESTINATION_TYPE_QUEUE,
  HYDRA_RABBIT_EXCHANGE,
  HYDRA_RABBIT_QUEUE
}
import org.apache.commons.lang3.StringUtils

import scala.concurrent.{ExecutionContext, Future}

object RabbitRecordFactory
    extends RecordFactory[String, String]
    with ConfigSupport {

  override def build(
      request: HydraRequest
  )(implicit ec: ExecutionContext): Future[RabbitRecord] = {
    val props = Seq(
      request.metadataValue(HYDRA_RABBIT_EXCHANGE),
      request.metadataValue(HYDRA_RABBIT_QUEUE)
    ).flatten
    Future {
      require(
        props.length == 1,
        "A single parameter for exchange or queue is required"
      )
      val destination = request.metadataValue(HYDRA_RABBIT_EXCHANGE) match {
        case Some(exchange) => (exchange, DESTINATION_TYPE_EXCHANGE)
        case _ =>
          (
            request.metadataValue(HYDRA_RABBIT_QUEUE).get,
            DESTINATION_TYPE_QUEUE
          )
      }
      RabbitRecord(
        destination._1,
        destination._2,
        request.payload,
        request.ackStrategy
      )
    }
  }
}

case class RabbitRecord(
    destination: String,
    destinationType: String,
    payload: String,
    ackStrategy: AckStrategy
) extends HydraRecord[String, String] {

  override val key: String = StringUtils.EMPTY
}

object RabbitRecord {

  val HYDRA_RABBIT_EXCHANGE = "hydra-rabbit-exchange"

  val HYDRA_RABBIT_QUEUE = "hydra-rabbit-queue"

  val DESTINATION_TYPE_EXCHANGE = "exchange"

  val DESTINATION_TYPE_QUEUE = "queue"

}

case class RabbitRecordMetadata(
    timestamp: Long,
    id: Long,
    destination: String,
    destinationType: String,
    ackStrategy: AckStrategy
) extends RecordMetadata 
Example 32
Source File: JsonRecord.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.producer

import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
import hydra.core.transport.AckStrategy
import org.apache.commons.lang3.StringUtils


case class JsonRecord(
    destination: String,
    key: String,
    payload: JsonNode,
    ackStrategy: AckStrategy
) extends KafkaRecord[String, JsonNode]

object JsonRecord {
  val mapper = new ObjectMapper()

  def apply(
      topic: String,
      key: Option[String],
      obj: Any,
      ackStrategy: AckStrategy
  ): JsonRecord = {
    val payload = mapper.convertValue[JsonNode](obj, classOf[JsonNode])
    new JsonRecord(topic, key.orNull, payload, ackStrategy)
  }
} 
Example 33
Source File: StringRecord.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.producer

import hydra.core.transport.AckStrategy
import org.apache.commons.lang3.StringUtils


case class StringRecord(
    destination: String,
    key: String,
    payload: String,
    ackStrategy: AckStrategy
) extends KafkaRecord[String, String]

object StringRecord {

  def apply(
      topic: String,
      key: Option[String],
      payload: String,
      ackStrategy: AckStrategy
  ): StringRecord =
    new StringRecord(topic, key.orNull, payload, ackStrategy)
} 
Example 34
Source File: AvroRecord.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.producer

import com.pluralsight.hydra.avro.JsonConverter
import hydra.core.transport.AckStrategy
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.commons.lang3.StringUtils


case class AvroRecord(
    destination: String,
    schema: Schema,
    key: String,
    payload: GenericRecord,
    ackStrategy: AckStrategy
) extends KafkaRecord[String, GenericRecord]

object AvroRecord {

  def apply(
      destination: String,
      schema: Schema,
      key: Option[String],
      json: String,
      ackStrategy: AckStrategy,
      useStrictValidation: Boolean = false
  ): AvroRecord = {

    val payload: GenericRecord = {
      val converter: JsonConverter[GenericRecord] =
        new JsonConverter[GenericRecord](schema, useStrictValidation)
      converter.convert(json)
    }

    AvroRecord(destination, schema, key.orNull, payload, ackStrategy)
  }

  def apply(
      destination: String,
      schema: Schema,
      key: Option[String],
      record: GenericRecord,
      ackStrategy: AckStrategy
  ): AvroRecord = {
    AvroRecord(destination, schema, key.orNull, record, ackStrategy)
  }
} 
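
A sketch of the JSON-based apply overload above; the schema, topic and payload are made up, and the ack strategy is left as a parameter rather than assuming a particular AckStrategy value:

import org.apache.avro.Schema
import hydra.core.transport.AckStrategy

object AvroRecordExample {
  def buildUserRecord(ack: AckStrategy): AvroRecord = {
    val schema = new Schema.Parser().parse(
      """{"type":"record","name":"User","fields":[
        |  {"name":"id","type":"int"},
        |  {"name":"name","type":"string"}
        |]}""".stripMargin)

    // JsonConverter converts (and validates) the JSON payload against the schema.
    AvroRecord("user-topic", schema, Some("1"), """{"id":1,"name":"alice"}""", ack)
  }
}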
Example 35
Source File: TimeWindow.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.expressions

import org.apache.commons.lang3.StringUtils

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval

case class TimeWindow(
    timeColumn: Expression,
    windowDuration: Long,
    slideDuration: Long,
    startTime: Long) extends UnaryExpression
  with ImplicitCastInputTypes
  with Unevaluable
  with NonSQLExpression {

  //////////////////////////
  // SQL Constructors
  //////////////////////////

  def this(
      timeColumn: Expression,
      windowDuration: Expression,
      slideDuration: Expression,
      startTime: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), TimeWindow.parseExpression(startTime))
  }

  def this(timeColumn: Expression, windowDuration: Expression, slideDuration: Expression) = {
    this(timeColumn, TimeWindow.parseExpression(windowDuration),
      TimeWindow.parseExpression(slideDuration), 0)
  }

  def this(timeColumn: Expression, windowDuration: Expression) = {
    this(timeColumn, windowDuration, windowDuration)
  }

  override def child: Expression = timeColumn
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
  override def dataType: DataType = new StructType()
    .add(StructField("start", TimestampType))
    .add(StructField("end", TimestampType))

  // This expression is replaced in the analyzer.
  override lazy val resolved = false

  // (remainder of TimeWindow, including the companion object that defines the
  //  parseExpression helper used by the auxiliary constructors above, is elided)
}

case class PreciseTimestampConversion(
    child: Expression,
    fromType: DataType,
    toType: DataType) extends UnaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] = Seq(fromType)
  override def dataType: DataType = toType
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val eval = child.genCode(ctx)
    ev.copy(code = eval.code +
      code"""boolean ${ev.isNull} = ${eval.isNull};
         |${CodeGenerator.javaType(dataType)} ${ev.value} = ${eval.value};
       """.stripMargin)
  }
  override def nullSafeEval(input: Any): Any = input
} 
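
TimeWindow is not constructed directly by users; it is the expression behind org.apache.spark.sql.functions.window. A typical query that produces it (a fragment, assuming a DataFrame named events with an eventTime timestamp column and a userId column):

import org.apache.spark.sql.functions.{col, count, window}

// window() wraps a TimeWindow expression, which the analyzer later rewrites
// (as the comment on `resolved` above indicates).
val counts = events
  .groupBy(window(col("eventTime"), "10 minutes", "5 minutes"), col("userId"))
  .agg(count("*").as("events"))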
Example 36
Source File: SinkRouteHandler.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.shabondi.sink

import java.time.{Duration => JDuration}
import java.util.concurrent.TimeUnit

import akka.actor.ActorSystem
import akka.http.scaladsl.model.{ContentTypes, HttpEntity, StatusCodes}
import akka.http.scaladsl.server.{ExceptionHandler, Route}
import com.typesafe.scalalogging.Logger
import oharastream.ohara.common.data.Row
import oharastream.ohara.common.util.Releasable
import oharastream.ohara.shabondi.common.{JsonSupport, RouteHandler, ShabondiUtils}
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable.ArrayBuffer
import scala.compat.java8.DurationConverters._
import scala.concurrent.ExecutionContextExecutor
import scala.concurrent.duration.Duration
import spray.json.DefaultJsonProtocol._
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._

private[shabondi] object SinkRouteHandler {
  def apply(config: SinkConfig)(implicit actorSystem: ActorSystem) =
    new SinkRouteHandler(config)
}

private[shabondi] class SinkRouteHandler(config: SinkConfig)(implicit actorSystem: ActorSystem) extends RouteHandler {
  implicit private val contextExecutor: ExecutionContextExecutor = actorSystem.dispatcher

  private val log              = Logger(classOf[SinkRouteHandler])
  private[sink] val dataGroups = SinkDataGroups(config)

  def scheduleFreeIdleGroups(interval: JDuration, idleTime: JDuration): Unit =
    actorSystem.scheduler.scheduleWithFixedDelay(Duration(1, TimeUnit.SECONDS), interval.toScala) { () =>
      {
        log.trace("scheduled free group, total group: {} ", dataGroups.size)
        dataGroups.freeIdleGroup(idleTime)
      }
    }

  private val exceptionHandler = ExceptionHandler {
    case ex: Throwable =>
      log.error(ex.getMessage, ex)
      complete((StatusCodes.InternalServerError, ex.getMessage))
  }

  private def fullyPollQueue(queue: RowQueue): Seq[Row] = {
    val buffer    = ArrayBuffer.empty[Row]
    var item: Row = queue.poll()
    while (item != null) {
      buffer += item
      item = queue.poll()
    }
    buffer.toSeq
  }

  private def apiUrl = ShabondiUtils.apiUrl

  def route(): Route = handleExceptions(exceptionHandler) {
    path("groups" / Segment) { groupId =>
      get {
        if (StringUtils.isAlphanumeric(groupId)) {
          val group  = dataGroups.createIfAbsent(groupId)
          val result = fullyPollQueue(group.queue).map(row => JsonSupport.toRowData(row))
          complete(result)
        } else {
          val entity =
            HttpEntity(ContentTypes.`text/plain(UTF-8)`, "Illegal group name; only alphanumeric characters are accepted.")
          complete(StatusCodes.NotAcceptable -> entity)
        }
      } ~ {
        complete(StatusCodes.MethodNotAllowed -> s"Unsupported method, please reference: $apiUrl")
      }
    } ~ {
      complete(StatusCodes.NotFound -> s"Please reference: $apiUrl")
    }
  }

  override def close(): Unit = {
    Releasable.close(dataGroups)
  }
} 
Example 37
Source File: ContinuousTrigger.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming.continuous

import java.util.concurrent.TimeUnit

import scala.concurrent.duration.Duration

import org.apache.commons.lang3.StringUtils

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.streaming.{ProcessingTime, Trigger}
import org.apache.spark.unsafe.types.CalendarInterval


@InterfaceStability.Evolving
case class ContinuousTrigger(intervalMs: Long) extends Trigger {
  require(intervalMs >= 0, "the interval of trigger should not be negative")
}

private[sql] object ContinuousTrigger {
  def apply(interval: String): ContinuousTrigger = {
    if (StringUtils.isBlank(interval)) {
      throw new IllegalArgumentException(
        "interval cannot be null or blank.")
    }
    val cal = if (interval.startsWith("interval")) {
      CalendarInterval.fromString(interval)
    } else {
      CalendarInterval.fromString("interval " + interval)
    }
    if (cal == null) {
      throw new IllegalArgumentException(s"Invalid interval: $interval")
    }
    if (cal.months > 0) {
      throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval")
    }
    new ContinuousTrigger(cal.microseconds / 1000)
  }

  def apply(interval: Duration): ContinuousTrigger = {
    ContinuousTrigger(interval.toMillis)
  }

  def create(interval: String): ContinuousTrigger = {
    apply(interval)
  }

  def create(interval: Long, unit: TimeUnit): ContinuousTrigger = {
    ContinuousTrigger(unit.toMillis(interval))
  }
} 
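
A usage sketch of the string parser above (the interval values are illustrative):

object ContinuousTriggerExamples extends App {
  println(ContinuousTrigger("1 second").intervalMs)           // 1000
  println(ContinuousTrigger("interval 2 minutes").intervalMs) // 120000
}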
Example 38
Source File: DefaultThriftServerNameRule.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.rule.basic

import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule
import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule._
import org.apache.commons.lang3.StringUtils
import com.enjoyyin.hive.proxy.jdbc.domain.ThriftServerName



object DefaultThriftServerNameRule extends ThriftServerNameRule {
  
  override def dealOrNot(params: Map[String, String]): ThriftServerName = {
    var thriftServerName = ""
    if(params != null && params.contains(THRIFT_CONNECTION_NAME)) {
      thriftServerName = params(THRIFT_CONNECTION_NAME)
    }
    var _username = params(USERNAME_NAME)
    if(StringUtils.isEmpty(thriftServerName)) {
      if(StringUtils.isNotEmpty(_username) && _username.indexOf("_") > 0) {
        val _arrays = _username.split("_")
        if(_arrays.length != 2) {
          throw new IllegalArgumentException(s"Illegal username ${_username}.")
        }
        thriftServerName = _arrays(0)
        _username = _arrays(1)
      } else {
        thriftServerName = _username
      }
      if(StringUtils.isEmpty(thriftServerName)) {      
        throw new NullPointerException(s"JDBC url must have $THRIFT_CONNECTION_NAME")
      }
    }
    ThriftServerName(thriftServerName, _username)
  }
  
  override def canDeal(params: Map[String, String]) = true
} 
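
A usage sketch of the rule above (the map keys come from ThriftServerNameRule; user and server names are made up):

import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule.{THRIFT_CONNECTION_NAME, USERNAME_NAME}

object ThriftServerNameExamples extends App {
  // An explicit connection name wins:
  println(DefaultThriftServerNameRule.dealOrNot(
    Map(THRIFT_CONNECTION_NAME -> "cluster2", USERNAME_NAME -> "alice")))
  // => ThriftServerName(cluster2,alice)

  // Otherwise a "server_user" style username is split into server and user:
  println(DefaultThriftServerNameRule.dealOrNot(Map(USERNAME_NAME -> "cluster1_alice")))
  // => ThriftServerName(cluster1,alice)
}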
Example 39
Source File: DefaultStatisticsDealRule.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.rule.basic

import com.enjoyyin.hive.proxy.jdbc.rule.StatisticsDealRule
import com.enjoyyin.hive.proxy.jdbc.thrift.EventInfo
import com.enjoyyin.hive.proxy.jdbc.thrift.Listener
import com.enjoyyin.hive.proxy.jdbc.util.Logging
import com.enjoyyin.hive.proxy.jdbc.util.Utils
import com.enjoyyin.hive.proxy.jdbc.util.ProxyConf._
import com.enjoyyin.hive.proxy.jdbc.thrift.ProxyServiceEvent
import com.enjoyyin.hive.proxy.jdbc.domain.DealRule._
import org.apache.commons.lang3.StringUtils


object DefaultStatisticsDealRule extends StatisticsDealRule with Logging {
  
  override def dealOrNot(eventInfo: EventInfo): Unit = {
    if(StringUtils.isEmpty(eventInfo.hql)) return
    val executeTime = eventInfo.finishedExecuteTime - eventInfo.startExecuteTime
    val waitTime = eventInfo.startExecuteTime - eventInfo.startWaitTime
    val msg = new StringBuilder
    msg ++= "Since the beginning waiting time is " ++ Utils.dateFormat(eventInfo.startWaitTime) ++ 
      ", I who come from (name=" ++ eventInfo.user.username ++ ", ip=" ++ eventInfo.user.ipAddress ++ "), have cost " ++ Utils.msDurationToString(waitTime) ++ " waiting for execute, and the beginning executing time is " ++
      Utils.dateFormat(eventInfo.startExecuteTime) ++ "; have cost " ++
      Utils.msDurationToString(executeTime) ++ " to be completed, when the finish time is " ++
      Utils.dateFormat(eventInfo.finishedExecuteTime) ++
      ", and the final state is " ++ eventInfo.state.toString ++ ", hql is: " ++ eventInfo.hql
    if(StringUtils.isNotEmpty(eventInfo.errorMsg)) {
      msg ++= ", and error message is: " ++ eventInfo.errorMsg
    }
    logInfo(msg.toString)
  }
}

class StatisticsDealListener extends Listener {
  
  def onChange(thriftServerName: String, event: ProxyServiceEvent[_]): Unit = {
    val eventInfo = event.getEventInfo
    eventInfo.foreach { e =>
      getHiveThriftProxyRule(thriftServerName).statisticsDeal match {
        case Default => DefaultStatisticsDealRule.dealOrNot(e)
        case UserDefine => getHiveThriftProxyRuleClass(thriftServerName).statisticsDealRule.dealOrNot(e)
        case _ =>
      }
    }
  }
  
} 
Example 40
Source File: DefaultLoginValidateRule.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.rule.basic

import com.enjoyyin.hive.proxy.jdbc.rule.LoginValidateRule
import com.enjoyyin.hive.proxy.jdbc.util.Logging
import com.enjoyyin.hive.proxy.jdbc.domain.User
import com.enjoyyin.hive.proxy.jdbc.thrift.ProxySession
import com.enjoyyin.hive.proxy.jdbc.util.ProxyConf._
import com.enjoyyin.hive.proxy.jdbc.domain.DealRule._
import org.apache.commons.lang3.StringUtils
import org.apache.hive.service.auth.PasswdAuthenticationProvider
import com.enjoyyin.hive.proxy.jdbc.rule.ThriftServerNameRule
import com.enjoyyin.hive.proxy.jdbc.util.Utils
import javax.security.sasl.AuthenticationException



object DefaultLoginValidateRule extends LoginValidateRule with PasswdAuthenticationProvider {
  
  private def validateByUser(user: String, password: String): Boolean = {
    val pwd = getPwdByUser(user)
    pwd.contains(password)
  }
  
  override def dealOrNot(user: User): Boolean = {
    val session = user.asInstanceOf[ProxySession]
    val username = session.username
    if(validateByUser(username, session.password)){
      return true
    }
    val thriftServerName = session.thriftServerName
    val userPwds = getHiveThriftServerInfo(thriftServerName).map(t => t.username -> t.password).toMap
    if(!userPwds.contains(username)) {
      return false
    } else if(StringUtils.isNotEmpty(userPwds(username)) &&
        userPwds(username) != session.password) {
      return false
    }
    true
  }
  
  override def Authenticate(user: String, password: String) : Unit = {
    val thriftServer = Utils.tryAndLogError(ThriftServerNameRule.getThriftServerName(null, user, null))
    if(thriftServer != null) {
      val thriftServerName = thriftServer.thriftServerName
      if(!getThriftNames.contains(thriftServerName)) {
        throw new AuthenticationException("Cannot redirect to correct thrift proxy!")
      }
      val session = new ProxySession(null, thriftServerName, 0,
        thriftServer.username, password, null)
      getHiveThriftProxyRule(thriftServerName).loginValidate match {
        case Default => if(!dealOrNot(session)) throw new AuthenticationException("Wrong username or password!")
        case UserDefine => 
          val canLogin = Utils.tryThrow(getHiveThriftProxyRuleClass(thriftServerName).loginValidateRule.dealOrNot(session))(
            new AuthenticationException("Login authentication has occured an error!", _))
          if(!canLogin) {
            throw new AuthenticationException("Wrong username or password!")
          }
        case _ =>
      }
    } else if(!validateByUser(user, password)){
      throw new AuthenticationException("Wrong username or password!")
    }
  }
  
} 
Example 41
Source File: HiveThriftServerInfo.scala    From Hive-JDBC-Proxy   with Apache License 2.0 5 votes vote down vote up
package com.enjoyyin.hive.proxy.jdbc.domain

import com.enjoyyin.hive.proxy.jdbc.rule.UserAllowRule
import com.enjoyyin.hive.proxy.jdbc.rule.QueryDealRule
import com.enjoyyin.hive.proxy.jdbc.rule.LoginValidateRule
import org.apache.commons.lang3.StringUtils
import com.enjoyyin.hive.proxy.jdbc.rule.StatisticsDealRule
import com.enjoyyin.hive.proxy.jdbc.util.ProxyConf.MAX_EXECUTE_TIME_OF_OPERATION
import com.enjoyyin.hive.proxy.jdbc.util.Utils
import com.enjoyyin.hive.proxy.jdbc.rule.Balancer



object AllowRule extends Enumeration {
  type AllowRule = Value
  val Allow, UserDefine, Error, CancelPrev = Value
}
object DealRule extends Enumeration {
  type DealRule = Value
  val NONE, Default, UserDefine = Value
}
import AllowRule._
import DealRule._
case class HiveThriftServerInfo(thriftServerName: String, serverName: String, valid: Boolean, username: String,
                                password: String, host: String, port: Int, maxThread: Int, loginTimeout: Int = 5000) {
  def validate: Unit = {
    if(StringUtils.isEmpty(host) || !host.matches("[0-9\\.a-zA-Z\\-]{7,}")) {
      throw new IllegalArgumentException(s"Host ($host) of ${thriftServerName} is missing or invalid!")
    }
    if(StringUtils.isEmpty(username)) {
      throw new IllegalArgumentException(s"Username of ${thriftServerName} must not be empty!")
    }
    if(port <= 0) {
      throw new IllegalArgumentException(s"Port of ${thriftServerName} must be greater than 0!")
    }
    if(maxThread <= 0 || maxThread >= 50) {
      throw new IllegalArgumentException(s"maxThread of ${thriftServerName} must be within (0, 50)!")
    }
    if(loginTimeout < MAX_EXECUTE_TIME_OF_OPERATION) {
      throw new IllegalArgumentException(s"loginTimeout of ${thriftServerName} must be at least operation.execute.time.max (${Utils.msDurationToString(MAX_EXECUTE_TIME_OF_OPERATION)})!")
    }
  }
}
case class HiveThriftProxyRule(thriftServerName: String, allowMultiSessionsInIP: AllowRule = Allow,
    allowMultiSessionsInUser: AllowRule = Allow, loginValidate: DealRule = NONE,
    queryDeal: DealRule = Default, statisticsDeal: DealRule = Default, balancerRule: DealRule = Default)
 
case class HiveThriftProxyRuleClass(thriftServerName: String, mutiSessionsInIPRule: UserAllowRule = null,
    mutiSessionsInUserRule: UserAllowRule = null, statisticsDealRule: StatisticsDealRule = null,
    loginValidateRule: LoginValidateRule = null, queryDealRule: List[QueryDealRule] = List.empty[QueryDealRule],
    balancer: Balancer = null)
    
case class User(username: String, password: String, ipAddress: String)

case class UserHQL(user: User, hqlPriority: HQLPriority, executeHQLs: Map[String, Array[ExecuteHQLInfo]])

case class HQLPriority(hql: String, priority: Int)

case class ExecuteHQLInfo(username: String, ipAddress: String, hql: String)

case class ThriftServerName(thriftServerName: String, username: String) 
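
A quick sketch of what the validate method above enforces (all field values are made up; the object name is hypothetical):

object ThriftServerInfoExample extends App {
  val info = HiveThriftServerInfo(
    thriftServerName = "cluster1", serverName = "hive-1", valid = true,
    username = "hive", password = "", host = "10.0.0.21", port = 0, maxThread = 10)

  // Throws IllegalArgumentException("Port of cluster1 must be greater than 0!"):
  // the host and username checks pass, then the port check fails.
  info.validate
}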
Example 42
Source File: ExtractApplicationProperties.scala    From rug   with GNU General Public License v3.0 5 votes vote down vote up
package com.atomist.rug.kind.java

import com.atomist.tree.content.project.{ConfigValue, Configuration, SimpleConfigValue, SimpleConfiguration}
import com.atomist.source.FileArtifact
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable.ListBuffer
import scala.io.Source


class ExtractApplicationProperties(source: String) extends Function[FileArtifact, Configuration] {

  override def apply(f: FileArtifact): Configuration = {
    val isWhiteSpace: String => Boolean = line => StringUtils.isWhitespace(line)
    val isComment: String => Boolean = line => !isWhiteSpace(line) && line.dropWhile(c => c.isWhitespace).startsWith("#")
    val isContent: String => Boolean = line => !(isWhiteSpace(line) || isComment(line))

    trait State
    object InComment extends State
    object InBlanks extends State

    var state: State = InComment
    var comment = ""
    val configValues = new ListBuffer[ConfigValue]()

    // Strip # and whitespace from comments (respecting multiline comments)
    def extractComment(comment: String): String = {

      def toCommentContentLine(l: String) = {
        val r = l.dropWhile(c => c.isWhitespace || '#'.equals(c))
        r
      }

      val r = comment.lines.map(l => toCommentContentLine(l)).mkString("\n")
      r
    }

    // Return None if not a valid property line
    def parseContentLine(line: String): Option[ConfigValue] = {
      val stripped = line.dropWhile(c => c.isWhitespace)
      val idx = stripped.indexOf("=")
      if (idx == -1) {
        None
      }
      else {
        val (key, value) = stripped.splitAt(idx)
        val profile = ""
        Some(SimpleConfigValue(key, value.substring(1), source, profile, description = extractComment(comment)))
      }
    }

    def appendToComment(l: String): Unit = {
      if ("".equals(comment)) comment = l
      else comment = comment + "\n" + l
    }

    val lines = Source.fromString(f.content).getLines()
    for (line <- lines) {
      if (isContent(line)) {
        parseContentLine(line).foreach(cv => configValues.append(cv))
        comment = ""
      }
      else state match {
        case InBlanks if isComment(line) =>
          state = InComment
          appendToComment(line)
        case InComment if isComment(line) || isWhiteSpace(line) =>
          appendToComment(line)
        case InComment =>
          comment = ""
          state = InBlanks
        case _ =>
      }
    }
    new SimpleConfiguration(configValues)
  }
} 
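
A usage sketch; StringFileArtifact(name, content) is assumed to be the simple in-memory FileArtifact factory from com.atomist.source, and the properties content is made up:

import com.atomist.source.StringFileArtifact

object ExtractPropertiesExample extends App {
  val props =
    """# Server settings
      |server.port=8080
      |
      |# Datasource
      |spring.datasource.url=jdbc:h2:mem:test
      |""".stripMargin

  val config = new ExtractApplicationProperties(source = "application.properties")
    .apply(StringFileArtifact("application.properties", props))
  // config should hold server.port and spring.datasource.url, each with the
  // preceding comment captured as its description.
}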
Example 43
Source File: ServiceInfoLogger.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.standalone

import java.io.File

import org.apache.commons.lang3.StringUtils
import org.apache.openwhisk.standalone.ColorOutput.clr

import scala.io.AnsiColor

class ServiceInfoLogger(conf: Conf, services: Seq[ServiceContainer], workDir: File) extends AnsiColor {
  private val separator = "=" * 80

  def run(): Unit = {
    println(separator)
    println("Launched service details")
    println()
    services.foreach(logService)
    println()
    println(s"Local working directory - ${workDir.getAbsolutePath}")
    println(separator)
  }

  private def logService(s: ServiceContainer): Unit = {
    val msg = s"${portInfo(s.port)} ${s.description} (${clr(s.name, BOLD, conf.colorEnabled)})"
    println(msg)
  }

  private def portInfo(port: Int) = {
    val msg = StringUtils.center(port.toString, 7)
    s"[${clr(msg, GREEN, conf.colorEnabled)}]"
  }
} 
Example 44
Source File: RemoteDirectory.scala    From dependency   with MIT License 5 votes vote down vote up
package io.flow.dependency.api.lib

import io.flow.dependency.v0.models.{Credentials, CredentialsUndefinedType, UsernamePassword}
import org.htmlcleaner.HtmlCleaner
import org.apache.commons.codec.binary.Base64
import org.apache.commons.lang3.StringUtils
import org.apache.commons.text.StringEscapeUtils
import java.net.URL


import scala.util.{Failure, Success, Try}



object RemoteDirectory {

  case class Result(
    directories: Seq[String] = Nil,
    files: Seq[String] = Nil
  )

  def fetch(
    url: String,
    credentials: Option[Credentials] = None
  ) (
    filter: String => Boolean = { !_.startsWith(".") }
  ): Result = {
    val base = Result()
    val cleaner = new HtmlCleaner()

    val uc = (new URL(url)).openConnection()
    credentials.foreach {
      case UsernamePassword(username, password) =>
        val userpass = username + ":" + password.getOrElse("")
        val basicAuth = "Basic " + new String(new Base64().encode(userpass.getBytes()))
        uc.setRequestProperty("Authorization", basicAuth)
      case CredentialsUndefinedType(_) =>
        // No-op
    }

    Try(cleaner.clean(uc.getInputStream())) match {
      case Failure(_) => {
        base
      }
      case Success(rootNode) => {
        rootNode.getElementsByName("a", true).foldLeft(base) { case (result, elem) =>
          Option(elem.getAttributeByName("href")) match {
            case None => {
              result
            }
            case Some(_) => {
              val text = StringEscapeUtils.unescapeHtml4(elem.getText.toString)
              filter(StringUtils.stripEnd(text, "/")) match {
                case false => {
                  result
                }
                case true => {
                  text.endsWith("/") match {
                    case true => result.copy(directories = result.directories ++ Seq(text))
                    case false => result.copy(files = result.files ++ Seq(text))
                  }
                }
              }
            }
          }
        }
      }
    }
  }
} 
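
A usage sketch (the URL is illustrative; any HTML index page that exposes <a href="..."> links will do, and network access is required):

object RemoteDirectoryExample extends App {
  val result = RemoteDirectory.fetch("https://repo1.maven.org/maven2/org/apache/commons/")()
  result.directories.take(5).foreach(println) // e.g. commons-lang3/, commons-text/, ...
  println(s"files: ${result.files.size}, directories: ${result.directories.size}")
}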
Example 45
Source File: RemoteVersions.scala    From dependency   with MIT License 5 votes vote down vote up
package io.flow.dependency.api.lib

import io.flow.dependency.v0.models.Credentials
import io.flow.util.Version
import org.apache.commons.lang3.StringUtils

object RemoteVersions {

  def fetch(
    resolver: String,
    groupId: String,
    artifactId: String,
    credentials: Option[Credentials]
  ): Seq[ArtifactVersion] = {
    val versions = fetchUrl(
      joinUrl(resolver, groupId.replaceAll("\\.", "/")),
      artifactId,
      credentials
    ) match {
      case Nil => {
        fetchUrl(joinUrl(resolver, groupId), artifactId, credentials)
      }
      case results => {
        results
      }
    }
    versions.sortBy { _.tag }.reverse
  }

  private[this] def fetchUrl(
    url: String,
    artifactId: String,
    credentials: Option[Credentials]
  ): Seq[ArtifactVersion] = {
    val result = RemoteDirectory.fetch(url, credentials = credentials)(
      filter = { name => name == artifactId || name.startsWith(artifactId + "_") }
    )

    result.directories.flatMap { dir =>
      val thisUrl = joinUrl(url, dir)
      RemoteDirectory.fetch(thisUrl, credentials = credentials)().directories.map { d =>
        ArtifactVersion(
          tag = Version(StringUtils.stripEnd(d, "/")),
          crossBuildVersion = crossBuildVersion(dir)
        )
      }
    }
  }

  // e.g. "scala-csv_2.11/" => 2.11
  def crossBuildVersion(text: String): Option[Version] = {
    StringUtils.stripEnd(text, "/").split("_").toList match {
      case Nil => None
      case _ :: Nil => None
      case multiple => {
        // Check if we can successfully parse the version tag for a
        // major version. If so, we assume we have found a cross build
        // version.
        val tag = Version(multiple.last)
        tag.major match {
          case None => None
          case Some(_) => Some(tag)
        }
      }
    }
  }

  def makeUrls(
    resolver: String,
    groupId: String
  ): Seq[String] = {
    Seq(
      joinUrl(
        resolver, groupId.replaceAll("\\.", "/")
      ),
      joinUrl(resolver, groupId)
    )
  }

  def joinUrl(
    a: String,
    b: String
  ): String = {
    Seq(a, b).map ( StringUtils.stripEnd(_, "/") ).mkString("/")
  }
} 
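
The two pure helpers above can be exercised without any network access (the artifact and URL values are illustrative):

object RemoteVersionsExamples extends App {
  println(RemoteVersions.crossBuildVersion("scala-csv_2.11/")) // defined: "2.11" has a parsable major version
  println(RemoteVersions.crossBuildVersion("scala-csv/"))      // None: no "_<version>" suffix
  println(RemoteVersions.joinUrl("https://repo1.maven.org/maven2/", "io.flow"))
  // => https://repo1.maven.org/maven2/io.flow
}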
Example 46
Source File: BinaryVersionProvider.scala    From dependency   with MIT License 5 votes vote down vote up
package io.flow.dependency.api.lib

import io.flow.dependency.v0.models.BinaryType
import io.flow.log.RollbarLogger
import io.flow.util.Version
import org.apache.commons.lang3.StringUtils



trait BinaryVersionProvider {

  
  def versions(binary: BinaryType): Seq[Version]

}

@javax.inject.Singleton
case class DefaultBinaryVersionProvider @javax.inject.Inject()(
  logger: RollbarLogger
) extends BinaryVersionProvider {

  private[this] val ScalaUrl = "https://www.scala-lang.org/download/all.html"
  private[this] val SbtUrl = "https://flow.jfrog.io/flow/libs-release/org/scala-sbt/sbt/"

  override def versions(
    binary: BinaryType
  ) : Seq[Version] = {
    binary match {
      case BinaryType.Scala => {
        fetchScalaVersions()
      }
      case BinaryType.Sbt => {
        fetchSbtVersions()
      }
      case BinaryType.UNDEFINED(name) => {
        if (!name.startsWith("tst-")) {
          logger.withKeyValue("binary_name", name).warn(s"Do not know how to find versions for the programming binary")
        }
        Nil
      }
    }
  }

  def fetchScalaVersions(): Seq[Version] = {
    RemoteDirectory.fetch(ScalaUrl) { name =>
      name.toLowerCase.startsWith("scala ")
    }.files.flatMap { toVersion }
  }

  def fetchSbtVersions(): Seq[Version] = {
    RemoteDirectory.fetch(SbtUrl)().directories.flatMap { dir =>
      toVersion(StringUtils.stripEnd(dir, "/"))
    }
  }

  def toVersion(value: String): Option[Version] = {
    val tag = Version(
      StringUtils.stripStart(
        StringUtils.stripStart(value, "scala"),
        "Scala"
      ).trim
    )
    tag.major match {
      case None => None
      case Some(_) => Some(tag)
    }
  }

}