org.slf4j.Logger Scala Examples
The following examples show how to use org.slf4j.Logger in Scala, drawn from a range of open-source projects. Each example lists the project and source file it comes from.
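Before the project-specific examples, here is a minimal sketch of the usual org.slf4j.Logger pattern in Scala (the object and messages below are illustrative only): obtain a logger from LoggerFactory, prefer {} placeholders or an is*Enabled guard for expensive messages, and pass a Throwable as the last argument to log its stack trace.

import org.slf4j.{Logger, LoggerFactory}

object LoggerBasics {
  // One logger per class/object, named after it.
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    // Parameterized messages avoid string construction when the level is disabled.
    logger.info("Application started with {} arguments", args.length)

    try {
      require(args.nonEmpty, "at least one argument is required")
    } catch {
      // Passing the Throwable as the last argument logs its stack trace.
      case e: IllegalArgumentException =>
        logger.warn("Invalid invocation: {}", e.getMessage, e)
    }

    if (logger.isDebugEnabled) {
      // Guard genuinely expensive message construction explicitly.
      logger.debug(s"Full argument list: ${args.mkString(", ")}")
    }
  }
}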
Example 1
Source File: SnowflakeConnectorUtils.scala From spark-snowflake with Apache License 2.0 | 6 votes |
package net.snowflake.spark.snowflake

import java.nio.file.Paths
import java.security.InvalidKeyException

import net.snowflake.spark.snowflake.pushdowns.SnowflakeStrategy
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

// NOTE: the original excerpt elides the enclosing object declaration, its `log` field and
// `enablePushdownSession`; a minimal reconstruction of the wrapper is sketched here (assumption)
// so the snippet hangs together. `enablePushdownSession` remains elided.
object SnowflakeConnectorUtils {

  // Reconstructed (assumption): the excerpt uses `log` but elides its declaration.
  val log: Logger = LoggerFactory.getLogger(getClass)

  def disablePushdownSession(session: SparkSession): Unit = {
    session.experimental.extraStrategies = session.experimental.extraStrategies
      .filterNot(strategy => strategy.isInstanceOf[SnowflakeStrategy])
  }

  def setPushdownSession(session: SparkSession, enabled: Boolean): Unit = {
    if (enabled) {
      enablePushdownSession(session) // defined elsewhere in this object; elided in this excerpt
    } else {
      disablePushdownSession(session)
    }
  }

  // TODO: Improve error handling with retries, etc.
  @throws[SnowflakeConnectorException]
  def handleS3Exception(ex: Exception): Unit = {
    if (ex.getCause.isInstanceOf[InvalidKeyException]) {
      // Most likely cause: Unlimited strength policy files not installed
      var msg: String = "Strong encryption with Java JRE requires JCE " +
        "Unlimited Strength Jurisdiction Policy " +
        "files. " +
        "Follow JDBC client installation instructions " +
        "provided by Snowflake or contact Snowflake " +
        "Support. This needs to be installed in the Java runtime for all Spark executor nodes."

      log.error(
        "JCE Unlimited Strength policy files missing: {}. {}.",
        ex.getMessage: Any,
        ex.getCause.getMessage: Any
      )

      val bootLib: String = java.lang.System.getProperty("sun.boot.library.path")
      if (bootLib != null) {
        msg += " The target directory on your system is: " + Paths
          .get(bootLib, "security")
          .toString
        log.error(msg)
      }
      throw new SnowflakeConnectorException(msg)
    } else {
      throw ex
    }
  }
}

class SnowflakeConnectorException(message: String) extends Exception(message)

class SnowflakePushdownException(message: String)
    extends SnowflakeConnectorException(message)

class SnowflakeConnectorFeatureNotSupportException(message: String)
    extends Exception(message)

class SnowflakePushdownUnsupportedException(message: String,
                                            val unsupportedOperation: String,
                                            val details: String,
                                            val isKnownUnsupportedOperation: Boolean)
    extends Exception(message)
Example 2
Source File: TestHook.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake.test

import net.snowflake.client.jdbc.{ErrorCode, SnowflakeSQLException}
import net.snowflake.spark.snowflake.test.TestHookFlag.TestHookFlag
import org.slf4j.{Logger, LoggerFactory}

object TestHookFlag extends Enumeration {
  type TestHookFlag = Value

  // All predefined test hook's name start with TH_ (TEST HOOK).
  val TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE =
    Value("TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE")
  val TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE =
    Value("TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE")
  val TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE =
    Value("TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE")
  val TH_WRITE_ERROR_AFTER_COPY_INTO = Value("TH_WRITE_ERROR_AFTER_COPY_INTO")
  val TH_GCS_UPLOAD_RAISE_EXCEPTION = Value("TH_GCS_UPLOAD_RAISE_EXCEPTION")
  val TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS =
    Value("TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS")
  val TH_COPY_INTO_TABLE_MISS_FILES_FAIL =
    Value("TH_COPY_INTO_TABLE_MISS_FILES_FAIL")
}

object TestHook {

  val log: Logger = LoggerFactory.getLogger(getClass)

  private val ENABLED_TEST_FLAGS =
    new scala.collection.mutable.HashSet[TestHookFlag]()

  private var IS_TEST_ENABLED = false

  private val TEST_MESSAGE_PREFIX =
    "Internal test error (should NOT be seen by user):"

  // Enable test
  private[snowflake] def enableTestHook(): Unit = {
    IS_TEST_ENABLED = true
  }

  // Disable test
  private[snowflake] def disableTestHook(): Unit = {
    IS_TEST_ENABLED = false
    ENABLED_TEST_FLAGS.clear()
  }

  // Enable a specific test flag
  private[snowflake] def enableTestFlag(testFlag: TestHookFlag): Unit = {
    enableTestHook()
    if (!ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.add(testFlag)
    }
  }

  // Enable a specific test flag only (all other flags are disabled)
  private[snowflake] def enableTestFlagOnly(testFlag: TestHookFlag): Unit = {
    disableTestHook()
    enableTestFlag(testFlag)
  }

  // Disable a specific test flag
  private[snowflake] def disableTestFlag(testFlag: TestHookFlag): Unit = {
    if (ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.remove(testFlag)
    }
    if (ENABLED_TEST_FLAGS.isEmpty) {
      disableTestHook()
    }
  }

  // Check whether a flag is enabled
  private[snowflake] def isTestFlagEnabled(testFlag: TestHookFlag): Boolean = {
    IS_TEST_ENABLED && ENABLED_TEST_FLAGS.contains(testFlag)
  }

  // Raise exception if the specific test flag is enabled.
  private[snowflake] def raiseExceptionIfTestFlagEnabled(testFlag: TestHookFlag,
                                                         errorMessage: String): Unit = {
    if (isTestFlagEnabled(testFlag)) {
      throw new SnowflakeSQLException(ErrorCode.INTERNAL_ERROR,
                                      s"$TEST_MESSAGE_PREFIX $errorMessage")
    }
  }
}
Example 3
Source File: ClusterTest.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake

import net.snowflake.spark.snowflake.testsuite.ClusterTestSuiteBase
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.SparkSession

object ClusterTest {
  val log: Logger = LoggerFactory.getLogger(getClass)

  val RemoteMode = "remote"
  val LocalMode = "local"

  val TestSuiteSeparator = ";"

  // Driver function to run the test.
  def main(args: Array[String]): Unit = {
    log.info(s"Test Spark Connector: ${net.snowflake.spark.snowflake.Utils.VERSION}")

    val usage = s"""Two parameters are needed: [local | remote] and
                   | testClassNames (using ';' to separate multiple classes)
                   |""".stripMargin
    log.info(usage)

    if (args.length < 2) {
      throw new Exception(s"At least two parameters are needed. Usage: $usage")
    }

    // Setup Spark session.
    // local mode is introduced for debugging purpose
    val runMode = args(0)
    var sparkSessionBuilder = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .config("spark.some.config.option", "some-value")
    if (runMode.equalsIgnoreCase(LocalMode)) {
      sparkSessionBuilder = sparkSessionBuilder
        .config("spark.master", "local")
    }
    val spark = sparkSessionBuilder.getOrCreate()

    // Run specified test suites
    val testSuiteNames = args(1).split(TestSuiteSeparator)
    for (testSuiteName <- testSuiteNames) {
      if (!testSuiteName.trim.isEmpty) {
        // Retrieve commit ID from env.
        val commitID = scala.util.Properties
          .envOrElse(TestUtils.GITHUB_SHA, "commit id not set")

        // val testSuiteName = "net.snowflake.spark.snowflake.testsuite.BasicReadWriteSuite"
        val resultBuilder = new ClusterTestResultBuilder()
          .withTestType("Scala")
          .withTestCaseName(testSuiteName)
          .withCommitID(commitID)
          .withTestStatus(TestUtils.TEST_RESULT_STATUS_INIT)
          .withStartTimeInMill(System.currentTimeMillis())
          .withGithubRunId(TestUtils.githubRunId)

        try {
          Class
            .forName(testSuiteName)
            .newInstance()
            .asInstanceOf[ClusterTestSuiteBase]
            .run(spark, resultBuilder)
        } catch {
          case e: Throwable =>
            log.error(e.getMessage)
            resultBuilder
              .withTestStatus(TestUtils.TEST_RESULT_STATUS_EXCEPTION)
              .withReason(e.getMessage)
        } finally {
          // Set test end time.
          resultBuilder
            .withEndTimeInMill(System.currentTimeMillis())
          // Write test result
          resultBuilder.build().writeToSnowflake()
        }
      }
    }

    spark.stop()
  }
}
Example 4
Source File: AuthServiceJWT.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth

import java.util.concurrent.{CompletableFuture, CompletionStage}

import com.daml.lf.data.Ref
import com.daml.jwt.{JwtVerifier, JwtVerifierBase}
import com.daml.ledger.api.auth.AuthServiceJWT.Error
import io.grpc.Metadata
import org.slf4j.{Logger, LoggerFactory}
import spray.json._

import scala.collection.mutable.ListBuffer
import scala.util.Try

class AuthServiceJWT(verifier: JwtVerifierBase) extends AuthService {

  protected val logger: Logger = LoggerFactory.getLogger(AuthServiceJWT.getClass)

  override def decodeMetadata(headers: Metadata): CompletionStage[Claims] = {
    decodeAndParse(headers).fold(
      error => {
        logger.warn("Authorization error: " + error.message)
        CompletableFuture.completedFuture(Claims.empty)
      },
      token => CompletableFuture.completedFuture(payloadToClaims(token))
    )
  }

  private[this] def parsePayload(jwtPayload: String): Either[Error, AuthServiceJWTPayload] = {
    import AuthServiceJWTCodec.JsonImplicits._
    Try(JsonParser(jwtPayload).convertTo[AuthServiceJWTPayload]).toEither.left.map(t =>
      Error("Could not parse JWT token: " + t.getMessage))
  }

  private[this] def decodeAndParse(headers: Metadata): Either[Error, AuthServiceJWTPayload] = {
    val bearerTokenRegex = "Bearer (.*)".r

    for {
      headerValue <- Option
        .apply(headers.get(AUTHORIZATION_KEY))
        .toRight(Error("Authorization header not found"))
      token <- bearerTokenRegex
        .findFirstMatchIn(headerValue)
        .map(_.group(1))
        .toRight(Error("Authorization header does not use Bearer format"))
      decoded <- verifier
        .verify(com.daml.jwt.domain.Jwt(token))
        .toEither
        .left
        .map(e => Error("Could not verify JWT token: " + e.message))
      parsed <- parsePayload(decoded.payload)
    } yield parsed
  }

  private[this] def payloadToClaims(payload: AuthServiceJWTPayload): Claims = {
    val claims = ListBuffer[Claim]()

    // Any valid token authorizes the user to use public services
    claims.append(ClaimPublic)

    if (payload.admin)
      claims.append(ClaimAdmin)

    payload.actAs
      .foreach(party => claims.append(ClaimActAsParty(Ref.Party.assertFromString(party))))

    payload.readAs
      .foreach(party => claims.append(ClaimReadAsParty(Ref.Party.assertFromString(party))))

    Claims(
      claims = claims.toList,
      ledgerId = payload.ledgerId,
      participantId = payload.participantId,
      applicationId = payload.applicationId,
      expiration = payload.exp,
    )
  }
}

object AuthServiceJWT {
  final case class Error(message: String)

  def apply(verifier: com.auth0.jwt.interfaces.JWTVerifier) =
    new AuthServiceJWT(new JwtVerifier(verifier))

  def apply(verifier: JwtVerifierBase) =
    new AuthServiceJWT(verifier)
}
Example 5
Source File: AuthorizationInterceptor.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth.interceptor

import com.daml.ledger.api.auth.{AuthService, Claims}
import com.daml.platform.server.api.validation.ErrorFactories.unauthenticated
import io.grpc.{
  Context,
  Contexts,
  Metadata,
  ServerCall,
  ServerCallHandler,
  ServerInterceptor,
  Status
}
import org.slf4j.{Logger, LoggerFactory}

import scala.compat.java8.FutureConverters
import scala.concurrent.ExecutionContext
import scala.util.{Failure, Success, Try}

final class AuthorizationInterceptor(protected val authService: AuthService, ec: ExecutionContext)
    extends ServerInterceptor {

  private val logger: Logger = LoggerFactory.getLogger(AuthorizationInterceptor.getClass)
  private val internalAuthenticationError =
    Status.INTERNAL.withDescription("Failed to get claims from request metadata")

  import AuthorizationInterceptor.contextKeyClaim

  override def interceptCall[ReqT, RespT](
      call: ServerCall[ReqT, RespT],
      headers: Metadata,
      nextListener: ServerCallHandler[ReqT, RespT]): ServerCall.Listener[ReqT] = {
    // Note: Context uses ThreadLocal storage, we need to capture it outside of the async block below.
    // Contexts are immutable and safe to pass around.
    val prevCtx = Context.current

    // The method interceptCall() must return a Listener.
    // The target listener is created by calling `Contexts.interceptCall()`.
    // However, this is only done after we have asynchronously received the claims.
    // Therefore, we need to return a listener that buffers all messages until the target listener is available.
    new AsyncForwardingListener[ReqT] {
      FutureConverters
        .toScala(authService.decodeMetadata(headers))
        .onComplete {
          case Failure(exception) =>
            logger.warn(s"Failed to get claims from request metadata: ${exception.getMessage}")
            call.close(internalAuthenticationError, new Metadata())
            new ServerCall.Listener[Nothing]() {}
          case Success(Claims.empty) =>
            logger.debug(s"Auth metadata decoded into empty claims, returning UNAUTHENTICATED")
            call.close(Status.UNAUTHENTICATED, new Metadata())
            new ServerCall.Listener[Nothing]() {}
          case Success(claims) =>
            val nextCtx = prevCtx.withValue(contextKeyClaim, claims)
            // Contexts.interceptCall() creates a listener that wraps all methods of `nextListener`
            // such that `Context.current` returns `nextCtx`.
            val nextListenerWithContext =
              Contexts.interceptCall(nextCtx, call, headers, nextListener)
            setNextListener(nextListenerWithContext)
            nextListenerWithContext
        }(ec)
    }
  }
}

object AuthorizationInterceptor {

  private val contextKeyClaim = Context.key[Claims]("AuthServiceDecodedClaim")

  def extractClaimsFromContext(): Try[Claims] =
    Option(contextKeyClaim.get()).fold[Try[Claims]](Failure(unauthenticated()))(Success(_))

  def apply(authService: AuthService, ec: ExecutionContext): AuthorizationInterceptor =
    new AuthorizationInterceptor(authService, ec)
}
Example 6
Source File: GlobalLogLevel.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox

import ch.qos.logback.classic.Level
import org.slf4j.{Logger, LoggerFactory}

object GlobalLogLevel {
  def set(level: Level): Unit = {
    val rootLogger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)
    LoggerFactory.getILoggerFactory match {
      case loggerContext: ch.qos.logback.classic.LoggerContext =>
        rootLogger.info(s"Sandbox verbosity changed to $level")
        loggerContext.getLoggerList.forEach(_.setLevel(level))
      case _ =>
        rootLogger.warn(s"Sandbox verbosity cannot be set to requested $level")
    }
  }
}
Example 7
Source File: CommandCompletionServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_completion_service.CommandCompletionServiceGrpc.CommandCompletionService
import com.daml.ledger.api.v1.command_completion_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

//TODO: this class is only needed by DamlOnXCommandCompletionService.scala. Must be deleted once that's gone!
class CommandCompletionServiceValidation(
    val service: CommandCompletionService with AutoCloseable,
    val ledgerId: LedgerId)
    extends CommandCompletionService
    with FieldValidations
    with GrpcApiService
    with ProxyCloseable
    with ErrorFactories {

  protected val logger: Logger = LoggerFactory.getLogger(CommandCompletionService.getClass)

  override def completionStream(
      request: CompletionStreamRequest,
      responseObserver: StreamObserver[CompletionStreamResponse]): Unit = {
    val validation = for {
      _ <- matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      _ <- requireNonEmptyString(request.applicationId, "application_id")
      _ <- requireNonEmpty(request.parties, "parties")
    } yield request

    validation.fold(
      exception => responseObserver.onError(exception),
      value => service.completionStream(value, responseObserver)
    )
  }

  override def completionEnd(request: CompletionEndRequest): Future[CompletionEndResponse] = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(Future.failed, _ => service.completionEnd(request))
  }

  override def bindService(): ServerServiceDefinition =
    CommandCompletionServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 8
Source File: ActiveContractsServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc.ActiveContractsService
import com.daml.ledger.api.v1.active_contracts_service.{
  ActiveContractsServiceGrpc,
  GetActiveContractsRequest,
  GetActiveContractsResponse
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class ActiveContractsServiceValidation(
    protected val service: ActiveContractsService with AutoCloseable,
    val ledgerId: LedgerId)
    extends ActiveContractsService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(ActiveContractsService.getClass)

  override def getActiveContracts(
      request: GetActiveContractsRequest,
      responseObserver: StreamObserver[GetActiveContractsResponse]): Unit = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(responseObserver.onError, _ => service.getActiveContracts(request, responseObserver))
  }

  override def bindService(): ServerServiceDefinition =
    ActiveContractsServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 9
Source File: LedgerConfigurationServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.ledger_configuration_service.LedgerConfigurationServiceGrpc.LedgerConfigurationService
import com.daml.ledger.api.v1.ledger_configuration_service.{
  GetLedgerConfigurationRequest,
  GetLedgerConfigurationResponse,
  LedgerConfigurationServiceGrpc
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class LedgerConfigurationServiceValidation(
    protected val service: LedgerConfigurationService with GrpcApiService,
    protected val ledgerId: LedgerId)
    extends LedgerConfigurationService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(LedgerConfigurationService.getClass)

  override def getLedgerConfiguration(
      request: GetLedgerConfigurationRequest,
      responseObserver: StreamObserver[GetLedgerConfigurationResponse]): Unit =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId)).fold(
      t => responseObserver.onError(t),
      _ => service.getLedgerConfiguration(request, responseObserver)
    )

  override def bindService(): ServerServiceDefinition =
    LedgerConfigurationServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 10
Source File: PackageServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.package_service.PackageServiceGrpc.PackageService
import com.daml.ledger.api.v1.package_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.Function.const
import scala.concurrent.Future

class PackageServiceValidation(
    protected val service: PackageService with AutoCloseable,
    val ledgerId: LedgerId)
    extends PackageService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(PackageService.getClass)

  override def listPackages(request: ListPackagesRequest): Future[ListPackagesResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.listPackages
      )

  override def getPackage(request: GetPackageRequest): Future[GetPackageResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackage
      )

  override def getPackageStatus(
      request: GetPackageStatusRequest): Future[GetPackageStatusResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackageStatus
      )

  override def bindService(): ServerServiceDefinition =
    PackageServiceGrpc.bindService(this, DirectExecutionContext)

  override def close(): Unit = service.close()
}
Example 11
Source File: GrpcCommandService.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.services.grpc

import java.time.{Duration, Instant}

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_service.CommandServiceGrpc.CommandService
import com.daml.ledger.api.v1.command_service._
import com.daml.ledger.api.validation.{CommandsValidator, SubmitAndWaitRequestValidator}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import com.google.protobuf.empty.Empty
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

class GrpcCommandService(
    protected val service: CommandService with AutoCloseable,
    val ledgerId: LedgerId,
    currentLedgerTime: () => Instant,
    currentUtcTime: () => Instant,
    maxDeduplicationTime: () => Option[Duration]
) extends CommandService
    with GrpcApiService
    with ProxyCloseable {

  protected val logger: Logger = LoggerFactory.getLogger(CommandService.getClass)

  private[this] val validator = new SubmitAndWaitRequestValidator(new CommandsValidator(ledgerId))

  override def submitAndWait(request: SubmitAndWaitRequest): Future[Empty] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWait(request))

  override def submitAndWaitForTransactionId(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionIdResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransactionId(request))

  override def submitAndWaitForTransaction(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransaction(request))

  override def submitAndWaitForTransactionTree(
      request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionTreeResponse] =
    validator
      .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())
      .fold(Future.failed, _ => service.submitAndWaitForTransactionTree(request))

  override def bindService(): ServerServiceDefinition =
    CommandServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 12
Source File: GrpcCommandSubmissionService.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.services.grpc

import java.time.{Duration, Instant}

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_submission_service.CommandSubmissionServiceGrpc.{
  CommandSubmissionService => ApiCommandSubmissionService
}
import com.daml.ledger.api.v1.command_submission_service.{
  CommandSubmissionServiceGrpc,
  SubmitRequest => ApiSubmitRequest
}
import com.daml.ledger.api.validation.{CommandsValidator, SubmitRequestValidator}
import com.daml.metrics.{Metrics, Timed}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import com.daml.platform.server.api.services.domain.CommandSubmissionService
import com.google.protobuf.empty.Empty
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

class GrpcCommandSubmissionService(
    override protected val service: CommandSubmissionService with AutoCloseable,
    ledgerId: LedgerId,
    currentLedgerTime: () => Instant,
    currentUtcTime: () => Instant,
    maxDeduplicationTime: () => Option[Duration],
    metrics: Metrics,
) extends ApiCommandSubmissionService
    with ProxyCloseable
    with GrpcApiService {

  protected val logger: Logger = LoggerFactory.getLogger(ApiCommandSubmissionService.getClass)

  private val validator = new SubmitRequestValidator(new CommandsValidator(ledgerId))

  override def submit(request: ApiSubmitRequest): Future[Empty] =
    Timed.future(
      metrics.daml.commands.submissions,
      Timed
        .value(
          metrics.daml.commands.validation,
          validator
            .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()))
        .fold(
          Future.failed,
          service.submit(_).map(_ => Empty.defaultInstance)(DirectExecutionContext))
    )

  override def bindService(): ServerServiceDefinition =
    CommandSubmissionServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 13
Source File: Committer.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.codahale.metrics.Timer
import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlConfigurationEntry,
  DamlLogEntry,
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{Conversions, DamlStateMap, Err}
import com.daml.ledger.participant.state.kvutils.committer.Committer._
import com.daml.ledger.participant.state.v1.{Configuration, ParticipantId}
import com.daml.lf.data.Time
import com.daml.metrics.Metrics
import org.slf4j.{Logger, LoggerFactory}

// NOTE: the surrounding trait and method definitions are elided in this excerpt; only the tail
// of a configuration-lookup expression survives below. It reads the current ledger configuration
// from the input state and falls back to the default configuration when parsing fails.
        throw Err.MissingInputState(Conversions.configurationStateKey)
      )
      .flatMap { v =>
        val entry = v.getConfigurationEntry
        Configuration
          .decode(entry.getConfiguration)
          .fold({ err =>
            logger.error(s"Failed to parse configuration: $err, using default configuration.")
            None
          }, conf => Some(Some(entry) -> conf))
      }
      .getOrElse(None -> defaultConfig)
  }
Example 14
Source File: AkkaStreamPerformanceTest.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.perf.util

import akka.actor.ActorSystem
import akka.stream.Materializer
import com.daml.ledger.api.testing.utils.Resource
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.{ExecutionContext, ExecutionContextExecutor}

@SuppressWarnings(Array("org.wartremover.warts.LeakingSealed"))
abstract class AkkaStreamPerformanceTest extends PerformanceTest {

  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  type ResourceType

  @volatile protected var system: ActorSystem = _
  @volatile protected var materializer: Materializer = _
  @transient protected implicit val ec: ExecutionContextExecutor = ExecutionContext.global

  protected def resource: Resource[ResourceType]

  protected def setup(): Unit = {
    resource.setup()
    implicit val sys: ActorSystem = ActorSystem(this.getClass.getSimpleName.stripSuffix("$"))
    system = sys
    materializer = Materializer(system)
  }

  protected def teardown(): Unit = {
    await(system.terminate())
    resource.close()
  }

  implicit class FixtureSetup[T](using: Using[T]) extends Serializable {

    def withLifecycleManagement(additionalSetup: T => Unit = _ => ()): Using[T] =
      using
        .setUp { input =>
          try {
            setup()
            additionalSetup(input)
          } catch {
            case t: Throwable =>
              logger.error("Setup failed.", t)
              throw t
          }
        }
        .tearDown { _ =>
          try {
            teardown()
          } catch {
            case t: Throwable =>
              logger.error("Teardown failed.", t)
              throw t
          }
        }
  }
}
Example 15
Source File: TraceLog.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.speedy

import com.daml.lf.data.Ref.Location
import org.slf4j.Logger

final case class TraceLog(logger: Logger, capacity: Int) {

  private val buffer = Array.ofDim[(String, Option[Location])](capacity)
  private var pos: Int = 0
  private var size: Int = 0

  def add(message: String, optLocation: Option[Location]): Unit = {
    if (logger.isDebugEnabled) {
      logger.debug(s"${Pretty.prettyLoc(optLocation).renderWideStream.mkString}: $message")
    }
    buffer(pos) = (message, optLocation)
    pos = (pos + 1) % capacity
    if (size < capacity)
      size += 1
  }

  def iterator: Iterator[(String, Option[Location])] =
    new RingIterator(if (size < capacity) 0 else pos, size, buffer)
}

private final class RingIterator[A](ringStart: Int, ringSize: Int, buffer: Array[A])
    extends Iterator[A] {
  private var pos: Int = ringStart
  private var first = true
  private def nextPos: Int = (pos + 1) % ringSize
  def hasNext: Boolean = ringSize != 0 && (first || pos != ringStart)
  def next: A = {
    val x = buffer(pos)
    first = false
    pos = nextPos
    x
  }
}
Example 16
Source File: Slf4JLogger.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.client.binding.util

import akka.stream._
import akka.stream.stage.{GraphStage, GraphStageLogic, InHandler, OutHandler}
import org.slf4j.Logger

final case class Slf4JLogger[T, U](
    logger: Logger,
    prefix: String,
    project: T => U,
    logDemand: Boolean = false)
    extends GraphStage[FlowShape[T, T]] {

  override def toString = "Slf4JLog"

  val in: Inlet[T] = Inlet[T]("in")
  val out: Outlet[T] = Outlet[T]("out")

  override def shape: FlowShape[T, T] = FlowShape(in, out)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) with OutHandler with InHandler {

      override def onPush(): Unit = {
        val elem = grab(in)
        if (logger.isDebugEnabled) logger.debug("[{}] Element: {}", prefix, project(elem))
        push(out, elem)
      }

      override def onPull(): Unit = {
        if (logDemand) logger.debug("[{}] Demand", prefix)
        pull(in)
      }

      override def onUpstreamFailure(cause: Throwable): Unit = {
        logger.warn(s"[$prefix] Upstream failed", cause)
        super.onUpstreamFailure(cause)
      }

      override def onUpstreamFinish(): Unit = {
        logger.debug("[{}] Upstream finished.", prefix)
        super.onUpstreamFinish()
      }

      override def onDownstreamFinish(cause: Throwable): Unit = {
        logger.debug("[{}] Downstream finished.", prefix)
        super.onDownstreamFinish(cause)
      }

      setHandlers(in, out, this)
    }
}

object Slf4JLogger {
  def apply[T](logger: Logger, prefix: String): Slf4JLogger[T, T] =
    new Slf4JLogger(logger, prefix, identity)
}
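Because Slf4JLogger has a FlowShape[T, T], it can be dropped into any Akka stream with .via. A minimal, hypothetical usage sketch follows (the stream contents, logger, and prefix are illustrative only, not part of the original example):

import akka.actor.ActorSystem
import akka.stream.scaladsl.{Sink, Source}
import org.slf4j.LoggerFactory

object Slf4JLoggerExample extends App {
  implicit val system: ActorSystem = ActorSystem("slf4j-logger-example")
  private val logger = LoggerFactory.getLogger(getClass)

  // Each element is logged at DEBUG level under the "numbers" prefix as it flows downstream.
  Source(1 to 3)
    .via(Slf4JLogger[Int](logger, "numbers"))
    .runWith(Sink.foreach(println))
}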
Example 17
Source File: Main.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.codegen

import java.io.File
import java.nio.file.Path

import ch.qos.logback.classic.Level
import com.daml.lf.codegen.conf.Conf
import com.typesafe.scalalogging.StrictLogging
import org.slf4j.{Logger, LoggerFactory}
import scalaz.Cord

import scala.collection.breakOut

object Main extends StrictLogging {

  private val codegenId = "Scala Codegen"

  @deprecated("Use codegen front-end: com.daml.codegen.CodegenMain.main", "0.13.23")
  def main(args: Array[String]): Unit =
    Conf.parse(args) match {
      case Some(conf) =>
        generateCode(conf)
      case None =>
        throw new IllegalArgumentException(
          s"Invalid ${codegenId: String} command line arguments: ${args.mkString(" "): String}")
    }

  def generateCode(conf: Conf): Unit = conf match {
    case Conf(darMap, outputDir, decoderPkgAndClass, verbosity, roots) =>
      setGlobalLogLevel(verbosity)
      logUnsupportedEventDecoderOverride(decoderPkgAndClass)
      val (dars, packageName) = darsAndOnePackageName(darMap)
      CodeGen.generateCode(dars, packageName, outputDir.toFile, CodeGen.Novel, roots)
  }

  private def setGlobalLogLevel(verbosity: Level): Unit = {
    LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME) match {
      case a: ch.qos.logback.classic.Logger =>
        a.setLevel(verbosity)
        logger.info(s"${codegenId: String} verbosity: ${verbosity.toString}")
      case _ =>
        logger.warn(s"${codegenId: String} cannot set requested verbosity: ${verbosity.toString}")
    }
  }

  private def logUnsupportedEventDecoderOverride(mapping: Option[(String, String)]): Unit =
    mapping.foreach {
      case (a, b) =>
        logger.warn(
          s"${codegenId: String} does not allow overriding Event Decoder, skipping: ${a: String} -> ${b: String}")
    }

  private def darsAndOnePackageName(darMap: Map[Path, Option[String]]): (List[File], String) = {
    val dars: List[File] = darMap.keys.map(_.toFile)(breakOut)
    val uniquePackageNames: Set[String] = darMap.values.collect { case Some(x) => x }(breakOut)

    uniquePackageNames.toSeq match {
      case Seq(packageName) =>
        (dars, packageName)
      case _ =>
        throw new IllegalStateException(
          s"${codegenId: String} expects all dars mapped to the same package name, " +
            s"requested: ${format(darMap): String}")
    }
  }

  private def format(map: Map[Path, Option[String]]): String = {
    val cord = map.foldLeft(Cord("{")) { (str, kv) =>
      str ++ kv._1.toFile.getAbsolutePath ++ "->" ++ kv._2.toString ++ ","
    }
    (cord ++ "}").toString
  }
}
Example 18
Source File: ContextualizedLogger.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.logging

import akka.NotUsed
import akka.stream.scaladsl.Flow
import com.daml.grpc.GrpcException
import io.grpc.Status
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.concurrent.TrieMap
import scala.util.{Failure, Try}
import scala.util.control.NonFatal

object ContextualizedLogger {

  // Caches loggers to prevent them from needlessly wasting memory
  // Replicates the behavior of the underlying Slf4j logger factory
  private[this] val cache = TrieMap.empty[String, ContextualizedLogger]

  // Allows to explicitly pass a logger, should be used for testing only
  private[logging] def createFor(withoutContext: Logger): ContextualizedLogger =
    new ContextualizedLogger(withoutContext)

  // Slf4j handles the caching of the underlying logger itself
  private[logging] def createFor(name: String): ContextualizedLogger =
    createFor(LoggerFactory.getLogger(name))

  def get(clazz: Class[_]): ContextualizedLogger = {
    val name = clazz.getName.stripSuffix("$")
    cache.getOrElseUpdate(name, createFor(name))
  }
}

final class ContextualizedLogger private (val withoutContext: Logger) {

  val trace = new LeveledLogger.Trace(withoutContext)
  val debug = new LeveledLogger.Debug(withoutContext)
  val info = new LeveledLogger.Info(withoutContext)
  val warn = new LeveledLogger.Warn(withoutContext)
  val error = new LeveledLogger.Error(withoutContext)

  private def internalOrUnknown(code: Status.Code): Boolean =
    code == Status.Code.INTERNAL || code == Status.Code.UNKNOWN

  private def logError(t: Throwable)(implicit logCtx: LoggingContext): Unit =
    error("Unhandled internal error", t)

  def logErrorsOnCall[Out](implicit logCtx: LoggingContext): PartialFunction[Try[Out], Unit] = {
    case Failure(e @ GrpcException(s, _)) =>
      if (internalOrUnknown(s.getCode)) {
        logError(e)
      }
    case Failure(NonFatal(e)) =>
      logError(e)
  }

  def logErrorsOnStream[Out](implicit logCtx: LoggingContext): Flow[Out, Out, NotUsed] =
    Flow[Out].mapError {
      case e @ GrpcException(s, _) =>
        if (internalOrUnknown(s.getCode)) {
          logError(e)
        }
        e
      case NonFatal(e) =>
        logError(e)
        e
    }
}
Example 19
Source File: LeveledLogger.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.logging

import org.slf4j.{Logger, Marker}

private[logging] object LeveledLogger {

  final class Trace(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean = logger.isTraceEnabled()
    override protected def log(msg: String): Unit = logger.trace(msg)
    override protected def log(msg: String, t: Throwable): Unit = logger.trace(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit = logger.trace(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit = logger.trace(fmt, arg)
  }

  final class Debug(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean = logger.isDebugEnabled()
    override protected def log(msg: String): Unit = logger.debug(msg)
    override protected def log(msg: String, t: Throwable): Unit = logger.debug(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit = logger.debug(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit = logger.debug(fmt, arg)
  }

  final class Info(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean = logger.isInfoEnabled()
    override protected def log(msg: String): Unit = logger.info(msg)
    override protected def log(msg: String, t: Throwable): Unit = logger.info(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit = logger.info(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit = logger.info(fmt, arg)
  }

  final class Warn(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean = logger.isWarnEnabled()
    override protected def log(msg: String): Unit = logger.warn(msg)
    override protected def log(msg: String, t: Throwable): Unit = logger.warn(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit = logger.warn(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit = logger.warn(fmt, arg)
  }

  final class Error(logger: Logger) extends LeveledLogger {
    override protected def isEnabled: Boolean = logger.isErrorEnabled()
    override protected def log(msg: String): Unit = logger.error(msg)
    override protected def log(msg: String, t: Throwable): Unit = logger.error(msg, t)
    override protected def log(m: Marker, msg: String, t: Throwable): Unit = logger.error(m, msg, t)
    override protected def log(fmt: String, arg: AnyRef): Unit = logger.error(fmt, arg)
  }
}

private[logging] sealed abstract class LeveledLogger {

  protected def isEnabled: Boolean

  protected def log(msg: String): Unit
  protected def log(msg: String, t: Throwable): Unit
  protected def log(m: Marker, msg: String, t: Throwable): Unit
  protected def log(fmt: String, arg: AnyRef): Unit

  final def apply(msg: => String)(implicit logCtx: LoggingContext): Unit =
    if (isEnabled)
      logCtx.ifEmpty(log(msg))(log(s"$msg (context: {})", _))

  final def apply(msg: => String, t: Throwable)(implicit logCtx: LoggingContext): Unit =
    if (isEnabled)
      logCtx.ifEmpty(log(msg, t))(c => log(c, s"$msg (context: $c)", t))
}
Example 20
Source File: TextClassifier.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.textclassification

import com.intel.analytics.bigdl.example.utils._
import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, _}
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T}
import org.apache.log4j.{Level => Levle4j, Logger => Logger4j}
import org.slf4j.{Logger, LoggerFactory}
import scopt.OptionParser

import scala.collection.mutable.{ArrayBuffer, Map => MMap}
import scala.language.existentials

object TextClassifier {
  val log: Logger = LoggerFactory.getLogger(this.getClass)
  LoggerFilter.redirectSparkInfoLogs()
  Logger4j.getLogger("com.intel.analytics.bigdl.optim").setLevel(Levle4j.INFO)

  def main(args: Array[String]): Unit = {
    val localParser = new OptionParser[TextClassificationParams]("BigDL Example") {
      opt[String]('b', "baseDir")
        .required()
        .text("Base dir containing the training and word2Vec data")
        .action((x, c) => c.copy(baseDir = x))
      opt[String]('p', "partitionNum")
        .text("you may want to tune the partitionNum if run into spark mode")
        .action((x, c) => c.copy(partitionNum = x.toInt))
      opt[String]('s', "maxSequenceLength")
        .text("maxSequenceLength")
        .action((x, c) => c.copy(maxSequenceLength = x.toInt))
      opt[String]('w', "maxWordsNum")
        .text("maxWordsNum")
        .action((x, c) => c.copy(maxWordsNum = x.toInt))
      opt[String]('l', "trainingSplit")
        .text("trainingSplit")
        .action((x, c) => c.copy(trainingSplit = x.toDouble))
      opt[String]('z', "batchSize")
        .text("batchSize")
        .action((x, c) => c.copy(batchSize = x.toInt))
      opt[Int]('l', "learningRate")
        .text("learningRate")
        .action((x, c) => c.copy(learningRate = x))
    }

    localParser.parse(args, TextClassificationParams()).map { param =>
      log.info(s"Current parameters: $param")
      val textClassification = new TextClassifier(param)
      textClassification.train()
    }
  }
}
Example 21
Source File: TimestampLogicalType.scala From embulk-output-s3_parquet with MIT License | 5 votes |
package org.embulk.output.s3_parquet.parquet

import java.time.ZoneId

import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types}
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.{
  MICROS,
  MILLIS,
  NANOS
}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.embulk.config.ConfigException
import org.embulk.output.s3_parquet.catalog.GlueDataType
import org.embulk.spi.`type`.{
  BooleanType,
  DoubleType,
  JsonType,
  LongType,
  StringType,
  TimestampType
}
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
import org.embulk.spi.Column
import org.msgpack.value.Value
import org.slf4j.{Logger, LoggerFactory}

case class TimestampLogicalType(
    isAdjustedToUtc: Boolean,
    timeUnit: TimeUnit,
    timeZone: ZoneId
) extends ParquetColumnType {
  private val logger: Logger =
    LoggerFactory.getLogger(classOf[TimestampLogicalType])

  override def primitiveType(column: Column): PrimitiveType =
    column.getType match {
      case _: LongType | _: TimestampType =>
        Types
          .optional(PrimitiveTypeName.INT64)
          .as(LogicalTypeAnnotation.timestampType(isAdjustedToUtc, timeUnit))
          .named(column.getName)
      case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def glueDataType(column: Column): GlueDataType =
    column.getType match {
      case _: LongType | _: TimestampType =>
        timeUnit match {
          case MILLIS => GlueDataType.TIMESTAMP
          case MICROS | NANOS =>
            warningWhenConvertingTimestampToGlueType(GlueDataType.BIGINT)
            GlueDataType.BIGINT
        }
      case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit =
    throw newUnsupportedMethodException("consumeBoolean")

  override def consumeString(consumer: RecordConsumer, v: String): Unit =
    throw newUnsupportedMethodException("consumeString")

  override def consumeLong(consumer: RecordConsumer, v: Long): Unit =
    consumer.addLong(v)

  override def consumeDouble(consumer: RecordConsumer, v: Double): Unit =
    throw newUnsupportedMethodException("consumeDouble")

  override def consumeTimestamp(
      consumer: RecordConsumer,
      v: Timestamp,
      formatter: TimestampFormatter
  ): Unit = timeUnit match {
    case MILLIS => consumer.addLong(v.toEpochMilli)
    case MICROS =>
      consumer.addLong(v.getEpochSecond * 1_000_000L + (v.getNano / 1_000L))
    case NANOS =>
      consumer.addLong(v.getEpochSecond * 1_000_000_000L + v.getNano)
  }

  override def consumeJson(consumer: RecordConsumer, v: Value): Unit =
    throw newUnsupportedMethodException("consumeJson")

  private def warningWhenConvertingTimestampToGlueType(
      glueType: GlueDataType
  ): Unit =
    logger.warn(
      s"timestamp(isAdjustedToUtc = $isAdjustedToUtc, timeUnit = $timeUnit) is converted" +
        s" to Glue ${glueType.name} but this is not represented correctly, because Glue" +
        s" does not support time type. Please use `catalog.column_options` to define the type."
    )
}
Example 22
Source File: JsonLogicalType.scala From embulk-output-s3_parquet with MIT License | 5 votes |
package org.embulk.output.s3_parquet.parquet

import org.apache.parquet.io.api.{Binary, RecordConsumer}
import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types}
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.embulk.config.ConfigException
import org.embulk.output.s3_parquet.catalog.GlueDataType
import org.embulk.spi.Column
import org.embulk.spi.`type`.{
  BooleanType,
  DoubleType,
  JsonType,
  LongType,
  StringType,
  TimestampType
}
import org.embulk.spi.time.{Timestamp, TimestampFormatter}
import org.msgpack.value.{Value, ValueFactory}
import org.slf4j.{Logger, LoggerFactory}

object JsonLogicalType extends ParquetColumnType {
  private val logger: Logger =
    LoggerFactory.getLogger(JsonLogicalType.getClass)

  override def primitiveType(column: Column): PrimitiveType =
    column.getType match {
      case _: BooleanType | _: LongType | _: DoubleType | _: StringType |
          _: JsonType =>
        Types
          .optional(PrimitiveTypeName.BINARY)
          .as(LogicalTypeAnnotation.jsonType())
          .named(column.getName)
      case _: TimestampType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def glueDataType(column: Column): GlueDataType =
    column.getType match {
      case _: BooleanType | _: LongType | _: DoubleType | _: StringType |
          _: JsonType =>
        warningWhenConvertingJsonToGlueType(GlueDataType.STRING)
        GlueDataType.STRING
      case _: TimestampType | _ =>
        throw new ConfigException(s"Unsupported column type: ${column.getName}")
    }

  override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit =
    consumeJson(consumer, ValueFactory.newBoolean(v))

  override def consumeString(consumer: RecordConsumer, v: String): Unit =
    consumeJson(consumer, ValueFactory.newString(v))

  override def consumeLong(consumer: RecordConsumer, v: Long): Unit =
    consumeJson(consumer, ValueFactory.newInteger(v))

  override def consumeDouble(consumer: RecordConsumer, v: Double): Unit =
    consumeJson(consumer, ValueFactory.newFloat(v))

  override def consumeTimestamp(
      consumer: RecordConsumer,
      v: Timestamp,
      formatter: TimestampFormatter
  ): Unit = throw newUnsupportedMethodException("consumeTimestamp")

  override def consumeJson(consumer: RecordConsumer, v: Value): Unit =
    consumer.addBinary(Binary.fromString(v.toJson))

  private def warningWhenConvertingJsonToGlueType(
      glueType: GlueDataType
  ): Unit = {
    logger.warn(
      s"json is converted" +
        s" to Glue ${glueType.name} but this is not represented correctly, because Glue" +
        s" does not support json type. Please use `catalog.column_options` to define the type."
    )
  }
}
Example 23
Source File: ScorexLogging.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.domain.utils

import monix.eval.Task
import monix.execution.{CancelableFuture, Scheduler}
import org.slf4j.{Logger, LoggerFactory}

case class LoggerFacade(logger: Logger) {

  def trace(message: => String): Unit = if (logger.isTraceEnabled) logger.trace(message)

  def debug(message: => String, arg: Any): Unit = if (logger.isDebugEnabled) logger.debug(message, arg)
  def debug(message: => String): Unit = if (logger.isDebugEnabled) logger.debug(message)

  def info(message: => String): Unit = if (logger.isInfoEnabled) logger.info(message)
  def info(message: => String, arg: Any): Unit = if (logger.isInfoEnabled) logger.info(message, arg)
  def info(message: => String, throwable: Throwable): Unit = if (logger.isInfoEnabled) logger.info(message, throwable)

  def warn(message: => String): Unit = if (logger.isWarnEnabled) logger.warn(message)
  def warn(message: => String, throwable: Throwable): Unit = if (logger.isWarnEnabled) logger.warn(message, throwable)

  def error(message: => String): Unit = if (logger.isErrorEnabled) logger.error(message)
  def error(message: => String, throwable: Throwable): Unit = if (logger.isErrorEnabled) logger.error(message, throwable)
}

trait ScorexLogging {

  protected lazy val log: LoggerFacade = LoggerFacade(LoggerFactory.getLogger(this.getClass))

  implicit class TaskExt[A](t: Task[A]) {

    def runAsyncLogErr(implicit s: Scheduler): CancelableFuture[A] = logErr.runToFuture(s)

    def logErr: Task[A] = t.onErrorHandleWith { ex =>
      log.error(s"Error executing task", ex)
      Task.raiseError[A](ex)
    }
  }
}
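A short, hypothetical sketch of how the trait above is typically consumed: a class mixes in ScorexLogging and gets a lazily created LoggerFacade named after the concrete class, with by-name message parameters that are only evaluated when the level is enabled (the class and method names below are illustrative only):

class OrderBookLoader extends ScorexLogging {
  def load(pair: String): Unit = {
    // `log` comes from ScorexLogging and is backed by a logger named after OrderBookLoader;
    // the interpolated message is only built if INFO is enabled.
    log.info(s"Loading order book for $pair")
  }

  // For monix tasks, the TaskExt syntax adds error logging, e.g.:
  //   Task(load("WAVES-USD")).runAsyncLogErr   // requires an implicit monix Scheduler
}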
Example 24
Source File: EmbeddedCassandra.scala From phantom-activator-template with Apache License 2.0 | 5 votes |
package controllers

import java.io.File
import java.util.concurrent.atomic.AtomicBoolean

import org.cassandraunit.utils.EmbeddedCassandraServerHelper
import org.slf4j.Logger

import scala.concurrent.blocking
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}

// NOTE: the enclosing object declaration and its `started` flag are elided in the original
// excerpt; they are restored here (as an assumption) so the snippet is self-contained.
object EmbeddedCassandra {

  private val started = new AtomicBoolean(false)

  def start(logger: Logger, config: Option[File] = None, timeout: Option[Int] = None): Unit = {
    this.synchronized {
      if (started.compareAndSet(false, true)) {
        blocking {
          val configFile = config.map(_.toURI.toString) getOrElse
            EmbeddedCassandraServerHelper.DEFAULT_CASSANDRA_YML_FILE
          System.setProperty("cassandra.config", configFile)

          Try {
            EmbeddedCassandraServerHelper.mkdirs()
          } match {
            case Success(value) =>
              logger.info("Successfully created directories for embedded Cassandra.")
            case Failure(NonFatal(e)) =>
              logger.error(s"Error creating Embedded cassandra directories: ${e.getMessage}")
          }

          (config, timeout) match {
            case (Some(file), None) =>
              logger.info(s"Starting Cassandra in embedded mode with configuration from $file.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra(
                file,
                EmbeddedCassandraServerHelper.DEFAULT_TMP_DIR,
                EmbeddedCassandraServerHelper.DEFAULT_STARTUP_TIMEOUT
              )
            case (Some(file), Some(time)) =>
              logger.info(s"Starting Cassandra in embedded mode with configuration from $file and timeout set to $timeout ms.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra(
                file,
                EmbeddedCassandraServerHelper.DEFAULT_TMP_DIR,
                time
              )
            case (None, Some(time)) =>
              logger.info(s"Starting Cassandra in embedded mode with default configuration and timeout set to $timeout ms.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra(time)
            case (None, None) =>
              logger.info("Starting Cassandra in embedded mode with default configuration.")
              EmbeddedCassandraServerHelper.startEmbeddedCassandra()
              logger.info("Successfully started embedded Cassandra")
          }
        }
      } else {
        logger.info("Embedded Cassandra has already been started")
      }
    }
  }

  def cleanup(logger: Logger): Unit = {
    this.synchronized {
      if (started.compareAndSet(true, false)) {
        logger.info("Cleaning up embedded Cassandra")
        EmbeddedCassandraServerHelper.cleanEmbeddedCassandra()
      } else {
        logger.info("Cassandra is not running, not cleaning up")
      }
    }
  }
}
Example 25
Source File: KafkaSink.scala From spark-kafka-sink with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink

import java.util.{ Properties, Locale }
import java.util.concurrent.TimeUnit

import org.slf4j.Logger
import org.slf4j.LoggerFactory
import com.codahale.metrics.MetricRegistry
import org.apache.spark.SecurityManager

import com.manyangled.kafkasink.KafkaReporter

class KafkaSink(val properties: Properties,
                val registry: MetricRegistry,
                securityMgr: SecurityManager) extends org.apache.spark.metrics.sink.Sink {

  val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private def popt(prop: String): Option[String] =
    Option(properties.getProperty(prop))

  // These are non-negotiable
  val broker = popt("broker").get
  val topic = popt("topic").get

  lazy val reporter = new KafkaReporter(registry, broker, topic, properties)

  def start(): Unit = {
    logger.info(s"Starting Kafka metric reporter at $broker, topic $topic")
    val period = popt("period").getOrElse("10").toLong
    val tstr = popt("unit").getOrElse("seconds").toUpperCase(Locale.ROOT)
    val tunit = TimeUnit.valueOf(tstr)
    reporter.start(period, tunit)
  }

  def stop(): Unit = {
    logger.info(s"Stopping Kafka metric reporter at $broker, topic $topic")
    reporter.stop()
  }

  def report(): Unit = {
    logger.info(s"Reporting metrics to Kafka reporter at $broker, topic $topic")
    reporter.report()
  }
}
Example 26
Source File: Demo2iConfig.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.examples.demos.fbl

import com.basho.riak.client.core.RiakNode
import com.basho.riak.client.core.query.Namespace
import com.basho.riak.spark.rdd._
import com.basho.riak.client.core.query.indexes.LongIntIndex
import com.basho.riak.spark.rdd.connector.RiakConnectorConf
import com.basho.riak.spark.rdd.{RiakFunctions, BucketDef}
import com.basho.riak.spark.util.RiakObjectConversionUtil
import com.basho.riak.spark.writer.{WriteDataMapperFactory, WriteDataMapper}
import org.slf4j.{LoggerFactory, Logger}
import com.basho.riak.spark._
import com.basho.riak.client.core.query.{RiakObject, Namespace}
import com.basho.riak.client.api.annotations.{RiakKey, RiakIndex}
import org.apache.spark.{SparkConf, SparkContext}

case class Demo2iConfig(riakConf: RiakConnectorConf,
                        index: String,
                        bucket: String,
                        from: Long,
                        to: Long,
                        name: String) {

  def riakNodeBuilder(minConnections: Int = 2): RiakNode.Builder = {
    val firstTheWinner = riakConf.hosts.iterator.next()

    new RiakNode.Builder()
      .withMinConnections(minConnections)
      .withRemoteAddress(firstTheWinner.getHost)
      .withRemotePort(firstTheWinner.getPort)
  }
}

object Demo2iConfig {

  val DEFAULT_INDEX_NAME = "creationNo"
  val DEFAULT_BUCKET_NAME = "test-bucket"
  val DEFAULT_FROM = 1
  val DEFAULT_TO = 4

  def apply(sparkConf: SparkConf): Demo2iConfig = {
    Demo2iConfig(
      riakConf = RiakConnectorConf(sparkConf),
      index = sparkConf.get("spark.riak.demo.index", DEFAULT_INDEX_NAME),
      bucket = sparkConf.get("spark.riak.demo.bucket", DEFAULT_BUCKET_NAME),
      from = sparkConf.get("spark.riak.demo.from", DEFAULT_FROM.toString).toLong,
      to = sparkConf.get("spark.riak.demo.to", DEFAULT_TO.toString).toLong,
      name = sparkConf.get("spark.app.name", "")
    )
  }
}
Example 27
Source File: StatCounter.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.rdd

import java.util.concurrent.atomic.LongAdder

import org.slf4j.Logger

import scala.concurrent.duration.Duration

class StatCounter(logger: Logger = null) {

  case class Stats(duration: Duration, counter: Long, logger: Logger = null) {
    def dump(message: String, logger: Logger = this.logger): Stats = {
      require(logger != null, "logger should be specified")

      logger.info("{}\n\t{} items were processed\n\tit took {}\n",
        List[AnyRef](message, counter: java.lang.Long, duration): _*)
      this
    }
  }

  private val counter = new LongAdder
  private var startedAt = System.currentTimeMillis()

  def increment(): StatCounter = {
    counter.increment()
    this
  }

  def +=(value: Int): StatCounter = {
    counter.add(value)
    this
  }

  def +=(value: Long): StatCounter = {
    counter.add(value)
    this
  }

  def reset(): StatCounter = {
    startedAt = System.currentTimeMillis()
    counter.reset()
    this
  }

  def stats(): Stats = {
    val duration = System.currentTimeMillis() - startedAt
    new Stats(Duration(duration, "ms"), counter.longValue(), logger)
  }
}

object StatCounter {
  def apply(logger: Logger = null): StatCounter = {
    new StatCounter(logger)
  }
}
Example 28
Source File: AbstractRiakTest.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.rdd

import com.basho.riak.JsonTestFunctions
import com.basho.riak.client.core.RiakNode
import com.basho.riak.client.core.query.Namespace
import org.junit._
import org.junit.rules.TestWatcher
import org.junit.runner.Description
import org.slf4j.{Logger, LoggerFactory}

abstract class AbstractRiakTest extends RiakFunctions with JsonTestFunctions {

  private final val logger: Logger = LoggerFactory.getLogger(this.getClass)

  protected val DEFAULT_NAMESPACE = new Namespace("default", "test-bucket")
  protected val DEFAULT_NAMESPACE_4STORE = new Namespace("default", "test-bucket-4store")

  protected override val numberOfParallelRequests: Int = 4
  protected override val nodeBuilder: RiakNode.Builder =
    new RiakNode.Builder().withMinConnections(numberOfParallelRequests)

  protected val jsonData: Option[String] = None

  @Rule
  def watchman: TestWatcher = new TestWatcher() {
    override def starting(description: Description): Unit = {
      super.starting(description)
      logger.info(
        "\n----------------------------------------\n" +
          " [TEST STARTED] {}\n" +
          "----------------------------------------\n",
        description.getDisplayName)
    }

    override def finished(description: Description): Unit = {
      super.finished(description)
      logger.info(
        "\n----------------------------------------\n" +
          " [TEST FINISHED] {}\n" +
          "----------------------------------------\n",
        description.getDisplayName)
    }
  }

  @Before
  protected def initialize(): Unit = setupData()

  protected def setupData(): Unit = {
    // Purge data: data might be not only created, but it may be also changed during the previous test case execution
    //
    // For manual check: curl -v http://localhost:10018/buckets/test-bucket/keys?keys=true
    List(DEFAULT_NAMESPACE, DEFAULT_NAMESPACE_4STORE) foreach resetAndEmptyBucket

    withRiakDo(session => jsonData.foreach(createValues(session, DEFAULT_NAMESPACE, _)))
  }
}
Example 29
Source File: UnorderedParallelParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import java.util.UUID import akka.Done import akka.stream.scaladsl.{Flow, Keep, Sink} import org.apache.hadoop.fs.Path import org.apache.parquet.schema.MessageType import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future private[parquet4s] object UnorderedParallelParquetSink extends IOOps { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path, parallelism: Int, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[T, Future[Done]] = { val schema = ParquetSchemaResolver.resolveSchema[T] val valueCodecConfiguration = options.toValueCodecConfiguration validateWritePath(path, options) def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration) Flow[T] .zipWithIndex .groupBy(parallelism, elemAndIndex => Math.floorMod(elemAndIndex._2, parallelism)) .map(elemAndIndex => encode(elemAndIndex._1)) .fold(UnorderedChunk(path, schema, options))(_.write(_)) .map(_.close()) .async .mergeSubstreamsWithParallelism(parallelism) .toMat(Sink.ignore)(Keep.right) } private trait UnorderedChunk { def write(record: RowParquetRecord): UnorderedChunk def close(): Unit } private object UnorderedChunk { def apply(basePath: Path, schema: MessageType, options: ParquetWriter.Options): UnorderedChunk = new PendingUnorderedChunk(basePath, schema, options) private[UnorderedChunk] class PendingUnorderedChunk(basePath: Path, schema: MessageType, options: ParquetWriter.Options) extends UnorderedChunk { override def write(record: RowParquetRecord): UnorderedChunk = { val chunkPath = Path.mergePaths(basePath, new Path(s"/part-${UUID.randomUUID()}.parquet")) val writer = ParquetWriter.internalWriter(chunkPath, schema, options) writer.write(record) new StartedUnorderedChunk(chunkPath, writer, acc = 1) } override def close(): Unit = () } private[UnorderedChunk] class StartedUnorderedChunk(chunkPath: Path, writer: ParquetWriter.InternalWriter, acc: Long ) extends UnorderedChunk { override def write(record: RowParquetRecord): UnorderedChunk = { writer.write(record) new StartedUnorderedChunk(chunkPath, writer, acc = acc + 1) } override def close(): Unit = { if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath") writer.close() } } } }
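A sketch of wiring the sink above into a stream. UnorderedParallelParquetSink is package-private, so this assumes the code lives in the com.github.mjakubowski84.parquet4s package; it also assumes parquet4s can derive the encoder and schema for the case class, and the output path is illustrative.

package com.github.mjakubowski84.parquet4s

import akka.actor.ActorSystem
import akka.stream.scaladsl.Source
import org.apache.hadoop.fs.Path

object UnorderedSinkUsage extends App {
  implicit val system: ActorSystem = ActorSystem("parquet-sink")
  import system.dispatcher

  case class Measurement(id: Int, value: Double)

  val done = Source(1 to 10000)
    .map(i => Measurement(i, i * 0.5))
    // Four substreams write four independent part files in parallel
    .runWith(UnorderedParallelParquetSink[Measurement](new Path("file:///tmp/measurements"), parallelism = 4))

  done.onComplete(_ => system.terminate())
}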
Example 30
Source File: IndefiniteStreamParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import akka.stream.FlowShape import akka.stream.scaladsl.{Broadcast, Flow, GraphDSL, Keep, Sink, ZipWith} import com.github.mjakubowski84.parquet4s.ParquetWriter.ParquetWriterFactory import org.apache.hadoop.fs.Path import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.duration.FiniteDuration private[parquet4s] object IndefiniteStreamParquetSink extends IOOps { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[In, ToWrite: ParquetWriterFactory, Mat](path: Path, maxChunkSize: Int, chunkWriteTimeWindow: FiniteDuration, buildChunkPath: ChunkPathBuilder[In] = ChunkPathBuilder.default, preWriteTransformation: In => ToWrite = identity[In] _, postWriteSink: Sink[Seq[In], Mat] = Sink.ignore, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[In, Mat] = { validateWritePath(path, options) val internalFlow = Flow.fromGraph(GraphDSL.create() { implicit b => import GraphDSL.Implicits._ val inChunkFlow = b.add(Flow[In].groupedWithin(maxChunkSize, chunkWriteTimeWindow)) val broadcastChunks = b.add(Broadcast[Seq[In]](outputPorts = 2)) val writeFlow = Flow[Seq[In]].map { chunk => val toWrite = chunk.map(preWriteTransformation) val chunkPath = buildChunkPath(path, chunk) if (logger.isDebugEnabled()) logger.debug(s"Writing ${toWrite.size} records to $chunkPath") ParquetWriter.writeAndClose(chunkPath.toString, toWrite, options) } val zip = b.add(ZipWith[Seq[In], Unit, Seq[In]]((chunk, _) => chunk)) inChunkFlow ~> broadcastChunks ~> writeFlow ~> zip.in1 broadcastChunks ~> zip.in0 FlowShape(inChunkFlow.in, zip.out) }) internalFlow.toMat(postWriteSink)(Keep.right) } }
Example 31
Source File: SingleFileParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import akka.Done import akka.stream.scaladsl.{Flow, Keep, Sink} import org.apache.hadoop.fs.Path import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future private[parquet4s] object SingleFileParquetSink { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[T, Future[Done]] = { val schema = ParquetSchemaResolver.resolveSchema[T] val writer = ParquetWriter.internalWriter(path, schema, options) val valueCodecConfiguration = options.toValueCodecConfiguration val isDebugEnabled = logger.isDebugEnabled def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration) Flow[T] .map(encode) .fold(0) { case (acc, record) => writer.write(record); acc + 1} .map { count => if (isDebugEnabled) logger.debug(s"$count records were successfully written to $path") writer.close() } .toMat(Sink.ignore)(Keep.right) } }
Example 32
Source File: IOOps.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import org.apache.hadoop.fs.Path import org.apache.hadoop.io.SecureIOUtils.AlreadyExistsException import org.apache.parquet.hadoop.ParquetFileWriter import org.slf4j.Logger import scala.concurrent.{ExecutionContext, Future} import scala.util.Try trait IOOps { protected val logger: Logger protected def validateWritePath(path: Path, writeOptions: ParquetWriter.Options): Unit = { val fs = path.getFileSystem(writeOptions.hadoopConf) try { if (fs.exists(path)) { if (writeOptions.writeMode == ParquetFileWriter.Mode.CREATE) throw new AlreadyExistsException(s"File or directory already exists: $path") else { if (logger.isDebugEnabled) logger.debug(s"Deleting $path in order to override with new data.") fs.delete(path, true) } } } finally fs.close() } protected def filesAtPath(path: Path, writeOptions: ParquetWriter.Options) (implicit ec: ExecutionContext): Future[List[String]] = Future { scala.concurrent.blocking { val fs = path.getFileSystem(writeOptions.hadoopConf) try { val iter = fs.listFiles(path, false) Stream .continually(Try(iter.next())) .takeWhile(_.isSuccess) .map(_.get) .map(_.getPath.getName) .toList } finally fs.close() } } protected def filesAtPath(path: String, writeOptions: ParquetWriter.Options) (implicit ec: ExecutionContext): Future[List[String]] = filesAtPath(new Path(path), writeOptions) }
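A minimal sketch of mixing IOOps into a small helper that pre-validates a write path. The target path is an assumption, the default ParquetWriter.Options are used, and the object is placed in the parquet4s package so that it sees the same types as the trait above.

package com.github.mjakubowski84.parquet4s

import org.apache.hadoop.fs.Path
import org.slf4j.{Logger, LoggerFactory}

object WritePathCheck extends IOOps {
  protected val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // Throws AlreadyExistsException in CREATE mode, or deletes the existing path in overwrite mode
    validateWritePath(new Path("file:///tmp/output.parquet"), ParquetWriter.Options())
  }
}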
Example 33
Source File: SequentialFileSplittingParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import akka.Done import akka.stream.scaladsl.{Flow, Keep, Sink} import org.apache.hadoop.fs.Path import org.apache.parquet.schema.MessageType import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future private[parquet4s] object SequentialFileSplittingParquetSink extends IOOps { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path, maxRecordsPerFile: Long, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[T, Future[Done]] = { val schema = ParquetSchemaResolver.resolveSchema[T] val valueCodecConfiguration = options.toValueCodecConfiguration validateWritePath(path, options) def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration) Flow[T] .zipWithIndex .map { case (elem, index) => OrderedChunkElem(encode(elem), index) } .fold(OrderedChunk(path, schema, maxRecordsPerFile, options))(_.write(_)) .map(_.close()) .toMat(Sink.ignore)(Keep.right) } private case class OrderedChunkElem(record: RowParquetRecord, index: Long) { def isSplit(maxRecordsPerFile: Long): Boolean = index % maxRecordsPerFile == 0 } private trait OrderedChunk { def write(elem: OrderedChunkElem): OrderedChunk def close(): Unit } private object OrderedChunk { def apply(basePath: Path, schema: MessageType, maxRecordsPerFile: Long, options: ParquetWriter.Options): OrderedChunk = new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options) private[OrderedChunk] class PendingOrderedChunk(basePath: Path, schema: MessageType, maxRecordsPerFile: Long, options: ParquetWriter.Options) extends OrderedChunk { override def write(elem: OrderedChunkElem): OrderedChunk = { val chunkNumber: Int = Math.floorDiv(elem.index, maxRecordsPerFile).toInt val chunkPath = Path.mergePaths(basePath, new Path(chunkFileName(chunkNumber))) val writer = ParquetWriter.internalWriter(chunkPath, schema, options) writer.write(elem.record) new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = 1) } override def close(): Unit = () private def chunkFileName(chunkNumber: Int): String = f"/part-$chunkNumber%05d.parquet" } private[OrderedChunk] class StartedOrderedChunk(basePath: Path, schema: MessageType, maxRecordsPerFile: Long, options: ParquetWriter.Options, chunkPath: Path, writer: ParquetWriter.InternalWriter, acc: Long) extends OrderedChunk { override def write(elem: OrderedChunkElem): OrderedChunk = { if (elem.isSplit(maxRecordsPerFile)) { this.close() new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options).write(elem) } else { writer.write(elem.record) new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = acc + 1) } } override def close(): Unit = { if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath") writer.close() } } } }
Example 34
Source File: SddfApp.scala From sddf with GNU General Public License v3.0 | 5 votes |
package de.unihamburg.vsis.sddf import org.joda.time.format.PeriodFormatterBuilder import org.slf4j.Logger import org.slf4j.LoggerFactory import de.unihamburg.vsis.sddf.config.Config import scopt.Read import scopt.OptionParser class SddfApp extends App { val periodFormatter = (new PeriodFormatterBuilder() minimumPrintedDigits (2) printZeroAlways () appendDays () appendSeparator ("d ") appendHours () appendSeparator (":") appendMinutes () appendSuffix (":") appendSeconds () appendSeparator (".") minimumPrintedDigits (3) appendMillis () toFormatter) @transient var _log: Logger = null // Method to get or create the logger for this object def log(): Logger = { if (_log == null) { _log = LoggerFactory.getLogger(getClass.getName) } _log } @transient var _logLineage: Logger = null // Method to get or create the logger for this object def logLineage(): Logger = { if (_logLineage == null) { _logLineage = LoggerFactory.getLogger("lineage") } _logLineage } // extend Parser to accept the type Option implicit val optionRead: Read[Option[String]] = Read.reads(Some(_)) // parsing commandline parameters val parser = new OptionParser[Parameters]("sddf") { head("SddF", "0.1.0") opt[Map[String, String]]('p', "properties") optional() valueName("<property>") action { (x, c) => c.copy(properties = x) } text("set arbitrary properties via command line") opt[Option[String]]('c', "config-file") optional() action { (x, c) => c.copy(propertyPath = x) } text("optional path to a property file") } // parser.parse returns Option[C] val parameters = parser.parse(args, Parameters()) var propertiesCommandline: Map[String, String] = Map() var propertiesPath: Option[String] = None parameters match { case Some(config) => propertiesCommandline = config.properties propertiesPath = config.propertyPath case None => // arguments are bad, error message will have been displayed } val Conf: Config = if(propertiesPath.isDefined) new Config(propertiesPath.get) else new Config() propertiesCommandline.foreach(props => { Conf.setPropertyCommandline(props._1, props._2) }) } case class Parameters(propertyPath: Option[String] = None, properties: Map[String,String] = Map())
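A sketch of building on SddfApp: a concrete application simply extends it and picks up the parsed configuration. The command-line shown in the comment and the property names are assumptions; scopt parses the -p option as comma-separated key=value pairs.

import de.unihamburg.vsis.sddf.SddfApp

object MyDeduplicationApp extends SddfApp {
  // Invoked e.g. as: MyDeduplicationApp -c /etc/sddf.properties -p input=/data/in,output=/data/out
  log().info(s"Command-line properties: $propertiesCommandline")
  log().info(s"Property file: ${propertiesPath.getOrElse("<none, using defaults>")}")
}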
Example 35
Source File: MetaCatalogProcessor.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.gov.daf.ingestion.metacatalog import com.typesafe.config.ConfigFactory import play.api.libs.json._ import it.gov.daf.catalogmanager._ import it.gov.daf.catalogmanager.json._ import org.slf4j.{Logger, LoggerFactory} import org.apache.commons.lang.StringEscapeUtils //Get Logical_uri, process MetadataCatalog and get the required info class MetaCatalogProcessor(metaCatalog: MetaCatalog) { val logger: Logger = LoggerFactory.getLogger(this.getClass) val sftpDefPrefix = ConfigFactory.load().getString("ingmgr.sftpdef.prefixdir") def separator() = { metaCatalog.operational .input_src.sftp .flatMap(_.headOption) .flatMap(_.param) .flatMap(_.split(", ").reverse.headOption) .map(_.replace("sep=", "")) .getOrElse(",") } def fileFormatNifi(): String = { val inputSftp = metaCatalog.operational.input_src.sftp inputSftp match { case Some(s) => val sftps: Seq[SourceSftp] = s.filter(x => x.name.equals("sftp_daf")) if (sftps.nonEmpty) sftps.head.param.getOrElse("") else "" case None => "" } } def ingPipelineNifi(): String = { ingPipeline.mkString(",") } }
Example 36
Source File: KuduController.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import org.apache.kudu.spark.kudu._ import org.apache.spark.sql.{ DataFrame, SparkSession } import org.slf4j.{ Logger, LoggerFactory } import scala.util.{ Failure, Try } class KuduController(sparkSession: SparkSession, master: String) { val alogger: Logger = LoggerFactory.getLogger(this.getClass) def readData(table: String): Try[DataFrame] = Try{ sparkSession .sqlContext .read .options(Map("kudu.master" -> master, "kudu.table" -> table)).kudu }.recoverWith { case ex => alogger.error(s"Exception ${ex.getMessage}\n ${ex.getStackTrace.mkString("\n")} ") Failure(ex) } }
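A usage sketch for KuduController; the Kudu master address and table name are assumptions for illustration.

import controllers.KuduController
import org.apache.spark.sql.SparkSession
import scala.util.{Failure, Success}

object KuduReadExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("kudu-read").getOrCreate()
    val controller = new KuduController(spark, "kudu-master-host:7051")

    controller.readData("impala::default.my_table") match {
      case Success(df) => df.show(10)
      case Failure(ex) => println(s"Read failed: ${ex.getMessage}")
    }
    spark.stop()
  }
}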
Example 37
Source File: PhysicalDatasetController.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import cats.syntax.show.toShow import com.typesafe.config.Config import daf.dataset.{ DatasetParams, FileDatasetParams, KuduDatasetParams } import daf.filesystem.fileFormatShow import org.apache.spark.sql.{ DataFrame, SparkSession } import org.apache.spark.SparkConf import org.slf4j.{ Logger, LoggerFactory } class PhysicalDatasetController(sparkSession: SparkSession, kuduMaster: String, defaultLimit: Option[Int] = None, defaultChunkSize: Int = 0) { lazy val kuduController = new KuduController(sparkSession, kuduMaster) lazy val hdfsController = new HDFSController(sparkSession) val logger: Logger = LoggerFactory.getLogger(this.getClass) private def addLimit(dataframe: DataFrame, limit: Option[Int]) = (limit, defaultLimit) match { case (None, None) => dataframe case (None, Some(value)) => dataframe.limit { value } case (Some(value), None) => dataframe.limit { value } case (Some(value), Some(default)) => dataframe.limit { math.min(value, default) } } def kudu(params: KuduDatasetParams, limit: Option[Int] = None) = { logger.debug { s"Reading data from kudu table [${params.table}]" } kuduController.readData(params.table).map { addLimit(_, limit) } } def hdfs(params: FileDatasetParams, limit: Option[Int] = None) = { logger.debug { s"Reading data from hdfs at path [${params.path}]" } hdfsController.readData(params.path, params.format.show, params.param("separator")).map { addLimit(_, limit) } } def get(params: DatasetParams, limit: Option[Int]= None) = params match { case kuduParams: KuduDatasetParams => kudu(kuduParams, limit) case hdfsParams: FileDatasetParams => hdfs(hdfsParams, limit) } } object PhysicalDatasetController { private def getOptionalString(path: String, underlying: Config) = { if (underlying.hasPath(path)) { Some(underlying.getString(path)) } else { None } } private def getOptionalInt(path: String, underlying: Config) = { if (underlying.hasPath(path)) { Some(underlying.getInt(path)) } else { None } } val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply(configuration: Config): PhysicalDatasetController = { val sparkConfig = new SparkConf() sparkConfig.set("spark.driver.memory", configuration.getString("spark.driver.memory")) val sparkSession = SparkSession.builder().master("local").config(sparkConfig).getOrCreate() val kuduMaster = configuration.getString("kudu.master") val defaultLimit = if (configuration hasPath "daf.row_limit") Some { configuration.getInt("daf.row_limit") } else None System.setProperty("sun.security.krb5.debug", "true") new PhysicalDatasetController(sparkSession, kuduMaster, defaultLimit) } }
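A bootstrap sketch for the companion factory above. Only the configuration keys read by apply() are set, and all values are assumptions; note that the factory itself starts a local SparkSession.

import com.typesafe.config.ConfigFactory
import controllers.PhysicalDatasetController

object DatasetControllerBootstrap {
  def main(args: Array[String]): Unit = {
    val config = ConfigFactory.parseString(
      """
        |spark.driver.memory = "2g"
        |kudu.master = "kudu-master-host:7051"
        |daf.row_limit = 1000
      """.stripMargin)

    val controller = PhysicalDatasetController(config)
    println(s"Controller created: $controller")
  }
}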
Example 38
Source File: HDFSController.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import com.databricks.spark.avro._ import org.apache.spark.sql.{ DataFrame, SparkSession } import org.slf4j.{Logger, LoggerFactory} import scala.util.{Failure, Try} class HDFSController(sparkSession: SparkSession) { val alogger: Logger = LoggerFactory.getLogger(this.getClass) def readData(path: String, format: String, separator: Option[String]): Try[DataFrame] = format match { case "csv" => Try { val pathFixAle = path + "/" + path.split("/").last + ".csv" alogger.debug(s"questo e' il path $pathFixAle") separator match { case None => sparkSession.read.csv(pathFixAle) case Some(sep) => sparkSession.read.format("csv") .option("sep", sep) .option("inferSchema", "true") .option("header", "true") .load(pathFixAle) } } case "parquet" => Try { sparkSession.read.parquet(path) } case "avro" => Try { sparkSession.read.avro(path) } case unknown => Failure { new IllegalArgumentException(s"Unsupported format [$unknown]") } } }
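A usage sketch for HDFSController; the dataset path and separator are assumptions. For the "csv" format the controller appends "<last path segment>.csv" to the given directory path before reading.

import controllers.HDFSController
import org.apache.spark.sql.SparkSession
import scala.util.{Failure, Success}

object HdfsReadExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("hdfs-read").getOrCreate()
    val controller = new HDFSController(spark)

    controller.readData("/daf/datasets/my_dataset", "csv", Some(";")) match {
      case Success(df) => println(s"Read ${df.count()} rows")
      case Failure(ex) => println(s"Read failed: ${ex.getMessage}")
    }
    spark.stop()
  }
}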
Example 39
Source File: CleanupStatistics.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.dataset.export.cleanup

import org.slf4j.Logger

sealed case class CleanupStatistics(successes: List[SuccessfulAttempt], failures: List[FailedAttempt], timeElapsed: Long) {

  lazy val fatalFailures    = failures.filter { _.reason.nonEmpty }
  lazy val nonFatalFailures = failures.filter { _.reason.isEmpty }

  private def logSuccesses(logger: Logger) = if (successes.nonEmpty) {
    logger.info { s"Successfully deleted [${successes.size}] path(s)" }
    successes.foreach { case SuccessfulAttempt(path) => logger.debug { s"${path.toString}" } }
  }

  private def logFailures(logger: Logger) = if (failures.nonEmpty) {
    logger.warn { s"Failed to delete [${failures.size}] path(s)" }
    failures.foreach {
      case FailedAttempt(path, None)         => logger.warn { s"${path.toString} - reason unknown" }
      case FailedAttempt(path, Some(reason)) => logger.warn(s"${path.toString}", reason)
    }
  }

  def log(logger: Logger) = {
    logSuccesses(logger)
    logFailures(logger)
    logger.info { s"Cleanup finished in [$timeElapsed] millisecond(s)" }
  }
}

object CleanupStatistics {

  private def splitAttempts(attempts: List[CleanupAttempt],
                            successes: List[SuccessfulAttempt] = List.empty[SuccessfulAttempt],
                            failures: List[FailedAttempt] = List.empty[FailedAttempt]): (List[SuccessfulAttempt], List[FailedAttempt]) = attempts match {
    case (attempt: SuccessfulAttempt) :: tail => splitAttempts(tail, attempt :: successes, failures)
    case (attempt: FailedAttempt) :: tail     => splitAttempts(tail, successes, attempt :: failures)
    case Nil                                  => (successes, failures)
  }

  def collect(attempts: List[CleanupAttempt], timeElapsed: Long) = splitAttempts(attempts) match {
    case (successes, failures) => apply(successes, failures, timeElapsed)
  }
}
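A small sketch of collecting attempts and logging the report. SuccessfulAttempt, FailedAttempt and CleanupAttempt are not part of this excerpt, so their constructors (and the hadoop Path element type) are inferred from the pattern matches above and should be treated as assumptions.

import org.apache.hadoop.fs.Path
import org.slf4j.LoggerFactory

object CleanupReport {
  def main(args: Array[String]): Unit = {
    val logger = LoggerFactory.getLogger("cleanup")

    // Constructors inferred from the pattern matches in CleanupStatistics
    val attempts: List[CleanupAttempt] = List(
      SuccessfulAttempt(new Path("/tmp/export/part-0.csv")),
      FailedAttempt(new Path("/tmp/export/part-1.csv"), None)
    )

    CleanupStatistics.collect(attempts, timeElapsed = 42L).log(logger)
  }
}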
Example 40
Source File: HiveEngineManagerSpringConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hive.conf import com.webank.wedatasphere.linkis.enginemanager.EngineHook import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration import com.webank.wedatasphere.linkis.enginemanager.hook.{ConsoleConfigurationEngineHook, JarLoaderEngineHook} import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy} import com.webank.wedatasphere.linkis.rpc.Sender import org.slf4j.{Logger, LoggerFactory} import org.springframework.context.annotation.{Bean, Configuration} @Configuration class HiveEngineManagerSpringConfiguration { private val logger:Logger = LoggerFactory.getLogger(getClass) @Bean(Array("resources")) def createResource(): ModuleInfo = { val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong , EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue) val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue) logger.info("create resource for hive") ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance) } @Bean(name = Array("hooks")) def createEngineHook(): Array[EngineHook] = { Array(new ConsoleConfigurationEngineHook, new JarLoaderEngineHook)// TODO } }
Example 41
Source File: PipeLineManagerSpringConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.pipeline import com.webank.wedatasphere.linkis.enginemanager.EngineCreator import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy} import com.webank.wedatasphere.linkis.rpc.Sender import org.slf4j.{Logger, LoggerFactory} import org.springframework.context.annotation.{Bean, Configuration} @Configuration class PipeLineManagerSpringConfiguration { private val logger:Logger = LoggerFactory.getLogger(getClass) @Bean(Array("engineCreator")) def createEngineCreator(): EngineCreator =new PipeLineDefaultEngineCreator @Bean(Array("resources")) def createResource(): ModuleInfo = { val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong , EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue) val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_INSTANCES.getValue) logger.info("create resource for pipeline") ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance) } }
Example 42
Source File: CommentInterceptor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.interceptor.impl

import java.lang
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

// Note: the enclosing comment-helper object (which declares scalaCommentPattern) was truncated from this excerpt.
  override def dealComment(code: String): String = {
    val p = Pattern.compile(scalaCommentPattern)
    p.matcher(code).replaceAll("$1")
  }
}

object CommentMain {
  def main(args: Array[String]): Unit = {
    val sqlCode = "select * from default.user;--你好;show tables"
    val sqlCode1 = "select * from default.user--你好;show tables"
    println(SQLCommentHelper.dealComment(sqlCode))
  }
}
Example 43
Source File: EntranceGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.scheduler import com.webank.wedatasphere.linkis.entrance.conf.EntranceConfiguration import com.webank.wedatasphere.linkis.entrance.execute.EntranceJob import com.webank.wedatasphere.linkis.entrance.persistence.HaPersistenceTask import com.webank.wedatasphere.linkis.protocol.config.{RequestQueryAppConfig, ResponseQueryConfig} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue.ParallelGroup import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, SchedulerEvent} import com.webank.wedatasphere.linkis.server.JMap import org.apache.commons.lang.StringUtils import org.slf4j.{Logger, LoggerFactory} class EntranceGroupFactory extends GroupFactory { private val groupNameToGroups = new JMap[String, Group] private val logger:Logger = LoggerFactory.getLogger(classOf[EntranceGroupFactory]) override def getOrCreateGroup(groupName: String): Group = { if(!groupNameToGroups.containsKey(groupName)) synchronized{ //TODO Query the database and get initCapacity, maxCapacity, maxRunningJobs, maxAskExecutorTimes(查询数据库,拿到initCapacity、maxCapacity、maxRunningJobs、maxAskExecutorTimes) val initCapacity = 100 val maxCapacity = 100 var maxRunningJobs = EntranceConfiguration.WDS_LINKIS_INSTANCE.getValue val maxAskExecutorTimes = EntranceConfiguration.MAX_ASK_EXECUTOR_TIME.getValue.toLong if (groupName.split("_").length < 2){ logger.warn(s"name style of group: $groupName is not correct, we will set default value for the group") }else{ val sender:Sender = Sender.getSender(EntranceConfiguration.CLOUD_CONSOLE_CONFIGURATION_SPRING_APPLICATION_NAME.getValue) val creator = groupName.split("_")(0) val username = groupName.split("_")(1) val engineName = EntranceConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue val engineType = if (engineName.trim().toLowerCase().contains("engine")) engineName.substring(0, engineName.length - "engine".length) else "spark" logger.info(s"Getting parameters for $groupName(正在为 $groupName 获取参数) username: $username, creator:$creator, engineType: $engineType") val keyAndValue = sender.ask(RequestQueryAppConfig(username, creator, engineType)).asInstanceOf[ResponseQueryConfig].getKeyAndValue try{ maxRunningJobs = Integer.parseInt(keyAndValue.get(EntranceConfiguration.WDS_LINKIS_INSTANCE.key)) }catch{ case t:Throwable => logger.warn("Get maxRunningJobs from configuration server failed! Next use the default value to continue.",t) } } logger.info("groupName: {} => maxRunningJobs is {}", groupName, maxRunningJobs) val group = new ParallelGroup(groupName, initCapacity, maxCapacity) group.setMaxRunningJobs(maxRunningJobs) group.setMaxAskExecutorTimes(maxAskExecutorTimes) if(!groupNameToGroups.containsKey(groupName)) groupNameToGroups.put(groupName, group) } groupNameToGroups.get(groupName) } override def getGroupNameByEvent(event: SchedulerEvent): String = event match { case job: EntranceJob => job.getTask match { case HaPersistenceTask(task) => "HA" case _ =>EntranceGroupFactory.getGroupName(job.getCreator, job.getUser) } } } object EntranceGroupFactory { def getGroupName(creator: String, user: String): String = { if (StringUtils.isNotEmpty(creator)) creator + "_" + user else EntranceConfiguration.DEFAULT_REQUEST_APPLICATION_NAME.getValue + "_" + user } }
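A tiny sketch of the group-naming convention used by the factory above: names are "<creator>_<user>", with the configured default application name substituted when the creator is empty.

import com.webank.wedatasphere.linkis.entrance.scheduler.EntranceGroupFactory

object GroupNameDemo {
  def main(args: Array[String]): Unit = {
    // "IDE_alice"
    println(EntranceGroupFactory.getGroupName("IDE", "alice"))
    // "<DEFAULT_REQUEST_APPLICATION_NAME>_alice"
    println(EntranceGroupFactory.getGroupName("", "alice"))
  }
}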
Example 44
Source File: package.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis import java.util import javax.servlet.http.HttpServletRequest import com.webank.wedatasphere.linkis.common.exception.{ErrorException, ExceptionManager, FatalException, WarnException} import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.server.exception.{BDPServerErrorException, NonLoginException} import com.webank.wedatasphere.linkis.server.security.SecurityFilter import org.apache.commons.lang.StringUtils import org.apache.commons.lang.exception.ExceptionUtils import org.slf4j.Logger import scala.collection.{JavaConversions, mutable} package object server { val EXCEPTION_MSG = "errorMsg" type JMap[K, V] = java.util.HashMap[K, V] implicit def getUser(req: HttpServletRequest): String = SecurityFilter.getLoginUsername(req) def validateFailed(message: String): Message = Message(status = 2).setMessage(message) def validate[T](json: util.Map[String, T], keys: String*): Unit = { keys.foreach(k => if(!json.contains(k) || json.get(k) == null || StringUtils.isEmpty(json.get(k).toString)) throw new BDPServerErrorException(11001, s"Verification failed, $k cannot be empty!(验证失败,$k 不能为空!)")) } def error(message: String): Message = Message.error(message) implicit def ok(msg: String): Message = Message.ok(msg) implicit def error(t: Throwable): Message = Message.error(t) implicit def error(e: (String, Throwable)): Message = Message.error(e) implicit def error(msg: String, t: Throwable): Message = Message.error(msg -> t) // def tryCatch[T](tryOp: => T)(catchOp: Throwable => T): T = Utils.tryCatch(tryOp)(catchOp) // def tryCatch(tryOp: => Message)(catchOp: Throwable => Message): Message = Utils.tryCatch(tryOp){ // case nonLogin: NonLoginException => Message.noLogin(msg = nonLogin.getMessage) // case t => catchOp(t) // } def catchMsg(tryOp: => Message)(msg: String)(implicit log: Logger): Message = Utils.tryCatch(tryOp){ case fatal: FatalException => log.error("Fatal Error, system exit...", fatal) System.exit(fatal.getErrCode) Message.error("Fatal Error, system exit...") case nonLogin: NonLoginException => val message = Message.noLogin(nonLogin.getMessage) message.data(EXCEPTION_MSG, nonLogin.toMap) message case error: ErrorException => val cause = error.getCause val errorMsg = cause match { case t: ErrorException => s"error code(错误码): ${t.getErrCode}, error message(错误信息): ${t.getDesc}." case _ => s"error code(错误码): ${error.getErrCode}, error message(错误信息): ${error.getDesc}." } log.error(errorMsg, error) val message = Message.error(errorMsg) message.data(EXCEPTION_MSG, error.toMap) message case warn: WarnException => val warnMsg = s"Warning code(警告码): ${warn.getErrCode}, Warning message(警告信息): ${warn.getDesc}." 
log.warn(warnMsg, warn) val message = Message.warn(warnMsg) message.data(EXCEPTION_MSG, warn.toMap) message case t => log.error(msg, t) val errorMsg = ExceptionUtils.getRootCauseMessage(t) val message = if(StringUtils.isNotEmpty(errorMsg) && "operation failed(操作失败)" != msg) error(msg + "!the reason(原因):" + errorMsg) else if(StringUtils.isNotEmpty(errorMsg)) error(errorMsg) else error(msg) message.data(EXCEPTION_MSG, ExceptionManager.unknownException(message.getMessage)) } def catchIt(tryOp: => Message)(implicit log: Logger): Message = catchMsg(tryOp)("operation failed(操作失败)s") implicit def toScalaBuffer[T](list: util.List[T]): mutable.Buffer[T] = JavaConversions.asScalaBuffer(list) implicit def toScalaMap[K, V](map: util.Map[K, V]): mutable.Map[K, V] = JavaConversions.mapAsScalaMap(map) implicit def toJavaList[T](list: mutable.Buffer[T]): util.List[T] = { val arrayList = new util.ArrayList[T] list.foreach(arrayList.add) arrayList } implicit def toJavaMap[K, V](map: mutable.Map[K, V]): JMap[K, V] = { val hashMap = new util.HashMap[K, V]() map.foreach(m => hashMap.put(m._1, m._2)) hashMap } implicit def toJavaMap[K, V](map: Map[K, V]): JMap[K, V] = { val hashMap = new util.HashMap[K, V]() map.foreach(m => hashMap.put(m._1, m._2)) hashMap } implicit def asString(mapWithKey: (util.Map[String, Object], String)): String = mapWithKey._1.get(mapWithKey._2).asInstanceOf[String] implicit def getString(mapWithKey: (util.Map[String, String], String)): String = mapWithKey._1.get(mapWithKey._2) implicit def asInt(map: util.Map[String, Object], key: String): Int = map.get(key).asInstanceOf[Int] implicit def asBoolean(mapWithKey: (util.Map[String, Object], String)): Boolean = mapWithKey._1.get(mapWithKey._2).asInstanceOf[Boolean] }
Example 45
Source File: HostsStatuses.scala From algoliasearch-client-scala with MIT License | 5 votes |
package algolia import java.util.concurrent.ConcurrentHashMap import org.slf4j.{Logger, LoggerFactory} case class HostsStatuses( configuration: AlgoliaClientConfiguration, utils: AlgoliaUtils, queryHosts: Seq[String], indexingHosts: Seq[String] ) { private[algolia] val hostStatuses: ConcurrentHashMap[String, HostStatus] = new ConcurrentHashMap[String, HostStatus](5) val logger: Logger = LoggerFactory.getLogger("algoliasearch") def markHostAsUp(host: String): Unit = { logger.debug("Marking {} as `up`", host) hostStatuses.put(host, HostStatus.up(utils.now())) } def markHostAsDown(host: String): Unit = { logger.debug("Marking {} as `down`", host) hostStatuses.put(host, HostStatus.down(utils.now())) } def indexingHostsThatAreUp(): Seq[String] = hostsThatAreUp(indexingHosts) def queryHostsThatAreUp(): Seq[String] = hostsThatAreUp(queryHosts) private def hostsThatAreUp(hosts: Seq[String]): Seq[String] = { val filteredHosts = hosts.filter(h => isUpOrCouldBeRetried(getHostStatus(h)) ) if (filteredHosts.isEmpty) { hosts } else { filteredHosts } } def isUpOrCouldBeRetried(hostStatus: HostStatus): Boolean = hostStatus.up || (utils .now() - hostStatus.updatedAt) >= configuration.hostDownTimeoutMs private def getHostStatus(host: String): HostStatus = hostStatuses.getOrDefault(host, HostStatus.up(utils.now())) } private case class HostStatus(up: Boolean, updatedAt: Long) private object HostStatus { def up(now: Long) = HostStatus(up = true, now) def down(now: Long) = HostStatus(up = false, now) }
Example 46
Source File: SparkCassOutputHandler.scala From Spark2Cassandra with Apache License 2.0 | 5 votes |
package com.github.jparkie.spark.cassandra import org.apache.cassandra.utils.OutputHandler import org.slf4j.Logger class SparkCassOutputHandler(log: Logger) extends OutputHandler { override def warn(msg: String): Unit = { log.warn(msg) } override def warn(msg: String, th: Throwable): Unit = { log.warn(msg, th) } override def debug(msg: String): Unit = { log.debug(msg) } override def output(msg: String): Unit = { log.info(msg) } }
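A short usage sketch: SparkCassOutputHandler simply adapts Cassandra's OutputHandler callbacks onto an SLF4J logger, so it can be handed to bulk-loading utilities that expect an OutputHandler. The messages below are illustrative.

import com.github.jparkie.spark.cassandra.SparkCassOutputHandler
import org.slf4j.LoggerFactory

object OutputHandlerExample {
  def main(args: Array[String]): Unit = {
    val log = LoggerFactory.getLogger("spark2cassandra")
    val handler = new SparkCassOutputHandler(log)

    handler.output("streaming session started")                              // logged at INFO
    handler.warn("retrying stream", new RuntimeException("socket closed"))   // logged at WARN with the throwable
  }
}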
Example 47
Source File: SampleRoutes.scala From akka_streams_tutorial with MIT License | 5 votes |
package akkahttp import java.io.File import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server.Route import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Await import scala.concurrent.duration._ import scala.sys.process.Process import scala.util.{Failure, Success} object SampleRoutes extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("SampleRoutes") implicit val executionContext = system.dispatcher def getFromBrowsableDir: Route = { val dirToBrowse = File.separator + "tmp" // pathPrefix allows loading dirs and files recursively pathPrefix("entries") { getFromBrowseableDirectory(dirToBrowse) } } def parseFormData: Route = path("post") { formFields('color, 'age.as[Int]) { (color, age) => complete(s"The color is '$color' and the age is $age") } } def routes: Route = { getFromBrowsableDir ~ parseFormData } val bindingFuture = Http().bindAndHandle(routes, "127.0.0.1", 8000) bindingFuture.onComplete { case Success(b) => println("Server started, listening on: " + b.localAddress) case Failure(e) => println(s"Server could not bind to... Exception message: ${e.getMessage}") system.terminate() } def browserClient() = { val os = System.getProperty("os.name").toLowerCase if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").! } browserClient() sys.addShutdownHook { println("About to shutdown...") val fut = bindingFuture.map(serverBinding => serverBinding.terminate(hardDeadline = 3.seconds)) println("Waiting for connections to terminate...") val onceAllConnectionsTerminated = Await.result(fut, 10.seconds) println("Connections terminated") onceAllConnectionsTerminated.flatMap { _ => system.terminate() } } }
Example 48
Source File: PublishToSourceQueueFromMultipleThreads.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import akka.actor.ActorSystem import akka.stream.Supervision.Decider import akka.stream._ import akka.stream.scaladsl.{Flow, Sink, Source, SourceQueueWithComplete} import akka.{Done, NotUsed} import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.concurrent.duration._ import scala.util.{Failure, Success} object PublishToSourceQueueFromMultipleThreads extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("PublishToSourceQueueFromMultipleThreads") implicit val ec = system.dispatcher val bufferSize = 100 // As of akka 2.6.x there is a thread safe implementation for SourceQueue val maxConcurrentOffers = 1000 val numberOfPublishingClients = 1000 val slowSink: Sink[Seq[Int], NotUsed] = Flow[Seq[Int]] .delay(2.seconds, DelayOverflowStrategy.backpressure) .to(Sink.foreach(e => logger.info(s"Reached sink: $e"))) val sourceQueue: SourceQueueWithComplete[Int] = Source .queue[Int](bufferSize, OverflowStrategy.backpressure, maxConcurrentOffers) .groupedWithin(10, 1.seconds) .to(slowSink) .run val doneConsuming: Future[Done] = sourceQueue.watchCompletion() signalWhen(doneConsuming, "consuming") //never completes simulatePublishingFromMulitpleThreads() // Before 2.6.x a stream had to be used to throttle and control the backpressure //simulatePublishingClientsFromStream() // Decide on the stream level, because the OverflowStrategy.backpressure // on the sourceQueue causes an IllegalStateException // Handling this on the stream level allows to restart the stream private def simulatePublishingClientsFromStream() = { val decider: Decider = { case _: IllegalStateException => println("Got backpressure signal for offered element, restart..."); Supervision.Restart case _ => Supervision.Stop } val donePublishing: Future[Done] = Source(1 to numberOfPublishingClients) .mapAsync(10)(offerToSourceQueue) //throttle .withAttributes(ActorAttributes.supervisionStrategy(decider)) .runWith(Sink.ignore) signalWhen(donePublishing, "publishing") } private def simulatePublishingFromMulitpleThreads() = (1 to numberOfPublishingClients).par.foreach(offerToSourceQueue) private def offerToSourceQueue(each: Int) = { sourceQueue.offer(each).map { case QueueOfferResult.Enqueued => logger.info(s"enqueued $each") case QueueOfferResult.Dropped => logger.info(s"dropped $each") case QueueOfferResult.Failure(ex) => logger.info(s"Offer failed: $ex") case QueueOfferResult.QueueClosed => logger.info("Source Queue closed") } } private def signalWhen(done: Future[Done], operation: String) = { done.onComplete { case Success(b) => logger.info(s"Finished: $operation") case Failure(e) => logger.info(s"Failure: $e About to terminate...") system.terminate() } } }
Example 49
Source File: TweetExample.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import java.time.{Instant, ZoneId} import akka.NotUsed import akka.actor.{ActorSystem, Cancellable} import akka.stream.DelayOverflowStrategy import akka.stream.scaladsl.{Flow, MergePrioritized, Sink, Source} import org.apache.commons.lang3.exception.ExceptionUtils import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.duration._ import scala.util.{Failure, Success} object TweetExample extends App { implicit val system = ActorSystem("TweetExample") implicit val ec = system.dispatcher val logger: Logger = LoggerFactory.getLogger(this.getClass) final case class Author(handle: String) final case class Hashtag(name: String) final case class Tweet(author: Author, timestamp: Long, body: String) { def hashtags: Set[Hashtag] = body.split(" ").collect { case t if t.startsWith("#") => Hashtag(t) }.toSet override def toString = { val localDateTime = Instant.ofEpochMilli(timestamp).atZone(ZoneId.systemDefault()).toLocalDateTime s"$localDateTime - ${author.handle} tweeted: ${body.take(5)}..." } } val akkaTag = Hashtag("#akka") val tweetsLowPrio: Source[Tweet, Cancellable] = Source.tick(1.second, 200.millis, NotUsed).map(_ => Tweet(Author("LowPrio"), System.currentTimeMillis, "#other #akka aBody")) val tweetsHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("HighPrio"), System.currentTimeMillis, "#akka #other aBody")) val tweetsVeryHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("VeryHighPrio"), System.currentTimeMillis, "#akka #other aBody")) val limitedTweets: Source[Tweet, NotUsed] = Source.combine(tweetsLowPrio, tweetsHighPrio, tweetsVeryHighPrio)(_ => MergePrioritized(List(1, 10, 100))).take(20) val processingFlow = Flow[Tweet] .filter(_.hashtags.contains(akkaTag)) .wireTap(each => logger.info(s"$each")) val slowDownstream = Flow[Tweet] .delay(5.seconds, DelayOverflowStrategy.backpressure) val processedTweets = limitedTweets .via(processingFlow) .via(slowDownstream) .runWith(Sink.seq) processedTweets.onComplete { case Success(results) => logger.info(s"Successfully processed: ${results.size} tweets") system.terminate() case Failure(exception) => logger.info(s"The stream failed with: ${ExceptionUtils.getRootCause(exception)}") system.terminate() } }
Example 50
Source File: AsyncExecution.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import akka.Done import akka.actor.ActorSystem import akka.stream.ActorAttributes import akka.stream.scaladsl.{Flow, Sink, Source} import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.util.{Failure, Success} object AsyncExecution extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("AsyncExecution") implicit val ec = system.dispatcher def stage(name: String) = Flow[Int] .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}")) def stageBlocking(name: String) = Flow[Int] .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}")) .wireTap(_ => Thread.sleep(5000)) .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking")) def sinkBlocking: Sink[Int, Future[Done]] = Sink.foreach { index: Int => Thread.sleep(2000) logger.info(s"Slow sink processing element $index by ${Thread.currentThread().getName}") } //Adding a custom dispatcher creates an async boundary //see discussion in: https://discuss.lightbend.com/t/how-can-i-make-sure-that-fileio-frompath-is-picking-up-my-dispatcher/6528/4 .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking")) val done = Source(1 to 10) .via(stage("A")).async //When activated instead of alsoTo(sinkBlocking): elements for stage C are held up by stage B //.via(stageBlocking("B")).async .alsoTo(sinkBlocking).async .via(stage("C")).async .runWith(Sink.ignore) //With alsoTo(sinkBlocking) the stages A and C signal "done" too early and thus would terminate the whole stream //The reason for this is the custom dispatcher in sinkBlocking //terminateWhen(done) def terminateWhen(done: Future[_]) = { done.onComplete { case Success(_) => println("Flow Success. About to terminate...") system.terminate() case Failure(e) => println(s"Flow Failure: $e. About to terminate...") system.terminate() } } }
Example 51
Source File: WaitForThreeFlowsToComplete.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream._ import akka.stream.scaladsl._ import akka.util.ByteString import org.slf4j.{Logger, LoggerFactory} import scala.concurrent._ import scala.concurrent.duration._ object WaitForThreeFlowsToComplete extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("WaitForThreeFlowsToComplete") implicit val ec = system.dispatcher def lineSink(filename: String): Sink[String, Future[IOResult]] = Flow[String] .map(s => ByteString(s + "\n")) .wireTap(_ => logger.info(s"Add line to file: $filename")) .toMat(FileIO.toPath(Paths.get(filename)))(Keep.right) //retain to the Future[IOResult] .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking")) val origSource = Source(1 to 10) //scan (= transform) the source val factorialsSource = origSource.scan(BigInt(1))((acc, next) => acc * next) val fastFlow = origSource.runForeach(i => logger.info(s"Reached sink: $i")) val slowFlow1 = factorialsSource .map(_.toString) .runWith(lineSink("factorial1.txt")) val slowFlow2 = factorialsSource .zipWith(Source(0 to 10))((num, idx) => s"$idx! = $num") .throttle(1, 1.second, 1, ThrottleMode.shaping) .runWith(lineSink("factorial2.txt")) val allDone = for { fastFlowDone <- fastFlow slowFlow1Done <- slowFlow1 slowFlow2Done <- slowFlow2 } yield (fastFlowDone, slowFlow1Done, slowFlow2Done) allDone.onComplete { results => logger.info(s"Resulting futures from flows: $results - about to terminate") system.terminate() } }
Example 52
Source File: SplitAfter.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream_shared_state import java.time.{Instant, LocalDateTime, ZoneOffset} import akka.Done import akka.actor.ActorSystem import akka.stream.scaladsl.{Sink, Source} import org.slf4j.{Logger, LoggerFactory} import scala.collection.immutable._ import scala.concurrent.Future import scala.concurrent.duration._ import scala.util.{Failure, Success} object SplitAfter extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("SplitAfter") implicit val executionContext = system.dispatcher private def hasSecondChanged: () => Seq[(Int, Instant)] => Iterable[(Instant, Boolean)] = { () => { slidingElements => { if (slidingElements.size == 2) { val current = slidingElements.head val next = slidingElements.tail.head val currentBucket = LocalDateTime.ofInstant(current._2, ZoneOffset.UTC).withNano(0) val nextBucket = LocalDateTime.ofInstant(next._2, ZoneOffset.UTC).withNano(0) List((current._2, currentBucket != nextBucket)) } else { val current = slidingElements.head List((current._2, false)) } } } } val done: Future[Done] = Source(1 to 100) .throttle(1, 100.millis) .map(elem => (elem, Instant.now())) .sliding(2) // allows to compare this element with the next element .statefulMapConcat(hasSecondChanged) // stateful decision .splitAfter(_._2) // split when second has changed .map(_._1) // proceed with payload .fold(0)((acc, _) => acc + 1) // sum .mergeSubstreams .runWith(Sink.foreach(each => println(s"Elements in group: $each"))) terminateWhen(done) def terminateWhen(done: Future[_]) = { done.onComplete { case Success(_) => println("Flow Success. About to terminate...") system.terminate() case Failure(e) => println(s"Flow Failure: $e. About to terminate...") system.terminate() } } }
Example 53
Source File: ConflateWithSeed.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream_shared_state import akka.actor.ActorSystem import akka.stream.scaladsl.{Flow, Source} import org.slf4j.{Logger, LoggerFactory} import scala.collection._ import scala.concurrent.duration._ import scala.util.Random object ConflateWithSeed extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("ConflateWithSeed") implicit val executionContext = system.dispatcher def seed(i: Int): mutable.LinkedHashMap[Int, Int] = mutable.LinkedHashMap[Int, Int](i -> 1) def aggregate(state: mutable.LinkedHashMap[Int, Int], i: Int): mutable.LinkedHashMap[Int, Int] = { logger.info(s"Got: $i") state.put(i, state.getOrElseUpdate(i, 0) + 1) state } // lazyFlow is not really needed here, but nice to know that it exists // conflateWithSeed invokes the seed method every time, so it // is safe to materialize this flow multiple times val lazyFlow = Flow.lazyFlow(() => Flow[Int] .map(_ => Random.nextInt(100)) .conflateWithSeed(seed)(aggregate) ) Source(1 to 10) .via(lazyFlow) .throttle(1, 1.second) //simulate slow sink .runForeach(each => logger.info(s"1st reached sink: $each")) // Source(1 to 10) // .via(lazyFlow) // .throttle(1, 1.second) //simulate slow sink // .runForeach(each => logger.info(s"2nd reached sink: $each")) }
Example 54
Source File: SplitWhen.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream_shared_state import java.nio.file.Paths import akka.NotUsed import akka.actor.ActorSystem import akka.stream.IOResult import akka.stream.scaladsl.{FileIO, Flow, Framing, Keep, Sink, Source} import akka.util.ByteString import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.util.{Failure, Success} object SplitWhen extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("SplitWhen") implicit val executionContext = system.dispatcher val nonLinearCapacityFactor = 100 //raise to see how it scales val filename = "splitWhen.csv" def genResourceFile() = { logger.info(s"Writing resource file: $filename...") def fileSink(filename: String): Sink[String, Future[IOResult]] = Flow[String] .map(s => ByteString(s + "\n")) .toMat(FileIO.toPath(Paths.get(filename)))(Keep.right) Source.fromIterator(() => (1 to nonLinearCapacityFactor).toList.combinations(2)) .map(each => s"${each.head},${each.last}") .runWith(fileSink(filename)) } val sourceOfLines = FileIO.fromPath(Paths.get(filename)) .via(Framing.delimiter(ByteString("\n"), maximumFrameLength = 1024, allowTruncation = true) .map(_.utf8String)) val csvToRecord: Flow[String, Record, NotUsed] = Flow[String] .map(_.split(",").map(_.trim)) .map(stringArrayToRecord) val terminationHook: Flow[Record, Record, Unit] = Flow[Record] .watchTermination() { (_, done) => done.onComplete { case Failure(err) => logger.info(s"Flow failed: $err") case _ => system.terminate(); logger.info(s"Flow terminated") } } val printSink = Sink.foreach[Vector[Record]](each => println(s"Reached sink: $each")) private def stringArrayToRecord(cols: Array[String]) = Record(cols(0), cols(1)) private def hasKeyChanged = { () => { var lastRecordKey: Option[String] = None currentRecord: Record => lastRecordKey match { case Some(currentRecord.key) | None => lastRecordKey = Some(currentRecord.key) List((currentRecord, false)) case _ => lastRecordKey = Some(currentRecord.key) List((currentRecord, true)) } } } genResourceFile().onComplete { case Success(_) => logger.info(s"Start processing...") sourceOfLines .via(csvToRecord) .via(terminationHook) .statefulMapConcat(hasKeyChanged) // stateful decision .splitWhen(_._2) // split when key has changed .map(_._1) // proceed with payload .fold(Vector.empty[Record])(_ :+ _) // sum payload .mergeSubstreams // better performance, but why? .runWith(printSink) case Failure(exception) => logger.info(s"Exception: $exception") } case class Record(key: String, value: String) }
Example 55
Source File: Hl7TcpClient.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.tcp_to_websockets.hl7mllp import akka.actor.ActorSystem import akka.stream.scaladsl.{Sink, Source, Tcp} import akka.util.ByteString import ca.uhn.hl7v2.AcknowledgmentCode import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.concurrent.duration._ object Hl7TcpClient extends App with MllpProtocol { val logger: Logger = LoggerFactory.getLogger(this.getClass) val system = ActorSystem("Hl7TcpClient") val (address, port) = ("127.0.0.1", 6160) //(1 to 1).par.foreach(each => localStreamingMessageClient(each, 1000, system, address, port)) (1 to 1).par.foreach(each => localSingleMessageClient(each, 100, system, address, port)) def localSingleMessageClient(clientname: Int, numberOfMessages: Int, system: ActorSystem, address: String, port: Int): Unit = { implicit val sys = system implicit val ec = system.dispatcher val connection = Tcp().outgoingConnection(address, port) def sendAndReceive(i: Int): Future[Int] = { val traceID = s"$clientname-${i.toString}" val source = Source.single(ByteString(encodeMllp(generateTestMessage(traceID)))).via(connection) val closed = source.runForeach(each => if (isNACK(each)) { logger.info(s"Client: $clientname-$i received NACK: ${printable(each.utf8String)}") throw new RuntimeException("NACK") } else { logger.info(s"Client: $clientname-$i received ACK: ${printable(each.utf8String)}") } ).recoverWith { case _: RuntimeException => { logger.info(s"About to retry for: $clientname-$i...") sendAndReceive(i) } case e: Throwable => Future.failed(e) } closed.onComplete(each => logger.debug(s"Client: $clientname-$i closed: $each")) Future(i) } Source(1 to numberOfMessages) .throttle(1, 1.second) .mapAsync(1)(i => sendAndReceive(i)) .runWith(Sink.ignore) } def localStreamingMessageClient(id: Int, numberOfMesssages: Int, system: ActorSystem, address: String, port: Int): Unit = { implicit val sys = system implicit val ec = system.dispatcher val connection = Tcp().outgoingConnection(address, port) val hl7MllpMessages= (1 to numberOfMesssages).map(each => ByteString(encodeMllp(generateTestMessage(each.toString)) )) val source = Source(hl7MllpMessages).throttle(10, 1.second).via(connection) val closed = source.runForeach(each => logger.info(s"Client: $id received echo: ${printable(each.utf8String)}")) closed.onComplete(each => logger.info(s"Client: $id closed: $each")) } private def generateTestMessage(senderTraceID: String) = { //For now put the senderTraceID into the "sender lab" field to follow the messages accross the workflow val message = new StringBuilder message ++= s"MSH|^~\\&|$senderTraceID|MCM|LABADT|MCM|198808181126|SECURITY|ADT^A01|1234|P|2.5.1|" message ++= CARRIAGE_RETURN message ++= "EVN|A01|198808181123||" message ++= CARRIAGE_RETURN message ++= "PID|||PATID1234^5^M11^ADT1^MR^MCM~123456789^^^USSSA^SS||EVERYMAN^ADAM^A^III||19610615|M||C|1200 N ELM STREET^^GREENSBORO^NC^27401-1020" message ++= CARRIAGE_RETURN message ++= "NK1|1|JONES^BARBARA^K|SPO^Spouse^HL70063|171 ZOBERLEIN^^ISHPEMING^MI^49849^|" message ++= CARRIAGE_RETURN message ++= "PV1|1|I|2000^2012^01||||004777^LEBAUER^SIDNEY^J.|||SUR||||9|A0|" message ++= CARRIAGE_RETURN message.toString() } private def isNACK(message: ByteString): Boolean = { message.utf8String.contains(AcknowledgmentCode.AE.name()) || message.utf8String.contains(AcknowledgmentCode.AR.name()) || message.utf8String.contains(AcknowledgmentCode.CE.name()) || message.utf8String.contains(AcknowledgmentCode.CR.name()) } }
Example 56
Source File: JMSTextMessageProducerClient.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.jms import akka.Done import akka.actor.ActorSystem import akka.stream.ThrottleMode import akka.stream.alpakka.jms.scaladsl.JmsProducer import akka.stream.alpakka.jms.{JmsProducerSettings, JmsTextMessage} import akka.stream.scaladsl.{Sink, Source} import com.typesafe.config.Config import javax.jms.ConnectionFactory import org.apache.activemq.ActiveMQConnectionFactory import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.concurrent.duration._ object JMSTextMessageProducerClient { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("JMSTextMessageProducerClient") implicit val ec = system.dispatcher //The "failover:" part in the brokerURL instructs ActiveMQ to reconnect on network failure //This does not interfere with the new 1.0-M2 implementation val connectionFactory: ConnectionFactory = new ActiveMQConnectionFactory("artemis", "simetraehcapa", "failover:tcp://127.0.0.1:21616") def main(args: Array[String]): Unit = { jmsTextMessageProducerClient(connectionFactory) } private def jmsTextMessageProducerClient(connectionFactory: ConnectionFactory) = { val producerConfig: Config = system.settings.config.getConfig(JmsProducerSettings.configPath) val jmsProducerSink: Sink[JmsTextMessage, Future[Done]] = JmsProducer.sink( JmsProducerSettings(producerConfig, connectionFactory).withQueue("test-queue") ) Source(1 to 2000000) .throttle(1, 1.second, 1, ThrottleMode.shaping) .wireTap(number => logger.info(s"SEND Msg with TRACE_ID: $number")) .map { number => JmsTextMessage(s"Payload: ${number.toString}") .withProperty("TRACE_ID", number) } .runWith(jmsProducerSink) } }
Example 57
Source File: TiRowRDD.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.tispark import com.pingcap.tikv._ import com.pingcap.tikv.columnar.TiColumnarBatchHelper import com.pingcap.tikv.meta.TiDAGRequest import com.pingcap.tispark.listener.CacheInvalidateListener import com.pingcap.tispark.{TiPartition, TiTableReference} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.{Partition, TaskContext, TaskKilledException} import org.slf4j.Logger import scala.collection.JavaConversions._ class TiRowRDD( override val dagRequest: TiDAGRequest, override val physicalId: Long, val chunkBatchSize: Int, override val tiConf: TiConfiguration, val output: Seq[Attribute], override val tableRef: TiTableReference, @transient private val session: TiSession, @transient private val sparkSession: SparkSession) extends TiRDD(dagRequest, physicalId, tiConf, tableRef, session, sparkSession) { protected val logger: Logger = log // cache invalidation call back function // used for driver to update PD cache private val callBackFunc = CacheInvalidateListener.getInstance() override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = new Iterator[ColumnarBatch] { checkTimezone() private val tiPartition = split.asInstanceOf[TiPartition] private val session = TiSession.getInstance(tiConf) session.injectCallBackFunc(callBackFunc) private val snapshot = session.createSnapshot(dagRequest.getStartTs) private[this] val tasks = tiPartition.tasks private val iterator = snapshot.tableReadChunk(dagRequest, tasks, chunkBatchSize) override def hasNext: Boolean = { // Kill the task in case it has been marked as killed. This logic is from // Interrupted Iterator, but we inline it here instead of wrapping the iterator in order // to avoid performance overhead. if (context.isInterrupted()) { throw new TaskKilledException } iterator.hasNext } override def next(): ColumnarBatch = { TiColumnarBatchHelper.createColumnarBatch(iterator.next) } }.asInstanceOf[Iterator[InternalRow]] }
Example 58
Source File: SparkFunSuite.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import org.apache.spark.internal.Logging import org.scalatest._ import org.slf4j.Logger abstract class SparkFunSuite extends FunSuite with Logging { protected val logger: Logger = log final protected override def withFixture(test: NoArgTest): Outcome = { val testName = test.text val suiteName = this.getClass.getName val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s") try { logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n") test() } finally { logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n") } } protected final def getTestResourcePath(file: String): String = getTestResourceFile(file).getCanonicalPath // helper function protected final def getTestResourceFile(file: String): File = new File(getClass.getClassLoader.getResource(file).getFile) }
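A concrete suite only needs to extend the base class; the test body below is a made-up placeholder to show where the bracketed start/finish log lines from withFixture appear.

class ExampleSuite extends SparkFunSuite {
  test("logger from the base class is available") {
    logger.info("running inside the fixture")
    assert(Seq(1, 2, 3).sum === 6)
  }
}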
Example 59
Source File: Utils.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{File, PrintWriter} import java.nio.file.{Files, Paths} import java.util.Properties import org.slf4j.Logger import scala.collection.JavaConversions._ object Utils { def writeFile(content: String, path: String): Unit = TryResource(new PrintWriter(path))(_.close()) { _.print(content) } def TryResource[T](res: T)(closeOp: T => Unit)(taskOp: T => Unit): Unit = try { taskOp(res) } finally { closeOp(res) } def readFile(path: String): List[String] = Files.readAllLines(Paths.get(path)).toList def getOrThrow(prop: Properties, key: String): String = { val jvmProp = System.getProperty(key) if (jvmProp != null) { jvmProp } else { val v = prop.getProperty(key) if (v == null) { throw new IllegalArgumentException(key + " is null") } else { v } } } def getFlagOrFalse(prop: Properties, key: String): Boolean = getFlag(prop, key, "false") private def getFlag(prop: Properties, key: String, defValue: String): Boolean = getOrElse(prop, key, defValue).equalsIgnoreCase("true") def getOrElse(prop: Properties, key: String, defValue: String): String = { val jvmProp = System.getProperty(key) if (jvmProp != null) { jvmProp } else { Option(prop.getProperty(key)).getOrElse(defValue) } } def getFlagOrTrue(prop: Properties, key: String): Boolean = getFlag(prop, key, "true") def time[R](block: => R)(logger: Logger): R = { val t0 = System.nanoTime() val result = block val t1 = System.nanoTime() logger.info("Elapsed time: " + (t1 - t0) / 1000.0 / 1000.0 / 1000.0 + "s") result } def ensurePath(basePath: String, paths: String*): Boolean = new File(joinPath(basePath, paths: _*)).mkdirs() def joinPath(basePath: String, paths: String*): String = Paths.get(basePath, paths: _*).toAbsolutePath.toString }
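For illustration, Utils.time wraps any block and logs the elapsed seconds, while writeFile/readFile round-trip a file; the logger name and the /tmp path below are made up for this sketch.

import org.slf4j.LoggerFactory
import org.apache.spark.sql.test.Utils

val demoLog = LoggerFactory.getLogger("utils-demo")
val lines = Utils.time {
  Utils.writeFile("hello\nworld\n", "/tmp/utils-demo.txt")
  Utils.readFile("/tmp/utils-demo.txt")
}(demoLog)
lines.foreach(line => demoLog.info(line))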
Example 60
Source File: Logging.scala From spark-redis with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.redislabs.provider.redis.util

import org.slf4j.{Logger, LoggerFactory}

trait Logging {

  @transient private var _logger: Logger = _

  protected def loggerName: String = this.getClass.getName.stripSuffix("$")

  protected def logger: Logger = {
    if (_logger == null) {
      _logger = LoggerFactory.getLogger(loggerName)
    }
    _logger
  }

  def logInfo(msg: => String): Unit = {
    if (logger.isInfoEnabled) {
      _logger.info(msg)
    }
  }

  def logDebug(msg: => String): Unit = {
    if (logger.isDebugEnabled) {
      _logger.debug(msg)
    }
  }

  def logTrace(msg: => String): Unit = {
    if (logger.isTraceEnabled) {
      _logger.trace(msg)
    }
  }
}
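A class that wants this lazy, serialization-friendly logging simply mixes the trait in; the class and messages below are hypothetical.

class RedisDemoWriter extends Logging {
  def write(key: String, value: String): Unit = {
    logDebug(s"writing key $key")
    logInfo(s"wrote $key -> $value")
  }
}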
Example 61
Source File: CacheManagerTokenStore.scala From meteorite-core with Apache License 2.0 | 5 votes |
package bi.meteorite.core.security.tokenprovider import com.hazelcast.core.HazelcastInstance import org.ops4j.pax.cdi.api.OsgiService import org.osgi.framework.BundleContext import org.osgi.framework.wiring.BundleWiring import org.slf4j.Logger import org.slf4j.LoggerFactory import javax.annotation.PostConstruct import javax.inject.{Named, Inject, Singleton} @Singleton @Named("CManager") object CacheManagerTokenStore { private var logger: Logger = LoggerFactory.getLogger(classOf[CacheManagerTokenStore]) } @Singleton class CacheManagerTokenStore extends TokenStorageProvider { @OsgiService @Inject private var cacheManager: HazelcastInstance = null @Inject private var bcontext: BundleContext = null @PostConstruct def init { CacheManagerTokenStore.logger.debug("*** Activating CacheManager") val c: CompositeClassLoader = new CompositeClassLoader val tccl: ClassLoader = Thread.currentThread.getContextClassLoader try { cacheManager.getConfig.setClassLoader(c) } finally { cacheManager.getConfig.setClassLoader(c) } } def addToken(token: Token) { addInvokerClassLoader(this.getClass.getClassLoader) cacheManager.getMap("tokens").put(token.getToken, token) } def updateToken(token: Token) { } def getToken(token: String): Token = { addInvokerClassLoader(getInvokerClassLoader) cacheManager.getMap("tokens").get(token).asInstanceOf[Token] } def hasToken(token: String): Boolean = { addInvokerClassLoader(getInvokerClassLoader) cacheManager.getMap("tokens").get(token) != null } def removeToken(token: Token) { } def setCacheManagerService(hazel: HazelcastInstance) { this.cacheManager = hazel } protected def addInvokerClassLoader(cl: ClassLoader) { getInstance.getConfig.getClassLoader.asInstanceOf[CompositeClassLoader].add(cl) } protected def getInvokerClassLoader: ClassLoader = { bcontext.getBundle.adapt(classOf[BundleWiring]).getClassLoader } def setBcontext(bcontext: BundleContext) { this.bcontext = bcontext } def getInstance: HazelcastInstance = { cacheManager } }
Example 62
Source File: StreamingApp.scala From odsc-east-realish-predictions with Apache License 2.0 | 5 votes |
package com.twilio.open.odsc.realish.utils import com.twilio.open.odsc.realish.listeners.InsightsQueryListener import org.apache.spark.sql.SparkSession import org.slf4j.Logger trait StreamingApp { val logger: Logger def run(): Unit } trait Restartable { def restart(): Unit } trait RestartableStreamingApp extends StreamingApp with Restartable { val spark: SparkSession val streamingQueryListener: InsightsQueryListener = { new InsightsQueryListener(spark, restart) } def monitoredRun(): Unit = { run() monitorStreams() } def restart(): Unit = { logger.info(s"restarting the application. cleaning up old stream listener and streams") val streams = spark.streams streams.removeListener(streamingQueryListener) streams.active.foreach { stream => logger.info(s"stream_name=${stream.name} state=active status=${stream.status} action=stop_stream") stream.stop() } logger.info(s"attempting to restart the application") monitoredRun() } }
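A concrete application can pass the SparkSession as a constructor val so it is initialized before the trait builds its listener. monitorStreams is not shown in the excerpt, so the blocking implementation below is an assumption; the class name is illustrative.

import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

class DemoStreamingApp(val spark: SparkSession) extends RestartableStreamingApp {
  val logger: Logger = LoggerFactory.getLogger(classOf[DemoStreamingApp])

  override def run(): Unit = {
    spark.streams.addListener(streamingQueryListener)
    // start the streaming queries here
  }

  // assumed: block until any query terminates; restart() decides what happens next
  override def monitorStreams(): Unit = spark.streams.awaitAnyTermination()
}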
Example 63
Source File: InsightsQueryListener.scala From odsc-east-realish-predictions with Apache License 2.0 | 5 votes |
package com.twilio.open.odsc.realish.listeners import kamon.Kamon import org.apache.spark.sql.SparkSession import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ object InsightsQueryListener { val log: Logger = LoggerFactory.getLogger(classOf[InsightsQueryListener]) def apply(spark: SparkSession, restart: () => Unit): InsightsQueryListener = { new InsightsQueryListener(spark, restart) } } class InsightsQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener { import InsightsQueryListener._ private val streams = sparkSession.streams private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName) def doubleToLong(value: Double): Long = { value match { case a if a.isInfinite => 0L case b if b == Math.floor(b) => b.toLong case c => Math.rint(c).toLong } } override def onQueryStarted(event: QueryStartedEvent): Unit = { if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}") } //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = { val progress = progressEvent.progress val inputRowsPerSecond = progress.inputRowsPerSecond val processedRowsPerSecond = progress.processedRowsPerSecond // note: leaving this here to remind that we can do fancy things with this for metrics sake val sources = progress.sources.map { source => val description = source.description val startOffset = source.startOffset val endOffset = source.endOffset val inputRows = source.numInputRows s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows" } val tags = defaultTag + ( "stream_name" -> progress.name ) Kamon.metrics.histogram("spark.query.progress.processed.rows.rate", tags).record(doubleToLong(processedRowsPerSecond)) Kamon.metrics.histogram("spark.query.progress.input.rows.rate", tags).record(doubleToLong(inputRowsPerSecond)) // todo - could take num.rows.total, given total percentage of records that will be watermarked going forwards... (simple metric that say loss_percentage due to watermark) // should give min, avg, max, watermark val eventTime = progress.eventTime if (eventTime != null) { log.info(s"event.time=${eventTime.asScala.mkString(",")}") } log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}") } override def onQueryTerminated(event: QueryTerminatedEvent): Unit = { log.warn(s"queryTerminated: $event") val possibleStreamingQuery = streams.get(event.id) if (possibleStreamingQuery != null) { val progress = possibleStreamingQuery.lastProgress val sources = progress.sources log.warn(s"last.progress.sources sources=$sources") } event.exception match { case Some(exception) => log.warn(s"queryEndedWithException exception=$exception resetting.all.streams") restart() case None => } } }
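Wiring the listener into a session is a one-liner; the restart callback below is a placeholder rather than the project's real restart logic.

import org.apache.spark.sql.SparkSession
import org.slf4j.LoggerFactory

val demoLog = LoggerFactory.getLogger("insights-demo")
val spark = SparkSession.builder().appName("insights-demo").getOrCreate()
spark.streams.addListener(InsightsQueryListener(spark, () => demoLog.warn("restart requested")))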
Example 64
Source File: ImapTestUtils.scala From gatling-imap with GNU Affero General Public License v3.0 | 5 votes |
package com.linagora.gatling.imap import java.net.URI import java.util.Properties import org.slf4j.Logger import com.yahoo.imapnio.async.client.ImapAsyncSession.DebugMode import com.yahoo.imapnio.async.client.{ImapAsyncClient, ImapAsyncSession, ImapAsyncSessionConfig} import scala.concurrent.Future import scala.concurrent.ExecutionContext.Implicits.global trait ImapTestUtils { protected def logger: Logger val threadNumber = 4 val config = new Properties() val imapClient = new ImapAsyncClient(threadNumber) def connect(port: Int): Future[ImapAsyncSession] = { val serverUri = new URI(s"imap://localhost:$port") val config = new ImapAsyncSessionConfig config.setConnectionTimeoutMillis(5000) config.setReadTimeoutMillis(6000) val sniNames = null val localAddress = null Future { imapClient .createSession(serverUri, config, localAddress, sniNames, DebugMode.DEBUG_ON, "ImapTestUtilsCreated") .get() .getSession } } }
Example 65
Source File: ImapSessionsSpec.scala From gatling-imap with GNU Affero General Public License v3.0 | 5 votes |
package com.linagora.gatling.imap.protocol.command import java.util.Properties import akka.actor.ActorSystem import akka.testkit.TestProbe import com.linagora.gatling.imap.Fixture.bart import com.linagora.gatling.imap.protocol.{Command, ImapProtocol, ImapResponses, ImapSessions, Response, UserId} import com.linagora.gatling.imap.{CyrusServer, ImapTestUtils, RunningServer} import org.scalatest.{BeforeAndAfterEach, Matchers, WordSpec} import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.duration._ class ImapSessionsSpec extends WordSpec with Matchers with ImapTestUtils with BeforeAndAfterEach { val logger: Logger = LoggerFactory.getLogger(this.getClass.getCanonicalName) private val server: RunningServer = CyrusServer.start() override def beforeEach(): Unit = { server.addUser(bart) } override protected def afterEach(): Unit = { system.terminate() server.stop() } implicit lazy val system: ActorSystem = ActorSystem("LoginHandlerSpec") "the imap sessions actor" should { "log a user in" in { val config = new Properties() val protocol = ImapProtocol("localhost", server.mappedImapPort(), config) val sessions = system.actorOf(ImapSessions.props(protocol)) val probe = TestProbe() val userId = UserId(1) probe.send(sessions, Command.Connect(userId)) probe.expectMsg(10.second, Response.Connected(ImapResponses.empty)) probe.send(sessions, Command.Login(userId, bart)) probe.expectMsgPF(10.second) { case Response.LoggedIn(responses: ImapResponses) => responses.isOk shouldBe true } } } }
Example 66
Source File: CyrusServer.scala From gatling-imap with GNU Affero General Public License v3.0 | 5 votes |
package com.linagora.gatling.imap import org.slf4j.{Logger, LoggerFactory} import org.testcontainers.containers.GenericContainer import com.yahoo.imapnio.async.request.CreateFolderCommand import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, ExecutionContextExecutor} object CyrusServer extends Server { private val imapPort = 143 private val logger: Logger = LoggerFactory.getLogger(CyrusServer.getClass) class RunningCyrusServer(val container: GenericContainer[_]) extends RunningServer with ImapTestUtils { protected val logger: Logger = CyrusServer.logger lazy val mappedImapPort: Integer = container.getMappedPort(imapPort) def addUser(login: String, password: String): Unit = { container.execInContainer("bash", "-c", s"echo $password | saslpasswd2 -u test -c $login -p") implicit val executionContext: ExecutionContextExecutor = ExecutionContext.global Await.result( connect(mappedImapPort) .flatMap(implicit session => for { _ <- Imap.login("cyrus", "cyrus") _ <- Imap.rawCommand(new CreateFolderCommand(s"user.$login")) _ <- Imap.disconnect() } yield ()), 1.minute) } def stop(): Unit = container.stop() } def start(): RunningServer = { val cyrus = new GenericContainer("linagora/cyrus-imap") cyrus.addExposedPort(imapPort) cyrus.start() new RunningCyrusServer(cyrus) } }
Example 67
Source File: GenericMainClass.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.spark import java.text.SimpleDateFormat import java.util.Date import com.typesafe.config.{Config, ConfigFactory} import org.apache.hadoop.fs.FileSystem import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ trait GenericMainClass { self: SparkManager => val genericMainClassLogger: Logger = LoggerFactory.getLogger("SparkManager") private def makeFileSystem(session: SparkSession): FileSystem = { if (session.sparkContext.isLocal) { FileSystem.getLocal(session.sparkContext.hadoopConfiguration) } else { FileSystem.get(session.sparkContext.hadoopConfiguration) } } // scalastyle:off private def getGlobalConfig: Config = { genericMainClassLogger.debug("system environment vars") for ((k, v) <- System.getenv().asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v") genericMainClassLogger.debug("system properties") for ((k, v) <- System.getProperties.asScala.toSeq.sortBy(_._1)) genericMainClassLogger.debug(s"$k -> $v") ConfigFactory.load() } // scalastyle:on }
Example 68
Source File: SparkManager.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.spark import com.typesafe.config.Config import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters._ trait SparkManager { val sparkManagerLogger: Logger = LoggerFactory.getLogger("SparkManager") protected def defaultParallelism(implicit sparkSession: SparkSession, config: Config): Int = { sparkSession.conf.getOption(SparkConfigurationKeys.SPARK_EXECUTOR_INSTANCES) match { case Some(instances) => sparkSession.conf.getOption(SparkConfigurationKeys.SPARK_CORES).getOrElse("1").toInt * instances.toInt case None => sparkManagerLogger.info("Spark is configured with dynamic allocation, default parallelism will be gathered from app " + "conf: " + "next.process.parallelism") if (config.hasPath(SparkConfigurationKeys.PARALLELISM)) { config.getInt(SparkConfigurationKeys.PARALLELISM) } else { sparkManagerLogger.info("next.process.parallelism was not set fallback to sparkSession.defaultParallelism") sparkSession.sparkContext.defaultParallelism } } } }
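As a rough usage sketch, a job mixing in the trait could size a repartition from defaultParallelism; the object and method names are made up, and the implicit Config is assumed to carry the project's next.process.parallelism key.

import com.typesafe.config.Config
import org.apache.spark.sql.{DataFrame, SparkSession}

object ParallelismDemo extends SparkManager {
  def repartitionForJob(df: DataFrame)(implicit spark: SparkSession, config: Config): DataFrame = {
    val parallelism = defaultParallelism
    sparkManagerLogger.info(s"repartitioning to $parallelism partitions")
    df.repartition(parallelism)
  }
}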
Example 69
Source File: SchemaManagerSparkApp.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.spark import java.nio.ByteOrder import com.typesafe.config.{Config, ConfigFactory} import it.agilelab.darwin.app.spark.classes._ import it.agilelab.darwin.manager.AvroSchemaManagerFactory import org.apache.avro.reflect.ReflectData import org.apache.hadoop.fs.FileSystem import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} object SchemaManagerSparkApp extends GenericMainClass with SparkManager { val mainLogger: Logger = LoggerFactory.getLogger("SchemaManagerSparkApp") val endianness: ByteOrder = ByteOrder.BIG_ENDIAN override protected def runJob(settings: Config)(implicit fs: FileSystem, sparkSession: SparkSession): Int = { import sparkSession.implicits._ val ds = sparkSession.createDataset(sparkSession.sparkContext.parallelize(1 to 1000, 20)) mainLogger.info("Registering schemas") // val reflections = new Reflections("it.agilelab.darwin.app.spark.classes") // val annotationClass: Class[AvroSerde] = classOf[AvroSerde] // val classes = reflections.getTypesAnnotatedWith(annotationClass).asScala.toSeq // .filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers)) // val schemas = classes.map(c => ReflectData.get().getSchema(Class.forName(c.getName))) val schemas = Seq(ReflectData.get().getSchema(classOf[Menu]), ReflectData.get().getSchema(classOf[MenuItem]), ReflectData.get().getSchema(classOf[Food]), ReflectData.get().getSchema(classOf[Order]), ReflectData.get().getSchema(classOf[Price])) val conf = ConfigFactory.load() val manager = AvroSchemaManagerFactory.initialize(conf) val registeredIDs: Seq[Long] = manager.registerAll(schemas).map(_._1) mainLogger.info("Schemas registered") mainLogger.info("Getting ID for a schema") manager.getId(ReflectData.get().getSchema(classOf[Menu])) mainLogger.info("ID retrieved for the schema") mainLogger.info("Get Schema from ID") val d2 = ds.map { x => AvroSchemaManagerFactory.initialize(conf).getSchema(registeredIDs(x % registeredIDs.size)) x } d2.count() mainLogger.info("All schemas obtained") 10 } override protected def handleException(exception: Throwable, applicationSettings: Config): Unit = { mainLogger.error(exception.getMessage) } }
Example 70
Source File: Logging.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.pipelines import org.slf4j.{Logger, LoggerFactory} trait Logging { // Make the log field transient so that objects with Logging can // be serialized and used on another machine @transient private var log_ : Logger = null // Method to get or create the logger for this object protected def log: Logger = { if (log_ == null) { var className = this.getClass.getName // Ignore trailing $'s in the class names for Scala objects if (className.endsWith("$")) { className = className.substring(0, className.length - 1) } log_ = LoggerFactory.getLogger(className) } log_ } // Log methods that take only a String protected def logInfo(msg: => String) { if (log.isInfoEnabled) log.info(msg) } protected def logDebug(msg: => String) { if (log.isDebugEnabled) log.debug(msg) } protected def logTrace(msg: => String) { if (log.isTraceEnabled) log.trace(msg) } protected def logWarning(msg: => String) { if (log.isWarnEnabled) log.warn(msg) } protected def logError(msg: => String) { if (log.isErrorEnabled) log.error(msg) } // Log methods that take Throwables (Exceptions/Errors) too protected def logInfo(msg: => String, throwable: Throwable) { if (log.isInfoEnabled) log.info(msg, throwable) } protected def logDebug(msg: => String, throwable: Throwable) { if (log.isDebugEnabled) log.debug(msg, throwable) } protected def logTrace(msg: => String, throwable: Throwable) { if (log.isTraceEnabled) log.trace(msg, throwable) } protected def logWarning(msg: => String, throwable: Throwable) { if (log.isWarnEnabled) log.warn(msg, throwable) } protected def logError(msg: => String, throwable: Throwable) { if (log.isErrorEnabled) log.error(msg, throwable) } }
Example 71
Source File: BackgroundAsyncHandler.scala From play-ws with Apache License 2.0 | 5 votes |
package play.api.libs.ws.ahc.cache import play.shaded.ahc.org.asynchttpclient._ import com.typesafe.play.cachecontrol.ResponseCachingActions.DoCacheResponse import com.typesafe.play.cachecontrol.ResponseCachingActions.DoNotCacheResponse import org.slf4j.Logger import org.slf4j.LoggerFactory import play.shaded.ahc.io.netty.handler.codec.http.HttpHeaders import scala.concurrent.Await class BackgroundAsyncHandler[T](request: Request, cache: AhcHttpCache, ahcConfig: AsyncHttpClientConfig) extends AsyncHandler[T] with Debug { import BackgroundAsyncHandler.logger private val timeout = scala.concurrent.duration.Duration(1, "second") private val builder = new CacheableResponseBuilder(ahcConfig) private val key = EffectiveURIKey(request) @throws(classOf[Exception]) def onBodyPartReceived(content: HttpResponseBodyPart): AsyncHandler.State = { builder.accumulate(content) AsyncHandler.State.CONTINUE } @throws(classOf[Exception]) def onStatusReceived(status: HttpResponseStatus): AsyncHandler.State = { builder.reset() builder.accumulate(status) AsyncHandler.State.CONTINUE } @throws(classOf[Exception]) def onHeadersReceived(headers: HttpHeaders): AsyncHandler.State = { builder.accumulate(headers) AsyncHandler.State.CONTINUE } def onThrowable(t: Throwable): Unit = { logger.error(s"onThrowable: received on request $request", t) } override def onCompleted(): T = { val response: CacheableResponse = builder.build if (cache.isNotModified(response)) { processNotModifiedResponse(response) } else { processFullResponse(response) } response.asInstanceOf[T] } protected def processFullResponse(fullResponse: CacheableResponse): Unit = { logger.debug(s"processFullResponse: fullResponse = ${debug(fullResponse)}") cache.cachingAction(request, fullResponse) match { case DoNotCacheResponse(reason) => logger.debug(s"onCompleted: DO NOT CACHE, because $reason") case DoCacheResponse(reason) => logger.debug(s"isCacheable: DO CACHE, because $reason") cache.cacheResponse(request, fullResponse) } } protected def processNotModifiedResponse(notModifiedResponse: CacheableResponse): Unit = { logger.trace(s"processNotModifiedResponse: notModifiedResponse = $notModifiedResponse") val result = Await.result(cache.get(key), timeout) logger.debug(s"processNotModifiedResponse: result = $result") // FIXME XXX Find the response which matches the secondary keys... result match { case Some(entry) => val newHeaders = notModifiedResponse.getHeaders val freshResponse = cache.freshenResponse(newHeaders, entry.response) cache.cacheResponse(request, freshResponse) case None => // XXX FIXME what do we do if we have a 304 and there's nothing in the cache for it? // If we make another call and it sends us another 304 back, we can get stuck in an // endless loop? } } } object BackgroundAsyncHandler { private val logger: Logger = LoggerFactory.getLogger("play.api.libs.ws.ahc.cache.BackgroundAsyncHandler") }
Example 72
Source File: KryoStringEventBatch.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.log import com.esotericsoftware.kryo.{Kryo, Serializer} import com.esotericsoftware.kryo.io.{Input, Output} import com.yahoo.maha.data.StringEventBatch import org.slf4j.{Logger, LoggerFactory} object KryoStringEventBatch { private val logger: Logger = LoggerFactory.getLogger(classOf[KryoStringEventBatch]) } class KryoStringEventBatch extends Serializer[StringEventBatch] { KryoStringEventBatch.logger.info("Created instance of " + this.getClass.getSimpleName) override def write(kryo: Kryo, output: Output, stringEventBatch: StringEventBatch): Unit = { val size: Int = stringEventBatch.getEvents.size output.writeInt(size) stringEventBatch.getEvents.stream().forEach(output.writeString(_)) } override def read(kryo: Kryo, input: Input, `type`: Class[StringEventBatch]): StringEventBatch = { val size: Int = input.readInt val builder: StringEventBatch.Builder = new StringEventBatch.Builder(size) var i: Int = 0 while ( i < size) { builder.add(input.readString) i += 1 } builder.build.asInstanceOf[StringEventBatch] } }
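The serializer is only useful once it is registered with a Kryo instance; the round trip below is a sketch that mirrors the Builder usage from the read method above, with made-up event payloads.

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import com.yahoo.maha.data.StringEventBatch
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

val kryo = new Kryo()
kryo.register(classOf[StringEventBatch], new KryoStringEventBatch)

val builder = new StringEventBatch.Builder(2)
builder.add("event-1")
builder.add("event-2")
val batch = builder.build.asInstanceOf[StringEventBatch]

val bytes = new ByteArrayOutputStream()
val output = new Output(bytes)
kryo.writeObject(output, batch)
output.close()

val roundTripped = kryo.readObject(new Input(new ByteArrayInputStream(bytes.toByteArray)), classOf[StringEventBatch])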
Example 73
Source File: PlanResultLogger.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug.runtime.plans import com.atomist.rug.spi.Handlers.Status.Failure import com.atomist.rug.spi.Handlers._ import org.slf4j.Logger import scala.annotation.tailrec import scala.concurrent.Await import scala.concurrent.duration.DurationInt class PlanResultLogger(val logger: Logger) { def log(planResult: PlanResult): Unit = { logEvents(planResult.log) } @tailrec private def logEvents(log: Seq[PlanLogEvent]): Unit = { log.headOption match { case Some(head) => val remainingEvents = head match { case logError: PlanLogError => logger.error("Error running plan.", logError.error) log.tail case result: InstructionResult if result.response.status == Failure => logger.error("Failure running plan.", result) log.tail case result: NestedPlanRun => val planResult = result.planResult log.tail ++ planResult.log case _ => log.tail } logEvents(remainingEvents) case None => } } }
Example 74
Source File: StatusCheckerModule.scala From CloudGenesis with Apache License 2.0 | 5 votes |
package com.lifeway.cloudops.cloudformation import akka.actor.{ActorSystem, Scheduler} import com.amazonaws.services.cloudformation.AmazonCloudFormation import scala.concurrent.{Await, ExecutionContext, Future} import scala.concurrent.duration._ import akka.pattern.after import com.amazonaws.AmazonServiceException import com.lifeway.cloudops.cloudformation.Types.StackName import org.scalactic._ import org.slf4j.Logger trait StatusCheckerModule { val logger: Logger def waitForStatus( actorSystem: ActorSystem, maxRetries: Int = 100, maxWaitTime: Duration = 5.minutes, retrySpeed: FiniteDuration = 3.seconds)(statusFetcher: (AmazonCloudFormation, String) => (String, String))( cfClient: AmazonCloudFormation, id: String, stackName: StackName, waitForStatus: Types.Status, failIfInStatus: Seq[Types.Status]): Unit Or AutomationError = { implicit val ec: ExecutionContext = actorSystem.dispatcher implicit val sch: Scheduler = actorSystem.scheduler sealed trait StatusException extends Exception case object PendingException extends StatusException case class FailedException(msg: String) extends StatusException def checkStatus: Unit = { val (status, reason) = statusFetcher(cfClient, id) if (status == waitForStatus) () else if (failIfInStatus.contains(status)) throw new FailedException(s"Unexpected stack status: $status. Reason: $reason") else throw PendingException } def retry(op: => Unit, delay: FiniteDuration, retries: Int): Future[Unit Or AutomationError] = Future(op).map(x => Good(x)) recoverWith { case PendingException if retries > 0 => after(delay, sch)(retry(op, delay, retries - 1)) case FailedException(err) => Future.successful( Bad(StackError(s"Failed to reach expected status of $waitForStatus for $stackName due to: $err"))) case t: AmazonServiceException if t.getStatusCode >= 500 => logger.error(s"AWS 500 Service Exception: Failed to reach expected status of $waitForStatus for $stackName", t) Future.successful( Bad(ServiceError( s"AWS 500 Service Exception: Failed to reach expected status of $waitForStatus for $stackName"))) case _ => Future.successful(Bad(StackError(s"Failed to reach expected status of $waitForStatus for $stackName"))) } //Retry to find final status for up to max time... try { Await.result(retry(checkStatus, retrySpeed, maxRetries), maxWaitTime) } catch { case _: Throwable => Bad( StackError( s"Failed to wait to reach expected status of $waitForStatus for $stackName due to process timeout")) } } }
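One possible statusFetcher, written against the AWS SDK v1 CloudFormation client; it assumes the id argument is a stack name or ARN that DescribeStacks can resolve, which the excerpt does not pin down.

import com.amazonaws.services.cloudformation.AmazonCloudFormation
import com.amazonaws.services.cloudformation.model.DescribeStacksRequest

val describeStackStatus: (AmazonCloudFormation, String) => (String, String) =
  (cfClient, id) => {
    val stack = cfClient
      .describeStacks(new DescribeStacksRequest().withStackName(id))
      .getStacks
      .get(0)
    (stack.getStackStatus, Option(stack.getStackStatusReason).getOrElse(""))
  }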
Example 75
Source File: Application.scala From spring-scala-examples with Apache License 2.0 | 5 votes |
package hello

import org.slf4j.Logger
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.CommandLineRunner
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.jdbc.core.JdbcTemplate

import collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, ListBuffer}

@SpringBootApplication
class Application extends CommandLineRunner {

  @Autowired
  var jdbcTemplate: JdbcTemplate = _

  val log: Logger = LoggerFactory.getLogger(classOf[Application])

  override def run(args: String*): Unit = {
    log.info("Creating tables")
    jdbcTemplate.execute("DROP TABLE customers IF EXISTS")
    jdbcTemplate.execute("CREATE TABLE customers(" +
      "id SERIAL, first_name VARCHAR(255), last_name VARCHAR(255))")

    // Split up the array of whole names into an array of first/last names
    val splitUpNames: mutable.Buffer[Array[AnyRef]] = ListBuffer("John Woo", "Jeff Dean", "Josh Bloch", "Josh Long")
      .map(_.split(" ")).asInstanceOf[mutable.Buffer[Array[AnyRef]]]

    // Print out each first/last name pair before loading it
    splitUpNames.foreach(name => log.info("Inserting customer record for %s %s".format(name(0), name(1))))

    // Uses JdbcTemplate's batchUpdate operation to bulk load data
    jdbcTemplate.batchUpdate("INSERT INTO customers(first_name, last_name) VALUES (?,?)", splitUpNames.asJava)

    log.info("Querying for customer records where first_name = 'Josh':")
    // jdbcTemplate.query(
    //   "SELECT id, first_name, last_name FROM customers WHERE first_name = ?", new Object[] { "Josh" },
    //   (rs, rowNum) -> new Customer(rs.getLong("id"), rs.getString("first_name"), rs.getString("last_name"))
    // ).forEach(customer -> log.info(customer.toString()))
  }
}

object Application extends App {
  SpringApplication.run(classOf[Application], args:_*)
}
Example 76
Source File: Application.scala From spring-scala-examples with Apache License 2.0 | 5 votes |
package hello

import java.sql.ResultSet

import org.slf4j.Logger
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.CommandLineRunner
import org.springframework.boot.SpringApplication
import org.springframework.boot.autoconfigure.SpringBootApplication
import org.springframework.jdbc.core.{JdbcTemplate, RowMapper}

import collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ListBuffer

@SpringBootApplication
class Application extends CommandLineRunner {

  @Autowired
  var jdbcTemplate: JdbcTemplate = _

  val log: Logger = LoggerFactory.getLogger(classOf[Application])

  override def run(args: String*): Unit = {
    log.info("Creating tables")
    jdbcTemplate.execute("DROP TABLE customers IF EXISTS")
    jdbcTemplate.execute("CREATE TABLE customers(" +
      "id SERIAL, first_name VARCHAR(255), last_name VARCHAR(255))")

    val splitUpNames = ListBuffer("John Woo", "Jeff Dean", "Josh Bloch", "Josh Long").map(_.split(" "))
    splitUpNames.foreach(name => log.info("Inserting customer record for %s %s".format(name(0), name(1))))

    jdbcTemplate.batchUpdate("INSERT INTO customers(first_name, last_name) VALUES (?,?)",
      splitUpNames.asInstanceOf[mutable.Buffer[Array[AnyRef]]].asJava)

    log.info("Querying for customer records where first_name = 'Josh':")
    jdbcTemplate.query(
      "SELECT id, first_name, last_name FROM customers WHERE first_name = ?", Array("Josh").asInstanceOf[Array[AnyRef]],
      // no Java 8 Lambda support in Scala pre 2.12
      new RowMapper[Customer] {
        override def mapRow(rs: ResultSet, rowNum: Int): Customer =
          new Customer(rs.getLong("id"), rs.getString("first_name"), rs.getString("last_name"))
      })
      // Works in Scala 2.12
      // (rs: ResultSet, rowNum: Int) => new Customer(rs.getLong("id"), rs.getString("first_name"), rs.getString("last_name")) )
      .asScala.foreach((customer: Customer) => log.info(customer.toString))
  }
}

object Application extends App {
  SpringApplication.run(classOf[Application], args:_*)
}
Example 77
Source File: Application.scala From spring-scala-examples with Apache License 2.0 | 5 votes |
package hello import org.slf4j.{Logger, LoggerFactory} import org.springframework.boot.autoconfigure.SpringBootApplication import org.springframework.boot.{CommandLineRunner, SpringApplication} import org.springframework.http.ResponseEntity import org.springframework.util.concurrent.{ListenableFuture, ListenableFutureCallback, SuccessCallback} import org.springframework.web.client.{AsyncRestTemplate, RestTemplate} @SpringBootApplication class Application extends CommandLineRunner{ val log: Logger = LoggerFactory.getLogger(classOf[Application]) override def run(args: String*): Unit = { val restTemplate = new RestTemplate() // synchronous version val quote : Quote = restTemplate.getForObject("http://gturnquist-quoters.cfapps.io/api/random", classOf[Quote]) log.info(quote.toString) // async version val asyncRestTemplate = new AsyncRestTemplate() val quoteFuture : ListenableFuture[ResponseEntity[Quote]] = asyncRestTemplate.getForEntity("http://gturnquist-quoters.cfapps.io/api/random", classOf[Quote]) quoteFuture.addCallback(new ListenableFutureCallback[ResponseEntity[Quote]]() { override def onSuccess(entity : ResponseEntity[Quote]) : Unit = log.info("async: " + entity.getBody.toString) override def onFailure(t : Throwable) : Unit = log.error("Async error", t) }) } } object Application extends App { SpringApplication.run(classOf[Application], args:_*) }
Example 78
Source File: MyLogger.scala From Squerall with Apache License 2.0 | 5 votes |
package org.squerall import org.slf4j.{Logger, LoggerFactory} import scala.language.implicitConversions trait MyLogger { @transient private var log_ : Logger = _ // Method to get or create the logger for this object protected def log: Logger = { if (log_ == null) { log_ = LoggerFactory.getLogger(logName) } log_ } // Method to get the logger name for this object protected def logName: String = { // Ignore trailing $'s in the class names for Scala objects this.getClass.getName.stripSuffix("$") } def trace(msg: => String): Unit = { if (log.isTraceEnabled) log.trace(msg) } def trace(msg: => String, e: Throwable): Unit = { if (log.isTraceEnabled) log.trace(msg, e) } def trace(msg: => String, o: Any, os: Any*): Unit = { if (log.isTraceEnabled) log.trace(msg, o, os) } def debug(msg: => String): Unit = { if (log.isDebugEnabled) log.debug(msg) } def debug(msg: => String, e: Throwable): Unit = { if (log.isDebugEnabled) log.debug(msg, e) } def debug(msg: => String, o: Any, os: Any*): Unit = { if (log.isDebugEnabled) log.debug(msg, o, os) } def info(msg: => String): Unit = { if (log.isInfoEnabled) log.info(msg) } def info(msg: => String, e: Throwable): Unit = { if (log.isInfoEnabled) log.info(msg, e) } def info(msg: => String, o: Any, os: Any*): Unit = { if (log.isInfoEnabled) log.info(msg, o, os) } def warn(msg: => String): Unit = { if (log.isWarnEnabled) log.warn(msg) } def warn(msg: => String, e: Throwable): Unit = { if (log.isWarnEnabled) log.warn(msg, e) } def warn(msg: => String, o: Any, os: Any*): Unit = { if (log.isWarnEnabled) log.warn(msg, o, os) } def error(msg: => String): Unit = { if (log.isErrorEnabled) log.error(msg) } def error(msg: => String, e: Throwable): Unit = { if (log.isErrorEnabled) log.error(msg, e) } def error(msg: => String, o: Any, os: Any*): Unit = { if (log.isErrorEnabled) log.error(msg, o, os) } def mark(msg: => String): Unit = { if (log.isErrorEnabled) log.error(msg) } def mark(msg: => String, e: Throwable): Unit = { if (log.isErrorEnabled) log.error(msg, e) } def mark(msg: => String, o: Any, os: Any*): Unit = { if (log.isErrorEnabled) log.error(msg, o, os) } } private object MyLogger { implicit def logging2Logger(anything: MyLogger): Logger = anything.log }
Example 79
Source File: StructuredLogSpanHandler.scala From money with Apache License 2.0 | 5 votes |
package com.comcast.money.core.handlers import com.comcast.money.api.{ Note, SpanInfo } import com.typesafe.config.Config import org.slf4j.{ Logger, LoggerFactory, MDC } class StructuredLogSpanHandler( val logger: Logger = LoggerFactory.getLogger(classOf[StructuredLogSpanHandler]), val mdcFunc: (String, String) => Unit = (x: String, y: String) => MDC.put(x, y)) extends ConfigurableHandler { // Extra constructor because java spring programs have a problem with the default function in the constructor above. def this() = this(LoggerFactory.getLogger(classOf[StructuredLogSpanHandler]), (k: String, v: String) => MDC.put(k, v)) import com.comcast.money.core.handlers.LoggingSpanHandler._ protected var logFunction: LogFunction = logger.info def configure(config: Config): Unit = { if (config.hasPath("log-level")) { val level = config.getString("log-level").toUpperCase // set the log level based on the configured value level match { case "ERROR" => logFunction = logger.error case "WARN" => logFunction = logger.warn case "INFO" => logFunction = logger.info case "DEBUG" => logFunction = logger.debug case "TRACE" => logFunction = logger.trace } } } def handle(spanInfo: SpanInfo): Unit = { import scala.collection.JavaConverters._ val baseFields = Seq( // The field names below are the same as cedi-dtrace. This makes it easier to query a transaction in elastic search. ("trace-id", spanInfo.id.traceId()), ("parent-id", spanInfo.id.parentId()), ("span-id", spanInfo.id.selfId()), ("span-name", spanInfo.name()), ("app", spanInfo.appName()), ("host", spanInfo.host()), ("start-time", java.time.Instant.ofEpochMilli(spanInfo.startTimeMillis())), ("end-time", java.time.Instant.ofEpochMilli(spanInfo.endTimeMillis())), ("span-duration", spanInfo.durationMicros()), ("span-success", spanInfo.success())) val noteFields: Seq[(String, Any)] = spanInfo.notes.values.asScala.map(n => (n.name(), n.value())).toSeq val allFields = baseFields ++ noteFields allFields.foreach(p => mdcFunc(p._1, p._2.toString)) logFunction(allFields.map { case (k, v) => s"$k:$v" }.mkString("[", "][", "]")) } }
Example 80
Source File: LoggingSpanHandler.scala From money with Apache License 2.0 | 5 votes |
package com.comcast.money.core.handlers import com.comcast.money.api.SpanInfo import com.typesafe.config.Config import org.slf4j.{ Logger, LoggerFactory } object LoggingSpanHandler { type LogFunction = String => Unit val HEADER_FORMAT: String = "Span: [ span-id=%s ][ trace-id=%s ][ parent-id=%s ][ span-name=%s ][ " + "app-name=%s ][ start-time=%s ][ span-duration=%s ][ span-success=%s ] [ host=%s ]" val NOTE_BEGIN = "[ " val NOTE_END = " ]" val EQ = "=" val NULL: String = "NULL" } class LoggingSpanHandler(val logger: Logger, makeFormatter: Config => SpanLogFormatter) extends ConfigurableHandler { def this() = this(LoggerFactory.getLogger(classOf[LoggingSpanHandler]), SpanLogFormatter.apply) import LoggingSpanHandler._ protected var logFunction: LogFunction = logger.info protected var formatter: SpanLogFormatter = _ def configure(config: Config): Unit = { if (config.hasPath("log-level")) { val level = config.getString("log-level").toUpperCase // set the log level based on the configured value level match { case "ERROR" => logFunction = logger.error case "WARN" => logFunction = logger.warn case "INFO" => logFunction = logger.info case "DEBUG" => logFunction = logger.debug case "TRACE" => logFunction = logger.trace } } val formattingConfig = config.getConfig("formatting") formatter = makeFormatter(formattingConfig) } def handle(spanInfo: SpanInfo): Unit = { logFunction(formatter.buildMessage(spanInfo)) } }
Example 81
Source File: TraceLoggingSpec.scala From money with Apache License 2.0 | 5 votes |
package com.comcast.money.core.logging import org.mockito.Mockito._ import org.scalatest.mockito.MockitoSugar import org.scalatest.{ Matchers, OneInstancePerTest, WordSpec } import org.slf4j.Logger class TraceLoggingSpec extends WordSpec with Matchers with MockitoSugar with OneInstancePerTest { val mockLogger = mock[Logger] "TraceLogging" should { "capture exceptions into a log" in { val testTraceLogging = new TraceLogging { override lazy val shouldLogExceptions: Boolean = true override val logger: Logger = mockLogger } val t = mock[Throwable] testTraceLogging.logException(t) verify(mockLogger).error("Tracing exception", t) } "not capture exceptions if log exceptions is not enabled" in { val testTraceLogging = new TraceLogging { override lazy val shouldLogExceptions: Boolean = false override val logger: Logger = mockLogger } val t = mock[Throwable] testTraceLogging.logException(t) verifyZeroInteractions(mockLogger) } } }
Example 82
Source File: PipeLine.scala From gearpump-examples with Apache License 2.0 | 5 votes |
package io.gearpump.examples.tap_pipeline

import akka.actor.ActorSystem
import com.typesafe.config.{ConfigFactory, ConfigRenderOptions}
import io.gearpump.cluster.UserConfig
import io.gearpump.cluster.client.ClientContext
import io.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import io.gearpump.external.hbase.HBaseSink
import io.gearpump.streaming.StreamApplication
import io.gearpump.streaming.kafka.{KafkaSource, KafkaStorageFactory}
import io.gearpump.streaming.sink.DataSinkProcessor
import io.gearpump.streaming.source.DataSourceProcessor
import io.gearpump.tap.TapJsonConfig
import io.gearpump.util.Graph._
import io.gearpump.util.{AkkaApp, Graph, LogUtil}
import org.slf4j.Logger

object PipeLine extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "hbase" -> CLIOption[String]("<hbase instance>", required = false, defaultValue = Some("hbase")),
    "kafka" -> CLIOption[String]("<kafka instance>", required = false, defaultValue = Some("kafka")),
    "table" -> CLIOption[String]("<hbase table>", required = false, defaultValue = Some("gp_tap_table")),
    "topic" -> CLIOption[String]("<kafka topic>", required = false, defaultValue = Some("gp_tap_topic"))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    val conf = ConfigFactory.load
    val services = conf.root.withOnlyKey("VCAP_SERVICES").render(ConfigRenderOptions.defaults().setJson(true))
    val tjc = new TapJsonConfig(services)
    val hbaseconfig = tjc.getHBase(config.getString("hbase"))
    //val kafkaconfig = tjc.getKafka(config.getString("kafka"))
    val kafkaconfig = Map(
      "zookeepers" -> "10.10.10.46:2181,10.10.10.236:2181,10.10.10.164:2181/kafka",
      "brokers" -> "10.10.10.46:9092,10.10.10.164:9092,10.10.10.236:9092"
    )
    val topic = config.getString("topic")
    val table = config.getString("table")
    val zookeepers = kafkaconfig.get("zookeepers").get
    val brokers = kafkaconfig.get("brokers").get
    val source = DataSourceProcessor(new KafkaSource(topic, zookeepers, new KafkaStorageFactory(zookeepers, brokers)), 1)
    val sink = DataSinkProcessor(new HBaseSink(table, hbaseconfig), 1)
    val app = StreamApplication("TAPPipeline", Graph(source ~> sink), UserConfig.empty)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
}
Example 83
Source File: PipeLine.scala From gearpump-examples with Apache License 2.0 | 5 votes |
package io.gearpump.examples.kafka_hdfs_pipeline import akka.actor.ActorSystem import com.julianpeeters.avro.annotations._ import io.gearpump.cluster.UserConfig import io.gearpump.cluster.client.ClientContext import io.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import io.gearpump.partitioner.ShufflePartitioner import io.gearpump.streaming.kafka.{KafkaSource, KafkaStorageFactory} import io.gearpump.streaming.source.DataSourceProcessor import io.gearpump.streaming.{Processor, StreamApplication} import io.gearpump.util.Graph._ import io.gearpump.util.{AkkaApp, Graph, LogUtil} import org.slf4j.Logger case class SpaceShuttleMessage(id: String, on: String, body: String) @AvroRecord case class SpaceShuttleRecord(var ts: Long, var anomaly: Double) object PipeLine extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "reader"-> CLIOption[Int]("<kafka data reader number>", required = false, defaultValue = Some(2)), "scorer"-> CLIOption[Int]("<scorer number>", required = false, defaultValue = Some(2)), "writer"-> CLIOption[Int]("<parquet file writer number>", required = false, defaultValue = Some(1)), "output"-> CLIOption[String]("<output path directory>", required = false, defaultValue = Some("/parquet")), "topic" -> CLIOption[String]("<topic>", required = false, defaultValue = Some("topic-105")), "brokers" -> CLIOption[String]("<brokers>", required = false, defaultValue = Some("10.10.10.46:9092,10.10.10.164:9092,10.10.10.236:9092")), "zookeepers" -> CLIOption[String]("<zookeepers>", required = false, defaultValue = Some("10.10.10.46:2181,10.10.10.236:2181,10.10.10.164:2181/kafka")) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val readerNum = config.getInt("reader") val scorerNum = config.getInt("scorer") val writerNum = config.getInt("writer") val outputPath = config.getString("output") val topic = config.getString("topic") val brokers = config.getString("brokers") val zookeepers = config.getString("zookeepers") val appConfig = UserConfig.empty.withString(ParquetWriterTask.PARQUET_OUTPUT_DIRECTORY, outputPath) val offsetStorageFactory = new KafkaStorageFactory(zookeepers, brokers) val partitioner = new ShufflePartitioner() val source = new KafkaSource(topic, zookeepers, offsetStorageFactory) val reader = DataSourceProcessor(source, readerNum) val scorer = Processor[ScoringTask](scorerNum) val writer = Processor[ParquetWriterTask](writerNum) val dag = Graph(reader ~ partitioner ~> scorer ~ partitioner ~> writer) val app = StreamApplication("KafkaHdfsPipeLine", dag, appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 84
Source File: PipeLineSpec.scala From gearpump-examples with Apache License 2.0 | 5 votes |
package io.gearpump.examples.kafka_hdfs_pipeline import akka.actor.ActorSystem import io.gearpump._ import io.gearpump.cluster.UserConfig import io.gearpump.streaming.task.{StartTime, Task, TaskContext} import io.gearpump.streaming.transaction.api.TimeReplayableSource import io.gearpump.util.LogUtil import org.scalatest.prop.PropertyChecks import org.scalatest.{BeforeAndAfterAll, Matchers, PropSpec} import org.slf4j.Logger import scala.util.{Failure, Success, Try} class SpaceShuttleReplayableSource extends TimeReplayableSource { val data = Array[String]( """ |{"id":"2a329674-12ad-49f7-b40d-6485aae0aae8","on":"2015-04-02T18:52:02.680178753Z","body":"[-0.414141,-0.0246564,-0.125,0.0140301,-0.474359,0.0256049,-0.0980392,0.463884,0.40836]"} """ .stripMargin, """ |{"id":"043ade58-2fbc-4fe2-8253-84ab181b8cfa","on":"2015-04-02T18:52:02.680078434Z","body": "[-0.414141,-0.0246564,-0.125,0.0140301,-0.474359,0.0256049,-0.0980392,0.463884,0.40836]"} """.stripMargin, """ |{"id":"043ade58-2fbc-4fe2-8253-84ab181b8cfa","on":"2015-04-02T18:52:02.680078434Z","body": "[-0.414141,-0.0246564,-0.125,0.0140301,-0.474359,0.0256049,-0.0980392,0.463884,0.40836]"} """.stripMargin ) override def open(context: TaskContext, startTime: Option[TimeStamp]): Unit = {} override def read(num: Int): List[Message] = List(Message(data(0)), Message(data(1)), Message(data(2))) override def close(): Unit = {} } class SpaceShuttleProducer(taskContext : TaskContext, conf: UserConfig) extends Task(taskContext, conf) { import taskContext.{output, parallelism} private val batchSize = 3 val taskParallelism = parallelism private val source: TimeReplayableSource = new SpaceShuttleReplayableSource() private var startTime: TimeStamp = 0L override def onStart(newStartTime: StartTime): Unit = { startTime = newStartTime.startTime LOG.info(s"start time $startTime") source.open(taskContext, Some(startTime)) self ! Message("start", System.currentTimeMillis()) } override def onNext(msg: Message): Unit = { Try({ source.read(batchSize).foreach(msg => { output(msg) }) }) match { case Success(ok) => case Failure(throwable) => LOG.error(s"failed ${throwable.getMessage}") } self ! Message("continue", System.currentTimeMillis()) } override def onStop(): Unit = { LOG.info("closing kafka source...") source.close() } } class PipeLineSpec extends PropSpec with PropertyChecks with Matchers with BeforeAndAfterAll { val LOG: Logger = LogUtil.getLogger(getClass) implicit var system: ActorSystem = null override def beforeAll(): Unit = { system = ActorSystem("PipeLineSpec") } override def afterAll(): Unit = { system.shutdown() } property("PipeLineSpec should be able to create a DataSource") { Option(new SpaceShuttleReplayableSource) match { case Some(replayableSource) => case None => assert(false) } } }
Example 85
Source File: PipeLine.scala From gearpump-examples with Apache License 2.0 | 5 votes |
package io.gearpump.examples.kafka_hbase_pipeline import akka.actor.ActorSystem import com.typesafe.config.ConfigFactory import io.gearpump.cluster.UserConfig import io.gearpump.cluster.client.ClientContext import io.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import io.gearpump.streaming.kafka.{KafkaSource, KafkaStorageFactory} import io.gearpump.streaming.source.DataSourceProcessor import io.gearpump.streaming.{Processor, StreamApplication} import io.gearpump.util.Graph._ import io.gearpump.util.{AkkaApp, Graph, LogUtil} import org.slf4j.Logger object PipeLine extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) val PROCESSORS = "pipeline.processors" val PERSISTORS = "pipeline.persistors" override val options: Array[(String, CLIOption[Any])] = Array( "processors"-> CLIOption[Int]("<processor number>", required = false, defaultValue = Some(1)), "persistors"-> CLIOption[Int]("<persistor number>", required = false, defaultValue = Some(1)), "topic" -> CLIOption[String]("<topic>", required = false, defaultValue = Some("gptest")), "brokers" -> CLIOption[String]("<brokers>", required = false, defaultValue = Some("10.10.10.46:9092,10.10.10.164:9092,10.10.10.236:9092")), "zookeepers" -> CLIOption[String]("<zookeepers>", required = false, defaultValue = Some("10.10.10.46:2181,10.10.10.236:2181,10.10.10.164:2181/kafka")) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system import Messages._ val pipelineString = """ |pipeline { | cpu.interval = 20 | memory.interval = 20 | processors = 1 | persistors = 1 |} |hbase { | table { | name = "pipeline" | column { | family = "metrics" | name = "average" | } | } |} """.stripMargin val pipelineConfig = PipeLineConfig(ConfigFactory.parseString(pipelineString)) val processors = config.getInt("processors") val persistors = config.getInt("persistors") val topic = config.getString("topic") val brokers = config.getString("brokers") val zookeepers = config.getString("zookeepers") val appConfig = UserConfig.empty.withValue[PipeLineConfig](PIPELINE, pipelineConfig) val offsetStorageFactory = new KafkaStorageFactory(zookeepers, brokers) val source = new KafkaSource(topic, zookeepers, offsetStorageFactory) val kafka = DataSourceProcessor(source, 1) val cpuProcessor = Processor[CpuProcessor](processors, "CpuProcessor") val memoryProcessor = Processor[MemoryProcessor](processors, "MemoryProcessor") val cpuPersistor = Processor[CpuPersistor](persistors, "CpuPersistor") val memoryPersistor = Processor[MemoryPersistor](persistors, "MemoryPersistor") val app = StreamApplication("KafkaHbasePipeLine", Graph( kafka ~> cpuProcessor ~> cpuPersistor, kafka ~> memoryProcessor ~> memoryPersistor ), appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 86
Source File: Logging.scala From graphframes with Apache License 2.0 | 5 votes |
package org.graphframes import org.slf4j.{Logger, LoggerFactory} // This needs to be accessible to org.apache.spark.graphx.lib.backport private[org] trait Logging { @transient private lazy val logger: Logger = LoggerFactory.getLogger(getClass.getName) protected def logDebug(s: => String): Unit = { if (logger.isDebugEnabled) logger.debug(s) } protected def logWarn(s: => String): Unit = { if (logger.isWarnEnabled) logger.warn(s) } protected def logInfo(s: => String): Unit = { if (logger.isInfoEnabled) logger.info(s) } protected def logTrace(s: => String): Unit = { if (logger.isTraceEnabled) logger.trace(s) } }
Example 87
Source File: DistributedShellClient.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.examples.distributedshell import java.util.concurrent.TimeUnit import scala.concurrent.Await import scala.concurrent.duration.Duration import akka.pattern.ask import org.slf4j.{Logger, LoggerFactory} import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption} import org.apache.gearpump.examples.distributedshell.DistShellAppMaster.ShellCommand import org.apache.gearpump.util.{AkkaApp, Constants} object DistributedShellClient extends AkkaApp with ArgumentsParser { implicit val timeout = Constants.FUTURE_TIMEOUT private val LOG: Logger = LoggerFactory.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "appid" -> CLIOption[Int]("<the distributed shell appid>", required = true), "command" -> CLIOption[String]("<shell command>", required = true) ) override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) implicit val system = context.system implicit val dispatcher = system.dispatcher val appid = config.getInt("appid") val command = config.getString("command") val appMaster = context.resolveAppID(appid) LOG.info(s"Resolved appMaster $appid address $appMaster, sending command $command") val future = (appMaster ? ShellCommand(command)).map { result => LOG.info(s"Result: \n$result") context.close() } Await.ready(future, Duration(60, TimeUnit.SECONDS)) } }
Example 88
Source File: DistributedShell.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.examples.distributedshell import org.slf4j.Logger import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption} import org.apache.gearpump.cluster.{Application, UserConfig} import org.apache.gearpump.util.{AkkaApp, LogUtil} object DistributedShell extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array.empty override def main(akkaConf: Config, args: Array[String]): Unit = { LOG.info(s"Distributed shell submitting application...") val context = ClientContext(akkaConf) val app = context.submit(Application[DistShellAppMaster]("DistributedShell", UserConfig.empty)) context.close() LOG.info(s"Distributed Shell Application started with appId ${app.appId} !") } }
Example 89
Source File: ShellExecutor.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.examples.distributedshell import scala.sys.process._ import scala.util.{Failure, Success, Try} import akka.actor.Actor import org.slf4j.Logger import org.apache.gearpump.cluster.{ExecutorContext, UserConfig} import org.apache.gearpump.examples.distributedshell.DistShellAppMaster.{ShellCommand, ShellCommandResult} import org.apache.gearpump.util.LogUtil class ShellExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor { import executorContext._ private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId) LOG.info(s"ShellExecutor started!") override def receive: Receive = { case ShellCommand(command) => val process = Try(s"$command".!!) val result = process match { case Success(msg) => msg case Failure(ex) => ex.getMessage } sender ! ShellCommandResult(executorId, result) } }
Example 90
Source File: DistShellAppMaster.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.examples.distributedshell import org.apache.gearpump.cluster.MasterToAppMaster.WorkerList import scala.concurrent.Future import akka.actor.{Deploy, Props} import akka.pattern.{ask, pipe} import akka.remote.RemoteScope import com.typesafe.config.Config import org.slf4j.Logger import org.apache.gearpump.cluster.ClientToMaster.ShutdownApplication import org.apache.gearpump.cluster.appmaster.ExecutorSystemScheduler.{ExecutorSystemJvmConfig, ExecutorSystemStarted, StartExecutorSystemTimeout} import org.apache.gearpump.cluster._ import org.apache.gearpump.examples.distributedshell.DistShellAppMaster._ import org.apache.gearpump.util.{ActorUtil, Constants, LogUtil, Util} class DistShellAppMaster(appContext: AppMasterContext, app: AppDescription) extends ApplicationMaster { import appContext._ import context.dispatcher implicit val timeout = Constants.FUTURE_TIMEOUT private val LOG: Logger = LogUtil.getLogger(getClass, app = appId) protected var currentExecutorId = 0 private var workerNum: Option[Int] = None override def preStart(): Unit = { LOG.info(s"Distributed Shell AppMaster started") ActorUtil.launchExecutorOnEachWorker(masterProxy, getExecutorJvmConfig, self) } override def receive: Receive = { case ExecutorSystemStarted(executorSystem, _) => import executorSystem.{address, resource => executorResource, worker} val executorContext = ExecutorContext(currentExecutorId, worker, appId, app.name, self, executorResource) // Start executor val executor = context.actorOf(Props(classOf[ShellExecutor], executorContext, app.userConfig) .withDeploy(Deploy(scope = RemoteScope(address))), currentExecutorId.toString) executorSystem.bindLifeCycleWith(executor) currentExecutorId += 1 ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext) case WorkerList(workers) => workerNum = Some(workers.length) ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext) case StartExecutorSystemTimeout => LOG.error(s"Failed to allocate resource in time") masterProxy ! ShutdownApplication(appId) context.stop(self) case msg: ShellCommand => Future.fold(context.children.map(_ ? msg))(new ShellCommandResultAggregator) { (aggregator, response) => { aggregator.aggregate(response.asInstanceOf[ShellCommandResult]) } }.map(_.toString()) pipeTo sender } private def getExecutorJvmConfig: ExecutorSystemJvmConfig = { val config: Config = app.clusterConfig val jvmSetting = Util.resolveJvmSetting(config.withFallback(context.system.settings.config)) .executor ExecutorSystemJvmConfig(jvmSetting.classPath, jvmSetting.vmargs, appJar, username, config) } } object DistShellAppMaster { case class ShellCommand(command: String) case class ShellCommandResult(executorId: Int, msg: Any) class ShellCommandResultAggregator { val result: StringBuilder = new StringBuilder def aggregate(response: ShellCommandResult): ShellCommandResultAggregator = { result.append(s"Execute results from executor ${response.executorId} : \n") result.append(response.msg + "\n") this } override def toString: String = result.toString() } }
Example 91
Source File: SequenceFileIO.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.fsio import org.apache.hadoop.conf.Configuration import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.ShufflePartitioner import org.apache.gearpump.streaming.examples.fsio.HadoopConfig._ import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object SequenceFileIO extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<sequence file reader number>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<sequence file writer number>", required = false, defaultValue = Some(1)), "input" -> CLIOption[String]("<input file path>", required = true), "output" -> CLIOption[String]("<output file directory>", required = true) ) def application(config: ParseResult): StreamApplication = { val spoutNum = config.getInt("source") val boltNum = config.getInt("sink") val input = config.getString("input") val output = config.getString("output") val appConfig = UserConfig.empty.withString(SeqFileStreamProducer.INPUT_PATH, input) .withString(SeqFileStreamProcessor.OUTPUT_PATH, output) val hadoopConfig = appConfig.withHadoopConf(new Configuration()) val partitioner = new ShufflePartitioner() val streamProducer = Processor[SeqFileStreamProducer](spoutNum) val streamProcessor = Processor[SeqFileStreamProcessor](boltNum) val app = StreamApplication("SequenceFileIO", Graph(streamProducer ~ partitioner ~> streamProcessor), hadoopConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config)) context.close() } }
Example 92
Source File: SOL.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.sol import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.ShufflePartitioner import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object SOL extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "streamProducer" -> CLIOption[Int]("<stream producer number>", required = false, defaultValue = Some(1)), "streamProcessor" -> CLIOption[Int]("<stream processor number>", required = false, defaultValue = Some(1)), "bytesPerMessage" -> CLIOption[Int]("<size of each message>", required = false, defaultValue = Some(100)), "stages" -> CLIOption[Int]("<how many stages to run>", required = false, defaultValue = Some(2))) def application(config: ParseResult): StreamApplication = { val spoutNum = config.getInt("streamProducer") val boltNum = config.getInt("streamProcessor") val bytesPerMessage = config.getInt("bytesPerMessage") val stages = config.getInt("stages") val appConfig = UserConfig.empty.withInt(SOLStreamProducer.BYTES_PER_MESSAGE, bytesPerMessage) val partitioner = new ShufflePartitioner() val streamProducer = Processor[SOLStreamProducer](spoutNum) val streamProcessor = Processor[SOLStreamProcessor](boltNum) var computation = streamProducer ~ partitioner ~> streamProcessor computation = 0.until(stages - 2).foldLeft(computation) { (c, id) => c ~ partitioner ~> streamProcessor.copy() } val dag = Graph(computation) val app = StreamApplication("sol", dag, appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config)) context.close() } }
Example 93
Source File: Dag.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.complexdag import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.task.TaskContext import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph.{Node => GraphNode} import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} case class Source_0(_context: TaskContext, _conf: UserConfig) extends Source(_context, _conf) case class Source_1(_context: TaskContext, _conf: UserConfig) extends Source(_context, _conf) case class Node_0(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf) case class Node_1(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf) case class Node_2(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf) case class Node_3(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf) case class Node_4(_context: TaskContext, _conf: UserConfig) extends Node(_context, _conf) case class Sink_0(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf) case class Sink_1(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf) case class Sink_2(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf) case class Sink_3(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf) case class Sink_4(_context: TaskContext, _conf: UserConfig) extends Sink(_context, _conf) object Dag extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) val RUN_FOR_EVER = -1 override val options: Array[(String, CLIOption[Any])] = Array.empty def application(config: ParseResult): StreamApplication = { val source_0 = Processor[Source_0](1) val source_1 = Processor[Source_1](1) val node_0 = Processor[Node_0](1) val node_1 = Processor[Node_1](1) val node_2 = Processor[Node_2](1) val node_3 = Processor[Node_3](1) val node_4 = Processor[Node_4](1) val sink_0 = Processor[Sink_0](1) val sink_1 = Processor[Sink_1](1) val sink_2 = Processor[Sink_2](1) val sink_3 = Processor[Sink_3](1) val sink_4 = Processor[Sink_4](1) val partitioner = new HashPartitioner val app = StreamApplication("dag", Graph( source_0 ~ partitioner ~> sink_1, source_0 ~ partitioner ~> sink_2, source_0 ~ partitioner ~> node_2, source_0 ~ partitioner ~> node_3, source_0 ~ partitioner ~> node_1, source_0 ~ partitioner ~> sink_0, node_2 ~ partitioner ~> node_3, node_1 ~ partitioner ~> node_3, node_1 ~ partitioner ~> sink_3, node_1 ~ partitioner ~> node_4, source_1 ~ partitioner ~> sink_4, source_1 ~ partitioner ~> node_0, node_3 ~ partitioner ~> sink_3, node_4 ~ partitioner ~> sink_3, node_0 ~ partitioner ~> sink_3 ), UserConfig.empty) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val userConf = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(userConf)) context.close() } }
Example 94
Source File: WindowAverageProcessor.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.state.processor

import scala.collection.immutable.TreeMap

import com.twitter.algebird.{AveragedGroup, AveragedValue}
import org.slf4j.Logger

import org.apache.gearpump.Message
import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.streaming.monoid.AlgebirdGroup
import org.apache.gearpump.streaming.serializer.ChillSerializer
import org.apache.gearpump.streaming.state.api.{PersistentState, PersistentTask}
import org.apache.gearpump.streaming.state.impl.{Interval, Window, WindowConfig, WindowState}
import org.apache.gearpump.streaming.task.TaskContext
import org.apache.gearpump.util.LogUtil

object WindowAverageProcessor {
  val LOG: Logger = LogUtil.getLogger(classOf[WindowAverageProcessor])
}

class WindowAverageProcessor(taskContext: TaskContext, conf: UserConfig)
  extends PersistentTask[AveragedValue](taskContext, conf) {

  override def persistentState: PersistentState[AveragedValue] = {
    val group = new AlgebirdGroup(AveragedGroup)
    val serializer = new ChillSerializer[TreeMap[Interval, AveragedValue]]
    val window = new Window(conf.getValue[WindowConfig](WindowConfig.NAME).get)
    new WindowState[AveragedValue](group, serializer, taskContext, window)
  }

  override def processMessage(state: PersistentState[AveragedValue], message: Message): Unit = {
    val value = AveragedValue(message.value.asInstanceOf[String].toLong)
    state.update(message.timestamp.toEpochMilli, value)
  }
}
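The state here builds on Algebird's AveragedValue, which forms a group, so partial averages can be merged incrementally as new messages arrive. A hedged sketch of that merging behaviour on its own, assuming the usual AveragedValue(count, value) shape from Algebird:

import com.twitter.algebird.{AveragedGroup, AveragedValue}

object AveragedValueDemo {
  def main(args: Array[String]): Unit = {
    // Each observation becomes an AveragedValue with count = 1.
    val a = AveragedValue(3L)
    val b = AveragedValue(5L)
    val c = AveragedValue(10L)

    // AveragedGroup merges partial averages, which is what the window state
    // relies on when folding new messages into the stored value.
    val combined = AveragedGroup.plus(AveragedGroup.plus(a, b), c)
    println(combined.count) // 3
    println(combined.value) // 6.0 -- the running mean
  }
}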
Example 95
Source File: HBaseConn.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.hbase import akka.actor.ActorSystem import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.external.hbase.HBaseSink import org.apache.gearpump.streaming.StreamApplication import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.util.Graph.Node import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} import org.slf4j.Logger object HBaseConn extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) val RUN_FOR_EVER = -1 override val options: Array[(String, CLIOption[Any])] = Array( "splitNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)), "sinkNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val splitNum = config.getInt("splitNum") val sinkNum = config.getInt("sinkNum") val split = new Split val sourceProcessor = DataSourceProcessor(split, splitNum, "Split") val sink = HBaseSink(UserConfig.empty, "hbase") val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new HashPartitioner val computation = sourceProcessor ~ partitioner ~> sinkProcessor val application = StreamApplication("HBase", Graph(computation), UserConfig.empty) application } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 96
Source File: WordCount.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.wordcount import akka.actor.ActorSystem import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph.Node import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} import org.slf4j.Logger object WordCount extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) val RUN_FOR_EVER = -1 override val options: Array[(String, CLIOption[Any])] = Array( "split" -> CLIOption[Int]("<how many source tasks>", required = false, defaultValue = Some(1)), "sum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val sumNum = config.getInt("sum") val splitNum = config.getInt("split") val split = new Split val sourceProcessor = DataSourceProcessor(split, splitNum, "Split") val sum = Processor[Sum](sumNum) val partitioner = new HashPartitioner val computation = sourceProcessor ~ partitioner ~> sum val app = StreamApplication("wordCount", Graph(computation), UserConfig.empty) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context: ClientContext = ClientContext(akkaConf) val app = application(config, context.system) context.submit(app) context.close() } }
Example 97
Source File: KuduConn.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kudu import akka.actor.ActorSystem import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.external.kudu.KuduSink import org.apache.gearpump.streaming.StreamApplication import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.util.Graph.Node import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} import org.slf4j.Logger object KuduConn extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) val RUN_FOR_EVER = -1 override val options: Array[(String, CLIOption[Any])] = Array( "splitNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)), "sinkNum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val splitNum = config.getInt("splitNum") val sinkNum = config.getInt("sinkNum") val map = Map[String, String]("KUDUSINK" -> "kudusink", "kudu.masters" -> "kuduserver", "KUDU_USER" -> "kudu.user", "GEARPUMP_KERBEROS_PRINCIPAL" -> "gearpump.kerberos.principal", "GEARPUMP_KEYTAB_FILE" -> "gearpump.keytab.file", "TABLE_NAME" -> "kudu.table.name" ) val userConfig = new UserConfig(map) val split = new Split val sourceProcessor = DataSourceProcessor(split, splitNum, "Split") val sink = KuduSink(userConfig, "impala::default.kudu_1") val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new HashPartitioner val computation = sourceProcessor ~ partitioner ~> sinkProcessor val application = StreamApplication("Kudu", Graph(computation), userConfig) application } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 98
Source File: KafkaReadWrite.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka

import java.util.Properties

import akka.actor.ActorSystem
import org.apache.gearpump.streaming.kafka.util.KafkaConfig
import org.slf4j.Logger

import org.apache.gearpump.cluster.UserConfig
import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult}
import org.apache.gearpump.streaming.partitioner.ShufflePartitioner
import org.apache.gearpump.streaming.StreamApplication
import org.apache.gearpump.streaming.kafka._
import org.apache.gearpump.streaming.sink.DataSinkProcessor
import org.apache.gearpump.streaming.source.DataSourceProcessor
import org.apache.gearpump.util.Graph._
import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil}

object KafkaReadWrite extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array(
    "source" -> CLIOption[Int]("<how many kafka producer tasks>", required = false,
      defaultValue = Some(1)),
    "sink" -> CLIOption[Int]("<how many kafka processor tasks>", required = false,
      defaultValue = Some(1)),
    "zookeeperConnect" -> CLIOption[String]("<zookeeper connect string>", required = false,
      defaultValue = Some("localhost:2181")),
    "brokerList" -> CLIOption[String]("<broker server list string>", required = false,
      defaultValue = Some("localhost:9092")),
    "sourceTopic" -> CLIOption[String]("<kafka source topic>", required = false,
      defaultValue = Some("topic1")),
    "sinkTopic" -> CLIOption[String]("<kafka sink topic>", required = false,
      defaultValue = Some("topic2"))
  )

  def application(config: ParseResult, system: ActorSystem): StreamApplication = {
    implicit val actorSystem = system
    val appName = "KafkaReadWrite"
    val sourceNum = config.getInt("source")
    val sinkNum = config.getInt("sink")
    val zookeeperConnect = config.getString("zookeeperConnect")
    val brokerList = config.getString("brokerList")
    val sourceTopic = config.getString("sourceTopic")
    val sinkTopic = config.getString("sinkTopic")
    val appConfig = UserConfig.empty
    val props = new Properties
    props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, zookeeperConnect)
    props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
    props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName)
    val source = new KafkaSource(sourceTopic, props)
    val checkpointStoreFactory = new KafkaStoreFactory(props)
    source.setCheckpointStore(checkpointStoreFactory)
    val sourceProcessor = DataSourceProcessor(source, sourceNum)
    val sink = new KafkaSink(sinkTopic, props)
    val sinkProcessor = DataSinkProcessor(sink, sinkNum)
    val partitioner = new ShufflePartitioner
    val computation = sourceProcessor ~ partitioner ~> sinkProcessor
    val app = StreamApplication(appName, Graph(computation), appConfig)
    app
  }

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    val config = parse(args)
    val context = ClientContext(akkaConf)
    val appId = context.submit(application(config, context.system))
    context.close()
  }
}
Example 99
Source File: KafkaWordCount.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.kafka.wordcount import java.util.Properties import akka.actor.ActorSystem import kafka.api.OffsetRequest import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.slf4j.Logger import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption, ParseResult} import org.apache.gearpump.streaming.partitioner.HashPartitioner import org.apache.gearpump.streaming.kafka._ import org.apache.gearpump.streaming.sink.DataSinkProcessor import org.apache.gearpump.streaming.source.DataSourceProcessor import org.apache.gearpump.streaming.{Processor, StreamApplication} import org.apache.gearpump.util.Graph._ import org.apache.gearpump.util.{AkkaApp, Graph, LogUtil} object KafkaWordCount extends AkkaApp with ArgumentsParser { private val LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "source" -> CLIOption[Int]("<how many kafka source tasks>", required = false, defaultValue = Some(1)), "split" -> CLIOption[Int]("<how many split tasks>", required = false, defaultValue = Some(1)), "sum" -> CLIOption[Int]("<how many sum tasks>", required = false, defaultValue = Some(1)), "sink" -> CLIOption[Int]("<how many kafka sink tasks>", required = false, defaultValue = Some(1)) ) def application(config: ParseResult, system: ActorSystem): StreamApplication = { implicit val actorSystem = system val appName = "KafkaWordCount" val sourceNum = config.getInt("source") val splitNum = config.getInt("split") val sumNum = config.getInt("sum") val sinkNum = config.getInt("sink") val appConfig = UserConfig.empty val props = new Properties props.put(KafkaConfig.ZOOKEEPER_CONNECT_CONFIG, "localhost:2181") props.put(KafkaConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") props.put(KafkaConfig.CONSUMER_START_OFFSET_CONFIG, new java.lang.Long(OffsetRequest.LatestTime)) props.put(KafkaConfig.CHECKPOINT_STORE_NAME_PREFIX_CONFIG, appName) val sourceTopic = "topic1" val source = new KafkaSource(sourceTopic, props) val checkpointStoreFactory = new KafkaStoreFactory(props) source.setCheckpointStore(checkpointStoreFactory) val sourceProcessor = DataSourceProcessor(source, sourceNum) val split = Processor[Split](splitNum) val sum = Processor[Sum](sumNum) val sink = new KafkaSink("topic2", props) val sinkProcessor = DataSinkProcessor(sink, sinkNum) val partitioner = new HashPartitioner val computation = sourceProcessor ~ partitioner ~> split ~ partitioner ~> sum ~ partitioner ~> sinkProcessor val app = StreamApplication(appName, Graph(computation), appConfig) app } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) val appId = context.submit(application(config, context.system)) context.close() } }
Example 100
Source File: DistServiceExecutor.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.distributeservice import java.io.{File, FileWriter} import java.net.InetAddress import scala.collection.JavaConverters._ import scala.io.Source import scala.sys.process._ import scala.util.{Failure, Success, Try} import akka.actor.Actor import org.apache.commons.io.FileUtils import org.apache.commons.lang.text.StrSubstitutor import org.slf4j.Logger import org.apache.gearpump.cluster.{ExecutorContext, UserConfig} import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.InstallService import org.apache.gearpump.util.{ActorUtil, LogUtil} class DistServiceExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor { import executorContext._ private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId) override def receive: Receive = { case InstallService(url, zipFileName, targetPath, scriptData, serviceName, serviceSettings) => LOG.info(s"Executor $executorId receive command to install " + s"service $serviceName to $targetPath") unzipFile(url, zipFileName, targetPath) installService(scriptData, serviceName, serviceSettings) } private def unzipFile(url: String, zipFileName: String, targetPath: String) = { val zipFile = File.createTempFile(System.currentTimeMillis().toString, zipFileName) val dir = new File(targetPath) if (dir.exists()) { FileUtils.forceDelete(dir) } val bytes = FileServer.newClient.get(url).get FileUtils.writeByteArrayToFile(zipFile, bytes) val result = Try(s"unzip ${zipFile.getAbsolutePath} -d $targetPath".!!) result match { case Success(msg) => LOG.info(s"Executor $executorId unzip file to $targetPath") case Failure(ex) => throw ex } } private def installService( scriptData: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) = { val tempFile = File.createTempFile("gearpump", serviceName) FileUtils.writeByteArrayToFile(tempFile, scriptData) val script = new File("/etc/init.d", serviceName) writeFileWithEnvVariables(tempFile, script, serviceSettings ++ getEnvSettings) val result = Try(s"chkconfig --add $serviceName".!!) result match { case Success(msg) => LOG.info(s"Executor install service $serviceName successfully!") case Failure(ex) => throw ex } } private def getEnvSettings: Map[String, Any] = { Map("workerId" -> worker, "localhost" -> ActorUtil.getSystemAddress(context.system).host.get, "hostname" -> InetAddress.getLocalHost.getHostName) } private def writeFileWithEnvVariables(source: File, target: File, envs: Map[String, Any]) = { val writer = new FileWriter(target) val sub = new StrSubstitutor(envs.asJava) sub.setEnableSubstitutionInVariables(true) Source.fromFile(source).getLines().foreach(line => writer.write(sub.replace(line) + "\r\n")) writer.close() } }
Example 101
Source File: DistServiceAppMaster.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.distributeservice import java.io.File import org.apache.gearpump.cluster.MasterToAppMaster.WorkerList import scala.concurrent.Future import akka.actor.{Deploy, Props} import akka.pattern.{ask, pipe} import akka.remote.RemoteScope import com.typesafe.config.Config import org.slf4j.Logger import org.apache.gearpump.cluster.ClientToMaster.ShutdownApplication import org.apache.gearpump.cluster.appmaster.ExecutorSystemScheduler.{ExecutorSystemJvmConfig, ExecutorSystemStarted, StartExecutorSystemTimeout} import org.apache.gearpump.cluster.{AppDescription, AppMasterContext, ApplicationMaster, ExecutorContext} import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.{FileContainer, GetFileContainer, InstallService} import org.apache.gearpump.util._ class DistServiceAppMaster(appContext: AppMasterContext, app: AppDescription) extends ApplicationMaster { import appContext._ import context.dispatcher implicit val timeout = Constants.FUTURE_TIMEOUT private val LOG: Logger = LogUtil.getLogger(getClass, app = appId) private var currentExecutorId = 0 private var workerNum: Option[Int] = None private var fileServerPort = -1 val rootDirectory = new File("/") val host = context.system.settings.config.getString(Constants.GEARPUMP_HOSTNAME) val server = context.actorOf(Props(classOf[FileServer], rootDirectory, host, 0)) override def preStart(): Unit = { LOG.info(s"Distribute Service AppMaster started") ActorUtil.launchExecutorOnEachWorker(masterProxy, getExecutorJvmConfig, self) } (server ? FileServer.GetPort).asInstanceOf[Future[FileServer.Port]] pipeTo self override def receive: Receive = { case ExecutorSystemStarted(executorSystem, _) => import executorSystem.{address, resource => executorResource, worker} val executorContext = ExecutorContext(currentExecutorId, worker, appId, app.name, self, executorResource) // start executor val executor = context.actorOf(Props(classOf[DistServiceExecutor], executorContext, app.userConfig).withDeploy( Deploy(scope = RemoteScope(address))), currentExecutorId.toString) executorSystem.bindLifeCycleWith(executor) currentExecutorId += 1 ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext) case WorkerList(workers) => workerNum = Some(workers.length) ActorUtil.tellMasterIfApplicationReady(workerNum, currentExecutorId, appContext) case StartExecutorSystemTimeout => LOG.error(s"Failed to allocate resource in time") masterProxy ! ShutdownApplication(appId) context.stop(self) case FileServer.Port(port) => this.fileServerPort = port case GetFileContainer => val name = Math.abs(new java.util.Random().nextLong()).toString sender ! new FileContainer(s"http://$host:$fileServerPort/$name") case installService: InstallService => context.children.foreach(_ ! installService) } private def getExecutorJvmConfig: ExecutorSystemJvmConfig = { val config: Config = app.clusterConfig val jvmSetting = Util.resolveJvmSetting( config.withFallback(context.system.settings.config)).executor ExecutorSystemJvmConfig(jvmSetting.classPath, jvmSetting.vmargs, appJar, username, config) } } object DistServiceAppMaster { case object GetFileContainer case class FileContainer(url: String) case class InstallService( url: String, zipFileName: String, targetPath: String, script: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) }
Example 102
Source File: DistributeService.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.distributeservice

import org.slf4j.Logger

import org.apache.gearpump.cluster.client.ClientContext
import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption}
import org.apache.gearpump.cluster.{Application, UserConfig}
import org.apache.gearpump.util.{AkkaApp, LogUtil}

object DistributeService extends AkkaApp with ArgumentsParser {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  override val options: Array[(String, CLIOption[Any])] = Array.empty

  override def main(akkaConf: Config, args: Array[String]): Unit = {
    LOG.info(s"Distribute Service submitting application...")
    val context = ClientContext(akkaConf)
    val app = context.submit(Application[DistServiceAppMaster]("DistributedService", UserConfig.empty))
    context.close()
    LOG.info(s"Distribute Service Application started with appId ${app.appId} !")
  }
}
Example 103
Source File: FetchThread.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.kafka.lib.source.consumer import java.nio.channels.ClosedByInterruptException import java.util.concurrent.LinkedBlockingQueue import kafka.common.TopicAndPartition import org.apache.gearpump.streaming.kafka.lib.util.KafkaClient import org.apache.gearpump.streaming.kafka.util.KafkaConfig import org.slf4j.Logger import org.apache.gearpump.util.LogUtil object FetchThread { private val LOG: Logger = LogUtil.getLogger(classOf[FetchThread]) val factory = new FetchThreadFactory class FetchThreadFactory extends java.io.Serializable { def getFetchThread(config: KafkaConfig, client: KafkaClient): FetchThread = { val fetchThreshold = config.getInt(KafkaConfig.FETCH_THRESHOLD_CONFIG) val fetchSleepMS = config.getLong(KafkaConfig.FETCH_SLEEP_MS_CONFIG) val startOffsetTime = config.getLong(KafkaConfig.CONSUMER_START_OFFSET_CONFIG) FetchThread(fetchThreshold, fetchSleepMS, startOffsetTime, client) } } def apply(fetchThreshold: Int, fetchSleepMS: Long, startOffsetTime: Long, client: KafkaClient): FetchThread = { val createConsumer = (tp: TopicAndPartition) => client.createConsumer(tp.topic, tp.partition, startOffsetTime) val incomingQueue = new LinkedBlockingQueue[KafkaMessage]() val sleeper = new ExponentialBackoffSleeper( backOffMultiplier = 2.0, initialDurationMs = 100L, maximumDurationMs = 10000L) new FetchThread(createConsumer, incomingQueue, sleeper, fetchThreshold, fetchSleepMS) } } private def fetchMessage: Boolean = { if (incomingQueue.size >= fetchThreshold) { false } else { consumers.foldLeft(false) { (hasNext, tpAndConsumer) => val (_, consumer) = tpAndConsumer if (consumer.hasNext) { incomingQueue.put(consumer.next()) true } else { hasNext } } } } private def createAllConsumers: Map[TopicAndPartition, KafkaConsumer] = { topicAndPartitions.map(tp => tp -> createConsumer(tp)).toMap } private def resetConsumers(nextOffsets: Map[TopicAndPartition, Long]): Unit = { consumers.values.foreach(_.close()) consumers = createAllConsumers consumers.foreach { case (tp, consumer) => consumer.setStartOffset(nextOffsets(tp)) } } }
Example 104
Source File: CGroupProcessLauncher.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.worker import java.io.File import scala.sys.process.Process import com.typesafe.config.Config import org.slf4j.{Logger, LoggerFactory} import org.apache.gearpump.cluster.scheduler.Resource import org.apache.gearpump.util.{ProcessLogRedirector, RichProcess} class CGroupProcessLauncher(val config: Config) extends ExecutorProcessLauncher { private val APP_MASTER = -1 private val cgroupManager: Option[CGroupManager] = CGroupManager.getInstance(config) private val LOG: Logger = LoggerFactory.getLogger(getClass) override def cleanProcess(appId: Int, executorId: Int): Unit = { if (executorId != APP_MASTER) { cgroupManager.foreach(_.shutDownExecutor(appId, executorId)) } } override def createProcess( appId: Int, executorId: Int, resource: Resource, appConfig: Config, options: Array[String], classPath: Array[String], mainClass: String, arguments: Array[String]): RichProcess = { val cgroupCommand = if (executorId != APP_MASTER) { cgroupManager.map(_.startNewExecutor(appConfig, resource.slots, appId, executorId)).getOrElse(List.empty) } else List.empty LOG.info(s"Launch executor $executorId with CGroup ${cgroupCommand.mkString(" ")}, " + s"classpath: ${classPath.mkString(File.pathSeparator)}") val java = System.getProperty("java.home") + "/bin/java" val command = cgroupCommand ++ List(java) ++ options ++ List("-cp", classPath .mkString(File.pathSeparator), mainClass) ++ arguments LOG.info(s"Starting executor process java $mainClass ${arguments.mkString(" ")}; " + s"options: ${options.mkString(" ")}") val logger = new ProcessLogRedirector() val process = Process(command).run(logger) new RichProcess(process, logger) } }
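The launcher's real work is assembling a command line and handing it to scala.sys.process with its output redirected to a logger (ProcessLogRedirector and RichProcess are Gearpump classes). A minimal sketch of the same launch-and-redirect step using only the standard library's ProcessLogger; the LaunchSketch name and the java -version command are illustrative:

import scala.sys.process.{Process, ProcessLogger}

object LaunchSketch {
  def main(args: Array[String]): Unit = {
    // Command assembled as a list of tokens, as the launcher does.
    val command = List("java", "-version")

    // Redirect stdout/stderr line by line, standing in for ProcessLogRedirector.
    val logger = ProcessLogger(out => println(s"OUT: $out"), err => println(s"ERR: $err"))

    val process = Process(command).run(logger)
    println(s"exit code: ${process.exitValue()}") // exitValue blocks until the process finishes
  }
}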
Example 105
Source File: StormRunner.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.storm import org.apache.gearpump.experiments.storm.main.{GearpumpNimbus, GearpumpStormClient} import org.apache.gearpump.util.LogUtil import org.slf4j.Logger object StormRunner { private val LOG: Logger = LogUtil.getLogger(getClass) private val commands = Map("nimbus" -> GearpumpNimbus, "app" -> GearpumpStormClient) private def usage(): Unit = { val keys = commands.keys.toList.sorted // scalastyle:off println Console.err.println("Usage: " + "<" + keys.mkString("|") + ">") // scalastyle:on println } private def executeCommand(command: String, commandArgs: Array[String]): Unit = { if (!commands.contains(command)) { usage() } else { commands(command).main(commandArgs) } } def main(args: Array[String]): Unit = { if (args.length == 0) { usage() } else { val command = args(0) val commandArgs = args.drop(1) executeCommand(command, commandArgs) } } }
Example 106
Source File: ContainerLaunchContext.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.glue import java.io.File import java.nio.ByteBuffer import scala.collection.JavaConverters._ import org.apache.hadoop.fs.{FileSystem => YarnFileSystem, Path} import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.mapreduce.security.TokenCache import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.ConverterUtils import org.slf4j.Logger import org.apache.gearpump.util.LogUtil private[glue] object ContainerLaunchContext { private val LOG: Logger = LogUtil.getLogger(getClass) def apply(yarnConf: YarnConfiguration, command: String, packagePath: String, configPath: String) : ContainerLaunchContext = { val context = Records.newRecord(classOf[ContainerLaunchContext]) context.setCommands(Seq(command).asJava) context.setEnvironment(getAppEnv(yarnConf).asJava) context.setTokens(getToken(yarnConf, packagePath, configPath)) context.setLocalResources(getAMLocalResourcesMap(yarnConf, packagePath, configPath).asJava) context } private def getFs(yarnConf: YarnConfiguration) = YarnFileSystem.get(yarnConf) private def getAppEnv(yarnConf: YarnConfiguration): Map[String, String] = { val classPaths = yarnConf.getStrings( YarnConfiguration.YARN_APPLICATION_CLASSPATH, YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH.mkString(File.pathSeparator)) val allPaths = Option(classPaths).getOrElse(Array("")) allPaths :+ Environment.PWD.$() + File.separator + "*" + File.pathSeparator Map(Environment.CLASSPATH.name -> allPaths.map(_.trim).mkString(File.pathSeparator)) } private def getAMLocalResourcesMap( yarnConf: YarnConfiguration, packagePath: String, configPath: String) : Map[String, LocalResource] = { val fs = getFs(yarnConf) Map( "pack" -> newYarnAppResource(fs, new Path(packagePath), LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION), "conf" -> newYarnAppResource(fs, new Path(configPath), LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)) } private def newYarnAppResource( fs: YarnFileSystem, path: Path, resourceType: LocalResourceType, vis: LocalResourceVisibility): LocalResource = { val qualified = fs.makeQualified(path) val status = fs.getFileStatus(qualified) val resource = Records.newRecord(classOf[LocalResource]) resource.setType(resourceType) resource.setVisibility(vis) resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified)) resource.setTimestamp(status.getModificationTime) resource.setSize(status.getLen) resource } private def getToken(yc: YarnConfiguration, packagePath: String, configPath: String) : ByteBuffer = { val credentials = UserGroupInformation.getCurrentUser.getCredentials val dob = new DataOutputBuffer val dirs = Array(new Path(packagePath), new Path(configPath)) TokenCache.obtainTokensForNamenodes(credentials, dirs, yc) credentials.writeTokenStorageToStream(dob) ByteBuffer.wrap(dob.getData) } }
Example 107
Source File: Client.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.client import org.apache.gearpump.util.LogUtil import org.slf4j.Logger object Client { private val LOG: Logger = LogUtil.getLogger(getClass) val LAUNCH = "launch" val commands = Map(LAUNCH -> LaunchCluster) ++ ManageCluster.commands.map(key => (key, ManageCluster)).toMap def usage(): Unit = { val keys = commands.keys.toList.sorted // scalastyle:off println Console.err.println("Usage: " + "<" + keys.mkString("|") + ">") // scalastyle:on println } def main(args: Array[String]): Unit = { if (args.length == 0) { usage() } else { val key = args(0) val command = commands.get(key) command match { case Some(command) => if (key == LAUNCH) { val remainArgs = args.drop(1) command.main(remainArgs) } else { val commandArg = Array("-" + ManageCluster.COMMAND, key) val remainArgs = args.drop(1) val updatedArgs = commandArg ++ args.drop(1) command.main(updatedArgs) } case None => usage } } } }
Example 108
Source File: NonWindowState.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.state.impl

import org.slf4j.Logger

import org.apache.gearpump.Time.MilliSeconds
import org.apache.gearpump.streaming.state.api.{Monoid, MonoidState, Serializer}
import org.apache.gearpump.streaming.state.impl.NonWindowState._
import org.apache.gearpump.util.LogUtil

object NonWindowState {
  val LOG: Logger = LogUtil.getLogger(classOf[NonWindowState[_]])
}

class NonWindowState[T](monoid: Monoid[T], serializer: Serializer[T])
  extends MonoidState[T](monoid) {

  override def recover(timestamp: MilliSeconds, bytes: Array[Byte]): Unit = {
    serializer.deserialize(bytes).foreach(left = _)
  }

  override def update(timestamp: MilliSeconds, t: T): Unit = {
    updateState(timestamp, t)
  }

  override def checkpoint(): Array[Byte] = {
    val serialized = serializer.serialize(left)
    LOG.debug(s"checkpoint time: $checkpointTime; checkpoint value: ($checkpointTime, $left)")
    left = monoid.plus(left, right)
    right = monoid.zero
    serialized
  }
}
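The state class only ever asks two things of the monoid: an associative plus and an identity zero, which is how checkpoint() folds the pending right value into left and resets it. A small self-contained sketch of that contract; SimpleMonoid and LongSum are illustrative stand-ins, not the Gearpump Monoid trait itself:

// Minimal sketch of the monoid contract NonWindowState relies on.
trait SimpleMonoid[T] {
  def plus(l: T, r: T): T
  def zero: T
}

object LongSum extends SimpleMonoid[Long] {
  def plus(l: Long, r: Long): Long = l + r
  def zero: Long = 0L
}

object MonoidDemo {
  def main(args: Array[String]): Unit = {
    // Folding a batch of updates is just repeated plus starting from zero,
    // mirroring how checkpoint() merges the accumulated values.
    val total = Seq(1L, 2L, 3L).foldLeft(LongSum.zero)(LongSum.plus)
    println(total) // 6
  }
}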
Example 109
Source File: StreamingTransportSerializer.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.task import java.io.{DataInput, DataOutput} import org.slf4j.Logger import org.apache.gearpump.streaming.{AckRequestSerializer, AckSerializer, InitialAckRequestSerializer, LatencyProbeSerializer} import org.apache.gearpump.transport.netty.ITransportMessageSerializer import org.apache.gearpump.util.LogUtil class StreamingTransportSerializer extends ITransportMessageSerializer { private val log: Logger = LogUtil.getLogger(getClass) private val serializers = new SerializerResolver serializers.register(classOf[Ack], new AckSerializer) serializers.register(classOf[AckRequest], new AckRequestSerializer) serializers.register(classOf[InitialAckRequest], new InitialAckRequestSerializer) serializers.register(classOf[LatencyProbe], new LatencyProbeSerializer) serializers.register(classOf[SerializedMessage], new SerializedMessageSerializer) override def serialize(dataOutput: DataOutput, obj: Object): Unit = { val registration = serializers.getRegistration(obj.getClass) if (registration != null) { dataOutput.writeInt(registration.id) registration.serializer.asInstanceOf[TaskMessageSerializer[AnyRef]].write(dataOutput, obj) } else { log.error(s"Can not find serializer for class type ${obj.getClass}") } } override def deserialize(dataInput: DataInput, length: Int): Object = { val classID = dataInput.readInt() val registration = serializers.getRegistration(classID) if (registration != null) { registration.serializer.asInstanceOf[TaskMessageSerializer[AnyRef]].read(dataInput) } else { log.error(s"Can not find serializer for class id $classID") null } } override def getLength(obj: Object): Int = { val registration = serializers.getRegistration(obj.getClass) if (registration != null) { registration.serializer.asInstanceOf[TaskMessageSerializer[AnyRef]].getLength(obj) + 4 } else { log.error(s"Can not find serializer for class type ${obj.getClass}") 0 } } }
Example 110
Source File: Context.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.transport.netty import java.io.Closeable import java.util.concurrent._ import scala.collection.JavaConverters._ import akka.actor.{ActorRef, ActorSystem, Props} import com.typesafe.config.Config import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory import org.slf4j.Logger import org.apache.gearpump.transport.netty.Server.ServerPipelineFactory import org.apache.gearpump.transport.{ActorLookupById, HostPort} import org.apache.gearpump.util.{Constants, LogUtil} object Context { private final val LOG: Logger = LogUtil.getLogger(getClass) } def close(): Unit = { LOG.info(s"Context.term, cleanup resources...., " + s"we have ${closeHandler.size()} items to close...") // Cleans up resource in reverse order so that client actor can be cleaned // before clientChannelFactory closeHandler.iterator().asScala.toList.reverse.foreach(_.close()) } }
Example 111
Source File: GearpumpSerialization.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.serializer

import com.esotericsoftware.kryo.{Kryo, Serializer => KryoSerializer}
import com.typesafe.config.Config
import org.slf4j.Logger

import org.apache.gearpump.util.{Constants, LogUtil}

class GearpumpSerialization(config: Config) {
  private val LOG: Logger = LogUtil.getLogger(getClass)

  def customize(kryo: Kryo): Unit = {
    val serializationMap = configToMap(config, Constants.GEARPUMP_SERIALIZERS)

    serializationMap.foreach { kv =>
      val (key, value) = kv
      val keyClass = Class.forName(key)
      if (value == null || value.isEmpty) {
        // Use default serializer for this class type
        kryo.register(keyClass)
      } else {
        val valueClass = Class.forName(value)
        val register = kryo.register(keyClass,
          valueClass.newInstance().asInstanceOf[KryoSerializer[_]])
        LOG.debug(s"Registering ${keyClass}, id: ${register.getId}")
      }
    }
    kryo.setReferences(false)

    // Requires the user to register the class first before using
    kryo.setRegistrationRequired(true)
  }

  private final def configToMap(config: Config, path: String) = {
    import scala.collection.JavaConverters._
    config.getConfig(path).root.unwrapped.asScala.toMap map { case (k, v) => k -> v.toString }
  }
}
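customize() is a thin driver over Kryo's own registration API, fed from configuration. A minimal direct-Kryo sketch of the same calls; the UserEvent class is an illustrative placeholder, not anything Gearpump registers:

import com.esotericsoftware.kryo.Kryo

object KryoRegistrationSketch {
  case class UserEvent(id: Long, name: String) // example class for illustration

  def main(args: Array[String]): Unit = {
    val kryo = new Kryo()

    // The same switches GearpumpSerialization flips: no reference tracking,
    // and registration required so unregistered classes fail fast.
    kryo.setReferences(false)
    kryo.setRegistrationRequired(true)

    // Default serializer for this class type (the `value == null` branch above).
    val registration = kryo.register(classOf[UserEvent])
    println(s"registered id: ${registration.getId}")
  }
}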
Example 112
Source File: Worker.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.main import akka.actor.{ActorSystem, Props} import org.apache.gearpump.cluster.ClusterConfig import org.apache.gearpump.cluster.master.MasterProxy import org.apache.gearpump.cluster.worker.{Worker => WorkerActor} import org.apache.gearpump.transport.HostPort import org.apache.gearpump.util.Constants._ import org.apache.gearpump.util.LogUtil.ProcessType import org.apache.gearpump.util.{AkkaApp, LogUtil} import org.slf4j.Logger import scala.collection.JavaConverters._ import scala.concurrent.Await import scala.concurrent.duration.Duration object Worker extends AkkaApp with ArgumentsParser { protected override def akkaConfig = ClusterConfig.worker() override val description = "Start a worker daemon" var LOG: Logger = LogUtil.getLogger(getClass) private def uuid = java.util.UUID.randomUUID.toString def main(akkaConf: Config, args: Array[String]): Unit = { val id = uuid this.LOG = { LogUtil.loadConfiguration(akkaConf, ProcessType.WORKER) // Delay creation of LOG instance to avoid creating an empty log file as we // reset the log file name here LogUtil.getLogger(getClass) } val system = ActorSystem(id, akkaConf) val masterAddress = akkaConf.getStringList(GEARPUMP_CLUSTER_MASTERS).asScala.map { address => val hostAndPort = address.split(":") HostPort(hostAndPort(0), hostAndPort(1).toInt) } LOG.info(s"Trying to connect to masters " + masterAddress.mkString(",") + "...") val masterProxy = system.actorOf(MasterProxy.props(masterAddress), s"masterproxy${system.name}") system.actorOf(Props(classOf[WorkerActor], masterProxy), classOf[WorkerActor].getSimpleName + id) Await.result(system.whenTerminated, Duration.Inf) } }
Example 113
Source File: Local.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.main import akka.actor.{ActorSystem, Props} import com.typesafe.config.ConfigValueFactory import org.apache.gearpump.cluster.ClusterConfig import org.apache.gearpump.cluster.master.{Master => MasterActor} import org.apache.gearpump.cluster.worker.{Worker => WorkerActor} import org.apache.gearpump.util.Constants._ import org.apache.gearpump.util.LogUtil.ProcessType import org.apache.gearpump.util.{ActorUtil, Constants, LogUtil, MasterClientCommand, Util} import org.slf4j.Logger import scala.collection.JavaConverters._ import scala.concurrent.Await import scala.concurrent.duration.Duration object Local extends MasterClientCommand with ArgumentsParser { override def akkaConfig: Config = ClusterConfig.master() var LOG: Logger = LogUtil.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array("sameprocess" -> CLIOption[Boolean]("", required = false, defaultValue = Some(false)), "workernum" -> CLIOption[Int]("<how many workers to start>", required = false, defaultValue = Some(2))) override val description = "Start a local cluster" def main(akkaConf: Config, args: Array[String]): Unit = { this.LOG = { LogUtil.loadConfiguration(akkaConf, ProcessType.LOCAL) LogUtil.getLogger(getClass) } val config = parse(args) if (null != config) { local(config.getInt("workernum"), config.getBoolean("sameprocess"), akkaConf) } } def local(workerCount: Int, sameProcess: Boolean, akkaConf: Config): Unit = { if (sameProcess) { LOG.info("Starting local in same process") System.setProperty("LOCAL", "true") } val masters = akkaConf.getStringList(Constants.GEARPUMP_CLUSTER_MASTERS) .asScala.flatMap(Util.parseHostList) val local = akkaConf.getString(Constants.GEARPUMP_HOSTNAME) if (masters.size != 1 && masters.head.host != local) { LOG.error(s"The ${Constants.GEARPUMP_CLUSTER_MASTERS} is not match " + s"with ${Constants.GEARPUMP_HOSTNAME}") } else { val hostPort = masters.head implicit val system = ActorSystem(MASTER, akkaConf. withValue("akka.remote.netty.tcp.port", ConfigValueFactory.fromAnyRef(hostPort.port)) ) val master = system.actorOf(Props[MasterActor], MASTER) val masterPath = ActorUtil.getSystemAddress(system).toString + s"/user/$MASTER" 0.until(workerCount).foreach { id => system.actorOf(Props(classOf[WorkerActor], master), classOf[WorkerActor].getSimpleName + id) } Await.result(system.whenTerminated, Duration.Inf) } } }
Example 114
Source File: PresenceUpdater.scala From AckCord with MIT License | 5 votes |
package ackcord.cachehandlers import ackcord.data.{Guild, GuildMember, Presence} import ackcord.gateway.GatewayEvent.PresenceUpdateData import org.slf4j.Logger object PresenceUpdater extends CacheUpdater[PresenceUpdateData] { override def handle(builder: CacheSnapshotBuilder, obj: PresenceUpdateData, registry: CacheTypeRegistry)( implicit log: Logger ): Unit = { val PresenceUpdateData(partialUser, roles, rawActivity, guildId, status, _, clientStatus, premiumSince, nick) = obj registry.updateData(builder)(partialUser) for { guildHandler <- registry.getUpdater[Guild] oldGuild <- builder.guildMap.get(guildId) } { val presencesToUse = if (registry.hasUpdater[Presence]) { val newActivity = rawActivity.map(_.toActivity).flatMap { case Right(activity) => Some(activity) case Left(e) => log.warn(e) None } val newPresence = Presence(partialUser.id, newActivity, status, clientStatus) oldGuild.presences.updated(partialUser.id, newPresence) } else { oldGuild.presences } val oldMembers = oldGuild.members val membersToUse = if (registry.hasUpdater[GuildMember]) { oldMembers .get(partialUser.id) .map(member => oldMembers.updated(partialUser.id, member.copy(roleIds = roles, nick = nick))) .getOrElse(oldMembers) } else { oldMembers } val newGuild = oldGuild.copy(presences = presencesToUse, members = membersToUse) guildHandler.handle(builder, newGuild, registry) } } }
Example 115
Source File: JVMObjectTracker.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
// scalastyle:off
private[r] object JVMObjectTracker {

  @transient protected lazy val logger: Logger = LoggerFactory.getLogger(getClass.getName)

  private[this] val objMap = new TrieMap[String, Object]
  private[this] val objCounter = new AtomicInteger(0)

  def getObject(id: String): Object = {
    logger.info(s"Get object at $id")
    objMap(id)
  }

  def get(id: String): Option[Object] = {
    logger.info(s"Get object at $id")
    objMap.get(id)
  }

  def put(obj: Object): String = {
    val objId = objCounter.getAndIncrement.toString
    val objName = obj.getClass.getName
    logger.info(s"Puts $objName at $objId ")
    objMap.put(objId, obj)
    objId
  }

  def remove(id: String): Option[Object] = {
    logger.info(s"Removed $id")
    objMap.remove(id)
  }
}
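The tracker is essentially a concurrent map keyed by an incrementing counter. Since the real object is private[r], here is a standalone sketch of the same bookkeeping with the logging left out; HandleRegistry is an illustrative name:

import java.util.concurrent.atomic.AtomicInteger
import scala.collection.concurrent.TrieMap

object HandleRegistry {
  private val objMap = new TrieMap[String, Object]
  private val objCounter = new AtomicInteger(0)

  // Stores the object and returns the generated handle id.
  def put(obj: Object): String = {
    val id = objCounter.getAndIncrement.toString
    objMap.put(id, obj)
    id
  }

  def get(id: String): Option[Object] = objMap.get(id)
  def remove(id: String): Option[Object] = objMap.remove(id)
}

object HandleRegistryDemo {
  def main(args: Array[String]): Unit = {
    val id = HandleRegistry.put("some proxy target")
    println(HandleRegistry.get(id))    // Some(some proxy target)
    println(HandleRegistry.remove(id)) // Some(...); later gets return None
  }
}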
Example 116
Source File: LoggingSerializationSpec.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.commons

import org.slf4j.Logger

import io.deepsense.commons.serialization.Serialization
import io.deepsense.commons.utils.Logging

class LoggingSerializationSpec
  extends StandardSpec
  with UnitTestSupport
  with Serialization {

  "Object" when {
    "mixes-in SerializableLogging" should {
      "be serializable" in {
        val testObject = new SerializableTestObject()
        testObject.getLogger.trace("Logging just to force initiation of lazy logger")
        val deserialized = serializeDeserialize[SerializableTestObject](testObject)
        deserialized.getLogger should not be null
        deserialized.getLogger.trace("If this is printed everything is OK")
      }
    }
  }
}

class SerializableTestObject extends Serializable with Logging {
  def getLogger: Logger = this.logger
}
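The serializeDeserialize helper comes from the project's Serialization trait, which is not shown here. A hedged sketch of what such a round trip typically looks like with plain Java serialization; the real trait may be implemented differently:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object SerializationRoundTrip {
  // Serializes the object to bytes and reads it back, which is what the test
  // uses to prove the lazy logger survives serialization.
  def serializeDeserialize[T <: Serializable](obj: T): T = {
    val bytes = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bytes)
    out.writeObject(obj)
    out.close()
    val in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
    val result = in.readObject().asInstanceOf[T]
    in.close()
    result
  }
}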
Example 117
Source File: LoggingServerInterceptor.scala From scala-server-toolkit with MIT License | 5 votes |
package com.avast.sst.grpc.server.interceptor import io.grpc.ForwardingServerCall.SimpleForwardingServerCall import io.grpc.ForwardingServerCallListener.SimpleForwardingServerCallListener import io.grpc._ import org.slf4j.Logger class LoggingServerInterceptor(logger: Logger) extends ServerInterceptor { override def interceptCall[ReqT, RespT]( call: ServerCall[ReqT, RespT], headers: Metadata, next: ServerCallHandler[ReqT, RespT] ): ServerCall.Listener[ReqT] = { val methodName = call.getMethodDescriptor.getFullMethodName val finalCall = new CloseServerCall(methodName, call) new OnMessageServerCallListener(methodName, next.startCall(finalCall, headers)) } private class CloseServerCall[A, B](methodName: String, delegate: ServerCall[A, B]) extends SimpleForwardingServerCall[A, B](delegate) { override def close(status: Status, trailers: Metadata): Unit = { import io.grpc.Status if ((status.getCode eq Status.Code.UNKNOWN) || (status.getCode eq Status.Code.INTERNAL)) { logger.error( String.format( "Error response from method %s: %s %s", methodName, status.getCode, status.getDescription ), status.getCause ) } else if (!status.isOk) { logger.warn( String.format( "Error response from method %s: %s %s", methodName, status.getCode, status.getDescription ), status.getCause ) } else { logger.debug("Successful response from method {}: {}", Array(methodName, status): _*) } super.close(status, trailers) } } private class OnMessageServerCallListener[A](methodName: String, delegate: ServerCall.Listener[A]) extends SimpleForwardingServerCallListener[A](delegate) { override def onMessage(message: A): Unit = { logger.debug("Dispatching method {}", methodName) super.onMessage(message) } } }
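To take effect, the interceptor has to be attached when the server is assembled. A hedged wiring sketch using gRPC's ServerInterceptors.intercept; the service parameter stands for any generated BindableService implementation, and the logger name is illustrative:

import io.grpc.{BindableService, Server, ServerBuilder, ServerInterceptors}
import org.slf4j.LoggerFactory

object GrpcServerSketch {
  // Builds and starts a server with the logging interceptor in front of the service.
  def start(service: BindableService, port: Int): Server = {
    val logger = LoggerFactory.getLogger("grpc-access")
    ServerBuilder
      .forPort(port)
      .addService(ServerInterceptors.intercept(service, new LoggingServerInterceptor(logger)))
      .build()
      .start()
  }
}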
Example 118
Source File: MysqlSink1.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.jdbcOutput import java.sql.{Connection, DriverManager, PreparedStatement, SQLException} import org.apache.flink.api.common.io.OutputFormat import org.apache.flink.configuration.Configuration import org.slf4j.{Logger, LoggerFactory} class MysqlSink1 extends OutputFormat[User]{ val logger: Logger = LoggerFactory.getLogger("MysqlSink1") var conn: Connection = _ var ps: PreparedStatement = _ val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true" val username = "root" val password = "123456" val driverName = "com.mysql.jdbc.Driver" override def configure(parameters: Configuration): Unit = { // not need } override def open(taskNumber: Int, numTasks: Int): Unit = { Class.forName(driverName) try { Class.forName(driverName) conn = DriverManager.getConnection(jdbcUrl, username, password) // close auto commit conn.setAutoCommit(false) } catch { case e@(_: ClassNotFoundException | _: SQLException) => logger.error("init mysql error") e.printStackTrace() System.exit(-1); } } override def writeRecord(user: User): Unit = { println("get user : " + user.toString) ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)") ps.setString(1, user.username) ps.setString(2, user.password) ps.setInt(3, user.sex) ps.setString(4, user.phone) ps.execute() conn.commit() } override def close(): Unit = { if (conn != null){ conn.commit() conn.close() } } }
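An OutputFormat like this is attached to a stream with writeUsingOutputFormat. A hedged usage sketch, placed in the same package so the User type resolves; the users parameter stands for any DataStream[User], for example one parsed from Kafka or a socket:

package com.venn.stream.api.jdbcOutput

import org.apache.flink.streaming.api.scala.DataStream

object MysqlSink1Usage {
  // writeUsingOutputFormat hands each record to MysqlSink1.writeRecord.
  def writeToMysql(users: DataStream[User]): Unit = {
    users.writeUsingOutputFormat(new MysqlSink1)
  }
}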
Example 119
Source File: MysqlSink.scala From flink-rookie with Apache License 2.0 | 5 votes |
package com.venn.stream.api.jdbcOutput

import java.sql.{Connection, DriverManager, PreparedStatement, SQLException}

import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.slf4j.{Logger, LoggerFactory}

class MysqlSink extends RichSinkFunction[User] {

  val logger: Logger = LoggerFactory.getLogger("MysqlSink")
  var conn: Connection = _
  var ps: PreparedStatement = _
  val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"
  val username = "root"
  val password = "123456"
  val driverName = "com.mysql.jdbc.Driver"

  override def open(parameters: Configuration): Unit = {
    try {
      Class.forName(driverName)
      conn = DriverManager.getConnection(jdbcUrl, username, password)
      // disable auto commit so each record is committed explicitly
      conn.setAutoCommit(false)
    } catch {
      case e@(_: ClassNotFoundException | _: SQLException) =>
        logger.error("init mysql error", e)
        System.exit(-1)
    }
  }

  override def invoke(user: User, context: SinkFunction.Context[_]): Unit = {
    println("get user : " + user.toString)
    ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)")
    ps.setString(1, user.username)
    ps.setString(2, user.password)
    ps.setInt(3, user.sex)
    ps.setString(4, user.phone)
    ps.execute()
    conn.commit()
    ps.close()
  }

  override def close(): Unit = {
    if (conn != null) {
      conn.commit()
      conn.close()
    }
  }
}
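The RichSinkFunction variant plugs in with addSink rather than writeUsingOutputFormat; reusing the hypothetical users stream from the Example 118 sketch:

users.addSink(new MysqlSink).name("mysql-sink")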
Example 120
Source File: SparkNRedshiftUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift import java.sql.{Connection, DriverManager} import java.util.Properties import com.databricks.spark.redshift.RedshiftReaderM import com.typesafe.config.Config import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, Suite} import org.slf4j.{Logger, LoggerFactory} trait SparkNRedshiftUtil extends BeforeAndAfterAll { self: Suite => private val logger: Logger = LoggerFactory.getLogger(this.getClass) @transient private var _sc: SparkContext = _ @transient private var _sqlContext: SQLContext = _ def sc: SparkContext = _sc def sqlContext: SQLContext = _sqlContext private def getRedshiftConnection(config: Config): Connection = { val mysql = config.getConfig("redshift") val connectionProps = new Properties() connectionProps.put("user", mysql.getString("username")) connectionProps.put("password", mysql.getString("password")) val jdbcUrl = s"jdbc:redshift://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("database")}?useSSL=false" Class.forName("com.amazon.redshift.jdbc4.Driver") DriverManager.getConnection(jdbcUrl, connectionProps) } val getSparkContext: (SparkContext, SQLContext) = { val sparkConf: SparkConf = new SparkConf().setAppName("Full Dump Testing").setMaster("local") val sc: SparkContext = new SparkContext(sparkConf) val sqlContext: SQLContext = new SQLContext(sc) System.setProperty("com.amazonaws.services.s3.enableV4", "true") sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.ap-south-1.amazonaws.com") sc.hadoopConfiguration.set("fs.s3a.fast.upload", "true") (sc, sqlContext) } def readTableFromRedshift(config: Config, tableName: String): DataFrame = { val redshift: Config = config.getConfig("redshift") val options = Map("dbtable" -> tableName, "user" -> redshift.getString("username"), "password" -> redshift.getString("password"), "url" -> s"jdbc:redshift://${redshift.getString("hostname")}:${redshift.getInt("portno")}/${redshift.getString("database")}", "tempdir" -> config.getString("s3.location"), "aws_iam_role" -> config.getString("redshift.iamRole") ) RedshiftReaderM.getDataFrameForConfig(options, sc, sqlContext) } def dropTableRedshift(config: Config, tables: String*): Unit = { logger.info("Droping table: {}", tables) val conn = getRedshiftConnection(config) val statement = conn.createStatement() try { val dropTableQuery = s"""DROP TABLE ${tables.mkString(",")}""" logger.info("Running query: {}", dropTableQuery) statement.executeUpdate(dropTableQuery) } finally { statement.close() conn.close() } } override protected def beforeAll(): Unit = { super.beforeAll() val (sc, sqlContext) = getSparkContext _sc = sc _sqlContext = sqlContext } override protected def afterAll(): Unit = { super.afterAll() _sc.stop() } }
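A hedged example of a suite mixing in this trait; the spec name, config file and table are invented for illustration:

package com.goibibo.sqlshift

import com.typesafe.config.{Config, ConfigFactory}
import org.scalatest.FlatSpec

class FullDumpSpec extends FlatSpec with SparkNRedshiftUtil {
  // assumed test configuration containing the "redshift" and "s3" sections used above
  private val config: Config = ConfigFactory.load("testsuite.conf")

  "A migrated table" should "be readable back from Redshift" in {
    val df = readTableFromRedshift(config, "public.users")
    assert(df.count() >= 0)
  }
}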
Example 121
Source File: MySQLUtil.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift import java.net.URL import java.sql.{Connection, DriverManager} import java.util.Properties import com.typesafe.config.Config import org.slf4j.{Logger, LoggerFactory} import scala.io.Source object MySQLUtil { private val logger: Logger = LoggerFactory.getLogger(this.getClass) private def getMySQLConnection(config: Config): Connection = { val mysql = config.getConfig("mysql") val connectionProps = new Properties() connectionProps.put("user", mysql.getString("username")) connectionProps.put("password", mysql.getString("password")) val jdbcUrl = s"jdbc:mysql://${mysql.getString("hostname")}:${mysql.getInt("portno")}/${mysql.getString("db")}?createDatabaseIfNotExist=true&useSSL=false" Class.forName("com.mysql.jdbc.Driver") DriverManager.getConnection(jdbcUrl, connectionProps) } def createTableAndInsertRecords(config: Config, tableName: String, psvFile: URL): Unit = { logger.info("Inserting records in table: {}", tableName) val records = Source.fromFile(psvFile.toURI).getLines().toList.drop(1) // removing header val conn = getMySQLConnection(config) val statement = conn.createStatement() try { val tableCreateQuery = config.getString("table.tableCreateQuery").replace("${tableName}", tableName) logger.info("Running query: {}", tableCreateQuery) statement.executeUpdate(tableCreateQuery) val insertIntoQuery = config.getString("table.insertIntoQuery").replace("${tableName}", tableName) logger.info("Running query: {}", insertIntoQuery) records.foreach { record: String => val columns = record.split("\\|") val query = insertIntoQuery.format(columns: _*) statement.executeUpdate(query) } } finally { statement.close() conn.close() } } }
Example 122
Source File: MailAPI.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift.alerting import java.util.Properties import javax.mail.Message.RecipientType import javax.mail.internet.{InternetAddress, MimeMessage, _} import javax.mail.{Authenticator, PasswordAuthentication, Session, Transport} import com.goibibo.sqlshift.models.Configurations.AppConfiguration import com.goibibo.sqlshift.models.Params.MailParams import org.slf4j.{Logger, LoggerFactory} class MailAPI(mailParams: MailParams) { private val logger: Logger = LoggerFactory.getLogger(classOf[MailAPI]) private val prop = new Properties() { put("mail.smtp.host", mailParams.host) put("mail.smtp.port", mailParams.port.toString) } private val session: Session = mailParams.password match { case Some(password) => prop.setProperty("mail.smtp.auth", "true") Session.getDefaultInstance(prop, new Authenticator { override def getPasswordAuthentication: PasswordAuthentication = { new PasswordAuthentication(mailParams.username, password) } }) case None => Session.getDefaultInstance(prop) } def send(appConfs: List[AppConfiguration]): Unit = { val from = "[email protected]" logger.info("Mail from: {}", from) var subject = "SQLShift:" var text = "<html>" + "<body>" + "<table border='1' style='width:100%' bgcolor='#F5F5F5'>" + "<tr> <th size=6>Mysql schema</th>" + "<th size=6>Mysql table_name</th>" + "<th size=6>Redshift schema</th>" + "<th size=6>Status</th>" + "<th size=6>Migration Time(sec)</th>" + "<th size=6>Error</th></tr>" logger.info(s"Mail to: '${mailParams.to}' and cc: '${mailParams.cc}'") val tos: List[String] = mailParams.to.split(",").toList var ccs: List[String] = List() if (mailParams.cc != "") ccs = mailParams.cc.split(",").toList var errorCnt = 0 var successCnt = 0 for (appConf <- appConfs) { text += "<tr>" + "<td bgcolor='#FFE4C4'>" + appConf.mysqlConf.db + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.mysqlConf.tableName + "</td>" + "<td bgcolor='#F5F5DC'>" + appConf.redshiftConf.schema + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.status.get.isSuccessful + "</td>" + "<td bgcolor='#E0FFFF'>" + appConf.migrationTime.get + "</td>" if (appConf.status.get.isSuccessful) { successCnt += 1 } else { text += "<td bgcolor='#F0FFFF'>%s\n%s</td></tr>" .format(appConf.status.get.e.getMessage, appConf.status.get.e.getStackTrace.mkString("\n")) errorCnt += 1 } } subject += " Failed " + errorCnt.toString + " Success " + successCnt.toString + mailParams.subject text += "</table></body></html>" logger.info("Subject: {}", subject) val message = new MimeMessage(session) message.setFrom(new InternetAddress(from)) for (to <- tos) message.addRecipient(RecipientType.TO, new InternetAddress(to)) for (cc <- ccs) message.addRecipient(RecipientType.CC, new InternetAddress(cc)) message.setSubject(subject) message.setText(text) val mimeBdyPart = new MimeBodyPart() mimeBdyPart.setContent(text, "text/html; charset=utf-8") val multiPart = new MimeMultipart() logger.info("Sending message...") multiPart.addBodyPart(mimeBdyPart) message.setContent(multiPart) Transport.send(message) } }
Example 123
Source File: MetricsWrapper.scala From SqlShift with MIT License | 5 votes |
package com.goibibo.sqlshift.commons import java.util.concurrent.TimeUnit import com.codahale.metrics.Timer.Context import com.codahale.metrics._ import org.slf4j.{Logger, LoggerFactory} def stopSLF4JReporting(): Unit = { slf4jReporter.stop() } def stopJMXReporting(): Unit = { jmxReporter.stop() } def getTimerMetrics(metricName: String): Context = { val timer: Timer = metricRegistry.timer(metricName) timer.time() } def stopTimerContext(context: Context): Long = { context.stop() } def registerGauge(metricName: String, value: Boolean): Gauge[Boolean] = { try { metricRegistry.register(metricName, new Gauge[Boolean] { override def getValue: Boolean = { value } }) } catch { case e: IllegalArgumentException => logger.warn(s"$metricName gauge metric is already registered!!!") metricRegistry.getGauges.get(metricName).asInstanceOf[Gauge[Boolean]] } } def registerGauge(metricName: String, value: Int): Gauge[Int] = { try { metricRegistry.register(metricName, new Gauge[Int] { override def getValue: Int = { value } }) } catch { case e: IllegalArgumentException => logger.warn(s"$metricName gauge metric is already registered!!!") metricRegistry.getGauges.get(metricName).asInstanceOf[Gauge[Int]] } } def incCounter(metricName: String, incValue: Long = 1): Unit = { metricRegistry.counter(metricName).inc(incValue) } }
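The snippet above references metricRegistry, logger, slf4jReporter and jmxReporter without showing their definitions. A sketch of what those declarations could look like with Dropwizard metrics-core, offered purely as an assumption rather than the project's actual code:

import java.util.concurrent.TimeUnit

import com.codahale.metrics.{JmxReporter, MetricRegistry, Slf4jReporter}
import org.slf4j.{Logger, LoggerFactory}

object MetricsWrapper {
  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val metricRegistry = new MetricRegistry

  private val slf4jReporter: Slf4jReporter = Slf4jReporter
    .forRegistry(metricRegistry)
    .outputTo(logger)
    .convertRatesTo(TimeUnit.SECONDS)
    .convertDurationsTo(TimeUnit.MILLISECONDS)
    .build()

  private val jmxReporter: JmxReporter = JmxReporter.forRegistry(metricRegistry).build()

  def startSLF4JReporting(periodSeconds: Long): Unit = slf4jReporter.start(periodSeconds, TimeUnit.SECONDS)

  def startJMXReporting(): Unit = jmxReporter.start()

  // ... the stop/timer/gauge/counter methods shown above would follow here ...
}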
Example 124
Source File: cacheUpdates.scala From AckCord with MIT License | 5 votes |
package ackcord import ackcord.cachehandlers.{CacheHandler, CacheSnapshotBuilder, CacheTypeRegistry} import ackcord.gateway.Dispatch import org.slf4j.Logger case class APIMessageCacheUpdate[Data]( data: Data, sendEvent: CacheState => Option[APIMessage], handler: CacheHandler[Data], registry: CacheTypeRegistry, dispatch: Dispatch[_] ) extends CacheEvent { override def process(builder: CacheSnapshotBuilder)(implicit log: Logger): Unit = handler.handle(builder, data, registry) }
Example 125
Source File: CacheStreams.scala From AckCord with MIT License | 5 votes |
package ackcord import scala.collection.mutable import ackcord.cachehandlers.CacheSnapshotBuilder import ackcord.gateway.GatewayEvent.ReadyData import ackcord.gateway.GatewayMessage import ackcord.requests.SupervisionStreams import akka.NotUsed import akka.actor.typed.ActorSystem import akka.stream.scaladsl.{BroadcastHub, Flow, Keep, MergeHub, Sink, Source} import org.slf4j.Logger object CacheStreams { def cacheUpdater( cacheProcessor: MemoryCacheSnapshot.CacheProcessor )(implicit system: ActorSystem[Nothing]): Flow[CacheEvent, (CacheEvent, CacheState), NotUsed] = Flow[CacheEvent].statefulMapConcat { () => var state: CacheState = null implicit val log: Logger = system.log //We only handle events when we are ready to, and we have received the ready event. def isReady: Boolean = state != null { case readyEvent @ APIMessageCacheUpdate(_: ReadyData, _, _, _, _) => val builder = new CacheSnapshotBuilder( 0, null, //The event will populate this, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, cacheProcessor ) readyEvent.process(builder) val snapshot = builder.toImmutable state = CacheState(snapshot, snapshot) List(readyEvent -> state) case handlerEvent: CacheEvent if isReady => val builder = CacheSnapshotBuilder(state.current) handlerEvent.process(builder) state = state.update(builder.toImmutable) List(handlerEvent -> state) case _ if !isReady => log.error("Received event before ready") Nil } } }
Example 126
Source File: MLPMnistSingleLayerExample.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.examples.feedforward.mnist import org.dl4scala.datasets.iterator.impl.MnistDataSetIterator import org.slf4j.LoggerFactory import org.slf4j.Logger import org.deeplearning4j.eval.Evaluation import org.deeplearning4j.nn.multilayer.MultiLayerNetwork import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.deeplearning4j.nn.api.OptimizationAlgorithm import org.deeplearning4j.nn.conf.NeuralNetConfiguration import org.deeplearning4j.nn.conf.Updater import org.deeplearning4j.nn.conf.layers.DenseLayer import org.deeplearning4j.nn.conf.layers.OutputLayer import org.deeplearning4j.nn.weights.WeightInit import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction object MLPMnistSingleLayerExample extends App{ private val log: Logger = LoggerFactory.getLogger(MLPMnistSingleLayerExample.getClass) // number of rows and columns in the input pictures//number of rows and columns in the input pictures private val numRows = 28 private val numColumns = 28 private val outputNum = 10 // number of output classes private val batchSize = 128 // batch size for each epoch private val rngSeed = 123 // random number seed for reproducibility private val numEpochs = 15 // number of epochs to perform // Get the DataSetIterators://Get the DataSetIterators: private val mnistTrain = new MnistDataSetIterator(batchSize, true, rngSeed) private val mnistTest = new MnistDataSetIterator(batchSize, false, rngSeed) log.info("Build model....") private val conf = new NeuralNetConfiguration .Builder() .seed(rngSeed) // include a random seed for reproducibility .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // use stochastic gradient descent as an optimization algorithm .iterations(1) .learningRate(0.006) // specify the learning rate .updater(Updater.NESTEROVS) .regularization(true).l2(1e-4) .list() .layer(0, new DenseLayer.Builder() // create the first, input layer with xavier initialization .nIn(numRows * numColumns) .nOut(1000) .activation(Activation.RELU) .weightInit(WeightInit.XAVIER) .build()) .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) // create hidden layer .nIn(1000) .nOut(outputNum) .activation(Activation.SOFTMAX) .weightInit(WeightInit.XAVIER) .build()) .pretrain(false).backprop(true) // use backpropagation to adjust weights .build() private val model = new MultiLayerNetwork(conf) model.init() // print the score with every 1 iteration model.setListeners(new ScoreIterationListener(1)) log.info("Train model....") for(i <- 0 until numEpochs){ model.fit(mnistTrain) } log.info("Evaluate model....") val eval = new Evaluation(outputNum) // create an evaluation object with 10 possible classes while(mnistTest.hasNext){ val next = mnistTest.next() val output = model.output(next.getFeatureMatrix) // get the networks prediction eval.eval(next.getLabels, output) // check the prediction against the true class } log.info(eval.stats) log.info("****************Example finished********************") }
Example 127
Source File: CacheTypeRegistry.scala From AckCord with MIT License | 5 votes |
package ackcord.cachehandlers import scala.reflect.ClassTag import ackcord.data._ import ackcord.data.raw.PartialUser import org.slf4j.Logger class CacheTypeRegistry( val updateHandlers: Map[Class[_], CacheUpdater[_]], val deleteHandlers: Map[Class[_], CacheDeleter[_]], log: Logger ) { private def handleWithData[D: ClassTag, HandlerTpe[-A] <: CacheHandler[A]]( handlers: Map[Class[_], HandlerTpe[_]], tpe: String, data: => D, builder: CacheSnapshotBuilder ): Unit = getWithData[D, HandlerTpe](tpe, handlers).foreach(handler => handler.handle(builder, data, this)(log)) def updateData[D: ClassTag](builder: CacheSnapshotBuilder)(data: => D): Unit = handleWithData(updateHandlers, "updater", data, builder) def deleteData[D: ClassTag](builder: CacheSnapshotBuilder)(data: => D): Unit = handleWithData(deleteHandlers, "deleter", data, builder) private def getWithData[D, HandlerTpe[-A] <: CacheHandler[A]]( tpe: String, handlers: Map[Class[_], HandlerTpe[_]] )(implicit tag: ClassTag[D]): Option[HandlerTpe[D]] = { val res = handlers .get(tag.runtimeClass) .asInstanceOf[Option[HandlerTpe[D]]] .orElse(handlers.find(_._1.isAssignableFrom(tag.runtimeClass)).map(_._2.asInstanceOf[HandlerTpe[D]])) if (res.isEmpty) { log.debug(s"No $tpe found for ${tag.runtimeClass}") } res } def getUpdater[D: ClassTag]: Option[CacheUpdater[D]] = getWithData("updater", updateHandlers) def getDeleter[D: ClassTag]: Option[CacheDeleter[D]] = getWithData("deleter", deleteHandlers) def hasUpdater[D: ClassTag]: Boolean = getUpdater.isDefined def hasDeleter[D: ClassTag]: Boolean = getDeleter.isDefined } object CacheTypeRegistry { private val noPresencesBansEmojiUpdaters: Map[Class[_], CacheUpdater[_]] = Map( classOf[PartialUser] -> CacheHandlers.partialUserUpdater, classOf[Guild] -> CacheHandlers.guildUpdater, classOf[GuildMember] -> CacheHandlers.guildMemberUpdater, classOf[GuildChannel] -> CacheHandlers.guildChannelUpdater, classOf[DMChannel] -> CacheHandlers.dmChannelUpdater, classOf[GroupDMChannel] -> CacheHandlers.dmGroupChannelUpdater, classOf[User] -> CacheHandlers.userUpdater, classOf[UnavailableGuild] -> CacheHandlers.unavailableGuildUpdater, classOf[Message] -> CacheHandlers.messageUpdater, classOf[Role] -> CacheHandlers.roleUpdater ) private val noPresencesUpdaters: Map[Class[_], CacheUpdater[_]] = noPresencesBansEmojiUpdaters ++ Map( classOf[Ban] -> CacheUpdater.dummy[Ban], classOf[Emoji] -> CacheUpdater.dummy[Emoji] ) private val allUpdaters: Map[Class[_], CacheUpdater[_]] = noPresencesUpdaters + (classOf[Presence] -> CacheUpdater.dummy[Presence]) private val noBanDeleters: Map[Class[_], CacheDeleter[_]] = Map( classOf[GuildChannel] -> CacheHandlers.guildChannelDeleter, classOf[DMChannel] -> CacheHandlers.dmChannelDeleter, classOf[GroupDMChannel] -> CacheHandlers.groupDmChannelDeleter, classOf[GuildMember] -> CacheHandlers.guildMemberDeleter, classOf[Role] -> CacheHandlers.roleDeleter, classOf[Message] -> CacheHandlers.messageDeleter ) private val allDeleters: Map[Class[_], CacheDeleter[_]] = noBanDeleters + (classOf[Ban] -> CacheDeleter.dummy[Ban]) def default(log: Logger) = new CacheTypeRegistry(allUpdaters, allDeleters, log) def noPresences(log: Logger) = new CacheTypeRegistry(noPresencesUpdaters, allDeleters, log) def noPresencesBansEmoji(log: Logger) = new CacheTypeRegistry(noPresencesBansEmojiUpdaters, noBanDeleters, log) }
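A small usage sketch (logger name invented): default wires in every updater and deleter, so resolving the handler for Message via its ClassTag is expected to succeed.

import ackcord.cachehandlers.CacheTypeRegistry
import ackcord.data.{Ban, Message}
import org.slf4j.LoggerFactory

object RegistryDemo {
  val registry = CacheTypeRegistry.default(LoggerFactory.getLogger("ackcord.cache"))

  // resolved through the ClassTag lookup shown above
  val messageUpdater = registry.getUpdater[Message]
  val canDeleteBans = registry.hasDeleter[Ban]
}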
Example 128
Source File: ReadyUpdater.scala From AckCord with MIT License | 5 votes |
package ackcord.cachehandlers

import ackcord.CacheSnapshot.BotUser
import ackcord.gateway.GatewayEvent.ReadyData
import org.slf4j.Logger
import shapeless.tag

// We handle this one separately as it's kind of special
object ReadyUpdater extends CacheUpdater[ReadyData] {
  override def handle(builder: CacheSnapshotBuilder, obj: ReadyData, registry: CacheTypeRegistry)(
      implicit log: Logger
  ): Unit = {
    val ReadyData(_, botUser, unavailableGuilds, _, _) = obj

    val guilds = unavailableGuilds.map(g => g.id -> g)

    builder.botUser = tag[BotUser](botUser)
    builder.unavailableGuildMap ++= guilds
  }
}
Example 129
Source File: VoiceUDPHandler.scala From AckCord with MIT License | 5 votes |
package ackcord.voice import java.net.InetSocketAddress import scala.concurrent.duration._ import scala.util.{Failure, Success} import ackcord.data.{RawSnowflake, UserId} import akka.NotUsed import akka.actor.typed._ import akka.actor.typed.scaladsl._ import akka.stream.OverflowStrategy import akka.stream.scaladsl.{Keep, Sink, Source, SourceQueueWithComplete} import akka.util.ByteString import org.slf4j.Logger object VoiceUDPHandler { def apply( address: String, port: Int, ssrc: Int, serverId: RawSnowflake, userId: UserId, soundProducer: Source[ByteString, NotUsed], soundConsumer: Sink[AudioAPIMessage, NotUsed], parent: ActorRef[VoiceHandler.Command] ): Behavior[Command] = Behaviors .supervise( Behaviors.setup[Command] { ctx => implicit val system: ActorSystem[Nothing] = ctx.system val ((queue, futIp), watchDone) = soundProducer .viaMat( VoiceUDPFlow .flow( new InetSocketAddress(address, port), ssrc, serverId, userId, Source.queue[Option[ByteString]](0, OverflowStrategy.dropBuffer) ) .watchTermination()(Keep.both) )(Keep.right) .to(soundConsumer) .run() ctx.pipeToSelf(futIp) { case Success(value) => IPDiscoveryResult(value) case Failure(e) => SendExeption(e) } ctx.pipeToSelf(watchDone)(_ => ConnectionDied) handle(ctx, ctx.log, ssrc, queue, parent) } ) .onFailure( SupervisorStrategy .restartWithBackoff(100.millis, 5.seconds, 1D) .withResetBackoffAfter(10.seconds) .withMaxRestarts(5) ) def handle( ctx: ActorContext[Command], log: Logger, ssrc: Int, queue: SourceQueueWithComplete[Option[ByteString]], parent: ActorRef[VoiceHandler.Command] ): Behavior[Command] = Behaviors.receiveMessage { case SendExeption(e) => throw e case ConnectionDied => Behaviors.stopped case Shutdown => queue.complete() Behaviors.same case IPDiscoveryResult(VoiceUDPFlow.FoundIP(localAddress, localPort)) => parent ! VoiceHandler.GotLocalIP(localAddress, localPort) Behaviors.same case SetSecretKey(key) => queue.offer(key) Behaviors.same } sealed trait Command case object Shutdown extends Command private case class SendExeption(e: Throwable) extends Command private case object ConnectionDied extends Command private case class IPDiscoveryResult(foundIP: VoiceUDPFlow.FoundIP) extends Command private[voice] case class SetSecretKey(key: Option[ByteString]) extends Command }
Example 130
Source File: LoggingTrait.scala From azure-sqldb-spark with MIT License | 5 votes |
package com.microsoft.azure.sqldb.spark import org.slf4j.{Logger, LoggerFactory} private[spark] trait LoggingTrait { // Make the log field transient so that objects with Logging can // be serialized and used on another machine @transient private var log_ : Logger = null // scalastyle:ignore // Method to get the logger name for this object protected def logName = { // Ignore trailing $'s in the class names for Scala objects this.getClass.getName.stripSuffix("$") } // Method to get or create the logger for this object protected def log: Logger = { if (log_ == null) { // scalastyle:ignore log_ = LoggerFactory.getLogger(logName) } log_ } // Log methods that take only a String protected def logInfo(msg: => String) { if (log.isInfoEnabled) log.info(msg) } protected def logDebug(msg: => String) { if (log.isDebugEnabled) log.debug(msg) } protected def logTrace(msg: => String) { if (log.isTraceEnabled) log.trace(msg) } protected def logWarning(msg: => String) { if (log.isWarnEnabled) log.warn(msg) } protected def logError(msg: => String) { if (log.isErrorEnabled) log.error(msg) } // Log methods that take Throwables (Exceptions/Errors) too protected def logInfo(msg: => String, throwable: Throwable) { if (log.isInfoEnabled) log.info(msg, throwable) } protected def logDebug(msg: => String, throwable: Throwable) { if (log.isDebugEnabled) log.debug(msg, throwable) } protected def logTrace(msg: => String, throwable: Throwable) { if (log.isTraceEnabled) log.trace(msg, throwable) } protected def logWarning(msg: => String, throwable: Throwable) { if (log.isWarnEnabled) log.warn(msg, throwable) } protected def logError(msg: => String, throwable: Throwable) { if (log.isErrorEnabled) log.error(msg, throwable) } }
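A hypothetical consumer of the trait (class name and sub-package are assumptions; since the trait is private[spark], callers must live under the same spark package):

package com.microsoft.azure.sqldb.spark.demo

import com.microsoft.azure.sqldb.spark.LoggingTrait

private[spark] class BulkCopyRunner extends LoggingTrait {
  def run(rowCount: Int): Unit = {
    // the by-name msg parameter means the string is only built when INFO is enabled
    logInfo(s"Starting bulk copy of $rowCount rows")
    try {
      // ... perform the copy ...
    } catch {
      case e: Exception => logError("Bulk copy failed", e)
    }
  }
}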
Example 131
Source File: IrisMLP.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples.scalnet import io.brunk.examples.IrisReader import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator import org.deeplearning4j.eval.Evaluation import org.deeplearning4j.nn.conf.Updater import org.deeplearning4j.nn.weights.WeightInit import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.deeplearning4j.scalnet.layers.core.Dense import org.deeplearning4j.scalnet.models.Sequential import org.deeplearning4j.scalnet.regularizers.L2 import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.api.ndarray.INDArray import org.nd4j.linalg.learning.config.Sgd import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import org.slf4j.{Logger, LoggerFactory} object IrisMLP { private val log: Logger = LoggerFactory.getLogger(IrisMLP.getClass) def main(args: Array[String]): Unit = { val seed = 1 val numInputs = 4 val numHidden = 10 val numOutputs = 3 val learningRate = 0.1 val iterations = 1000 val testAndTrain = IrisReader.readData() val trainList = testAndTrain.getTrain.asList() val trainIterator = new ListDataSetIterator(trainList, trainList.size) val model = Sequential(rngSeed = seed) model.add(Dense(numHidden, nIn = numInputs, weightInit = WeightInit.XAVIER, activation = Activation.RELU)) model.add(Dense(numOutputs, weightInit = WeightInit.XAVIER, activation = Activation.SOFTMAX)) model.compile(lossFunction = LossFunction.NEGATIVELOGLIKELIHOOD, updater = Updater.SGD) log.info("Running training") model.fit(iter = trainIterator, nbEpoch = iterations, listeners = List(new ScoreIterationListener(100))) log.info("Training finished") log.info(s"Evaluating model on ${testAndTrain.getTest.getLabels.rows()} examples") val evaluator = new Evaluation(numOutputs) val output: INDArray = model.predict(testAndTrain.getTest.getFeatureMatrix) evaluator.eval(testAndTrain.getTest.getLabels, output) log.info(evaluator.stats()) } }
Example 132
Source File: MnistMLP.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples.scalnet import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator import org.deeplearning4j.eval.Evaluation import org.deeplearning4j.nn.conf.Updater import org.deeplearning4j.nn.weights.WeightInit import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.deeplearning4j.scalnet.layers.core.Dense import org.deeplearning4j.scalnet.models.Sequential import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.dataset.api.iterator.DataSetIterator import org.nd4j.linalg.learning.config.Sgd import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConverters.asScalaIteratorConverter object MnistMLP { private val log: Logger = LoggerFactory.getLogger(MnistMLP.getClass) def main(args: Array[String]): Unit = { val seed = 1 // for reproducibility val numInputs = 28 * 28 val numHidden = 512 // size (number of neurons) in our hidden layer val numOutputs = 10 // digits from 0 to 9 val learningRate = 0.01 val batchSize = 128 val numEpochs = 10 // download and load the MNIST images as tensors val mnistTrain: DataSetIterator = new MnistDataSetIterator(batchSize, true, seed) val mnistTest: DataSetIterator = new MnistDataSetIterator(batchSize, false, seed) // define the neural network architecture val model: Sequential = Sequential(rngSeed = seed) model.add(Dense(nOut = numHidden, nIn = numInputs, weightInit = WeightInit.XAVIER, activation = Activation.RELU)) model.add(Dense(nOut = numOutputs, weightInit = WeightInit.XAVIER, activation = Activation.RELU)) model.compile(lossFunction = LossFunction.MCXENT, updater = Updater.SGD) // TODO how do we set the learning rate? // train the model model.fit(mnistTrain, nbEpoch = numEpochs, List(new ScoreIterationListener(100))) // evaluate model performance def accuracy(dataSet: DataSetIterator): Double = { val evaluator = new Evaluation(numOutputs) dataSet.reset() for (dataSet <- dataSet.asScala) { val output = model.predict(dataSet) evaluator.eval(dataSet.getLabels, output) } evaluator.accuracy() } log.info(s"Train accuracy = ${accuracy(mnistTrain)}") log.info(s"Test accuracy = ${accuracy(mnistTest)}") } }
Example 133
Source File: IrisMLP.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples.dl4j import io.brunk.examples.IrisReader import org.deeplearning4j.eval.Evaluation import org.deeplearning4j.nn.conf.NeuralNetConfiguration import org.deeplearning4j.nn.conf.layers.{ DenseLayer, OutputLayer } import org.deeplearning4j.nn.multilayer.MultiLayerNetwork import org.deeplearning4j.nn.weights.WeightInit import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import org.slf4j.{ Logger, LoggerFactory } object IrisMLP { private val log: Logger = LoggerFactory.getLogger(IrisMLP.getClass) def main(args: Array[String]): Unit = { val seed = 1 // for reproducibility val numInputs = 4 val numHidden = 10 val numOutputs = 3 val learningRate = 0.1 val numEpoch = 30 val testAndTrain = IrisReader.readData() val conf = new NeuralNetConfiguration.Builder() .seed(seed) .activation(Activation.RELU) .weightInit(WeightInit.XAVIER) .list() .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHidden).build()) .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) .activation(Activation.SOFTMAX) .nIn(numHidden) .nOut(numOutputs) .build()) .backprop(true) .pretrain(false) .build() val model = new MultiLayerNetwork(conf) model.init() model.setListeners(new ScoreIterationListener(100)) // print out scores every 100 iterations log.info("Running training") for(_ <- 0 until numEpoch) model.fit(testAndTrain.getTrain) log.info("Training finished") log.info(s"Evaluating model on ${testAndTrain.getTest.getLabels.rows()} examples") val evaluator = new Evaluation(numOutputs) val output = model.output(testAndTrain.getTest.getFeatureMatrix) evaluator.eval(testAndTrain.getTest.getLabels, output) println(evaluator.stats) } }
Example 134
Source File: ScorexLogging.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.utils import monix.eval.Task import monix.execution.{CancelableFuture, Scheduler} import monix.reactive.Observable import org.slf4j.{Logger, LoggerFactory} case class LoggerFacade(logger: Logger) { def trace(message: => String, throwable: Throwable): Unit = { if (logger.isTraceEnabled) logger.trace(message, throwable) } def trace(message: => String): Unit = { if (logger.isTraceEnabled) logger.trace(message) } def debug(message: => String, arg: Any): Unit = { if (logger.isDebugEnabled) logger.debug(message, arg) } def debug(message: => String): Unit = { if (logger.isDebugEnabled) logger.debug(message) } def info(message: => String): Unit = { if (logger.isInfoEnabled) logger.info(message) } def info(message: => String, arg: Any): Unit = { if (logger.isInfoEnabled) logger.info(message, arg) } def info(message: => String, throwable: Throwable): Unit = { if (logger.isInfoEnabled) logger.info(message, throwable) } def warn(message: => String): Unit = { if (logger.isWarnEnabled) logger.warn(message) } def warn(message: => String, throwable: Throwable): Unit = { if (logger.isWarnEnabled) logger.warn(message, throwable) } def error(message: => String): Unit = { if (logger.isErrorEnabled) logger.error(message) } def error(message: => String, throwable: Throwable): Unit = { if (logger.isErrorEnabled) logger.error(message, throwable) } } trait ScorexLogging { protected lazy val log = LoggerFacade(LoggerFactory.getLogger(this.getClass)) implicit class TaskExt[A](t: Task[A]) { def runAsyncLogErr(implicit s: Scheduler): CancelableFuture[A] = logErr.runToFuture(s) def logErr: Task[A] = { t.onErrorHandleWith(ex => { log.error(s"Error executing task", ex) Task.raiseError[A](ex) }) } } implicit class ObservableExt[A](o: Observable[A]) { def logErr: Observable[A] = { o.onErrorHandleWith(ex => { log.error(s"Error observing item", ex) Observable.raiseError[A](ex) }) } } }
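A hedged usage sketch; the object name and the work inside the Task are invented:

import com.wavesplatform.utils.ScorexLogging
import monix.eval.Task
import monix.execution.Scheduler.Implicits.global

object BlockImporter extends ScorexLogging {
  def importBlock(id: String): Unit = {
    log.debug("Importing block {}", id)
    val work = Task {
      // ... validate and persist the block ...
    }
    // runAsyncLogErr logs any failure through the facade before re-raising it inside the Task
    work.runAsyncLogErr
  }
}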
Example 135
Source File: LogPublisherHub.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.akka import akka.actor.{ ActorRef, ActorSystem } import ch.qos.logback.classic.filter.ThresholdFilter import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.classic.{ Level, LoggerContext, Logger ⇒ LogbackLogger } import ch.qos.logback.core.AppenderBase import io.vamp.common.Namespace import org.slf4j.{ Logger, LoggerFactory } import scala.collection.mutable object LogPublisherHub { private val logger = LoggerFactory.getLogger(LogPublisherHub.getClass) private val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext] private val rootLogger = context.getLogger(Logger.ROOT_LOGGER_NAME) private val sessions: mutable.Map[String, LogPublisher] = new mutable.HashMap() def subscribe(to: ActorRef, level: String, loggerName: Option[String], encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace): Unit = { val appenderLevel = Level.toLevel(level, Level.INFO) val appenderLogger = loggerName.map(context.getLogger).getOrElse(rootLogger) val exists = sessions.get(to.toString).exists { publisher ⇒ publisher.level == appenderLevel && publisher.logger.getName == appenderLogger.getName } if (!exists) { unsubscribe(to) if (appenderLevel != Level.OFF) { logger.info(s"Starting log publisher [${appenderLevel.levelStr}] '${appenderLogger.getName}': $to") val publisher = LogPublisher(to, appenderLogger, appenderLevel, encoder) publisher.start() sessions.put(to.toString, publisher) } } } def unsubscribe(to: ActorRef): Unit = { sessions.remove(to.toString).foreach { publisher ⇒ logger.info(s"Stopping log publisher: $to") publisher.stop() } } } private case class LogPublisher(to: ActorRef, logger: LogbackLogger, level: Level, encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace) { private val filter = new ThresholdFilter() filter.setLevel(level.levelStr) private val appender = new AppenderBase[ILoggingEvent] { override def append(loggingEvent: ILoggingEvent) = to ! encoder(loggingEvent) } appender.addFilter(filter) appender.setName(to.toString) def start() = { val context = logger.getLoggerContext filter.setContext(context) appender.setContext(context) filter.start() appender.start() logger.addAppender(appender) } def stop() = { appender.stop() filter.stop() logger.detachAppender(appender) } }
Example 136
Source File: DataUtilities.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.examples.utilities import java.io._ import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream import org.slf4j.{Logger, LoggerFactory} object DataUtilities { val logger: Logger = LoggerFactory.getLogger(DataUtilities.getClass) private val BUFFER_SIZE = 4096 @throws(classOf[IOException]) def extractTarGz(filePath: String, outputPath: String): Unit = { var fileCount = 0 var dirCount = 0 logger.info("Extracting files") val tais = new TarArchiveInputStream(new GzipCompressorInputStream( new BufferedInputStream(new FileInputStream(filePath)))) // Read the tar entries using the getNextEntry method Stream.continually(tais.getNextTarEntry).takeWhile(_ !=null).foreach{ entry => // Create directories as required if (entry.isDirectory) { new File(outputPath + "/" + entry.getName).mkdirs dirCount += 1 } else { val data = new Array[Byte](BUFFER_SIZE) val fos = new FileOutputStream(outputPath + "/" + entry.getName) val dest = new BufferedOutputStream(fos, BUFFER_SIZE) Stream.continually(tais.read(data, 0, BUFFER_SIZE)).takeWhile(_ != -1).foreach{ count => dest.write(data, 0, count) } dest.close() fileCount = fileCount + 1 } if (fileCount % 1000 == 0) logger.info(".") } tais.close() } }
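Typical call, with made-up paths:

import org.dl4scala.examples.utilities.DataUtilities

object ExtractDemo extends App {
  // unpack a downloaded archive next to where it was saved
  DataUtilities.extractTarGz("/tmp/aclImdb_v1.tar.gz", "/tmp/aclImdb")
}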
Example 137
Source File: MLPMnistTwoLayerExample.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.examples.feedforward.mnist import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator import org.deeplearning4j.eval.Evaluation import org.deeplearning4j.nn.api.OptimizationAlgorithm import org.deeplearning4j.nn.conf.layers.{DenseLayer, OutputLayer} import org.deeplearning4j.nn.conf.{NeuralNetConfiguration, Updater} import org.deeplearning4j.nn.weights.WeightInit import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import org.slf4j.{Logger, LoggerFactory} import org.deeplearning4j.nn.multilayer.MultiLayerNetwork import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.nd4j.linalg.learning.config.Nesterovs object MLPMnistTwoLayerExample extends App{ private val log: Logger = LoggerFactory.getLogger(MLPMnistTwoLayerExample.getClass) // number of rows and columns in the input pictures//number of rows and columns in the input pictures private val numRows = 28 private val numColumns = 28 private val outputNum = 10 // number of output classes private val batchSize = 128 // batch size for each epoch private val rngSeed = 123 // random number seed for reproducibility private val numEpochs = 15 // number of epochs to perform private val rate: Double = 0.0015 // learning rate // Get the DataSetIterators://Get the DataSetIterators: private val mnistTrain = new MnistDataSetIterator(batchSize, true, rngSeed) private val mnistTest = new MnistDataSetIterator(batchSize, false, rngSeed) log.info("Build model....") private val conf = new NeuralNetConfiguration .Builder() .seed(rngSeed) // include a random seed for reproducibility .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // use stochastic gradient descent as an optimization algorithm .iterations(1) .activation(Activation.RELU) .weightInit(WeightInit.XAVIER) .learningRate(rate) // specify the learning rate .updater(new Nesterovs(0.98)) .regularization(true).l2(rate * 0.005) // regularize learning model .list() .layer(0, new DenseLayer.Builder() // create the first input layer. .nIn(numRows * numColumns) .nOut(500) .build()) .layer(1, new DenseLayer.Builder() // create the second input layer .nIn(500) .nOut(100) .build()) .layer(2, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD) // create hidden layer .activation(Activation.SOFTMAX) .nIn(100) .nOut(outputNum) .build()) .pretrain(false).backprop(true) .build() val model = new MultiLayerNetwork(conf) model.init() model.setListeners(new ScoreIterationListener(5)) // print the score with every iteration log.info("Train model....") for(i <- 0 until numEpochs){ model.fit(mnistTrain) } log.info("Evaluate model....") val eval = new Evaluation(outputNum) // create an evaluation object with 10 possible classes while(mnistTest.hasNext){ val next = mnistTest.next val output = model.output(next.getFeatureMatrix) // get the networks prediction eval.eval(next.getLabels, output) // check the prediction against the true class } log.info(eval.stats) log.info("****************Example finished********************") }
Example 138
Source File: ExecutorSystemLauncher.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.appmaster import scala.concurrent.duration._ import akka.actor._ import org.slf4j.Logger import org.apache.gearpump.cluster.AppMasterToWorker.LaunchExecutor import org.apache.gearpump.cluster.ExecutorJVMConfig import org.apache.gearpump.cluster.WorkerToAppMaster._ import org.apache.gearpump.cluster.appmaster.ExecutorSystemLauncher._ import org.apache.gearpump.cluster.appmaster.ExecutorSystemScheduler.{ExecutorSystemJvmConfig, Session} import org.apache.gearpump.cluster.scheduler.Resource import org.apache.gearpump.util.ActorSystemBooter.{ActorSystemRegistered, RegisterActorSystem} import org.apache.gearpump.util.{ActorSystemBooter, ActorUtil, Constants, LogUtil} private[appmaster] class ExecutorSystemLauncher(appId: Int, session: Session) extends Actor { private val LOG: Logger = LogUtil.getLogger(getClass) val scheduler = context.system.scheduler implicit val executionContext = context.dispatcher private val systemConfig = context.system.settings.config val timeoutSetting = systemConfig.getInt(Constants.GEARPUMP_START_EXECUTOR_SYSTEM_TIMEOUT_MS) val timeout = scheduler.scheduleOnce(timeoutSetting.milliseconds, self, LaunchExecutorSystemTimeout(session)) def receive: Receive = waitForLaunchCommand def waitForLaunchCommand: Receive = { case LaunchExecutorSystem(worker, executorSystemId, resource) => val launcherPath = ActorUtil.getFullPath(context.system, self.path) val jvmConfig = Option(session.executorSystemJvmConfig) .map(getExecutorJvmConfig(_, s"app${appId}system${executorSystemId}", launcherPath)).orNull val launch = LaunchExecutor(appId, executorSystemId, resource, jvmConfig) LOG.info(s"Launching Executor ...appId: $appId, executorSystemId: $executorSystemId, " + s"slots: ${resource.slots} on worker $worker") worker.ref ! launch context.become(waitForActorSystemToStart(sender, launch, worker, executorSystemId)) } def waitForActorSystemToStart( replyTo: ActorRef, launch: LaunchExecutor, worker: WorkerInfo, executorSystemId: Int) : Receive = { case RegisterActorSystem(systemPath) => import launch._ timeout.cancel() LOG.info(s"Received RegisterActorSystem $systemPath for session ${session.requestor}") sender ! ActorSystemRegistered(worker.ref) val system = ExecutorSystem(executorId, AddressFromURIString(systemPath), sender, resource, worker) replyTo ! LaunchExecutorSystemSuccess(system, session) context.stop(self) case reject@ExecutorLaunchRejected(reason, ex) => LOG.error(s"Executor Launch ${launch.resource} failed reason: $reason", ex) replyTo ! LaunchExecutorSystemRejected(launch.resource, reason, session) context.stop(self) case timeout: LaunchExecutorSystemTimeout => LOG.error(s"The Executor ActorSystem $executorSystemId has not been started in time") replyTo ! timeout context.stop(self) } } private[appmaster] object ExecutorSystemLauncher { case class LaunchExecutorSystem(worker: WorkerInfo, systemId: Int, resource: Resource) case class LaunchExecutorSystemSuccess(system: ExecutorSystem, session: Session) case class LaunchExecutorSystemRejected(resource: Resource, reason: Any, session: Session) case class LaunchExecutorSystemTimeout(session: Session) private def getExecutorJvmConfig(conf: ExecutorSystemJvmConfig, systemName: String, reportBack: String): ExecutorJVMConfig = { Option(conf).map { conf => import conf._ ExecutorJVMConfig(classPath, jvmArguments, classOf[ActorSystemBooter].getName, Array(systemName, reportBack), jar, username, executorAkkaConfig) }.getOrElse(null) } }
Example 139
Source File: RunningApplication.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.client import akka.actor.ActorRef import akka.pattern.ask import akka.util.Timeout import org.apache.gearpump.cluster.ClientToMaster.{RegisterAppResultListener, ResolveAppId, ShutdownApplication} import org.apache.gearpump.cluster.MasterToClient._ import org.apache.gearpump.cluster.client.RunningApplication._ import org.apache.gearpump.util.{ActorUtil, LogUtil} import org.slf4j.Logger import java.time.Duration import java.util.concurrent.TimeUnit import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future import scala.concurrent.duration._ import scala.util.{Failure, Success} class RunningApplication(val appId: Int, master: ActorRef, timeout: Timeout) { lazy val appMaster: Future[ActorRef] = resolveAppMaster(appId) def shutDown(): Unit = { val result = ActorUtil.askActor[ShutdownApplicationResult](master, ShutdownApplication(appId), timeout) result.appId match { case Success(_) => case Failure(ex) => throw ex } } def waitUntilFinish(): Unit = { this.waitUntilFinish(INF_DURATION) } def waitUntilFinish(duration: Duration): Unit = { val result = ActorUtil.askActor[ApplicationResult](master, RegisterAppResultListener(appId), new Timeout(duration.getSeconds, TimeUnit.SECONDS)) if (result.appId == appId) { result match { case failed: ApplicationFailed => throw failed.error case _: ApplicationSucceeded => LOG.info(s"Application $appId succeeded") case _: ApplicationTerminated => LOG.info(s"Application $appId terminated") } } else { LOG.warn(s"Received unexpected result $result for application $appId") } } def askAppMaster[T](msg: Any): Future[T] = { appMaster.flatMap(_.ask(msg)(timeout).asInstanceOf[Future[T]]) } private def resolveAppMaster(appId: Int): Future[ActorRef] = { master.ask(ResolveAppId(appId))(timeout). asInstanceOf[Future[ResolveAppIdResult]].map(_.appMaster.get) } } object RunningApplication { private val LOG: Logger = LogUtil.getLogger(getClass) // This magic number is derived from Akka's configuration, which is the maximum delay private val INF_DURATION = Duration.ofSeconds(2147482) }
Example 140
Source File: Scheduler.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.scheduler import akka.actor.{Actor, ActorRef} import org.apache.gearpump.Time.MilliSeconds import org.apache.gearpump.cluster.MasterToWorker.{UpdateResourceFailed, UpdateResourceSucceed, WorkerRegistered} import org.apache.gearpump.cluster.WorkerToMaster.ResourceUpdate import org.apache.gearpump.cluster.master.Master.WorkerTerminated import org.apache.gearpump.cluster.scheduler.Scheduler.ApplicationFinished import org.apache.gearpump.cluster.worker.WorkerId import org.apache.gearpump.util.LogUtil import org.slf4j.Logger import scala.collection.mutable abstract class Scheduler extends Actor { val LOG: Logger = LogUtil.getLogger(getClass) protected var resources = new mutable.HashMap[WorkerId, (ActorRef, Resource)] def handleScheduleMessage: Receive = { case WorkerRegistered(id, _) => if (!resources.contains(id)) { LOG.info(s"Worker $id added to the scheduler") resources.put(id, (sender, Resource.empty)) } case update@ResourceUpdate(worker, workerId, resource) => LOG.info(s"$update...") if (resources.contains(workerId)) { val resourceReturned = resource > resources.get(workerId).get._2 resources.update(workerId, (worker, resource)) if (resourceReturned) { allocateResource() } sender ! UpdateResourceSucceed } else { sender ! UpdateResourceFailed( s"ResourceUpdate failed! The worker $workerId has not been registered into master") } case WorkerTerminated(workerId) => if (resources.contains(workerId)) { resources -= workerId } case ApplicationFinished(appId) => doneApplication(appId) } def allocateResource(): Unit def doneApplication(appId: Int): Unit } object Scheduler { case class PendingRequest( appId: Int, appMaster: ActorRef, request: ResourceRequest, timeStamp: MilliSeconds) case class ApplicationFinished(appId: Int) }
Example 141
Source File: LocalJarStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.jarstore.local

import java.io._

import com.typesafe.config.Config
import org.apache.gearpump.jarstore.JarStore
import org.apache.gearpump.util.{Constants, FileUtils, LogUtil, Util}
import org.slf4j.Logger

  override def getFile(fileName: String): InputStream = {
    val localFile = new File(rootPath, fileName)
    val is = try {
      new FileInputStream(localFile)
    } catch {
      case ex: Exception =>
        LOG.error(s"Fetch file $fileName failed", ex)
        new ClosedInputStream
    }
    is
  }

  private def createDirIfNotExists(file: File): Unit = {
    if (!file.exists()) {
      FileUtils.forceMkdir(file)
    }
  }
}
Example 142
Source File: JarStoreClient.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.jarstore import java.io.File import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ import scala.concurrent.duration.Duration import scala.concurrent.Await import akka.pattern.ask import akka.actor.{ActorSystem, ActorRef} import com.typesafe.config.Config import org.apache.gearpump.cluster.master.MasterProxy import org.apache.gearpump.util.{Util, Constants, LogUtil} import org.slf4j.Logger import org.apache.gearpump.cluster.ClientToMaster.{GetJarStoreServer, JarStoreServerAddress} import scala.concurrent.{Future, ExecutionContext} class JarStoreClient(config: Config, system: ActorSystem) { private def LOG: Logger = LogUtil.getLogger(getClass) private implicit val timeout = Constants.FUTURE_TIMEOUT private implicit def dispatcher: ExecutionContext = system.dispatcher private val master: ActorRef = { val masters = config.getStringList(Constants.GEARPUMP_CLUSTER_MASTERS) .asScala.flatMap(Util.parseHostList) system.actorOf(MasterProxy.props(masters), s"masterproxy${Util.randInt()}") } private lazy val client = (master ? GetJarStoreServer).asInstanceOf[Future[JarStoreServerAddress]] .map { address => val client = new FileServer.Client(system, address.url) client } def copyFromLocal(localFile: File): FilePath = { val future = client.flatMap(_.upload(localFile)) Await.result(future, Duration(60, TimeUnit.SECONDS)) } }
Example 143
Source File: TimeOutSchedulerSpec.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.util import scala.concurrent.duration._ import akka.actor._ import akka.testkit.{ImplicitSender, TestActorRef, TestKit, TestProbe} import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} import org.slf4j.Logger import org.apache.gearpump.cluster.TestUtil class TimeOutSchedulerSpec(_system: ActorSystem) extends TestKit(_system) with ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll { def this() = this(ActorSystem("WorkerSpec", TestUtil.DEFAULT_CONFIG)) val mockActor = TestProbe() override def afterAll { TestKit.shutdownActorSystem(system) } "The TimeOutScheduler" should { "handle the time out event" in { val testActorRef = TestActorRef(Props(classOf[TestActor], mockActor.ref)) val testActor = testActorRef.underlyingActor.asInstanceOf[TestActor] testActor.sendMsgToIgnore() mockActor.expectMsg(30.seconds, MessageTimeOut) } } } case object Echo case object MessageTimeOut class TestActor(mock: ActorRef) extends Actor with TimeOutScheduler { private val LOG: Logger = LogUtil.getLogger(getClass) val target = context.actorOf(Props(classOf[EchoActor])) override def receive: Receive = { case _ => } def sendMsgToIgnore(): Unit = { sendMsgWithTimeOutCallBack(target, Echo, 2000, sendMsgTimeOut()) } private def sendMsgTimeOut(): Unit = { mock ! MessageTimeOut } } class EchoActor extends Actor { override def receive: Receive = { case _ => } }
Example 144
Source File: ScribeLoggerFactory.scala From scribe with MIT License | 5 votes |
package scribe.slf4j import java.util.concurrent.ConcurrentHashMap import org.slf4j.{Logger, ILoggerFactory} class ScribeLoggerFactory extends ILoggerFactory { private val map = new ConcurrentHashMap[String, Logger] override def getLogger(name: String): Logger = { val loggerName = if (name.equalsIgnoreCase(Logger.ROOT_LOGGER_NAME)) { "" } else { name } Option(map.get(loggerName)) match { case Some(logger) => logger case None => { val adapter = new ScribeLoggerAdapter(loggerName) val old = map.putIfAbsent(loggerName, adapter) Option(old) match { case Some(a) => a case None => adapter } } } } }
Example 145
Source File: ScribeLoggerFactory.scala From scribe with MIT License | 5 votes |
package scribe.slf4j import java.util.concurrent.ConcurrentHashMap import org.slf4j.{ILoggerFactory, Logger} object ScribeLoggerFactory extends ILoggerFactory { private lazy val map = new ConcurrentHashMap[String, Logger]() override def getLogger(name: String): Logger = Option(map.get(name)) match { case Some(logger) => logger case None => { val logger = new ScribeLoggerAdapter(name) val oldInstance = map.putIfAbsent(name, logger) Option(oldInstance).getOrElse(logger) } } }
Example 146
Source File: SequoiadbRDDIterator.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.rdd import _root_.com.sequoiadb.spark.SequoiadbConfig import _root_.com.sequoiadb.spark.io.SequoiadbReader import org.apache.spark._ import org.apache.spark.sql.sources.Filter import org.bson.BSONObject import org.slf4j.{Logger, LoggerFactory} //import java.io.FileOutputStream; class SequoiadbRDDIterator( taskContext: TaskContext, partition: Partition, config: SequoiadbConfig, requiredColumns: Array[String], filters: Array[Filter], queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON, queryLimit: Long = -1) extends Iterator[BSONObject] { private var LOG: Logger = LoggerFactory.getLogger(this.getClass.getName()) protected var finished = false private var closed = false private var initialized = false lazy val reader = { initialized = true initReader() } // Register an on-task-completion callback to close the input stream. taskContext.addTaskCompletionListener((context: TaskContext) => closeIfNeeded()) override def hasNext: Boolean = { !finished && reader.hasNext } override def next(): BSONObject = { if (!hasNext) { throw new NoSuchElementException("End of stream") } reader.next() } def closeIfNeeded(): Unit = { if (!closed) { close() closed = true } } protected def close(): Unit = { if (initialized) { reader.close() } } def initReader() = { val reader = new SequoiadbReader(config,requiredColumns,filters, queryReturnType, queryLimit) reader.init(partition) reader } }
Example 147
Source File: SequoiadbRDD.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.rdd import org.apache.spark.SparkContext import _root_.com.sequoiadb.spark.SequoiadbConfig import com.sequoiadb.spark.partitioner._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.sql.sources.Filter import org.apache.spark.{Partition, TaskContext} import org.bson.BSONObject import org.slf4j.{Logger, LoggerFactory} import scala.collection.mutable.ArrayBuffer //import java.io.FileOutputStream; def apply ( sc: SQLContext, config: SequoiadbConfig, partitioner: Option[SequoiadbPartitioner] = None, requiredColumns: Array[String] = Array(), filters: Array[Filter] = Array(), queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON, queryLimit: Long = -1) = { new SequoiadbRDD ( sc.sparkContext, config, partitioner, requiredColumns, filters, queryReturnType, queryLimit) } }
Example 148
Source File: SequoiadbWriter.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.io def save(it: Iterator[Row], schema: StructType): Unit = { try { ds = Option(new SequoiadbDatasource ( config[List[String]](SequoiadbConfig.Host), config[String](SequoiadbConfig.Username), config[String](SequoiadbConfig.Password), ConnectionUtil.initConfigOptions, ConnectionUtil.initSequoiadbOptions )) // pickup a connection connection = Option(ds.get.getConnection) // locate collection val cl = connection.get.getCollectionSpace( config[String](SequoiadbConfig.CollectionSpace)).getCollection( config[String](SequoiadbConfig.Collection)) LOG.info ("bulksize = " + config[String](SequoiadbConfig.BulkSize)) // loop through it and perform batch insert // batch size is defined in SequoiadbConfig.BulkSize val list : ArrayList[BSONObject] = new ArrayList[BSONObject]() while ( it.hasNext ) { val record = it.next val bsonrecord = SequoiadbRowConverter.rowAsDBObject ( record, schema ) list.add(bsonrecord) if ( list.size >= config[String](SequoiadbConfig.BulkSize).toInt ) { cl.bulkInsert ( list, 0 ) list.clear } } // insert rest of the record if there's any if ( list.size > 0 ) { cl.bulkInsert ( list, 0 ) list.clear } } catch { case ex: Exception => throw SequoiadbException(ex.getMessage, ex) } finally { ds.fold(ifEmpty=()) { connectionpool => connection.fold(ifEmpty=()) { conn => connectionpool.close(conn) } connectionpool.close } // ds.fold(ifEmpty=()) } // finally } // def save(it: Iterator[BSONObject]): Unit = }
Example 149
Source File: AbstractLoggingServiceRegistryClient.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.devmode.internal.registry import java.net.URI import org.slf4j.Logger import org.slf4j.LoggerFactory import scala.collection.immutable import scala.concurrent.ExecutionContext import scala.concurrent.Future import scala.util.Failure import scala.util.Success private[lagom] abstract class AbstractLoggingServiceRegistryClient(implicit ec: ExecutionContext) extends ServiceRegistryClient { protected val log: Logger = LoggerFactory.getLogger(getClass) override def locateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] = { require( serviceName != ServiceRegistryClient.ServiceName, "The service registry client cannot locate the service registry service itself" ) log.debug("Locating service name=[{}] ...", serviceName) val location: Future[immutable.Seq[URI]] = internalLocateAll(serviceName, portName) location.onComplete { case Success(Nil) => log.warn("serviceName=[{}] was not found. Hint: Maybe it was not started?", serviceName) case Success(uris) => log.debug("serviceName=[{}] can be reached at uris=[{}]", serviceName: Any, uris: Any) case Failure(e) => log.warn("Service registry replied with an error when looking up serviceName=[{}]", serviceName: Any, e: Any) } location } protected def internalLocateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] }
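A hedged sketch of a concrete subclass (the name and in-memory lookup are invented); it supplies only internalLocateAll and inherits the debug/warn logging around locateAll from the abstract base:

package com.lightbend.lagom.devmode.internal.registry

import java.net.URI

import scala.collection.immutable
import scala.concurrent.{ExecutionContext, Future}

private[lagom] class StaticServiceRegistryClient(entries: Map[String, immutable.Seq[URI]])(
    implicit ec: ExecutionContext
) extends AbstractLoggingServiceRegistryClient {

  override protected def internalLocateAll(
      serviceName: String,
      portName: Option[String]
  ): Future[immutable.Seq[URI]] =
    Future.successful(entries.getOrElse(serviceName, Nil))
}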
Example 150
Source File: LivySubmit.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench.sparklaunch.submission.livy import com.ibm.sparktc.sparkbench.sparklaunch.confparse.SparkJobConf import com.ibm.sparktc.sparkbench.sparklaunch.submission.livy.LivySubmit._ import com.ibm.sparktc.sparkbench.sparklaunch.submission.Submitter import com.ibm.sparktc.sparkbench.utils.SparkBenchException import com.softwaremill.sttp.{Id, SttpBackend} import org.slf4j.{Logger, LoggerFactory} import scala.annotation.tailrec import scala.sys.ShutdownHookThread object LivySubmit { val log: Logger = LoggerFactory.getLogger(this.getClass) val successCode = 200 import com.softwaremill.sttp._ val emptyBodyException: SparkBenchException = SparkBenchException("REST call returned empty message body") val nonSuccessCodeException: Int => SparkBenchException = (code: Int) => SparkBenchException(s"REST call returned non-success code: $code") def apply(): LivySubmit = { new LivySubmit()(HttpURLConnectionBackend()) } def cancelAllBatches(livyWithID: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyDelete] = { log.info(s"Cancelling batch request id: ${livyWithID.id}") val response = livyWithID.deleteRequest.send() (response.is200, response.body) match { case (true, Right(bod)) => if (bod.msg == "deleted") response else throw SparkBenchException(s"Unexpected status for delete request: ${bod.msg}") case (true, Left(b)) => throw emptyBodyException case (_, _) => throw nonSuccessCodeException(response.code) } } def sendPostBatchRequest(conf: SparkJobConf) (implicit backend: SttpBackend[Id, Nothing]): (LivyRequestWithID, Response[ResponseBodyBatch]) = { val livyRequest = LivyRequest(conf) log.info(s"Sending Livy POST request:\n${livyRequest.postRequest.toString}") val response: Id[Response[ResponseBodyBatch]] = livyRequest.postRequest.send() (response.isSuccess, response.body) match { case (true, Left(_)) => throw emptyBodyException case (false, Left(_)) => throw nonSuccessCodeException(response.code) case (false, Right(bod)) => throw SparkBenchException(s"POST Request to ${livyRequest.postBatchUrl} failed:\n" + s"${bod.log.mkString("\n")}") case (_,_) => // no exception thrown } val livyWithID = LivyRequestWithID(livyRequest, response.body.right.get.id) (livyWithID, response) } private def pollHelper(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = { Thread.sleep(request.pollSeconds * 1000) log.info(s"Sending Livy status GET request:\n${request.statusRequest.toString}") val response: Id[Response[ResponseBodyState]] = request.statusRequest.send() response } @tailrec def poll(request: LivyRequestWithID, response: Response[ResponseBodyState]) (implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = (response.isSuccess, response.body) match { case (false, _) => throw SparkBenchException(s"Request failed with code ${response.code}") case (_, Left(_)) => throw emptyBodyException case (true, Right(bod)) => bod.state match { case "success" => response case "dead" => throw SparkBenchException(s"Poll request failed with state: dead\n" + getLogs(request)) case "running" => poll(request, pollHelper(request)) case st => throw SparkBenchException(s"Poll request failed with state: $st") } } def getLogs(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): String = { val response = request.logRequest.send() (response.is200, response.body) match { case (true, Right(bod)) => bod.log.mkString("\n") case (false, Right(_)) => throw SparkBenchException(s"Log request failed with code: ${response.code}") case (_, Left(_)) => throw emptyBodyException } } }
class LivySubmit()(implicit val backend: SttpBackend[Id, Nothing]) extends Submitter { override def launch(conf: SparkJobConf): Unit = { val (livyWithID, postResponse) = sendPostBatchRequest(conf)(backend) val shutdownHook: ShutdownHookThread = sys.ShutdownHookThread { // interrupt any batches cancelAllBatches(livyWithID)(backend) } val pollResponse = poll(livyWithID, pollHelper(livyWithID))(backend) // The request has completed, so we're going to remove the shutdown hook. shutdownHook.remove() } }
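LivySubmit.launch combines three pieces: a shutdown hook that cancels the batch if the JVM exits early, a tail-recursive poll loop that sleeps between status checks, and removal of the hook once the job reaches a terminal state. Below is a stripped-down, hypothetical sketch of that control flow with no sttp or Livy types; the function names and states are illustrative only.

import org.slf4j.{Logger, LoggerFactory}
import scala.annotation.tailrec
import scala.sys.ShutdownHookThread

// Hypothetical sketch: poll until a terminal state, cancel on unexpected shutdown.
object PollWithShutdownHookSketch {
  private val log: Logger = LoggerFactory.getLogger(getClass)

  @tailrec
  def poll(fetchState: () => String, intervalMs: Long): String = {
    val state = fetchState()
    log.info("Polled state={}", state)
    if (state == "success" || state == "dead") state
    else {
      Thread.sleep(intervalMs)
      poll(fetchState, intervalMs)
    }
  }

  def launch(fetchState: () => String, cancel: () => Unit): String = {
    val hook: ShutdownHookThread = sys.ShutdownHookThread {
      cancel() // mirrors cancelAllBatches: stop the remote job if the JVM shuts down mid-run
    }
    val finalState = poll(fetchState, intervalMs = 1000L)
    hook.remove() // the job finished normally, so the hook is no longer needed
    finalState
  }
}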
Example 151
Source File: CLIKickoff.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench.cli import org.slf4j.{Logger, LoggerFactory} import com.ibm.sparktc.sparkbench.workload.MultipleSuiteKickoff object CLIKickoff extends App { override def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(this.getClass) log.info(s"args received: ${args.mkString(", ")}") if(args.isEmpty) throw new IllegalArgumentException("CLIKickoff received no arguments") val oneStr = args.mkString(" ") val worksuites = Configurator(oneStr) MultipleSuiteKickoff.run(worksuites) } }
Example 152
Source File: RestartableStreamingApp.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import com.twilio.open.streaming.trend.discovery.config.Configuration import com.twilio.open.streaming.trend.discovery.listeners.SparkStreamingQueryListener import org.apache.spark.sql.SparkSession import org.slf4j.Logger trait StreamingApp[+Configuration] { val config: Configuration val logger: Logger def run(): Unit } trait Restartable { def restart(): Unit } trait RestartableStreamingApp[T <: Configuration] extends StreamingApp[T] with Restartable { val spark: SparkSession val streamingQueryListener: SparkStreamingQueryListener = { new SparkStreamingQueryListener(spark, restart) } def monitoredRun(): Unit = { run() monitorStreams() } def restart(): Unit = { logger.info(s"restarting the application. cleaning up old stream listener and streams") val streams = spark.streams streams.removeListener(streamingQueryListener) streams.active.foreach { stream => logger.info(s"stream_name=${stream.name} state=active status=${stream.status} action=stop_stream") stream.stop() } logger.info(s"attempting to restart the application") monitoredRun() } }
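StreamingApp declares the Logger as an abstract member, so each concrete application decides which logger category it reports under while the restart logic in RestartableStreamingApp stays generic. A minimal, generic sketch of that pattern, deliberately not tied to the Configuration or Spark types above:

import org.slf4j.{Logger, LoggerFactory}

// Generic sketch: the trait only declares the logger, the concrete app supplies it.
trait LoggingApp {
  def logger: Logger
  def run(): Unit
}

object HelloStreamingApp extends LoggingApp {
  val logger: Logger = LoggerFactory.getLogger(getClass)
  def run(): Unit = logger.info("starting app={}", getClass.getSimpleName)
}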
Example 153
Source File: SparkStreamingQueryListener.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery.listeners import kamon.Kamon import org.apache.spark.sql.SparkSession import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} import org.slf4j.{Logger, LoggerFactory} object SparkStreamingQueryListener { val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener]) def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = { new SparkStreamingQueryListener(spark, restart) } } class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener { import SparkStreamingQueryListener._ private val streams = sparkSession.streams private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName) override def onQueryStarted(event: QueryStartedEvent): Unit = { if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}") } //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = { val progress = progressEvent.progress val inputRowsPerSecond = progress.inputRowsPerSecond val processedRowsPerSecond = progress.processedRowsPerSecond val sources = progress.sources.map { source => val description = source.description val startOffset = source.startOffset val endOffset = source.endOffset val inputRows = source.numInputRows s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows" } Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong) Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong) log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}") } override def onQueryTerminated(event: QueryTerminatedEvent): Unit = { log.warn(s"queryTerminated: $event") val possibleStreamingQuery = streams.get(event.id) if (possibleStreamingQuery != null) { val progress = possibleStreamingQuery.lastProgress val sources = progress.sources log.warn(s"last.progress.sources sources=$sources") } event.exception match { case Some(exception) => log.warn(s"queryEndedWithException exception=$exception resetting.all.streams") restart() case None => } } }
Example 154
Source File: EventAggregationSpec.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.util import com.twilio.open.protocol.Calls.CallEvent import com.twilio.open.protocol.Metrics import com.twilio.open.streaming.trend.discovery.streams.EventAggregation import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer} import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.sql._ import org.apache.spark.sql.kafka010.KafkaTestUtils import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] { override val testUtils = new KafkaTestUtils[String, CallEvent] { override val keySerializer: Serializer[String] = new StringSerializer override val keyDeserializer: Deserializer[String] = new StringDeserializer override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer } override protected val kafkaTopic = "spark.summit.call.events" override protected val partitions = 8 private val pathToTestScenarios = "src/test/resources/scenarios" val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation]) lazy val session: SparkSession = sparkSql override def conf: SparkConf = { new SparkConf() .setMaster("local[*]") .setAppName("aggregation-test-app") .set("spark.ui.enabled", "false") .set("spark.app.id", appID) .set("spark.driver.host", "localhost") .set("spark.sql.shuffle.partitions", "32") .set("spark.executor.cores", "4") .set("spark.executor.memory", "1g") .set("spark.ui.enabled", "false") .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList) } test("Should aggregate call events") { import session.implicits._ val appConfig = appConfigForTest() val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json") val scenarioIter = scenario.toIterator scenario.nonEmpty shouldBe true testUtils.createTopic(kafkaTopic, partitions, overwrite = true) sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime) val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session) val eventAggregation = EventAggregation(appConfig) eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session) .writeStream .queryName("calleventaggs") .format("memory") .outputMode(eventAggregation.outputMode) .start() .processAllAvailable() val df = session.sql("select * from calleventaggs") df.printSchema() df.show val res = session .sql("select avg(stats.p99) from calleventaggs") .collect() .map { r => r.getAs[Double](0) } .head DiscoveryUtils.round(res) shouldEqual 7.13 } } class CallEventSerializer extends Serializer[CallEvent] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray override def close(): Unit = {} } class CallEventDeserializer extends Deserializer[CallEvent] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data) override def close(): Unit = {} }
Example 155
Source File: TestHelper.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } } @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
Example 156
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils import java.io.{File, FileNotFoundException} import java.nio.charset.StandardCharsets import org.slf4j.{Logger, LoggerFactory} import scala.io.BufferedSource import scala.io.Source object Sourcer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val utf8: String = StandardCharsets.UTF_8.toString def sourceFromResource(path: String): BufferedSource = { val url = Option(Sourcer.getClass.getResource(path)) .getOrElse(throw newFileNotFoundException(path)) logger.info("Sourcing resource " + url.getPath) Source.fromURL(url, utf8) } def sourceFromFile(file: File): BufferedSource = { logger.info("Sourcing file " + file.getPath) Source.fromFile(file, utf8) } def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path)) def newFileNotFoundException(path: String): FileNotFoundException = { val message1 = path + " (The system cannot find the path specified" val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "") val message3 = message2 + ")" new FileNotFoundException(message3) } }
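A small usage sketch for the Sourcer object above; the resource path is illustrative, and the caller is responsible for closing the BufferedSource it gets back:

import org.clulab.wm.wmexchanger.utils.Sourcer
import scala.io.BufferedSource

object SourcerUsageSketch {
  def firstLine(resourcePath: String = "/example.txt"): String = {
    val source: BufferedSource = Sourcer.sourceFromResource(resourcePath)
    try source.getLines().next() // read the first line only
    finally source.close()       // always release the underlying stream
  }
}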
Example 157
Source File: SafeThread.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils import java.util.Scanner import org.apache.kafka.common.errors.InterruptException import org.slf4j.Logger abstract class SafeThread(logger: Logger) extends Thread { def runSafely(): Unit override def run(): Unit = { try { runSafely() } catch { case exception: InterruptException => // This usually happens during consumer.poll(). logger.info("Kafka interruption") // This is expected. case exception: InterruptedException => logger.info("Java interruption") // This is expected. case exception: Throwable => logger.error("Consumer interruption", exception) } finally { // This seems to be the only way to "cancel" the scanner.nextLine. System.exit(0) } } def waitSafely(duration: Long): Unit = SafeThread.waitSafely(this, logger, duration) start } object SafeThread { def waitSafely(thread: Thread, logger: Logger, duration: Long): Unit = { try { println("Press ENTER to exit...") new Scanner(System.in).nextLine() logger.info("User interruption") ThreadUtils.stop(thread, duration) logger.info("Exiting...") } catch { case _: Throwable => logger.info("Exiting...") } } }
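SafeThread starts itself from its own constructor (note the bare start call at the end of the class body) and routes interruptions to the supplied Logger, so a subclass only implements runSafely. A hypothetical subclass might look like this; the heartbeat worker is purely illustrative:

import org.clulab.wm.wmexchanger.utils.SafeThread
import org.slf4j.{Logger, LoggerFactory}

// Hypothetical worker: runs until interrupted, logging through the logger passed to SafeThread.
class HeartbeatThread(logger: Logger) extends SafeThread(logger) {
  override def runSafely(): Unit = {
    while (!isInterrupted) {
      logger.info("heartbeat")
      Thread.sleep(1000)
    }
  }
}

object HeartbeatMain {
  def main(args: Array[String]): Unit = {
    val logger: Logger = LoggerFactory.getLogger(getClass)
    // The thread is already running after construction; waitSafely blocks until ENTER is pressed.
    new HeartbeatThread(logger).waitSafely(duration = 1000L)
  }
}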
Example 158
Source File: KafkaConsumer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.io.File import java.time.Duration import java.util.Collections import java.util.ConcurrentModificationException import java.util.Properties import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer} import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.FileEditor import org.json4s._ import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) { import KafkaConsumer._ implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats logger.info("Opening consumer...") protected val consumer: ApacheKafkaConsumer[String, String] = { val consumer = new ApacheKafkaConsumer[String, String](properties) consumer.subscribe(Collections.singletonList(topic)) consumer } def poll(duration: Int): Unit = { val records = consumer.poll(Duration.ofSeconds(duration)) logger.info(s"Polling ${records.count} records...") records.forEach { record => val key = record.key val value = record.value // Imply an extension on the file so that it can be replaced. val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get logger.info("Consuming " + file.getName) FileUtils.printWriterFromFile(file).autoClose { printWriter => printWriter.print(value) } } } def close(): Unit = { logger.info("Closing consumer...") try { consumer.close(Duration.ofSeconds(closeDuration)) } catch { case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access } } } object KafkaConsumer { val logger: Logger = LoggerFactory.getLogger(this.getClass) }
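KafkaConsumer keeps its Logger in the companion object and pulls it into the class with import KafkaConsumer._, so every instance shares a single logger tied to the class name. The same pattern in a generic, hypothetical form:

import org.slf4j.{Logger, LoggerFactory}

class Worker(name: String) {
  import Worker._ // brings the shared companion logger into scope
  def doWork(): Unit = logger.info("worker {} running", name)
}

object Worker {
  val logger: Logger = LoggerFactory.getLogger(classOf[Worker])
}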
Example 159
Source File: KafkaConsumerApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.util.Properties import org.clulab.wm.wmexchanger.utils.PropertiesBuilder import org.clulab.wm.wmexchanger.utils.WmUserApp import org.clulab.wm.wmexchanger.utils.SafeThread import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumerApp(args: Array[String]) extends WmUserApp(args, "/kafkaconsumer.properties") { val localKafkaProperties: Properties = { // This allows the login to be contained in a file external to the project. val loginProperty = appProperties.getProperty("login") val loginPropertiesBuilder = PropertiesBuilder.fromFile(loginProperty) PropertiesBuilder(kafkaProperties).putAll(loginPropertiesBuilder).get } val topic: String = appProperties.getProperty("topic") val outputDir: String = appProperties.getProperty("outputDir") val pollDuration: Int = appProperties.getProperty("poll.duration").toInt val waitDuration: Long = appProperties.getProperty("wait.duration").toLong val closeDuration: Int = appProperties.getProperty("close.duration").toInt val thread: SafeThread = new SafeThread(KafkaConsumerApp.logger) { override def runSafely(): Unit = { val consumer = new KafkaConsumer(localKafkaProperties, closeDuration, topic, outputDir) // autoClose isn't executed if the thread is shot down, so this hook is used instead. sys.ShutdownHookThread { consumer.close() } while (!isInterrupted) consumer.poll(pollDuration) } } if (interactive) thread.waitSafely(waitDuration) } object KafkaConsumerApp extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) new KafkaConsumerApp(args) }
Example 160
Source File: CurlProducerApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmproducer import com.typesafe.config.ConfigFactory import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.PropertiesBuilder import org.clulab.wm.wmexchanger.utils.Sinker import org.clulab.wm.wmexchanger.utils.StringUtils import org.slf4j.Logger import org.slf4j.LoggerFactory object CurlProducerApp extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) val version = "0.2.3" val inputDir = args(0) val outputFile = args(1) val config = ConfigFactory.load("curlproducer") val service = config.getString("CurlProducerApp.service") val login = config.getString("CurlProducerApp.login") val properties = PropertiesBuilder.fromFile(login).get val username = properties.getProperty("username") val password = properties.getProperty("password") val files = FileUtils.findFiles(inputDir, "jsonld") Sinker.printWriterFromFile(outputFile).autoClose { printWriter => files.foreach { file => logger.info(s"Processing ${file.getName}") val docId = StringUtils.beforeFirst(file.getName, '.') try { val command = s"""curl |--basic |--user "$username:$password" |-X POST "$service" |-H "accept: application/json" |-H "Content-Type: multipart/form-data" |-F 'metadata={ "identity": "eidos", "version": "$version", "document_id": "$docId" }' |-F "file=@${file.getName}" |""".stripMargin.replace('\r', ' ').replace('\n', ' ') printWriter.print(command) printWriter.print("\n") } catch { case exception: Exception => logger.error(s"Exception for file $file", exception) } } } }
Example 161
Source File: DocumentFilter.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import org.clulab.processors.corenlp.CoreNLPDocument import org.clulab.processors.shallownlp.ShallowNLPProcessor import org.clulab.processors.{Document, Processor} import org.slf4j.{Logger, LoggerFactory} trait DocumentFilter { def whileFiltered(document: Document)(transform: Document => Document): Document } class FilterByNothing extends DocumentFilter { def whileFiltered(doc: Document)(transform: Document => Document): Document = transform(doc) } object FilterByNothing { def apply() = new FilterByNothing } class FilterByLength(processor: Processor, cutoff: Int = 200) extends DocumentFilter { def whileFiltered(doc: Document)(transform: Document => Document): Document = { val text = doc.text val filteredDoc = filter(doc) val transformedDoc = transform(filteredDoc) val unfilteredDoc = unfilter(transformedDoc, text) unfilteredDoc } protected def unfilter(doc: Document, textOpt: Option[String]): Document = { doc.text = textOpt doc } protected def filter(doc: Document): Document = { // Iterate through the sentences, any sentence that is too long (number of tokens), remove val sanitizedText = sanitizeText(doc) val kept = doc.sentences.filter(s => s.words.length < cutoff) val skipped = doc.sentences.length - kept.length val newDoc = Document(doc.id, kept, doc.coreferenceChains, doc.discourseTree, sanitizedText) val newerDoc = // This is a hack for lack of copy constructor for CoreNLPDocument if (doc.isInstanceOf[CoreNLPDocument]) ShallowNLPProcessor.cluDocToCoreDoc(newDoc, keepText = true) else newDoc if (skipped != 0) FilterByLength.logger.info(s"skipping $skipped sentences") // Return a new document from these sentences newerDoc } protected def sanitizeText(doc: Document): Option[String] = doc.text.map { text => // Assume that these characters are never parts of words. var newText = text.replace('\n', ' ').replace(0x0C.toChar, ' ') for (s <- doc.sentences if s.endOffsets.last < newText.size) { // Only perform this if it isn't part of a word. A space is most reliable. if (newText(s.endOffsets.last) == ' ') newText = newText.updated(s.endOffsets.last, '\n') } newText } } object FilterByLength { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply(processor: Processor, cutoff: Int = 200): FilterByLength = new FilterByLength(processor, cutoff) }
Example 162
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import java.io.{File, FileNotFoundException} import java.nio.charset.StandardCharsets import org.slf4j.{Logger, LoggerFactory} import scala.io.BufferedSource import scala.io.Source object Sourcer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val utf8: String = StandardCharsets.UTF_8.toString def sourceFromResource(path: String): BufferedSource = { val url = Option(Sourcer.getClass.getResource(path)) .getOrElse(throw newFileNotFoundException(path)) logger.info("Sourcing resource " + url.getPath) Source.fromURL(url, utf8) } def sourceFromFile(file: File): BufferedSource = { logger.info("Sourcing file " + file.getPath) Source.fromFile(file, utf8) } def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path)) def newFileNotFoundException(path: String): FileNotFoundException = { val message1 = path + " (The system cannot find the path specified" val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "") val message3 = message2 + ")" new FileNotFoundException(message3) } }
Example 163
Source File: Timer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import org.slf4j.{Logger, LoggerFactory} import scala.collection.mutable class Timer(val description: String) { var elapsedTime: Option[Long] = None var startTime: Option[Long] = None def time[R](block: => R): R = { val t0 = System.currentTimeMillis() val result: R = block // call-by-name val t1 = System.currentTimeMillis() elapsedTime = Some(t1 - t0) result } def start(): Unit = { val t0 = System.currentTimeMillis() startTime = Some(t0) } def stop(): Unit = { if (startTime.isDefined) { val t1 = System.currentTimeMillis() elapsedTime = Some(t1 - startTime.get) } } override def toString: String = { if (elapsedTime.isDefined) s"\tTime\t$description\t${Timer.diffToString(elapsedTime.get)}" else if (startTime.isDefined) s"\tStart\t$description\t${startTime.get}\tms" else s"\tTimer\t$description" } } object Timer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val elapsedTimes: mutable.Map[String, Long] = mutable.Map.empty def addTime(key: String, milliseconds: Long): Unit = this synchronized { val oldTime = elapsedTimes.getOrElseUpdate(key, 0) val newTime = oldTime + milliseconds elapsedTimes.update(key, newTime) } def summarize: Unit = { elapsedTimes.toSeq.sorted.foreach { case (key, milliseconds) => logger.info(s"\tTotal\t$key\t$milliseconds") } } def diffToString(diff: Long): String = { val days = (diff / (1000 * 60 * 60 * 24)) / 1 val hours = (diff % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60) val mins = (diff % (1000 * 60 * 60)) / (1000 * 60) val secs = (diff % (1000 * 60)) / 1000 val msecs = (diff % (1000 * 1)) / 1 f"$days:$hours%02d:$mins%02d:$secs%02d.$msecs%03d" } // See http://biercoff.com/easily-measuring-code-execution-time-in-scala/ def time[R](description: String, verbose: Boolean = true)(block: => R): R = { val t0 = System.currentTimeMillis() if (verbose) logger.info(s"\tStart\t$description\t$t0\tms") val result: R = block // call-by-name val t1 = System.currentTimeMillis() if (verbose) logger.info(s"\tStop\t$description\t$t1\tms") val diff = t1 - t0 if (verbose) logger.info(s"\tDiff\t$description\t$diff\tms") if (verbose) logger.info(s"\tTime\t$description\t${diffToString(diff)}") addTime(description, diff) result } }
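A short usage sketch for the Timer utilities above: Timer.time logs start, stop and elapsed time around a by-name block and accumulates the total per description, which Timer.summarize later writes through the logger. The work inside the block is illustrative.

import org.clulab.wm.eidos.utils.Timer

object TimerUsageSketch {
  def main(args: Array[String]): Unit = {
    // Time a block and get its result back; elapsed time is logged and accumulated under "sum ints".
    val total = Timer.time("sum ints") {
      (1 to 1000000).sum
    }
    println(total)

    // Log the accumulated totals, one line per description.
    Timer.summarize
  }
}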
Example 164
Source File: DomainHandler.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.groundings import java.time.ZonedDateTime import com.github.clulab.eidos.Version import com.github.clulab.eidos.Versions import com.github.worldModelers.ontologies.{Versions => AwayVersions} import org.clulab.wm.eidos.SentencesExtractor import org.clulab.wm.eidos.groundings.ontologies.FullTreeDomainOntology.FullTreeDomainOntologyBuilder import org.clulab.wm.eidos.groundings.OntologyHandler.serializedPath import org.clulab.wm.eidos.groundings.ontologies.CompactDomainOntology import org.clulab.wm.eidos.groundings.ontologies.FastDomainOntology import org.clulab.wm.eidos.groundings.ontologies.HalfTreeDomainOntology.HalfTreeDomainOntologyBuilder import org.clulab.wm.eidos.utils.Canonicalizer import org.clulab.wm.eidos.utils.StringUtils import org.slf4j.Logger import org.slf4j.LoggerFactory object DomainHandler { protected lazy val logger: Logger = LoggerFactory.getLogger(getClass) // The intention is to stop the proliferation of the generated Version class to this single method. protected def getVersionOpt(ontologyPath: String): (Option[String], Option[ZonedDateTime]) = { // This should work for local ontologies. Absolute val goodVersionOpt = Versions.versions.get(MockVersions.codeDir + ontologyPath) // See what might have come from WordModelers/Ontologies val bestVersionOpt = goodVersionOpt.getOrElse { // These are always stored in top level directory. val awayVersionOpt = AwayVersions.versions.get(StringUtils.afterLast(ontologyPath, '/')).getOrElse(None) val homeVersionOpt = awayVersionOpt.map { awayVersion => Version(awayVersion.commit, awayVersion.date) } homeVersionOpt } if (bestVersionOpt.isDefined) (Some(bestVersionOpt.get.commit), Some(bestVersionOpt.get.date)) else (None, None) } def apply(ontologyPath: String, serializedPath: String, sentencesExtractor: SentencesExtractor, canonicalizer: Canonicalizer, filter: Boolean = true, useCacheForOntologies: Boolean = false, includeParents: Boolean = false): DomainOntology = { // As coded below, when parents are included, the FullTreeDomainOntology is being used. // The faster loading version is the FastDomainOntology. // If parents are not included, as had traditionally been the case, the HalfTreeDomainOntology suffices. // Being smaller and faster, it is preferred. The faster loading counterpart is CompactDomainOntology. 
if (includeParents) { if (useCacheForOntologies) { logger.info(s"Processing cached yml ontology with parents from $serializedPath...") FastDomainOntology.load(serializedPath) } else { logger.info(s"Processing yml ontology with parents from $ontologyPath...") val (versionOpt, dateOpt) = getVersionOpt(ontologyPath) new FullTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt) } } else { if (useCacheForOntologies) { logger.info(s"Processing cached yml ontology without parents from $serializedPath...") CompactDomainOntology.load(serializedPath) } else { logger.info(s"Processing yml ontology without parents from $ontologyPath...") val (versionOpt, dateOpt) = getVersionOpt(ontologyPath) new HalfTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt) } } } def mkDomainOntology(name: String, ontologyPath: String, sentenceExtractor: SentencesExtractor, canonicalizer: Canonicalizer, cacheDir: String, useCacheForOntologies: Boolean, includeParents: Boolean): DomainOntology = { val ontSerializedPath: String = serializedPath(name, cacheDir, includeParents) DomainHandler(ontologyPath, ontSerializedPath, sentenceExtractor, canonicalizer: Canonicalizer, filter = true, useCacheForOntologies = useCacheForOntologies, includeParents = includeParents) } }
Example 165
Source File: ExtractCluMetaFromDirectory.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.apps.batch import java.io.File import org.clulab.serialization.json.stringify import org.clulab.wm.eidos.EidosSystem import org.clulab.wm.eidos.groundings.EidosAdjectiveGrounder import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus import org.clulab.wm.eidos.utils.Closer.AutoCloser import org.clulab.wm.eidos.utils.FileEditor import org.clulab.wm.eidos.utils.FileUtils import org.clulab.wm.eidos.utils.ThreadUtils import org.clulab.wm.eidos.utils.Timer import org.clulab.wm.eidos.utils.meta.CluText import org.slf4j.Logger import org.slf4j.LoggerFactory object ExtractCluMetaFromDirectory extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) val inputDir = args(0) val metaDir = args(1) val outputDir = args(2) val timeFile = args(3) val threads = args(4).toInt val doneDir = inputDir + "/done" val textToMeta = CluText.convertTextToMeta _ val files = FileUtils.findFiles(inputDir, "txt") val parFiles = ThreadUtils.parallelize(files, threads) Timer.time("Whole thing") { val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile) timePrintWriter.println("File\tSize\tTime") val timer = new Timer("Startup") timer.start() // Prime it first. This counts on overall time, but should not be attributed // to any particular document. val config = EidosSystem.defaultConfig val reader = new EidosSystem(config) val options = EidosSystem.Options() // 0. Optionally include adjective grounding val adjectiveGrounder = EidosAdjectiveGrounder.fromEidosConfig(config) reader.extractFromText("This is a test.") timer.stop() timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get) parFiles.foreach { file => try { // 1. Open corresponding output file logger.info(s"Extracting from ${file.getName}") val timer = new Timer("Single file in parallel") val size = timer.time { // 2. Get the input file text and metadata val metafile = textToMeta(file, metaDir) val eidosText = CluText(reader, file, Some(metafile)) val text = eidosText.getText val metadata = eidosText.getMetadata // 3. Extract causal mentions from the text val annotatedDocument = reader.extractFromText(text, options, metadata) // 4. Convert to JSON val corpus = new JLDCorpus(annotatedDocument) val mentionsJSONLD = corpus.serialize() // 5. Write to output file val path = CluText.convertTextToJsonld(file, outputDir) FileUtils.printWriterFromFile(path).autoClose { pw => pw.println(stringify(mentionsJSONLD, pretty = true)) } // Now move the file to directory done val newFile = FileEditor(file).setDir(doneDir).get file.renameTo(newFile) text.length } this.synchronized { timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get) } } catch { case exception: Exception => logger.error(s"Exception for file $file", exception) } } timePrintWriter.close() } }
Example 166
Source File: ExtractCdrMetaFromDirectory.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.apps.batch import org.clulab.wm.eidos.EidosSystem import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus import org.clulab.wm.eidos.utils.Closer.AutoCloser import org.clulab.wm.eidos.utils.FileEditor import org.clulab.wm.eidos.utils.FileUtils import org.clulab.wm.eidos.utils.ThreadUtils import org.clulab.wm.eidos.utils.Timer import org.clulab.wm.eidos.utils.meta.CdrText import org.slf4j.Logger import org.slf4j.LoggerFactory object ExtractCdrMetaFromDirectory extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) val inputDir = args(0) val outputDir = args(1) val timeFile = args(2) val threads = args(3).toInt val doneDir = inputDir + "/done" val files = FileUtils.findFiles(inputDir, "json") val parFiles = ThreadUtils.parallelize(files, threads) Timer.time("Whole thing") { val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile) timePrintWriter.println("File\tSize\tTime") val timer = new Timer("Startup") timer.start() // Prime it first. This counts on overall time, but should not be attributed // to any particular document. val reader = new EidosSystem() val options = EidosSystem.Options() Timer.time("EidosPrimer") { reader.extractFromText("This is a test.") } timer.stop() timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get) parFiles.foreach { file => try { // 1. Open corresponding output file logger.info(s"Extracting from ${file.getName}") val timer = new Timer("Single file in parallel") val size = timer.time { // 1. Get the input file text and metadata val eidosText = CdrText(file) val text = eidosText.getText val metadata = eidosText.getMetadata // 2. Extract causal mentions from the text val annotatedDocument = reader.extractFromText(text, options, metadata) // 3. Write to output file val path = FileEditor(file).setDir(outputDir).setExt("jsonld").get FileUtils.printWriterFromFile(path).autoClose { printWriter => new JLDCorpus(annotatedDocument).serialize(printWriter) } // Now move the file to directory done val newFile = FileEditor(file).setDir(doneDir).get file.renameTo(newFile) text.length } this.synchronized { timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get) } } catch { case exception: Exception => logger.error(s"Exception for file $file", exception) } } timePrintWriter.close() } }
Example 167
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.elasticsearch.utils import java.io.{File, FileNotFoundException} import java.nio.charset.StandardCharsets import org.slf4j.{Logger, LoggerFactory} import scala.io.BufferedSource import scala.io.Source object Sourcer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val utf8: String = StandardCharsets.UTF_8.toString def sourceFromResource(path: String): BufferedSource = { val url = Option(Sourcer.getClass.getResource(path)) .getOrElse(throw newFileNotFoundException(path)) logger.info("Sourcing resource " + url.getPath) Source.fromURL(url, utf8) } def sourceFromFile(file: File): BufferedSource = { logger.info("Sourcing file " + file.getPath) Source.fromFile(file, utf8) } def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path)) def newFileNotFoundException(path: String): FileNotFoundException = { val message1 = path + " (The system cannot find the path specified" val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "") val message3 = message2 + ")" new FileNotFoundException(message3) } }
Example 168
Source File: SignalLogger.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import org.apache.commons.lang3.SystemUtils import org.slf4j.Logger import sun.misc.{Signal, SignalHandler} def register(log: Logger): Unit = synchronized { if (SystemUtils.IS_OS_UNIX) { require(!registered, "Can't re-install the signal handlers") registered = true val signals = Seq("TERM", "HUP", "INT") for (signal <- signals) { try { new SignalLoggerHandler(signal, log) } catch { case e: Exception => log.warn("Failed to register signal handler " + signal, e) } } log.info("Registered signal handlers for [" + signals.mkString(", ") + "]") } } } private sealed class SignalLoggerHandler(name: String, log: Logger) extends SignalHandler { val prevHandler = Signal.handle(new Signal(name), this) override def handle(signal: Signal): Unit = { log.error("RECEIVED SIGNAL " + signal.getNumber() + ": SIG" + signal.getName()) prevHandler.handle(signal) } }
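Assuming the enclosing object of the register method above is named SignalLogger (as the source file name suggests) and may be package-private depending on the Spark version, a caller placed in the same package could install the handlers once, early in main; on non-UNIX systems the call is a no-op. A hypothetical sketch:

package org.apache.spark.util

import org.slf4j.{Logger, LoggerFactory}

// Hypothetical caller in the same package, in case the object is package-private.
object SignalLoggingMain {
  private val log: Logger = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    SignalLogger.register(log) // logs TERM/HUP/INT before delegating to the previous handler
    log.info("Signal handlers installed; application starting")
  }
}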
Example 169
Source File: ActorLogReceive.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import akka.actor.Actor import org.slf4j.Logger private[spark] trait ActorLogReceive { self: Actor => override def receive: Actor.Receive = new Actor.Receive { private val _receiveWithLogging = receiveWithLogging override def isDefinedAt(o: Any): Boolean = _receiveWithLogging.isDefinedAt(o) override def apply(o: Any): Unit = { if (log.isDebugEnabled) { log.debug(s"[actor] received message $o from ${self.sender}") } val start = System.nanoTime _receiveWithLogging.apply(o) val timeTaken = (System.nanoTime - start).toDouble / 1000000 if (log.isDebugEnabled) { log.debug(s"[actor] handled message ($timeTaken ms) $o from ${self.sender}") } } } def receiveWithLogging: Actor.Receive protected def log: Logger }
Example 170
Source File: CarbonThriftServer.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.thriftserver import java.io.File import org.apache.spark.SparkConf import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 import org.slf4j.{Logger, LoggerFactory} import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.spark.util.CarbonSparkUtil object CarbonThriftServer { def main(args: Array[String]): Unit = { if (args.length != 0 && args.length != 3) { val logger: Logger = LoggerFactory.getLogger(this.getClass) logger.error("parameters: [access-key] [secret-key] [s3-endpoint]") System.exit(0) } val sparkConf = new SparkConf(loadDefaults = true) val builder = SparkSession .builder() .config(sparkConf) .appName("Carbon Thrift Server(uses CarbonExtensions)") .enableHiveSupport() .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") configPropertiesFile(sparkConf, builder) if (args.length == 3) { builder.config(CarbonSparkUtil.getSparkConfForS3(args(0), args(1), args(2))) } val spark = builder.getOrCreate() CarbonEnv.getInstance(spark) waitingForSparkLaunch() HiveThriftServer2.startWithContext(spark.sqlContext) } private def waitingForSparkLaunch(): Unit = { val warmUpTime = CarbonProperties.getInstance().getProperty("carbon.spark.warmUpTime", "5000") try { Thread.sleep(Integer.parseInt(warmUpTime)) } catch { case e: Exception => val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName) LOG.error(s"Wrong value for carbon.spark.warmUpTime $warmUpTime " + "Using default Value and proceeding") Thread.sleep(5000) } } private def configPropertiesFile(sparkConf: SparkConf, builder: SparkSession.Builder): Unit = { sparkConf.contains("carbon.properties.filepath") match { case false => val sparkHome = System.getenv.get("SPARK_HOME") if (null != sparkHome) { val file = new File(sparkHome + '/' + "conf" + '/' + "carbon.properties") if (file.exists()) { builder.config("carbon.properties.filepath", file.getCanonicalPath) System.setProperty("carbon.properties.filepath", file.getCanonicalPath) } } case true => System.setProperty( "carbon.properties.filepath", sparkConf.get("carbon.properties.filepath")) } } }
Example 171
Source File: S3CsvExample.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.examples import java.io.File import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, SECRET_KEY} import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} object S3CsvExample { def main(args: Array[String]) { val rootPath = new File(this.getClass.getResource("/").getPath + "../../../..").getCanonicalPath val logger: Logger = LoggerFactory.getLogger(this.getClass) import org.apache.spark.sql.CarbonUtils._ if (args.length != 4) { logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" + "<s3.csv.location> <spark-master>") System.exit(0) } val spark = SparkSession .builder() .master(args(3)) .appName("S3CsvExample") .config("spark.driver.host", "localhost") .config("spark.hadoop." + ACCESS_KEY, args(0)) .config("spark.hadoop." + SECRET_KEY, args(1)) .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") .getOrCreate() spark.sparkContext.setLogLevel("ERROR") spark.sql( s""" | CREATE TABLE if not exists carbon_table1( | shortField SHORT, | intField INT, | bigintField LONG, | doubleField DOUBLE, | stringField STRING, | timestampField TIMESTAMP, | decimalField DECIMAL(18,2), | dateField DATE, | charField CHAR(5), | floatField FLOAT | ) | STORED AS carbondata | LOCATION '$rootPath/examples/spark/target/store' """.stripMargin) spark.sql( s""" | LOAD DATA LOCAL INPATH '${ args(2) }' | INTO TABLE carbon_table1 | OPTIONS('HEADER'='true') """.stripMargin) spark.sql( s""" | LOAD DATA LOCAL INPATH '${ args(2) }' | INTO TABLE carbon_table1 | OPTIONS('HEADER'='true') """.stripMargin) spark.sql( s""" | SELECT * | FROM carbon_table1 """.stripMargin).show() spark.sql("Drop table if exists carbon_table1") spark.stop() } }
Example 172
Source File: S3UsingSDkExample.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.examples import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, ENDPOINT, SECRET_KEY} import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field} import org.apache.carbondata.sdk.file.{CarbonWriter, Schema} import org.apache.carbondata.spark.util.CarbonSparkUtil def main(args: Array[String]) { val logger: Logger = LoggerFactory.getLogger(this.getClass) if (args.length < 2 || args.length > 6) { logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" + "[table-path-on-s3] [s3-endpoint] [number-of-rows] [spark-master]") System.exit(0) } val (accessKey, secretKey, endpoint) = CarbonSparkUtil.getKeyOnPrefix(args(2)) val spark = SparkSession .builder() .master(getSparkMaster(args)) .appName("S3UsingSDKExample") .config("spark.driver.host", "localhost") .config(accessKey, args(0)) .config(secretKey, args(1)) .config(endpoint, CarbonSparkUtil.getS3EndPoint(args)) .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") .getOrCreate() spark.sparkContext.setLogLevel("WARN") val path = if (args.length < 3) { "s3a://sdk/WriterOutput2 " } else { args(2) } val num = if (args.length > 4) { Integer.parseInt(args(4)) } else { 3 } buildTestData(args, path, num) spark.sql("DROP TABLE IF EXISTS s3_sdk_table") spark.sql(s"CREATE EXTERNAL TABLE s3_sdk_table STORED AS carbondata" + s" LOCATION '$path'") spark.sql("SELECT * FROM s3_sdk_table LIMIT 10").show() spark.stop() } def getSparkMaster(args: Array[String]): String = { if (args.length == 6) args(5) else "local" } }
Example 173
Source File: ViewTestSupport.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.sales.view import com.typesafe.config.Config import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{Millis, Seconds, Span} import org.scalatest.{BeforeAndAfterAll, Suite} import org.slf4j.Logger import org.slf4j.LoggerFactory.getLogger import pl.newicom.dddd.view.sql.SqlViewStore import slick.dbio._ import scala.concurrent.ExecutionContext import slick.jdbc.H2Profile trait ViewTestSupport extends BeforeAndAfterAll with ScalaFutures { this: Suite => def config: Config lazy val viewStore = new SqlViewStore(config) val log: Logger = getLogger(getClass) implicit val profile = H2Profile implicit class ViewStoreAction[A](a: DBIO[A])(implicit ex: ExecutionContext) { private val future = viewStore.run(a) def run(): Unit = future.map(_ => ()).futureValue def result: A = future.futureValue } def ensureSchemaDropped: DBIO[Unit] def ensureSchemaCreated: DBIO[Unit] implicit override val patienceConfig = PatienceConfig( timeout = scaled(Span(10, Seconds)), interval = scaled(Span(200, Millis)) ) override def beforeAll() { viewStore.run { ensureSchemaDropped >> ensureSchemaCreated }.futureValue } }
Example 174
Source File: ViewTestSupport.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.sales.view import com.typesafe.config.Config import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{Millis, Seconds, Span} import org.scalatest.{BeforeAndAfterAll, Suite} import org.slf4j.Logger import org.slf4j.LoggerFactory.getLogger import pl.newicom.dddd.view.sql.SqlViewStore import slick.dbio._ import slick.jdbc.H2Profile import scala.concurrent.ExecutionContext trait ViewTestSupport extends BeforeAndAfterAll with ScalaFutures { this: Suite => def config: Config lazy val viewStore = new SqlViewStore(config) val log: Logger = getLogger(getClass) implicit val profile = H2Profile implicit override val patienceConfig = PatienceConfig( timeout = scaled(Span(5, Seconds)), interval = scaled(Span(200, Millis)) ) implicit class ViewStoreAction[A](a: DBIO[A])(implicit ex: ExecutionContext) { private val future = viewStore.run(a) def run(): Unit = future.map(_ => ()).futureValue def result: A = future.futureValue } def ensureSchemaDropped: DBIO[Unit] def ensureSchemaCreated: DBIO[Unit] override def beforeAll() { val setup = viewStore.run { ensureSchemaDropped >> ensureSchemaCreated } assert(setup.isReadyWithin(Span(5, Seconds))) } }
Example 175
Source File: HeadquartersConfiguration.scala From ddd-leaven-akka-v2 with MIT License | 5 votes |
package ecommerce.headquarters.app import java.util.UUID import akka.actor.{Props, _} import com.typesafe.config.Config import ecommerce.headquarters.app.HeadquartersConfiguration._ import ecommerce.headquarters.processes.OrderProcessManager import ecommerce.shipping.ShipmentId import org.slf4j.Logger import pl.newicom.dddd.actor.{ActorFactory, DefaultConfig, PassivationConfig} import pl.newicom.dddd.aggregate.{AggregateRootActorFactory, AggregateRootLogger} import pl.newicom.dddd.coordination.ReceptorConfig import pl.newicom.dddd.office.LocalOfficeId import pl.newicom.dddd.process._ import pl.newicom.dddd.scheduling.{Scheduler, SchedulerEvent, schedulingOfficeId} import pl.newicom.eventstore.EventstoreSubscriber import scala.concurrent.duration._ object HeadquartersConfiguration { val HQDepartment: String = "Headquarters" } trait HeadquartersConfiguration { def log: Logger def config: Config implicit def system: ActorSystem implicit val schedulingOfficeID: LocalOfficeId[Scheduler] = schedulingOfficeId(HQDepartment) implicit val commandQueueOfficeID: LocalOfficeId[CommandSink] = commandSinkOfficeId(HQDepartment) implicit object SchedulerFactory extends AggregateRootActorFactory[Scheduler] { override def props(pc: PassivationConfig) = Props(new Scheduler(DefaultConfig(pc, replyWithEvents = false)) with AggregateRootLogger[SchedulerEvent] { // TODO not needed override def id = "global" }) } implicit object CommandSinkFactory extends AggregateRootActorFactory[CommandSink] { override def props(pc: PassivationConfig) = Props(new CommandSink(DefaultConfig(pc, replyWithEvents = false)) with AggregateRootLogger[CommandEnqueued]) } implicit object OrderProcessManagerActorFactory extends SagaActorFactory[OrderProcessManager] { def props(pc: PassivationConfig): Props = Props(new OrderProcessManager(DefaultConfig(pc, replyWithEvents = false), () => new ShipmentId(UUID.randomUUID().toString))) } implicit def receptorActorFactory[A : LocalOfficeId : ActorFactory]: ReceptorActorFactory[A] = new ReceptorActorFactory[A] { def receptorFactory: ReceptorFactory = (config: ReceptorConfig) => { new Receptor(config.copy(capacity = 100)) with EventstoreSubscriber { override def redeliverInterval: FiniteDuration = 10.seconds } } } }
Example 176
Source File: DeltaLoad.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.algo import com.adidas.analytics.algo.DeltaLoad._ import com.adidas.analytics.algo.core.Algorithm import com.adidas.analytics.algo.shared.DateComponentDerivation import com.adidas.analytics.config.DeltaLoadConfiguration.PartitionedDeltaLoadConfiguration import com.adidas.analytics.util.DataFrameUtils._ import com.adidas.analytics.util._ import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.storage.StorageLevel import org.slf4j.{Logger, LoggerFactory} private def getUpsertRecords(deltaRecords: Dataset[Row], resultColumns: Seq[String]): Dataset[Row] = { // Create partition window - Partitioning by delta records logical key (i.e. technical key of active records) val partitionWindow = Window .partitionBy(businessKey.map(col): _*) .orderBy(technicalKey.map(component => col(component).desc): _*) // Ranking & projection val rankedDeltaRecords = deltaRecords .withColumn(rankingColumnName, row_number().over(partitionWindow)) .filter(upsertRecordsModesFilterFunction) rankedDeltaRecords .filter(rankedDeltaRecords(rankingColumnName) === 1) .selectExpr(resultColumns: _*) } protected def withDatePartitions(spark: SparkSession, dfs: DFSWrapper, dataFrames: Vector[DataFrame]): Vector[DataFrame] = { logger.info("Adding partitioning information if needed") try { dataFrames.map { df => if (df.columns.toSeq.intersect(targetPartitions) != targetPartitions){ df.transform(withDateComponents(partitionSourceColumn, partitionSourceColumnFormat, targetPartitions)) } else df } } catch { case e: Throwable => logger.error("Cannot add partitioning information for data frames.", e) //TODO: Handle failure case properly throw new RuntimeException("Unable to transform data frames.", e) } } } object DeltaLoad { private val logger: Logger = LoggerFactory.getLogger(getClass) def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): DeltaLoad = { new DeltaLoad(spark, dfs, configLocation) } }
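The core of getUpsertRecords above is a window-based dedup: partition the delta records by their business key, order by the technical key descending, and keep only the first-ranked row. A generic, hypothetical sketch of that step (column and object names are illustrative):

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, row_number}

object LatestRecordSketch {
  // Keep only the newest record per business key, "newest" being decided by the technical key.
  def latestPerKey(df: DataFrame, businessKey: Seq[String], technicalKey: Seq[String]): DataFrame = {
    val window = Window
      .partitionBy(businessKey.map(col): _*)
      .orderBy(technicalKey.map(c => col(c).desc): _*)
    df.withColumn("row_rank", row_number().over(window))
      .filter(col("row_rank") === 1)
      .drop("row_rank")
  }
}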
Example 177
Source File: GzipDecompressor.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.algo import java.util.concurrent.{Executors, TimeUnit} import com.adidas.analytics.algo.GzipDecompressor.{changeFileExtension, compressedExtension, _} import com.adidas.analytics.algo.core.JobRunner import com.adidas.analytics.config.GzipDecompressorConfiguration import com.adidas.analytics.util.DFSWrapper import com.adidas.analytics.util.DFSWrapper._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.IOUtils import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import scala.concurrent._ import scala.concurrent.duration._ final class GzipDecompressor protected(val spark: SparkSession, val dfs: DFSWrapper, val configLocation: String) extends JobRunner with GzipDecompressorConfiguration { private val hadoopConfiguration: Configuration = spark.sparkContext.hadoopConfiguration private val fileSystem: FileSystem = dfs.getFileSystem(inputDirectoryPath) override def run(): Unit = { //check if directory exists if (!fileSystem.exists(inputDirectoryPath)){ logger.error(s"Input directory: $inputDirectoryPath does not exist.") throw new RuntimeException(s"Directory $inputDirectoryPath does not exist.") } val compressedFilePaths = fileSystem.ls(inputDirectoryPath, recursive) .filterNot(path => fileSystem.isDirectory(path)) .filter(_.getName.toLowerCase.endsWith(compressedExtension)) if (compressedFilePaths.isEmpty) { logger.warn(s"Input directory $inputDirectoryPath does not contain compressed files. Skipping...") } else { implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(threadPoolSize)) Await.result(Future.sequence( compressedFilePaths.map { compressedFilePath => Future { logger.info(s"Decompressing file: $compressedFilePath") val decompressedFileName = changeFileExtension(compressedFilePath.getName, compressedExtension, outputExtension) val decompressedFilePath = new Path(compressedFilePath.getParent, decompressedFileName) val compressionCodecFactory = new CompressionCodecFactory(hadoopConfiguration) val inputCodec = compressionCodecFactory.getCodec(compressedFilePath) val inputStream = inputCodec.createInputStream(fileSystem.open(compressedFilePath)) val output = fileSystem.create(decompressedFilePath) IOUtils.copyBytes(inputStream, output, hadoopConfiguration) logger.info(s"Finished decompressing file: $compressedFilePath") //Delete the compressed file fileSystem.delete(compressedFilePath, false) logger.info(s"Removed file: $compressedFilePath") } } ), Duration(4, TimeUnit.HOURS)) } } } object GzipDecompressor { private val logger: Logger = LoggerFactory.getLogger(this.getClass) private val compressedExtension: String = ".gz" def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): GzipDecompressor = { new GzipDecompressor(spark, dfs, configLocation) } private def changeFileExtension(fileName: String, currentExt: String, newExt: String): String = { val newFileName = fileName.substring(0, fileName.lastIndexOf(currentExt)) if (newFileName.endsWith(newExt)) newFileName else newFileName + newExt } }
Example 178
Source File: DataFrameUtils.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row, functions} import org.slf4j.{Logger, LoggerFactory} object DataFrameUtils { private val logger: Logger = LoggerFactory.getLogger(getClass) type FilterFunction = Row => Boolean type PartitionCriteria = Seq[(String, String)] def mapPartitionsToDirectories(partitionCriteria: PartitionCriteria): Seq[String] = { partitionCriteria.map { case (columnName, columnValue) => s"$columnName=$columnValue" } } def buildPartitionsCriteriaMatcherFunc(multiplePartitionsCriteria: Seq[PartitionCriteria], schema: StructType): FilterFunction = { val targetPartitions = multiplePartitionsCriteria.flatten.map(_._1).toSet val fieldNameToMatchFunctionMapping = schema.fields.filter { case StructField(name, _, _, _) => targetPartitions.contains(name) }.map { case StructField(name, _: ByteType, _, _) => name -> ((r: Row, value: String) => r.getAs[Byte](name) == value.toByte) case StructField(name, _: ShortType, _, _) => name -> ((r: Row, value: String) => r.getAs[Short](name) == value.toShort) case StructField(name, _: IntegerType, _, _) => name -> ((r: Row, value: String) => r.getAs[Int](name) == value.toInt) case StructField(name, _: LongType, _, _) => name -> ((r: Row, value: String) => r.getAs[Long](name) == value.toLong) case StructField(name, _: FloatType, _, _) => name -> ((r: Row, value: String) => r.getAs[Float](name) == value.toFloat) case StructField(name, _: DoubleType, _, _) => name -> ((r: Row, value: String) => r.getAs[Double](name) == value.toDouble) case StructField(name, _: BooleanType, _, _) => name -> ((r: Row, value: String) => r.getAs[Boolean](name) == value.toBoolean) case StructField(name, _: StringType, _, _) => name -> ((r: Row, value: String) => r.getAs[String](name) == value) }.toMap def convertPartitionCriteriaToFilterFunctions(partitionCriteria: PartitionCriteria): Seq[FilterFunction] = partitionCriteria.map { case (name, value) => (row: Row) => fieldNameToMatchFunctionMapping(name)(row, value) } def joinSinglePartitionFilterFunctionsWithAnd(partitionFilterFunctions: Seq[FilterFunction]): FilterFunction = partitionFilterFunctions .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) && predicate2(row)) .getOrElse((_: Row) => false) multiplePartitionsCriteria .map(convertPartitionCriteriaToFilterFunctions) .map(joinSinglePartitionFilterFunctionsWithAnd) .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) || predicate2(row)) .getOrElse((_: Row) => false) } implicit class DataFrameHelper(df: DataFrame) { def collectPartitions(targetPartitions: Seq[String]): Seq[PartitionCriteria] = { logger.info(s"Collecting unique partitions for partitions columns (${targetPartitions.mkString(", ")})") val partitions = df.selectExpr(targetPartitions: _*).distinct().collect() partitions.map { row => targetPartitions.map { columnName => Option(row.getAs[Any](columnName)) match { case Some(columnValue) => columnName -> columnValue.toString case None => throw new RuntimeException(s"Partition column '$columnName' contains null value") } } } } def addMissingColumns(targetSchema: StructType): DataFrame = { val dataFieldsSet = df.schema.fieldNames.toSet val selectColumns = targetSchema.fields.map { field => if (dataFieldsSet.contains(field.name)) { functions.col(field.name) } else { functions.lit(null).cast(field.dataType).as(field.name) } } df.select(selectColumns: _*) } def isEmpty: Boolean = df.head(1).isEmpty def nonEmpty: Boolean = df.head(1).nonEmpty } }
Example 179
Source File: DataFormat.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util

import org.apache.spark.sql._
import org.apache.spark.sql.types.StructType
import org.slf4j.{Logger, LoggerFactory}

sealed trait DataFormat {

  protected val logger: Logger = LoggerFactory.getLogger(getClass)

  def read(reader: DataFrameReader, locations: String*): DataFrame

  def write(writer: DataFrameWriter[Row], location: String): Unit
}

object DataFormat {

  case class ParquetFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading Parquet data from $filesString")
      optionalSchema.fold(reader)(schema => reader.schema(schema)).parquet(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing Parquet data to $location")
      writer.parquet(location)
    }
  }

  case class DSVFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading DSV data from $filesString")
      optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).csv(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing DSV data to $location")
      writer.csv(location)
    }
  }

  case class JSONFormat(optionalSchema: Option[StructType] = None) extends DataFormat {

    override def read(reader: DataFrameReader, locations: String*): DataFrame = {
      val filesString = locations.mkString(", ")
      logger.info(s"Reading JSON data from $filesString")
      optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).json(locations: _*)
    }

    override def write(writer: DataFrameWriter[Row], location: String): Unit = {
      logger.info(s"Writing JSON data to $location")
      writer.json(location)
    }
  }
}
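A short usage sketch for ParquetFormat, assuming an existing SparkSession named `spark`; the paths are hypothetical:

import com.adidas.analytics.util.DataFormat.ParquetFormat

val format = ParquetFormat()                        // schema is taken from the Parquet footer
val df = format.read(spark.read, "/data/events")    // hypothetical input location
format.write(df.write.mode("overwrite"), "/data/events_copy")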
Example 180
Source File: InputReader.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}

// Note: the InputReader trait (which declares `read` and provides `logger`) and the opening of
// the enclosing object are not included in this excerpt.

  def newTableLocationReader(table: String, format: DataFormat, options: Map[String, String] = Map.empty): TableLocationReader = {
    TableLocationReader(table, format, options)
  }

  case class TableReader(table: String, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      logger.info(s"Reading data from table $table")
      sparkSession.read.options(options).table(table)
    }
  }

  case class FileSystemReader(location: String, format: DataFormat, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      logger.info(s"Reading data from location $location")
      format.read(sparkSession.read.options(options), location)
    }
  }

  case class TableLocationReader(table: String, format: DataFormat, options: Map[String, String]) extends InputReader {
    override def read(sparkSession: SparkSession): DataFrame = {
      val location = HiveTableAttributeReader(table, sparkSession).getTableLocation
      logger.info(s"Reading data from location $location")
      format.read(sparkSession.read.options(options), location)
    }
  }
}
Example 181
Source File: ConfigReader.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util

import java.text.DecimalFormatSymbols

import org.slf4j.{Logger, LoggerFactory}

import scala.util.parsing.json.{JSON, JSONArray, JSONObject}

class ConfigReader(jsonContent: String) extends Serializable {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  private val decimalSeparator: Char = new DecimalFormatSymbols().getDecimalSeparator

  JSON.globalNumberParser = (in: String) => if (in.contains(decimalSeparator)) in.toDouble else in.toInt

  private lazy val config = JSON.parseRaw(jsonContent) match {
    case Some(JSONObject(obj)) => obj
    case _ => throw new IllegalArgumentException(s"Wrong format of the configuration file: $jsonContent")
  }

  def getAsSeq[T](propertyName: String): Seq[T] = {
    config.get(propertyName) match {
      case Some(JSONArray(list)) => list.map(_.asInstanceOf[T])
      case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAsMap[K, V](propertyName: String): Map[K, V] = {
    config.get(propertyName) match {
      case Some(JSONObject(obj)) => obj.asInstanceOf[Map[K, V]]
      case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAs[T](propertyName: String): T = {
    config.get(propertyName) match {
      case Some(property) => property.asInstanceOf[T]
      case None => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName")
    }
  }

  def getAsOption[T](propertyName: String): Option[T] = {
    config.get(propertyName).map(property => property.asInstanceOf[T])
  }

  def getAsOptionSeq[T](propertyName: String): Option[Seq[T]] = {
    config.get(propertyName).map(_ => getAsSeq(propertyName))
  }

  def contains(propertyName: String): Boolean = {
    config.contains(propertyName)
  }
}

object ConfigReader {
  def apply(jsonContent: String): ConfigReader = new ConfigReader(jsonContent)
}
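A quick usage sketch for the reader above; the JSON content and property names are hypothetical:

import com.adidas.analytics.util.ConfigReader

val config = ConfigReader("""{"target_table": "sales", "partition_columns": ["year", "month"]}""")

val table: String = config.getAs[String]("target_table")
val partitions: Seq[String] = config.getAsSeq[String]("partition_columns")
val missing: Option[String] = config.getAsOption[String]("not_there")   // None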
Example 182
Source File: BaseAlgorithmTest.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.utils

import java.util.UUID

import com.adidas.analytics.util.{DFSWrapper, LoadMode}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.types.StructType
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.io.Source

trait BaseAlgorithmTest extends Suite with BeforeAndAfterAll with BeforeAndAfterEach with HDFSSupport with SparkSupport {

  override val logger: Logger = LoggerFactory.getLogger(getClass)
  override val testAppId: String = UUID.randomUUID().toString
  override val localTestDir: String = "target"
  override val sparkHadoopConf: Option[Configuration] = Some(fs.getConf)

  val hdfsRootTestPath: Path = new Path("hdfs:///tmp/tests")
  val dfs: DFSWrapper = DFSWrapper(spark.sparkContext.hadoopConfiguration)

  override def afterAll(): Unit = {
    spark.stop()
    cluster.shutdown(true)
  }

  override def beforeEach(): Unit = {
    fs.delete(hdfsRootTestPath, true)
    fs.mkdirs(hdfsRootTestPath)
  }

  override def afterEach(): Unit = {
    spark.sqlContext.clearCache()
    spark.sparkContext.getPersistentRDDs.foreach {
      case (_, rdd) => rdd.unpersist(true)
    }
  }

  def resolveResource(fileName: String, withProtocol: Boolean = false): String = {
    val resource = s"${getClass.getSimpleName}/$fileName"
    logger.info(s"Resolving resource $resource")
    val location = getClass.getClassLoader.getResource(resource).getPath

    if (withProtocol) {
      s"file://$location"
    } else {
      location
    }
  }

  def getResourceAsText(fileName: String): String = {
    val resource = s"${getClass.getSimpleName}/$fileName"
    logger.info(s"Reading resource $resource")
    val stream = getClass.getClassLoader.getResourceAsStream(resource)
    Source.fromInputStream(stream).mkString
  }

  def copyResourceFileToHdfs(resource: String, targetPath: Path): Unit = {
    val localResourceRoot = resolveResource("", withProtocol = true)
    val sourcePath = new Path(localResourceRoot, resource)
    logger.info(s"Copying local resource to HDFS $sourcePath -> $targetPath")
    fs.copyFromLocalFile(sourcePath, targetPath)
  }

  def createAndLoadParquetTable(database: String, tableName: String, partitionColumns: Option[Seq[String]] = None,
                                schema: StructType, filePath: String, reader: FileReader): Table = {
    val table = createParquetTable(database, tableName, partitionColumns, schema)
    val inputTableDataURI = resolveResource(filePath, withProtocol = true)
    table.write(Seq(inputTableDataURI), reader, LoadMode.OverwritePartitions)
    table
  }
}
Example 183
Source File: HDFSSupport.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.utils

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{CommonConfigurationKeysPublic, FileSystem}
import org.apache.hadoop.hdfs.MiniDFSCluster
import org.slf4j.Logger

trait HDFSSupport {

  private lazy val defaultDataNodesNum: Int = 2
  private lazy val defaultPort: Int = 8201

  lazy val cluster: MiniDFSCluster = startHDFS(clusterHdfsConf)
  lazy val fs: FileSystem = cluster.getFileSystem()

  def logger: Logger
  def testAppId: String
  def localTestDir: String

  def clusterHdfsConf: Option[Configuration] = Option.empty

  def startHDFS(hadoopConf: Option[Configuration]): MiniDFSCluster = {
    val appDir = new File(localTestDir, testAppId)
    val hdfsTestDir = new File(appDir, "hdfs").getAbsoluteFile
    hdfsTestDir.mkdirs()

    val clusterConf = hadoopConf.fold(new Configuration())(c => new Configuration(c))
    clusterConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsTestDir.getAbsolutePath)
    clusterConf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, s"hdfs://localhost:$defaultPort/")

    logger.info(s"Starting test DFS cluster with base directory at ${hdfsTestDir.getAbsolutePath} ...")
    new MiniDFSCluster.Builder(clusterConf)
      .numDataNodes(defaultDataNodesNum)
      .nameNodePort(defaultPort)
      .format(true)
      .build()
  }
}
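A sketch of a test suite mixing in the trait above. The suite name, paths, and the ScalaTest style (AnyFunSuite from ScalaTest 3.1+) are assumptions; adjust them to the project's test setup:

import com.adidas.utils.HDFSSupport
import org.apache.hadoop.fs.Path
import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.{Logger, LoggerFactory}

class MyHdfsSuite extends AnyFunSuite with HDFSSupport {
  override val logger: Logger = LoggerFactory.getLogger(getClass)
  override val testAppId: String = "my-hdfs-suite"
  override val localTestDir: String = "target"

  test("writes a file to the mini cluster") {
    val path = new Path("/tmp/hello.txt")
    fs.create(path).close()        // `fs` is provided by HDFSSupport
    assert(fs.exists(path))
  }
}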
Example 184
Source File: SparkSupport.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.utils

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.slf4j.Logger

import scala.collection.JavaConversions._

trait SparkSupport extends SparkSessionWrapper {

  def logger: Logger
  def testAppId: String
  def localTestDir: String

  override def startSpark(hadoopConf: Option[Configuration] = None): SparkSession = {
    // This line makes it possible to use a remote debugger
    System.setSecurityManager(null)

    val appDir = new File(localTestDir, testAppId)
    val sparkTestDir = new File(appDir, "spark").getAbsoluteFile
    sparkTestDir.mkdirs()

    val sparkConf = hadoopConf.foldLeft {
      new SparkConf(false)
        .set("spark.ui.enabled", "false")
        .set("spark.sql.warehouse.dir", new File(sparkTestDir, "warehouse").getAbsolutePath)
    } { (sparkConf, hadoopConf) =>
      hadoopConf.foldLeft(sparkConf)((sc, entry) => sc.set(s"spark.hadoop.${entry.getKey}", entry.getValue))
    }

    System.setProperty("derby.system.home", new File(sparkTestDir, "metastore").getAbsolutePath)

    logger.info(s"Starting Spark Session with warehouse dir at ${sparkTestDir.getAbsolutePath} ...")
    SparkSession.builder()
      .config(sparkConf)
      .appName(s"test-${getClass.getName}")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
  }

  def addHadoopConfiguration(conf: Configuration): Unit = {
    conf.foreach { property =>
      spark.sparkContext.hadoopConfiguration.set(property.getKey, property.getValue)
    }
  }

  def addHadoopProperty(key: String, value: String): Unit = {
    spark.sparkContext.hadoopConfiguration.set(key, value)
  }
}
Example 185
Source File: tracerlog.scala From http4s-tracer with Apache License 2.0 | 5 votes |
package dev.profunktor.tracer.instances

import cats.effect.Sync
import cats.syntax.flatMap._
import dev.profunktor.tracer.Trace
import dev.profunktor.tracer.Trace._
import dev.profunktor.tracer.TracerLog
import org.slf4j.{Logger, LoggerFactory}

import scala.reflect.ClassTag

object tracerlog {

  implicit def defaultLog[F[_]](implicit F: Sync[F]): TracerLog[Trace[F, ?]] =
    new TracerLog[Trace[F, ?]] {

      def logger[A](implicit ct: ClassTag[A]): F[Logger] =
        F.delay(LoggerFactory.getLogger(ct.runtimeClass))

      override def info[A: ClassTag](value: => String): Trace[F, Unit] = Trace { id =>
        logger[A].flatMap { log =>
          if (log.isInfoEnabled) F.delay(log.info(s"$id - $value"))
          else F.unit
        }
      }

      override def error[A: ClassTag](value: => String): Trace[F, Unit] = Trace { id =>
        logger[A].flatMap { log =>
          if (log.isErrorEnabled) F.delay(log.error(s"$id - $value"))
          else F.unit
        }
      }

      override def warn[A: ClassTag](value: => String): Trace[F, Unit] = Trace { id =>
        logger[A].flatMap { log =>
          if (log.isWarnEnabled) F.delay(log.warn(s"$id - $value"))
          else F.unit
        }
      }
    }
}
Example 186
Source File: ImageNormalization.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.examples.inception

import org.apache.flink.contrib.tensorflow.examples.common.GraphBuilder
import org.apache.flink.contrib.tensorflow.examples.inception.ImageNormalization._
import org.apache.flink.contrib.tensorflow.examples.inception.ImageNormalizationMethod._
import org.apache.flink.contrib.tensorflow.graphs.{GraphDefGraphLoader, GraphLoader, GraphMethod}
import org.apache.flink.contrib.tensorflow.models.generic.GenericModel
import org.apache.flink.contrib.tensorflow.models.ModelFunction
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow._
import org.tensorflow.contrib.scala._
import org.tensorflow.framework.{SignatureDef, TensorInfo}

sealed trait ImageNormalizationMethod extends GraphMethod {
  val name = NORMALIZE_METHOD_NAME
  override type Input = ImageFileTensor
  override type Output = ImageTensor
}

object ImageNormalizationMethod {
  val NORMALIZE_METHOD_NAME = "inception/normalize"
  val NORMALIZE_INPUTS = "inputs"
  val NORMALIZE_OUTPUTS = "outputs"

  // Note: `session` and `signatureDef` come from the ImageNormalization model class,
  // whose definition is not included in this excerpt.
  def normalize = ModelFunction[ImageNormalizationMethod](session, signatureDef)
}

object ImageNormalization {
  private[inception] val LOG: Logger = LoggerFactory.getLogger(classOf[ImageNormalization])
}
Example 187
Source File: ImageInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.examples.inception

import java.io.IOException
import java.util.Collections

import com.twitter.bijection.Conversion._
import org.apache.flink.api.common.io.GlobFilePathFilter
import org.apache.flink.configuration.Configuration
import org.apache.flink.contrib.tensorflow._
import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat
import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._
import org.apache.flink.core.fs.{FSDataInputStream, Path}
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow.contrib.scala.ByteStrings._
import resource._

import scala.collection.JavaConverters._

// Note: the enclosing ImageInputFormat class declaration is not included in this excerpt;
// only its readRecord override is shown.

  override def readRecord(
      reuse: (String, ImageTensorValue),
      filePath: Path,
      fileStream: FSDataInputStream,
      fileLength: Long): (String, ImageTensorValue) = {

    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }

    val imageData =
      readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile]

    val imageTensor: ImageTensorValue =
      managed(imageData.as[ImageFileTensor])
        .flatMap(x => model.normalize(x))
        .acquireAndGet(_.toValue)

    (filePath.getName, imageTensor)
  }
}

object ImageInputFormat {
  def apply(): ImageInputFormat = new ImageInputFormat
}
Example 188
Source File: InceptionModel.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.examples.inception

import java.net.URI
import java.nio.charset.StandardCharsets

import org.apache.flink.contrib.tensorflow.examples.inception.LabelMethod._
import org.apache.flink.contrib.tensorflow.graphs.{DefaultGraphLoader, GraphLoader, GraphMethod}
import org.apache.flink.contrib.tensorflow.models.generic.GenericModel
import org.apache.flink.contrib.tensorflow.models.ModelFunction
import org.apache.flink.contrib.tensorflow.util.GraphUtils
import org.apache.flink.core.fs.Path
import org.slf4j.{Logger, LoggerFactory}
import org.tensorflow.Tensor
import org.tensorflow.contrib.scala._
import org.tensorflow.framework.{SignatureDef, TensorInfo}

import scala.collection.JavaConverters._

sealed trait LabelMethod extends GraphMethod {
  def name = LABEL_METHOD_NAME
  override type Input = ImageTensor
  override type Output = LabelTensor
}

@SerialVersionUID(1L)
object LabelMethod {
  val LABEL_METHOD_NAME = "inception/label"
  val LABEL_INPUTS = "inputs"
  val LABEL_OUTPUTS = "outputs"

  // Note: `t` refers to a label tensor provided by an enclosing definition
  // that is not included in this excerpt.
  def toTextLabels(take: Int = 3)(implicit model: InceptionModel): Array[LabeledImage] = {
    // the tensor consists of a row per image, with columns representing label probabilities
    require(t.numDimensions() == 2, "expected a [M N] shaped tensor")
    val matrix = Array.ofDim[Float](t.shape()(0).toInt, t.shape()(1).toInt)
    t.copyTo(matrix)
    matrix.map { row =>
      LabeledImage(row.toList.zip(model.labels).sortWith(_._1 > _._1).take(take))
    }
  }
}
Example 189
Source File: ProgressBar.scala From scaladex with BSD 3-Clause "New" or "Revised" License | 5 votes |
package ch.epfl.scala.index
package data

import me.tongfei.progressbar.{ProgressBar => PB, ProgressBarStyle}
import java.io.{PrintStream, ByteArrayOutputStream, OutputStream}
import org.slf4j.Logger

object ProgressBar {
  def apply(title: String, count: Int, logger: Logger): ProgressBar = {
    new ProgressBar(
      new PB(title, count, 1000, System.out, ProgressBarStyle.UNICODE_BLOCK),
      logger,
      count
    )
  }
}

class ProgressBar(inner: PB, logger: Logger, count: Int) {
  var c = 0
  var printed = 0

  def start(): Unit = {
    inner.start()
  }

  def step(): Unit = {
    inner.step()
    c += 1
    print()
  }

  def stepBy(n: Int): Unit = {
    inner.stepBy(n)
    c += n
    print()
  }

  def stop(): Unit = {
    inner.stop()
  }

  private def print(): Unit = {
    val pp = ((c.toDouble / count) * 100).toInt
    if (printed < pp) {
      logger.debug(pp + "%")
      printed = pp
    }
  }
}
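A brief usage sketch for the wrapper above; the logger name, title, and counts are hypothetical:

import ch.epfl.scala.index.data.ProgressBar
import org.slf4j.LoggerFactory

val logger = LoggerFactory.getLogger("indexing")
val progress = ProgressBar("Indexing POMs", 200, logger)

progress.start()
(1 to 200).foreach { _ =>
  // ... do one unit of work ...
  progress.step()   // logs "1%", "2%", ... as each percentage threshold is crossed
}
progress.stop()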
Example 190
Source File: PerceptronUtils.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.nlp.annotators.pos.perceptron

import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable.{Map => MMap}

trait PerceptronUtils {

  private[perceptron] val START = Array("-START-", "-START2-")
  private[perceptron] val END = Array("-END-", "-END2-")

  private[perceptron] val logger: Logger = LoggerFactory.getLogger("PerceptronTraining")

  private[perceptron] def getFeatures(
      init: Int,
      word: String,
      context: Array[String],
      prev: String,
      prev2: String
  ): Map[String, Int] = {
    val features = MMap[String, Int]().withDefaultValue(0)

    def add(name: String, args: Array[String] = Array()): Unit = {
      features((name +: args).mkString(" ")) += 1
    }

    val i = init + START.length

    add("bias")
    add("i suffix", Array(word.takeRight(3)))
    add("i pref1", Array(word.head.toString))
    add("i-1 tag", Array(prev))
    add("i-2 tag", Array(prev2))
    add("i tag+i-2 tag", Array(prev, prev2))
    add("i word", Array(context(i)))
    add("i-1 tag+i word", Array(prev, context(i)))
    add("i-1 word", Array(context(i-1)))
    add("i-1 suffix", Array(context(i-1).takeRight(3)))
    add("i-2 word", Array(context(i-2)))
    add("i+1 word", Array(context(i+1)))
    add("i+1 suffix", Array(context(i+1).takeRight(3)))
    add("i+2 word", Array(context(i+2)))
    features.toMap
  }
}
Example 191
Source File: BookParser.scala From get-programming-with-scala with MIT License | 5 votes |
package org.example.books

import org.example.books.entities.Book

import com.github.tototoshi.csv._
import org.slf4j.{Logger, LoggerFactory}

import scala.io.Source
import scala.util.{Failure, Success, Try}

class BookParser(filePath: String) {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val books: List[Book] = {
    loadCSVFile(filePath).flatMap { rowData =>
      Book.parse(rowData) match {
        case Success(book) => Some(book)
        case Failure(ex) =>
          logger.warn(s"Skipping book: Unable to parse row because of ${ex.getMessage} - row was $rowData")
          None
      }
    }
  }

  private def loadCSVFile(path: String): List[Map[String, String]] = {
    logger.info(s"Processing file $path...")
    val file = Source.fromResource(path)
    val reader = CSVReader.open(file)
    val data = reader.allWithHeaders()
    logger.info(s"Completed processing of file $path! ${data.size} records loaded")
    data
  }
}
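A minimal usage sketch, assuming a books.csv resource is available on the classpath (the file name is hypothetical):

import org.example.books.BookParser

val parser = new BookParser("books.csv")
val titles: List[String] = parser.books.map(_.title)
println(s"Loaded ${parser.books.size} books")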
Example 192
Source File: BookService.scala From get-programming-with-scala with MIT License | 5 votes |
package org.example.books

import org.example.books.entities._
import org.slf4j.{Logger, LoggerFactory}

class BookService(bookCatalogPath: String) {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  private val books: List[Book] = new BookParser(bookCatalogPath).books

  private var bookLoans: Set[BookLoan] = Set.empty

  def search(title: Option[String] = None, author: Option[String] = None): List[Book] =
    books.filter { book =>
      title.forall(t => containsCaseInsensitive(book.title, t)) &&
        author.forall(a => book.authors.exists(containsCaseInsensitive(_, a)))
    }

  private def containsCaseInsensitive(text: String, substring: String): Boolean =
    text.toLowerCase.contains(substring.toLowerCase)

  def reserveBook(bookId: Long, user: User): Either[String, BookLoan] = {
    val res = for {
      _ <- checkReserveLimits(user)
      book <- checkBookExists(bookId)
      _ <- checkBookIsAvailable(book)
    } yield registerBookLoan(book, user)
    logger.info(s"Book $bookId - User ${user.id} - Reserve request: ${outcomeMsg(res)}")
    res
  }

  def returnBook(bookId: Long): Either[String, BookLoan] = {
    val res = for {
      book <- checkBookExists(bookId)
      user <- checkBookIsTaken(book)
    } yield unregisterBookLoan(book, user)
    logger.info(s"Book $bookId - Return request: ${outcomeMsg(res)}")
    res
  }

  private def outcomeMsg[T](res: Either[String, T]): String =
    res.left.getOrElse("OK")

  private val loanLimit = 5

  private def checkReserveLimits(user: User): Either[String, User] =
    if (bookLoans.count(_.user == user) < loanLimit) Right(user)
    else Left(s"You cannot loan more than $loanLimit books at a time")

  private def checkBookExists(bookId: Long): Either[String, Book] =
    books.find(_.id == bookId) match {
      case Some(book) => Right(book)
      case None => Left(s"Book with id $bookId not found")
    }

  private def checkBookIsAvailable(book: Book): Either[String, Book] =
    findBookLoan(book) match {
      case Some(_) => Left(s"Another user has book ${book.id}")
      case None => Right(book)
    }

  private def checkBookIsTaken(book: Book): Either[String, User] =
    findBookLoan(book) match {
      case Some(BookLoan(_, user)) => Right(user)
      case None => Left(s"Book ${book.id} is not currently on loan")
    }

  private def findBookLoan(book: Book): Option[BookLoan] =
    bookLoans.find(_.book == book)

  private def registerBookLoan(book: Book, user: User): BookLoan = {
    val bookLoan = BookLoan(book, user)
    updateBookLoans(loans => loans + bookLoan)
    bookLoan
  }

  private def unregisterBookLoan(book: Book, user: User): BookLoan = {
    val bookLoan = BookLoan(book, user)
    updateBookLoans(loans => loans - bookLoan)
    bookLoan
  }

  private def updateBookLoans(f: Set[BookLoan] => Set[BookLoan]): Unit =
    synchronized {
      bookLoans = f(bookLoans)
    }
}
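A short usage sketch for the service above. The catalog path and the shape of the User constructor are hypothetical; only the service methods shown in the listing are exercised:

import org.example.books.BookService
import org.example.books.entities.User

val service = new BookService("books.csv")

val scalaBooks = service.search(title = Some("scala"))

val user = User(42, "Jane Doe")        // hypothetical User fields
service.reserveBook(bookId = 1, user) match {
  case Right(loan)  => println(s"Reserved ${loan.book.title}")
  case Left(error)  => println(s"Could not reserve: $error")
}
service.returnBook(bookId = 1)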
Example 193
Source File: MoviesDataset.scala From get-programming-with-scala with MIT License | 5 votes |
package org.example.movies

import com.github.tototoshi.csv._
import org.slf4j.{Logger, LoggerFactory}
import org.example.movies.entities.Movie

import scala.io.Source

class MoviesDataset(moviesMetadataFile: String) {

  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  val movies: List[Movie] = {
    val rawData = loadCSVFile(moviesMetadataFile)
    rawData.flatMap { raw =>
      // skipping rows that are malformed
      Movie.parse(raw)
    }
  }

  private def loadCSVFile(path: String): List[Map[String, String]] = {
    logger.info(s"Processing file $path...")
    val file = Source.fromResource(path)
    val reader = CSVReader.open(file)
    val data = reader.allWithHeaders()
    logger.info(s"Completed processing of file $path! ${data.size} records loaded")
    data
  }
}
Example 194
Source File: WSClient.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit

import akka.stream.Materializer
import org.slf4j.{Logger, LoggerFactory}
import play.api.libs.ws.WSClientConfig
import play.api.libs.ws.ahc.{AhcConfigBuilder, AhcWSClientConfig, AhcWSClient}

import scala.concurrent.Future
import scala.concurrent.duration.Duration

package object handler {

  type WSClient = play.api.libs.ws.WSClient

  object WSClient {
    private val logger: Logger = LoggerFactory.getLogger(getClass)

    def apply(
      connectTimeout: Duration,
      requestTimeout: Duration,
      userAgent     : String
    )(implicit
      materializer: Materializer
    ): WSClient =
      new AhcWSClient(
        new AhcConfigBuilder(
          ahcConfig = AhcWSClientConfig()
            .copy(wsClientConfig = WSClientConfig()
              .copy(
                connectionTimeout = connectTimeout,
                requestTimeout    = requestTimeout,
                userAgent         = Some(userAgent)
              )
            )
        ).build()
      )
  }
}
Example 195
Source File: DatastreamHandler.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit.handler

import java.net.URL

import akka.stream.Materializer
import org.slf4j.{Logger, LoggerFactory}
import play.api.inject.ApplicationLifecycle
import play.api.libs.json.JsValue
import uk.gov.hmrc.audit.HandlerResult
import uk.gov.hmrc.audit.HandlerResult.{Failure, Rejected, Success}

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

class DatastreamHandler(
  scheme        : String,
  host          : String,
  port          : Integer,
  path          : String,
  connectTimeout: Duration,
  requestTimeout: Duration,
  userAgent     : String,
  materializer  : Materializer,
  lifecycle     : ApplicationLifecycle
) extends HttpHandler(
  endpointUrl    = new URL(s"$scheme://$host:$port$path"),
  userAgent      = userAgent,
  connectTimeout = connectTimeout,
  requestTimeout = requestTimeout,
  materializer   = materializer,
  lifecycle      = lifecycle
) with AuditHandler {

  private val logger: Logger = LoggerFactory.getLogger(getClass)

  override def sendEvent(event: JsValue)(implicit ec: ExecutionContext): Future[HandlerResult] =
    sendEvent(event, retryIfMalformed = true)

  private def sendEvent(event: JsValue, retryIfMalformed: Boolean)(implicit ec: ExecutionContext): Future[HandlerResult] =
    sendHttpRequest(event).flatMap {
      case HttpResult.Response(status) =>
        Future.successful(status match {
          case 204 => Success
          case 400 => logger.warn("Malformed request rejected by Datastream")
                      Rejected
          case 413 => logger.warn("Too large request rejected by Datastream")
                      Rejected
          case _   => logger.error(s"Unknown return value $status")
                      Failure
        })
      case HttpResult.Malformed =>
        if (retryIfMalformed) {
          logger.warn("Malformed response on first request, retrying")
          sendEvent(event, retryIfMalformed = false)
        } else {
          logger.warn("Malformed response on second request, failing")
          Future.successful(Failure)
        }
      case HttpResult.Failure(msg, exceptionOption) =>
        exceptionOption match {
          case None     => logger.error(msg)
          case Some(ex) => logger.error(msg, ex)
        }
        Future.successful(Failure)
    }
}
Example 196
Source File: HttpHandler.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit.handler

import java.io.IOException
import java.net.URL
import java.util.concurrent.TimeoutException

import akka.stream.Materializer
import org.slf4j.{Logger, LoggerFactory}
import play.api.inject.ApplicationLifecycle
import play.api.libs.json.JsValue

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

sealed trait HttpResult

object HttpResult {
  case class Response(statusCode: Int) extends HttpResult
  case object Malformed extends HttpResult
  case class Failure(msg: String, nested: Option[Throwable] = None)
    extends Exception(msg, nested.orNull) with HttpResult
}

abstract class HttpHandler(
  endpointUrl   : URL,
  userAgent     : String,
  connectTimeout: Duration,
  requestTimeout: Duration,
  materializer  : Materializer,
  lifecycle     : ApplicationLifecycle
) {
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  val HTTP_STATUS_CONTINUE = 100

  val wsClient: WSClient = {
    implicit val m = materializer
    val wsClient = WSClient(connectTimeout, requestTimeout, userAgent)
    lifecycle.addStopHook { () =>
      logger.info("Closing play-auditing http connections...")
      wsClient.close()
      Future.successful(())
    }
    wsClient
  }

  def sendHttpRequest(event: JsValue)(implicit ec: ExecutionContext): Future[HttpResult] =
    try {
      logger.debug(s"Sending audit request to URL ${endpointUrl.toString}")

      wsClient.url(endpointUrl.toString)
        .post(event)
        .map { response =>
          val httpStatusCode = response.status
          logger.debug(s"Got status code : $httpStatusCode")
          response.body
          logger.debug("Response processed and closed")

          if (httpStatusCode >= HTTP_STATUS_CONTINUE) {
            logger.info(s"Got status code $httpStatusCode from HTTP server.")
            HttpResult.Response(httpStatusCode)
          } else {
            logger.warn(s"Malformed response (status $httpStatusCode) returned from server")
            HttpResult.Malformed
          }
        }.recover {
          case e: TimeoutException =>
            HttpResult.Failure("Error opening connection, or request timed out", Some(e))
          case e: IOException =>
            HttpResult.Failure("Error opening connection, or request timed out", Some(e))
        }
    } catch {
      case t: Throwable =>
        Future.successful(HttpResult.Failure("Error sending HTTP request", Some(t)))
    }
}
Example 197
Source File: LoggingHandlerSpec.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit.handler

import org.mockito.Mockito._
import org.scalatest.wordspec.AnyWordSpecLike
import org.scalatestplus.mockito.MockitoSugar
import org.slf4j.Logger
import play.api.libs.json.JsString

import scala.concurrent.ExecutionContext.Implicits.global

class LoggingHandlerSpec extends AnyWordSpecLike with MockitoSugar {

  val mockLog: Logger = mock[Logger]
  val loggingHandler = new LoggingHandler(mockLog)

  "LoggingHandler" should {
    "log the event" in {
      val expectedLogContent = """DS_EventMissed_AuditRequestFailure : audit item : "FAILED_EVENT""""

      loggingHandler.sendEvent(JsString("FAILED_EVENT"))

      verify(mockLog).warn(expectedLogContent)
    }
  }
}
Example 198
Source File: PrometheusUtils.scala From kafka-lag-exporter with Apache License 2.0 | 5 votes |
package com.lightbend.kafkalagexporter.integration

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpRequest, HttpResponse, StatusCodes}
import akka.http.scaladsl.unmarshalling.Unmarshal
import akka.stream.Materializer
import com.lightbend.kafkalagexporter.MetricsSink.GaugeDefinition
import org.scalatest.Matchers
import org.scalatest.concurrent.ScalaFutures
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.{ExecutionContext, Future}
import scala.util.matching.Regex

// Note: the enclosing declarations (which provide `name`, `labels`, `assertion`, and `log`)
// are not included in this excerpt.

      val regex = s"""$name\\{$labels.*\\}\\s+(-?.+)""".r
      log.debug(s"Created regex: {}", regex.pattern.toString)
      Rule(regex, assertion)
    }
  }

  case class Rule(regex: Regex, assertion: String => _)

  case class Result(rule: Rule, groupResults: List[String]) {

    def assertDne(): Unit = {
      log.debug(s"Rule: ${rule.regex.toString}")
      groupResults.length shouldBe 0
    }

    def assert(): Unit = {
      log.debug(s"Rule: ${rule.regex.toString}")
      groupResults.length shouldBe 1
      log.debug(s"Actual value is ${groupResults.head}")
      rule.assertion(groupResults.head)
    }
  }
}
Example 199
Source File: SignalLogger.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import org.apache.commons.lang3.SystemUtils
import org.slf4j.Logger
import sun.misc.{Signal, SignalHandler}

// Note: the enclosing object declaration (which defines the `registered` flag)
// is not included in this excerpt.

  def register(log: Logger): Unit = synchronized {
    if (SystemUtils.IS_OS_UNIX) {
      require(!registered, "Can't re-install the signal handlers")
      registered = true

      val signals = Seq("TERM", "HUP", "INT")
      for (signal <- signals) {
        try {
          new SignalLoggerHandler(signal, log)
        } catch {
          case e: Exception => log.warn("Failed to register signal handler " + signal, e)
        }
      }
      log.info("Registered signal handlers for [" + signals.mkString(", ") + "]")
    }
  }
}

private sealed class SignalLoggerHandler(name: String, log: Logger) extends SignalHandler {

  val prevHandler = Signal.handle(new Signal(name), this)

  override def handle(signal: Signal): Unit = {
    log.error("RECEIVED SIGNAL " + signal.getNumber() + ": SIG" + signal.getName())
    prevHandler.handle(signal)
  }
}
Example 200
Source File: ActorLogReceive.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import akka.actor.Actor
import org.slf4j.Logger

private[spark] trait ActorLogReceive {
  self: Actor =>

  override def receive: Actor.Receive = new Actor.Receive {

    private val _receiveWithLogging = receiveWithLogging

    override def isDefinedAt(o: Any): Boolean = {
      val handled = _receiveWithLogging.isDefinedAt(o)
      if (!handled) {
        log.debug(s"Received unexpected actor system event: $o")
      }
      handled
    }

    override def apply(o: Any): Unit = {
      if (log.isDebugEnabled) {
        log.debug(s"[actor] received message $o from ${self.sender}")
      }
      val start = System.nanoTime
      _receiveWithLogging.apply(o)
      val timeTaken = (System.nanoTime - start).toDouble / 1000000
      if (log.isDebugEnabled) {
        log.debug(s"[actor] handled message ($timeTaken ms) $o from ${self.sender}")
      }
    }
  }

  def receiveWithLogging: Actor.Receive

  protected def log: Logger
}
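A small usage sketch: an actor mixes in the trait and implements receiveWithLogging instead of receive. Because ActorLogReceive is private[spark], the sketch assumes it is compiled under the org.apache.spark package; the package, actor, and message names are hypothetical:

package org.apache.spark.example

import akka.actor.Actor
import org.apache.spark.util.ActorLogReceive
import org.slf4j.{Logger, LoggerFactory}

class WorkerActor extends Actor with ActorLogReceive {

  override protected val log: Logger = LoggerFactory.getLogger(classOf[WorkerActor])

  // Every message handled here is logged by the trait (receipt and handling time) at debug level.
  override def receiveWithLogging: Actor.Receive = {
    case "ping" => sender() ! "pong"
    case msg    => log.warn(s"Unhandled message: $msg")
  }
}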