org.slf4j.LoggerFactory Scala Examples
The following examples show how to use org.slf4j.LoggerFactory.
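All of the examples below follow the same basic pattern, so here is a minimal, self-contained sketch of it first. The class name MyService is a hypothetical placeholder; the ": Any" ascriptions mirror the style used in several of the examples to select SLF4J's two-argument logging overload.

import org.slf4j.{Logger, LoggerFactory}

// Hypothetical class, used only to illustrate the common pattern in the examples below.
class MyService {
  // One logger per class, named after the class itself.
  private val logger: Logger = LoggerFactory.getLogger(classOf[MyService])

  def connect(host: String, port: Int): Unit = {
    // Parameterized messages defer string construction until the level is enabled.
    logger.info("Connecting to {}:{}", host: Any, port: Any)
  }
}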
Example 1
Source File: DefaultSource.scala From spark-snowflake with Apache License 2.0 | 7 votes |
package net.snowflake.spark.snowflake

import net.snowflake.spark.snowflake.streaming.SnowflakeSink
import net.snowflake.spark.snowflake.Utils.SNOWFLAKE_SOURCE_SHORT_NAME
import org.apache.spark.sql.execution.streaming.Sink
import org.apache.spark.sql.sources._
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.slf4j.LoggerFactory

// NOTE: the enclosing DefaultSource class declaration is elided in this excerpt; the trailing brace closes it.

  override def createRelation(sqlContext: SQLContext,
                              saveMode: SaveMode,
                              parameters: Map[String, String],
                              data: DataFrame): BaseRelation = {
    val params = Parameters.mergeParameters(parameters)
    // check spark version for push down
    if (params.autoPushdown) {
      SnowflakeConnectorUtils.checkVersionAndEnablePushdown(
        sqlContext.sparkSession
      )
    }
    // pass parameters to pushdown functions
    pushdowns.setGlobalParameter(params)

    val table = params.table.getOrElse {
      throw new IllegalArgumentException(
        "For save operations you must specify a Snowflake table name with the 'dbtable' parameter"
      )
    }

    def tableExists: Boolean = {
      val conn = jdbcWrapper.getConnector(params)
      try {
        jdbcWrapper.tableExists(conn, table.toString)
      } finally {
        conn.close()
      }
    }

    val (doSave, dropExisting) = saveMode match {
      case SaveMode.Append => (true, false)
      case SaveMode.Overwrite => (true, true)
      case SaveMode.ErrorIfExists =>
        if (tableExists) {
          sys.error(
            s"Table $table already exists! (SaveMode is set to ErrorIfExists)"
          )
        } else {
          (true, false)
        }
      case SaveMode.Ignore =>
        if (tableExists) {
          log.info(s"Table $table already exists -- ignoring save request.")
          (false, false)
        } else {
          (true, false)
        }
    }

    if (doSave) {
      val updatedParams = parameters.updated("overwrite", dropExisting.toString)
      new SnowflakeWriter(jdbcWrapper)
        .save(
          sqlContext,
          data,
          saveMode,
          Parameters.mergeParameters(updatedParams)
        )
    }

    createRelation(sqlContext, parameters)
  }

  override def createSink(sqlContext: SQLContext,
                          parameters: Map[String, String],
                          partitionColumns: Seq[String],
                          outputMode: OutputMode): Sink =
    new SnowflakeSink(sqlContext, parameters, partitionColumns, outputMode)
}
Example 2
Source File: SnowflakeConnectorUtils.scala From spark-snowflake with Apache License 2.0 | 6 votes |
package net.snowflake.spark.snowflake

import java.nio.file.Paths
import java.security.InvalidKeyException

import net.snowflake.spark.snowflake.pushdowns.SnowflakeStrategy
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}

// NOTE: the enclosing declaration (and members referenced below, such as `log` and
// `enablePushdownSession`) is elided in this excerpt.

  def disablePushdownSession(session: SparkSession): Unit = {
    session.experimental.extraStrategies = session.experimental.extraStrategies
      .filterNot(strategy => strategy.isInstanceOf[SnowflakeStrategy])
  }

  def setPushdownSession(session: SparkSession, enabled: Boolean): Unit = {
    if (enabled) {
      enablePushdownSession(session)
    } else {
      disablePushdownSession(session)
    }
  }

  // TODO: Improve error handling with retries, etc.
  @throws[SnowflakeConnectorException]
  def handleS3Exception(ex: Exception): Unit = {
    if (ex.getCause.isInstanceOf[InvalidKeyException]) {
      // Most likely cause: Unlimited strength policy files not installed
      var msg: String = "Strong encryption with Java JRE requires JCE " +
        "Unlimited Strength Jurisdiction Policy " +
        "files. " +
        "Follow JDBC client installation instructions " +
        "provided by Snowflake or contact Snowflake " +
        "Support. This needs to be installed in the Java runtime for all Spark executor nodes."

      log.error(
        "JCE Unlimited Strength policy files missing: {}. {}.",
        ex.getMessage: Any,
        ex.getCause.getMessage: Any
      )

      val bootLib: String = java.lang.System.getProperty("sun.boot.library.path")
      if (bootLib != null) {
        msg += " The target directory on your system is: " + Paths
          .get(bootLib, "security")
          .toString
        log.error(msg)
      }
      throw new SnowflakeConnectorException(msg)
    } else {
      throw ex
    }
  }
}

class SnowflakeConnectorException(message: String) extends Exception(message)

class SnowflakePushdownException(message: String)
    extends SnowflakeConnectorException(message)

class SnowflakeConnectorFeatureNotSupportException(message: String)
    extends Exception(message)

class SnowflakePushdownUnsupportedException(message: String,
                                            val unsupportedOperation: String,
                                            val details: String,
                                            val isKnownUnsupportedOperation: Boolean)
    extends Exception(message)
Example 3
Source File: MLSQLSpringConfiguration.scala From Linkis with Apache License 2.0 | 6 votes |
package com.webank.wedatasphere.linkis.entrance.conf

import com.webank.wedatasphere.linkis.entrance.EntranceParser
import com.webank.wedatasphere.linkis.entrance.annotation._
import com.webank.wedatasphere.linkis.entrance.execute._
import com.webank.wedatasphere.linkis.entrance.executer.MLSQLEngineExecutorManagerImpl
import com.webank.wedatasphere.linkis.entrance.parser.MLSQLEntranceParser
import com.webank.wedatasphere.linkis.scheduler.queue.GroupFactory
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean
import org.springframework.context.annotation.Configuration

@Configuration
class MLSQLSpringConfiguration {

  private val logger = LoggerFactory.getLogger(classOf[MLSQLSpringConfiguration])

  @EntranceExecutorManagerBeanAnnotation
  @ConditionalOnMissingBean(value = Array(classOf[EntranceExecutorManager]))
  def generateEntranceExecutorManager(@GroupFactoryBeanAnnotation.GroupFactoryAutowiredAnnotation groupFactory: GroupFactory,
                                      @EngineBuilderBeanAnnotation.EngineBuilderAutowiredAnnotation engineBuilder: EngineBuilder,
                                      @EngineRequesterBeanAnnotation.EngineRequesterAutowiredAnnotation engineRequester: EngineRequester,
                                      @EngineSelectorBeanAnnotation.EngineSelectorAutowiredAnnotation engineSelector: EngineSelector,
                                      @EngineManagerBeanAnnotation.EngineManagerAutowiredAnnotation engineManager: EngineManager,
                                      @Autowired entranceExecutorRulers: Array[EntranceExecutorRuler]): EntranceExecutorManager =
    new MLSQLEngineExecutorManagerImpl(groupFactory, engineBuilder, engineRequester, engineSelector, engineManager, entranceExecutorRulers)

  @EntranceParserBeanAnnotation
  @ConditionalOnMissingBean(name = Array(EntranceParserBeanAnnotation.BEAN_NAME))
  def generateEntranceParser(): EntranceParser = {
    logger.info("begin to get MLSQL Entrance parser")
    new MLSQLEntranceParser()
  }

}
Example 4
Source File: JDBCSpringConfiguration.scala From Linkis with Apache License 2.0 | 6 votes |
package com.webank.wedatasphere.linkis.entrance.conf

import com.webank.wedatasphere.linkis.entrance.EntranceParser
import com.webank.wedatasphere.linkis.entrance.annotation._
import com.webank.wedatasphere.linkis.entrance.execute._
import com.webank.wedatasphere.linkis.entrance.executer.JDBCEngineExecutorManagerImpl
import com.webank.wedatasphere.linkis.entrance.parser.JDBCEntranceParser
import com.webank.wedatasphere.linkis.scheduler.queue.GroupFactory
import org.slf4j.LoggerFactory
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.context.annotation.Configuration

@Configuration
class JDBCSpringConfiguration {

  private val logger = LoggerFactory.getLogger(classOf[JDBCSpringConfiguration])

  @EntranceExecutorManagerBeanAnnotation
  def generateEntranceExecutorManager(@GroupFactoryBeanAnnotation.GroupFactoryAutowiredAnnotation groupFactory: GroupFactory,
                                      @EngineBuilderBeanAnnotation.EngineBuilderAutowiredAnnotation engineBuilder: EngineBuilder,
                                      @EngineRequesterBeanAnnotation.EngineRequesterAutowiredAnnotation engineRequester: EngineRequester,
                                      @EngineSelectorBeanAnnotation.EngineSelectorAutowiredAnnotation engineSelector: EngineSelector,
                                      @EngineManagerBeanAnnotation.EngineManagerAutowiredAnnotation engineManager: EngineManager,
                                      @Autowired entranceExecutorRulers: Array[EntranceExecutorRuler]): EntranceExecutorManager =
    new JDBCEngineExecutorManagerImpl(groupFactory, engineBuilder, engineRequester, engineSelector, engineManager, entranceExecutorRulers)

  @EntranceParserBeanAnnotation
  def generateEntranceParser(): EntranceParser = {
    logger.info("begin to get JDBC Entrance parser")
    new JDBCEntranceParser()
  }

}
Example 5
Source File: TestHook.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake.test

import net.snowflake.client.jdbc.{ErrorCode, SnowflakeSQLException}
import net.snowflake.spark.snowflake.test.TestHookFlag.TestHookFlag
import org.slf4j.{Logger, LoggerFactory}

object TestHookFlag extends Enumeration {
  type TestHookFlag = Value

  // All predefined test hook's name start with TH_ (TEST HOOK).
  val TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE = Value("TH_WRITE_ERROR_AFTER_DROP_OLD_TABLE")
  val TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE = Value("TH_WRITE_ERROR_AFTER_CREATE_NEW_TABLE")
  val TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE = Value("TH_WRITE_ERROR_AFTER_TRUNCATE_TABLE")
  val TH_WRITE_ERROR_AFTER_COPY_INTO = Value("TH_WRITE_ERROR_AFTER_COPY_INTO")
  val TH_GCS_UPLOAD_RAISE_EXCEPTION = Value("TH_GCS_UPLOAD_RAISE_EXCEPTION")
  val TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS = Value("TH_COPY_INTO_TABLE_MISS_FILES_SUCCESS")
  val TH_COPY_INTO_TABLE_MISS_FILES_FAIL = Value("TH_COPY_INTO_TABLE_MISS_FILES_FAIL")
}

object TestHook {

  val log: Logger = LoggerFactory.getLogger(getClass)

  private val ENABLED_TEST_FLAGS =
    new scala.collection.mutable.HashSet[TestHookFlag]()

  private var IS_TEST_ENABLED = false

  private val TEST_MESSAGE_PREFIX =
    "Internal test error (should NOT be seen by user):"

  // Enable test
  private[snowflake] def enableTestHook(): Unit = {
    IS_TEST_ENABLED = true
  }

  // Disable test
  private[snowflake] def disableTestHook(): Unit = {
    IS_TEST_ENABLED = false
    ENABLED_TEST_FLAGS.clear()
  }

  // Enable a specific test flag
  private[snowflake] def enableTestFlag(testFlag: TestHookFlag): Unit = {
    enableTestHook()
    if (!ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.add(testFlag)
    }
  }

  // Enable a specific test flag only (all other flags are disabled)
  private[snowflake] def enableTestFlagOnly(testFlag: TestHookFlag): Unit = {
    disableTestHook()
    enableTestFlag(testFlag)
  }

  // Disable a specific test flag
  private[snowflake] def disableTestFlag(testFlag: TestHookFlag): Unit = {
    if (ENABLED_TEST_FLAGS.contains(testFlag)) {
      ENABLED_TEST_FLAGS.remove(testFlag)
    }
    if (ENABLED_TEST_FLAGS.isEmpty) {
      disableTestHook()
    }
  }

  // Check whether a flag is enabled
  private[snowflake] def isTestFlagEnabled(testFlag: TestHookFlag): Boolean = {
    IS_TEST_ENABLED && ENABLED_TEST_FLAGS.contains(testFlag)
  }

  // Raise exception if the specific test flag is enabled.
  private[snowflake] def raiseExceptionIfTestFlagEnabled(testFlag: TestHookFlag,
                                                         errorMessage: String): Unit = {
    if (isTestFlagEnabled(testFlag)) {
      throw new SnowflakeSQLException(ErrorCode.INTERNAL_ERROR,
                                      s"$TEST_MESSAGE_PREFIX $errorMessage")
    }
  }
}
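As a rough sketch of how these hooks are driven from test code inside the net.snowflake.spark.snowflake package (the methods are private[snowflake]); the chosen flag and message are illustrative only:

// Enable exactly one hook for a negative test run.
TestHook.enableTestFlagOnly(TestHookFlag.TH_WRITE_ERROR_AFTER_COPY_INTO)

// Code guarded by the hook then fails with an internal SnowflakeSQLException at this point.
TestHook.raiseExceptionIfTestFlagEnabled(
  TestHookFlag.TH_WRITE_ERROR_AFTER_COPY_INTO,
  "simulated failure after COPY INTO"
)

// Reset so later tests are unaffected.
TestHook.disableTestHook()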
Example 6
Source File: ClusterTest.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake

import net.snowflake.spark.snowflake.testsuite.ClusterTestSuiteBase
import org.slf4j.{Logger, LoggerFactory}
import org.apache.spark.sql.SparkSession

object ClusterTest {
  val log: Logger = LoggerFactory.getLogger(getClass)

  val RemoteMode = "remote"
  val LocalMode = "local"

  val TestSuiteSeparator = ";"

  // Driver function to run the test.
  def main(args: Array[String]): Unit = {
    log.info(s"Test Spark Connector: ${net.snowflake.spark.snowflake.Utils.VERSION}")

    val usage = s"""Two parameters are needed: [local | remote] and
                   | testClassNames (using ';' to separate multiple classes)
                   |""".stripMargin
    log.info(usage)

    if (args.length < 2) {
      throw new Exception(s"At least two parameters are needed. Usage: $usage")
    }

    // Setup Spark session.
    // local mode is introduced for debugging purpose
    val runMode = args(0)
    var sparkSessionBuilder = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .config("spark.some.config.option", "some-value")
    if (runMode.equalsIgnoreCase(LocalMode)) {
      sparkSessionBuilder = sparkSessionBuilder
        .config("spark.master", "local")
    }
    val spark = sparkSessionBuilder.getOrCreate()

    // Run specified test suites
    val testSuiteNames = args(1).split(TestSuiteSeparator)
    for (testSuiteName <- testSuiteNames) {
      if (!testSuiteName.trim.isEmpty) {
        // Retrieve commit ID from env.
        val commitID = scala.util.Properties
          .envOrElse(TestUtils.GITHUB_SHA, "commit id not set")

        // val testSuiteName = "net.snowflake.spark.snowflake.testsuite.BasicReadWriteSuite"
        val resultBuilder = new ClusterTestResultBuilder()
          .withTestType("Scala")
          .withTestCaseName(testSuiteName)
          .withCommitID(commitID)
          .withTestStatus(TestUtils.TEST_RESULT_STATUS_INIT)
          .withStartTimeInMill(System.currentTimeMillis())
          .withGithubRunId(TestUtils.githubRunId)

        try {
          Class
            .forName(testSuiteName)
            .newInstance()
            .asInstanceOf[ClusterTestSuiteBase]
            .run(spark, resultBuilder)
        } catch {
          case e: Throwable =>
            log.error(e.getMessage)
            resultBuilder
              .withTestStatus(TestUtils.TEST_RESULT_STATUS_EXCEPTION)
              .withReason(e.getMessage)
        } finally {
          // Set test end time.
          resultBuilder
            .withEndTimeInMill(System.currentTimeMillis())
          // Write test result
          resultBuilder.build().writeToSnowflake()
        }
      }
    }

    spark.stop()
  }
}
Example 7
Source File: AuthServiceJWT.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.api.auth import java.util.concurrent.{CompletableFuture, CompletionStage} import com.daml.lf.data.Ref import com.daml.jwt.{JwtVerifier, JwtVerifierBase} import com.daml.ledger.api.auth.AuthServiceJWT.Error import io.grpc.Metadata import org.slf4j.{Logger, LoggerFactory} import spray.json._ import scala.collection.mutable.ListBuffer import scala.util.Try class AuthServiceJWT(verifier: JwtVerifierBase) extends AuthService { protected val logger: Logger = LoggerFactory.getLogger(AuthServiceJWT.getClass) override def decodeMetadata(headers: Metadata): CompletionStage[Claims] = { decodeAndParse(headers).fold( error => { logger.warn("Authorization error: " + error.message) CompletableFuture.completedFuture(Claims.empty) }, token => CompletableFuture.completedFuture(payloadToClaims(token)) ) } private[this] def parsePayload(jwtPayload: String): Either[Error, AuthServiceJWTPayload] = { import AuthServiceJWTCodec.JsonImplicits._ Try(JsonParser(jwtPayload).convertTo[AuthServiceJWTPayload]).toEither.left.map(t => Error("Could not parse JWT token: " + t.getMessage)) } private[this] def decodeAndParse(headers: Metadata): Either[Error, AuthServiceJWTPayload] = { val bearerTokenRegex = "Bearer (.*)".r for { headerValue <- Option .apply(headers.get(AUTHORIZATION_KEY)) .toRight(Error("Authorization header not found")) token <- bearerTokenRegex .findFirstMatchIn(headerValue) .map(_.group(1)) .toRight(Error("Authorization header does not use Bearer format")) decoded <- verifier .verify(com.daml.jwt.domain.Jwt(token)) .toEither .left .map(e => Error("Could not verify JWT token: " + e.message)) parsed <- parsePayload(decoded.payload) } yield parsed } private[this] def payloadToClaims(payload: AuthServiceJWTPayload): Claims = { val claims = ListBuffer[Claim]() // Any valid token authorizes the user to use public services claims.append(ClaimPublic) if (payload.admin) claims.append(ClaimAdmin) payload.actAs .foreach(party => claims.append(ClaimActAsParty(Ref.Party.assertFromString(party)))) payload.readAs .foreach(party => claims.append(ClaimReadAsParty(Ref.Party.assertFromString(party)))) Claims( claims = claims.toList, ledgerId = payload.ledgerId, participantId = payload.participantId, applicationId = payload.applicationId, expiration = payload.exp, ) } } object AuthServiceJWT { final case class Error(message: String) def apply(verifier: com.auth0.jwt.interfaces.JWTVerifier) = new AuthServiceJWT(new JwtVerifier(verifier)) def apply(verifier: JwtVerifierBase) = new AuthServiceJWT(verifier) }
Example 8
Source File: AuthorizationInterceptor.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.api.auth.interceptor import com.daml.ledger.api.auth.{AuthService, Claims} import com.daml.platform.server.api.validation.ErrorFactories.unauthenticated import io.grpc.{ Context, Contexts, Metadata, ServerCall, ServerCallHandler, ServerInterceptor, Status } import org.slf4j.{Logger, LoggerFactory} import scala.compat.java8.FutureConverters import scala.concurrent.ExecutionContext import scala.util.{Failure, Success, Try} final class AuthorizationInterceptor(protected val authService: AuthService, ec: ExecutionContext) extends ServerInterceptor { private val logger: Logger = LoggerFactory.getLogger(AuthorizationInterceptor.getClass) private val internalAuthenticationError = Status.INTERNAL.withDescription("Failed to get claims from request metadata") import AuthorizationInterceptor.contextKeyClaim override def interceptCall[ReqT, RespT]( call: ServerCall[ReqT, RespT], headers: Metadata, nextListener: ServerCallHandler[ReqT, RespT]): ServerCall.Listener[ReqT] = { // Note: Context uses ThreadLocal storage, we need to capture it outside of the async block below. // Contexts are immutable and safe to pass around. val prevCtx = Context.current // The method interceptCall() must return a Listener. // The target listener is created by calling `Contexts.interceptCall()`. // However, this is only done after we have asynchronously received the claims. // Therefore, we need to return a listener that buffers all messages until the target listener is available. new AsyncForwardingListener[ReqT] { FutureConverters .toScala(authService.decodeMetadata(headers)) .onComplete { case Failure(exception) => logger.warn(s"Failed to get claims from request metadata: ${exception.getMessage}") call.close(internalAuthenticationError, new Metadata()) new ServerCall.Listener[Nothing]() {} case Success(Claims.empty) => logger.debug(s"Auth metadata decoded into empty claims, returning UNAUTHENTICATED") call.close(Status.UNAUTHENTICATED, new Metadata()) new ServerCall.Listener[Nothing]() {} case Success(claims) => val nextCtx = prevCtx.withValue(contextKeyClaim, claims) // Contexts.interceptCall() creates a listener that wraps all methods of `nextListener` // such that `Context.current` returns `nextCtx`. val nextListenerWithContext = Contexts.interceptCall(nextCtx, call, headers, nextListener) setNextListener(nextListenerWithContext) nextListenerWithContext }(ec) } } } object AuthorizationInterceptor { private val contextKeyClaim = Context.key[Claims]("AuthServiceDecodedClaim") def extractClaimsFromContext(): Try[Claims] = Option(contextKeyClaim.get()).fold[Try[Claims]](Failure(unauthenticated()))(Success(_)) def apply(authService: AuthService, ec: ExecutionContext): AuthorizationInterceptor = new AuthorizationInterceptor(authService, ec) }
Example 9
Source File: LedgerContext.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.sandbox.perf import akka.actor.ActorSystem import akka.pattern import com.daml.lf.data.Ref.PackageId import com.daml.ledger.api.domain import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc.ActiveContractsServiceStub import com.daml.ledger.api.v1.command_service.CommandServiceGrpc import com.daml.ledger.api.v1.command_service.CommandServiceGrpc.CommandService import com.daml.ledger.api.v1.ledger_identity_service.LedgerIdentityServiceGrpc.LedgerIdentityServiceStub import com.daml.ledger.api.v1.ledger_identity_service.{ GetLedgerIdentityRequest, LedgerIdentityServiceGrpc } import com.daml.ledger.api.v1.testing.reset_service.ResetServiceGrpc.ResetService import com.daml.ledger.api.v1.testing.reset_service.{ResetRequest, ResetServiceGrpc} import io.grpc.{Channel, StatusRuntimeException} import org.slf4j.LoggerFactory import scalaz.syntax.tag._ import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future} final class LedgerContext(channel: Channel, packageIds: Iterable[PackageId])( implicit executionContext: ExecutionContext ) { private val logger = LoggerFactory.getLogger(this.getClass) val ledgerId: domain.LedgerId = domain.LedgerId( LedgerIdentityServiceGrpc .blockingStub(channel) .getLedgerIdentity(GetLedgerIdentityRequest()) .ledgerId) def reset()(implicit system: ActorSystem): Future[LedgerContext] = { def waitForNewLedger(retries: Int): Future[domain.LedgerId] = if (retries <= 0) Future.failed(new RuntimeException("waitForNewLedger: out of retries")) else { ledgerIdentityService .getLedgerIdentity(GetLedgerIdentityRequest()) .flatMap { resp => // TODO: compare with current Ledger ID and retry when not changed Future.successful(domain.LedgerId(resp.ledgerId)) } .recoverWith { case _: StatusRuntimeException => logger.debug( "waitForNewLedger: retrying identity request in 1 second. {} retries remain", retries - 1) pattern.after(1.seconds, system.scheduler)(waitForNewLedger(retries - 1)) case t: Throwable => logger.warn("waitForNewLedger: failed to reconnect!") throw t } } for { _ <- resetService.reset(ResetRequest(ledgerId.unwrap)) _ <- waitForNewLedger(10) } yield new LedgerContext(channel, packageIds) } def ledgerIdentityService: LedgerIdentityServiceStub = LedgerIdentityServiceGrpc.stub(channel) def commandService: CommandService = CommandServiceGrpc.stub(channel) def acsService: ActiveContractsServiceStub = ActiveContractsServiceGrpc.stub(channel) def resetService: ResetService = ResetServiceGrpc.stub(channel) }
Example 10
Source File: LedgerEntries.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.sandbox.stores.ledger.inmemory import java.util.concurrent.atomic.AtomicReference import akka.NotUsed import akka.stream.scaladsl.Source import com.daml.ledger.participant.state.v1.Offset import com.daml.lf.data.Ref import com.daml.platform.akkastreams.dispatcher.Dispatcher import com.daml.platform.akkastreams.dispatcher.SubSource.RangeSource import org.slf4j.LoggerFactory import com.daml.platform.ApiOffset.ApiOffsetConverter import com.daml.platform.sandbox.stores.ledger.SandboxOffset import scala.collection.immutable.TreeMap private[ledger] class LedgerEntries[T](identify: T => String) { private val logger = LoggerFactory.getLogger(this.getClass) private case class Entries(ledgerEnd: Offset, items: TreeMap[Offset, T]) // Tuple of (ledger end cursor, ledger map). There is never an entry for the initial cursor. End is inclusive. private val state = new AtomicReference(Entries(ledgerBeginning, TreeMap.empty)) private def store(item: T): Offset = { val Entries(newOffset, _) = state.updateAndGet({ case Entries(ledgerEnd, ledger) => val newEnd = SandboxOffset.toOffset(SandboxOffset.fromOffset(ledgerEnd) + 1) Entries(newEnd, ledger + (newEnd -> item)) }) if (logger.isTraceEnabled()) logger.trace("Recording `{}` at offset `{}`", identify(item): Any, newOffset.toApiString: Any) newOffset } def incrementOffset(increment: Int): Offset = { val Entries(newOffset, _) = state.updateAndGet({ case Entries(ledgerEnd, ledger) => val newEnd = SandboxOffset.toOffset(SandboxOffset.fromOffset(ledgerEnd) + increment) Entries(newEnd, ledger) }) if (logger.isTraceEnabled()) logger.trace("Bumping offset to `{}`", newOffset.toApiString) newOffset } private val dispatcher = Dispatcher[Offset]("inmemory-ledger", Offset.beforeBegin, ledgerEnd) def getSource( startExclusive: Option[Offset], endInclusive: Option[Offset]): Source[(Offset, T), NotUsed] = dispatcher.startingAt( startExclusive.getOrElse(ledgerBeginning), RangeSource( (exclusiveStart, inclusiveEnd) => Source[(Offset, T)]( state.get().items.from(exclusiveStart).filter(_._1 > exclusiveStart).to(inclusiveEnd)), ), endInclusive ) def publish(item: T): Offset = { val newHead = store(item) dispatcher.signalNewHead(newHead) newHead } def ledgerBeginning: Offset = SandboxOffset.toOffset(0) def items = state.get().items.iterator def ledgerEnd: Offset = state.get().ledgerEnd def nextTransactionId: Ref.LedgerString = Ref.LedgerString.assertFromString((SandboxOffset.fromOffset(ledgerEnd) + 1).toString) }
Example 11
Source File: GlobalLogLevel.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox

import ch.qos.logback.classic.Level
import org.slf4j.{Logger, LoggerFactory}

object GlobalLogLevel {
  def set(level: Level): Unit = {
    val rootLogger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)
    LoggerFactory.getILoggerFactory match {
      case loggerContext: ch.qos.logback.classic.LoggerContext =>
        rootLogger.info(s"Sandbox verbosity changed to $level")
        loggerContext.getLoggerList.forEach(_.setLevel(level))
      case _ =>
        rootLogger.warn(s"Sandbox verbosity cannot be set to requested $level")
    }
  }
}
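A call site for this helper might look like the following; where it is invoked is an assumption on my part, since the example above only defines the object.

import ch.qos.logback.classic.Level
import com.daml.platform.sandbox.GlobalLogLevel

// Raise every logback logger to DEBUG when logback is the bound SLF4J backend.
GlobalLogLevel.set(Level.DEBUG)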
Example 12
Source File: TrackerMap.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.apiserver.services.tracking import java.util.concurrent.atomic.AtomicReference import com.daml.dec.DirectExecutionContext import com.daml.ledger.api.v1.command_service.SubmitAndWaitRequest import com.daml.ledger.api.v1.completion.Completion import com.daml.logging.{ContextualizedLogger, LoggingContext} import org.slf4j.LoggerFactory import scala.collection.immutable.HashMap import scala.concurrent.duration.{FiniteDuration, _} import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} final class AsyncResource[T <: AutoCloseable](future: Future[T]) { private val logger = LoggerFactory.getLogger(this.getClass) // Must progress Waiting => Ready => Closed or Waiting => Closed. val state: AtomicReference[AsyncResourceState[T]] = new AtomicReference(Waiting) future.andThen({ case Success(t) => if (!state.compareAndSet(Waiting, Ready(t))) { // This is the punch line of AsyncResource. // If we've been closed in the meantime, we must close the underlying resource also. // This "on-failure-to-complete" behavior is not present in scala or java Futures. t.close() } // Someone should be listening to this failure downstream // TODO(mthvedt): Refactor so at least one downstream listener is always present, // and exceptions are never dropped. case Failure(ex) => logger.error("failure to get async resource", ex) state.set(Closed) })(DirectExecutionContext) def flatMap[U](f: T => Future[U])(implicit ex: ExecutionContext): Future[U] = { state.get() match { case Waiting => future.flatMap(f) case Closed => throw new IllegalStateException() case Ready(t) => f(t) } } def map[U](f: T => U)(implicit ex: ExecutionContext): Future[U] = flatMap(t => Future.successful(f(t))) def ifPresent[U](f: T => U): Option[U] = state.get() match { case Ready(t) => Some(f(t)) case _ => None } def close(): Unit = state.getAndSet(Closed) match { case Ready(t) => t.close() case _ => } } def apply(retentionPeriod: FiniteDuration)(implicit logCtx: LoggingContext): TrackerMap = new TrackerMap(retentionPeriod) }
Example 13
Source File: CommandCompletionServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.command_completion_service.CommandCompletionServiceGrpc.CommandCompletionService
import com.daml.ledger.api.v1.command_completion_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.dec.DirectExecutionContext
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Future

//TODO: this class is only needed by DamlOnXCommandCompletionService.scala. Must be deleted once that's gone!
class CommandCompletionServiceValidation(
    val service: CommandCompletionService with AutoCloseable,
    val ledgerId: LedgerId)
    extends CommandCompletionService
    with FieldValidations
    with GrpcApiService
    with ProxyCloseable
    with ErrorFactories {

  protected val logger: Logger = LoggerFactory.getLogger(CommandCompletionService.getClass)

  override def completionStream(
      request: CompletionStreamRequest,
      responseObserver: StreamObserver[CompletionStreamResponse]): Unit = {
    val validation = for {
      _ <- matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      _ <- requireNonEmptyString(request.applicationId, "application_id")
      _ <- requireNonEmpty(request.parties, "parties")
    } yield request

    validation.fold(
      exception => responseObserver.onError(exception),
      value => service.completionStream(value, responseObserver)
    )
  }

  override def completionEnd(request: CompletionEndRequest): Future[CompletionEndResponse] = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(Future.failed, _ => service.completionEnd(request))
  }

  override def bindService(): ServerServiceDefinition =
    CommandCompletionServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 14
Source File: ActiveContractsServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.active_contracts_service.ActiveContractsServiceGrpc.ActiveContractsService
import com.daml.ledger.api.v1.active_contracts_service.{
  ActiveContractsServiceGrpc,
  GetActiveContractsRequest,
  GetActiveContractsResponse
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class ActiveContractsServiceValidation(
    protected val service: ActiveContractsService with AutoCloseable,
    val ledgerId: LedgerId)
    extends ActiveContractsService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(ActiveContractsService.getClass)

  override def getActiveContracts(
      request: GetActiveContractsRequest,
      responseObserver: StreamObserver[GetActiveContractsResponse]): Unit = {
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .fold(responseObserver.onError, _ => service.getActiveContracts(request, responseObserver))
  }

  override def bindService(): ServerServiceDefinition =
    ActiveContractsServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 15
Source File: LedgerConfigurationServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.ledger_configuration_service.LedgerConfigurationServiceGrpc.LedgerConfigurationService
import com.daml.ledger.api.v1.ledger_configuration_service.{
  GetLedgerConfigurationRequest,
  GetLedgerConfigurationResponse,
  LedgerConfigurationServiceGrpc
}
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import io.grpc.stub.StreamObserver
import org.slf4j.{Logger, LoggerFactory}

class LedgerConfigurationServiceValidation(
    protected val service: LedgerConfigurationService with GrpcApiService,
    protected val ledgerId: LedgerId)
    extends LedgerConfigurationService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(LedgerConfigurationService.getClass)

  override def getLedgerConfiguration(
      request: GetLedgerConfigurationRequest,
      responseObserver: StreamObserver[GetLedgerConfigurationResponse]): Unit =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId)).fold(
      t => responseObserver.onError(t),
      _ => service.getLedgerConfiguration(request, responseObserver)
    )

  override def bindService(): ServerServiceDefinition =
    LedgerConfigurationServiceGrpc.bindService(this, DirectExecutionContext)
}
Example 16
Source File: PackageServiceValidation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.server.api.validation

import com.daml.dec.DirectExecutionContext
import com.daml.ledger.api.domain.LedgerId
import com.daml.ledger.api.v1.package_service.PackageServiceGrpc.PackageService
import com.daml.ledger.api.v1.package_service._
import com.daml.platform.api.grpc.GrpcApiService
import com.daml.platform.server.api.ProxyCloseable
import io.grpc.ServerServiceDefinition
import org.slf4j.{Logger, LoggerFactory}

import scala.Function.const
import scala.concurrent.Future

class PackageServiceValidation(
    protected val service: PackageService with AutoCloseable,
    val ledgerId: LedgerId)
    extends PackageService
    with ProxyCloseable
    with GrpcApiService
    with FieldValidations {

  protected val logger: Logger = LoggerFactory.getLogger(PackageService.getClass)

  override def listPackages(request: ListPackagesRequest): Future[ListPackagesResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.listPackages
      )

  override def getPackage(request: GetPackageRequest): Future[GetPackageResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackage
      )

  override def getPackageStatus(
      request: GetPackageStatusRequest): Future[GetPackageStatusResponse] =
    matchLedgerId(ledgerId)(LedgerId(request.ledgerId))
      .map(const(request))
      .fold(
        Future.failed,
        service.getPackageStatus
      )

  override def bindService(): ServerServiceDefinition =
    PackageServiceGrpc.bindService(this, DirectExecutionContext)

  override def close(): Unit = service.close()
}
Example 17
Source File: GrpcCommandService.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.server.api.services.grpc import java.time.{Duration, Instant} import com.daml.ledger.api.domain.LedgerId import com.daml.ledger.api.v1.command_service.CommandServiceGrpc.CommandService import com.daml.ledger.api.v1.command_service._ import com.daml.ledger.api.validation.{CommandsValidator, SubmitAndWaitRequestValidator} import com.daml.platform.api.grpc.GrpcApiService import com.daml.dec.DirectExecutionContext import com.daml.platform.server.api.ProxyCloseable import com.google.protobuf.empty.Empty import io.grpc.ServerServiceDefinition import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future class GrpcCommandService( protected val service: CommandService with AutoCloseable, val ledgerId: LedgerId, currentLedgerTime: () => Instant, currentUtcTime: () => Instant, maxDeduplicationTime: () => Option[Duration] ) extends CommandService with GrpcApiService with ProxyCloseable { protected val logger: Logger = LoggerFactory.getLogger(CommandService.getClass) private[this] val validator = new SubmitAndWaitRequestValidator(new CommandsValidator(ledgerId)) override def submitAndWait(request: SubmitAndWaitRequest): Future[Empty] = validator .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()) .fold(Future.failed, _ => service.submitAndWait(request)) override def submitAndWaitForTransactionId( request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionIdResponse] = validator .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()) .fold(Future.failed, _ => service.submitAndWaitForTransactionId(request)) override def submitAndWaitForTransaction( request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionResponse] = validator .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()) .fold(Future.failed, _ => service.submitAndWaitForTransaction(request)) override def submitAndWaitForTransactionTree( request: SubmitAndWaitRequest): Future[SubmitAndWaitForTransactionTreeResponse] = validator .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime()) .fold(Future.failed, _ => service.submitAndWaitForTransactionTree(request)) override def bindService(): ServerServiceDefinition = CommandServiceGrpc.bindService(this, DirectExecutionContext) }
Example 18
Source File: GrpcCommandSubmissionService.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.server.api.services.grpc import java.time.{Duration, Instant} import com.daml.dec.DirectExecutionContext import com.daml.ledger.api.domain.LedgerId import com.daml.ledger.api.v1.command_submission_service.CommandSubmissionServiceGrpc.{ CommandSubmissionService => ApiCommandSubmissionService } import com.daml.ledger.api.v1.command_submission_service.{ CommandSubmissionServiceGrpc, SubmitRequest => ApiSubmitRequest } import com.daml.ledger.api.validation.{CommandsValidator, SubmitRequestValidator} import com.daml.metrics.{Metrics, Timed} import com.daml.platform.api.grpc.GrpcApiService import com.daml.platform.server.api.ProxyCloseable import com.daml.platform.server.api.services.domain.CommandSubmissionService import com.google.protobuf.empty.Empty import io.grpc.ServerServiceDefinition import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future class GrpcCommandSubmissionService( override protected val service: CommandSubmissionService with AutoCloseable, ledgerId: LedgerId, currentLedgerTime: () => Instant, currentUtcTime: () => Instant, maxDeduplicationTime: () => Option[Duration], metrics: Metrics, ) extends ApiCommandSubmissionService with ProxyCloseable with GrpcApiService { protected val logger: Logger = LoggerFactory.getLogger(ApiCommandSubmissionService.getClass) private val validator = new SubmitRequestValidator(new CommandsValidator(ledgerId)) override def submit(request: ApiSubmitRequest): Future[Empty] = Timed.future( metrics.daml.commands.submissions, Timed .value( metrics.daml.commands.validation, validator .validate(request, currentLedgerTime(), currentUtcTime(), maxDeduplicationTime())) .fold( Future.failed, service.submit(_).map(_ => Empty.defaultInstance)(DirectExecutionContext)) ) override def bindService(): ServerServiceDefinition = CommandSubmissionServiceGrpc.bindService(this, DirectExecutionContext) }
Example 19
Source File: DispatcherImpl.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.akkastreams.dispatcher

import java.util.concurrent.atomic.AtomicReference

import akka.NotUsed
import akka.stream.scaladsl.Source
import com.github.ghik.silencer.silent
import org.slf4j.LoggerFactory

import scala.collection.immutable

final class DispatcherImpl[Index: Ordering](
    name: String,
    zeroIndex: Index,
    headAtInitialization: Index)
    extends Dispatcher[Index] {

  private val logger = LoggerFactory.getLogger(getClass)

  require(
    !indexIsBeforeZero(headAtInitialization),
    s"head supplied at Dispatcher initialization $headAtInitialization is before zero index $zeroIndex. " +
      s"This would imply that the ledger end is before the ledger begin, which makes this invalid configuration."
  )

  private sealed abstract class State extends Product with Serializable {
    def getSignalDispatcher: Option[SignalDispatcher]
    def getLastIndex: Index
  }

  // the following silent are due to
  // <https://github.com/scala/bug/issues/4440>
  @silent
  private final case class Running(lastIndex: Index, signalDispatcher: SignalDispatcher)
      extends State {
    override def getLastIndex: Index = lastIndex
    override def getSignalDispatcher: Option[SignalDispatcher] = Some(signalDispatcher)
  }

  @silent
  private final case class Closed(lastIndex: Index) extends State {
    override def getLastIndex: Index = lastIndex
    override def getSignalDispatcher: Option[SignalDispatcher] = None
  }

  // So why not broadcast the actual new index, instead of using a signaller?
  // The reason is if we do that, the new indices race with readHead
  // in a way that makes it hard to start up new subscriptions. In particular,
  // we can tolerate NewIndexSignals being out of order or dropped, maintaining the weaker invariant that,
  // if head is updated, at least one NewIndexSignal eventually arrives.
  private val state = new AtomicReference[State](Running(headAtInitialization, SignalDispatcher()))

  // NOTE: this `apply` is excerpted from an inner helper class whose declaration (including the
  // `max` variable it updates) is elided in this listing; the stray closing brace below closes
  // that elided class.
  override def apply(newHead: Index): immutable.Iterable[(Index, Index)] =
    if (Ordering[Index].gt(newHead, max)) {
      val intervalBegin = max
      max = newHead
      List(intervalBegin -> newHead)
    } else Nil
  }

  private def indexIsBeforeZero(checkedIndex: Index): Boolean =
    Ordering[Index].gt(zeroIndex, checkedIndex)

  def close(): Unit =
    state.getAndUpdate {
      case Running(idx, _) => Closed(idx)
      case c: Closed => c
    } match {
      case Running(idx, disp) =>
        disp.signal()
        disp.close()
      case c: Closed => ()
    }

  private def closedError: IllegalStateException =
    new IllegalStateException(s"$name: Dispatcher is closed")
}
Example 20
Source File: Tests.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.testtool

import java.nio.file.Path

import com.daml.ledger.api.testtool
import com.daml.ledger.api.testtool.infrastructure.{BenchmarkReporter, LedgerTestSuite}
import com.daml.ledger.api.testtool.tests._
import org.slf4j.LoggerFactory

object Tests {
  type Tests = Map[String, LedgerTestSuite]

  def performanceTests(path: Option[Path]): Tests = {
    val reporter = (key: String, value: Double) =>
      path
        .map(BenchmarkReporter.toFile)
        .getOrElse(BenchmarkReporter.toStream(System.out))
        .addReport(key, value)

    Envelope.values.flatMap { envelope =>
      {
        val throughputKey: String = performanceEnvelopeThroughputTestKey(envelope)
        val latencyKey: String = performanceEnvelopeLatencyTestKey(envelope)
        val transactionSizeKey: String = performanceEnvelopeTransactionSizeTestKey(envelope)
        List(
          throughputKey -> new testtool.tests.PerformanceEnvelope.ThroughputTest(
            logger = LoggerFactory.getLogger(throughputKey),
            envelope = envelope,
            reporter = reporter,
          ),
          latencyKey -> new testtool.tests.PerformanceEnvelope.LatencyTest(
            logger = LoggerFactory.getLogger(latencyKey),
            envelope = envelope,
            reporter = reporter,
          ),
          transactionSizeKey -> new testtool.tests.PerformanceEnvelope.TransactionSizeScaleTest(
            logger = LoggerFactory.getLogger(transactionSizeKey),
            envelope = envelope,
          ),
        )
      }
    }
  }.toMap

  private[this] def performanceEnvelopeThroughputTestKey(envelope: Envelope): String =
    s"PerformanceEnvelope.${envelope.name}.Throughput"
  private[this] def performanceEnvelopeLatencyTestKey(envelope: Envelope): String =
    s"PerformanceEnvelope.${envelope.name}.Latency"
  private[this] def performanceEnvelopeTransactionSizeTestKey(envelope: Envelope): String =
    s"PerformanceEnvelope.${envelope.name}.TransactionSize"

  private[testtool] val PerformanceTestsKeys =
    Envelope.values.flatMap { envelope =>
      List(
        performanceEnvelopeThroughputTestKey(envelope),
        performanceEnvelopeLatencyTestKey(envelope),
        performanceEnvelopeTransactionSizeTestKey(envelope),
      )
    }
}
Example 21
Source File: ParticipantSessionManager.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.api.testtool.infrastructure.participant import io.grpc.netty.{NegotiationType, NettyChannelBuilder} import io.netty.channel.nio.NioEventLoopGroup import io.netty.channel.socket.nio.NioSocketChannel import io.netty.util.concurrent.DefaultThreadFactory import org.slf4j.LoggerFactory import scala.collection.concurrent.TrieMap import scala.concurrent.{ExecutionContext, Future} private[infrastructure] final class ParticipantSessionManager { private[this] val logger = LoggerFactory.getLogger(classOf[ParticipantSession]) private[this] val channels = TrieMap.empty[ParticipantSessionConfiguration, ParticipantSession] @throws[RuntimeException] private def create( config: ParticipantSessionConfiguration, )(implicit ec: ExecutionContext): ParticipantSession = { logger.info(s"Connecting to participant at ${config.host}:${config.port}...") val threadFactoryPoolName = s"grpc-event-loop-${config.host}-${config.port}" val daemonThreads = false val threadFactory: DefaultThreadFactory = new DefaultThreadFactory(threadFactoryPoolName, daemonThreads) logger.info( s"gRPC thread factory instantiated with pool '$threadFactoryPoolName' (daemon threads: $daemonThreads)", ) val threadCount = Runtime.getRuntime.availableProcessors val eventLoopGroup: NioEventLoopGroup = new NioEventLoopGroup(threadCount, threadFactory) logger.info( s"gRPC event loop thread group instantiated with $threadCount threads using pool '$threadFactoryPoolName'", ) val managedChannelBuilder = NettyChannelBuilder .forAddress(config.host, config.port) .eventLoopGroup(eventLoopGroup) .channelType(classOf[NioSocketChannel]) .directExecutor() .usePlaintext() for (ssl <- config.ssl; sslContext <- ssl.client) { logger.info("Setting up managed communication channel with transport security") managedChannelBuilder .useTransportSecurity() .sslContext(sslContext) .negotiationType(NegotiationType.TLS) } managedChannelBuilder.maxInboundMessageSize(10000000) val managedChannel = managedChannelBuilder.build() logger.info(s"Connection to participant at ${config.host}:${config.port}") new ParticipantSession(config, managedChannel, eventLoopGroup) } def getOrCreate( configuration: ParticipantSessionConfiguration, )(implicit ec: ExecutionContext): Future[ParticipantSession] = Future(channels.getOrElseUpdate(configuration, create(configuration))) def close(configuration: ParticipantSessionConfiguration): Unit = channels.get(configuration).foreach(_.close()) def closeAll(): Unit = for ((_, session) <- channels) { session.close() } }
Example 22
Source File: ParticipantSession.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.api.testtool.infrastructure.participant import com.daml.ledger.api.testtool.infrastructure.LedgerServices import com.daml.ledger.api.v1.ledger_identity_service.GetLedgerIdentityRequest import com.daml.ledger.api.v1.transaction_service.GetLedgerEndRequest import com.daml.timer.RetryStrategy import io.grpc.ManagedChannel import io.netty.channel.nio.NioEventLoopGroup import org.slf4j.LoggerFactory import scala.concurrent.duration.{DurationInt, SECONDS} import scala.concurrent.{ExecutionContext, Future} private[participant] final class ParticipantSession( val config: ParticipantSessionConfiguration, channel: ManagedChannel, eventLoopGroup: NioEventLoopGroup, )(implicit val executionContext: ExecutionContext) { private[this] val logger = LoggerFactory.getLogger(classOf[ParticipantSession]) private[this] val services: LedgerServices = new LedgerServices(channel) // The ledger identifier is retrieved only once when the participant session is created // Changing the ledger identifier during the execution of a session can result in unexpected consequences // The test tool is designed to run tests in an isolated environment but changing the // global state of the ledger breaks this assumption, no matter what private[this] val ledgerIdF = RetryStrategy.exponentialBackoff(10, 10.millis) { (attempt, wait) => logger.debug(s"Fetching ledgerId to create context (attempt #$attempt, next one in $wait)...") services.identity.getLedgerIdentity(new GetLedgerIdentityRequest).map(_.ledgerId) } private[testtool] def createTestContext( endpointId: String, applicationId: String, identifierSuffix: String, ): Future[ParticipantTestContext] = for { ledgerId <- ledgerIdF end <- services.transaction.getLedgerEnd(new GetLedgerEndRequest(ledgerId)).map(_.getOffset) } yield new ParticipantTestContext( ledgerId, endpointId, applicationId, identifierSuffix, end, services, config.partyAllocation, ) private[testtool] def close(): Unit = { logger.info(s"Disconnecting from participant at ${config.host}:${config.port}...") channel.shutdownNow() if (!channel.awaitTermination(10L, SECONDS)) { sys.error("Channel shutdown stuck. Unable to recover. Terminating.") } logger.info(s"Connection to participant at ${config.host}:${config.port} shut down.") if (!eventLoopGroup .shutdownGracefully(0, 0, SECONDS) .await(10L, SECONDS)) { sys.error("Unable to shutdown event loop. Unable to recover. Terminating.") } logger.info(s"Connection to participant at ${config.host}:${config.port} closed.") } }
Example 23
Source File: Debug.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils

import java.io.{DataOutputStream, FileOutputStream}

import com.daml.ledger.participant.state.kvutils.DamlKvutils._
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._

// NOTE: the enclosing declaration and the `optLedgerDumpStream` member it references are elided in this excerpt.

  def dumpLedgerEntry(
      submission: DamlSubmission,
      participantId: String,
      entryId: DamlLogEntryId,
      logEntry: DamlLogEntry,
      outputState: Map[DamlStateKey, DamlStateValue]): Unit =
    optLedgerDumpStream.foreach { outs =>
      val dumpEntry = DamlKvutils.LedgerDumpEntry.newBuilder
        .setSubmission(Envelope.enclose(submission))
        .setEntryId(entryId)
        .setParticipantId(participantId)
        .setLogEntry(Envelope.enclose(logEntry))
        .addAllOutputState(
          outputState.map {
            case (k, v) =>
              DamlKvutils.LedgerDumpEntry.StatePair.newBuilder
                .setStateKey(k)
                .setStateValue(Envelope.enclose(v))
                .build
          }.asJava
        )
        .build

      // Messages are delimited by a header containing the message size as int32
      outs.writeInt(dumpEntry.getSerializedSize)
      dumpEntry.writeTo(outs)
      outs.flush()
    }
}
Example 24
Source File: CommitContext.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{DamlStateMap, Err}
import com.daml.ledger.participant.state.v1.ParticipantId
import com.daml.lf.data.Time.Timestamp
import org.slf4j.LoggerFactory

import scala.collection.mutable

// NOTE: the enclosing CommitContext declaration and the members referenced below
// (`logger`, `inputs`, `outputs`, `outputOrder`) are elided in this excerpt.

  def getOutputs: Iterable[(DamlStateKey, DamlStateValue)] =
    outputOrder
      .map(key => key -> outputs(key))
      .filterNot {
        case (key, value) if inputAlreadyContains(key, value) =>
          logger.trace("Identical output found for key {}", key)
          true
        case _ => false
      }

  private def inputAlreadyContains(key: DamlStateKey, value: DamlStateValue): Boolean =
    inputs.get(key).exists(_.contains(value))
}
Example 25
Source File: Committer.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.codahale.metrics.Timer
import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlConfigurationEntry,
  DamlLogEntry,
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{Conversions, DamlStateMap, Err}
import com.daml.ledger.participant.state.kvutils.committer.Committer._
import com.daml.ledger.participant.state.v1.{Configuration, ParticipantId}
import com.daml.lf.data.Time
import com.daml.metrics.Metrics
import org.slf4j.{Logger, LoggerFactory}

// NOTE: only a fragment of the surrounding method survives in this excerpt; its opening
// (and the `logger` and `defaultConfig` it references) is elided.
        throw Err.MissingInputState(Conversions.configurationStateKey)
      )
      .flatMap { v =>
        val entry = v.getConfigurationEntry
        Configuration
          .decode(entry.getConfiguration)
          .fold({ err =>
            logger.error(s"Failed to parse configuration: $err, using default configuration.")
            None
          }, conf => Some(Some(entry) -> conf))
      }
      .getOrElse(None -> defaultConfig)
  }
Example 26
Source File: LedgerDataExporter.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.export

import java.io.{DataOutputStream, FileOutputStream}
import java.time.Instant

import com.daml.ledger.participant.state.v1.ParticipantId
import com.daml.ledger.validator.LedgerStateOperations.{Key, Value}
import com.google.protobuf.ByteString
import org.slf4j.LoggerFactory

trait LedgerDataExporter {
  def finishedProcessing(correlationId: String): Unit
}

object LedgerDataExporter {
  val EnvironmentVariableName = "KVUTILS_LEDGER_EXPORT"

  private val logger = LoggerFactory.getLogger(this.getClass)

  private lazy val outputStreamMaybe: Option[DataOutputStream] = {
    Option(System.getenv(EnvironmentVariableName))
      .map { filename =>
        logger.info(s"Enabled writing ledger entries to $filename")
        new DataOutputStream(new FileOutputStream(filename))
      }
  }

  private lazy val instance = outputStreamMaybe
    .map(new FileBasedLedgerDataExporter(_))
    .getOrElse(NoopLedgerDataExporter)

  def apply(): LedgerDataExporter = instance
}
Example 27
Source File: AuthorizationTest.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.http import java.nio.file.Files import akka.actor.ActorSystem import akka.stream.Materializer import com.daml.auth.TokenHolder import com.daml.bazeltools.BazelRunfiles.rlocation import com.daml.grpc.adapter.{AkkaExecutionSequencerPool, ExecutionSequencerFactory} import com.daml.http.util.TestUtil.requiredFile import com.daml.ledger.api.auth.{AuthServiceStatic, Claim, ClaimPublic, Claims} import com.daml.ledger.client.LedgerClient import org.scalatest.{AsyncFlatSpec, BeforeAndAfterAll, Matchers} import org.slf4j.LoggerFactory import scala.concurrent.ExecutionContext import scala.util.control.NonFatal final class AuthorizationTest extends AsyncFlatSpec with BeforeAndAfterAll with Matchers { private val dar = requiredFile(rlocation("docs/quickstart-model.dar")) .fold(e => throw new IllegalStateException(e), identity) private val testId: String = this.getClass.getSimpleName implicit val asys: ActorSystem = ActorSystem(testId) implicit val mat: Materializer = Materializer(asys) implicit val aesf: ExecutionSequencerFactory = new AkkaExecutionSequencerPool(testId)(asys) implicit val ec: ExecutionContext = asys.dispatcher private val publicToken = "public" private val emptyToken = "empty" private val mockedAuthService = Option(AuthServiceStatic { case `publicToken` => Claims(Seq[Claim](ClaimPublic)) case `emptyToken` => Claims(Nil) }) private val accessTokenFile = Files.createTempFile("Extractor", "AuthSpec") private val tokenHolder = Option(new TokenHolder(accessTokenFile)) private def setToken(string: String): Unit = { val _ = Files.write(accessTokenFile, string.getBytes()) } override protected def afterAll(): Unit = { super.afterAll() try { Files.delete(accessTokenFile) } catch { case NonFatal(e) => LoggerFactory .getLogger(classOf[AuthorizationTest]) .warn("Unable to delete temporary token file", e) } } protected def withLedger[A] = HttpServiceTestFixture .withLedger[A](List(dar), testId, Option(publicToken), mockedAuthService) _ private def packageService(client: LedgerClient): PackageService = new PackageService(HttpService.loadPackageStoreUpdates(client.packageClient, tokenHolder)) behavior of "PackageService against an authenticated sandbox" it should "fail immediately if the authorization is insufficient" in withLedger { client => setToken(emptyToken) packageService(client).reload.failed.map(_ => succeed) } it should "succeed if the authorization is sufficient" in withLedger { client => setToken(publicToken) packageService(client).reload.map(_ => succeed) } }
Example 28
Source File: AkkaBeforeAndAfterAll.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.api.testing.utils import java.util.concurrent.Executors import akka.actor.ActorSystem import akka.stream.Materializer import com.daml.grpc.adapter.{AkkaExecutionSequencerPool, ExecutionSequencerFactory} import com.google.common.util.concurrent.ThreadFactoryBuilder import org.scalatest.{BeforeAndAfterAll, Suite} import org.slf4j.LoggerFactory import scala.concurrent.duration.DurationInt import scala.concurrent.{Await, ExecutionContext} trait AkkaBeforeAndAfterAll extends BeforeAndAfterAll { self: Suite => private val logger = LoggerFactory.getLogger(getClass) protected def actorSystemName: String = this.getClass.getSimpleName private implicit lazy val executionContext: ExecutionContext = ExecutionContext.fromExecutorService( Executors.newSingleThreadExecutor( new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat(s"$actorSystemName-thread-pool-worker-%d") .setUncaughtExceptionHandler((thread, _) => logger.error(s"got an uncaught exception on thread: ${thread.getName}")) .build())) protected implicit lazy val system: ActorSystem = ActorSystem(actorSystemName, defaultExecutionContext = Some(executionContext)) protected implicit lazy val materializer: Materializer = Materializer(system) protected implicit lazy val executionSequencerFactory: ExecutionSequencerFactory = new AkkaExecutionSequencerPool(poolName = actorSystemName, actorCount = 1) override protected def afterAll(): Unit = { executionSequencerFactory.close() materializer.shutdown() Await.result(system.terminate(), 30.seconds) super.afterAll() } }
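The executor above routes uncaught exceptions into the trait's SLF4J logger via Guava's ThreadFactoryBuilder. A plain-JDK sketch of the same idea, assuming Scala 2.12+ SAM syntax and hypothetical names:

import java.util.concurrent.{ExecutorService, Executors, ThreadFactory}
import org.slf4j.LoggerFactory

object LoggingExecutor {
  private val logger = LoggerFactory.getLogger(getClass)

  // Single daemon worker whose uncaught exceptions end up in the SLF4J log.
  def single(name: String): ExecutorService =
    Executors.newSingleThreadExecutor(new ThreadFactory {
      override def newThread(r: Runnable): Thread = {
        val t = new Thread(r, s"$name-worker")
        t.setDaemon(true)
        t.setUncaughtExceptionHandler((thread, e) =>
          logger.error(s"Uncaught exception on thread ${thread.getName}", e))
        t
      }
    })
}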
Example 29
Source File: AkkaStreamPerformanceTest.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.api.perf.util import akka.actor.ActorSystem import akka.stream.Materializer import com.daml.ledger.api.testing.utils.Resource import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.{ExecutionContext, ExecutionContextExecutor} @SuppressWarnings(Array("org.wartremover.warts.LeakingSealed")) abstract class AkkaStreamPerformanceTest extends PerformanceTest { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) type ResourceType @volatile protected var system: ActorSystem = _ @volatile protected var materializer: Materializer = _ @transient protected implicit val ec: ExecutionContextExecutor = ExecutionContext.global protected def resource: Resource[ResourceType] protected def setup(): Unit = { resource.setup() implicit val sys: ActorSystem = ActorSystem(this.getClass.getSimpleName.stripSuffix("$")) system = sys materializer = Materializer(system) } protected def teardown(): Unit = { await(system.terminate()) resource.close() } implicit class FixtureSetup[T](using: Using[T]) extends Serializable { def withLifecycleManagement(additionalSetup: T => Unit = _ => ()): Using[T] = using .setUp { input => try { setup() additionalSetup(input) } catch { case t: Throwable => logger.error("Setup failed.", t) throw t } } .tearDown { _ => try { teardown() } catch { case t: Throwable => logger.error("Teardown failed.", t) throw t } } } }
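The withLifecycleManagement wrapper above logs setup and teardown failures before rethrowing so the performance run still fails visibly. A hypothetical helper condensing that pattern:

import org.slf4j.{Logger, LoggerFactory}

object SafeLifecycle {
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  // Log the failure with a phase label, then rethrow.
  def logged[T](phase: String)(body: => T): T =
    try body
    catch {
      case t: Throwable =>
        logger.error(s"$phase failed.", t)
        throw t
    }
}

// usage: SafeLifecycle.logged("Setup") { setup() }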
Example 30
Source File: Main.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.codegen import java.io.File import java.nio.file.Path import ch.qos.logback.classic.Level import com.daml.lf.codegen.conf.Conf import com.typesafe.scalalogging.StrictLogging import org.slf4j.{Logger, LoggerFactory} import scalaz.Cord import scala.collection.breakOut object Main extends StrictLogging { private val codegenId = "Scala Codegen" @deprecated("Use codegen font-end: com.daml.codegen.CodegenMain.main", "0.13.23") def main(args: Array[String]): Unit = Conf.parse(args) match { case Some(conf) => generateCode(conf) case None => throw new IllegalArgumentException( s"Invalid ${codegenId: String} command line arguments: ${args.mkString(" "): String}") } def generateCode(conf: Conf): Unit = conf match { case Conf(darMap, outputDir, decoderPkgAndClass, verbosity, roots) => setGlobalLogLevel(verbosity) logUnsupportedEventDecoderOverride(decoderPkgAndClass) val (dars, packageName) = darsAndOnePackageName(darMap) CodeGen.generateCode(dars, packageName, outputDir.toFile, CodeGen.Novel, roots) } private def setGlobalLogLevel(verbosity: Level): Unit = { LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME) match { case a: ch.qos.logback.classic.Logger => a.setLevel(verbosity) logger.info(s"${codegenId: String} verbosity: ${verbosity.toString}") case _ => logger.warn(s"${codegenId: String} cannot set requested verbosity: ${verbosity.toString}") } } private def logUnsupportedEventDecoderOverride(mapping: Option[(String, String)]): Unit = mapping.foreach { case (a, b) => logger.warn( s"${codegenId: String} does not allow overriding Event Decoder, skipping: ${a: String} -> ${b: String}") } private def darsAndOnePackageName(darMap: Map[Path, Option[String]]): (List[File], String) = { val dars: List[File] = darMap.keys.map(_.toFile)(breakOut) val uniquePackageNames: Set[String] = darMap.values.collect { case Some(x) => x }(breakOut) uniquePackageNames.toSeq match { case Seq(packageName) => (dars, packageName) case _ => throw new IllegalStateException( s"${codegenId: String} expects all dars mapped to the same package name, " + s"requested: ${format(darMap): String}") } } private def format(map: Map[Path, Option[String]]): String = { val cord = map.foldLeft(Cord("{")) { (str, kv) => str ++ kv._1.toFile.getAbsolutePath ++ "->" ++ kv._2.toString ++ "," } (cord ++ "}").toString } }
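The setGlobalLogLevel helper above works by fetching the SLF4J root logger and downcasting it to Logback's Logger. A minimal sketch of just that step; it only succeeds when the SLF4J binding on the classpath is Logback, hence the type check:

import ch.qos.logback.classic.Level
import org.slf4j.{Logger, LoggerFactory}

object LogLevels {
  def setRootLevel(level: Level): Unit =
    LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME) match {
      case logback: ch.qos.logback.classic.Logger => logback.setLevel(level)
      case other => other.warn(s"Cannot set level $level: not a Logback logger")
    }
}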
Example 31
Source File: ContextualizedLogger.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.logging import akka.NotUsed import akka.stream.scaladsl.Flow import com.daml.grpc.GrpcException import io.grpc.Status import org.slf4j.{Logger, LoggerFactory} import scala.collection.concurrent.TrieMap import scala.util.{Failure, Try} import scala.util.control.NonFatal object ContextualizedLogger { // Caches loggers to prevent them from needlessly wasting memory // Replicates the behavior of the underlying Slf4j logger factory private[this] val cache = TrieMap.empty[String, ContextualizedLogger] // Allows to explicitly pass a logger, should be used for testing only private[logging] def createFor(withoutContext: Logger): ContextualizedLogger = new ContextualizedLogger(withoutContext) // Slf4j handles the caching of the underlying logger itself private[logging] def createFor(name: String): ContextualizedLogger = createFor(LoggerFactory.getLogger(name)) def get(clazz: Class[_]): ContextualizedLogger = { val name = clazz.getName.stripSuffix("$") cache.getOrElseUpdate(name, createFor(name)) } } final class ContextualizedLogger private (val withoutContext: Logger) { val trace = new LeveledLogger.Trace(withoutContext) val debug = new LeveledLogger.Debug(withoutContext) val info = new LeveledLogger.Info(withoutContext) val warn = new LeveledLogger.Warn(withoutContext) val error = new LeveledLogger.Error(withoutContext) private def internalOrUnknown(code: Status.Code): Boolean = code == Status.Code.INTERNAL || code == Status.Code.UNKNOWN private def logError(t: Throwable)(implicit logCtx: LoggingContext): Unit = error("Unhandled internal error", t) def logErrorsOnCall[Out](implicit logCtx: LoggingContext): PartialFunction[Try[Out], Unit] = { case Failure(e @ GrpcException(s, _)) => if (internalOrUnknown(s.getCode)) { logError(e) } case Failure(NonFatal(e)) => logError(e) } def logErrorsOnStream[Out](implicit logCtx: LoggingContext): Flow[Out, Out, NotUsed] = Flow[Out].mapError { case e @ GrpcException(s, _) => if (internalOrUnknown(s.getCode)) { logError(e) } e case NonFatal(e) => logError(e) e } }
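ContextualizedLogger memoises its wrappers in a concurrent TrieMap keyed by logger name, while SLF4J keeps caching the raw Logger instances underneath. A minimal sketch of that caching scheme with hypothetical names:

import org.slf4j.{Logger, LoggerFactory}
import scala.collection.concurrent.TrieMap

final class PrefixedLogger(underlying: Logger, prefix: String) {
  def info(msg: String): Unit = underlying.info(s"[$prefix] $msg")
}

object PrefixedLogger {
  private val cache = TrieMap.empty[String, PrefixedLogger]

  def get(clazz: Class[_], prefix: String): PrefixedLogger = {
    val name = clazz.getName.stripSuffix("$")
    cache.getOrElseUpdate(name, new PrefixedLogger(LoggerFactory.getLogger(name), prefix))
  }
}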
Example 32
Source File: Evaluation.scala From glintlda with MIT License | 5 votes |
package glintlda import akka.util.Timeout import breeze.numerics._ import com.typesafe.scalalogging.slf4j.Logger import glint.iterators.RowBlockIterator import org.slf4j.LoggerFactory import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext} def logCurrentState(iteration: Int, docLoglikelihood: Double, tokenCounts: Long, model: LDAModel): Unit = { // Construct necessary variables for pipelined communication with parameter server implicit val ec = ExecutionContext.Implicits.global implicit val timeout = new Timeout(300 seconds) // Get the independently computed log likelihood numbers val wordLoglikelihood = computeWordLoglikelihood(model) val loglikelihood = docLoglikelihood + wordLoglikelihood // Compute perplexity val perplexity = Math.exp(-loglikelihood / tokenCounts) // Print to log val logger = Logger(LoggerFactory getLogger s"${getClass.getSimpleName}") logger.info(s"Evaluation after iteration ${iteration}") logger.info(s"Doc log-likelihood: ${docLoglikelihood}") logger.info(s"Word log-likelihood: ${wordLoglikelihood}") logger.info(s"Log-likelihood: ${loglikelihood}") logger.info(s"Token counts: ${tokenCounts}") logger.info(s"Perplexity: ${perplexity}") } }
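This example wraps the SLF4J logger in scala-logging's Logger. A sketch of the same wiring, assuming the current scala-logging 3.x artifact where the wrapper lives directly under com.typesafe.scalalogging (the listing above uses the older com.typesafe.scalalogging.slf4j package):

import com.typesafe.scalalogging.Logger
import org.slf4j.LoggerFactory

object EvaluationLogging {
  private val logger = Logger(LoggerFactory.getLogger(getClass.getSimpleName))

  def logPerplexity(iteration: Int, loglikelihood: Double, tokenCounts: Long): Unit = {
    val perplexity = math.exp(-loglikelihood / tokenCounts)
    logger.info(s"Iteration $iteration perplexity: $perplexity")
  }
}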
Example 33
Source File: RouterMetrics.scala From prometheus-akka with Apache License 2.0 | 5 votes |
package com.workday.prometheus.akka import scala.collection.concurrent.TrieMap import scala.util.control.NonFatal import org.slf4j.LoggerFactory import io.prometheus.client.{Counter, Gauge} object RouterMetrics { private val logger = LoggerFactory.getLogger(RouterMetrics.getClass) private val map = TrieMap[Entity, RouterMetrics]() def metricsFor(e: Entity): Option[RouterMetrics] = { try { Some(map.getOrElseUpdate(e, new RouterMetrics(e))) } catch { case NonFatal(t) => { logger.warn("Issue with getOrElseUpdate (failing over to simple get)", t) map.get(e) } } } def hasMetricsFor(e: Entity): Boolean = map.contains(e) } class RouterMetrics(entity: Entity) { val actorName = metricFriendlyActorName(entity.name) val routingTime = Gauge.build().name(s"akka_router_routing_time_$actorName").help("Akka Router routing time (Seconds)").register() val processingTime = Gauge.build().name(s"akka_router_processing_time_$actorName").help("Akka Router processing time (Seconds)").register() val timeInMailbox = Gauge.build().name(s"akka_router_time_in_mailbox_$actorName").help("Akka Router time in mailbox (Seconds)").register() val messages = Counter.build().name(s"akka_router_message_count_$actorName").help("Akka Router messages").register() val errors = Counter.build().name(s"akka_router_error_count_$actorName").help("Akka Router errors").register() }
Example 34
Source File: ActorMetrics.scala From prometheus-akka with Apache License 2.0 | 5 votes |
package com.workday.prometheus.akka import scala.collection.concurrent.TrieMap import scala.util.control.NonFatal import org.slf4j.LoggerFactory import io.prometheus.client.{Counter, Gauge} object ActorMetrics { private val logger = LoggerFactory.getLogger(ActorMetrics.getClass) private val map = TrieMap[Entity, ActorMetrics]() def metricsFor(e: Entity): Option[ActorMetrics] = { try { Some(map.getOrElseUpdate(e, new ActorMetrics(e))) } catch { case NonFatal(t) => { logger.warn("Issue with getOrElseUpdate (failing over to simple get)", t) map.get(e) } } } def hasMetricsFor(e: Entity): Boolean = map.contains(e) } class ActorMetrics(entity: Entity) { val actorName = metricFriendlyActorName(entity.name) val mailboxSize = Gauge.build().name(s"akka_actor_mailbox_size_$actorName").help("Akka Actor mailbox size").register() val processingTime = Gauge.build().name(s"akka_actor_processing_time_$actorName").help("Akka Actor processing time (Seconds)").register() val timeInMailbox = Gauge.build().name(s"akka_actor_time_in_mailbox_$actorName").help("Akka Actor time in mailbox (Seconds)").register() val messages = Counter.build().name(s"akka_actor_message_count_$actorName").help("Akka Actor messages").register() val errors = Counter.build().name(s"akka_actor_error_count_$actorName").help("Akka Actor errors").register() }
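RouterMetrics and ActorMetrics above share the same metricsFor shape: getOrElseUpdate into a TrieMap, with registration failures logged and a fallback to whatever is already cached. A generic sketch of that pattern; the names here are hypothetical:

import org.slf4j.LoggerFactory
import scala.collection.concurrent.TrieMap
import scala.util.control.NonFatal

class MetricsRegistry[K, V](create: K => V) {
  private val logger = LoggerFactory.getLogger(getClass)
  private val map = TrieMap[K, V]()

  def metricsFor(key: K): Option[V] =
    try Some(map.getOrElseUpdate(key, create(key)))
    catch {
      case NonFatal(t) =>
        logger.warn("Issue with getOrElseUpdate (failing over to simple get)", t)
        map.get(key)
    }
}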
Example 35
Source File: TextClassifier.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.textclassification import com.intel.analytics.bigdl.example.utils._ import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, _} import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T} import org.apache.log4j.{Level => Levle4j, Logger => Logger4j} import org.slf4j.{Logger, LoggerFactory} import scopt.OptionParser import scala.collection.mutable.{ArrayBuffer, Map => MMap} import scala.language.existentials object TextClassifier { val log: Logger = LoggerFactory.getLogger(this.getClass) LoggerFilter.redirectSparkInfoLogs() Logger4j.getLogger("com.intel.analytics.bigdl.optim").setLevel(Levle4j.INFO) def main(args: Array[String]): Unit = { val localParser = new OptionParser[TextClassificationParams]("BigDL Example") { opt[String]('b', "baseDir") .required() .text("Base dir containing the training and word2Vec data") .action((x, c) => c.copy(baseDir = x)) opt[String]('p', "partitionNum") .text("you may want to tune the partitionNum if run into spark mode") .action((x, c) => c.copy(partitionNum = x.toInt)) opt[String]('s', "maxSequenceLength") .text("maxSequenceLength") .action((x, c) => c.copy(maxSequenceLength = x.toInt)) opt[String]('w', "maxWordsNum") .text("maxWordsNum") .action((x, c) => c.copy(maxWordsNum = x.toInt)) opt[String]('l', "trainingSplit") .text("trainingSplit") .action((x, c) => c.copy(trainingSplit = x.toDouble)) opt[String]('z', "batchSize") .text("batchSize") .action((x, c) => c.copy(batchSize = x.toInt)) opt[Int]('l', "learningRate") .text("learningRate") .action((x, c) => c.copy(learningRate = x)) } localParser.parse(args, TextClassificationParams()).map { param => log.info(s"Current parameters: $param") val textClassification = new TextClassifier(param) textClassification.train() } } }
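A side note on the LoggerFactory.getLogger(this.getClass) call in the object above: inside a Scala object, this.getClass is the module class, so the logger name ends with "$" (e.g. "...TextClassifier$"). If the companion-style name is preferred, strip the suffix, as in this small sketch:

import org.slf4j.{Logger, LoggerFactory}

object NamingNote {
  // Drops the trailing "$" that Scala appends to module class names.
  val log: Logger = LoggerFactory.getLogger(this.getClass.getName.stripSuffix("$"))
}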
Example 36
Source File: TimestampLogicalType.scala From embulk-output-s3_parquet with MIT License | 5 votes |
package org.embulk.output.s3_parquet.parquet import java.time.ZoneId import org.apache.parquet.io.api.RecordConsumer import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types} import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.{ MICROS, MILLIS, NANOS } import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.embulk.config.ConfigException import org.embulk.output.s3_parquet.catalog.GlueDataType import org.embulk.spi.`type`.{ BooleanType, DoubleType, JsonType, LongType, StringType, TimestampType } import org.embulk.spi.time.{Timestamp, TimestampFormatter} import org.embulk.spi.Column import org.msgpack.value.Value import org.slf4j.{Logger, LoggerFactory} case class TimestampLogicalType( isAdjustedToUtc: Boolean, timeUnit: TimeUnit, timeZone: ZoneId ) extends ParquetColumnType { private val logger: Logger = LoggerFactory.getLogger(classOf[TimestampLogicalType]) override def primitiveType(column: Column): PrimitiveType = column.getType match { case _: LongType | _: TimestampType => Types .optional(PrimitiveTypeName.INT64) .as(LogicalTypeAnnotation.timestampType(isAdjustedToUtc, timeUnit)) .named(column.getName) case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ => throw new ConfigException(s"Unsupported column type: ${column.getName}") } override def glueDataType(column: Column): GlueDataType = column.getType match { case _: LongType | _: TimestampType => timeUnit match { case MILLIS => GlueDataType.TIMESTAMP case MICROS | NANOS => warningWhenConvertingTimestampToGlueType(GlueDataType.BIGINT) GlueDataType.BIGINT } case _: BooleanType | _: DoubleType | _: StringType | _: JsonType | _ => throw new ConfigException(s"Unsupported column type: ${column.getName}") } override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit = throw newUnsupportedMethodException("consumeBoolean") override def consumeString(consumer: RecordConsumer, v: String): Unit = throw newUnsupportedMethodException("consumeString") override def consumeLong(consumer: RecordConsumer, v: Long): Unit = consumer.addLong(v) override def consumeDouble(consumer: RecordConsumer, v: Double): Unit = throw newUnsupportedMethodException("consumeDouble") override def consumeTimestamp( consumer: RecordConsumer, v: Timestamp, formatter: TimestampFormatter ): Unit = timeUnit match { case MILLIS => consumer.addLong(v.toEpochMilli) case MICROS => consumer.addLong(v.getEpochSecond * 1_000_000L + (v.getNano / 1_000L)) case NANOS => consumer.addLong(v.getEpochSecond * 1_000_000_000L + v.getNano) } override def consumeJson(consumer: RecordConsumer, v: Value): Unit = throw newUnsupportedMethodException("consumeJson") private def warningWhenConvertingTimestampToGlueType( glueType: GlueDataType ): Unit = logger.warn( s"timestamp(isAdjustedToUtc = $isAdjustedToUtc, timeUnit = $timeUnit) is converted" + s" to Glue ${glueType.name} but this is not represented correctly, because Glue" + s" does not support time type. Please use `catalog.column_options` to define the type." ) }
Example 37
Source File: JsonLogicalType.scala From embulk-output-s3_parquet with MIT License | 5 votes |
package org.embulk.output.s3_parquet.parquet import org.apache.parquet.io.api.{Binary, RecordConsumer} import org.apache.parquet.schema.{LogicalTypeAnnotation, PrimitiveType, Types} import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.embulk.config.ConfigException import org.embulk.output.s3_parquet.catalog.GlueDataType import org.embulk.spi.Column import org.embulk.spi.`type`.{ BooleanType, DoubleType, JsonType, LongType, StringType, TimestampType } import org.embulk.spi.time.{Timestamp, TimestampFormatter} import org.msgpack.value.{Value, ValueFactory} import org.slf4j.{Logger, LoggerFactory} object JsonLogicalType extends ParquetColumnType { private val logger: Logger = LoggerFactory.getLogger(JsonLogicalType.getClass) override def primitiveType(column: Column): PrimitiveType = column.getType match { case _: BooleanType | _: LongType | _: DoubleType | _: StringType | _: JsonType => Types .optional(PrimitiveTypeName.BINARY) .as(LogicalTypeAnnotation.jsonType()) .named(column.getName) case _: TimestampType | _ => throw new ConfigException(s"Unsupported column type: ${column.getName}") } override def glueDataType(column: Column): GlueDataType = column.getType match { case _: BooleanType | _: LongType | _: DoubleType | _: StringType | _: JsonType => warningWhenConvertingJsonToGlueType(GlueDataType.STRING) GlueDataType.STRING case _: TimestampType | _ => throw new ConfigException(s"Unsupported column type: ${column.getName}") } override def consumeBoolean(consumer: RecordConsumer, v: Boolean): Unit = consumeJson(consumer, ValueFactory.newBoolean(v)) override def consumeString(consumer: RecordConsumer, v: String): Unit = consumeJson(consumer, ValueFactory.newString(v)) override def consumeLong(consumer: RecordConsumer, v: Long): Unit = consumeJson(consumer, ValueFactory.newInteger(v)) override def consumeDouble(consumer: RecordConsumer, v: Double): Unit = consumeJson(consumer, ValueFactory.newFloat(v)) override def consumeTimestamp( consumer: RecordConsumer, v: Timestamp, formatter: TimestampFormatter ): Unit = throw newUnsupportedMethodException("consumeTimestamp") override def consumeJson(consumer: RecordConsumer, v: Value): Unit = consumer.addBinary(Binary.fromString(v.toJson)) private def warningWhenConvertingJsonToGlueType( glueType: GlueDataType ): Unit = { logger.warn( s"json is converted" + s" to Glue ${glueType.name} but this is not represented correctly, because Glue" + s" does not support json type. Please use `catalog.column_options` to define the type." ) } }
Example 38
Source File: package.scala From zio-metrics with Apache License 2.0 | 5 votes |
package zio.metrics.dropwizard import zio.{ Has, Layer, Task, ZLayer } import java.util.concurrent.TimeUnit import java.io.File import java.util.Locale import java.net.InetSocketAddress import java.util.concurrent.TimeUnit import org.slf4j.LoggerFactory import java.{ util => ju } import java.io.File package object reporters { import com.codahale.metrics.MetricRegistry import com.codahale.metrics.MetricFilter import com.codahale.metrics.graphite.Graphite import com.codahale.metrics.graphite.GraphiteReporter import com.codahale.metrics.ConsoleReporter import com.codahale.metrics.Slf4jReporter import com.codahale.metrics.CsvReporter import com.codahale.metrics.jmx.JmxReporter import com.codahale.metrics.Reporter type Reporters = Has[Reporters.Service] object Reporters { trait Service { def jmx(r: MetricRegistry): Task[JmxReporter] def console(r: MetricRegistry): Task[ConsoleReporter] def slf4j(r: MetricRegistry, duration: Int, unit: TimeUnit, loggerName: String): Task[Slf4jReporter] def csv(r: MetricRegistry, file: File, locale: Locale): Task[Reporter] def graphite(r: MetricRegistry, host: String, port: Int, prefix: String): Task[GraphiteReporter] } val live: Layer[Nothing, Reporters] = ZLayer.succeed(new Service { def jmx(r: MetricRegistry): zio.Task[JmxReporter] = Task(JmxReporter.forRegistry(r).build()) def console(r: MetricRegistry): Task[ConsoleReporter] = Task( ConsoleReporter .forRegistry(r) .convertRatesTo(TimeUnit.SECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS) .build() ) def slf4j(r: MetricRegistry, duration: Int, unit: TimeUnit, loggerName: String): Task[Slf4jReporter] = Task( Slf4jReporter .forRegistry(r) .outputTo(LoggerFactory.getLogger(loggerName)) .convertRatesTo(TimeUnit.SECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS) .build() ) def csv(r: MetricRegistry, file: File, locale: ju.Locale): zio.Task[Reporter] = Task( CsvReporter .forRegistry(r) .formatFor(locale) .convertRatesTo(TimeUnit.SECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS) .build(file) ) def graphite(r: MetricRegistry, host: String, port: Int, prefix: String): zio.Task[GraphiteReporter] = Task { val graphite = new Graphite(new InetSocketAddress(host, port)) GraphiteReporter .forRegistry(r) .prefixedWith(prefix) .convertRatesTo(TimeUnit.SECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS) .filter(MetricFilter.ALL) .build(graphite) } }) } }
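The slf4j reporter above is built but not started; once started, a Dropwizard Slf4jReporter periodically writes every metric in the registry through the given SLF4J logger. A sketch of that wiring, with a hypothetical helper name:

import java.util.concurrent.TimeUnit
import com.codahale.metrics.{MetricRegistry, Slf4jReporter}
import org.slf4j.LoggerFactory

object Slf4jReporting {
  def start(registry: MetricRegistry, loggerName: String, periodSeconds: Long): Slf4jReporter = {
    val reporter = Slf4jReporter
      .forRegistry(registry)
      .outputTo(LoggerFactory.getLogger(loggerName))
      .convertRatesTo(TimeUnit.SECONDS)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .build()
    reporter.start(periodSeconds, TimeUnit.SECONDS)
    reporter
  }
}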
Example 39
Source File: DockerPostgresService.scala From crm-seed with Apache License 2.0 | 5 votes |
package com.dataengi.crm.common.docker import java.sql.DriverManager import com.spotify.docker.client.{DefaultDockerClient, DockerClient} import com.whisk.docker.impl.spotify.SpotifyDockerFactory import com.whisk.docker.{ DockerCommandExecutor, DockerContainer, DockerContainerState, DockerFactory, DockerKit, DockerReadyChecker } import org.slf4j.LoggerFactory import scala.concurrent.{ExecutionContext, Future} import scala.util.Try trait DockerPostgresService extends DockerKit { import scala.concurrent.duration._ private lazy val log = LoggerFactory.getLogger(this.getClass) private val client: DockerClient = DefaultDockerClient.fromEnv().build() override implicit val dockerFactory: DockerFactory = new SpotifyDockerFactory(client) def PostgresAdvertisedPort = 5432 def PostgresExposedPort = 44444 val PostgresUser = "nph" val PostgresPassword = "suitup" lazy val DockerPostgresHost: String = postgresContainer.hostname.getOrElse("localhost") lazy val DockerPostgresPort: Int = PostgresExposedPort lazy val DockerDatabaseName: String = "crm" val postgresContainer: DockerContainer = DockerContainer("postgres:9.5") .withPorts((PostgresAdvertisedPort, Some(PostgresExposedPort))) .withEnv(s"POSTGRES_USER=$PostgresUser", s"POSTGRES_PASSWORD=$PostgresPassword") .withCommand() .withReadyChecker( PostgresReadyChecker(DockerDatabaseName, PostgresUser, PostgresPassword, Some(PostgresExposedPort)) .looped(15, 1.second) ) lazy val dockerTestDataBaseConf: Map[String, Any] = Map[String, Any]( "slick.dbs.default.driver" -> "slick.driver.PostgresDriver$", "slick.dbs.default.db.driver" -> "org.postgresql.Driver", "slick.dbs.default.db.user" -> PostgresUser, "slick.dbs.default.db.password" -> PostgresPassword, "slick.dbs.default.db.url" -> s"jdbc:postgresql://$DockerPostgresHost:$DockerPostgresPort/crm", "slick.dbs.default.db.properties.url" -> s"jdbc:postgresql://$DockerPostgresHost:$DockerPostgresPort/crm" ) override def dockerContainers: List[DockerContainer] = postgresContainer :: super.dockerContainers } case class PostgresReadyChecker(databaseName: String, user: String, password: String, port: Option[Int] = None) extends DockerReadyChecker { override def apply(container: DockerContainerState)(implicit docker: DockerCommandExecutor, ec: ExecutionContext): Future[Boolean] = container .getPorts() .map(ports => Try { Class.forName("org.postgresql.Driver") val url = s"jdbc:postgresql://${docker.host}:${port.getOrElse(ports.values.head)}/" println(s"[postgres][docker][url] $url") Option(DriverManager.getConnection(url, user, password)) .map { connection => println(s"[posgres][docker][create-db][connection] isClosed=${connection.isClosed}") val statements = connection.createStatement() val result = statements.executeUpdate(s"CREATE DATABASE $databaseName") println(s"[posgres][docker][create-db] result=$result") connection } .map(_.close) .isDefined }.getOrElse(false)) }
Example 40
Source File: CounterEtlItem.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.loader.core import org.apache.s2graph.counter.util.UnitConverter import org.slf4j.LoggerFactory import play.api.libs.json._ import scala.util.{Failure, Success, Try} case class CounterEtlItem(ts: Long, service: String, action: String, item: String, dimension: JsValue, property: JsValue, useProfile: Boolean = false) { def toKafkaMessage: String = { s"$ts\t$service\t$action\t$item\t${dimension.toString()}\t${property.toString()}" } lazy val value = { (property \ "value").toOption match { case Some(JsNumber(n)) => n.longValue() case Some(JsString(s)) => s.toLong case None => 1L case _ => throw new Exception("wrong type") } } } object CounterEtlItem { val log = LoggerFactory.getLogger(this.getClass) def apply(line: String): Option[CounterEtlItem] = { Try { val Array(ts, service, action, item, dimension, property) = line.split('\t') CounterEtlItem(UnitConverter.toMillis(ts.toLong), service, action, item, Json.parse(dimension), Json.parse(property)) } match { case Success(item) => Some(item) case Failure(ex) => log.error(">>> failed") log.error(s"${ex.toString}: $line") None } } }
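CounterEtlItem.apply above turns parse failures into None while logging the offending line. A stripped-down sketch of that shape, using only the standard library and hypothetical names:

import org.slf4j.LoggerFactory
import scala.util.{Failure, Success, Try}

object TsvParser {
  private val log = LoggerFactory.getLogger(this.getClass)

  // Parse failures are logged with the raw line and swallowed into None.
  def parse(line: String): Option[(Long, String)] =
    Try {
      val Array(ts, service) = line.split('\t').take(2)
      (ts.toLong, service)
    } match {
      case Success(parsed) => Some(parsed)
      case Failure(ex) =>
        log.error(s"failed to parse: $line", ex)
        None
    }
}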
Example 41
Source File: WithRedis.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.helper import com.typesafe.config.Config import org.apache.s2graph.counter.config.S2CounterConfig import org.apache.s2graph.counter.util.Hashes import org.slf4j.LoggerFactory import redis.clients.jedis.exceptions.JedisException import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig} import scala.util.Try class WithRedis(config: Config) { lazy val s2config = new S2CounterConfig(config) private val log = LoggerFactory.getLogger(getClass) val poolConfig = new JedisPoolConfig() poolConfig.setMaxTotal(150) poolConfig.setMaxIdle(50) poolConfig.setMaxWaitMillis(200) val jedisPools = s2config.REDIS_INSTANCES.map { case (host, port) => new JedisPool(poolConfig, host, port) } def getBucketIdx(key: String): Int = { Hashes.murmur3(key) % jedisPools.size } def doBlockWithIndex[T](idx: Int)(f: Jedis => T): Try[T] = { Try { val pool = jedisPools(idx) var jedis: Jedis = null try { jedis = pool.getResource f(jedis) } catch { case e: JedisException => pool.returnBrokenResource(jedis) jedis = null throw e } finally { if (jedis != null) { pool.returnResource(jedis) } } } } def doBlockWithKey[T](key: String)(f: Jedis => T): Try[T] = { doBlockWithIndex(getBucketIdx(key))(f) } }
Example 42
Source File: CollectionCache.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.util import java.net.InetAddress import java.util.concurrent.TimeUnit import com.google.common.cache.{Cache, CacheBuilder} import org.slf4j.LoggerFactory import scala.concurrent.{ExecutionContext, Future} import scala.language.{postfixOps, reflectiveCalls} case class CollectionCacheConfig(maxSize: Int, ttl: Int, negativeCache: Boolean = false, negativeTTL: Int = 600) class CollectionCache[C <: { def nonEmpty: Boolean; def isEmpty: Boolean } ](config: CollectionCacheConfig) { private val cache: Cache[String, C] = CacheBuilder.newBuilder() .expireAfterWrite(config.ttl, TimeUnit.SECONDS) .maximumSize(config.maxSize) .build[String, C]() // private lazy val cache = new SynchronizedLruMap[String, (C, Int)](config.maxSize) private lazy val className = this.getClass.getSimpleName private lazy val log = LoggerFactory.getLogger(this.getClass) val localHostname = InetAddress.getLocalHost.getHostName def size = cache.size val maxSize = config.maxSize // cache statistics def getStatsString: String = { s"$localHostname ${cache.stats().toString}" } def withCache(key: String)(op: => C): C = { Option(cache.getIfPresent(key)) match { case Some(r) => r case None => val r = op if (r.nonEmpty || config.negativeCache) { cache.put(key, r) } r } } def withCacheAsync(key: String)(op: => Future[C])(implicit ec: ExecutionContext): Future[C] = { Option(cache.getIfPresent(key)) match { case Some(r) => Future.successful(r) case None => op.map { r => if (r.nonEmpty || config.negativeCache) { cache.put(key, r) } r } } } def purgeKey(key: String) = { cache.invalidate(key) } def contains(key: String): Boolean = { Option(cache.getIfPresent(key)).nonEmpty } }
Example 43
Source File: RankingCounter.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.core import java.util.concurrent.TimeUnit import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} import com.typesafe.config.Config import org.apache.s2graph.counter.core.RankingCounter.RankingValueMap import org.apache.s2graph.counter.models.Counter import org.apache.s2graph.counter.util.{CollectionCacheConfig, CollectionCache} import org.slf4j.LoggerFactory import scala.collection.JavaConversions._ case class RankingRow(key: RankingKey, value: Map[String, RankingValue]) case class RateRankingRow(key: RankingKey, value: Map[String, RateRankingValue]) class RankingCounter(config: Config, storage: RankingStorage) { private val log = LoggerFactory.getLogger(getClass) val storageStatusCache = new CollectionCache[Option[Boolean]](CollectionCacheConfig(1000, 60, negativeCache = false, 60)) val cache: LoadingCache[RankingKey, RankingResult] = CacheBuilder.newBuilder() .maximumSize(1000000) .expireAfterWrite(10l, TimeUnit.MINUTES) .build( new CacheLoader[RankingKey, RankingResult]() { def load(rankingKey: RankingKey): RankingResult = { // log.warn(s"cache load: $rankingKey") storage.getTopK(rankingKey, Int.MaxValue).getOrElse(RankingResult(-1, Nil)) } } ) def getTopK(rankingKey: RankingKey, k: Int = Int.MaxValue): Option[RankingResult] = { val tq = rankingKey.eq.tq if (TimedQualifier.getQualifiers(Seq(tq.q), System.currentTimeMillis()).head == tq) { // do not use cache storage.getTopK(rankingKey, k) } else { val result = cache.get(rankingKey) if (result.values.nonEmpty) { Some(result.copy(values = result.values.take(k))) } else { None } } } def update(key: RankingKey, value: RankingValueMap, k: Int): Unit = { storage.update(key, value, k) } def update(values: Seq[(RankingKey, RankingValueMap)], k: Int): Unit = { storage.update(values, k) } def delete(key: RankingKey): Unit = { storage.delete(key) } def getAllItems(keys: Seq[RankingKey], k: Int = Int.MaxValue): Seq[String] = { val oldKeys = keys.filter(key => TimedQualifier.getQualifiers(Seq(key.eq.tq.q), System.currentTimeMillis()).head != key.eq.tq) val cached = cache.getAllPresent(oldKeys) val missed = keys.diff(cached.keys.toSeq) val found = storage.getTopK(missed, k) // log.warn(s"cached: ${cached.size()}, missed: ${missed.size}") for { (key, result) <- found } { cache.put(key, result) } for { (key, RankingResult(totalScore, values)) <- cached ++ found (item, score) <- values } yield { item } }.toSeq.distinct def prepare(policy: Counter): Unit = { storage.prepare(policy) } def destroy(policy: Counter): Unit = { storage.destroy(policy) } def ready(policy: Counter): Boolean = { storageStatusCache.withCache(s"${policy.id}") { Some(storage.ready(policy)) }.getOrElse(false) } } object RankingCounter { type RankingValueMap = Map[String, RankingValue] }
Example 44
Source File: GraphOperation.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.core.v2 import akka.actor.ActorSystem import akka.stream.ActorMaterializer import com.typesafe.config.Config import org.apache.http.HttpStatus import org.apache.s2graph.counter.config.S2CounterConfig import org.apache.s2graph.counter.core.v2.ExactStorageGraph._ import org.asynchttpclient.DefaultAsyncHttpClientConfig import org.slf4j.LoggerFactory import play.api.libs.json.{JsObject, JsValue, Json} import scala.concurrent.Await import scala.concurrent.duration._ class GraphOperation(config: Config) { // using play-ws without play app implicit val materializer = ActorMaterializer.create(ActorSystem(getClass.getSimpleName)) private val builder = new DefaultAsyncHttpClientConfig.Builder() private val wsClient = new play.api.libs.ws.ning.NingWSClient(builder.build) private val s2config = new S2CounterConfig(config) val s2graphUrl = s2config.GRAPH_URL private[counter] val log = LoggerFactory.getLogger(this.getClass) import scala.concurrent.ExecutionContext.Implicits.global def createLabel(json: JsValue): Boolean = { // fix counter label's schemaVersion val newJson = json.as[JsObject] ++ Json.obj("schemaVersion" -> "v2") val future = wsClient.url(s"$s2graphUrl/graphs/createLabel").post(newJson).map { resp => resp.status match { case HttpStatus.SC_OK => true case _ => throw new RuntimeException(s"failed createLabel. errCode: ${resp.status} body: ${resp.body} query: $json") } } Await.result(future, 10 second) } def deleteLabel(label: String): Boolean = { val future = wsClient.url(s"$s2graphUrl/graphs/deleteLabel/$label").put("").map { resp => resp.status match { case HttpStatus.SC_OK => true case _ => throw new RuntimeException(s"failed deleteLabel. errCode: ${resp.status} body: ${resp.body}") } } Await.result(future, 10 second) } }
Example 45
Source File: S2GraphMutateRoute.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.http import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpResponse, StatusCodes} import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server.{ExceptionHandler, Route} import com.fasterxml.jackson.core.JsonParseException import org.apache.s2graph.core.rest.RequestParser import org.apache.s2graph.core.storage.MutateResponse import org.apache.s2graph.core.{GraphElement, S2Graph} import org.slf4j.LoggerFactory import play.api.libs.json.{JsValue, Json} import scala.concurrent.{ExecutionContext, Future} trait S2GraphMutateRoute extends PlayJsonSupport { val s2graph: S2Graph val logger = LoggerFactory.getLogger(this.getClass) lazy val parser = new RequestParser(s2graph) lazy val exceptionHandler = ExceptionHandler { case ex: JsonParseException => complete(StatusCodes.BadRequest -> ex.getMessage) case ex: java.lang.IllegalArgumentException => complete(StatusCodes.BadRequest -> ex.getMessage) } lazy val mutateVertex = path("vertex" / Segments) { params => implicit val ec = s2graph.ec val (operation, serviceNameOpt, columnNameOpt) = params match { case operation :: serviceName :: columnName :: Nil => (operation, Option(serviceName), Option(columnName)) case operation :: Nil => (operation, None, None) case _ => throw new RuntimeException("invalid params") } entity(as[JsValue]) { payload => val future = vertexMutate(payload, operation, serviceNameOpt, columnNameOpt).map(Json.toJson(_)) complete(future) } } lazy val mutateEdge = path("edge" / Segment) { operation => implicit val ec = s2graph.ec entity(as[JsValue]) { payload => val future = edgeMutate(payload, operation, withWait = true).map(Json.toJson(_)) complete(future) } } def vertexMutate(jsValue: JsValue, operation: String, serviceNameOpt: Option[String] = None, columnNameOpt: Option[String] = None, withWait: Boolean = true)(implicit ec: ExecutionContext): Future[Seq[Boolean]] = { val vertices = parser.toVertices(jsValue, operation, serviceNameOpt, columnNameOpt) val verticesToStore = vertices.filterNot(_.isAsync) s2graph.mutateVertices(verticesToStore, withWait).map(_.map(_.isSuccess)) } def edgeMutate(elementsWithTsv: Seq[(GraphElement, String)], withWait: Boolean)(implicit ec: ExecutionContext): Future[Seq[Boolean]] = { val elementWithIdxs = elementsWithTsv.zipWithIndex val (elementSync, elementAsync) = elementWithIdxs.partition { case ((element, tsv), idx) => !element.isAsync } val retToSkip = elementAsync.map(_._2 -> MutateResponse.Success) val (elementsToStore, _) = elementSync.map(_._1).unzip val elementsIdxToStore = elementSync.map(_._2) s2graph.mutateElements(elementsToStore, withWait).map { mutateResponses => elementsIdxToStore.zip(mutateResponses) ++ retToSkip }.map(_.sortBy(_._1).map(_._2.isSuccess)) } def edgeMutate(jsValue: JsValue, operation: String, withWait: Boolean)(implicit ec: ExecutionContext): Future[Seq[Boolean]] = { val edgesWithTsv = parser.parseJsonFormat(jsValue, operation) edgeMutate(edgesWithTsv, withWait) } // expose routes lazy val mutateRoute: Route = post { concat( handleExceptions(exceptionHandler) { mutateVertex }, handleExceptions(exceptionHandler) { mutateEdge } ) } }
Example 46
Source File: Server.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.http import java.time.Instant import scala.language.postfixOps import scala.concurrent.{Await, ExecutionContext, Future} import scala.concurrent.duration.Duration import scala.util.{Failure, Success} import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.model.{ContentTypes, HttpEntity, HttpResponse, StatusCodes} import akka.http.scaladsl.server.Route import akka.http.scaladsl.server.Directives._ import akka.stream.ActorMaterializer import com.typesafe.config.ConfigFactory import org.apache.s2graph.core.S2Graph import org.slf4j.LoggerFactory object Server extends App with S2GraphTraversalRoute with S2GraphAdminRoute with S2GraphMutateRoute with S2GraphQLRoute { implicit val system: ActorSystem = ActorSystem("S2GraphHttpServer") implicit val materializer: ActorMaterializer = ActorMaterializer() implicit val executionContext: ExecutionContext = system.dispatcher val config = ConfigFactory.load() override val s2graph = new S2Graph(config) override val logger = LoggerFactory.getLogger(this.getClass) val port = sys.props.get("http.port").fold(8000)(_.toInt) val interface = sys.props.get("http.interface").fold("0.0.0.0")(identity) val startAt = System.currentTimeMillis() def uptime = System.currentTimeMillis() - startAt def serverHealth = s"""{ "port": ${port}, "interface": "${interface}", "started_at": ${Instant.ofEpochMilli(startAt)}, "uptime": "${uptime} millis" """ def health = HttpResponse(status = StatusCodes.OK, entity = HttpEntity(ContentTypes.`application/json`, serverHealth)) // Allows you to determine routes to expose according to external settings. lazy val routes: Route = concat( pathPrefix("graphs")(traversalRoute), pathPrefix("mutate")(mutateRoute), pathPrefix("admin")(adminRoute), pathPrefix("graphql")(graphqlRoute), get(complete(health)) ) val binding: Future[Http.ServerBinding] = Http().bindAndHandle(routes, interface, port) binding.onComplete { case Success(bound) => logger.info(s"Server online at http://${bound.localAddress.getHostString}:${bound.localAddress.getPort}/") case Failure(e) => logger.error(s"Server could not start!", e) } scala.sys.addShutdownHook { () => s2graph.shutdown() system.terminate() logger.info("System terminated") } Await.result(system.whenTerminated, Duration.Inf) }
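The binding.onComplete block above is the usual way to report a server's startup outcome through SLF4J. A minimal, framework-agnostic sketch of that pattern with hypothetical names:

import org.slf4j.LoggerFactory
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

object StartupLogging {
  private val logger = LoggerFactory.getLogger(getClass)

  def logBindOutcome[A](binding: Future[A], what: String)(implicit ec: ExecutionContext): Unit =
    binding.onComplete {
      case Success(bound) => logger.info(s"$what online: $bound")
      case Failure(e)     => logger.error(s"$what could not start!", e)
    }
}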
Example 47
Source File: S2GraphTraversalRoute.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.http import org.apache.s2graph.core.S2Graph import org.apache.s2graph.core.rest.RestHandler.CanLookup import org.slf4j.LoggerFactory import akka.http.scaladsl.server.Route import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.model.headers.RawHeader import akka.http.scaladsl.model._ import org.apache.s2graph.core.GraphExceptions.{BadQueryException, JsonParseException} import org.apache.s2graph.core.rest.RestHandler import play.api.libs.json._ object S2GraphTraversalRoute { import scala.collection._ implicit val akkHttpHeaderLookup = new CanLookup[immutable.Seq[HttpHeader]] { override def lookup(m: immutable.Seq[HttpHeader], key: String): Option[String] = m.find(_.name() == key).map(_.value()) } } trait S2GraphTraversalRoute extends PlayJsonSupport { import S2GraphTraversalRoute._ val s2graph: S2Graph val logger = LoggerFactory.getLogger(this.getClass) implicit lazy val ec = s2graph.ec lazy val restHandler = new RestHandler(s2graph) // The `/graphs/*` APIs are implemented to be branched from the existing restHandler.doPost. // Implement it first by delegating that function. lazy val delegated: Route = { entity(as[String]) { body => logger.info(body) extractRequest { request => val result = restHandler.doPost(request.uri.toRelative.toString(), body, request.headers) val responseHeaders = result.headers.toList.map { case (k, v) => RawHeader(k, v) } val f = result.body.map(StatusCodes.OK -> _).recover { case BadQueryException(msg, _) => StatusCodes.BadRequest -> Json.obj("error" -> msg) case JsonParseException(msg) => StatusCodes.BadRequest -> Json.obj("error" -> msg) case e: Exception => StatusCodes.InternalServerError -> Json.obj("error" -> e.toString) } respondWithHeaders(responseHeaders)(complete(f)) } } } // expose routes lazy val traversalRoute: Route = post { concat( delegated // getEdges, experiments, etc. ) } }
Example 48
Source File: MutateRouteSpec.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.http import akka.http.scaladsl.marshalling.Marshal import akka.http.scaladsl.model._ import akka.http.scaladsl.testkit.ScalatestRouteTest import com.typesafe.config.ConfigFactory import org.apache.s2graph.core.Management.JsonModel.Prop import org.apache.s2graph.core.S2Graph import org.scalatest.concurrent.ScalaFutures import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpec} import org.slf4j.LoggerFactory import play.api.libs.json.{JsValue, Json} class MutateRouteSpec extends WordSpec with Matchers with PlayJsonSupport with ScalaFutures with ScalatestRouteTest with S2GraphMutateRoute with BeforeAndAfterAll { import scala.collection.JavaConverters._ val dbUrl = "jdbc:h2:file:./var/metastore_mutate_route;MODE=MYSQL;AUTO_SERVER=true" val config = ConfigFactory.parseMap(Map("db.default.url" -> dbUrl).asJava) lazy val s2graph = new S2Graph(config.withFallback(ConfigFactory.load())) override val logger = LoggerFactory.getLogger(this.getClass) override def afterAll(): Unit = { s2graph.shutdown(true) } lazy val routes = mutateRoute val serviceName = "kakaoFavorites" val columnName = "userName" "MutateRoute" should { "be able to insert vertex (POST /mutate/vertex/insert)" in { s2graph.management.createService(serviceName, "localhost", s"${serviceName}-dev", 1, None) s2graph.management.createServiceColumn(serviceName, columnName, "string", Seq(Prop("age", "0", "integer"))) // {"timestamp": 10, "serviceName": "s2graph", "columnName": "user", "id": 1, "props": {}} val param = Json.obj( "timestamp" -> 10, "serviceName" -> serviceName, "columnName" -> columnName, "id" -> "user_a", "props" -> Json.obj( "age" -> 20 ) ) val entity = Marshal(param).to[MessageEntity].futureValue val request = Post("/vertex/insert").withEntity(entity) request ~> routes ~> check { status should ===(StatusCodes.OK) contentType should ===(ContentTypes.`application/json`) val response = entityAs[JsValue] response should ===(Json.toJson(Seq(true))) } } } }
Example 49
Source File: AdminRouteSpec.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.http import akka.http.scaladsl.marshalling.Marshal import akka.http.scaladsl.model._ import akka.http.scaladsl.testkit.ScalatestRouteTest import com.typesafe.config.ConfigFactory import org.apache.s2graph.core.Management.JsonModel.Prop import org.apache.s2graph.core.S2Graph import org.scalatest.concurrent.ScalaFutures import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpec} import org.slf4j.LoggerFactory import play.api.libs.json.{JsString, JsValue, Json} class AdminRoutesSpec extends WordSpec with Matchers with ScalaFutures with ScalatestRouteTest with S2GraphAdminRoute with BeforeAndAfterAll { import scala.collection.JavaConverters._ val dbUrl = "jdbc:h2:file:./var/metastore_admin_route;MODE=MYSQL;AUTO_SERVER=true" val config = ConfigFactory.parseMap(Map("db.default.url" -> dbUrl).asJava) lazy val s2graph = new S2Graph(config.withFallback(ConfigFactory.load())) override val logger = LoggerFactory.getLogger(this.getClass) override def afterAll(): Unit = { s2graph.shutdown(true) } lazy val routes = adminRoute val serviceName = "kakaoFavorites" val columnName = "userName" "AdminRoute" should { "be able to create service (POST /createService)" in { val serviceParam = Json.obj( "serviceName" -> serviceName, "compressionAlgorithm" -> "gz" ) val serviceEntity = Marshal(serviceParam).to[MessageEntity].futureValue val request = Post("/createService").withEntity(serviceEntity) request ~> routes ~> check { status should ===(StatusCodes.Created) contentType should ===(ContentTypes.`application/json`) val response = entityAs[JsValue] (response \\ "name").head should ===(JsString("kakaoFavorites")) (response \\ "status").head should ===(JsString("ok")) } } "return service if present (GET /getService/{serviceName})" in { val request = HttpRequest(uri = s"/getService/$serviceName") request ~> routes ~> check { status should ===(StatusCodes.OK) contentType should ===(ContentTypes.`application/json`) val response = entityAs[JsValue] (response \\ "name").head should ===(JsString("kakaoFavorites")) } } "be able to create serviceColumn (POST /createServiceColumn)" in { val serviceColumnParam = Json.obj( "serviceName" -> serviceName, "columnName" -> columnName, "columnType" -> "string", "props" -> Json.toJson( Seq( Json.obj("name" -> "age", "defaultValue" -> "-1", "dataType" -> "integer") ) ) ) val serviceColumnEntity = Marshal(serviceColumnParam).to[MessageEntity].futureValue val request = Post("/createServiceColumn").withEntity(serviceColumnEntity) request ~> routes ~> check { status should ===(StatusCodes.Created) contentType should ===(ContentTypes.`application/json`) val response = entityAs[JsValue] (response \\ "serviceName").head should ===(JsString("kakaoFavorites")) (response \\ "columnName").head should ===(JsString("userName")) (response \\ "status").head should ===(JsString("ok")) } } } }
Example 50
Source File: DexExtensionGrpcConnector.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.tool.connectors import cats.instances.future._ import cats.instances.list._ import cats.syntax.either._ import cats.syntax.traverse._ import ch.qos.logback.classic.{Level, Logger} import com.wavesplatform.dex.cli.ErrorOr import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.asset.Asset import com.wavesplatform.dex.domain.asset.Asset.{IssuedAsset, Waves} import com.wavesplatform.dex.grpc.integration.WavesBlockchainClientBuilder import com.wavesplatform.dex.grpc.integration.clients.WavesBlockchainClient import com.wavesplatform.dex.grpc.integration.dto.BriefAssetDescription import com.wavesplatform.dex.grpc.integration.settings.GrpcClientSettings.ChannelOptionsSettings import com.wavesplatform.dex.grpc.integration.settings.{GrpcClientSettings, WavesBlockchainClientSettings} import monix.execution.Scheduler.Implicits.{global => monixScheduler} import org.slf4j.LoggerFactory import scala.concurrent.ExecutionContext.Implicits.{global => executionContext} import scala.concurrent.duration._ import scala.concurrent.{Await, Awaitable, Future} import scala.util.Try case class DexExtensionGrpcConnector private (target: String, grpcAsyncClient: WavesBlockchainClient[Future]) extends Connector { import DexExtensionGrpcConnector._ private def sync[A](f: Awaitable[A]): A = Await.result(f, requestTimeout) private def getDetailedBalance(asset: Asset, balance: Long): Future[(Asset, (BriefAssetDescription, Long))] = asset match { case Waves => Future.successful(asset -> (BriefAssetDescription.wavesDescription -> balance)) case ia: IssuedAsset => grpcAsyncClient.assetDescription(ia).map(maybeDesc => ia -> (maybeDesc.get -> balance)) } def matcherBalanceAsync(address: Address): Future[DetailedBalance] = for { balances <- grpcAsyncClient.allAssetsSpendableBalance(address) balancesWithDescription <- balances.toList.traverse { case (a, b) => getDetailedBalance(a, b) } } yield balancesWithDescription.toMap def matcherBalanceSync(address: Address): DetailedBalance = sync { matcherBalanceAsync(address) } override def close(): Unit = Await.result(grpcAsyncClient.close(), 3.seconds) } object DexExtensionGrpcConnector { val requestTimeout: FiniteDuration = 10.seconds type DetailedBalance = Map[Asset, (BriefAssetDescription, Long)] def create(target: String): ErrorOr[DexExtensionGrpcConnector] = Try { LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME).asInstanceOf[Logger].setLevel(Level.OFF) val grpcSettings = GrpcClientSettings(target, 5, 5, true, 2.seconds, 5.seconds, 1.minute, ChannelOptionsSettings(5.seconds)) val clientSettings = WavesBlockchainClientSettings(grpcSettings, 100.milliseconds, 100) WavesBlockchainClientBuilder.async(clientSettings, monixScheduler, executionContext) }.toEither .bimap(ex => s"Cannot establish gRPC connection to DEX Extension! $ex", client => DexExtensionGrpcConnector(target, client)) }
Example 51
Source File: ScorexLogging.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.domain.utils import monix.eval.Task import monix.execution.{CancelableFuture, Scheduler} import org.slf4j.{Logger, LoggerFactory} case class LoggerFacade(logger: Logger) { def trace(message: => String): Unit = if (logger.isTraceEnabled) logger.trace(message) def debug(message: => String, arg: Any): Unit = if (logger.isDebugEnabled) logger.debug(message, arg) def debug(message: => String): Unit = if (logger.isDebugEnabled) logger.debug(message) def info(message: => String): Unit = if (logger.isInfoEnabled) logger.info(message) def info(message: => String, arg: Any): Unit = if (logger.isInfoEnabled) logger.info(message, arg) def info(message: => String, throwable: Throwable): Unit = if (logger.isInfoEnabled) logger.info(message, throwable) def warn(message: => String): Unit = if (logger.isWarnEnabled) logger.warn(message) def warn(message: => String, throwable: Throwable): Unit = if (logger.isWarnEnabled) logger.warn(message, throwable) def error(message: => String): Unit = if (logger.isErrorEnabled) logger.error(message) def error(message: => String, throwable: Throwable): Unit = if (logger.isErrorEnabled) logger.error(message, throwable) } trait ScorexLogging { protected lazy val log: LoggerFacade = LoggerFacade(LoggerFactory.getLogger(this.getClass)) implicit class TaskExt[A](t: Task[A]) { def runAsyncLogErr(implicit s: Scheduler): CancelableFuture[A] = logErr.runToFuture(s) def logErr: Task[A] = t.onErrorHandleWith { ex => log.error(s"Error executing task", ex) Task.raiseError[A](ex) } } }
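LoggerFacade above takes messages by name and guards each call with isXxxEnabled, so string interpolation is skipped when the level is disabled. A minimal sketch of that idea with hypothetical names:

import org.slf4j.{Logger, LoggerFactory}

final class LazyLog(underlying: Logger) {
  def debug(message: => String): Unit =
    if (underlying.isDebugEnabled) underlying.debug(message)
  def error(message: => String, t: Throwable): Unit =
    if (underlying.isErrorEnabled) underlying.error(message, t)
}

object LazyLog {
  def apply(clazz: Class[_]): LazyLog = new LazyLog(LoggerFactory.getLogger(clazz))
}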
Example 52
Source File: MyJournalSpec.scala From akka-tools with MIT License | 5 votes |
package no.nextgentel.oss.akkatools.persistence.jdbcjournal import akka.persistence.CapabilityFlag import akka.persistence.journal.JournalSpec import akka.persistence.snapshot.SnapshotStoreSpec import com.typesafe.config.ConfigFactory import org.scalatest.BeforeAndAfter import org.slf4j.LoggerFactory class MyJournalSpec extends JournalSpec ( config = ConfigFactory.parseString( s""" |akka.persistence.query.jdbc-read-journal.configName = MyJournalSpec |jdbc-journal.configName = MyJournalSpec |jdbc-snapshot-store.configName = MyJournalSpec """.stripMargin).withFallback(ConfigFactory.load("application-test.conf"))) { val log = LoggerFactory.getLogger(getClass) val errorHandler = new JdbcJournalErrorHandler { override def onError(e: Exception): Unit = log.error("JdbcJournalErrorHandler.onError", e) } JdbcJournalConfig.setConfig("MyJournalSpec", JdbcJournalConfig(DataSourceUtil.createDataSource("MyJournalSpec"), Some(errorHandler), StorageRepoConfig(), new PersistenceIdParserImpl('-'))) override protected def supportsRejectingNonSerializableObjects: CapabilityFlag = false } class MySnapshotStoreSpec extends SnapshotStoreSpec ( config = ConfigFactory.parseString( s""" |akka.persistence.query.jdbc-read-journal.configName = MySnapshotStoreSpec |jdbc-journal.configName = MySnapshotStoreSpec |jdbc-snapshot-store.configName = MySnapshotStoreSpec """.stripMargin).withFallback(ConfigFactory.load("application-test.conf"))) with BeforeAndAfter { val log = LoggerFactory.getLogger(getClass) val errorHandler = new JdbcJournalErrorHandler { override def onError(e: Exception): Unit = log.error("JdbcJournalErrorHandler.onError", e) } JdbcJournalConfig.setConfig("MySnapshotStoreSpec", JdbcJournalConfig(DataSourceUtil.createDataSource("MySnapshotStoreSpec"), None, StorageRepoConfig(), new PersistenceIdParserImpl('-'))) }
Example 53
Source File: SeedNodesListOrderingResolver.scala From akka-tools with MIT License | 5 votes |
package no.nextgentel.oss.akkatools.cluster import java.util.concurrent.TimeUnit import org.slf4j.LoggerFactory import scala.concurrent.duration.FiniteDuration // Must be used together with ClusterListener object SeedNodesListOrderingResolver { val log = LoggerFactory.getLogger(getClass) def resolveSeedNodesList(repo:ClusterNodeRepo, clusterConfig:AkkaClusterConfig, maxAliveAge:FiniteDuration = FiniteDuration(20, TimeUnit.SECONDS)):AkkaClusterConfig = { val ourNode = clusterConfig.thisHostnameAndPort() // Since we're starting up, just make sure that we do not find info about ourself from our last run log.debug(s"removeClusterNodeAlive for $ourNode") repo.removeClusterNodeAlive(ourNode) val allSeedNodes = clusterConfig.seedNodes val weAreSeedNode = allSeedNodes.contains(ourNode) if ( !weAreSeedNode) { log.info("We are NOT a seedNode") } val aliveNodes = repo.findAliveClusterNodes(maxAliveAge, onlyJoined = false).map { node => // alive nodes are listed on this form: // akka.tcp://SomeAkkaSystem@host1:9999 // We must remove everything before hostname:port val index = node.indexOf('@') if ( index >= 0) node.substring(index+1) else node } val seedNodeListToUse = if ( aliveNodes.isEmpty ) { if (weAreSeedNode) { val allNodesExceptOur = allSeedNodes.filter(n => n != ourNode) val list = List(ourNode) ++ allNodesExceptOur log.info("No other clusterNodes found as alive - We must be first seed node - seedNodeListToUse: " + list) list } else { log.info("No other clusterNodes found as alive - Since we're not a seedNode, we're using the list as is - seedNodeListToUse: " + allSeedNodes) allSeedNodes } } else { if (weAreSeedNode) { val allNodesExceptOurAndAliveOnes = allSeedNodes.filter(n => n != ourNode && !aliveNodes.contains(n)) val list = aliveNodes ++ List(ourNode) ++ allNodesExceptOurAndAliveOnes log.info("Found other alive clusterNodes - we should not be first seed node. Alive cluster nodes: " + aliveNodes.mkString(",") + " - seedNodeListToUse: " + list) list } else { val allNodesExceptAliveOnes = allSeedNodes.filter(n => !aliveNodes.contains(n)) val list = aliveNodes ++ allNodesExceptAliveOnes log.info("Found other alive clusterNodes - Alive cluster nodes: " + aliveNodes.mkString(",") + " - seedNodeListToUse: " + list) list } } clusterConfig.withSeedNodeList(seedNodeListToUse) } }
Example 54
Source File: ClusterSingletonHelperTest.scala From akka-tools with MIT License | 5 votes |
package no.nextgentel.oss.akkatools.cluster import akka.actor.{Actor, ActorRef, ActorSystem, Props} import akka.testkit.{TestKit, TestProbe} import com.typesafe.config.ConfigFactory import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuiteLike, Matchers} import org.slf4j.LoggerFactory import scala.util.Random object ClusterSingletonHelperTest { val port = 20000 + Random.nextInt(20000) } class ClusterSingletonHelperTest (_system:ActorSystem) extends TestKit(_system) with FunSuiteLike with Matchers with BeforeAndAfterAll with BeforeAndAfter { def this() = this(ActorSystem("test-actor-system", ConfigFactory.parseString( s"""akka.actor.provider = "akka.cluster.ClusterActorRefProvider" |akka.remote.enabled-transports = ["akka.remote.netty.tcp"] |akka.remote.netty.tcp.hostname="localhost" |akka.remote.netty.tcp.port=${ClusterSingletonHelperTest.port} |akka.cluster.seed-nodes = ["akka.tcp://test-actor-system@localhost:${ClusterSingletonHelperTest.port}"] """.stripMargin ).withFallback(ConfigFactory.load("application-test.conf")))) override def afterAll { TestKit.shutdownActorSystem(system) } val log = LoggerFactory.getLogger(getClass) test("start and communicate with cluster-singleton") { val started = TestProbe() val proxy = ClusterSingletonHelper.startClusterSingleton(system, Props(new OurClusterSingleton(started.ref)), "ocl") started.expectMsg("started") val sender = TestProbe() sender.send(proxy, "ping") sender.expectMsg("pong") } } class OurClusterSingleton(started:ActorRef) extends Actor { started ! "started" def receive = { case "ping" => sender ! "pong" } }
Example 55
Source File: AggregateCmdMessageExtractor.scala From akka-tools with MIT License | 5 votes |
package no.nextgentel.oss.akkatools.aggregate import akka.cluster.sharding.ShardRegion.HashCodeMessageExtractor import akka.persistence.{SaveSnapshotFailure, SaveSnapshotSuccess} import no.nextgentel.oss.akkatools.persistence.{DurableMessage, DurableMessageReceived} import org.slf4j.LoggerFactory class AggregateCmdMessageExtractor(val maxNumberOfNodes:Int = 2, val shardsPrNode:Int = 10) extends HashCodeMessageExtractor(maxNumberOfNodes * shardsPrNode) { val log = LoggerFactory.getLogger(getClass) private def extractId(x:AnyRef):String = { x match { case a:AggregateCmd => if (a.id == null) { log.warn("id() returned null in message: " + x) } a.id case q:AnyRef => log.error("Do not know how to extract entryId for message of type " + x.getClass + ": " + x) null } } override def entityId(rawMessage: Any): String = { rawMessage match { case dm:DurableMessage => extractId(dm.payload) case dmr:DurableMessageReceived => dmr.confirmationRoutingInfo.map(_.toString).getOrElse { log.warn("DurableMessageReceived.getConfirmationRoutingInfo() returned null in message: " + rawMessage) null } case x:SaveSnapshotSuccess => // Ignoring this message to mitigate Akka-bug https://github.com/akka/akka/issues/19893 log.debug(s"Ignoring $x to mitigate akka issue 19893") null case x:SaveSnapshotFailure => // Ignoring this message to mitigate Akka-bug https://github.com/akka/akka/issues/19893 log.debug(s"Ignoring $x to mitigate akka issue 19893") null case x:AnyRef => extractId(x) } } }
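A hedged sketch of how the extractor above could be handed to Akka Cluster Sharding; the actor system, entity Props and type name are assumptions of this sketch, not part of the original example.

import akka.actor.{ActorRef, ActorSystem, Props}
import akka.cluster.sharding.{ClusterSharding, ClusterShardingSettings}

def startAggregateRegion(system: ActorSystem, aggregateProps: Props): ActorRef =
  ClusterSharding(system).start(
    "booking",                       // typeName (placeholder)
    aggregateProps,                  // Props of the aggregate actor
    ClusterShardingSettings(system), // default sharding settings
    new AggregateCmdMessageExtractor(maxNumberOfNodes = 2, shardsPrNode = 10))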
Example 56
Source File: GeneralAggregateWithShardingTest.scala From akka-tools with MIT License | 5 votes |
package no.nextgentel.oss.akkatools.aggregate import java.util.{Arrays, UUID} import akka.actor.ActorSystem import akka.actor.Status.Failure import akka.testkit.{TestKit, TestProbe} import com.typesafe.config.ConfigFactory import no.nextgentel.oss.akkatools.aggregate.testAggregate.StateName._ import no.nextgentel.oss.akkatools.aggregate.testAggregate.{StateName, _} import no.nextgentel.oss.akkatools.testing.AggregateTesting import org.scalatest._ import org.slf4j.LoggerFactory import scala.util.Random object GeneralAggregateWithShardingTest { val port = 20000 + Random.nextInt(20000) } class GeneralAggregateWithShardingTest(_system:ActorSystem) extends TestKit(_system) with FunSuiteLike with Matchers with BeforeAndAfterAll with BeforeAndAfter { def this() = this(ActorSystem("test-actor-system", ConfigFactory.parseString( s"""akka.actor.provider = "akka.cluster.ClusterActorRefProvider" |akka.remote.enabled-transports = ["akka.remote.netty.tcp"] |akka.remote.netty.tcp.hostname="localhost" |akka.remote.netty.tcp.port=${GeneralAggregateWithShardingTest.port} |akka.cluster.seed-nodes = ["akka.tcp://test-actor-system@localhost:${GeneralAggregateWithShardingTest.port}"] """.stripMargin ).withFallback(ConfigFactory.load("application-test.conf")))) override def afterAll { TestKit.shutdownActorSystem(system) } val log = LoggerFactory.getLogger(getClass) private def generateId() = UUID.randomUUID().toString val seatIds = List("s1","id-used-in-Failed-in-onAfterValidationSuccess", "s2", "s3-This-id-is-going-to-be-discarded", "s4") trait TestEnv extends AggregateTesting[BookingState] { val id = generateId() val printShop = TestProbe() val cinema = TestProbe() val onSuccessDmForwardReceiver = TestProbe() val starter = new AggregateStarterSimple("booking", system).withAggregatePropsCreator { dmSelf => BookingAggregate.props(dmSelf, dmForwardAndConfirm(printShop.ref).path, dmForwardAndConfirm(cinema.ref).path, seatIds, dmForwardAndConfirm(onSuccessDmForwardReceiver.ref).path) } val main = starter.dispatcher starter.start() def assertState(correctState:BookingState): Unit = { assert(getState(id) == correctState) } } test("normal flow") { new TestEnv { // Make sure we start with empty state assertState(BookingState.empty()) val maxSeats = 2 val sender = TestProbe() // Open the booking println("1") sendDMBlocking(main, OpenBookingCmd(id, maxSeats), sender.ref) println("2") assertState(BookingState(OPEN, maxSeats, Set())) } } }
Example 57
Source File: GeneralAggregateBaseTest_handleSnapshotMessages.scala From akka-tools with MIT License | 5 votes |
package no.nextgentel.oss.akkatools.aggregate.aggregateTest_usingAggregateStateBase

import java.util.UUID

import akka.actor.{ActorPath, ActorSystem, Props}
import akka.persistence.{DeleteMessagesFailure, DeleteMessagesSuccess, SaveSnapshotFailure, SaveSnapshotSuccess, SnapshotMetadata, SnapshotOffer}
import akka.testkit.{TestKit, TestProbe}
import com.typesafe.config.ConfigFactory
import no.nextgentel.oss.akkatools.aggregate._
import no.nextgentel.oss.akkatools.testing.AggregateTesting
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuiteLike, Matchers}
import org.slf4j.LoggerFactory

  override def onSnapshotOffer(offer: SnapshotOffer): Unit = {
    state = offer.snapshot.asInstanceOf[StringState]
  }

  override def acceptSnapshotRequest(req: SaveSnapshotOfCurrentState): Boolean = {
    if (state == StringState("WAT")) {
      state = StringState("SAVED")
      true
    } else {
      state = StringState("WAT") // So it works the second time
      false
    }
  }

  override def onSnapshotSuccess(success: SaveSnapshotSuccess): Unit = {
    state = StringState("SUCCESS_SNAP")
  }

  override def onSnapshotFailure(failure: SaveSnapshotFailure): Unit = {
    state = StringState("FAIL_SNAP")
  }

  override def onDeleteMessagesSuccess(success: DeleteMessagesSuccess): Unit = {
    state = StringState("SUCCESS_MSG")
  }

  override def onDeleteMessagesFailure(failure: DeleteMessagesFailure): Unit = {
    state = StringState("FAIL_MSG")
  }

  // Used as prefix/base when constructing the persistenceId to use - the unique ID is extracted
  // at runtime from the actorPath, which is constructed by the Sharding coordinator
  override def persistenceIdBase(): String = "/x/"
}

case class StringEv(data: String)

case class StringState(data: String) extends AggregateStateBase[StringEv, StringState] {
  override def transitionState(event: StringEv): StateTransition[StringEv, StringState] = StateTransition(StringState(event.data))
}
Example 58
Source File: HomeControllerSpec.scala From phantom-activator-template with Apache License 2.0 | 5 votes |
package controllers import org.scalatest.{BeforeAndAfterAll, MustMatchers, WordSpec} import org.scalatestplus.play.guice.GuiceOneAppPerTest import org.slf4j.LoggerFactory import play.api.test.Helpers._ import play.api.test._ class HomeControllerSpec extends WordSpec with GuiceOneAppPerTest with MustMatchers with BeforeAndAfterAll { private val logger = LoggerFactory.getLogger("embedded-cassandra") override protected def beforeAll(): Unit = { EmbeddedCassandra.start(logger) } override protected def afterAll(): Unit = { EmbeddedCassandra.cleanup(logger) } "Application" should { "render the index page" in { val result = route(app, FakeRequest(GET, "/")).get status(result) must equal(OK) contentAsString(result) must include("Spring Bud") } } }
Example 59
Source File: OntologyHubClientTest.scala From daf-semantics with Apache License 2.0 | 5 votes |
package clients import java.nio.file.Paths import org.junit.After import org.junit.Assert import org.junit.Assume import org.junit.Before import org.junit.BeforeClass import org.junit.Test import org.slf4j.LoggerFactory import play.Logger import utilities.Adapters.AwaitFuture import clients.HTTPClient object OntologyHubClientTest { val logger = LoggerFactory.getLogger(this.getClass) @BeforeClass def check_before() { Assume.assumeTrue(ontonethub_is_running) logger.info("Ontonethub is UP! [TESTING...]") } private def ontonethub_is_running = { val client = HTTPClient client.start() val ontonethub = new OntonetHubClient(client.ws) val check = ontonethub.status().await client.stop() check } }
Example 60
Source File: CatalogStandardizationService.scala From daf-semantics with Apache License 2.0 | 5 votes |
package it.almawave.kb.http.endpoints import javax.inject.Singleton import javax.ws.rs.Path import org.slf4j.LoggerFactory import it.almawave.kb.http.models.OntologyMetaModel import com.typesafe.config.ConfigFactory import java.nio.file.Paths import it.almawave.linkeddata.kb.catalog.CatalogBox import it.almawave.linkeddata.kb.utils.JSONHelper import it.almawave.daf.standardization.refactoring.CatalogStandardizer @Singleton @Path("conf://api-catalog-config") class CatalogStandardizationService { private val logger = LoggerFactory.getLogger(this.getClass) val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile()) val catalog = new CatalogBox(conf) catalog.start() val _standardizer = CatalogStandardizer(catalog) _standardizer.start def stardardizer = _standardizer // TODO: STOP? }
Example 61
Source File: Status.scala From daf-semantics with Apache License 2.0 | 5 votes |
package it.almawave.kb.http.endpoints import java.time.LocalTime import io.swagger.annotations.Api import javax.ws.rs.Path import javax.ws.rs.GET import javax.ws.rs.Produces import io.swagger.annotations.ApiOperation import javax.ws.rs.core.MediaType import org.slf4j.LoggerFactory import javax.ws.rs.core.Context import javax.ws.rs.core.UriInfo import javax.ws.rs.core.Request import it.almawave.linkeddata.kb.utils.JSONHelper import java.time.LocalDateTime import java.time.ZonedDateTime import java.time.format.DateTimeFormatter import java.util.Locale import java.time.ZoneId @Api(tags = Array("catalog")) @Path("/status") class Status { private val logger = LoggerFactory.getLogger(this.getClass) @Context var uriInfo: UriInfo = null @GET @Produces(Array(MediaType.APPLICATION_JSON)) @ApiOperation(nickname = "status", value = "endpoint status") def status() = { val base_uri = uriInfo.getBaseUri val msg = s"the service is running at ${base_uri}" logger.info(msg) val _now = now() StatusMsg(_now._1, _now._2, msg) } def now() = { val zdt = ZonedDateTime.now(ZoneId.of("+1")) val dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ") (zdt.format(dtf), zdt) } } case class StatusMsg( now: String, dateTime: ZonedDateTime, msg: String )
Example 62
Source File: StandardizationQueryV1.scala From daf-semantics with Apache License 2.0 | 5 votes |
package it.almawave.daf.standardization.v1 import com.typesafe.config.Config import java.nio.file.Paths import java.nio.file.Files import it.almawave.linkeddata.kb.catalog.VocabularyBox import java.io.FileFilter import java.io.File import java.nio.file.Path import org.slf4j.LoggerFactory def details(voc_box: VocabularyBox, level: Int, uri: String, lang: String) = { val onto_id = detect_ontology(voc_box) val query_path: Path = detailsQueryFile(onto_id) .map(_.toPath()) .getOrElse(default_query_details) // disabled for too many logs! logger.debug(s"daf.standardization> try ${voc_box.id} with details query: ${query_path}") val query = new String(Files.readAllBytes(query_path)) query .replace("${vocabularyID}", voc_box.id) .replace("${level}", level.toString()) .replace("${uri}", uri) .replace("${lang}", lang) } }
Example 63
Source File: MainSingleStandardization.scala From daf-semantics with Apache License 2.0 | 5 votes |
package it.almawave.daf.standardization.refactoring import org.slf4j.LoggerFactory import java.nio.file.Paths import it.almawave.linkeddata.kb.catalog.CatalogBox import com.typesafe.config.ConfigFactory import it.almawave.linkeddata.kb.utils.JSONHelper import it.almawave.linkeddata.kb.catalog.VocabularyBox object MainSingleStandardization extends App { private val logger = LoggerFactory.getLogger(this.getClass) val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile()) val catalog = new CatalogBox(conf) catalog.start() // val vocID = "legal-status" // val vocID = "theme-subtheme-mapping" val vocID = "licences" val std: VocabularyStandardizer = CatalogStandardizer(catalog).getVocabularyStandardizerByID(vocID).get std.start // println("\n\nCSV") // std.toCSV()(System.out) // // println("\n\nTREE") val tree = std.toJSONTree() val json_tree = JSONHelper.writeToString(tree) println(json_tree) println("\n\nMETA") val meta = std.getMetadata() val json_meta = JSONHelper.writeToString(meta) println(json_meta) std.stop catalog.stop() // TODO: verify the closing of all active connections } object MainStandardizationAll extends App { private val logger = LoggerFactory.getLogger(this.getClass) val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile()) val catalog = new CatalogBox(conf) catalog.start() val std = CatalogStandardizer(catalog) std.start val list = std.getVocabularyStandardizersList() list.foreach { vstd => // println(s"\n\nCSV for ${vstd.vbox}") vstd.toCSV()(System.out) } std.stop catalog.stop() System.exit(0) }
Example 64
Source File: NO_MainAllStandardization.scala From daf-semantics with Apache License 2.0 | 5 votes |
package it.almawave.daf.standardization.refactoring import org.slf4j.LoggerFactory import java.nio.file.Paths import com.typesafe.config.ConfigFactory import it.almawave.linkeddata.kb.catalog.CatalogBox import scala.util.Try object NO_MainAllStandardization extends App { private val logger = LoggerFactory.getLogger(this.getClass) val conf = ConfigFactory.parseFile(Paths.get("./conf/catalog.conf").normalize().toFile()) val catalog = new CatalogBox(conf) catalog.start() CatalogStandardizer(catalog).getVocabularyStandardizersList() .zipWithIndex .slice(1, 2) .toList .foreach { case (std, i) => Try { println(s"""\n\n$i: ${std.vbox}""") println("\n\nCSV_______________________________________") std.toCSV()(System.out) println("\n\n__________________________________________") } } catalog.stop() }
Example 65
Source File: KafkaSink.scala From spark-kafka-sink with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics.sink import java.util.{ Properties, Locale } import java.util.concurrent.TimeUnit import org.slf4j.Logger import org.slf4j.LoggerFactory import com.codahale.metrics.MetricRegistry import org.apache.spark.SecurityManager import com.manyangled.kafkasink.KafkaReporter class KafkaSink(val properties: Properties, val registry: MetricRegistry, securityMgr: SecurityManager) extends org.apache.spark.metrics.sink.Sink { val logger: Logger = LoggerFactory.getLogger(this.getClass) private def popt(prop: String): Option[String] = Option(properties.getProperty(prop)) // These are non-negotiable val broker = popt("broker").get val topic = popt("topic").get lazy val reporter = new KafkaReporter(registry, broker, topic, properties) def start(): Unit = { logger.info(s"Starting Kafka metric reporter at $broker, topic $topic") val period = popt("period").getOrElse("10").toLong val tstr = popt("unit").getOrElse("seconds").toUpperCase(Locale.ROOT) val tunit = TimeUnit.valueOf(tstr) reporter.start(period, tunit) } def stop(): Unit = { logger.info(s"Stopping Kafka metric reporter at $broker, topic $topic") reporter.stop() } def report(): Unit = { logger.info(s"Reporting metrics to Kafka reporter at $broker, topic $topic") reporter.report() } }
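One possible way to register this sink, sketched under the assumption that Spark's spark.metrics.conf.* passthrough is used instead of a metrics.properties file; the broker and topic values are placeholders, while the key names mirror the "broker", "topic", "period" and "unit" properties KafkaSink reads above.

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.metrics.conf.*.sink.kafka.class", "org.apache.spark.metrics.sink.KafkaSink")
  .set("spark.metrics.conf.*.sink.kafka.broker", "localhost:9092") // placeholder
  .set("spark.metrics.conf.*.sink.kafka.topic", "spark-metrics")   // placeholder
  .set("spark.metrics.conf.*.sink.kafka.period", "10")
  .set("spark.metrics.conf.*.sink.kafka.unit", "seconds")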
Example 66
Source File: Demo2iConfig.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.examples.demos.fbl import com.basho.riak.client.core.RiakNode import com.basho.riak.client.core.query.Namespace import com.basho.riak.spark.rdd._ import com.basho.riak.client.core.query.indexes.LongIntIndex import com.basho.riak.spark.rdd.connector.RiakConnectorConf import com.basho.riak.spark.rdd.{RiakFunctions, BucketDef} import com.basho.riak.spark.util.RiakObjectConversionUtil import com.basho.riak.spark.writer.{WriteDataMapperFactory, WriteDataMapper} import org.slf4j.{LoggerFactory, Logger} import com.basho.riak.spark._ import com.basho.riak.client.core.query.{RiakObject, Namespace} import com.basho.riak.client.api.annotations.{RiakKey, RiakIndex} import org.apache.spark.{SparkConf, SparkContext} case class Demo2iConfig(riakConf: RiakConnectorConf, index: String, bucket: String, from: Long, to: Long, name:String){ def riakNodeBuilder(minConnections:Int = 2):RiakNode.Builder = { val firstTheWinner = riakConf.hosts.iterator.next() new RiakNode.Builder() .withMinConnections(minConnections) .withRemoteAddress(firstTheWinner.getHost) .withRemotePort(firstTheWinner.getPort) } } object Demo2iConfig{ val DEFAULT_INDEX_NAME = "creationNo" val DEFAULT_BUCKET_NAME = "test-bucket" val DEFAULT_FROM = 1 val DEFAULT_TO = 4 def apply(sparkConf: SparkConf):Demo2iConfig = { Demo2iConfig( riakConf = RiakConnectorConf(sparkConf), index = sparkConf.get("spark.riak.demo.index", DEFAULT_INDEX_NAME), bucket = sparkConf.get("spark.riak.demo.bucket", DEFAULT_BUCKET_NAME), from = sparkConf.get("spark.riak.demo.from", DEFAULT_FROM.toString).toLong, to = sparkConf.get("spark.riak.demo.to", DEFAULT_TO.toString).toLong, name = sparkConf.get("spark.app.name", "") ) } }
Example 67
Source File: ClientHandler.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.stub import java.nio.ByteBuffer import java.nio.channels.{AsynchronousSocketChannel, Channel, CompletionHandler} import com.basho.riak.client.core.netty.RiakMessageCodec import com.basho.riak.stub.ClientHandler._ import org.slf4j.LoggerFactory class ClientHandler(val messageHandler: RiakMessageHandler) extends RiakMessageCodec with CompletionHandler[Integer, (AsynchronousSocketChannel, ByteBuffer)] { override def completed(result: Integer, attachment: (AsynchronousSocketChannel, ByteBuffer)): Unit = attachment match { case (channel, buffer) if result > 0 => logger.info(s"Message received ${SocketUtils.serverConnectionAsStr(channel)} ($result bytes).") RiakMessageEncoder.decode(buffer.rewind().asInstanceOf[ByteBuffer]) match { case Some(m) if channel.isOpen => val msgs = messageHandler.handle(new Context(channel), m) val encoded = RiakMessageEncoder.encode(msgs.toSeq: _*) val bytes = channel.write(encoded).get assert(bytes == encoded.position()) logger.info(s"Response sent ${SocketUtils.clientConnectionAsStr(channel)} ($bytes bytes).") messageHandler.onRespond(m, msgs) case Some(m) if !channel.isOpen => logger.warn("Impossible to write message to channel: channel has been already closed") case None => // TODO: handle case with no message } buffer.clear() channel.read(buffer, (channel, buffer), this) case _ => } override def failed(exc: Throwable, attachment: (AsynchronousSocketChannel, ByteBuffer)): Unit = attachment match { case (channel, _) if channel.isOpen => logger.error(s"Something went wrong with client ${SocketUtils.serverConnectionAsStr(channel)}", exc) disconnectClient(channel) case _ => // channel is already closed - do nothing } def disconnectClient(client: AsynchronousSocketChannel): Unit = this.synchronized { client.isOpen match { case true => val connectionString = SocketUtils.serverConnectionAsStr(client) client.shutdownInput() client.shutdownOutput() client.close() logger.info(s"Client $connectionString was gracefully disconnected") case false => // client is already closed - do nothing } } } object ClientHandler { val logger = LoggerFactory.getLogger(classOf[ClientHandler]) class Context(val channel: Channel) }
Example 68
Source File: RiakNodeStub.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.stub import java.net.InetSocketAddress import java.nio.ByteBuffer import java.nio.channels.{AsynchronousCloseException, AsynchronousServerSocketChannel, AsynchronousSocketChannel, CompletionHandler} import com.basho.riak.client.core.util.HostAndPort import com.basho.riak.stub.RiakNodeStub._ import org.slf4j.LoggerFactory class RiakNodeStub(val host: String, val port: Int, messageHandler: RiakMessageHandler) { private final val localAddress = new InetSocketAddress(host, port) private final val clientHandler = new ClientHandler(messageHandler) private var serverChannel: AsynchronousServerSocketChannel = _ private var clients: List[AsynchronousSocketChannel] = Nil def start(): HostAndPort = { serverChannel = AsynchronousServerSocketChannel.open() require(serverChannel.isOpen) serverChannel.bind(localAddress) serverChannel.accept(serverChannel, new CompletionHandler[AsynchronousSocketChannel, AsynchronousServerSocketChannel]() { override def completed(client: AsynchronousSocketChannel, server: AsynchronousServerSocketChannel): Unit = { logger.info(s"Incoming connection: ${SocketUtils.serverConnectionAsStr(client)}") this.synchronized { clients = client :: clients } val buffer = ByteBuffer.allocateDirect(1024) // scalastyle:ignore client.read(buffer, (client, buffer), clientHandler) server.accept(server, this) } override def failed(exc: Throwable, serverChannel: AsynchronousServerSocketChannel): Unit = exc match { case _: AsynchronousCloseException => case _ => logger.error(s"Something went wrong: ${serverChannel.toString}", exc); } }) HostAndPort.fromParts( serverChannel.getLocalAddress.asInstanceOf[InetSocketAddress].getHostString, serverChannel.getLocalAddress.asInstanceOf[InetSocketAddress].getPort) } def stop(): Unit = this.synchronized { Option(serverChannel).foreach(_.close) clients.foreach(clientHandler.disconnectClient) } } object RiakNodeStub { val logger = LoggerFactory.getLogger(classOf[RiakNodeStub]) final val DEFAULT_HOST = "localhost" def apply(host: String, port: Int, messageHandler: RiakMessageHandler): RiakNodeStub = new RiakNodeStub(host, port, messageHandler) def apply(port: Int, messageHandler: RiakMessageHandler): RiakNodeStub = RiakNodeStub(DEFAULT_HOST, port, messageHandler) def apply(messageHandler: RiakMessageHandler): RiakNodeStub = RiakNodeStub(DEFAULT_HOST, 0, messageHandler) }
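A hedged usage sketch of the stub above; myMessageHandler stands in for some RiakMessageHandler implementation and is an assumption of this sketch.

def runAgainstStub(myMessageHandler: RiakMessageHandler): Unit = {
  val stub = RiakNodeStub(myMessageHandler) // binds to localhost on an ephemeral port
  val hostAndPort = stub.start()
  // ... point a Riak client or test fixture at hostAndPort ...
  stub.stop()
}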
Example 69
Source File: AbstractRiakTest.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.rdd import com.basho.riak.JsonTestFunctions import com.basho.riak.client.core.RiakNode import com.basho.riak.client.core.query.Namespace import org.junit._ import org.junit.rules.TestWatcher import org.junit.runner.Description import org.slf4j.{Logger, LoggerFactory} abstract class AbstractRiakTest extends RiakFunctions with JsonTestFunctions { private final val logger: Logger = LoggerFactory.getLogger(this.getClass) protected val DEFAULT_NAMESPACE = new Namespace("default","test-bucket") protected val DEFAULT_NAMESPACE_4STORE = new Namespace("default", "test-bucket-4store") protected override val numberOfParallelRequests: Int = 4 protected override val nodeBuilder: RiakNode.Builder = new RiakNode.Builder().withMinConnections(numberOfParallelRequests) protected val jsonData: Option[String] = None @Rule def watchman: TestWatcher = new TestWatcher() { override def starting(description: Description): Unit = { super.starting(description) logger.info( "\n----------------------------------------\n" + " [TEST STARTED] {}\n" + "----------------------------------------\n", description.getDisplayName) } override def finished(description: Description): Unit = { super.finished(description) logger.info( "\n----------------------------------------\n" + " [TEST FINISHED] {}\n" + "----------------------------------------\n", description.getDisplayName) } } @Before protected def initialize(): Unit = setupData() protected def setupData(): Unit = { // Purge data: data might be not only created, but it may be also changed during the previous test case execution // // For manual check: curl -v http://localhost:10018/buckets/test-bucket/keys?keys=true List(DEFAULT_NAMESPACE, DEFAULT_NAMESPACE_4STORE) foreach resetAndEmptyBucket withRiakDo(session => jsonData.foreach(createValues(session, DEFAULT_NAMESPACE, _))) } }
Example 70
Source File: UnorderedParallelParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import java.util.UUID import akka.Done import akka.stream.scaladsl.{Flow, Keep, Sink} import org.apache.hadoop.fs.Path import org.apache.parquet.schema.MessageType import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future private[parquet4s] object UnorderedParallelParquetSink extends IOOps { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path, parallelism: Int, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[T, Future[Done]] = { val schema = ParquetSchemaResolver.resolveSchema[T] val valueCodecConfiguration = options.toValueCodecConfiguration validateWritePath(path, options) def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration) Flow[T] .zipWithIndex .groupBy(parallelism, elemAndIndex => Math.floorMod(elemAndIndex._2, parallelism)) .map(elemAndIndex => encode(elemAndIndex._1)) .fold(UnorderedChunk(path, schema, options))(_.write(_)) .map(_.close()) .async .mergeSubstreamsWithParallelism(parallelism) .toMat(Sink.ignore)(Keep.right) } private trait UnorderedChunk { def write(record: RowParquetRecord): UnorderedChunk def close(): Unit } private object UnorderedChunk { def apply(basePath: Path, schema: MessageType, options: ParquetWriter.Options): UnorderedChunk = new PendingUnorderedChunk(basePath, schema, options) private[UnorderedChunk] class PendingUnorderedChunk(basePath: Path, schema: MessageType, options: ParquetWriter.Options) extends UnorderedChunk { override def write(record: RowParquetRecord): UnorderedChunk = { val chunkPath = Path.mergePaths(basePath, new Path(s"/part-${UUID.randomUUID()}.parquet")) val writer = ParquetWriter.internalWriter(chunkPath, schema, options) writer.write(record) new StartedUnorderedChunk(chunkPath, writer, acc = 1) } override def close(): Unit = () } private[UnorderedChunk] class StartedUnorderedChunk(chunkPath: Path, writer: ParquetWriter.InternalWriter, acc: Long ) extends UnorderedChunk { override def write(record: RowParquetRecord): UnorderedChunk = { writer.write(record) new StartedUnorderedChunk(chunkPath, writer, acc = acc + 1) } override def close(): Unit = { if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath") writer.close() } } } }
Example 71
Source File: IndefiniteStreamParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import akka.stream.FlowShape import akka.stream.scaladsl.{Broadcast, Flow, GraphDSL, Keep, Sink, ZipWith} import com.github.mjakubowski84.parquet4s.ParquetWriter.ParquetWriterFactory import org.apache.hadoop.fs.Path import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.duration.FiniteDuration private[parquet4s] object IndefiniteStreamParquetSink extends IOOps { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[In, ToWrite: ParquetWriterFactory, Mat](path: Path, maxChunkSize: Int, chunkWriteTimeWindow: FiniteDuration, buildChunkPath: ChunkPathBuilder[In] = ChunkPathBuilder.default, preWriteTransformation: In => ToWrite = identity[In] _, postWriteSink: Sink[Seq[In], Mat] = Sink.ignore, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[In, Mat] = { validateWritePath(path, options) val internalFlow = Flow.fromGraph(GraphDSL.create() { implicit b => import GraphDSL.Implicits._ val inChunkFlow = b.add(Flow[In].groupedWithin(maxChunkSize, chunkWriteTimeWindow)) val broadcastChunks = b.add(Broadcast[Seq[In]](outputPorts = 2)) val writeFlow = Flow[Seq[In]].map { chunk => val toWrite = chunk.map(preWriteTransformation) val chunkPath = buildChunkPath(path, chunk) if (logger.isDebugEnabled()) logger.debug(s"Writing ${toWrite.size} records to $chunkPath") ParquetWriter.writeAndClose(chunkPath.toString, toWrite, options) } val zip = b.add(ZipWith[Seq[In], Unit, Seq[In]]((chunk, _) => chunk)) inChunkFlow ~> broadcastChunks ~> writeFlow ~> zip.in1 broadcastChunks ~> zip.in0 FlowShape(inChunkFlow.in, zip.out) }) internalFlow.toMat(postWriteSink)(Keep.right) } }
Example 72
Source File: SingleFileParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import akka.Done import akka.stream.scaladsl.{Flow, Keep, Sink} import org.apache.hadoop.fs.Path import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future private[parquet4s] object SingleFileParquetSink { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[T, Future[Done]] = { val schema = ParquetSchemaResolver.resolveSchema[T] val writer = ParquetWriter.internalWriter(path, schema, options) val valueCodecConfiguration = options.toValueCodecConfiguration val isDebugEnabled = logger.isDebugEnabled def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration) Flow[T] .map(encode) .fold(0) { case (acc, record) => writer.write(record); acc + 1} .map { count => if (isDebugEnabled) logger.debug(s"$count records were successfully written to $path") writer.close() } .toMat(Sink.ignore)(Keep.right) } }
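The isDebugEnabled guard above keeps the interpolated message from being built when debug logging is off. A standalone restatement of that pattern (object and method names are hypothetical):

import org.slf4j.{Logger, LoggerFactory}

object WriteReporter {
  private val logger: Logger = LoggerFactory.getLogger(this.getClass)

  def report(count: Long, path: String): Unit =
    if (logger.isDebugEnabled) logger.debug(s"$count records were successfully written to $path")
}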
Example 73
Source File: SequentialFileSplittingParquetSink.scala From parquet4s with MIT License | 5 votes |
package com.github.mjakubowski84.parquet4s import akka.Done import akka.stream.scaladsl.{Flow, Keep, Sink} import org.apache.hadoop.fs.Path import org.apache.parquet.schema.MessageType import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future private[parquet4s] object SequentialFileSplittingParquetSink extends IOOps { protected val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply[T: ParquetRecordEncoder : ParquetSchemaResolver](path: Path, maxRecordsPerFile: Long, options: ParquetWriter.Options = ParquetWriter.Options() ): Sink[T, Future[Done]] = { val schema = ParquetSchemaResolver.resolveSchema[T] val valueCodecConfiguration = options.toValueCodecConfiguration validateWritePath(path, options) def encode(data: T): RowParquetRecord = ParquetRecordEncoder.encode[T](data, valueCodecConfiguration) Flow[T] .zipWithIndex .map { case (elem, index) => OrderedChunkElem(encode(elem), index) } .fold(OrderedChunk(path, schema, maxRecordsPerFile, options))(_.write(_)) .map(_.close()) .toMat(Sink.ignore)(Keep.right) } private case class OrderedChunkElem(record: RowParquetRecord, index: Long) { def isSplit(maxRecordsPerFile: Long): Boolean = index % maxRecordsPerFile == 0 } private trait OrderedChunk { def write(elem: OrderedChunkElem): OrderedChunk def close(): Unit } private object OrderedChunk { def apply(basePath: Path, schema: MessageType, maxRecordsPerFile: Long, options: ParquetWriter.Options): OrderedChunk = new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options) private[OrderedChunk] class PendingOrderedChunk(basePath: Path, schema: MessageType, maxRecordsPerFile: Long, options: ParquetWriter.Options) extends OrderedChunk { override def write(elem: OrderedChunkElem): OrderedChunk = { val chunkNumber: Int = Math.floorDiv(elem.index, maxRecordsPerFile).toInt val chunkPath = Path.mergePaths(basePath, new Path(chunkFileName(chunkNumber))) val writer = ParquetWriter.internalWriter(chunkPath, schema, options) writer.write(elem.record) new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = 1) } override def close(): Unit = () private def chunkFileName(chunkNumber: Int): String = f"/part-$chunkNumber%05d.parquet" } private[OrderedChunk] class StartedOrderedChunk(basePath: Path, schema: MessageType, maxRecordsPerFile: Long, options: ParquetWriter.Options, chunkPath: Path, writer: ParquetWriter.InternalWriter, acc: Long) extends OrderedChunk { override def write(elem: OrderedChunkElem): OrderedChunk = { if (elem.isSplit(maxRecordsPerFile)) { this.close() new PendingOrderedChunk(basePath, schema, maxRecordsPerFile, options).write(elem) } else { writer.write(elem.record) new StartedOrderedChunk(basePath, schema, maxRecordsPerFile, options, chunkPath, writer, acc = acc + 1) } } override def close(): Unit = { if (logger.isDebugEnabled) logger.debug(s"$acc records were successfully written to $chunkPath") writer.close() } } } }
Example 74
Source File: SddfApp.scala From sddf with GNU General Public License v3.0 | 5 votes |
package de.unihamburg.vsis.sddf import org.joda.time.format.PeriodFormatterBuilder import org.slf4j.Logger import org.slf4j.LoggerFactory import de.unihamburg.vsis.sddf.config.Config import scopt.Read import scopt.OptionParser class SddfApp extends App { val periodFormatter = (new PeriodFormatterBuilder() minimumPrintedDigits (2) printZeroAlways () appendDays () appendSeparator ("d ") appendHours () appendSeparator (":") appendMinutes () appendSuffix (":") appendSeconds () appendSeparator (".") minimumPrintedDigits (3) appendMillis () toFormatter) @transient var _log: Logger = null // Method to get or create the logger for this object def log(): Logger = { if (_log == null) { _log = LoggerFactory.getLogger(getClass.getName) } _log } @transient var _logLineage: Logger = null // Method to get or create the logger for this object def logLineage(): Logger = { if (_logLineage == null) { _logLineage = LoggerFactory.getLogger("lineage") } _logLineage } // extend Parser to accept the type Option implicit val optionRead: Read[Option[String]] = Read.reads(Some(_)) // parsing commandline parameters val parser = new OptionParser[Parameters]("sddf") { head("SddF", "0.1.0") opt[Map[String, String]]('p', "properties") optional() valueName("<property>") action { (x, c) => c.copy(properties = x) } text("set arbitrary properties via command line") opt[Option[String]]('c', "config-file") optional() action { (x, c) => c.copy(propertyPath = x) } text("optional path to a property file") } // parser.parse returns Option[C] val parameters = parser.parse(args, Parameters()) var propertiesCommandline: Map[String, String] = Map() var propertiesPath: Option[String] = None parameters match { case Some(config) => propertiesCommandline = config.properties propertiesPath = config.propertyPath case None => // arguments are bad, error message will have been displayed } val Conf: Config = if(propertiesPath.isDefined) new Config(propertiesPath.get) else new Config() propertiesCommandline.foreach(props => { Conf.setPropertyCommandline(props._1, props._2) }) } case class Parameters(propertyPath: Option[String] = None, properties: Map[String,String] = Map())
Example 75
Source File: Logging.scala From sddf with GNU General Public License v3.0 | 5 votes |
package de.unihamburg.vsis.sddf.logging import org.slf4j.LoggerFactory import com.typesafe.scalalogging.slf4j.Logger trait Logging { @transient protected var _log: Logger = null // Method to get or create the logger for this object protected def log: Logger = { if (_log == null) { _log = Logger(LoggerFactory.getLogger(getClass)) } _log } }
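A minimal sketch of mixing in the Logging trait above; the class name and message are hypothetical.

class DeduplicationStep extends Logging {
  def run(): Unit = log.info("deduplication step started")
}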
Example 76
Source File: MetaCatalogProcessor.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.gov.daf.ingestion.metacatalog import com.typesafe.config.ConfigFactory import play.api.libs.json._ import it.gov.daf.catalogmanager._ import it.gov.daf.catalogmanager.json._ import org.slf4j.{Logger, LoggerFactory} import org.apache.commons.lang.StringEscapeUtils //Get Logical_uri, process MetadataCatalog and get the required info class MetaCatalogProcessor(metaCatalog: MetaCatalog) { val logger: Logger = LoggerFactory.getLogger(this.getClass) val sftpDefPrefix = ConfigFactory.load().getString("ingmgr.sftpdef.prefixdir") def separator() = { metaCatalog.operational .input_src.sftp .flatMap(_.headOption) .flatMap(_.param) .flatMap(_.split(", ").reverse.headOption) .map(_.replace("sep=", "")) .getOrElse(",") } def fileFormatNifi(): String = { val inputSftp = metaCatalog.operational.input_src.sftp inputSftp match { case Some(s) => val sftps: Seq[SourceSftp] = s.filter(x => x.name.equals("sftp_daf")) if (sftps.nonEmpty) sftps.head.param.getOrElse("") else "" case None => "" } } def ingPipelineNifi(): String = { ingPipeline.mkString(",") } }
Example 77
Source File: KuduController.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import org.apache.kudu.spark.kudu._ import org.apache.spark.sql.{ DataFrame, SparkSession } import org.slf4j.{ Logger, LoggerFactory } import scala.util.{ Failure, Try } class KuduController(sparkSession: SparkSession, master: String) { val alogger: Logger = LoggerFactory.getLogger(this.getClass) def readData(table: String): Try[DataFrame] = Try{ sparkSession .sqlContext .read .options(Map("kudu.master" -> master, "kudu.table" -> table)).kudu }.recoverWith { case ex => alogger.error(s"Exception ${ex.getMessage}\n ${ex.getStackTrace.mkString("\n")} ") Failure(ex) } }
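A hedged usage sketch of the controller above; the Kudu master address and table name are placeholders and the SparkSession is passed in by the caller.

import org.apache.spark.sql.SparkSession
import scala.util.{Failure, Success}

def previewKuduTable(sparkSession: SparkSession): Unit = {
  val kudu = new KuduController(sparkSession, "kudu-master:7051") // placeholder master address
  kudu.readData("impala::default.events") match {                 // placeholder table name
    case Success(df) => df.show(10)
    case Failure(_)  => // the controller has already logged the failure
  }
}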
Example 78
Source File: PhysicalDatasetController.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import cats.syntax.show.toShow import com.typesafe.config.Config import daf.dataset.{ DatasetParams, FileDatasetParams, KuduDatasetParams } import daf.filesystem.fileFormatShow import org.apache.spark.sql.{ DataFrame, SparkSession } import org.apache.spark.SparkConf import org.slf4j.{ Logger, LoggerFactory } class PhysicalDatasetController(sparkSession: SparkSession, kuduMaster: String, defaultLimit: Option[Int] = None, defaultChunkSize: Int = 0) { lazy val kuduController = new KuduController(sparkSession, kuduMaster) lazy val hdfsController = new HDFSController(sparkSession) val logger: Logger = LoggerFactory.getLogger(this.getClass) private def addLimit(dataframe: DataFrame, limit: Option[Int]) = (limit, defaultLimit) match { case (None, None) => dataframe case (None, Some(value)) => dataframe.limit { value } case (Some(value), None) => dataframe.limit { value } case (Some(value), Some(default)) => dataframe.limit { math.min(value, default) } } def kudu(params: KuduDatasetParams, limit: Option[Int] = None) = { logger.debug { s"Reading data from kudu table [${params.table}]" } kuduController.readData(params.table).map { addLimit(_, limit) } } def hdfs(params: FileDatasetParams, limit: Option[Int] = None) = { logger.debug { s"Reading data from hdfs at path [${params.path}]" } hdfsController.readData(params.path, params.format.show, params.param("separator")).map { addLimit(_, limit) } } def get(params: DatasetParams, limit: Option[Int]= None) = params match { case kuduParams: KuduDatasetParams => kudu(kuduParams, limit) case hdfsParams: FileDatasetParams => hdfs(hdfsParams, limit) } } object PhysicalDatasetController { private def getOptionalString(path: String, underlying: Config) = { if (underlying.hasPath(path)) { Some(underlying.getString(path)) } else { None } } private def getOptionalInt(path: String, underlying: Config) = { if (underlying.hasPath(path)) { Some(underlying.getInt(path)) } else { None } } val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply(configuration: Config): PhysicalDatasetController = { val sparkConfig = new SparkConf() sparkConfig.set("spark.driver.memory", configuration.getString("spark.driver.memory")) val sparkSession = SparkSession.builder().master("local").config(sparkConfig).getOrCreate() val kuduMaster = configuration.getString("kudu.master") val defaultLimit = if (configuration hasPath "daf.row_limit") Some { configuration.getInt("daf.row_limit") } else None System.setProperty("sun.security.krb5.debug", "true") new PhysicalDatasetController(sparkSession, kuduMaster, defaultLimit) } }
Example 79
Source File: HDFSController.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import com.databricks.spark.avro._ import org.apache.spark.sql.{ DataFrame, SparkSession } import org.slf4j.{Logger, LoggerFactory} import scala.util.{Failure, Try} class HDFSController(sparkSession: SparkSession) { val alogger: Logger = LoggerFactory.getLogger(this.getClass) def readData(path: String, format: String, separator: Option[String]): Try[DataFrame] = format match { case "csv" => Try { val pathFixAle = path + "/" + path.split("/").last + ".csv" alogger.debug(s"questo e' il path $pathFixAle") separator match { case None => sparkSession.read.csv(pathFixAle) case Some(sep) => sparkSession.read.format("csv") .option("sep", sep) .option("inferSchema", "true") .option("header", "true") .load(pathFixAle) } } case "parquet" => Try { sparkSession.read.parquet(path) } case "avro" => Try { sparkSession.read.avro(path) } case unknown => Failure { new IllegalArgumentException(s"Unsupported format [$unknown]") } } }
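A hedged usage sketch of the controller above; the path and separator are placeholders and the SparkSession is passed in by the caller.

import org.apache.spark.sql.SparkSession

def previewCsv(sparkSession: SparkSession): Unit = {
  val hdfs = new HDFSController(sparkSession)
  hdfs.readData("/daf/opendata/my_dataset", "csv", Some(";")) // placeholder path and separator
    .foreach(df => println(s"rows: ${df.count()}"))
}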
Example 80
Source File: CatalogManagerClient.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.catalogmanager import java.net.URLEncoder import java.security.AccessControlException import it.gov.daf.common.config.Read import json._ import org.slf4j.LoggerFactory import play.api.Configuration import play.api.libs.json.Json import scalaj.http.{ Http, HttpResponse } import scala.util.{ Failure, Try, Success => TrySuccess } class CatalogManagerClient(serviceUrl: String) { val logger = LoggerFactory.getLogger("it.gov.daf.CatalogManager") private def callService(authorization: String, catalogId: String) = Try { Http(s"$serviceUrl/catalog-manager/v1/catalog-ds/get/${URLEncoder.encode(catalogId,"UTF-8")}") .header("Authorization", authorization) .asString } private def parseCatalog(response: HttpResponse[String]) = if (response.code == 401) Failure { new AccessControlException("Unauthorized") } else if (response.isError) Failure { new RuntimeException(s"Error retrieving catalog data: [${response.code}] with body [${response.body}]") } else Try { Json.parse(response.body).as[MetaCatalog] } def getById(authorization: String, catalogId: String): Try[MetaCatalog] = for { response <- callService(authorization, catalogId) catalog <- parseCatalog(response) } yield catalog } object CatalogManagerClient { def fromConfig(config: Configuration) = Read.string { "daf.catalog_url" }.!.read(config) match { case TrySuccess(baseUrl) => new CatalogManagerClient(baseUrl) case Failure(error) => throw new RuntimeException("Unable to create catalog-manager client", error) } }
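A hedged usage sketch of the client above; the Play Configuration instance, authorization header and catalog id are assumptions of this sketch.

import scala.util.{Failure, Success}

def loadCatalog(configuration: play.api.Configuration): Unit = {
  val client = CatalogManagerClient.fromConfig(configuration)
  client.getById("Bearer <token>", "my_catalog_id") match { // placeholder token and id
    case Success(catalog) => // use the MetaCatalog
    case Failure(error)   => // handle or log the failure
  }
}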
Example 81
Source File: HDFSBase.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.util import better.files.{ File, _ } import daf.util.DataFrameClasses.{ Address, Person } import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.hdfs.{ HdfsConfiguration, MiniDFSCluster } import org.apache.hadoop.test.PathUtils import org.apache.spark.sql.{ SaveMode, SparkSession } import org.scalatest.{ BeforeAndAfterAll, FlatSpec, Matchers } import org.slf4j.LoggerFactory import scala.util.{ Failure, Random, Try } abstract class HDFSBase extends FlatSpec with Matchers with BeforeAndAfterAll { var miniCluster: Try[MiniDFSCluster] = Failure[MiniDFSCluster](new Exception) var fileSystem: Try[FileSystem] = Failure[FileSystem](new Exception) val sparkSession: SparkSession = SparkSession.builder().master("local").getOrCreate() val alogger = LoggerFactory.getLogger(this.getClass) val (testDataPath, confPath) = { val testDataPath = s"${PathUtils.getTestDir(this.getClass).getCanonicalPath}/MiniCluster" val confPath = s"$testDataPath/conf" ( testDataPath.toFile.createIfNotExists(asDirectory = true, createParents = false), confPath.toFile.createIfNotExists(asDirectory = true, createParents = false) ) } def pathAvro = "opendata/test.avro" def pathParquet = "opendata/test.parquet" def pathCsv = "opendata/test.csv" def getSparkSession = sparkSession override def beforeAll(): Unit = { val conf = new HdfsConfiguration() conf.setBoolean("dfs.permissions", true) System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA) conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataPath.pathAsString) //FileUtil.fullyDelete(testDataPath.toJava) conf.set(s"hadoop.proxyuser.${System.getProperties.get("user.name")}.groups", "*") conf.set(s"hadoop.proxyuser.${System.getProperties.get("user.name")}.hosts", "*") val builder = new MiniDFSCluster.Builder(conf) miniCluster = Try(builder.build()) fileSystem = miniCluster.map(_.getFileSystem) fileSystem.foreach(fs => { val confFile: File = confPath / "hdfs-site.xml" for { os <- confFile.newOutputStream.autoClosed } fs.getConf.writeXml(os) }) writeDf() } override def afterAll(): Unit = { miniCluster.foreach(_.shutdown(true)) val _ = testDataPath.parent.parent.delete(true) sparkSession.stop() } private def writeDf(): Unit = { import sparkSession.implicits._ alogger.info(s"TestDataPath ${testDataPath.toJava.getAbsolutePath}") alogger.info(s"ConfPath ${confPath.toJava.getAbsolutePath}") val persons = (1 to 10).map(i => Person(s"Andy$i", Random.nextInt(85), Address("Via Ciccio Cappuccio"))) val caseClassDS = persons.toDS() caseClassDS.write.format("parquet").mode(SaveMode.Overwrite).save(pathParquet) caseClassDS.write.format("com.databricks.spark.avro").mode(SaveMode.Overwrite).save(pathAvro) //writing directly the Person dataframe generates an exception caseClassDS.toDF.select("name", "age").write.format("csv").mode(SaveMode.Overwrite).option("header", "true").save(pathCsv) } } object DataFrameClasses { final case class Address(street: String) final case class Person(name: String, age: Int, address: Address) }
Example 82
Source File: HiveAddJarsEngineHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.hive.hook import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext, EngineHook} import com.webank.wedatasphere.linkis.engine.hive.executor.HiveEngineExecutor import com.webank.wedatasphere.linkis.server.JMap import org.apache.commons.lang.StringUtils import org.slf4j.LoggerFactory class HiveAddJarsEngineHook extends EngineHook { private var jars:String = _ private val JARS = "jars" private val logger = LoggerFactory.getLogger(classOf[HiveAddJarsEngineHook]) private val addSql = "add jar " override def beforeCreateEngine(params: JMap[String, String]): JMap[String, String] = { import scala.collection.JavaConversions._ // params foreach { // case (k, v) => logger.info(s"params key is $k, value is $v") // } params foreach { case (key,value) => if (JARS.equals(key)) jars = value } logger.info(s"jarArray is {}", jars) params } override def afterCreatedEngine(executor: EngineExecutor): Unit = { if (StringUtils.isEmpty(jars)) { logger.warn("hive added jars is empty") return } jars.split(",") foreach{ jar => try{ logger.info("begin to run hive sql {}", addSql + jar) executor.asInstanceOf[HiveEngineExecutor].executeLine(new EngineExecutorContext(executor), addSql + jar) }catch{ case t:Throwable => logger.error(s"run hive sql ${addSql + jar} failed", t) } } } }
Example 83
Source File: HiveEngineExecutorFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.hive.executor import java.io.PrintStream import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorFactory} import com.webank.wedatasphere.linkis.engine.hive.common.HiveUtils import com.webank.wedatasphere.linkis.engine.hive.exception.HiveSessionStartFailedException import com.webank.wedatasphere.linkis.server.JMap import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.UserGroupInformation import org.slf4j.LoggerFactory import org.springframework.stereotype.Component @Component class HiveEngineExecutorFactory extends EngineExecutorFactory { private val logger = LoggerFactory.getLogger(getClass) private val HIVE_QUEUE_NAME:String = "mapreduce.job.queuename" private val BDP_QUEUE_NAME:String = "wds.linkis.yarnqueue" override def createExecutor(options: JMap[String, String]): EngineExecutor = { val hiveConf:HiveConf = HiveUtils.getHiveConf hiveConf.setVar(HiveConf.ConfVars.HIVEJAR, HiveUtils.jarOfClass(classOf[Driver]) .getOrElse(throw HiveSessionStartFailedException(40012 ,"cannot find hive-exec.jar, start session failed!"))) import scala.collection.JavaConversions._ options.foreach{ case(k,v) => logger.info(s"key is $k, value is $v")} options.filter{case (k,v) => k.startsWith("hive.") || k.startsWith("mapreduce.") || k.startsWith("wds.linkis.")}.foreach{case(k, v) => logger.info(s"key is $k, value is $v") if (BDP_QUEUE_NAME.equals(k)) hiveConf.set(HIVE_QUEUE_NAME, v) else hiveConf.set(k, v)} val sessionState:SessionState = new SessionState(hiveConf) sessionState.out = new PrintStream(System.out, true, "utf-8") sessionState.info = new PrintStream(System.out, true, "utf-8") sessionState.err = new PrintStream(System.out, true, "utf-8") SessionState.start(sessionState) val ugi = UserGroupInformation.getCurrentUser new HiveEngineExecutor(5000, sessionState, ugi, hiveConf) } }
Example 84
Source File: HiveEngineSpringConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.hive import com.webank.wedatasphere.linkis.engine.execute.hook._ import com.webank.wedatasphere.linkis.engine.execute.{CodeParser, EngineHook, SQLCodeParser} import com.webank.wedatasphere.linkis.engine.hive.hook.HiveAddJarsEngineHook import org.slf4j.LoggerFactory import org.springframework.context.annotation.{Bean, Configuration} @Configuration class HiveEngineSpringConfiguration { private val LOG = LoggerFactory.getLogger(getClass) @Bean(Array("codeParser")) def generateCodeParser:CodeParser = { LOG.info("code Parser is set in hive") new SQLCodeParser() } @Bean(Array("engineHooks")) def generateEngineHooks:Array[EngineHook] = { LOG.info("engineHooks are set in hive.") Array(new ReleaseEngineHook, new MaxExecuteNumEngineHook, new HiveAddJarsEngineHook, new JarUdfEngineHook) } }
Example 85
Source File: HiveQLProcessBuilder.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hive.process

import java.nio.file.Paths

import com.webank.wedatasphere.linkis.common.conf.Configuration
import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration.{DEFAULT_JAVA_OPTS, JAVA_HOME, engineGCLogPath}
import com.webank.wedatasphere.linkis.enginemanager.hive.conf.HiveEngineConfiguration
import com.webank.wedatasphere.linkis.enginemanager.impl.UserEngineResource
import com.webank.wedatasphere.linkis.enginemanager.process.JavaProcessEngineBuilder
import com.webank.wedatasphere.linkis.enginemanager.{AbstractEngineCreator, EngineResource}
import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory

import scala.collection.mutable.ArrayBuffer

  override protected def classpathCheck(jarOrFiles: Array[String]): Unit = {
    for (jarOrFile <- jarOrFiles) {
      checkJarOrFile(jarOrFile)
    }
  }

  // todo Check the jars on the classpath
  private def checkJarOrFile(jarOrFile: String): Unit = {
  }

  override def build(engineRequest: EngineResource, request: RequestEngine): Unit = {
    this.request = request
    userEngineResource = engineRequest.asInstanceOf[UserEngineResource]
    val javaHome = JAVA_HOME.getValue(request.properties)
    if (StringUtils.isEmpty(javaHome)) {
      warn("We cannot find the java home, use java to run storage repl web server.")
      commandLine += "java"
    } else {
      commandLine += Paths.get(javaHome, "bin/java").toAbsolutePath.toFile.getAbsolutePath
    }
    if (request.properties.containsKey(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)) {
      val settingClientMemory = request.properties.get(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)
      if (!settingClientMemory.toLowerCase().endsWith("g")) {
        request.properties.put(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key, settingClientMemory + "g")
      }
      //request.properties.put(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key, request.properties.get(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)+"g")
    }
    val clientMemory = HiveEngineConfiguration.HIVE_CLIENT_MEMORY.getValue(request.properties).toString
    if (clientMemory.toLowerCase().endsWith("g")) {
      commandLine += ("-Xmx" + clientMemory.toLowerCase())
      commandLine += ("-Xms" + clientMemory.toLowerCase())
    } else {
      commandLine += ("-Xmx" + clientMemory + "g")
      commandLine += ("-Xms" + clientMemory + "g")
    }
    val javaOPTS = getExtractJavaOpts
    val alias = getAlias(request)
    if (StringUtils.isNotEmpty(DEFAULT_JAVA_OPTS.getValue))
      DEFAULT_JAVA_OPTS.getValue.format(engineGCLogPath(port, userEngineResource.getUser, alias)).split("\\s+").foreach(commandLine += _)
    if (StringUtils.isNotEmpty(javaOPTS)) javaOPTS.split("\\s+").foreach(commandLine += _)
    //engineLogJavaOpts(port, alias).trim.split(" ").foreach(commandLine += _)
    if (Configuration.IS_TEST_MODE.getValue) {
      val port = AbstractEngineCreator.getNewPort
      info(s"$toString open debug mode with port $port.")
      commandLine += s"-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$port"
    }
    var classpath = getClasspath(request.properties, getExtractClasspath)
    classpath = classpath ++ request.properties.get("jars").split(",")
    classpathCheck(classpath)
    commandLine += "-Djava.library.path=/appcom/Install/hadoop/lib/native"
    commandLine += "-cp"
    commandLine += classpath.mkString(":")
    commandLine += "com.webank.wedatasphere.linkis.engine.DataWorkCloudEngineApplication"
  }

//  override def build(engineRequest: EngineResource, request: RequestEngine): Unit = {
//    import scala.collection.JavaConversions._
//    request.properties foreach { case (k, v) =>
//      LOG.info(s"request key is $k, value is $v") }
//    this.request = request
//    super.build(engineRequest, request)
//
//  }

  override protected val addApacheConfigPath: Boolean = true
}
Example 86
Source File: HiveEngineManagerSpringConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hive.conf import com.webank.wedatasphere.linkis.enginemanager.EngineHook import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration import com.webank.wedatasphere.linkis.enginemanager.hook.{ConsoleConfigurationEngineHook, JarLoaderEngineHook} import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy} import com.webank.wedatasphere.linkis.rpc.Sender import org.slf4j.{Logger, LoggerFactory} import org.springframework.context.annotation.{Bean, Configuration} @Configuration class HiveEngineManagerSpringConfiguration { private val logger:Logger = LoggerFactory.getLogger(getClass) @Bean(Array("resources")) def createResource(): ModuleInfo = { val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong , EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue) val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue) logger.info("create resource for hive") ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance) } @Bean(name = Array("hooks")) def createEngineHook(): Array[EngineHook] = { Array(new ConsoleConfigurationEngineHook, new JarLoaderEngineHook)// TODO } }
Example 87
Source File: PipeLineManagerSpringConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.pipeline import com.webank.wedatasphere.linkis.enginemanager.EngineCreator import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration import com.webank.wedatasphere.linkis.resourcemanager.domain.ModuleInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, ResourceRequestPolicy} import com.webank.wedatasphere.linkis.rpc.Sender import org.slf4j.{Logger, LoggerFactory} import org.springframework.context.annotation.{Bean, Configuration} @Configuration class PipeLineManagerSpringConfiguration { private val logger:Logger = LoggerFactory.getLogger(getClass) @Bean(Array("engineCreator")) def createEngineCreator(): EngineCreator =new PipeLineDefaultEngineCreator @Bean(Array("resources")) def createResource(): ModuleInfo = { val totalresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_MAX_MEMORY_AVAILABLE.getValue.toLong , EnvConfiguration.ENGINE_MANAGER_MAX_CORES_AVAILABLE.getValue, EnvConfiguration.ENGINE_MANAGER_MAX_CREATE_INSTANCES.getValue) val protectresource = new LoadInstanceResource(EnvConfiguration.ENGINE_MANAGER_PROTECTED_MEMORY.getValue.toLong, EnvConfiguration.ENGINE_MANAGER_PROTECTED_CORES.getValue, EnvConfiguration.ENGINE_MANAGER_PROTECTED_INSTANCES.getValue) logger.info("create resource for pipeline") ModuleInfo(Sender.getThisServiceInstance, totalresource, protectresource, ResourceRequestPolicy.LoadInstance) } }
Example 88
Source File: PythonEngineSpringConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine import com.webank.wedatasphere.linkis.engine.execute.hook.{MaxExecuteNumEngineHook, ReleaseEngineHook} import com.webank.wedatasphere.linkis.engine.execute.{CodeParser, EngineHook, PythonCodeParser} import org.slf4j.LoggerFactory import org.springframework.context.annotation.Bean import org.springframework.stereotype.Component @Component class PythonEngineSpringConfiguration { private val LOG = LoggerFactory.getLogger(getClass) @Bean(Array("codeParser")) def generateCodeParser:CodeParser = { LOG.info("code Parser is set in python") new PythonCodeParser() } @Bean(Array("engineHooks")) def generateEngineHooks:Array[EngineHook] = { LOG.info("engineHooks are set in python.") Array(new ReleaseEngineHook, new MaxExecuteNumEngineHook) } }
Example 89
Source File: CommonEntranceParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.parser

import java.util
import java.util.Date

import com.webank.wedatasphere.linkis.entrance.conf.EntranceConfiguration
import com.webank.wedatasphere.linkis.entrance.exception.EntranceIllegalParamException
import com.webank.wedatasphere.linkis.protocol.constants.TaskConstant
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import com.webank.wedatasphere.linkis.rpc.Sender
import com.webank.wedatasphere.linkis.scheduler.queue.SchedulerEventState
import org.apache.commons.lang.StringUtils
import org.slf4j.LoggerFactory

    if (EntranceConfiguration.DEFAULT_REQUEST_APPLICATION_NAME.getValue.equals(creator) &&
      StringUtils.isEmpty(source.get(TaskConstant.SCRIPTPATH)) &&
      StringUtils.isEmpty(executionCode))
      throw new EntranceIllegalParamException(20007, "param executionCode and scriptPath can not be empty at the same time")
    var runType: String = null
    if (StringUtils.isNotEmpty(executionCode)) {
      runType = params.get(TaskConstant.RUNTYPE).asInstanceOf[String]
      if (StringUtils.isEmpty(runType)) runType = EntranceConfiguration.DEFAULT_RUN_TYPE.getValue
      // If formatCode is not empty, we need to format it
      if (formatCode) executionCode = format(executionCode)
      task.setExecutionCode(executionCode)
    }
    task.setSource(source)
    task.setEngineType(runType)
    // For backward compatibility, engineType and runType carry the same value
    task.setRunType(runType)
    task.setExecuteApplicationName(executeApplicationName)
    task.setRequestApplicationName(creator)
    task.setStatus(SchedulerEventState.Inited.toString)
    task
  }

  // TODO: format the code in a proper way
  private def format(code: String): String = code
}
Example 90
Source File: CommentInterceptor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.interceptor.impl

import java.lang
import java.util.regex.Pattern

import com.webank.wedatasphere.linkis.entrance.interceptor.EntranceInterceptor
import com.webank.wedatasphere.linkis.protocol.query.RequestPersistTask
import com.webank.wedatasphere.linkis.protocol.task.Task
import org.slf4j.{Logger, LoggerFactory}

import scala.util.matching.Regex

  override def dealComment(code: String): String = {
    val p = Pattern.compile(scalaCommentPattern)
    p.matcher(code).replaceAll("$1")
  }
}

object CommentMain {
  def main(args: Array[String]): Unit = {
    val sqlCode = "select * from default.user;--你好;show tables"
    val sqlCode1 = "select * from default.user--你好;show tables"
    println(SQLCommentHelper.dealComment(sqlCode))
  }
}
Example 91
Source File: EntranceGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.scheduler

import com.webank.wedatasphere.linkis.entrance.conf.EntranceConfiguration
import com.webank.wedatasphere.linkis.entrance.execute.EntranceJob
import com.webank.wedatasphere.linkis.entrance.persistence.HaPersistenceTask
import com.webank.wedatasphere.linkis.protocol.config.{RequestQueryAppConfig, ResponseQueryConfig}
import com.webank.wedatasphere.linkis.rpc.Sender
import com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue.ParallelGroup
import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, SchedulerEvent}
import com.webank.wedatasphere.linkis.server.JMap
import org.apache.commons.lang.StringUtils
import org.slf4j.{Logger, LoggerFactory}

class EntranceGroupFactory extends GroupFactory {

  private val groupNameToGroups = new JMap[String, Group]
  private val logger: Logger = LoggerFactory.getLogger(classOf[EntranceGroupFactory])

  override def getOrCreateGroup(groupName: String): Group = {
    if (!groupNameToGroups.containsKey(groupName)) synchronized {
      // TODO Query the database to get initCapacity, maxCapacity, maxRunningJobs and maxAskExecutorTimes
      val initCapacity = 100
      val maxCapacity = 100
      var maxRunningJobs = EntranceConfiguration.WDS_LINKIS_INSTANCE.getValue
      val maxAskExecutorTimes = EntranceConfiguration.MAX_ASK_EXECUTOR_TIME.getValue.toLong
      if (groupName.split("_").length < 2) {
        logger.warn(s"name style of group: $groupName is not correct, we will set default values for the group")
      } else {
        val sender: Sender = Sender.getSender(EntranceConfiguration.CLOUD_CONSOLE_CONFIGURATION_SPRING_APPLICATION_NAME.getValue)
        val creator = groupName.split("_")(0)
        val username = groupName.split("_")(1)
        val engineName = EntranceConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue
        val engineType = if (engineName.trim().toLowerCase().contains("engine")) engineName.substring(0, engineName.length - "engine".length) else "spark"
        logger.info(s"Getting parameters for $groupName, username: $username, creator: $creator, engineType: $engineType")
        val keyAndValue = sender.ask(RequestQueryAppConfig(username, creator, engineType)).asInstanceOf[ResponseQueryConfig].getKeyAndValue
        try {
          maxRunningJobs = Integer.parseInt(keyAndValue.get(EntranceConfiguration.WDS_LINKIS_INSTANCE.key))
        } catch {
          case t: Throwable =>
            logger.warn("Failed to get maxRunningJobs from the configuration server! Falling back to the default value.", t)
        }
      }
      logger.info("groupName: {} => maxRunningJobs is {}", groupName, maxRunningJobs)
      val group = new ParallelGroup(groupName, initCapacity, maxCapacity)
      group.setMaxRunningJobs(maxRunningJobs)
      group.setMaxAskExecutorTimes(maxAskExecutorTimes)
      if (!groupNameToGroups.containsKey(groupName)) groupNameToGroups.put(groupName, group)
    }
    groupNameToGroups.get(groupName)
  }

  override def getGroupNameByEvent(event: SchedulerEvent): String = event match {
    case job: EntranceJob =>
      job.getTask match {
        case HaPersistenceTask(task) => "HA"
        case _ => EntranceGroupFactory.getGroupName(job.getCreator, job.getUser)
      }
  }
}

object EntranceGroupFactory {
  def getGroupName(creator: String, user: String): String = {
    if (StringUtils.isNotEmpty(creator)) creator + "_" + user
    else EntranceConfiguration.DEFAULT_REQUEST_APPLICATION_NAME.getValue + "_" + user
  }
}
Example 92
Source File: DataWorkCloudEngineApplication.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine import java.text.SimpleDateFormat import java.util.Date import com.webank.wedatasphere.linkis.DataWorkCloudApplication import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.engine.conf.EngineConfiguration import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration import org.apache.commons.lang.StringUtils import org.slf4j.LoggerFactory object DataWorkCloudEngineApplication { val userName:String = System.getProperty("user.name") val hostName:String = Utils.getComputerName val appName:String = EngineConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue val prefixName:String = EngineConfiguration.ENGINE_LOG_PREFIX.getValue val timeStamp:Long = System.currentTimeMillis() private val timeFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss") private val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val time:String = timeFormat.format(new Date(timeStamp)) val date:String = dateFormat.format(new Date(timeStamp)) val isTimeStampSuffix:Boolean = "true".equalsIgnoreCase(EngineConfiguration.ENGINE_LOG_TIME_STAMP_SUFFIX.getValue) val shortLogFile:String = if (isTimeStampSuffix) appName + "_" + hostName + "_" + userName + "_" + time + ".log" else appName + "_" + hostName + "_" + userName + ".log" val logName:String = if(isTimeStampSuffix) prefixName + "/" + userName + "/" + shortLogFile else prefixName + "/" + shortLogFile System.setProperty("engineLogFile", logName) System.setProperty("shortEngineLogFile", shortLogFile) // System.setProperty("engineLogFile", logName) // val context:LoggerContext = LogManager.getContext(false).asInstanceOf[LoggerContext] // val path:String = getClass.getResource("/").getPath // val log4j2XMLFile:File = new File(path + "/log4j2-engine.xml") // val configUri:URI = log4j2XMLFile.toURI // context.setConfigLocation(configUri) private val logger = LoggerFactory.getLogger(getClass) logger.info(s"Now log4j2 Rolling File is set to be $logName") logger.info(s"Now shortLogFile is set to be $shortLogFile") def main(args: Array[String]): Unit = { val parser = DWCArgumentsParser.parse(args) DWCArgumentsParser.setDWCOptionMap(parser.getDWCConfMap) val existsExcludePackages = ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.getValue if(StringUtils.isEmpty(existsExcludePackages)) DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, "com.webank.wedatasphere.linkis.enginemanager") else DataWorkCloudApplication.setProperty(ServerConfiguration.BDP_SERVER_EXCLUDE_PACKAGES.key, existsExcludePackages + ",com.webank.wedatasphere.linkis.enginemanager") DataWorkCloudApplication.main(DWCArgumentsParser.formatSpringOptions(parser.getSpringConfMap)) } }
Example 93
Source File: Logging.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.utils import org.slf4j.LoggerFactory trait Logging { protected lazy implicit val logger = LoggerFactory.getLogger(getClass) def trace(message: => String) = { if (logger.isTraceEnabled) { logger.trace(message.toString) } } def debug(message: => String): Unit = { if (logger.isDebugEnabled) { logger.debug(message.toString) } } def info(message: => String): Unit = { if (logger.isInfoEnabled) { logger.info(message.toString) } } def info(message: => String, t: Throwable): Unit = { logger.info(message.toString, t) } def warn(message: => String): Unit = { logger.warn(message.toString) } def warn(message: => String, t: Throwable): Unit = { logger.warn(message.toString, t) } def error(message: => String, t: Throwable): Unit = { logger.error(message.toString, t) } def error(message: => String): Unit = { logger.error(message.toString) } }
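A possible way to use this trait (the ImportService class below is hypothetical): mix it in and call the level methods directly. Because the message parameters are by-name, the interpolation for trace, debug and info is only evaluated after the corresponding isXxxEnabled check passes.

import com.webank.wedatasphere.linkis.common.utils.Logging

class ImportService extends Logging {
  def importAll(count: Int): Unit = {
    info(s"importing $count records")
    try {
      // ... do the actual work ...
    } catch {
      case e: Exception => error(s"import of $count records failed", e)
    }
  }
}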
Example 94
Source File: LyftConnector.scala From scala-spark-cab-rides-predictions with MIT License | 5 votes |
package rides.connector import actors.CabRideSystem import com.lyft.networking.apiObjects.CostEstimateResponse import com.lyft.networking.apis.LyftPublicApi import com.lyft.networking.{ApiConfig, LyftApiFactory} import org.slf4j.LoggerFactory import rides.connector.LyftConnectorConfig.rideService import scala.concurrent.Future import scala.util.Properties.envOrElse class LyftConnector extends RidesConnector[CostEstimateResponse] { private object LyftConnectorConfig { private val log = LoggerFactory.getLogger(LyftConnectorConfig.getClass) private val apiConfig: ApiConfig = new ApiConfig.Builder() .setClientId(envOrElse("lyft_clientID", "NOT_DEFINED")) .setClientToken(envOrElse("lyft_client_token", "NOT_DEFINED")) .build val rideService: LyftPublicApi = new LyftApiFactory(apiConfig).getLyftPublicApi log.info("Starting Lyft Ride Service") }
Example 95
Source File: UberConnector.scala From scala-spark-cab-rides-predictions with MIT License | 5 votes |
package rides.connector import actors.CabRideSystem import com.uber.sdk.core.client.{ServerTokenSession, SessionConfiguration} import com.uber.sdk.rides.client.UberRidesApi import com.uber.sdk.rides.client.model.PriceEstimatesResponse import com.uber.sdk.rides.client.services.RidesService import org.slf4j.LoggerFactory import rides.connector.UberConnectorConfig.rideService import scala.concurrent.Future import scala.util.Properties.envOrElse class UberConnector extends RidesConnector[PriceEstimatesResponse] { private object UberConnectorConfig { private val log = LoggerFactory.getLogger(UberConnectorConfig.getClass) private val config: SessionConfiguration = new SessionConfiguration.Builder() .setClientId(envOrElse("uber_clientId", "NOT_DEFINED")) .setServerToken(envOrElse("uber_token", "NOT_DEFINED")) .build private val session: ServerTokenSession = new ServerTokenSession(config) val rideService: RidesService = UberRidesApi.`with`(session).build.createService log.info("Starting Uber Ride Service") }
Example 96
Source File: RidesAPI.scala From scala-spark-cab-rides-predictions with MIT License | 5 votes |
package rides

import com.lyft.networking.apiObjects.CostEstimateResponse
import com.uber.sdk.rides.client.model.PriceEstimatesResponse
import models.{CabPrice, Location, LyftPriceModel, UberPriceModel}
import org.slf4j.LoggerFactory
import rides.connector.{LyftConnector, RidesConnector, UberConnector}

import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{Await, Future}

  override def getPrices(source: Location, destination: Location): Set[CabPrice] = {
    // future-wrapped price estimate from the Lyft API
    val cef: Future[CostEstimateResponse] = ridesConnector.getPriceEstimates(source.latitude, source.longitude, destination.latitude, destination.longitude)

    // process data synchronously
    val result: CostEstimateResponse = Await.result(cef, 30 seconds)
    result match {
      case cer: CostEstimateResponse =>
        cer.cost_estimates
          .asScala.map(LyftPriceModel(_, source, destination))
          .toSet
      // in case of failure just return an empty set to avoid failures downstream
      case q =>
        log.error("Failed to fetch Lyft records. Got " + q + " instead of CostEstimateResponse")
        Set()
    }
  }
}
Example 97
Source File: HostsStatuses.scala From algoliasearch-client-scala with MIT License | 5 votes |
package algolia import java.util.concurrent.ConcurrentHashMap import org.slf4j.{Logger, LoggerFactory} case class HostsStatuses( configuration: AlgoliaClientConfiguration, utils: AlgoliaUtils, queryHosts: Seq[String], indexingHosts: Seq[String] ) { private[algolia] val hostStatuses: ConcurrentHashMap[String, HostStatus] = new ConcurrentHashMap[String, HostStatus](5) val logger: Logger = LoggerFactory.getLogger("algoliasearch") def markHostAsUp(host: String): Unit = { logger.debug("Marking {} as `up`", host) hostStatuses.put(host, HostStatus.up(utils.now())) } def markHostAsDown(host: String): Unit = { logger.debug("Marking {} as `down`", host) hostStatuses.put(host, HostStatus.down(utils.now())) } def indexingHostsThatAreUp(): Seq[String] = hostsThatAreUp(indexingHosts) def queryHostsThatAreUp(): Seq[String] = hostsThatAreUp(queryHosts) private def hostsThatAreUp(hosts: Seq[String]): Seq[String] = { val filteredHosts = hosts.filter(h => isUpOrCouldBeRetried(getHostStatus(h)) ) if (filteredHosts.isEmpty) { hosts } else { filteredHosts } } def isUpOrCouldBeRetried(hostStatus: HostStatus): Boolean = hostStatus.up || (utils .now() - hostStatus.updatedAt) >= configuration.hostDownTimeoutMs private def getHostStatus(host: String): HostStatus = hostStatuses.getOrDefault(host, HostStatus.up(utils.now())) } private case class HostStatus(up: Boolean, updatedAt: Long) private object HostStatus { def up(now: Long) = HostStatus(up = true, now) def down(now: Long) = HostStatus(up = false, now) }
Example 98
Source File: AsciidoctorJgitIncludeProcessor.scala From gitbucket-asciidoctor-plugin with Apache License 2.0 | 5 votes |
package tobiasroeser.gitbucket.asciidoctor import java.io.File import java.net.URI import java.util import gitbucket.core.service.{AccountService, RepositoryService} import gitbucket.core.service.RepositoryService.RepositoryInfo import gitbucket.core.util.{JGitUtil, StringUtil} import gitbucket.core.util.Directory._ import gitbucket.core.util.SyntaxSugars._ import org.asciidoctor.ast.DocumentRuby import org.asciidoctor.extension.{IncludeProcessor, PreprocessorReader} import org.eclipse.jgit.api.Git import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ class AsciidoctorJgitIncludeProcessor(config: java.util.Map[String, Object]) extends IncludeProcessor(config) with RepositoryService with AccountService{ val logger = LoggerFactory.getLogger(getClass) override def handles(target: String): Boolean = { true } override def process(document: DocumentRuby, reader: PreprocessorReader, target: String, attributes: util.Map[String, AnyRef]): Unit = { val documentPath = URI.create(document.getAttr("gitbucket-path").toString) val repository = document.getAttr("gitbucket-repository").asInstanceOf[RepositoryInfo] val branch = document.getAttr("gitbucket-branch").toString val targetPath = documentPath.resolve(target) using(Git.open(getRepositoryDir(repository.owner, repository.name))) { git => val revCommit = JGitUtil.getRevCommitFromId(git, git.getRepository.resolve(branch)) JGitUtil.getContentFromPath(git, revCommit.getTree, targetPath.toString, true).map{ bytes => val content = StringUtil.convertFromByteArray(bytes) val embed = if(attributes.asScala.contains("lines")){ val lines = attributes.get("lines").toString val linesRe = """(\d+)\.\.(\d+)""".r lines match { case linesRe(start, end) => content.split("""\r?\n""").slice(start.toInt - 1, end.toInt).mkString("\n") } }else{ content } reader.push_include(embed, target, target, 1, attributes) } } } }
Example 99
Source File: InitialProcessing.scala From iodb with Creative Commons Zero v1.0 Universal | 5 votes |
package io.iohk.iodb.bench import java.io.File import ch.qos.logback.classic.LoggerContext import io.iohk.iodb.{ByteArrayWrapper, ShardedStore, Store, TestUtils} import org.slf4j.LoggerFactory object InitialProcessing extends Benchmark { val Milestones = Seq(1000, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000) val Inputs = 5500 //average number of inputs per block val Outputs = 6000 //average number of outputs per block def bench(store: Store, dir: File): Unit = { println(s"Store: $store") Milestones.foldLeft((0, 0L, Seq[ByteArrayWrapper]())) { case ((prevMilestone, prevTime, prevCache), milestone) => val (time, newCache) = TestUtils.runningTime { (prevMilestone + 1 to milestone).foldLeft(prevCache) { case (cache, version) => processBlock(version, store, Inputs, Outputs, cache).get.take(Inputs * 100) } } val newTime = prevTime + time println(s"Time to get to $milestone: $time") (milestone, newTime, newCache) } store.close() TestUtils.deleteRecur(dir) } def main(args: Array[String]): Unit = { //switching off logging val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext] context.stop() var dir = TestUtils.tempDir() bench(new ShardedStore(dir, keySize = KeySize), dir) System.gc() Thread.sleep(15000) println("======================================") dir = TestUtils.tempDir() bench(new RocksStore(dir), dir) } }
Example 100
Source File: SparkLog4jExample.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pams.example import org.apache.spark.{ SparkConf, SparkContext } import org.slf4j.LoggerFactory object SparkLog4jExample { private val log = LoggerFactory.getLogger(this.getClass) def main(args: Array[String]): Unit = { log.info("Starting up the spark logging example") val conf = new SparkConf().setAppName("Pulse Spark Logging Example") val sc = SparkContext.getOrCreate(conf) try { run(sc, numEvents = 10000) } finally { sc.stop() } } def run(sc: SparkContext, numEvents: Int): Unit = { val testData = 1 to numEvents val testRdd = sc.parallelize(testData) testRdd.foreach { num => if (num % 10000 == 0) { log.error(s"XXXXX error! num: " + num) } else if (num % 5000 == 0) { log.warn(s"XXXXX warning! num: " + num) } else { log.info(s"XXXXX found: " + num) } } log.info("Shutting down the spark logging example") } }
Example 101
Source File: AbstractAlertTrigger.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.alertengine.trigger import com.typesafe.scalalogging.Logger import io.phdata.pulse.alertengine.{ AlertRule, AlertsDb, TriggeredAlert } import org.slf4j.LoggerFactory def query(applicationName: String, alertRule: AlertRule): Seq[Map[String, Any]] override final def check(applicationName: String, alertRule: AlertRule): Option[TriggeredAlert] = if (AlertsDb.shouldCheck(applicationName, alertRule)) { try { val results = query(applicationName, alertRule) processResults(applicationName, alertRule, results) } catch { case e: Exception => e.printStackTrace() logger.error(s"Error running query for $applicationName with alert $alertRule", e) None } } else { None } private def processResults(applicationName: String, alertRule: AlertRule, results: Seq[Map[String, Any]]): Option[TriggeredAlert] = { val numFound = results.size val threshold = alertRule.resultThreshold.getOrElse(0) if (threshold == -1 && results.isEmpty) { logger.info( s"Alert triggered for $applicationName on alert $alertRule at no results found condition") AlertsDb.markTriggered(applicationName, alertRule) Some(TriggeredAlert(alertRule, applicationName, results, 0)) } else if (results.lengthCompare(threshold) > 0) { logger.info(s"Alert triggered for $applicationName on alert $alertRule") AlertsDb.markTriggered(applicationName, alertRule) Some(TriggeredAlert(alertRule, applicationName, results, numFound)) } else { logger.info(s"No alert needed for $applicationName with alert $alertRule") None } } }
Example 102
Source File: DeltaRecordReaderWrapper.scala From connectors with Apache License 2.0 | 5 votes |
package io.delta.hive import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory import org.apache.hadoop.io.ArrayWritable import org.apache.hadoop.io.NullWritable import org.apache.hadoop.io.Writable import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.Reporter import org.apache.parquet.hadoop.ParquetInputFormat import org.slf4j.LoggerFactory private def insertPartitionValues(value: ArrayWritable): Unit = { val valueArray = value.get() var i = 0 val n = partitionValues.length // Using while loop for better performance since this method is called for each row. while (i < n) { val partition = partitionValues(i) // The schema of `valueArray` is the Hive schema, and it's the same as the Delta // schema since we have verified it in `DeltaInputFormat`. Hence, the position of a partition // column in `valueArray` is the same as its position in Delta schema. valueArray(partition._1) = partition._2 i += 1 } } }
Example 103
Source File: ReliableHttpProxyFactory.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.akkahttp.proxy import akka.NotUsed import akka.actor._ import akka.http.scaladsl.Http import akka.http.scaladsl.model.{HttpEntity, HttpRequest, HttpResponse} import akka.stream.Materializer import akka.stream.scaladsl.{Flow, Sink, Source} import org.slf4j.LoggerFactory import rhttpc.client.protocol.{Correlated, Request} import rhttpc.client.proxy._ import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future} import scala.util.control.NonFatal import scala.util.{Failure, Success} object ReliableHttpProxyFactory { private lazy val logger = LoggerFactory.getLogger(getClass) def send(successRecognizer: SuccessHttpResponseRecognizer, batchSize: Int, parallelConsumers: Int) (request: Request[HttpRequest]) (implicit actorSystem: ActorSystem, materialize: Materializer): Future[HttpResponse] = { import actorSystem.dispatcher send(prepareHttpFlow(batchSize * parallelConsumers), successRecognizer)(request.correlated) } private def prepareHttpFlow(parallelism: Int) (implicit actorSystem: ActorSystem, materialize: Materializer): Flow[(HttpRequest, String), HttpResponse, NotUsed] = { import actorSystem.dispatcher Http().superPool[String]().mapAsync(parallelism) { case (tryResponse, id) => tryResponse match { case Success(response) => response.toStrict(1 minute) case Failure(ex) => Future.failed(ex) } } } private def send(httpFlow: Flow[(HttpRequest, String), HttpResponse, Any], successRecognizer: SuccessHttpResponseRecognizer) (corr: Correlated[HttpRequest]) (implicit ec: ExecutionContext, materialize: Materializer): Future[HttpResponse] = { import collection.JavaConverters._ logger.debug( s"""Sending request for ${corr.correlationId} to ${corr.msg.getUri()}. Headers: |${corr.msg.getHeaders().asScala.toSeq.map(h => " " + h.name() + ": " + h.value()).mkString("\n")} |Body: |${corr.msg.entity.asInstanceOf[HttpEntity.Strict].data.utf8String}""".stripMargin ) val logResp = logResponse(corr) _ val responseFuture = Source.single((corr.msg, corr.correlationId)).via(httpFlow).runWith(Sink.head) responseFuture.onComplete { case Failure(ex) => logger.error(s"Got failure for ${corr.correlationId} to ${corr.msg.getUri()}", ex) case Success(_) => } for { response <- responseFuture transformedToFailureIfNeed <- { if (successRecognizer.isSuccess(response)) { logResp(response, "success response") Future.successful(response) } else { logResp(response, "response recognized as non-success") Future.failed(NonSuccessResponse) } } } yield transformedToFailureIfNeed } private def logResponse(corr: Correlated[HttpRequest]) (response: HttpResponse, additionalInfo: String): Unit = { import collection.JavaConverters._ logger.debug( s"""Got $additionalInfo for ${corr.correlationId} to ${corr.msg.getUri()}. Status: ${response.status.value}. Headers: |${response.getHeaders().asScala.toSeq.map(h => " " + h.name() + ": " + h.value()).mkString("\n")} |Body: |${response.entity.asInstanceOf[HttpEntity.Strict].data.utf8String}""".stripMargin ) } }
Example 104
Source File: AmqpPublisher.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.transport.amqp import java.io._ import akka.agent.Agent import com.rabbitmq.client._ import org.slf4j.LoggerFactory import rhttpc.transport.SerializingPublisher.SerializedMessage import rhttpc.transport.{Message, Publisher, Serializer, SerializingPublisher} import rhttpc.utils.Recovered._ import scala.concurrent.{ExecutionContext, Future, Promise} private[amqp] class AmqpPublisher[PubMsg](channel: Channel, queueName: String, exchangeName: String, protected val serializer: Serializer[PubMsg], prepareProperties: PartialFunction[SerializedMessage, AMQP.BasicProperties]) (implicit ec: ExecutionContext) extends SerializingPublisher[PubMsg] with ConfirmListener { private lazy val logger = LoggerFactory.getLogger(getClass) private val seqNoOnAckPromiseAgent = Agent[Map[Long, Promise[Unit]]](Map.empty) override private[rhttpc] def publishSerialized(msg: SerializedMessage): Future[Unit] = { val properties = prepareProperties.applyOrElse( msg, (_: SerializedMessage) => throw new IllegalArgumentException(s"Not supported message type: $msg") ) val ackPromise = Promise[Unit]() for { _ <- seqNoOnAckPromiseAgent.alter { curr => val publishSeqNo = channel.getNextPublishSeqNo logger.debug(s"PUBLISH: $publishSeqNo") channel.basicPublish(exchangeName, queueName, properties, msg.content) curr + (publishSeqNo -> ackPromise) } ack <- ackPromise.future } yield ack } override def handleAck(deliveryTag: Long, multiple: Boolean): Unit = { logger.debug(s"ACK: $deliveryTag, multiple = $multiple") confirm(deliveryTag, multiple)(_.success(Unit)) } override def handleNack(deliveryTag: Long, multiple: Boolean): Unit = { logger.debug(s"NACK: $deliveryTag, multiple = $multiple") confirm(deliveryTag, multiple)(_.failure(NoPubMsgAckException)) } private def confirm(deliveryTag: Long, multiple: Boolean) (complete: Promise[Unit] => Unit): Unit = { seqNoOnAckPromiseAgent.alter { curr => val (toAck, rest) = curr.partition { case (seqNo, ackPromise) => seqNo == deliveryTag || multiple && seqNo <= deliveryTag } toAck.foreach { case (seqNo, ackPromise) => complete(ackPromise) } rest } } override def start(): Unit = {} override def stop(): Future[Unit] = { recoveredFuture("completing publishing", currentPublishingFuturesComplete) .map(_ => recovered("channel closing", channel.close())) } private def currentPublishingFuturesComplete: Future[Unit] = seqNoOnAckPromiseAgent.future() .flatMap(map => Future.sequence(map.values.map(_.future))) .map(_ => Unit) } case object NoPubMsgAckException extends Exception(s"No acknowledgement for published message")
Example 105
Source File: AmqpJdbcScheduler.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.transport.amqpjdbc import akka.actor.{Cancellable, Scheduler} import org.slf4j.LoggerFactory import rhttpc.transport.SerializingPublisher.SerializedMessage import rhttpc.transport._ import scala.concurrent.duration.FiniteDuration import scala.concurrent.{ExecutionContext, Future} import scala.util.control.NonFatal import scala.util.{Failure, Success, Try} private[amqpjdbc] trait AmqpJdbcScheduler[PubMsg] { def schedule(msg: Message[PubMsg], delay: FiniteDuration): Future[Unit] def start(): Unit def stop(): Future[Unit] } private[amqpjdbc] class AmqpJdbcSchedulerImpl[PubMsg](scheduler: Scheduler, checkInterval: FiniteDuration, repo: ScheduledMessagesRepository, queueName: String, batchSize: Int, publisher: SerializingPublisher[PubMsg]) (implicit ec: ExecutionContext, serializer: Serializer[PubMsg]) extends AmqpJdbcScheduler[PubMsg] { private val logger = LoggerFactory.getLogger(getClass) private var ran: Boolean = false private var scheduledCheck: Option[Cancellable] = None private var currentPublishedFetchedFuture: Future[Int] = Future.successful(0) override def schedule(msg: Message[PubMsg], delay: FiniteDuration): Future[Unit] = { val serialized = serializer.serialize(msg.content) repo.save(MessageToSchedule(queueName, serialized, msg.properties, delay)) } override def start(): Unit = { synchronized { if (!ran) { ran = true publishFetchedMessagesThanReschedule() } } } private def publishFetchedMessagesThanReschedule(): Unit = { synchronized { if (ran) { val publishedFetchedFuture = repo.fetchMessagesShouldByRun(queueName, batchSize)(publish) currentPublishedFetchedFuture = publishedFetchedFuture publishedFetchedFuture onComplete handlePublicationResult } } } private def publish(messages: Seq[ScheduledMessage]): Future[Seq[Unit]] = { if (messages.nonEmpty) { logger.debug(s"Fetched ${messages.size}, publishing") } val handlingFutures = messages.map { message => publisher.publishSerialized(SerializedMessage(message.content.getBytes(), message.properties)) } Future.sequence(handlingFutures) } private def handlePublicationResult(tryResult: Try[Int]): Unit = { tryResult match { case Failure(ex) => logger.error("Exception while publishing fetched messages", ex) case _ => } synchronized { if (ran) { scheduledCheck = Some(scheduler.scheduleOnce(checkInterval)(publishFetchedMessagesThanReschedule())) } else { logger.debug(s"Scheduler is stopping, next check will be skipped") } } } override def stop(): Future[Unit] = { synchronized { scheduledCheck.foreach(_.cancel()) ran = false currentPublishedFetchedFuture.map(_ => Unit) } } }
Example 106
Source File: Recovered.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.utils

import org.slf4j.LoggerFactory

import scala.concurrent.{ExecutionContext, Future}
import scala.util.control.NonFatal

object Recovered {
  private lazy val logger = LoggerFactory.getLogger(getClass)

  def recovered(action: String, run: => Unit): Unit = {
    try {
      run
    } catch {
      case NonFatal(ex) => logger.error(s"Exception while $action", ex)
    }
  }

  def recoveredFuture(action: String, future: => Future[Unit])
                     (implicit ec: ExecutionContext): Future[Unit] = {
    try {
      future.recover {
        case NonFatal(ex) => logger.error(s"Exception while $action", ex)
      }
    } catch {
      case NonFatal(ex) =>
        // thrown while preparing the future itself
        logger.error(s"Exception while $action", ex)
        Future.successful(())
    }
  }
}
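A sketch of how these helpers are typically used during shutdown (the two callbacks below are hypothetical); each step is wrapped so a failure is logged rather than aborting the remaining cleanup, mirroring AmqpPublisher.stop() above.

import rhttpc.utils.Recovered._

import scala.concurrent.{ExecutionContext, Future}

def shutdown(stopConsumer: () => Future[Unit], closeChannel: () => Unit)
            (implicit ec: ExecutionContext): Future[Unit] = {
  recoveredFuture("stopping consumer", stopConsumer())
    .map(_ => recovered("closing channel", closeChannel()))
}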
Example 107
Source File: FallbackPublisher.scala From reliable-http-client with Apache License 2.0 | 5 votes |
package rhttpc.transport.fallback import akka.actor.{ActorSystem, Scheduler} import akka.pattern.CircuitBreaker import org.slf4j.LoggerFactory import rhttpc.transport.{Message, Publisher} import scala.concurrent.Future import scala.concurrent.duration.FiniteDuration import scala.util.control.NonFatal private[fallback] class FallbackPublisher[Msg](main: Publisher[Msg], fallback: Publisher[Msg]) (maxFailures: Int, callTimeout: FiniteDuration, resetTimeout: FiniteDuration) (implicit system: ActorSystem) extends Publisher[Msg] { import system.dispatcher private val logger = LoggerFactory.getLogger(getClass) private val circuitBreaker = new CircuitBreaker(system.scheduler, maxFailures, callTimeout, resetTimeout) .onOpen(logger.debug("Circuit opened")) .onHalfOpen(logger.debug("Circuit half-opened")) .onClose(logger.debug("Circuit closed")) override def publish(msg: Message[Msg]): Future[Unit] = { circuitBreaker.withCircuitBreaker(main.publish(msg)).recoverWith { case NonFatal(ex) => logger.debug(s"Circuit is opened, sending message [${msg.getClass.getName}] to fallback transport") fallback.publish(msg) } } override def start(): Unit = { main.start() fallback.start() } override def stop(): Future[Unit] = { import rhttpc.utils.Recovered._ recoveredFuture("stopping main publisher", main.stop()) .flatMap(_ => recoveredFuture("stopping fallback publisher", fallback.stop())) } }
Example 108
Source File: Slf4jLogger.scala From zio-logging with Apache License 2.0 | 5 votes |
package zio.logging.slf4j import org.slf4j.{ LoggerFactory, MDC } import zio.internal.Tracing import zio.internal.stacktracer.Tracer import zio.internal.stacktracer.ZTraceElement.{ NoLocation, SourceLocation } import zio.internal.stacktracer.impl.AkkaLineNumbersTracer import zio.internal.tracing.TracingConfig import zio.logging.Logging import zio.logging._ import zio.{ ZIO, ZLayer } import scala.jdk.CollectionConverters._ object Slf4jLogger { private val tracing = Tracing(Tracer.globallyCached(new AkkaLineNumbersTracer), TracingConfig.enabled) private def classNameForLambda(lambda: => AnyRef) = tracing.tracer.traceLocation(() => lambda) match { case SourceLocation(_, clazz, _, _) => Some(clazz) case NoLocation(_) => None } private def logger(name: String) = ZIO.effectTotal( LoggerFactory.getLogger( name ) ) def make( logFormat: (LogContext, => String) => String, rootLoggerName: Option[String] = None ): ZLayer[Any, Nothing, Logging] = Logging.make( logger = { (context, line) => val loggerName = context.get(LogAnnotation.Name) match { case Nil => classNameForLambda(line).getOrElse("ZIO.defaultLogger") case names => LogAnnotation.Name.render(names) } logger(loggerName).map { slf4jLogger => val maybeThrowable = context.get(LogAnnotation.Throwable).orNull context.get(LogAnnotation.Level).level match { case LogLevel.Off.level => () case LogLevel.Debug.level => slf4jLogger.debug(logFormat(context, line), maybeThrowable) case LogLevel.Trace.level => slf4jLogger.trace(logFormat(context, line), maybeThrowable) case LogLevel.Info.level => slf4jLogger.info(logFormat(context, line), maybeThrowable) case LogLevel.Warn.level => slf4jLogger.warn(logFormat(context, line), maybeThrowable) case LogLevel.Error.level => slf4jLogger.error(logFormat(context, line), maybeThrowable) case LogLevel.Fatal.level => slf4jLogger.error(logFormat(context, line), maybeThrowable) } } }, rootLoggerName = rootLoggerName ) def makeWithAnnotationsAsMdc( mdcAnnotations: List[LogAnnotation[_]], logFormat: (LogContext, => String) => String = (_, s) => s, rootLoggerName: Option[String] = None ): ZLayer[Any, Nothing, Logging] = { val annotationNames = mdcAnnotations.map(_.name) Logging.make( (context, line) => { val loggerName = context.get(LogAnnotation.Name) match { case Nil => classNameForLambda(line).getOrElse("ZIO.defaultLogger") case names => LogAnnotation.Name.render(names) } logger(loggerName).map { slf4jLogger => val maybeThrowable = context.get(LogAnnotation.Throwable).orNull val mdc: Map[String, String] = context.renderContext.filter { case (k, _) => annotationNames.contains(k) } MDC.setContextMap(mdc.asJava) context.get(LogAnnotation.Level).level match { case LogLevel.Off.level => () case LogLevel.Debug.level => slf4jLogger.debug(logFormat(context, line), maybeThrowable) case LogLevel.Trace.level => slf4jLogger.trace(logFormat(context, line), maybeThrowable) case LogLevel.Info.level => slf4jLogger.info(logFormat(context, line), maybeThrowable) case LogLevel.Warn.level => slf4jLogger.warn(logFormat(context, line), maybeThrowable) case LogLevel.Error.level => slf4jLogger.error(logFormat(context, line), maybeThrowable) case LogLevel.Fatal.level => slf4jLogger.error(logFormat(context, line), maybeThrowable) } MDC.clear() } }, rootLoggerName = rootLoggerName ) } }
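A minimal wiring sketch for the factory above, assuming zio-logging's log accessor from the same package; the format function here simply passes the rendered line through.

import zio.logging._
import zio.logging.slf4j.Slf4jLogger

object LoggingWiring {
  // build the SLF4J-backed Logging layer with a pass-through format
  val loggingLayer = Slf4jLogger.make((_, line) => line)

  // any effect requiring Logging can then be satisfied with this layer
  val program = log.info("application started").provideLayer(loggingLayer)
}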
Example 109
Source File: SampleRoutes.scala From akka_streams_tutorial with MIT License | 5 votes |
package akkahttp import java.io.File import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server.Route import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Await import scala.concurrent.duration._ import scala.sys.process.Process import scala.util.{Failure, Success} object SampleRoutes extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("SampleRoutes") implicit val executionContext = system.dispatcher def getFromBrowsableDir: Route = { val dirToBrowse = File.separator + "tmp" // pathPrefix allows loading dirs and files recursively pathPrefix("entries") { getFromBrowseableDirectory(dirToBrowse) } } def parseFormData: Route = path("post") { formFields('color, 'age.as[Int]) { (color, age) => complete(s"The color is '$color' and the age is $age") } } def routes: Route = { getFromBrowsableDir ~ parseFormData } val bindingFuture = Http().bindAndHandle(routes, "127.0.0.1", 8000) bindingFuture.onComplete { case Success(b) => println("Server started, listening on: " + b.localAddress) case Failure(e) => println(s"Server could not bind to... Exception message: ${e.getMessage}") system.terminate() } def browserClient() = { val os = System.getProperty("os.name").toLowerCase if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").! } browserClient() sys.addShutdownHook { println("About to shutdown...") val fut = bindingFuture.map(serverBinding => serverBinding.terminate(hardDeadline = 3.seconds)) println("Waiting for connections to terminate...") val onceAllConnectionsTerminated = Await.result(fut, 10.seconds) println("Connections terminated") onceAllConnectionsTerminated.flatMap { _ => system.terminate() } } }
Example 110
Source File: PublishToSourceQueueFromMultipleThreads.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import akka.actor.ActorSystem import akka.stream.Supervision.Decider import akka.stream._ import akka.stream.scaladsl.{Flow, Sink, Source, SourceQueueWithComplete} import akka.{Done, NotUsed} import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.concurrent.duration._ import scala.util.{Failure, Success} object PublishToSourceQueueFromMultipleThreads extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("PublishToSourceQueueFromMultipleThreads") implicit val ec = system.dispatcher val bufferSize = 100 // As of akka 2.6.x there is a thread safe implementation for SourceQueue val maxConcurrentOffers = 1000 val numberOfPublishingClients = 1000 val slowSink: Sink[Seq[Int], NotUsed] = Flow[Seq[Int]] .delay(2.seconds, DelayOverflowStrategy.backpressure) .to(Sink.foreach(e => logger.info(s"Reached sink: $e"))) val sourceQueue: SourceQueueWithComplete[Int] = Source .queue[Int](bufferSize, OverflowStrategy.backpressure, maxConcurrentOffers) .groupedWithin(10, 1.seconds) .to(slowSink) .run val doneConsuming: Future[Done] = sourceQueue.watchCompletion() signalWhen(doneConsuming, "consuming") //never completes simulatePublishingFromMulitpleThreads() // Before 2.6.x a stream had to be used to throttle and control the backpressure //simulatePublishingClientsFromStream() // Decide on the stream level, because the OverflowStrategy.backpressure // on the sourceQueue causes an IllegalStateException // Handling this on the stream level allows to restart the stream private def simulatePublishingClientsFromStream() = { val decider: Decider = { case _: IllegalStateException => println("Got backpressure signal for offered element, restart..."); Supervision.Restart case _ => Supervision.Stop } val donePublishing: Future[Done] = Source(1 to numberOfPublishingClients) .mapAsync(10)(offerToSourceQueue) //throttle .withAttributes(ActorAttributes.supervisionStrategy(decider)) .runWith(Sink.ignore) signalWhen(donePublishing, "publishing") } private def simulatePublishingFromMulitpleThreads() = (1 to numberOfPublishingClients).par.foreach(offerToSourceQueue) private def offerToSourceQueue(each: Int) = { sourceQueue.offer(each).map { case QueueOfferResult.Enqueued => logger.info(s"enqueued $each") case QueueOfferResult.Dropped => logger.info(s"dropped $each") case QueueOfferResult.Failure(ex) => logger.info(s"Offer failed: $ex") case QueueOfferResult.QueueClosed => logger.info("Source Queue closed") } } private def signalWhen(done: Future[Done], operation: String) = { done.onComplete { case Success(b) => logger.info(s"Finished: $operation") case Failure(e) => logger.info(s"Failure: $e About to terminate...") system.terminate() } } }
Example 111
Source File: TweetExample.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import java.time.{Instant, ZoneId} import akka.NotUsed import akka.actor.{ActorSystem, Cancellable} import akka.stream.DelayOverflowStrategy import akka.stream.scaladsl.{Flow, MergePrioritized, Sink, Source} import org.apache.commons.lang3.exception.ExceptionUtils import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.duration._ import scala.util.{Failure, Success} object TweetExample extends App { implicit val system = ActorSystem("TweetExample") implicit val ec = system.dispatcher val logger: Logger = LoggerFactory.getLogger(this.getClass) final case class Author(handle: String) final case class Hashtag(name: String) final case class Tweet(author: Author, timestamp: Long, body: String) { def hashtags: Set[Hashtag] = body.split(" ").collect { case t if t.startsWith("#") => Hashtag(t) }.toSet override def toString = { val localDateTime = Instant.ofEpochMilli(timestamp).atZone(ZoneId.systemDefault()).toLocalDateTime s"$localDateTime - ${author.handle} tweeted: ${body.take(5)}..." } } val akkaTag = Hashtag("#akka") val tweetsLowPrio: Source[Tweet, Cancellable] = Source.tick(1.second, 200.millis, NotUsed).map(_ => Tweet(Author("LowPrio"), System.currentTimeMillis, "#other #akka aBody")) val tweetsHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("HighPrio"), System.currentTimeMillis, "#akka #other aBody")) val tweetsVeryHighPrio: Source[Tweet, Cancellable] = Source.tick(2.second, 1.second, NotUsed).map(_ => Tweet(Author("VeryHighPrio"), System.currentTimeMillis, "#akka #other aBody")) val limitedTweets: Source[Tweet, NotUsed] = Source.combine(tweetsLowPrio, tweetsHighPrio, tweetsVeryHighPrio)(_ => MergePrioritized(List(1, 10, 100))).take(20) val processingFlow = Flow[Tweet] .filter(_.hashtags.contains(akkaTag)) .wireTap(each => logger.info(s"$each")) val slowDownstream = Flow[Tweet] .delay(5.seconds, DelayOverflowStrategy.backpressure) val processedTweets = limitedTweets .via(processingFlow) .via(slowDownstream) .runWith(Sink.seq) processedTweets.onComplete { case Success(results) => logger.info(s"Successfully processed: ${results.size} tweets") system.terminate() case Failure(exception) => logger.info(s"The stream failed with: ${ExceptionUtils.getRootCause(exception)}") system.terminate() } }
Example 112
Source File: AsyncExecution.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import akka.Done import akka.actor.ActorSystem import akka.stream.ActorAttributes import akka.stream.scaladsl.{Flow, Sink, Source} import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Future import scala.util.{Failure, Success} object AsyncExecution extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("AsyncExecution") implicit val ec = system.dispatcher def stage(name: String) = Flow[Int] .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}")) def stageBlocking(name: String) = Flow[Int] .wireTap(index => logger.info(s"Stage $name processing element $index by ${Thread.currentThread().getName}")) .wireTap(_ => Thread.sleep(5000)) .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking")) def sinkBlocking: Sink[Int, Future[Done]] = Sink.foreach { index: Int => Thread.sleep(2000) logger.info(s"Slow sink processing element $index by ${Thread.currentThread().getName}") } //Adding a custom dispatcher creates an async boundary //see discussion in: https://discuss.lightbend.com/t/how-can-i-make-sure-that-fileio-frompath-is-picking-up-my-dispatcher/6528/4 .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking")) val done = Source(1 to 10) .via(stage("A")).async //When activated instead of alsoTo(sinkBlocking): elements for stage C are held up by stage B //.via(stageBlocking("B")).async .alsoTo(sinkBlocking).async .via(stage("C")).async .runWith(Sink.ignore) //With alsoTo(sinkBlocking) the stages A and C signal "done" too early and thus would terminate the whole stream //The reason for this is the custom dispatcher in sinkBlocking //terminateWhen(done) def terminateWhen(done: Future[_]) = { done.onComplete { case Success(_) => println("Flow Success. About to terminate...") system.terminate() case Failure(e) => println(s"Flow Failure: $e. About to terminate...") system.terminate() } } }
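The stages above rely on a dispatcher named "custom-dispatcher-for-blocking" that is not shown. A sketch of how such a dispatcher could be defined follows; the pool settings are assumptions, and the HOCON block would normally live in application.conf rather than inline.

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory

object AsyncExecutionConfig {
  private val blockingDispatcher = ConfigFactory.parseString(
    """
      |custom-dispatcher-for-blocking {
      |  type = Dispatcher
      |  executor = "thread-pool-executor"
      |  thread-pool-executor {
      |    fixed-pool-size = 16
      |  }
      |  throughput = 1
      |}
    """.stripMargin)

  // hypothetical system wiring: fall back to the regular application.conf for everything else
  val system = ActorSystem("AsyncExecution", blockingDispatcher.withFallback(ConfigFactory.load()))
}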
Example 113
Source File: WaitForThreeFlowsToComplete.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.stream import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream._ import akka.stream.scaladsl._ import akka.util.ByteString import org.slf4j.{Logger, LoggerFactory} import scala.concurrent._ import scala.concurrent.duration._ object WaitForThreeFlowsToComplete extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("WaitForThreeFlowsToComplete") implicit val ec = system.dispatcher def lineSink(filename: String): Sink[String, Future[IOResult]] = Flow[String] .map(s => ByteString(s + "\n")) .wireTap(_ => logger.info(s"Add line to file: $filename")) .toMat(FileIO.toPath(Paths.get(filename)))(Keep.right) //retain to the Future[IOResult] .withAttributes(ActorAttributes.dispatcher("custom-dispatcher-for-blocking")) val origSource = Source(1 to 10) //scan (= transform) the source val factorialsSource = origSource.scan(BigInt(1))((acc, next) => acc * next) val fastFlow = origSource.runForeach(i => logger.info(s"Reached sink: $i")) val slowFlow1 = factorialsSource .map(_.toString) .runWith(lineSink("factorial1.txt")) val slowFlow2 = factorialsSource .zipWith(Source(0 to 10))((num, idx) => s"$idx! = $num") .throttle(1, 1.second, 1, ThrottleMode.shaping) .runWith(lineSink("factorial2.txt")) val allDone = for { fastFlowDone <- fastFlow slowFlow1Done <- slowFlow1 slowFlow2Done <- slowFlow2 } yield (fastFlowDone, slowFlow1Done, slowFlow2Done) allDone.onComplete { results => logger.info(s"Resulting futures from flows: $results - about to terminate") system.terminate() } }
Example 114
Source File: DistributedShellClient.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.examples.distributedshell import java.util.concurrent.TimeUnit import scala.concurrent.Await import scala.concurrent.duration.Duration import akka.pattern.ask import org.slf4j.{Logger, LoggerFactory} import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption} import org.apache.gearpump.examples.distributedshell.DistShellAppMaster.ShellCommand import org.apache.gearpump.util.{AkkaApp, Constants} object DistributedShellClient extends AkkaApp with ArgumentsParser { implicit val timeout = Constants.FUTURE_TIMEOUT private val LOG: Logger = LoggerFactory.getLogger(getClass) override val options: Array[(String, CLIOption[Any])] = Array( "appid" -> CLIOption[Int]("<the distributed shell appid>", required = true), "command" -> CLIOption[String]("<shell command>", required = true) ) override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(args) val context = ClientContext(akkaConf) implicit val system = context.system implicit val dispatcher = system.dispatcher val appid = config.getInt("appid") val command = config.getString("command") val appMaster = context.resolveAppID(appid) LOG.info(s"Resolved appMaster $appid address $appMaster, sending command $command") val future = (appMaster ? ShellCommand(command)).map { result => LOG.info(s"Result: \n$result") context.close() } Await.ready(future, Duration(60, TimeUnit.SECONDS)) } }
Example 115
Source File: CGroupProcessLauncher.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.cluster.worker import java.io.File import scala.sys.process.Process import com.typesafe.config.Config import org.slf4j.{Logger, LoggerFactory} import org.apache.gearpump.cluster.scheduler.Resource import org.apache.gearpump.util.{ProcessLogRedirector, RichProcess} class CGroupProcessLauncher(val config: Config) extends ExecutorProcessLauncher { private val APP_MASTER = -1 private val cgroupManager: Option[CGroupManager] = CGroupManager.getInstance(config) private val LOG: Logger = LoggerFactory.getLogger(getClass) override def cleanProcess(appId: Int, executorId: Int): Unit = { if (executorId != APP_MASTER) { cgroupManager.foreach(_.shutDownExecutor(appId, executorId)) } } override def createProcess( appId: Int, executorId: Int, resource: Resource, appConfig: Config, options: Array[String], classPath: Array[String], mainClass: String, arguments: Array[String]): RichProcess = { val cgroupCommand = if (executorId != APP_MASTER) { cgroupManager.map(_.startNewExecutor(appConfig, resource.slots, appId, executorId)).getOrElse(List.empty) } else List.empty LOG.info(s"Launch executor $executorId with CGroup ${cgroupCommand.mkString(" ")}, " + s"classpath: ${classPath.mkString(File.pathSeparator)}") val java = System.getProperty("java.home") + "/bin/java" val command = cgroupCommand ++ List(java) ++ options ++ List("-cp", classPath .mkString(File.pathSeparator), mainClass) ++ arguments LOG.info(s"Starting executor process java $mainClass ${arguments.mkString(" ")}; " + s"options: ${options.mkString(" ")}") val logger = new ProcessLogRedirector() val process = Process(command).run(logger) new RichProcess(process, logger) } }
Example 116
Source File: ProcessLogRedirector.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.util

import java.io.{Closeable, Flushable}

import scala.sys.process.ProcessLogger

import org.slf4j.LoggerFactory

class ProcessLogRedirector extends ProcessLogger with Closeable with Flushable with ConsoleOutput {
  private val LOG = LoggerFactory.getLogger("redirect")

  // We only capture the first 1K chars
  private final val LENGTH = 1000
  private var _error: String = ""
  private var _output: String = ""

  def error: String = _error
  def output: String = _output

  def out(s: => String): Unit = {
    if (_output.length <= LENGTH) {
      _output += "\n" + s
    }
    LOG.info(s)
  }

  def err(s: => String): Unit = {
    if (_error.length <= LENGTH) {
      _error += "\n" + s
    }
    LOG.error(s)
  }

  def buffer[T](f: => T): T = f

  def close(): Unit = ()

  def flush(): Unit = ()
}
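The intended usage is visible in CGroupProcessLauncher above; a small standalone sketch (the command is arbitrary): hand the redirector to Process.run so stdout/stderr flow to SLF4J while the captured prefix stays available for error reporting.

import scala.sys.process.Process

import org.apache.gearpump.util.ProcessLogRedirector

object RunWithRedirect {
  def main(args: Array[String]): Unit = {
    val logger = new ProcessLogRedirector()
    val process = Process(Seq("java", "-version")).run(logger)
    val exitCode = process.exitValue() // blocks until the process finishes
    if (exitCode != 0) {
      println(s"process failed, captured stderr prefix: ${logger.error}")
    }
    logger.close()
  }
}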
Example 117
package org.dizhang.seqspark.assoc import breeze.linalg._ import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM} import org.dizhang.seqspark.stat.{Resampling, ScoreTest} import org.dizhang.seqspark.util.General.RichDouble import org.slf4j.LoggerFactory import scala.language.existentials @SerialVersionUID(7727880001L) trait VT extends AssocMethod { def nullModel: NM def x: Encode.VT def result: AssocMethod.Result } object VT { val logger = LoggerFactory.getLogger(getClass) def apply(nullModel: NM, x: Encode.Coding): VT with AssocMethod.AnalyticTest = { val nmf = nullModel match { case NM.Simple(y, b) => NM.Fit(y, b) case NM.Mutiple(y, c, b) => NM.Fit(y, c, b) case nm: NM.Fitted => nm } AnalyticScoreTest(nmf, x.asInstanceOf[Encode.VT]) } def apply(ref: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.Coding): ResamplingTest = { ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.VT]) } def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = { //println(s"scores: ${st.score.toArray.mkString(",")}") //println(s"variances: ${diag(st.variance).toArray.mkString(",")}") val m = x.asInstanceOf[Encode.VT].coding val ts = m.map{sv => val st = ScoreTest(nm, sv) st.score(0)/st.variance(0, 0).sqrt } //val ts = st.score :/ diag(st.variance).map(x => x.sqrt) max(ts) } @SerialVersionUID(7727880101L) final case class AnalyticScoreTest(nullModel: NM.Fitted, x: Encode.VT) extends VT with AssocMethod.AnalyticTest { val statistic = getStatistic(nullModel, x) val pValue = None def result: AssocMethod.VTAnalytic = { val info = s"MAFs=${x.coding.length}" AssocMethod.VTAnalytic(x.vars, x.size, statistic, pValue, info) } } @SerialVersionUID(7727880201L) final case class ResamplingTest(refStatistic: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.VT) extends VT with AssocMethod.ResamplingTest { def pCount = { Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount } def result: AssocMethod.VTResampling = AssocMethod.VTResampling(x.vars, x.size, refStatistic, pCount) } }
Example 118
Source File: LCCSLiu.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.stat import breeze.linalg.{sum, DenseVector => DV} import breeze.numerics.pow import org.dizhang.seqspark.stat.LCCSLiu._ import org.dizhang.seqspark.stat.{LinearCombinationChiSquare => LCCS} import org.dizhang.seqspark.util.General.RichDouble import org.slf4j.LoggerFactory object LCCSLiu { val logger = LoggerFactory.getLogger(getClass) case class CDFLiu(pvalue: Double, ifault: Int) extends LCCS.CDF { def trace = Array(0.0) override def toString = "Pvalue: %10f".format(pvalue) } trait CentralOneDF extends LinearCombinationChiSquare { def degreeOfFreedom = DV.ones[Double](size) def nonCentrality = DV.zeros[Double](size) } trait Old extends LCCSLiu { def a = if (squareOfS1LargerThanS2) 1.0/(s1 - (s1.square - s2).sqrt) else 1.0/s1 def df = if (squareOfS1LargerThanS2) a.square - 2 * delta else c2.cube/c3.square } trait New extends LCCSLiu { def a = if (squareOfS1LargerThanS2) 1.0/(s1 - (s1.square - s2).sqrt) else 1.0/s2.sqrt def df = if (squareOfS1LargerThanS2) a.square - 2 * delta else 1.0/s2 } @SerialVersionUID(7778550101L) case class Simple(lambda: DV[Double]) extends LCCSLiu with CentralOneDF with Old { val c1 = ck(1) val c2 = ck(2) val c3 = ck(3) val c4 = ck(4) } @SerialVersionUID(7778550201L) case class Modified(lambda: DV[Double]) extends LCCSLiu with CentralOneDF with New { val c1 = ck(1) val c2 = ck(2) val c3 = ck(3) val c4 = ck(4) } case class SimpleMoments(cs: IndexedSeq[Double]) extends LCCSLiu with CentralOneDF with Old { def lambda = DV.zeros[Double](0) override val c1 = cs(0) override val c2 = cs(1) override val c3 = cs(2) override val c4 = cs(3) } case class ModifiedMoments(cs: IndexedSeq[Double]) extends LCCSLiu with CentralOneDF with New { def lambda = DV.zeros[Double](0) override val c1 = cs(0) override val c2 = cs(1) override val c3 = cs(2) override val c4 = cs(3) } } @SerialVersionUID(7778550001L) trait LCCSLiu extends LinearCombinationChiSquare { def ck(k: Int): Double = { val lbk = pow(lambda, k) (lbk dot degreeOfFreedom) + k * (lbk dot nonCentrality) } def c1:Double def c2:Double def c3:Double def c4:Double def s1:Double = c3/c2.cube.sqrt def s2:Double = c4/c2.square def muQ:Double = c1 def sigmaQ:Double = (2 * c2).sqrt protected lazy val squareOfS1LargerThanS2: Boolean = { s1.square > s2 } def a: Double def delta:Double = if (squareOfS1LargerThanS2) s1 * a.cube - a.square else 0.0 def df: Double def sigmaX:Double = 2.0.sqrt * a def muX:Double = df + delta def cdf(cutoff: Double): CDFLiu = { //logger.debug(s"muX: $muX sigmaX: $sigmaX muQ: $muQ sigmaQ: $sigmaQ df: $df delta: $delta ") val nccs = NonCentralChiSquare(df + delta, delta) val norm = (cutoff - muQ)/sigmaQ val norm1 = norm * sigmaX + muX val pv = nccs.cdf(norm1) if (pv >= 0.0 && pv <= 1.0) { CDFLiu(pv, 0) } else { CDFLiu(pv, 1) } } }
Example 119
package org.dizhang.seqspark.stat import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import org.apache.spark.mllib.feature.{PCA => SPCA} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD import org.dizhang.seqspark.ds.{DenseCounter, Genotype, SparseCounter} import org.dizhang.seqspark.util.General._ import org.dizhang.seqspark.worker.Data import org.slf4j.LoggerFactory } def pc(n: Int): BDM[Double] = { val model = new SPCA(n) val data = this.prepare if (data.isEmpty()) { new BDM[Double](0, 0) } else { val res = model.fit(data).pc.values new BDM(res.length/n, n, res) } } }
Example 120
Source File: Regions.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.annot import org.apache.spark.SparkContext import org.dizhang.seqspark.ds.Region import org.slf4j.LoggerFactory class Regions(private val loci: Map[Byte, IntervalTree[Region]]) { def count(): Int = { loci.map{case (k, v) => IntervalTree.count(v)}.sum } def overlap(r: Region): Boolean = { loci.contains(r.chr) && IntervalTree.overlap(loci(r.chr), r) } def lookup(r: Region): List[Region] = { if (! loci.contains(r.chr)) { List[Region]() } else { IntervalTree.lookup(loci(r.chr), r) } } } object Regions { type LOCI = Map[Byte, Array[Region]] val logger = LoggerFactory.getLogger(this.getClass) def comop(m1: LOCI, m2: LOCI): LOCI = { m1 ++ (for ((k, v) <- m2) yield k -> (v ++ m1.getOrElse(k, Array()))) } def apply(raw: Iterator[Region]): Regions = { val regArr = raw.toArray logger.info(s"${regArr.length} regions to parse") val regByChrEle = regArr.map(r => Map(r.chr -> Array(r))) //logger.info(s"${regByChrEle.count()} regions after map") val regByChr = regByChrEle.reduce((a, b) => comop(a, b)) //logger.info(s"${regByChr.map{case (k, v) => v.length}.sum} regions after combine") val rs = new Regions(regByChr.map{case (k, v) => k -> IntervalTree(v.toIterator)}) logger.info(s"${rs.count()} regions generated") rs } def makeExome(coordFile: String)(sc: SparkContext): Regions = { val locRaw = sc.textFile(coordFile).cache() val header = locRaw.first().split("\t") val locRdd = locRaw.zipWithUniqueId().filter(_._2 > 0).map(_._1) //val iter = scala.io.Source.fromFile(coordFile).getLines() val raw = locRdd.filter(l => ! l.split("\t")(2).contains("_")) .flatMap(l => RefGene.makeExons(l, header)).toLocalIterator apply(raw) } }
Example 121
Source File: Association.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.worker import org.dizhang.seqspark.assoc.AssocMaster import org.dizhang.seqspark.ds.Genotype import org.dizhang.seqspark.util.SeqContext import org.slf4j.LoggerFactory object Association { private val logger = LoggerFactory.getLogger(getClass) def apply[B: Genotype](input: Data[B])(implicit ssc: SeqContext): Unit = { if (ssc.userConfig.pipeline.contains("association")) if (input.isEmpty()) { logger.warn(s"no variants left. cannot perform association analysis") } else { new AssocMaster(input).run() } } }
Example 122
Source File: Export.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.worker import java.net.URI import java.nio.file.{Files, Path, Paths} import org.dizhang.seqspark.ds.Genotype import org.dizhang.seqspark.ds.VCF._ import org.dizhang.seqspark.util.SeqContext import org.dizhang.seqspark.util.UserConfig.hdfs import org.apache.hadoop import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ object Export { private val logger = LoggerFactory.getLogger(getClass) def apply[A: Genotype](data: Data[A])(implicit ssc: SeqContext): Unit = { val geno = implicitly[Genotype[A]] val conf = ssc.userConfig.output.genotype if (conf.export) { val path = if (conf.path.isEmpty) ssc.userConfig.input.genotype.path + "." + ssc.userConfig.project else conf.path logger.info(s"going to export data to $path") if (path.startsWith("file:")) { val p = Paths.get(URI.create(path)) if (Files.exists(p)) { Files.walk(p) .iterator() .asScala .toList .sorted(Ordering[Path].reverse) .foreach(f => Files.delete(f)) } } else { val hdPath = new hadoop.fs.Path(path) if (hdfs.exists(hdPath)) { hdfs.delete(hdPath, true) } } data.samples(conf.samples).saveAsTextFile(path) } if (conf.save || conf.cache) { data.saveAsObjectFile(conf.path) } } }
Example 123
Source File: Annotation.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.worker import org.apache.spark.SparkContext import org.dizhang.seqspark.annot._ import org.dizhang.seqspark.annot.VariantAnnotOp._ import org.dizhang.seqspark.ds.{Genotype, Variant} import org.dizhang.seqspark.util.{Constant, QueryParser, SeqContext} import org.dizhang.seqspark.util.UserConfig._ import org.dizhang.seqspark.util.ConfigValue._ import org.slf4j.LoggerFactory import org.apache.hadoop import org.dizhang.seqspark.worker.Variants.countByFunction object Annotation { private val logger = LoggerFactory.getLogger(getClass) private val dbExists = Constant.Variant.dbExists def apply[A: Genotype](data: Data[A], a: A)(implicit ssc: SeqContext): Data[A] = { logger.info("annotation") val conf = ssc.userConfig val queryExprs = QueryParser.parse(conf.annotation.addInfo) val dbs = QueryParser.dbs(queryExprs.values) val assocConf = conf.association paired.map{ case (_, (vt, dbmap)) => val res = QueryParser.eval(queryExprs)(dbmap) vt.updateInfo(res) vt } } } }
Example 124
Source File: LogicalParserSpec.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.util import org.scalatest.{FlatSpec, Matchers} import org.slf4j.LoggerFactory class LogicalParserSpec extends FlatSpec with Matchers { val logger = LoggerFactory.getLogger(getClass) "A LogicalParser" should "be able to constructed" in { val lp = LogicalParser.parse("INFO.AN>3800 and INFO.AC>38") LogicalParser.parse(List("maf < 0.01 or maf > 0.99", "SS_PASS")) LogicalParser.parse(List("maf >= 0.01", "maf <= 0.99", "SS_PASS")) } "A LogicalParser" should "eval to true" in { val lp = LogicalParser.parse("INFO.AN>3800 and INFO.AC>38") LogicalParser.eval(lp)(Map("INFO.AN"->"3900", "INFO.AC"->"40")) should be (true) } "A LogicalParser" should "eval to false" in { val lp = LogicalParser.parse("INFO.AN>3800 and INFO.AC>38 and INFO.AC<3750") LogicalParser.eval(lp)(Map("INFO.AN"->"3900", "INFO.AC"->"3775")) should be (false) } "A LogicalParser" should "handle String comparisons" in { val lp = LogicalParser.parse("chr != \"X\" and chr != \"Y\"") LogicalParser.eval(lp)(Map("chr" -> "11")) should be (true) } "A LogicalParser" should "handle nested conditions" in { val lp = LogicalParser.parse(List("missingRate < 0.1", "batchMissingRate < 0.1", "hwePvalue >= 1e-5")) logger.debug(LogicalParser.view(lp)) LogicalParser.eval(lp)( Map("missingRate" -> "0.3", "batchMissingRate" -> "0.4", "hwePvalue" -> "0.001") ) should be (false) } "A LogicalParser" should "parse filter" in { val lp = LogicalParser.parse(List("FILTER==\"PASS\"", "INFO.AN>=3468", "INFO.AC>=34", "INFO.AC<=3815")) logger.debug(LogicalParser.view(lp)) } }
Example 125
Source File: GenotypeSpec.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.ds import org.scalatest.{FlatSpec, Matchers} import org.slf4j.LoggerFactory class GenotypeSpec extends FlatSpec with Matchers { val logger = LoggerFactory.getLogger(getClass) val raw = { Array( ".:0", "0:4", "1:3", "./.:2", "0/0:12", "0/1:2", "1/0:1", "1/1:0", ".|.:1", "0|0:8", "0|1:7", "1|0:9", "1|1:3" ) } val simple = raw.map(g => Genotype.Raw.toSimpleGenotype(g)) "A Raw Genotype" should "be able to convert to simple and back" in { val s = raw.map(g => Genotype.Raw.toSimpleGenotype(g)) logger.debug(s"raw to simple: ${s.mkString(",")}") val r = s.map(g => Genotype.Simple.toVCF(g)) logger.debug(s"simple to raw: ${r.mkString(",")}") r.map(g => Genotype.Raw.toSimpleGenotype(g)) should be (s) } "A Raw Genotype" should "give right callRate" in { val c = raw.map(g => Genotype.Raw.callRate(g)) logger.debug(s"raw callrate: ${c.mkString(",")}") val cnt = Counter.fromIndexedSeq(c, (1.0, 1.0)).reduce logger.debug(s"raw callrate: ${cnt._1/cnt._2}") } "A Simple Genotype" should "give right callRate" in { val c = simple.map(g => Genotype.Simple.callRate(g)) logger.debug(s"simple callRate: ${c.mkString(",")}") val cnt = Counter.fromIndexedSeq(c, (1.0, 1.0)).reduce logger.debug(s"simple callrate: ${cnt._1/cnt._2}") } "A Raw Genotype" should "give right MAF" in { val maf = raw.map(g => Genotype.Raw.toAAF(g)) logger.debug(s"raw maf: ${maf.mkString(",")}") val cnt = Counter.fromIndexedSeq(maf, (0.0, 2.0)).reduce logger.debug(s"raw maf: ${cnt._1/cnt._2}") } "A Simple Genotype" should "give right MAF" in { val maf = simple.map(g => Genotype.Simple.toAAF(g)) logger.debug(s"simple maf: ${maf.mkString(",")}") val cnt = Counter.fromIndexedSeq(maf, (0.0, 2.0)).reduce logger.debug(s"simple maf: ${cnt._1/cnt._2}") } }
Example 126
Source File: JavaScript.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.magic.builtin import java.io.PrintStream import com.google.common.base.Strings import org.apache.toree.kernel.protocol.v5.MIMEType import org.apache.toree.magic._ import org.apache.toree.magic.dependencies.IncludeOutputStream import org.apache.toree.utils.ArgumentParsingSupport import org.slf4j.LoggerFactory import org.apache.toree.plugins.annotations.Event class JavaScript extends CellMagic with ArgumentParsingSupport with IncludeOutputStream { // Lazy because the outputStream is not provided at construction private def printStream = new PrintStream(outputStream) @Event(name = "javascript") override def execute(code: String): CellMagicOutput = { def printHelpAndReturn: CellMagicOutput = { printHelp(printStream, """%JavaScript <string_code>""") CellMagicOutput() } Strings.isNullOrEmpty(code) match { case true => printHelpAndReturn case false => CellMagicOutput(MIMEType.ApplicationJavaScript -> code) } } }
Example 127
Source File: InterpreterManager.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.boot.layer import org.apache.toree.kernel.api.KernelLike import com.typesafe.config.Config import org.apache.toree.interpreter._ import scala.collection.JavaConverters._ import org.slf4j.LoggerFactory case class InterpreterManager( default: String = "Scala", interpreters: Map[String, Interpreter] = Map[String, Interpreter]() ) { def initializeInterpreters(kernel: KernelLike): Unit = { interpreters.values.foreach(interpreter => interpreter.init(kernel) ) } def addInterpreter( name:String, interpreter: Interpreter ): InterpreterManager = { copy(interpreters = interpreters + (name -> interpreter)) } def defaultInterpreter: Option[Interpreter] = { interpreters.get(default) } } object InterpreterManager { protected val logger = LoggerFactory.getLogger(this.getClass.getName) def apply(config: Config): InterpreterManager = { val ip = config.getStringList("interpreter_plugins").asScala ++ config.getStringList("default_interpreter_plugin").asScala val m = ip.foldLeft(Map[String, Interpreter]())( (acc, v) => { v.split(":") match { case Array(name, className) => try { val i = instantiate(className, config) acc + (name -> i) } catch { case e:Throwable => logger.error("Error loading interpreter class " + className) logger.error(e.getMessage()) //acc throw e } case _ => acc } }) val default = config.getString("default_interpreter") InterpreterManager(interpreters = m, default = default) } private def instantiate(className:String, config:Config):Interpreter = { try { Class .forName(className) .getConstructor(Class.forName("com.typesafe.config.Config")) .newInstance(config).asInstanceOf[Interpreter] } catch { case e: NoSuchMethodException => logger.debug("Using default constructor for class " + className) Class .forName(className) .newInstance().asInstanceOf[Interpreter] } } }
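As a rough sketch of the configuration this factory reads, here is a hypothetical Typesafe Config subtree using the keys consumed by InterpreterManager.apply; the plugin class name is a placeholder and would have to exist on the classpath for instantiate to succeed:

import com.typesafe.config.ConfigFactory

object InterpreterConfigSketch {
  // Hypothetical settings using the keys read by InterpreterManager.apply
  val interpreterConfig = ConfigFactory.parseString(
    """
      |interpreter_plugins = ["Scala:org.example.MyInterpreter"]
      |default_interpreter_plugin = []
      |default_interpreter = "Scala"
    """.stripMargin)

  // InterpreterManager(interpreterConfig) would reflectively load org.example.MyInterpreter
  // (a placeholder class) and register it under the name "Scala".
}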
Example 128
Source File: SocketConfigSpec.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.kernel.protocol.v5.kernel.socket import com.typesafe.config.ConfigFactory import org.scalatest.{FunSpec, Matchers} import org.slf4j.LoggerFactory import play.api.data.validation.ValidationError import play.api.libs.json.{JsPath, JsValue, Json} class SocketConfigSpec extends FunSpec with Matchers { val logger = LoggerFactory.getLogger("jt4") //logger.error("WOOT!") private val jsonString: String = """ { "stdin_port": 10000, "control_port": 10001, "hb_port": 10002, "shell_port": 10003, "iopub_port": 10004, "ip": "1.2.3.4", "transport": "tcp", "signature_scheme": "hmac-sha256", "key": "" } """.stripMargin val socketConfigJson: JsValue = Json.parse(jsonString) val socketConfigFromConfig = SocketConfig.fromConfig(ConfigFactory.parseString(jsonString)) val socketConfig = SocketConfig( 10000, 10001, 10002, 10003, 10004, "1.2.3.4", "tcp", "hmac-sha256", "" ) describe("SocketConfig") { describe("implicit conversions") { it("should implicitly convert from valid json to a SocketConfig instance") { // This is the least safe way to convert as an error is thrown if it fails socketConfigJson.as[SocketConfig] should be (socketConfig) } it("should also work with asOpt") { // This is safer, but we lose the error information as it returns // None if the conversion fails val newCompleteRequest = socketConfigJson.asOpt[SocketConfig] newCompleteRequest.get should be (socketConfig) } it("should also work with validate") { // This is the safest as it collects all error information (not just first error) and reports it val CompleteRequestResults = socketConfigJson.validate[SocketConfig] CompleteRequestResults.fold( (invalid: Seq[(JsPath, Seq[ValidationError])]) => println("Failed!"), (valid: SocketConfig) => valid ) should be (socketConfig) } it("should implicitly convert from a SocketConfig instance to valid json") { Json.toJson(socketConfig) should be (socketConfigJson) } } describe("#toConfig") { it("should implicitly convert from valid json to a SocketConfig instance") { // This is the least safe way to convert as an error is thrown if it fails socketConfigFromConfig should be (socketConfig) } it("should convert json file to SocketConfig object") { socketConfigFromConfig.stdin_port should be (10000) } } } }
Example 129
Source File: MagicManager.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.magic import org.apache.toree.plugins.{Plugin, PluginMethodResult, PluginManager} import org.slf4j.LoggerFactory import scala.annotation.tailrec import scala.language.dynamics import scala.runtime.BoxedUnit import scala.util.{Try, Failure, Success} class MagicManager(private val pluginManager: PluginManager) extends Dynamic { protected val logger = LoggerFactory.getLogger(this.getClass.getName) @throws[MagicNotFoundException] def findMagic(name: String): Magic = { @tailrec def inheritsMagic(klass: Class[_]): Boolean = { if (klass == null) false else if (klass.getInterfaces.exists(classOf[Magic].isAssignableFrom)) true else inheritsMagic(klass.getSuperclass) } val magics = pluginManager.plugins .filter(p => inheritsMagic(p.getClass)) .filter(_.simpleName.split("\\.").last.toLowerCase == name.toLowerCase) if (magics.size <= 0){ logger.error(s"No magic found for $name!") throw new MagicNotFoundException(name) } else if (magics.size > 1) { logger.warn(s"More than one magic found for $name!") } magics.head.asInstanceOf[Magic] } @throws[MagicNotFoundException] def applyDynamic(name: String)(args: Any*): MagicOutput = { val arg = args.headOption.map(_.toString).getOrElse("") import org.apache.toree.plugins.Implicits._ val result = pluginManager.fireEventFirstResult( name.toLowerCase(), "input" -> arg ) result match { case Some(r: PluginMethodResult) => handleMagicResult(name, r.toTry) case None => throw new MagicNotFoundException(name) } } private def handleMagicResult(name: String, result: Try[Any]): MagicOutput = result match { case Success(magicOutput) => magicOutput match { case out: MagicOutput => out case null | _: BoxedUnit => MagicOutput() case cmo: Map[_, _] if cmo.keys.forall(_.isInstanceOf[String]) && cmo.values.forall(_.isInstanceOf[String]) => MagicOutput(cmo.asInstanceOf[Map[String, String]].toSeq:_*) case unknown => val message = s"""Magic $name did not return proper magic output |type. Expected ${classOf[MagicOutput].getName}, but found |type of ${unknown.getClass.getName}.""".trim.stripMargin logger.warn(message) MagicOutput("text/plain" -> message) } case Failure(t) => val message = s"Magic $name failed to execute with error: \n${t.getMessage}" logger.warn(message, t) MagicOutput("text/plain" -> message) } }
Example 130
Source File: SignatureHashTestCaseProtocol.scala From bitcoin-s with MIT License | 5 votes |
package org.bitcoins.core.protocol.script.testprotocol import org.bitcoins.core.number.{Int32, UInt32} import org.bitcoins.core.protocol.script.ScriptPubKey import org.bitcoins.core.protocol.transaction.Transaction import org.bitcoins.core.script.crypto.HashType import org.bitcoins.core.serializers.script.ScriptParser import org.bitcoins.crypto.DoubleSha256Digest import org.slf4j.LoggerFactory import spray.json._ object SignatureHashTestCaseProtocol extends DefaultJsonProtocol { private val logger = LoggerFactory.getLogger(this.getClass) implicit object SignatureTestCaseProtocol extends RootJsonFormat[SignatureHashTestCase] { override def read(value: JsValue): SignatureHashTestCase = { val jsArray: JsArray = value match { case array: JsArray => array case _: JsValue => throw new RuntimeException( "Script signature hash test case must be in jsarray format") } val elements: Vector[JsValue] = jsArray.elements val transaction: Transaction = Transaction( elements.head.convertTo[String]) val asm = ScriptParser.fromHex(elements.apply(1).convertTo[String]) val script: ScriptPubKey = ScriptPubKey(asm) val inputIndex: UInt32 = UInt32(elements(2).convertTo[Int]) val hashTypeNum: Int32 = Int32(elements(3).convertTo[Int]) val hashType: HashType = HashType(hashTypeNum) val hash: DoubleSha256Digest = DoubleSha256Digest( elements.last.convertTo[String]) SignatureHashTestCaseImpl(transaction, script, inputIndex, hashTypeNum, hashType, hash) } override def write(testCase: SignatureHashTestCase): JsValue = ??? } }
Example 131
Source File: UInt5Test.scala From bitcoin-s with MIT License | 5 votes |
package org.bitcoins.core.number import org.bitcoins.testkit.core.gen.NumberGenerator import org.bitcoins.testkit.util.BitcoinSUnitTest import org.slf4j.LoggerFactory class UInt5Test extends BitcoinSUnitTest { behavior of "UInt5" it must "convert a byte to a UInt5 correctly" in { UInt5.fromByte(0.toByte) must be(UInt5.zero) UInt5(1.toByte) must be(UInt5.one) UInt5(31.toByte) must be(UInt5.max) } it must "not allow negative numbers" in { intercept[IllegalArgumentException] { UInt5(-1) } } it must "not allow numbers more than 31" in { intercept[IllegalArgumentException] { UInt5(32) } } it must "have serialization symmetry" in { forAll(NumberGenerator.uInt5) { u5 => val u52 = UInt5.fromHex(u5.hex) u52 == u5 } } it must "uint5 -> byte -> uint5" in { forAll(NumberGenerator.uInt5) { u5 => val byte = u5.byte UInt5.fromByte(byte) == u5 } } it must "uint5 -> uint8 -> uint5" in { forAll(NumberGenerator.uInt5) { u5 => val u8 = u5.toUInt8 u8.toUInt5 == u5 } } }
Example 132
Source File: ZMQSubscriberTest.scala From bitcoin-s with MIT License | 5 votes |
package org.bitcoins.zmq import java.net.InetSocketAddress import org.bitcoins.core.util.BytesUtil import org.scalatest.flatspec.AsyncFlatSpec import org.slf4j.LoggerFactory import org.zeromq.{ZFrame, ZMQ, ZMsg} import scodec.bits.ByteVector import scala.concurrent.Promise class ZMQSubscriberTest extends AsyncFlatSpec { private val logger = LoggerFactory.getLogger(this.getClass().toString) behavior of "ZMQSubscriber" it must "connect to a regtest instance of a daemon and stream txs/blocks from it" in { //note for this unit test to pass, you need to setup a bitcoind instance yourself //and set the bitcoin.conf file to allow for //zmq connections //see: https://github.com/bitcoin/bitcoin/blob/master/doc/zmq.md val socket = new InetSocketAddress("tcp://127.0.0.1", 29000) val zmqSub = new ZMQSubscriber(socket, None, None, rawTxListener, rawBlockListener) //stupid, doesn't test anything, for now. You need to look at log output to verify this is working // TODO: In the future this could use the testkit to verify the subscriber by calling generate(1) zmqSub.start() Thread.sleep(10000) // 10 seconds zmqSub.stop succeed } it must "be able to subscribe to a publisher and read a value" in { val port = Math.abs(scala.util.Random.nextInt % 14000) + 1000 val socket = new InetSocketAddress("tcp://127.0.0.1", port) val context = ZMQ.context(1) val publisher = context.socket(ZMQ.PUB) val uri = socket.getHostString + ":" + socket.getPort publisher.bind(uri) val valuePromise = Promise[String]() val fakeBlockListener: Option[ByteVector => Unit] = Some { bytes => val str = new String(bytes.toArray) valuePromise.success(str) () } val sub = new ZMQSubscriber(socket, None, None, None, fakeBlockListener) sub.start() Thread.sleep(1000) val testValue = "sweet, sweet satoshis" val msg = new ZMsg() msg.add(new ZFrame(RawBlock.topic)) msg.add(new ZFrame(testValue)) val sent = msg.send(publisher) assert(sent) valuePromise.future.map { str => sub.stop publisher.close() context.term() assert(str == testValue) } } val rawBlockListener: Option[ByteVector => Unit] = Some { { bytes: ByteVector => val hex = BytesUtil.encodeHex(bytes) logger.debug(s"received raw block ${hex}") } } val hashBlockListener: Option[ByteVector => Unit] = Some { { bytes: ByteVector => val hex = BytesUtil.encodeHex(bytes) logger.debug(s"received raw block hash ${hex}") } } val rawTxListener: Option[ByteVector => Unit] = Some { { bytes: ByteVector => val hex = BytesUtil.encodeHex(bytes) logger.debug(s"received raw tx ${hex}") } } }
Example 133
Source File: BlockBench.scala From bitcoin-s with MIT License | 5 votes |
package org.bitcoins.bench.core import org.bitcoins.core.protocol.blockchain.Block import org.slf4j.LoggerFactory import scala.io.Source object BlockBench extends App { private def logger = LoggerFactory.getLogger(this.getClass) private def timeBlockParsing[R](block: () => R): Long = { val t0 = System.currentTimeMillis() val _ = block() // call-by-name val t1 = System.currentTimeMillis() val time = t1 - t0 logger.info("Elapsed time: " + time + "ms") time } def bench1(): Unit = { val fileName = "/00000000000000000008513c860373da0484f065983aeb063ebf81c172e81d48.txt" val lines = Source.fromURL(getClass.getResource(fileName)).mkString val time = timeBlockParsing(() => Block.fromHex(lines)) require(time <= 15000) } def bench2(): Unit = { val fileName = "/000000000000000000050f70113ab1932c195442cb49bcc4ee4d7f426c8a3295.txt" val lines = Source.fromURL(getClass.getResource(fileName)).mkString val time = timeBlockParsing(() => Block.fromHex(lines)) require(time <= 15000) } 0.until(10).foreach(_ => bench1()) //bench2() }
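The benchmark above measures call-by-name parsing with a simple millisecond timer; a small generic variant of the same pattern, with illustrative names:

import org.slf4j.LoggerFactory

object TimingSketch {
  private val logger = LoggerFactory.getLogger(getClass)

  // Times an arbitrary call-by-name body and logs the elapsed wall-clock time, as bench1/bench2 do above
  def timed[R](label: String)(body: => R): (R, Long) = {
    val t0 = System.currentTimeMillis()
    val result = body
    val elapsed = System.currentTimeMillis() - t0
    logger.info(s"$label took ${elapsed}ms")
    (result, elapsed)
  }
}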
Example 134
Source File: SuspiciousConnects.scala From oni-ml with Apache License 2.0 | 5 votes |
package org.opennetworkinsight import org.apache.log4j.{Level, Logger} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SQLContext import org.slf4j.LoggerFactory import org.opennetworkinsight.SuspiciousConnectsArgumentParser.SuspiciousConnectsConfig import org.opennetworkinsight.dns.DNSSuspiciousConnects import org.opennetworkinsight.netflow.FlowSuspiciousConnects import org.opennetworkinsight.proxy.ProxySuspiciousConnectsAnalysis def main(args: Array[String]) { val parser = SuspiciousConnectsArgumentParser.parser parser.parse(args, SuspiciousConnectsConfig()) match { case Some(config) => val logger = LoggerFactory.getLogger(this.getClass) Logger.getLogger("org").setLevel(Level.OFF) Logger.getLogger("akka").setLevel(Level.OFF) val analysis = config.analysis val sparkConfig = new SparkConf().setAppName("ONI ML: " + analysis + " lda") val sparkContext = new SparkContext(sparkConfig) val sqlContext = new SQLContext(sparkContext) implicit val outputDelimiter = OutputDelimiter analysis match { case "flow" => FlowSuspiciousConnects.run(config, sparkContext, sqlContext, logger) case "dns" => DNSSuspiciousConnects.run(config, sparkContext, sqlContext, logger) case "proxy" => ProxySuspiciousConnectsAnalysis.run(config, sparkContext, sqlContext, logger) case _ => println("ERROR: unsupported (or misspelled) analysis: " + analysis) } sparkContext.stop() case None => println("Error parsing arguments") } System.exit(0) } }
Example 135
Source File: SLF4JSpec.scala From scribe with MIT License | 5 votes |
package spec import java.util.TimeZone import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import org.slf4j.{LoggerFactory, MDC} import scribe.handler.LogHandler import scribe.output.LogOutput import scribe.util.Time import scribe.writer.Writer import scribe.{Level, LogRecord, Logger} class SLF4JSpec extends AnyWordSpec with Matchers { TimeZone.setDefault(TimeZone.getTimeZone("UTC")) private var logs: List[LogRecord[_]] = Nil private var logOutput: List[String] = Nil private val recordHolder = LogHandler.default.withMinimumLevel(Level.Info).withWriter(new Writer { override def write[M](record: LogRecord[M], output: LogOutput): Unit = { logs = record :: logs logOutput = output.plainText :: logOutput } }) "SLF4J" should { "set the time to an arbitrary value" in { Time.function = () => 1542376191920L } "remove existing handlers from Root" in { Logger.root.clearHandlers().replace() } "add a testing handler" in { Logger.root.withHandler(recordHolder).replace() } "verify not records are in the RecordHolder" in { logs.isEmpty should be(true) } "log to Scribe" in { val logger = LoggerFactory.getLogger(getClass) logger.info("Hello World!") } "verify Scribe received the record" in { logs.size should be(1) val r = logs.head r.level should be(Level.Info) r.message.plainText should be("Hello World!") r.className should be("spec.SLF4JSpec") logs = Nil } "verify Scribe wrote value" in { logOutput.size should be(1) val s = logOutput.head s should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - Hello World!") } "use MDC" in { MDC.put("name", "John Doe") val logger = LoggerFactory.getLogger(getClass) logger.info("A generic name") logOutput.head should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - A generic name (name: John Doe)") } "clear MDC" in { MDC.clear() val logger = LoggerFactory.getLogger(getClass) logger.info("MDC cleared") logOutput.head should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - MDC cleared") } "make sure logging nulls doesn't error" in { val logger = LoggerFactory.getLogger(getClass) logger.error(null) logs.length should be(3) logOutput.head should be("2018.11.16 13:49:51 [ERROR] spec.SLF4JSpec - null") } } }
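Outside the test harness, the MDC calls exercised by this spec follow the plain SLF4J API; a minimal sketch reusing the same key and value:

import org.slf4j.{LoggerFactory, MDC}

object MdcSketch extends App {
  private val logger = LoggerFactory.getLogger(getClass)

  MDC.put("name", "John Doe")   // context value that MDC-aware writers can append to each record
  logger.info("A generic name")
  MDC.clear()                   // clear so the value does not leak into later log statements
}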
Example 136
Source File: SLF4JSpec.scala From scribe with MIT License | 5 votes |
package spec import java.util.TimeZone import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import org.slf4j.{LoggerFactory, MDC} import scribe.handler.LogHandler import scribe.output.LogOutput import scribe.util.Time import scribe.writer.Writer import scribe.{Level, LogRecord, Logger} class SLF4JSpec extends AnyWordSpec with Matchers { TimeZone.setDefault(TimeZone.getTimeZone("UTC")) private var logs: List[LogRecord[_]] = Nil private var logOutput: List[String] = Nil private val recordHolder = LogHandler.default.withMinimumLevel(Level.Info).withWriter(new Writer { override def write[M](record: LogRecord[M], output: LogOutput): Unit = { logs = record :: logs logOutput = output.plainText :: logOutput } }) "SLF4J" should { "set the time to an arbitrary value" in { Time.function = () => 1542376191920L } "remove existing handlers from Root" in { Logger.root.clearHandlers().replace() } "add a testing handler" in { Logger.root.withHandler(recordHolder).replace() } "verify not records are in the RecordHolder" in { logs.isEmpty should be(true) } "log to Scribe" in { val logger = LoggerFactory.getLogger(getClass) logger.info("Hello World!") } "verify Scribe received the record" in { logs.size should be(1) val r = logs.head r.level should be(Level.Info) r.message.plainText should be("Hello World!") r.className should be("spec.SLF4JSpec") logs = Nil } "verify Scribe wrote value" in { logOutput.size should be(1) val s = logOutput.head s should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - Hello World!") } "use MDC" in { MDC.put("name", "John Doe") val logger = LoggerFactory.getLogger(getClass) logger.info("A generic name") logOutput.head should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - A generic name (name: John Doe)") } "clear MDC" in { MDC.clear() val logger = LoggerFactory.getLogger(getClass) logger.info("MDC cleared") logOutput.head should be("2018.11.16 13:49:51 [INFO] spec.SLF4JSpec - MDC cleared") } "make sure logging nulls doesn't error" in { val logger = LoggerFactory.getLogger(getClass) logger.error(null) logs.length should be(3) logOutput.head should be("2018.11.16 13:49:51 [ERROR] spec.SLF4JSpec - null") } } }
Example 137
Source File: Logging.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy import org.slf4j.LoggerFactory trait Logging { lazy val logger = LoggerFactory.getLogger(this.getClass) def trace(message: => Any): Unit = { if (logger.isTraceEnabled) { logger.trace(message.toString) } } def debug(message: => Any): Unit = { if (logger.isDebugEnabled) { logger.debug(message.toString) } } def info(message: => Any): Unit = { if (logger.isInfoEnabled) { logger.info(message.toString) } } def warn(message: => Any): Unit = { logger.warn(message.toString) } def warn(message: => Any, t: Throwable): Unit = { logger.warn(message.toString, t) } def error(message: => Any, t: Throwable): Unit = { logger.error(message.toString, t) } def error(message: => Any): Unit = { logger.error(message.toString) } }
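A minimal sketch of a class mixing in this trait (the class name and messages are hypothetical); the by-name parameters mean messages are only built when the corresponding level is enabled:

import org.apache.livy.Logging

// Hypothetical consumer of the Logging trait above
class SessionManager extends Logging {
  def start(): Unit = info("starting session manager")

  def fail(t: Throwable): Unit = error("session manager failed", t)
}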
Example 138
Source File: SequoiadbRDDIterator.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.rdd import _root_.com.sequoiadb.spark.SequoiadbConfig import _root_.com.sequoiadb.spark.io.SequoiadbReader import org.apache.spark._ import org.apache.spark.sql.sources.Filter import org.bson.BSONObject import org.slf4j.{Logger, LoggerFactory} //import java.io.FileOutputStream; class SequoiadbRDDIterator( taskContext: TaskContext, partition: Partition, config: SequoiadbConfig, requiredColumns: Array[String], filters: Array[Filter], queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON, queryLimit: Long = -1) extends Iterator[BSONObject] { private var LOG: Logger = LoggerFactory.getLogger(this.getClass.getName()) protected var finished = false private var closed = false private var initialized = false lazy val reader = { initialized = true initReader() } // Register an on-task-completion callback to close the input stream. taskContext.addTaskCompletionListener((context: TaskContext) => closeIfNeeded()) override def hasNext: Boolean = { !finished && reader.hasNext } override def next(): BSONObject = { if (!hasNext) { throw new NoSuchElementException("End of stream") } reader.next() } def closeIfNeeded(): Unit = { if (!closed) { close() closed = true } } protected def close(): Unit = { if (initialized) { reader.close() } } def initReader() = { val reader = new SequoiadbReader(config,requiredColumns,filters, queryReturnType, queryLimit) reader.init(partition) reader } }
Example 139
Source File: SequoiadbRDD.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.rdd import org.apache.spark.SparkContext import _root_.com.sequoiadb.spark.SequoiadbConfig import com.sequoiadb.spark.partitioner._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.sql.sources.Filter import org.apache.spark.{Partition, TaskContext} import org.bson.BSONObject import org.slf4j.{Logger, LoggerFactory} import scala.collection.mutable.ArrayBuffer //import java.io.FileOutputStream; def apply ( sc: SQLContext, config: SequoiadbConfig, partitioner: Option[SequoiadbPartitioner] = None, requiredColumns: Array[String] = Array(), filters: Array[Filter] = Array(), queryReturnType: Int = SequoiadbConfig.QUERYRETURNBSON, queryLimit: Long = -1) = { new SequoiadbRDD ( sc.sparkContext, config, partitioner, requiredColumns, filters, queryReturnType, queryLimit) } }
Example 140
Source File: SequoiadbWriter.scala From spark-sequoiadb with Apache License 2.0 | 5 votes |
package com.sequoiadb.spark.io def save(it: Iterator[Row], schema: StructType): Unit = { try { ds = Option(new SequoiadbDatasource ( config[List[String]](SequoiadbConfig.Host), config[String](SequoiadbConfig.Username), config[String](SequoiadbConfig.Password), ConnectionUtil.initConfigOptions, ConnectionUtil.initSequoiadbOptions )) // pickup a connection connection = Option(ds.get.getConnection) // locate collection val cl = connection.get.getCollectionSpace( config[String](SequoiadbConfig.CollectionSpace)).getCollection( config[String](SequoiadbConfig.Collection)) LOG.info ("bulksize = " + config[String](SequoiadbConfig.BulkSize)) // loop through it and perform batch insert // batch size is defined in SequoiadbConfig.BulkSize val list : ArrayList[BSONObject] = new ArrayList[BSONObject]() while ( it.hasNext ) { val record = it.next val bsonrecord = SequoiadbRowConverter.rowAsDBObject ( record, schema ) list.add(bsonrecord) if ( list.size >= config[String](SequoiadbConfig.BulkSize).toInt ) { cl.bulkInsert ( list, 0 ) list.clear } } // insert rest of the record if there's any if ( list.size > 0 ) { cl.bulkInsert ( list, 0 ) list.clear } } catch { case ex: Exception => throw SequoiadbException(ex.getMessage, ex) } finally { ds.fold(ifEmpty=()) { connectionpool => connection.fold(ifEmpty=()) { conn => connectionpool.close(conn) } connectionpool.close } // ds.fold(ifEmpty=()) } // finally } // def save(it: Iterator[BSONObject]): Unit = }
Example 141
Source File: AbstractLoggingServiceRegistryClient.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.devmode.internal.registry import java.net.URI import org.slf4j.Logger import org.slf4j.LoggerFactory import scala.collection.immutable import scala.concurrent.ExecutionContext import scala.concurrent.Future import scala.util.Failure import scala.util.Success private[lagom] abstract class AbstractLoggingServiceRegistryClient(implicit ec: ExecutionContext) extends ServiceRegistryClient { protected val log: Logger = LoggerFactory.getLogger(getClass) override def locateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] = { require( serviceName != ServiceRegistryClient.ServiceName, "The service registry client cannot locate the service registry service itself" ) log.debug("Locating service name=[{}] ...", serviceName) val location: Future[immutable.Seq[URI]] = internalLocateAll(serviceName, portName) location.onComplete { case Success(Nil) => log.warn("serviceName=[{}] was not found. Hint: Maybe it was not started?", serviceName) case Success(uris) => log.debug("serviceName=[{}] can be reached at uris=[{}]", serviceName: Any, uris: Any) case Failure(e) => log.warn("Service registry replied with an error when looking up serviceName=[{}]", serviceName: Any, e: Any) } location } protected def internalLocateAll(serviceName: String, portName: Option[String]): Future[immutable.Seq[URI]] }
Example 142
Source File: HeaderFilters.scala From lagom with Apache License 2.0 | 5 votes |
package docs.scaladsl.services.headerfilters package compose { import com.lightbend.lagom.scaladsl.api.transport.HeaderFilter import com.lightbend.lagom.scaladsl.api.transport.RequestHeader import com.lightbend.lagom.scaladsl.api.transport.ResponseHeader import com.lightbend.lagom.scaladsl.api.Service import com.lightbend.lagom.scaladsl.api.ServiceCall import org.slf4j.LoggerFactory //#verbose-filter class VerboseFilter(name: String) extends HeaderFilter { private val log = LoggerFactory.getLogger(getClass) def transformClientRequest(request: RequestHeader) = { log.debug(name + " - transforming Client Request") request } def transformServerRequest(request: RequestHeader) = { log.debug(name + " - transforming Server Request") request } def transformServerResponse(response: ResponseHeader, request: RequestHeader) = { log.debug(name + " - transforming Server Response") response } def transformClientResponse(response: ResponseHeader, request: RequestHeader) = { log.debug(name + " - transforming Client Response") response } } //#verbose-filter trait HelloService extends Service { def sayHello: ServiceCall[String, String] //#header-filter-composition def descriptor = { import Service._ named("hello") .withCalls( call(sayHello) ) .withHeaderFilter( HeaderFilter.composite( new VerboseFilter("Foo"), new VerboseFilter("Bar") ) ) } //#header-filter-composition } }
Example 143
Source File: AkkaDiscoveryHelper.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.client import java.net.URI import java.net.URISyntaxException import java.util.concurrent.ThreadLocalRandom import java.util.concurrent.TimeUnit import akka.discovery.ServiceDiscovery import akka.discovery.ServiceDiscovery.ResolvedTarget import com.typesafe.config.Config import org.slf4j.LoggerFactory import scala.concurrent.ExecutionContext import scala.concurrent.Future import scala.concurrent.duration._ private[lagom] class AkkaDiscoveryHelper(config: Config, serviceDiscovery: ServiceDiscovery)( implicit ec: ExecutionContext ) { private val logger = LoggerFactory.getLogger(this.getClass) private val serviceNameMapper = new ServiceNameMapper(config) private val lookupTimeout = config.getDuration("lookup-timeout", TimeUnit.MILLISECONDS).millis def locateAll(name: String): Future[Seq[URI]] = { val serviceLookup = serviceNameMapper.mapLookupQuery(name) serviceDiscovery .lookup(serviceLookup.lookup, lookupTimeout) .map { resolved => logger.debug("Retrieved addresses: {}", resolved.addresses) resolved.addresses.map(target => toURI(target, serviceLookup)) } } def locate(name: String): Future[Option[URI]] = locateAll(name).map(selectRandomURI) private def toURI(resolvedTarget: ResolvedTarget, lookup: ServiceLookup): URI = { val port = resolvedTarget.port.getOrElse(-1) val scheme = lookup.scheme.orNull try { new URI( scheme, // scheme null, // userInfo resolvedTarget.host, // host port, // port null, // path null, // query null // fragment ) } catch { case e: URISyntaxException => throw new RuntimeException(e) } } private def selectRandomURI(uris: Seq[URI]) = uris match { case Nil => None case Seq(one) => Some(one) case many => Some(many(ThreadLocalRandom.current().nextInt(many.size))) } }
Example 144
Source File: ServiceNameMapper.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.client import akka.discovery.Lookup import com.typesafe.config.Config import com.typesafe.config.ConfigObject import com.typesafe.config.ConfigValueType import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ private[lagom] class ServiceNameMapper(config: Config) { private val logger = LoggerFactory.getLogger(this.getClass) private val defaultPortName = readConfigValue(config, "defaults.port-name").toOption private val defaultPortProtocol = readConfigValue(config, "defaults.port-protocol").toOption private val defaultScheme = readConfigValue(config, "defaults.scheme").toOption private sealed trait ConfigValue { def toOption = this match { case NonEmpty(v) => Some(v) case _ => None } } private object ConfigValue { def apply(value: String) = if (value.trim.isEmpty) Empty else NonEmpty(value.trim) } private case object Undefined extends ConfigValue private case object Empty extends ConfigValue private case class NonEmpty(value: String) extends ConfigValue private def readConfigValue(config: Config, name: String): ConfigValue = if (config.hasPathOrNull(name)) { if (config.getIsNull(name)) Empty else ConfigValue(config.getString(name)) } else Undefined private def readOptionalConfigValue(config: Config, name: String, defaultValue: Option[String]): Option[String] = readConfigValue(config, name) match { case Undefined => defaultValue // this is the case the user explicitly set the scheme to empty string case Empty => None case NonEmpty(value) => Option(value) } private val serviceLookupMapping: Map[String, ServiceLookup] = config .getObject("service-name-mappings") .entrySet() .asScala .map { entry => if (entry.getValue.valueType != ConfigValueType.OBJECT) { throw new IllegalArgumentException( s"Illegal value type in service-name-mappings: ${entry.getKey} - ${entry.getValue.valueType}" ) } val configEntry = entry.getValue.asInstanceOf[ConfigObject].toConfig // read config values for portName, portProtocol and scheme // when not explicitly overwritten by used, uses default values val portName = readOptionalConfigValue(configEntry, "port-name", defaultPortName) val portProtocol = readOptionalConfigValue(configEntry, "port-protocol", defaultPortProtocol) val scheme = readOptionalConfigValue(configEntry, "scheme", defaultScheme) val lookup: Lookup = readConfigValue(configEntry, "lookup").toOption .map(name => parseSrv(name, portName, portProtocol)) .getOrElse(Lookup(entry.getKey, portName, portProtocol)) entry.getKey -> ServiceLookup(lookup, scheme) } .toMap private def parseSrv(name: String, portName: Option[String], portProtocol: Option[String]) = if (Lookup.isValidSrv(name)) Lookup.parseSrv(name) else Lookup(name, portName, portProtocol) private[lagom] def mapLookupQuery(name: String): ServiceLookup = { val serviceLookup = serviceLookupMapping.getOrElse( name, ServiceLookup(parseSrv(name, defaultPortName, defaultPortProtocol), defaultScheme) ) logger.debug("Lookup service '{}', mapped to {}", name: Any, serviceLookup: Any) serviceLookup } } private[lagom] case class ServiceLookup(lookup: Lookup, scheme: Option[String])
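A hedged sketch of the kind of config subtree this mapper consumes; the service name, SRV record, and scheme are invented for illustration (the mapper itself is package-private, so only the configuration shape is shown):

import com.typesafe.config.ConfigFactory

object ServiceNameMappingSketch {
  // Hypothetical subtree with the keys read by ServiceNameMapper
  val mapperConfig = ConfigFactory.parseString(
    """
      |defaults.port-name = http
      |defaults.port-protocol = tcp
      |defaults.scheme = http
      |service-name-mappings {
      |  payment {
      |    lookup = "_http._tcp.payment.svc.cluster.local"
      |    scheme = https
      |  }
      |}
    """.stripMargin)

  // A mapper built from this subtree would resolve "payment" via the SRV lookup above with the
  // https scheme, while unmapped service names fall back to the defaults block.
}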
Example 145
Source File: AwaitPersistenceInit.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.persistence.testkit import java.util.concurrent.TimeUnit import akka.actor.ActorSystem import akka.actor.Props import akka.persistence.PersistentActor import akka.testkit.TestProbe import org.slf4j.LoggerFactory import scala.concurrent.duration._ // A copy of akka.persistence.cassandra.CassandraLifecycle's awaitPersistenceInit. private[lagom] object AwaitPersistenceInit { def awaitPersistenceInit(system: ActorSystem): Unit = { val probe = TestProbe()(system) val log = LoggerFactory.getLogger(getClass) val t0 = System.nanoTime() var n = 0 probe.within(45.seconds) { probe.awaitAssert { n += 1 system.actorOf(Props[AwaitPersistenceInit], "persistenceInit" + n).tell("hello", probe.ref) probe.expectMsg(15.seconds, "hello") log.debug( "awaitPersistenceInit took {} ms {}", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0), system.name ) } } } } private[lagom] class AwaitPersistenceInit extends PersistentActor { def persistenceId: String = self.path.name def receiveRecover: Receive = { case _ => } def receiveCommand: Receive = { case msg => persist(msg) { _ => sender() ! msg context.stop(self) } } }
Example 146
Source File: CassandraReadSideHandler.scala From lagom with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.scaladsl.persistence.cassandra import akka.persistence.query.Offset import akka.stream.ActorAttributes import akka.stream.scaladsl.Flow import akka.Done import akka.NotUsed import com.datastax.driver.core.BatchStatement import com.datastax.driver.core.BoundStatement import com.lightbend.lagom.internal.persistence.cassandra.CassandraOffsetDao import com.lightbend.lagom.internal.persistence.cassandra.CassandraOffsetStore import com.lightbend.lagom.scaladsl.persistence.ReadSideProcessor.ReadSideHandler import com.lightbend.lagom.scaladsl.persistence._ import com.lightbend.lagom.scaladsl.persistence.cassandra.CassandraSession import org.slf4j.LoggerFactory import scala.collection.immutable import scala.concurrent.ExecutionContext import scala.concurrent.Future import scala.collection.JavaConverters._ private[cassandra] final class CassandraAutoReadSideHandler[Event <: AggregateEvent[Event]]( session: CassandraSession, offsetStore: CassandraOffsetStore, handlers: Map[Class[_ <: Event], CassandraAutoReadSideHandler.Handler[Event]], globalPrepareCallback: () => Future[Done], prepareCallback: AggregateEventTag[Event] => Future[Done], readProcessorId: String, dispatcher: String )(implicit ec: ExecutionContext) extends CassandraReadSideHandler[Event, CassandraAutoReadSideHandler.Handler[Event]]( session, handlers, dispatcher ) { import CassandraAutoReadSideHandler.Handler @volatile private var offsetDao: CassandraOffsetDao = _ protected override def invoke( handler: Handler[Event], element: EventStreamElement[Event] ): Future[immutable.Seq[BoundStatement]] = { for { statements <- handler .asInstanceOf[EventStreamElement[Event] => Future[immutable.Seq[BoundStatement]]] .apply(element) } yield statements :+ offsetDao.bindSaveOffset(element.offset) } protected def offsetStatement(offset: Offset): immutable.Seq[BoundStatement] = immutable.Seq(offsetDao.bindSaveOffset(offset)) override def globalPrepare(): Future[Done] = { globalPrepareCallback.apply() } override def prepare(tag: AggregateEventTag[Event]): Future[Offset] = { for { _ <- prepareCallback.apply(tag) dao <- offsetStore.prepare(readProcessorId, tag.tag) } yield { offsetDao = dao dao.loadedOffset } } }
Example 147
Source File: LivySubmit.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench.sparklaunch.submission.livy import com.ibm.sparktc.sparkbench.sparklaunch.confparse.SparkJobConf import com.ibm.sparktc.sparkbench.sparklaunch.submission.livy.LivySubmit._ import com.ibm.sparktc.sparkbench.sparklaunch.submission.Submitter import com.ibm.sparktc.sparkbench.utils.SparkBenchException import com.softwaremill.sttp.{Id, SttpBackend} import org.slf4j.{Logger, LoggerFactory} import scala.annotation.tailrec import scala.sys.ShutdownHookThread object LivySubmit { val log: Logger = LoggerFactory.getLogger(this.getClass) val successCode = 200 import com.softwaremill.sttp._ val emptyBodyException: SparkBenchException = SparkBenchException("REST call returned empty message body") val nonSuccessCodeException: Int => SparkBenchException = (code: Int) => SparkBenchException(s"REST call returned non-sucess code: $code") def apply(): LivySubmit = { new LivySubmit()(HttpURLConnectionBackend()) } def cancelAllBatches(livyWithID: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyDelete] = { log.info(s"Cancelling batch request id: ${livyWithID.id}") val response = livyWithID.deleteRequest.send() (response.is200, response.body) match { case (true, Right(bod)) => if (bod.msg == "deleted") response else throw SparkBenchException(s"Unexpected status for delete request: ${bod.msg}") case (true, Left(b)) => throw emptyBodyException case (_, _) => throw nonSuccessCodeException(response.code) } } def sendPostBatchRequest(conf: SparkJobConf) (implicit backend: SttpBackend[Id, Nothing]): (LivyRequestWithID, Response[ResponseBodyBatch]) = { val livyRequest = LivyRequest(conf) log.info(s"Sending Livy POST request:\n${livyRequest.postRequest.toString}") val response: Id[Response[ResponseBodyBatch]] = livyRequest.postRequest.send() (response.isSuccess, response.body) match { case (true, Left(_)) => throw emptyBodyException case (false, Left(_)) => throw nonSuccessCodeException(response.code) case (false, Right(bod)) => throw SparkBenchException(s"POST Request to ${livyRequest.postBatchUrl} failed:\n" + s"${bod.log.mkString("\n")}") case (_,_) => // no exception thrown } val livyWithID = LivyRequestWithID(livyRequest, response.body.right.get.id) (livyWithID, response) } private def pollHelper(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = { Thread.sleep(request.pollSeconds * 1000) log.info(s"Sending Livy status GET request:\n${request.statusRequest.toString}") val response: Id[Response[ResponseBodyState]] = request.statusRequest.send() response } @tailrec def poll(request: LivyRequestWithID, response: Response[ResponseBodyState]) (implicit backend: SttpBackend[Id, Nothing]): Response[ResponseBodyState] = (response.isSuccess, response.body) match { case (false, _) => throw SparkBenchException(s"Request failed with code ${response.code}") case (_, Left(_)) => throw emptyBodyException case (true, Right(bod)) => bod.state match { case "success" => response case "dead" => throw SparkBenchException(s"Poll request failed with state: dead\n" + getLogs(request)) case "running" => poll(request, pollHelper(request)) case st => throw SparkBenchException(s"Poll request failed with state: $st") } } def getLogs(request: LivyRequestWithID)(implicit backend: SttpBackend[Id, Nothing]): String = { val response = request.logRequest.send() (response.is200, response.body) match { case (true, Right(bod)) => bod.log.mkString("\n") case (false, Right(_)) => throw SparkBenchException(s"Log request failed with code: ${response.code}") case (_, Left(_)) => throw emptyBodyException } } } class LivySubmit()(implicit val backend: SttpBackend[Id, Nothing]) extends Submitter { override def launch(conf: SparkJobConf): Unit = { val (livyWithID, postResponse) = sendPostBatchRequest(conf)(backend) val shutdownHook: ShutdownHookThread = sys.ShutdownHookThread { // interrupt any batches cancelAllBatches(livyWithID)(backend) } val pollResponse = poll(livyWithID, pollHelper(livyWithID))(backend) // The request has completed, so we're going to remove the shutdown hook. shutdownHook.remove() } }
Example 148
Source File: CLIKickoff.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench.cli import org.slf4j.{Logger, LoggerFactory} import com.ibm.sparktc.sparkbench.workload.MultipleSuiteKickoff object CLIKickoff extends App { override def main(args: Array[String]): Unit = { val log: Logger = LoggerFactory.getLogger(this.getClass) log.info(s"args received: ${args.mkString(", ")}") if(args.isEmpty) throw new IllegalArgumentException("CLIKickoff received no arguments") val oneStr = args.mkString(" ") val worksuites = Configurator(oneStr) MultipleSuiteKickoff.run(worksuites) } }
Example 149
Source File: SparkStreamingQueryListener.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery.listeners import kamon.Kamon import org.apache.spark.sql.SparkSession import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} import org.slf4j.{Logger, LoggerFactory} object SparkStreamingQueryListener { val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener]) def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = { new SparkStreamingQueryListener(spark, restart) } } class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener { import SparkStreamingQueryListener._ private val streams = sparkSession.streams private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName) override def onQueryStarted(event: QueryStartedEvent): Unit = { if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}") } //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = { val progress = progressEvent.progress val inputRowsPerSecond = progress.inputRowsPerSecond val processedRowsPerSecond = progress.processedRowsPerSecond val sources = progress.sources.map { source => val description = source.description val startOffset = source.startOffset val endOffset = source.endOffset val inputRows = source.numInputRows s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows" } Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong) Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong) log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}") } override def onQueryTerminated(event: QueryTerminatedEvent): Unit = { log.warn(s"queryTerminated: $event") val possibleStreamingQuery = streams.get(event.id) if (possibleStreamingQuery != null) { val progress = possibleStreamingQuery.lastProgress val sources = progress.sources log.warn(s"last.progress.sources sources=$sources") } event.exception match { case Some(exception) => log.warn(s"queryEndedWithException exception=$exception resetting.all.streams") restart() case None => } } }
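To receive these callbacks the listener must be registered with the session's StreamingQueryManager; a minimal sketch in which the restart callback body is a placeholder:

import org.apache.spark.sql.SparkSession
import com.twilio.open.streaming.trend.discovery.listeners.SparkStreamingQueryListener

object ListenerRegistrationSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("listener-demo")   // placeholder app name
      .getOrCreate()

    // Register the listener so onQueryStarted/onQueryProgress/onQueryTerminated are invoked
    spark.streams.addListener(SparkStreamingQueryListener(spark, () => {
      // application-specific restart logic would go here (placeholder)
    }))
  }
}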
Example 150
Source File: EventAggregationSpec.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.util import com.twilio.open.protocol.Calls.CallEvent import com.twilio.open.protocol.Metrics import com.twilio.open.streaming.trend.discovery.streams.EventAggregation import org.apache.kafka.common.serialization.{Deserializer, Serializer, StringDeserializer, StringSerializer} import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.sql._ import org.apache.spark.sql.kafka010.KafkaTestUtils import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.{Logger, LoggerFactory} class EventAggregationSpec extends KafkaBackedTest[String, CallEvent] { override val testUtils = new KafkaTestUtils[String, CallEvent] { override val keySerializer: Serializer[String] = new StringSerializer override val keyDeserializer: Deserializer[String] = new StringDeserializer override val valueSerializer: Serializer[CallEvent] = new CallEventSerializer override val valueDeserializer: Deserializer[CallEvent] = new CallEventDeserializer } override protected val kafkaTopic = "spark.summit.call.events" override protected val partitions = 8 private val pathToTestScenarios = "src/test/resources/scenarios" val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation]) lazy val session: SparkSession = sparkSql override def conf: SparkConf = { new SparkConf() .setMaster("local[*]") .setAppName("aggregation-test-app") .set("spark.ui.enabled", "false") .set("spark.app.id", appID) .set("spark.driver.host", "localhost") .set("spark.sql.shuffle.partitions", "32") .set("spark.executor.cores", "4") .set("spark.executor.memory", "1g") .set("spark.ui.enabled", "false") .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList) } test("Should aggregate call events") { import session.implicits._ val appConfig = appConfigForTest() val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json") val scenarioIter = scenario.toIterator scenario.nonEmpty shouldBe true testUtils.createTopic(kafkaTopic, partitions, overwrite = true) sendNextMessages(scenarioIter, 30, _.getEventId, _.getLoggedEventTime) val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session) val eventAggregation = EventAggregation(appConfig) eventAggregation.process(trendDiscoveryApp.readKafkaStream())(session) .writeStream .queryName("calleventaggs") .format("memory") .outputMode(eventAggregation.outputMode) .start() .processAllAvailable() val df = session.sql("select * from calleventaggs") df.printSchema() df.show val res = session .sql("select avg(stats.p99) from calleventaggs") .collect() .map { r => r.getAs[Double](0) } .head DiscoveryUtils.round(res) shouldEqual 7.13 } } class CallEventSerializer extends Serializer[CallEvent] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def serialize(topic: String, data: CallEvent): Array[Byte] = data.toByteArray override def close(): Unit = {} } class CallEventDeserializer extends Deserializer[CallEvent] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def deserialize(topic: String, data: Array[Byte]): CallEvent = CallEvent.parseFrom(data) override def close(): Unit = {} }
Example 151
Source File: TestHelper.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } } @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
Example 152
Source File: CanalEntry2RowDataInfoMappingFormat4Sda.scala From estuary with Apache License 2.0 | 5 votes |
package com.neighborhood.aka.laplace.estuary.mysql.lifecycle.reborn.batch.mappings

import com.neighborhood.aka.laplace.estuary.bean.key.PartitionStrategy
import com.neighborhood.aka.laplace.estuary.mysql.lifecycle
import com.neighborhood.aka.laplace.estuary.mysql.lifecycle.MysqlRowDataInfo
import com.neighborhood.aka.laplace.estuary.mysql.schema.SdaSchemaMappingRule
import com.neighborhood.aka.laplace.estuary.mysql.schema.tablemeta.MysqlTableSchemaHolder
import com.typesafe.config.Config
import org.slf4j.LoggerFactory

final class CanalEntry2RowDataInfoMappingFormat4Sda(
    override val partitionStrategy: PartitionStrategy,
    override val syncTaskId: String,
    override val syncStartTime: Long,
    override val schemaComponentIsOn: Boolean,
    override val isCheckSchema: Boolean,
    override val config: Config,
    override val schemaHolder: Option[MysqlTableSchemaHolder] = None,
    val tableMappingRule: SdaSchemaMappingRule
) extends CanalEntry2RowDataInfoMappingFormat {

  override protected lazy val logger = LoggerFactory.getLogger(classOf[CanalEntry2RowDataInfoMappingFormat4Sda])

  override def transform(x: lifecycle.EntryKeyClassifier): MysqlRowDataInfo = {
    val entry = x.entry
    val header = entry.getHeader
    val (dbName, tableName) = tableMappingRule.getMappingName(header.getSchemaName, header.getTableName)
    val dmlType = header.getEventType
    val columnList = x.columnList
    checkAndGetMysqlRowDataInfo(dbName, tableName, dmlType, columnList, entry)
  }
}
Example 153
Source File: MultipleJsonKeyPartitioner.scala From estuary with Apache License 2.0 | 5 votes |
package com.neighborhood.aka.laplace.estuary.bean.key

import java.util

import org.apache.kafka.clients.producer.Partitioner
import org.apache.kafka.common.Cluster
import org.slf4j.LoggerFactory

class MultipleJsonKeyPartitioner extends Partitioner {

  val logger = LoggerFactory.getLogger(classOf[MultipleJsonKeyPartitioner])

  private def partitionByPrimaryKey(key: Any)(implicit partitions: Int): Int = {
    key.hashCode() % partitions
  }

  private def partitionByMod(mod: Long)(implicit partitions: Int): Int = (mod % partitions).toInt

  private def partitionByDbAndTable(db: String, tb: String)(implicit partitions: Int): Int = s"$db-$tb".hashCode % partitions

  override def partition(topic: String, key: Any, keyBytes: Array[Byte], value: Any, valueBytes: Array[Byte], cluster: Cluster): Int = {
    implicit val partitions: Int = cluster.partitionCountForTopic(topic)
    key match {
      case x: BinlogKey =>
        x.getPartitionStrategy match {
          case PartitionStrategy.MOD => math.abs(partitionByMod(x.getSyncTaskSequence))
          case PartitionStrategy.PRIMARY_KEY => math.abs(partitionByPrimaryKey(x.getPrimaryKeyValue))
          case _ => ???
        }
      case x: OplogKey =>
        x.getPartitionStrategy match {
          case PartitionStrategy.PRIMARY_KEY => math.abs(partitionByPrimaryKey(x.getMongoOpsUuid))
          case _ => ???
        }
    }
  }

  override def close(): Unit = {}

  override def configure(configs: util.Map[String, _]): Unit = {}
}
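As a usage sketch (not part of the estuary project), a Kafka producer can be pointed at this partitioner through the standard partitioner.class setting. The bootstrap address below is a placeholder, and record keys are expected to be BinlogKey or OplogKey instances, since partition() pattern-matches on those types.

import java.util.Properties

import com.neighborhood.aka.laplace.estuary.bean.key.MultipleJsonKeyPartitioner
import org.apache.kafka.clients.producer.ProducerConfig

object MultipleJsonKeyPartitionerConfigSketch {
  // Minimal producer properties that wire in the custom partitioner above.
  def producerProps(): Properties = {
    val props = new Properties()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092") // placeholder broker address
    props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[MultipleJsonKeyPartitioner].getName)
    props
  }
}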
Example 154
Source File: SupportUtil.scala From estuary with Apache License 2.0 | 5 votes |
package com.neighborhood.aka.laplace.estuary.core.util

import com.neighborhood.aka.laplace.estuary.bean.exception.other.TimeoutException
import com.neighborhood.aka.laplace.estuary.core.task.TaskManager
import org.slf4j.LoggerFactory

import scala.annotation.tailrec

  @tailrec
  @throws[TimeoutException]
  def loopWaiting4SendCurrentAllDataFinish(taskManager: TaskManager, timeout: Option[Long] = None, startTs: Long = System.currentTimeMillis()): Unit = {
    lazy val currentTs = System.currentTimeMillis()
    lazy val totalCost = currentTs - startTs
    lazy val isTimeout = timeout.fold(false)(t => totalCost >= t)
    if (isTimeout) {
      logger.warn(s"time has been run out when loopWaiting4SendDataFinish,currentTs:$currentTs,timeOut:$timeout,startTs:$startTs")
      throw new TimeoutException(s"time has been run out when loopWaiting4SendDataFinish,currentTs:$currentTs,timeOut:$timeout,startTs:$startTs")
    } else loopWaiting4SendCurrentAllDataFinish(taskManager, timeout, startTs)
  }
}
Example 155
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }

  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
}
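A small usage sketch, not taken from the project: the path below is hypothetical, and since Sourcer returns an ordinary BufferedSource, closing it is left to the caller.

import org.clulab.wm.wmexchanger.utils.Sourcer

import scala.io.BufferedSource

object SourcerUsageSketch extends App {
  // Sourcer logs the path being read and applies UTF-8 decoding.
  val source: BufferedSource = Sourcer.sourceFromFile("/tmp/example.txt") // hypothetical path

  try source.getLines().foreach(println)
  finally source.close()
}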
Example 156
Source File: KafkaConsumer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.io.File import java.time.Duration import java.util.Collections import java.util.ConcurrentModificationException import java.util.Properties import org.apache.kafka.clients.consumer.{KafkaConsumer => ApacheKafkaConsumer} import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.FileEditor import org.json4s._ import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumer(properties: Properties, closeDuration: Int, topic: String, outputDir: String) { import KafkaConsumer._ implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats logger.info("Opening consumer...") protected val consumer: ApacheKafkaConsumer[String, String] = { val consumer = new ApacheKafkaConsumer[String, String](properties) consumer.subscribe(Collections.singletonList(topic)) consumer } def poll(duration: Int): Unit = { val records = consumer.poll(Duration.ofSeconds(duration)) logger.info(s"Polling ${records.count} records...") records.forEach { record => val key = record.key val value = record.value // Imply an extension on the file so that it can be replaced. val file = FileEditor(new File(key + ".")).setDir(outputDir).setExt("json").get logger.info("Consuming " + file.getName) FileUtils.printWriterFromFile(file).autoClose { printWriter => printWriter.print(value) } } } def close(): Unit = { logger.info("Closing consumer...") try { consumer.close(Duration.ofSeconds(closeDuration)) } catch { case _: ConcurrentModificationException => // KafkaConsumer is not safe for multi-threaded access } } } object KafkaConsumer { val logger: Logger = LoggerFactory.getLogger(this.getClass) }
Example 157
Source File: KafkaConsumerApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmconsumer import java.util.Properties import org.clulab.wm.wmexchanger.utils.PropertiesBuilder import org.clulab.wm.wmexchanger.utils.WmUserApp import org.clulab.wm.wmexchanger.utils.SafeThread import org.slf4j.Logger import org.slf4j.LoggerFactory class KafkaConsumerApp(args: Array[String]) extends WmUserApp(args, "/kafkaconsumer.properties") { val localKafkaProperties: Properties = { // This allows the login to be contained in a file external to the project. val loginProperty = appProperties.getProperty("login") val loginPropertiesBuilder = PropertiesBuilder.fromFile(loginProperty) PropertiesBuilder(kafkaProperties).putAll(loginPropertiesBuilder).get } val topic: String = appProperties.getProperty("topic") val outputDir: String = appProperties.getProperty("outputDir") val pollDuration: Int = appProperties.getProperty("poll.duration").toInt val waitDuration: Long = appProperties.getProperty("wait.duration").toLong val closeDuration: Int = appProperties.getProperty("close.duration").toInt val thread: SafeThread = new SafeThread(KafkaConsumerApp.logger) { override def runSafely(): Unit = { val consumer = new KafkaConsumer(localKafkaProperties, closeDuration, topic, outputDir) // autoClose isn't executed if the thread is shot down, so this hook is used instead. sys.ShutdownHookThread { consumer.close() } while (!isInterrupted) consumer.poll(pollDuration) } } if (interactive) thread.waitSafely(waitDuration) } object KafkaConsumerApp extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) new KafkaConsumerApp(args) }
Example 158
Source File: CurlProducerApp.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.wmproducer import com.typesafe.config.ConfigFactory import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser import org.clulab.wm.wmexchanger.utils.FileUtils import org.clulab.wm.wmexchanger.utils.PropertiesBuilder import org.clulab.wm.wmexchanger.utils.Sinker import org.clulab.wm.wmexchanger.utils.StringUtils import org.slf4j.Logger import org.slf4j.LoggerFactory object CurlProducerApp extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) val version = "0.2.3" val inputDir = args(0) val outputFile = args(1) val config = ConfigFactory.load("curlproducer") val service = config.getString("CurlProducerApp.service") val login = config.getString("CurlProducerApp.login") val properties = PropertiesBuilder.fromFile(login).get val username = properties.getProperty("username") val password = properties.getProperty("password") val files = FileUtils.findFiles(inputDir, "jsonld") Sinker.printWriterFromFile(outputFile).autoClose { printWriter => files.foreach { file => logger.info(s"Processing ${file.getName}") val docId = StringUtils.beforeFirst(file.getName, '.') try { val command = s"""curl |--basic |--user "$username:$password" |-X POST "$service" |-H "accept: application/json" |-H "Content-Type: multipart/form-data" |-F 'metadata={ "identity": "eidos", "version": "$version", "document_id": "$docId" }' |-F "file=@${file.getName}" |""".stripMargin.replace('\r', ' ').replace('\n', ' ') printWriter.print(command) printWriter.print("\n") } catch { case exception: Exception => logger.error(s"Exception for file $file", exception) } } } }
Example 159
Source File: DocumentFilter.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import org.clulab.processors.corenlp.CoreNLPDocument import org.clulab.processors.shallownlp.ShallowNLPProcessor import org.clulab.processors.{Document, Processor} import org.slf4j.{Logger, LoggerFactory} trait DocumentFilter { def whileFiltered(document: Document)(transform: Document => Document): Document } class FilterByNothing extends DocumentFilter { def whileFiltered(doc: Document)(transform: Document => Document): Document = transform(doc) } object FilterByNothing { def apply() = new FilterByNothing } class FilterByLength(processor: Processor, cutoff: Int = 200) extends DocumentFilter { def whileFiltered(doc: Document)(transform: Document => Document): Document = { val text = doc.text val filteredDoc = filter(doc) val transformedDoc = transform(filteredDoc) val unfilteredDoc = unfilter(transformedDoc, text) unfilteredDoc } protected def unfilter(doc: Document, textOpt: Option[String]): Document = { doc.text = textOpt doc } protected def filter(doc: Document): Document = { // Iterate through the sentences, any sentence that is too long (number of tokens), remove val sanitizedText = sanitizeText(doc) val kept = doc.sentences.filter(s => s.words.length < cutoff) val skipped = doc.sentences.length - kept.length val newDoc = Document(doc.id, kept, doc.coreferenceChains, doc.discourseTree, sanitizedText) val newerDoc = // This is a hack for lack of copy constructor for CoreNLPDocument if (doc.isInstanceOf[CoreNLPDocument]) ShallowNLPProcessor.cluDocToCoreDoc(newDoc, keepText = true) else newDoc if (skipped != 0) FilterByLength.logger.info(s"skipping $skipped sentences") // Return a new document from these sentences newerDoc } protected def sanitizeText(doc: Document): Option[String] = doc.text.map { text => // Assume that these characters are never parts of words. var newText = text.replace('\n', ' ').replace(0x0C.toChar, ' ') for (s <- doc.sentences if s.endOffsets.last < newText.size) { // Only perform this if it isn't part of a word. A space is most reliable. if (newText(s.endOffsets.last) == ' ') newText = newText.updated(s.endOffsets.last, '\n') } newText } } object FilterByLength { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) def apply(processor: Processor, cutoff: Int = 200): FilterByLength = new FilterByLength(processor, cutoff) }
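A wiring sketch, not from the project, assuming a concrete org.clulab Processor and an already-parsed Document are available; the cutoff of 150 tokens is arbitrary.

import org.clulab.processors.{Document, Processor}
import org.clulab.wm.eidos.utils.FilterByLength

object DocumentFilterUsageSketch {
  // Drop sentences with 150 or more tokens, run the transform, then restore the raw text.
  def annotateFiltered(processor: Processor, doc: Document): Document =
    FilterByLength(processor, cutoff = 150).whileFiltered(doc) { filtered =>
      processor.annotate(filtered)
    }
}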
Example 160
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }

  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
}
Example 161
Source File: Timer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import org.slf4j.{Logger, LoggerFactory} import scala.collection.mutable class Timer(val description: String) { var elapsedTime: Option[Long] = None var startTime: Option[Long] = None def time[R](block: => R): R = { val t0 = System.currentTimeMillis() val result: R = block // call-by-name val t1 = System.currentTimeMillis() elapsedTime = Some(t1 - t0) result } def start(): Unit = { val t0 = System.currentTimeMillis() startTime = Some(t0) } def stop(): Unit = { if (startTime.isDefined) { val t1 = System.currentTimeMillis() elapsedTime = Some(t1 - startTime.get) } } override def toString: String = { if (elapsedTime.isDefined) s"\tTime\t$description\t${Timer.diffToString(elapsedTime.get)}" else if (startTime.isDefined) s"\tStart\t$description\t${startTime.get}\tms" else s"\tTimer\t$description" } } object Timer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val elapsedTimes: mutable.Map[String, Long] = mutable.Map.empty def addTime(key: String, milliseconds: Long): Unit = this synchronized { val oldTime = elapsedTimes.getOrElseUpdate(key, 0) val newTime = oldTime + milliseconds elapsedTimes.update(key, newTime) } def summarize: Unit = { elapsedTimes.toSeq.sorted.foreach { case (key, milliseconds) => logger.info(s"\tTotal\t$key\t$milliseconds") } } def diffToString(diff: Long): String = { val days = (diff / (1000 * 60 * 60 * 24)) / 1 val hours = (diff % (1000 * 60 * 60 * 24)) / (1000 * 60 * 60) val mins = (diff % (1000 * 60 * 60)) / (1000 * 60) val secs = (diff % (1000 * 60)) / 1000 val msecs = (diff % (1000 * 1)) / 1 f"$days:$hours%02d:$mins%02d:$secs%02d.$msecs%03d" } // See http://biercoff.com/easily-measuring-code-execution-time-in-scala/ def time[R](description: String, verbose: Boolean = true)(block: => R): R = { val t0 = System.currentTimeMillis() if (verbose) logger.info(s"\tStart\t$description\t$t0\tms") val result: R = block // call-by-name val t1 = System.currentTimeMillis() if (verbose) logger.info(s"\tStop\t$description\t$t1\tms") val diff = t1 - t0 if (verbose) logger.info(s"\tDiff\t$description\t$diff\tms") if (verbose) logger.info(s"\tTime\t$description\t${diffToString(diff)}") addTime(description, diff) result } }
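A usage sketch, not from the project: the blocks being timed are arbitrary. Timer.time logs start, stop, and elapsed lines through the SLF4J logger shown above, while the instance API stores the elapsed milliseconds for later inspection.

import org.clulab.wm.eidos.utils.Timer

object TimerUsageSketch extends App {
  // Companion helper: logs timing lines and accumulates the elapsed time under the key "sum".
  val total = Timer.time("sum") {
    (1L to 1000000L).sum
  }
  println(total)

  // Instance API: elapsedTime is populated once time() returns.
  val timer = new Timer("sort")
  timer.time((1 to 100000).toArray.sorted)
  println(timer)
}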
Example 162
Source File: DomainHandler.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.groundings

import java.time.ZonedDateTime

import com.github.clulab.eidos.Version
import com.github.clulab.eidos.Versions
import com.github.worldModelers.ontologies.{Versions => AwayVersions}
import org.clulab.wm.eidos.SentencesExtractor
import org.clulab.wm.eidos.groundings.ontologies.FullTreeDomainOntology.FullTreeDomainOntologyBuilder
import org.clulab.wm.eidos.groundings.OntologyHandler.serializedPath
import org.clulab.wm.eidos.groundings.ontologies.CompactDomainOntology
import org.clulab.wm.eidos.groundings.ontologies.FastDomainOntology
import org.clulab.wm.eidos.groundings.ontologies.HalfTreeDomainOntology.HalfTreeDomainOntologyBuilder
import org.clulab.wm.eidos.utils.Canonicalizer
import org.clulab.wm.eidos.utils.StringUtils
import org.slf4j.Logger
import org.slf4j.LoggerFactory

object DomainHandler {
  protected lazy val logger: Logger = LoggerFactory.getLogger(getClass)

  // The intention is to stop the proliferation of the generated Version class to this single method.
  protected def getVersionOpt(ontologyPath: String): (Option[String], Option[ZonedDateTime]) = {
    // This should work for local ontologies. Absolute
    val goodVersionOpt = Versions.versions.get(MockVersions.codeDir + ontologyPath)
    // See what might have come from WordModelers/Ontologies
    val bestVersionOpt = goodVersionOpt.getOrElse {
      // These are always stored in top level directory.
      val awayVersionOpt = AwayVersions.versions.get(StringUtils.afterLast(ontologyPath, '/')).getOrElse(None)
      val homeVersionOpt = awayVersionOpt.map { awayVersion => Version(awayVersion.commit, awayVersion.date) }

      homeVersionOpt
    }

    if (bestVersionOpt.isDefined) (Some(bestVersionOpt.get.commit), Some(bestVersionOpt.get.date))
    else (None, None)
  }

  def apply(ontologyPath: String, serializedPath: String, sentencesExtractor: SentencesExtractor,
      canonicalizer: Canonicalizer, filter: Boolean = true, useCacheForOntologies: Boolean = false,
      includeParents: Boolean = false): DomainOntology = {

    // As coded below, when parents are included, the FullTreeDomainOntology is being used.
    // The faster loading version is the FastDomainOntology.
    // If parents are not included, as had traditionally been the case, the HalfTreeDomainOntology suffices.
    // Being smaller and faster, it is preferred. The faster loading counterpart is CompactDomainOntology.
    if (includeParents) {
      if (useCacheForOntologies) {
        logger.info(s"Processing cached yml ontology with parents from $serializedPath...")
        FastDomainOntology.load(serializedPath)
      }
      else {
        logger.info(s"Processing yml ontology with parents from $ontologyPath...")
        val (versionOpt, dateOpt) = getVersionOpt(ontologyPath)
        new FullTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt)
      }
    }
    else {
      if (useCacheForOntologies) {
        logger.info(s"Processing cached yml ontology without parents from $serializedPath...")
        CompactDomainOntology.load(serializedPath)
      }
      else {
        logger.info(s"Processing yml ontology without parents from $ontologyPath...")
        val (versionOpt, dateOpt) = getVersionOpt(ontologyPath)
        new HalfTreeDomainOntologyBuilder(sentencesExtractor, canonicalizer, filter).buildFromPath(ontologyPath, versionOpt, dateOpt)
      }
    }
  }

  def mkDomainOntology(name: String, ontologyPath: String, sentenceExtractor: SentencesExtractor,
      canonicalizer: Canonicalizer, cacheDir: String, useCacheForOntologies: Boolean,
      includeParents: Boolean): DomainOntology = {
    val ontSerializedPath: String = serializedPath(name, cacheDir, includeParents)

    DomainHandler(ontologyPath, ontSerializedPath, sentenceExtractor, canonicalizer: Canonicalizer, filter = true,
        useCacheForOntologies = useCacheForOntologies, includeParents = includeParents)
  }
}
Example 163
Source File: ExtractCluMetaFromDirectory.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.apps.batch import java.io.File import org.clulab.serialization.json.stringify import org.clulab.wm.eidos.EidosSystem import org.clulab.wm.eidos.groundings.EidosAdjectiveGrounder import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus import org.clulab.wm.eidos.utils.Closer.AutoCloser import org.clulab.wm.eidos.utils.FileEditor import org.clulab.wm.eidos.utils.FileUtils import org.clulab.wm.eidos.utils.ThreadUtils import org.clulab.wm.eidos.utils.Timer import org.clulab.wm.eidos.utils.meta.CluText import org.slf4j.Logger import org.slf4j.LoggerFactory object ExtractCluMetaFromDirectory extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) val inputDir = args(0) val metaDir = args(1) val outputDir = args(2) val timeFile = args(3) val threads = args(4).toInt val doneDir = inputDir + "/done" val textToMeta = CluText.convertTextToMeta _ val files = FileUtils.findFiles(inputDir, "txt") val parFiles = ThreadUtils.parallelize(files, threads) Timer.time("Whole thing") { val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile) timePrintWriter.println("File\tSize\tTime") val timer = new Timer("Startup") timer.start() // Prime it first. This counts on overall time, but should not be attributed // to any particular document. val config = EidosSystem.defaultConfig val reader = new EidosSystem(config) val options = EidosSystem.Options() // 0. Optionally include adjective grounding val adjectiveGrounder = EidosAdjectiveGrounder.fromEidosConfig(config) reader.extractFromText("This is a test.") timer.stop() timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get) parFiles.foreach { file => try { // 1. Open corresponding output file logger.info(s"Extracting from ${file.getName}") val timer = new Timer("Single file in parallel") val size = timer.time { // 2. Get the input file text and metadata val metafile = textToMeta(file, metaDir) val eidosText = CluText(reader, file, Some(metafile)) val text = eidosText.getText val metadata = eidosText.getMetadata // 3. Extract causal mentions from the text val annotatedDocument = reader.extractFromText(text, options, metadata) // 4. Convert to JSON val corpus = new JLDCorpus(annotatedDocument) val mentionsJSONLD = corpus.serialize() // 5. Write to output file val path = CluText.convertTextToJsonld(file, outputDir) FileUtils.printWriterFromFile(path).autoClose { pw => pw.println(stringify(mentionsJSONLD, pretty = true)) } // Now move the file to directory done val newFile = FileEditor(file).setDir(doneDir).get file.renameTo(newFile) text.length } this.synchronized { timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get) } } catch { case exception: Exception => logger.error(s"Exception for file $file", exception) } } timePrintWriter.close() } }
Example 164
Source File: ExtractCdrMetaFromDirectory.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.apps.batch import org.clulab.wm.eidos.EidosSystem import org.clulab.wm.eidos.serialization.jsonld.JLDCorpus import org.clulab.wm.eidos.utils.Closer.AutoCloser import org.clulab.wm.eidos.utils.FileEditor import org.clulab.wm.eidos.utils.FileUtils import org.clulab.wm.eidos.utils.ThreadUtils import org.clulab.wm.eidos.utils.Timer import org.clulab.wm.eidos.utils.meta.CdrText import org.slf4j.Logger import org.slf4j.LoggerFactory object ExtractCdrMetaFromDirectory extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) val inputDir = args(0) val outputDir = args(1) val timeFile = args(2) val threads = args(3).toInt val doneDir = inputDir + "/done" val files = FileUtils.findFiles(inputDir, "json") val parFiles = ThreadUtils.parallelize(files, threads) Timer.time("Whole thing") { val timePrintWriter = FileUtils.appendingPrintWriterFromFile(timeFile) timePrintWriter.println("File\tSize\tTime") val timer = new Timer("Startup") timer.start() // Prime it first. This counts on overall time, but should not be attributed // to any particular document. val reader = new EidosSystem() val options = EidosSystem.Options() Timer.time("EidosPrimer") { reader.extractFromText("This is a test.") } timer.stop() timePrintWriter.println("Startup\t0\t" + timer.elapsedTime.get) parFiles.foreach { file => try { // 1. Open corresponding output file logger.info(s"Extracting from ${file.getName}") val timer = new Timer("Single file in parallel") val size = timer.time { // 1. Get the input file text and metadata val eidosText = CdrText(file) val text = eidosText.getText val metadata = eidosText.getMetadata // 2. Extract causal mentions from the text val annotatedDocument = reader.extractFromText(text, options, metadata) // 3. Write to output file val path = FileEditor(file).setDir(outputDir).setExt("jsonld").get FileUtils.printWriterFromFile(path).autoClose { printWriter => new JLDCorpus(annotatedDocument).serialize(printWriter) } // Now move the file to directory done val newFile = FileEditor(file).setDir(doneDir).get file.renameTo(newFile) text.length } this.synchronized { timePrintWriter.println(file.getName + "\t" + size + "\t" + timer.elapsedTime.get) } } catch { case exception: Exception => logger.error(s"Exception for file $file", exception) } } timePrintWriter.close() } }
Example 165
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }

  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
}
Example 166
Source File: SharedSparkContext.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark

import org.apache.spark._
import org.slf4j.LoggerFactory

  def sparkAppName : String = "ScalaCheck Spark test"

  // lazy val so early definitions are not needed for subtyping
  @transient lazy val conf = new SparkConf().setMaster(sparkMaster).setAppName(sparkAppName)

  @transient protected[this] var _sc : Option[SparkContext] = None

  def sc() : SparkContext = {
    _sc.getOrElse {
      logger.warn("creating test Spark context")
      _sc = Some(new SparkContext(conf))
      _sc.get
    }
  }

  override def close() : Unit = {
    _sc.foreach { sc =>
      logger.warn("stopping test Spark context")
      sc.stop()
      // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
      System.clearProperty("spark.driver.port")
    }
    _sc = None
  }
}
Example 167
Source File: SharedStreamingContext.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.streaming

import org.apache.spark.streaming.{StreamingContext,Duration}
import org.slf4j.LoggerFactory

import scala.util.Try

import es.ucm.fdi.sscheck.spark.SharedSparkContext

trait SharedStreamingContext extends SharedSparkContext {
  // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087
  // @transient private[this] val logger = Logger(LoggerFactory.getLogger("SharedStreamingContext"))
  @transient private val logger = LoggerFactory.getLogger("SharedStreamingContext")

      ssc.stop(stopSparkContext=false, stopGracefully=false)
    } recover {
      case _ => {
        logger.warn("second attempt forcing stop of test Spark Streaming context")
        ssc.stop(stopSparkContext=false, stopGracefully=false)
      }
    }
    _ssc = None
  }

  if (stopSparkContext) {
    super[SharedSparkContext].close()
  }
}
Example 168
Source File: SharedStreamingContextBeforeAfterEachTest.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.streaming import org.junit.runner.RunWith import org.specs2.runner.JUnitRunner import org.specs2.execute.Result import org.apache.spark.streaming.Duration import org.apache.spark.rdd.RDD import scala.collection.mutable.Queue import scala.concurrent.duration._ import org.slf4j.LoggerFactory import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._ // sbt "test-only es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEachTest" @RunWith(classOf[JUnitRunner]) class SharedStreamingContextBeforeAfterEachTest extends org.specs2.Specification with org.specs2.matcher.MustThrownExpectations with org.specs2.matcher.ResultMatchers with SharedStreamingContextBeforeAfterEach { // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087 @transient private val logger = LoggerFactory.getLogger("SharedStreamingContextBeforeAfterEachTest") // Spark configuration override def sparkMaster : String = "local[5]" override def batchDuration = Duration(250) override def defaultParallelism = 3 override def enableCheckpointing = false // as queueStream doesn't support checkpointing def is = sequential ^ s2""" Simple test for SharedStreamingContextBeforeAfterEach where a simple queueStream test must be successful $successfulSimpleQueueStreamTest where a simple queueStream test can also fail $failingSimpleQueueStreamTest """ def successfulSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 0) def failingSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 1) must beFailing def simpleQueueStreamTest(expectedCount : Int) : Result = { val record = "hola" val batches = Seq.fill(5)(Seq.fill(10)(record)) val queue = new Queue[RDD[String]] queue ++= batches.map(batch => sc.parallelize(batch, numSlices = defaultParallelism)) val inputDStream = ssc.queueStream(queue, oneAtATime = true) val sizesDStream = inputDStream.map(_.length) var batchCount = 0 // NOTE wrapping assertions with a Result object is needed // to avoid the Spark Streaming runtime capturing the exceptions // from failing assertions var result : Result = ok inputDStream.foreachRDD { rdd => batchCount += 1 println(s"completed batch number $batchCount: ${rdd.collect.mkString(",")}") result = result and { rdd.filter(_!= record).count() === expectedCount rdd should existsRecord(_ == "hola") } } sizesDStream.foreachRDD { rdd => result = result and { rdd should foreachRecord(record.length)(len => _ == len) } } // should only start the dstream after all the transformations and actions have been defined ssc.start() // wait for completion of batches.length batches StreamingContextUtils.awaitForNBatchesCompleted(batches.length, atMost = 10 seconds)(ssc) result } }
Example 169
Source File: LogLevelRoutesSpec.scala From akka-management with Apache License 2.0 | 5 votes |
package akka.management.loglevels.logback import akka.actor.ExtendedActorSystem import akka.http.javadsl.server.MalformedQueryParamRejection import akka.http.scaladsl.model.StatusCodes import akka.http.scaladsl.model.Uri import akka.http.scaladsl.testkit.ScalatestRouteTest import akka.management.scaladsl.ManagementRouteProviderSettings import org.scalatest.Matchers import org.scalatest.WordSpec import org.slf4j.LoggerFactory import akka.event.{ Logging => ClassicLogging } class LogLevelRoutesSpec extends WordSpec with Matchers with ScalatestRouteTest { override def testConfigSource: String = """ akka.loglevel = INFO """ val routes = LogLevelRoutes .createExtension(system.asInstanceOf[ExtendedActorSystem]) .routes(ManagementRouteProviderSettings(Uri("https://example.com"), readOnly = false)) "The logback log level routes" must { "show log level of a Logger" in { Get("/loglevel/logback?logger=LogLevelRoutesSpec") ~> routes ~> check { responseAs[String] } } "change log level of a Logger" in { Put("/loglevel/logback?logger=LogLevelRoutesSpec&level=DEBUG") ~> routes ~> check { response.status should ===(StatusCodes.OK) LoggerFactory.getLogger("LogLevelRoutesSpec").isDebugEnabled should ===(true) } } "fail for unknown log level" in { Put("/loglevel/logback?logger=LogLevelRoutesSpec&level=MONKEY") ~> routes ~> check { rejection shouldBe an[MalformedQueryParamRejection] } } "not change loglevel if read only" in { val readOnlyRoutes = LogLevelRoutes .createExtension(system.asInstanceOf[ExtendedActorSystem]) .routes(ManagementRouteProviderSettings(Uri("https://example.com"), readOnly = true)) Put("/loglevel/logback?logger=LogLevelRoutesSpec&level=DEBUG") ~> readOnlyRoutes ~> check { response.status should ===(StatusCodes.Forbidden) } } "allow inspecting classic Akka loglevel" in { Get("/loglevel/akka") ~> routes ~> check { response.status should ===(StatusCodes.OK) responseAs[String] should ===("INFO") } } "allow changing classic Akka loglevel" in { Put("/loglevel/akka?level=DEBUG") ~> routes ~> check { response.status should ===(StatusCodes.OK) system.eventStream.logLevel should ===(ClassicLogging.DebugLevel) } } } }
Example 170
Source File: ParallelCpgPass.scala From codepropertygraph with Apache License 2.0 | 5 votes |
package io.shiftleft.passes import java.util.concurrent.LinkedBlockingQueue import io.shiftleft.SerializedCpg import io.shiftleft.codepropertygraph.Cpg import org.apache.logging.log4j.{LogManager, Logger} import org.slf4j.LoggerFactory abstract class ParallelCpgPass[T](cpg: Cpg, outName: String = "") extends CpgPassBase { private val logger: Logger = LogManager.getLogger(classOf[ParallelCpgPass[T]]) def init(): Unit = {} def partIterator: Iterator[T] def runOnPart(part: T): Option[DiffGraph] override def createAndApply(): Unit = { withWriter() { writer => enqueueInParallel(writer) } } override def createApplySerializeAndStore(serializedCpg: SerializedCpg, inverse: Boolean, prefix: String): Unit = { withWriter(serializedCpg, prefix, inverse) { writer => enqueueInParallel(writer) } } private def withWriter[X](serializedCpg: SerializedCpg = new SerializedCpg(), prefix: String = "", inverse: Boolean = false)(f: Writer => Unit): Unit = { val writer = new Writer(serializedCpg, prefix, inverse) val writerThread = new Thread(writer) writerThread.setName("Writer") writerThread.start() try { f(writer) } catch { case exception: Exception => logger.warn(exception) } finally { writer.enqueue(None) writerThread.join() } } private def enqueueInParallel(writer: Writer): Unit = { init() val it = new ParallelIteratorExecutor(partIterator).map { part => // Note: write.enqueue(runOnPart(part)) would be wrong because // it would terminate the writer as soon as a pass returns None // as None is used as a termination symbol for the queue runOnPart(part).foreach(diffGraph => writer.enqueue(Some(diffGraph))) } consume(it) } private def consume(it: Iterator[_]): Unit = { while (it.hasNext) { it.next() } } private class Writer(serializedCpg: SerializedCpg, prefix: String, inverse: Boolean) extends Runnable { private val logger = LoggerFactory.getLogger(getClass) private val queue = new LinkedBlockingQueue[Option[DiffGraph]] def enqueue(diffGraph: Option[DiffGraph]): Unit = { queue.put(diffGraph) } override def run(): Unit = { try { var terminate = false var index: Int = 0 while (!terminate) { queue.take() match { case Some(diffGraph) => val appliedDiffGraph = DiffGraph.Applier.applyDiff(diffGraph, cpg, inverse) if (!serializedCpg.isEmpty) { val overlay = serialize(appliedDiffGraph, inverse) val name = generateOutFileName(prefix, outName, index) index += 1 store(overlay, name, serializedCpg) } case None => logger.info("Shutting down WriterThread") terminate = true } } } catch { case _: InterruptedException => logger.info("Interrupted WriterThread") } } } }
Example 171
Source File: MLLib.scala From spark-sql-perf with Apache License 2.0 | 5 votes |
package com.databricks.spark.sql.perf.mllib import scala.io.Source import scala.language.implicitConversions import org.slf4j.LoggerFactory import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.{SparkConf, SparkContext} import com.databricks.spark.sql.perf._ class MLLib(sqlContext: SQLContext) extends Benchmark(sqlContext) with Serializable { def this() = this(SQLContext.getOrCreate(SparkContext.getOrCreate())) } object MLLib { def run(yamlFile: String = null, yamlConfig: String = null): DataFrame = { logger.info("Starting run") val conf = getConf(yamlFile, yamlConfig) val sparkConf = new SparkConf().setAppName("MLlib QA").setMaster("local[2]") val sc = SparkContext.getOrCreate(sparkConf) sc.setLogLevel("INFO") val b = new com.databricks.spark.sql.perf.mllib.MLLib() val benchmarks = getBenchmarks(conf) println(s"${benchmarks.size} benchmarks identified:") val str = benchmarks.map(_.prettyPrint).mkString("\n") println(str) logger.info("Starting experiments") val e = b.runExperiment( executionsToRun = benchmarks, iterations = 1, // If you want to increase the number of iterations, add more seeds resultLocation = conf.output, forkThread = false) e.waitForFinish(conf.timeout.toSeconds.toInt) logger.info("Run finished") e.getCurrentResults() } }
Example 172
Source File: CarbonThriftServer.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.thriftserver import java.io.File import org.apache.spark.SparkConf import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 import org.slf4j.{Logger, LoggerFactory} import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.spark.util.CarbonSparkUtil object CarbonThriftServer { def main(args: Array[String]): Unit = { if (args.length != 0 && args.length != 3) { val logger: Logger = LoggerFactory.getLogger(this.getClass) logger.error("parameters: [access-key] [secret-key] [s3-endpoint]") System.exit(0) } val sparkConf = new SparkConf(loadDefaults = true) val builder = SparkSession .builder() .config(sparkConf) .appName("Carbon Thrift Server(uses CarbonExtensions)") .enableHiveSupport() .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") configPropertiesFile(sparkConf, builder) if (args.length == 3) { builder.config(CarbonSparkUtil.getSparkConfForS3(args(0), args(1), args(2))) } val spark = builder.getOrCreate() CarbonEnv.getInstance(spark) waitingForSparkLaunch() HiveThriftServer2.startWithContext(spark.sqlContext) } private def waitingForSparkLaunch(): Unit = { val warmUpTime = CarbonProperties.getInstance().getProperty("carbon.spark.warmUpTime", "5000") try { Thread.sleep(Integer.parseInt(warmUpTime)) } catch { case e: Exception => val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName) LOG.error(s"Wrong value for carbon.spark.warmUpTime $warmUpTime " + "Using default Value and proceeding") Thread.sleep(5000) } } private def configPropertiesFile(sparkConf: SparkConf, builder: SparkSession.Builder): Unit = { sparkConf.contains("carbon.properties.filepath") match { case false => val sparkHome = System.getenv.get("SPARK_HOME") if (null != sparkHome) { val file = new File(sparkHome + '/' + "conf" + '/' + "carbon.properties") if (file.exists()) { builder.config("carbon.properties.filepath", file.getCanonicalPath) System.setProperty("carbon.properties.filepath", file.getCanonicalPath) } } case true => System.setProperty( "carbon.properties.filepath", sparkConf.get("carbon.properties.filepath")) } } }
Example 173
Source File: S3CsvExample.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.examples import java.io.File import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, SECRET_KEY} import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} object S3CsvExample { def main(args: Array[String]) { val rootPath = new File(this.getClass.getResource("/").getPath + "../../../..").getCanonicalPath val logger: Logger = LoggerFactory.getLogger(this.getClass) import org.apache.spark.sql.CarbonUtils._ if (args.length != 4) { logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" + "<s3.csv.location> <spark-master>") System.exit(0) } val spark = SparkSession .builder() .master(args(3)) .appName("S3CsvExample") .config("spark.driver.host", "localhost") .config("spark.hadoop." + ACCESS_KEY, args(0)) .config("spark.hadoop." + SECRET_KEY, args(1)) .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") .getOrCreate() spark.sparkContext.setLogLevel("ERROR") spark.sql( s""" | CREATE TABLE if not exists carbon_table1( | shortField SHORT, | intField INT, | bigintField LONG, | doubleField DOUBLE, | stringField STRING, | timestampField TIMESTAMP, | decimalField DECIMAL(18,2), | dateField DATE, | charField CHAR(5), | floatField FLOAT | ) | STORED AS carbondata | LOCATION '$rootPath/examples/spark/target/store' """.stripMargin) spark.sql( s""" | LOAD DATA LOCAL INPATH '${ args(2) }' | INTO TABLE carbon_table1 | OPTIONS('HEADER'='true') """.stripMargin) spark.sql( s""" | LOAD DATA LOCAL INPATH '${ args(2) }' | INTO TABLE carbon_table1 | OPTIONS('HEADER'='true') """.stripMargin) spark.sql( s""" | SELECT * | FROM carbon_table1 """.stripMargin).show() spark.sql("Drop table if exists carbon_table1") spark.stop() } }
Example 174
Source File: S3UsingSDkExample.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.examples import org.apache.hadoop.fs.s3a.Constants.{ACCESS_KEY, ENDPOINT, SECRET_KEY} import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field} import org.apache.carbondata.sdk.file.{CarbonWriter, Schema} import org.apache.carbondata.spark.util.CarbonSparkUtil def main(args: Array[String]) { val logger: Logger = LoggerFactory.getLogger(this.getClass) if (args.length < 2 || args.length > 6) { logger.error("Usage: java CarbonS3Example <access-key> <secret-key>" + "[table-path-on-s3] [s3-endpoint] [number-of-rows] [spark-master]") System.exit(0) } val (accessKey, secretKey, endpoint) = CarbonSparkUtil.getKeyOnPrefix(args(2)) val spark = SparkSession .builder() .master(getSparkMaster(args)) .appName("S3UsingSDKExample") .config("spark.driver.host", "localhost") .config(accessKey, args(0)) .config(secretKey, args(1)) .config(endpoint, CarbonSparkUtil.getS3EndPoint(args)) .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") .getOrCreate() spark.sparkContext.setLogLevel("WARN") val path = if (args.length < 3) { "s3a://sdk/WriterOutput2 " } else { args(2) } val num = if (args.length > 4) { Integer.parseInt(args(4)) } else { 3 } buildTestData(args, path, num) spark.sql("DROP TABLE IF EXISTS s3_sdk_table") spark.sql(s"CREATE EXTERNAL TABLE s3_sdk_table STORED AS carbondata" + s" LOCATION '$path'") spark.sql("SELECT * FROM s3_sdk_table LIMIT 10").show() spark.stop() } def getSparkMaster(args: Array[String]): String = { if (args.length == 6) args(5) else "local" } }
Example 175
Source File: CouchbaseReadSideHandler.scala From akka-persistence-couchbase with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.javadsl.persistence.couchbase import java.util.concurrent.CompletionStage import akka.Done import akka.japi.Pair import akka.stream.ActorAttributes import akka.stream.alpakka.couchbase.javadsl.CouchbaseSession import akka.stream.javadsl.Flow import com.lightbend.lagom.internal.javadsl.persistence.OffsetAdapter import com.lightbend.lagom.internal.persistence.couchbase.{CouchbaseOffsetDao, CouchbaseOffsetStore} import com.lightbend.lagom.javadsl.persistence.ReadSideProcessor.ReadSideHandler import com.lightbend.lagom.javadsl.persistence.{AggregateEvent, AggregateEventTag, Offset} import org.slf4j.LoggerFactory import scala.compat.java8.FutureConverters._ import scala.concurrent.{ExecutionContext, Future} private[couchbase] final class CouchbaseReadSideHandler[Event <: AggregateEvent[Event]]( couchbaseSession: CouchbaseSession, offsetStore: CouchbaseOffsetStore, handlers: Map[Class[_ <: Event], Handler[Event]], globalPrepareCallback: CouchbaseSession => CompletionStage[Done], prepareCallback: (CouchbaseSession, AggregateEventTag[Event]) => CompletionStage[Done], readProcessorId: String, dispatcher: String )(implicit ec: ExecutionContext) extends ReadSideHandler[Event] { private val log = LoggerFactory.getLogger(this.getClass) @volatile private var offsetDao: CouchbaseOffsetDao = _ protected def invoke(handler: Handler[Event], event: Event, offset: Offset): CompletionStage[Done] = handler .asInstanceOf[(CouchbaseSession, Event, Offset) => CompletionStage[Done]] .apply(couchbaseSession, event, offset) .toScala .flatMap { _ => val akkaOffset = OffsetAdapter.dslOffsetToOffset(offset) offsetDao.bindSaveOffset(akkaOffset).execute(couchbaseSession.asScala, ec) } .toJava override def globalPrepare(): CompletionStage[Done] = globalPrepareCallback.apply(couchbaseSession) override def prepare(tag: AggregateEventTag[Event]): CompletionStage[Offset] = (for { _ <- prepareCallback.apply(couchbaseSession, tag).toScala dao <- offsetStore.prepare(readProcessorId, tag.tag) } yield { offsetDao = dao OffsetAdapter.offsetToDslOffset(dao.loadedOffset) }).toJava override def handle(): Flow[Pair[Event, Offset], Done, _] = akka.stream.scaladsl .Flow[Pair[Event, Offset]] .mapAsync(parallelism = 1) { pair => val Pair(event, offset) = pair val eventClass = event.getClass val handler = handlers.getOrElse( // lookup handler eventClass, // fallback to empty handler if none { if (log.isDebugEnabled()) log.debug("Unhandled event [{}]", eventClass.getName) CouchbaseReadSideHandler.emptyHandler } ) invoke(handler, event, offset).toScala } .withAttributes(ActorAttributes.dispatcher(dispatcher)) .asJava }
Example 176
Source File: CouchbaseReadSideHandler.scala From akka-persistence-couchbase with Apache License 2.0 | 5 votes |
package com.lightbend.lagom.internal.scaladsl.persistence.couchbase import akka.persistence.query.Offset import akka.stream.ActorAttributes import akka.stream.alpakka.couchbase.scaladsl.CouchbaseSession import akka.stream.scaladsl.Flow import akka.{Done, NotUsed} import com.lightbend.lagom.internal.persistence.couchbase.{CouchbaseOffsetDao, CouchbaseOffsetStore} import com.lightbend.lagom.scaladsl.persistence.ReadSideProcessor.ReadSideHandler import com.lightbend.lagom.scaladsl.persistence._ import org.slf4j.LoggerFactory import scala.concurrent.{ExecutionContext, Future} private[couchbase] final class CouchbaseReadSideHandler[Event <: AggregateEvent[Event]]( couchbase: CouchbaseSession, offsetStore: CouchbaseOffsetStore, handlers: Map[Class[_ <: Event], CouchbaseReadSideHandler.Handler[Event]], globalPrepareCallback: CouchbaseSession => Future[Done], prepareCallback: (CouchbaseSession, AggregateEventTag[Event]) => Future[Done], readProcessorId: String, dispatcher: String )(implicit ec: ExecutionContext) extends ReadSideHandler[Event] { import CouchbaseReadSideHandler.Handler private val log = LoggerFactory.getLogger(this.getClass) @volatile private var offsetDao: CouchbaseOffsetDao = _ protected def invoke(handler: Handler[Event], element: EventStreamElement[Event]): Future[Done] = handler .apply(couchbase, element) .flatMap(_ => offsetDao.bindSaveOffset(element.offset).execute(couchbase, ec)) override def globalPrepare(): Future[Done] = globalPrepareCallback(couchbase) override def prepare(tag: AggregateEventTag[Event]): Future[Offset] = for { _ <- prepareCallback.apply(couchbase, tag) dao <- offsetStore.prepare(readProcessorId, tag.tag) } yield { offsetDao = dao dao.loadedOffset } override def handle(): Flow[EventStreamElement[Event], Done, NotUsed] = Flow[EventStreamElement[Event]] .mapAsync(parallelism = 1) { elem => val eventClass = elem.event.getClass val handler = handlers.getOrElse( // lookup handler eventClass, // fallback to empty handler if none { if (log.isDebugEnabled()) log.debug("Unhandled event [{}]", eventClass.getName) CouchbaseReadSideHandler.emptyHandler.asInstanceOf[Handler[Event]] } ) invoke(handler, elem) } .withAttributes(ActorAttributes.dispatcher(dispatcher)) }
Example 177
Source File: Factory.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.nlp.mystem.holding import java.io.{File, IOException} import java.nio.file.Files import java.nio.file.attribute.PosixFilePermissions import org.slf4j.LoggerFactory import ru.stachek66.tools.external.FailSafeExternalProcessServer import ru.stachek66.tools.{Decompressor, Downloader, Tools} import scala.concurrent.duration._ import scala.sys.process._ import scala.util.Try def newMyStem(version: String, customExecutable: Option[File] = None): Try[MyStem] = Try { val ex = customExecutable match { case Some(exe) => exe case None => getExecutable(version) } version match { case "3.0" | "3.1" => new MyStem3( new FailSafeExternalProcessServer( ex.getAbsolutePath + (if (parsingOptions.nonEmpty) " " + parsingOptions else ""))) case _ => throw new NotImplementedError() } } @throws(classOf[Exception]) private[holding] def getExecutable(version: String): File = { val destFile = new File(BinDestination + BIN_FILE_NAME) val tempFile = new File(s"${BinDestination}tmp_${System.currentTimeMillis}.${Decompressor.select.traditionalExtension}") if (destFile.exists) { log.info("Old executable file found") try { val suggestedVersion = (destFile.getAbsolutePath + " -v") !! log.info("Version | " + suggestedVersion) // not scala-way stuff if (suggestedVersion.contains(version)) destFile else throw new Exception("Wrong version!") } catch { case e: Exception => log.warn("Removing old binary files...", e) destFile.delete getExecutable(version) } } else Tools.withAttempt(10, 1.second) { try { Decompressor.select.unpack( Downloader.downloadBinaryFile(getUrl(version), tempFile), destFile) } finally { tempFile.delete() try { Files.setPosixFilePermissions(destFile.toPath, PosixFilePermissions.fromString("r-xr-xr-x")).toFile } catch { case ioe: IOException => log.warn("Can't set POSIX permissions to file " + destFile.toPath) destFile } } } } }
Example 178
Source File: package.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.nlp

import org.slf4j.LoggerFactory

package object mystem {

  private val log = LoggerFactory.getLogger(getClass)

  val os: Map[(String, String), String] = Map(
    ("Linux", "x86_64") -> "linux64",
    ("Linux", "amd64") -> "linux64",
    ("Linux", "x86") -> "linux32",
    ("Windows7", "x86") -> "win32",
    ("Windows7", "x86_64") -> "win64"
  ) withDefault { _ =>
    log.warn("Getting OSX binaries!")
    "osx"
  }
}
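A lookup sketch, not from the library: os.name and os.arch are standard JVM system properties, but collapsing "Windows 7" to the "Windows7" key used above is an assumption.

import ru.stachek66.nlp.mystem.os

object OsLookupSketch extends App {
  val name = System.getProperty("os.name").replaceAll("\\s+", "") // e.g. "Linux" or "Windows7" (assumed normalization)
  val arch = System.getProperty("os.arch")                        // e.g. "amd64" or "x86_64"
  println(os((name, arch)))                                       // unknown combinations fall back to "osx" with a warning
}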
Example 179
Source File: Tools.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools

import org.slf4j.LoggerFactory

import scala.concurrent.duration._

object Tools {

  private val log = LoggerFactory.getLogger(getClass)

  @throws(classOf[Exception])
  def withAttempt[T](n: Int, timeout: Duration = 0.millis)(action: => T): T =
    try {
      action
    } catch {
      case e: Exception if n > 1 =>
        log.warn(s"${n - 1} attempts left", e)
        Thread.sleep(timeout.toMillis)
        withAttempt(n - 1, timeout)(action)
      case e: Exception =>
        throw new Exception("No attempts left", e)
    }
}
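A retry sketch, not from the library: the block simulates a transient failure. withAttempt retries it up to three times, sleeping 500 ms between attempts and rethrowing once all attempts are exhausted.

import ru.stachek66.tools.Tools

import scala.concurrent.duration._
import scala.util.Random

object ToolsUsageSketch extends App {
  val value = Tools.withAttempt(3, 500.millis) {
    if (Random.nextInt(3) == 0) 42 else sys.error("transient failure") // fails roughly two calls out of three
  }
  println(value)
}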
Example 180
Source File: Log.scala From fs2-rabbit with Apache License 2.0 | 5 votes |
package dev.profunktor.fs2rabbit.effects

import cats.effect.Sync
import org.slf4j.LoggerFactory

trait Log[F[_]] {
  def info(value: => String): F[Unit]
  def error(value: => String): F[Unit]
}

object Log {
  private[fs2rabbit] val logger = LoggerFactory.getLogger(this.getClass)

  def apply[F[_]](implicit ev: Log[F]): Log[F] = ev

  implicit def syncLogInstance[F[_]](implicit F: Sync[F]): Log[F] =
    new Log[F] {
      override def error(value: => String): F[Unit] = F.delay(logger.error(value))
      override def info(value: => String): F[Unit]  = F.delay(logger.info(value))
    }
}
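A sketch of using the type class from application code, assuming cats-effect's IO (which provides the required Sync instance) is on the classpath; the log message is arbitrary.

import cats.effect.{ExitCode, IO, IOApp}
import dev.profunktor.fs2rabbit.effects.Log

object LogUsageSketch extends IOApp {
  // syncLogInstance derives a Log[IO] from Sync[IO], so Log[IO] resolves implicitly.
  def run(args: List[String]): IO[ExitCode] =
    Log[IO].info("starting consumer").map(_ => ExitCode.Success)
}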
Example 181
Source File: LoanWordsProcessor.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
package org.nlp4l.syn import org.apache.lucene.search.spell.LuceneLevenshteinDistance import org.nlp4l.core.RawReader import org.nlp4l.framework.models._ import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory} import org.nlp4l.lm.{HmmTokenizer, HmmModel} import org.slf4j.LoggerFactory import scala.collection.mutable.ListBuffer import scala.util.matching.Regex class LoanWordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) { override def getInstance: DictionaryAttribute = { val list = Seq[CellAttribute]( CellAttribute("word", CellType.StringType, true, true), CellAttribute("synonym", CellType.StringType, false, true) ) new DictionaryAttribute("loanWords", list) } } class LoanWordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) { val DEF_THRESHOLD = 0.8F val DEF_MIN_DOCFREQ = 3 override def getInstance: Processor = { val index = getStrParamRequired("index") val field = getStrParamRequired("field") val modelIndex = getStrParamRequired("modelIndex") val threshold = getFloatParam("threshold", DEF_THRESHOLD) val minDocFreq = getIntParam("minDocFreq", DEF_MIN_DOCFREQ) new LoanWordsProcessor(index, field, modelIndex, threshold, minDocFreq) } } class LoanWordsProcessor(val index: String, val field: String, val modelIndex: String, val threshold: Float, val minDocFreq: Int) extends Processor { override def execute(data: Option[Dictionary]): Option[Dictionary] = { val logger = LoggerFactory.getLogger(this.getClass) val reader = RawReader(index) val trModel = new TransliterationModelIndex(modelIndex) val pattern: Regex = """([a-z]+) ([\u30A0-\u30FF]+)""".r val lld = new LuceneLevenshteinDistance() val records = ListBuffer.empty[Record] try{ var progress = 0 val fi = reader.field(field) fi match { case Some(f) => { val len = f.uniqTerms f.terms.foreach { t => progress = progress + 1 if((progress % 10000) == 0){ val percent = ((progress.toFloat / len) * 100).toInt logger.info(s"$percent % done ($progress / $len) term is ${t.text}") } if (t.docFreq >= minDocFreq) { t.text match { case pattern(a, b) => { val predWord = trModel.predict(b) if (lld.getDistance(a, predWord) > threshold) { records += Record(Seq(Cell("word", a), Cell("synonym", b))) } } case _ => {} } } } Some(Dictionary(records)) } case _ => throw new RuntimeException(s"""field "$field" you specified in conf file doesn't exist in the index "$index""") } } finally{ if(reader != null) reader.close } } } class TransliterationModelIndex(index: String){ private val model = HmmModel(index) private val tokenizer = HmmTokenizer(model) def predict(katakana: String): String = { tokenizer.tokens(katakana).map(_.cls).mkString } }
Example 182
Source File: UnifySynonymRecordsProcessor.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
package org.nlp4l.syn

import org.nlp4l.framework.models._
import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory}
import org.slf4j.LoggerFactory

class UnifySynonymRecordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) {
  override def getInstance: DictionaryAttribute = {
    val list = Seq[CellAttribute](
      CellAttribute("synonyms", CellType.StringType, false, true)
    )
    new DictionaryAttribute("unifySynonymRecords", list)
  }
}

class UnifySynonymRecordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) {
  override def getInstance: Processor = {
    val logger = LoggerFactory.getLogger(this.getClass)
    val separator = settings.getOrElse("separator", ",")
    val sortReverse = getBoolParam("sortReverse", false)
    logger.info("""separator "{}", sortReverse "{}"""", separator, sortReverse)
    new UnifySynonymRecordsProcessor(sortReverse, separator)
  }
}

class UnifySynonymRecordsProcessor(val sortReverse: Boolean, val separator: String) extends Processor {

  override def execute(data: Option[Dictionary]): Option[Dictionary] = {
    data match {
      case None => None
      case Some(dic) => {
        val inputRecords = dic.recordList.map { r =>
          r.cellList.map { c => c.value.toString }
        }
        val uniqueRecords = SynonymCommon.getUniqueRecords(inputRecords, Seq())
        Some(Dictionary(for (r <- uniqueRecords) yield {
          Record(Seq(Cell("synonyms", r.mkString(separator))))
        }))
      }
    }
  }
}
Example 183
Source File: TermsExtractionProcessor.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
package org.nlp4l.extract import org.nlp4l.framework.models._ import org.nlp4l.framework.processors.{Processor, ProcessorFactory, DictionaryAttributeFactory} import org.nlp4l.lucene.LuceneDocTermVector import org.nlp4l.lucene.TermsExtractor import org.nlp4l.lucene.TermsExtractor.Config import org.slf4j.LoggerFactory import scala.collection.mutable.ListBuffer class TermsExtractionDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) { override def getInstance: DictionaryAttribute = { val outScore = getBoolParam("outScore", true) val list = if(outScore){ Seq[CellAttribute]( CellAttribute("term", CellType.StringType, true, true), // use constant hashCode so that we don't take into account score when calculating hashCode of Records CellAttribute("score", CellType.FloatType, false, true, constantHashCode => 0) ) } else { Seq[CellAttribute]( CellAttribute("term", CellType.StringType, true, true) ) } new DictionaryAttribute("terms", list) } } class TermsExtractionProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) { override def getInstance: Processor = { val logger = LoggerFactory.getLogger(this.getClass) val config = new Config() config.index = getStrParamRequired("index") config.outScore = getBoolParam("outScore", true) config.fieldCn = getStrParamRequired("field") config.fieldLn2 = settings.getOrElse("fieldln2", null) config.fieldRn2 = settings.getOrElse("fieldrn2", null) config.delimiter = settings.getOrElse("delimiter", "/") config.outNum = getIntParam("num", org.nlp4l.lucene.TermsExtractor.DEF_OUT_NUM) config.scorer = settings.getOrElse("scorer", "FreqDFLR") logger.info( """TermsExtractionProcessor starts with parameters | index "{}" | field "{}" | fieldln2 "{}" | fieldrn2 "{}" | delimiter "{}" | num "{}" | scorer "{}" | outScore "{}"""".stripMargin, config.index, config.fieldCn, config.fieldLn2, config.fieldRn2, config.delimiter, config.outNum.toString, config.scorer, config.outScore.toString) new TermsExtractionProcessor(config) } } class TermsExtractionProcessor(val config: Config) extends Processor { override def execute(data: Option[Dictionary]): Option[Dictionary] = { val te = new ProcTermsExtractor(config) te.setConfig() te.execute() Some(Dictionary(te.records)) } } class ProcTermsExtractor(config: Config) extends TermsExtractor(config: Config) { val records = ListBuffer.empty[Record] val logger = LoggerFactory.getLogger(this.getClass) override def printResultEntry(e: java.util.Map.Entry[String, LuceneDocTermVector.TermWeight]): Unit ={ if(config.outScore){ records += Record(Seq(Cell("term", getTerm(e)), Cell("score", getScore(e)))) } else{ records += Record(Seq(Cell("term", getTerm(e)))) } } }
Example 184
Source File: BuddyWordsProcessor.scala From attic-nlp4l with Apache License 2.0 | 5 votes |
package org.nlp4l.colloc import org.nlp4l.core.RawReader import org.nlp4l.framework.processors._ import org.nlp4l.framework.models._ import org.slf4j.LoggerFactory import scala.collection.mutable.ListBuffer class BuddyWordsDictionaryAttributeFactory(settings: Map[String, String]) extends DictionaryAttributeFactory(settings) { override def getInstance: DictionaryAttribute = { val list = Seq[CellAttribute]( CellAttribute("word", CellType.StringType, true, true), CellAttribute("buddies", CellType.StringType, false, true) ) new DictionaryAttribute("buddyWords", list) } } class BuddyWordsProcessorFactory(settings: Map[String, String]) extends ProcessorFactory(settings) { val DEF_MAX_DOCS_TO_ANALYZE: Int = 1000 val DEF_SLOP: Int = 5 val DEF_MAX_COI_TERMS_PER_TERM: Int = 20 val DEF_MAX_BASE_TERMS_PER_DOC: Int = 10 * 1000 override def getInstance: Processor = { val index = getStrParamRequired("index") val field = getStrParamRequired("field") val srcField = field // use same field name for source field for now val maxDocsToAnalyze = getIntParam("maxDocsToAnalyze", DEF_MAX_DOCS_TO_ANALYZE) val slop = getIntParam("slop", DEF_SLOP) val maxCoiTermsPerTerm = getIntParam("maxCoiTermsPerTerm", DEF_MAX_COI_TERMS_PER_TERM) val maxBaseTermsPerDoc = getIntParam("maxBaseTermsPerDoc", DEF_MAX_BASE_TERMS_PER_DOC) new BuddyWordsProcessor(index, field, srcField, maxDocsToAnalyze, slop, maxCoiTermsPerTerm, maxBaseTermsPerDoc) } } class BuddyWordsProcessor(val index: String, val field: String, val srcField: String, val maxDocsToAnalyze: Int, val slop: Int, val maxCoiTermsPerTerm: Int, val maxBaseTermPerDoc: Int) extends Processor { override def execute(data: Option[Dictionary]): Option[Dictionary] = { val logger = LoggerFactory.getLogger(this.getClass) val reader = RawReader(index) val records = ListBuffer.empty[Record] try{ var progress = 0 val fi = reader.field(field) fi match { case Some(f) => { val finder = BuddyWordsFinder(reader, maxDocsToAnalyze, slop, maxCoiTermsPerTerm, maxBaseTermPerDoc) val len = f.uniqTerms f.terms.foreach{ t => val result = finder.find(field, t.text) progress = progress + 1 if((progress % 1000) == 0){ val percent = ((progress.toFloat / len) * 100).toInt logger.info(s"$percent % done ($progress / $len) term is ${t.text}") } if(result.size > 0){ records += Record(Seq(Cell("word", t.text), Cell("buddies", result.map(_._1).mkString(",")))) } } Some(Dictionary(records)) } case _ => throw new RuntimeException(s"""field "$field" you specified in conf file doesn't exist in the index "$index""") } } finally{ if(reader != null) reader.close } } }
Example 185
Source File: LogstashLogbackEmitter.scala From cedi-dtrace with Apache License 2.0 | 5 votes |
package com.ccadllc.cedi.dtrace package logstash import cats.effect.Sync import cats.implicits._ import net.logstash.logback.marker.LogstashMarker import net.logstash.logback.marker.Markers._ import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ @deprecated("use EcsLogstashLogbackEmitter", "2.0.0") final class LogstashLogbackEmitter[F[_]](implicit F: Sync[F]) extends TraceSystem.Emitter[F] { private val logger = LoggerFactory.getLogger("distributed-trace.logstash") final val description: String = "Logstash Logback Emitter" final def emit(tc: TraceContext[F]): F[Unit] = F.delay { if (logger.isDebugEnabled) { val s = tc.currentSpan val marker: LogstashMarker = append("where", tc.system.data.allValues.asJava). and[LogstashMarker](append("root", s.root)). and[LogstashMarker](append("trace-id", s.spanId.traceId.toString)). and[LogstashMarker](append("span-id", s.spanId.spanId)). and[LogstashMarker](append("parent-id", s.spanId.parentSpanId)). and[LogstashMarker](append("span-name", s.spanName.value)). and[LogstashMarker](append("start-time", s.startTime.show)). and[LogstashMarker](append("span-success", s.failure.isEmpty)). and[LogstashMarker](append("failure-detail", s.failure.map(_.render).orNull)). and[LogstashMarker](append("span-duration", s.duration.toMicros)). and[LogstashMarker](append("notes", s.notes.map(n => n.name.value -> n.value).collect { case (name, Some(value)) => name -> value.toString }.toMap.asJava)) logger.debug(marker, "Span {} {} after {} microseconds", s.spanName.value, if (s.failure.isEmpty) "succeeded" else "failed", s.duration.toMicros.toString) } } }
Example 186
Source File: EcsLogstashLogbackEmitter.scala From cedi-dtrace with Apache License 2.0 | 5 votes |
package com.ccadllc.cedi.dtrace package logstash import cats.effect.Sync import cats.implicits._ import net.logstash.logback.marker.LogstashMarker import net.logstash.logback.marker.Markers._ import org.slf4j.LoggerFactory import scala.collection.JavaConverters._ final class EcsLogstashLogbackEmitter[F[_]](implicit F: Sync[F]) extends TraceSystem.Emitter[F] { object ecs { object field { val kind: String = "event.kind" val module: String = "event.module" val root: String = "dtrace.root" val traceId: String = "dtrace.trace_id" val parentId: String = "dtrace.parent_id" val spanId: String = "event.id" val spanName: String = "event.action" val spanStart: String = "event.start" val spanOutcome: String = "event.outcome" val spanDuration: String = "event.duration" val spanFailureDetail: String = "error.message" val spanMetadata: String = "labels" } } private val logger = LoggerFactory.getLogger("distributed-trace.ecs.logstash") final val description: String = "ECS-Compliant Logstash Logback Emitter" final def emit(tc: TraceContext[F]): F[Unit] = F.delay { if (logger.isDebugEnabled) { val s = tc.currentSpan val marker: LogstashMarker = { val m = append(ecs.field.kind, "event"). and[LogstashMarker](append(ecs.field.module, "dtrace")). and[LogstashMarker](append(ecs.field.root, s.root)). and[LogstashMarker](append(ecs.field.traceId, s.spanId.traceId.toString)). and[LogstashMarker](append(ecs.field.parentId, s.spanId.parentSpanId)). and[LogstashMarker](append(ecs.field.spanId, s.spanId.spanId)). and[LogstashMarker](append(ecs.field.spanName, s.spanName.value)). and[LogstashMarker](append(ecs.field.spanStart, s.startTime.show)). and[LogstashMarker](append(ecs.field.spanOutcome, if (s.failure.isEmpty) "success" else "failure")). and[LogstashMarker](append(ecs.field.spanDuration, s.duration.toUnit(tc.system.timer.unit))). and[LogstashMarker](append(ecs.field.spanFailureDetail, s.failure.map(_.render).orNull)). and[LogstashMarker](append( ecs.field.spanMetadata, (tc.system.data.meta.values ++ s.notes.map( n => n.name.value -> n.value).collect { case (name, Some(value)) => name -> value.toString }.toMap).asJava)) tc.system.data.identity.values.foldLeft(m) { case (acc, (k, v)) => acc.and[LogstashMarker](append(k, v)) } } logger.debug(marker, s"Span {} {} after {} ${tc.system.timer.unit.toString.toLowerCase}s", s.spanName.value, if (s.failure.isEmpty) "succeeded" else "failed", s.duration.toUnit(tc.system.timer.unit).toString) } } }
Example 187
Source File: Logging.scala From make-your-programs-free with GNU General Public License v3.0 | 5 votes |
package free import scalaz._, Scalaz._ import scalaz.concurrent.Task import org.slf4j.LoggerFactory sealed trait Logging[A] case class Info(line: String) extends Logging[Unit] case class Warn(line: String) extends Logging[Unit] case class Error(line: String) extends Logging[Unit] case class Debug(line: String) extends Logging[Unit] object Logging { class Ops[S[_]](implicit s0: Logging :<: S) { def info(line: String): Free[S, Unit] = Free.liftF(s0.inj(Info(line))) def warn(line: String): Free[S, Unit] = Free.liftF(s0.inj(Warn(line))) def error(line: String): Free[S, Unit] = Free.liftF(s0.inj(Error(line))) def debug(line: String): Free[S, Unit] = Free.liftF(s0.inj(Debug(line))) } object Ops { implicit def apply[S[_]](implicit S: Logging :<: S): Ops[S] = new Ops[S] } } object Log4JInterpreter extends (Logging ~> Task) { def apply[A](inout: Logging[A]): Task[A] = inout match { case Info(line) => Task.delay { LoggerFactory.getLogger(this.getClass).info(line) } case Error(line) => Task.delay { LoggerFactory.getLogger(this.getClass).error(line) } case Warn(line) => Task.delay { LoggerFactory.getLogger(this.getClass).warn(line) } case Debug(line) => Task.delay { LoggerFactory.getLogger(this.getClass).debug(line) } } } object RunLogging extends App { implicit val ops = new Logging.Ops[Logging]() val program = for { _ <- ops.info("starting application!") _ <- ops.debug("omg, app is running!") } yield() val task: Task[Unit] = program.foldMap(Log4JInterpreter) task.unsafePerformSync }
Example 188
Source File: DeltaLoad.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.algo import com.adidas.analytics.algo.DeltaLoad._ import com.adidas.analytics.algo.core.Algorithm import com.adidas.analytics.algo.shared.DateComponentDerivation import com.adidas.analytics.config.DeltaLoadConfiguration.PartitionedDeltaLoadConfiguration import com.adidas.analytics.util.DataFrameUtils._ import com.adidas.analytics.util._ import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.storage.StorageLevel import org.slf4j.{Logger, LoggerFactory} private def getUpsertRecords(deltaRecords: Dataset[Row], resultColumns: Seq[String]): Dataset[Row] = { // Create partition window - Partitioning by delta records logical key (i.e. technical key of active records) val partitionWindow = Window .partitionBy(businessKey.map(col): _*) .orderBy(technicalKey.map(component => col(component).desc): _*) // Ranking & projection val rankedDeltaRecords = deltaRecords .withColumn(rankingColumnName, row_number().over(partitionWindow)) .filter(upsertRecordsModesFilterFunction) rankedDeltaRecords .filter(rankedDeltaRecords(rankingColumnName) === 1) .selectExpr(resultColumns: _*) } protected def withDatePartitions(spark: SparkSession, dfs: DFSWrapper, dataFrames: Vector[DataFrame]): Vector[DataFrame] = { logger.info("Adding partitioning information if needed") try { dataFrames.map { df => if (df.columns.toSeq.intersect(targetPartitions) != targetPartitions){ df.transform(withDateComponents(partitionSourceColumn, partitionSourceColumnFormat, targetPartitions)) } else df } } catch { case e: Throwable => logger.error("Cannot add partitioning information for data frames.", e) //TODO: Handle failure case properly throw new RuntimeException("Unable to transform data frames.", e) } } } object DeltaLoad { private val logger: Logger = LoggerFactory.getLogger(getClass) def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): DeltaLoad = { new DeltaLoad(spark, dfs, configLocation) } }
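The ranking step in getUpsertRecords is the usual "latest record per key" pattern: partition by the business key, order by the technical key descending, and keep rank 1. A minimal standalone sketch, assuming hypothetical customer_id/version columns rather than the configuration-driven business and technical keys used by DeltaLoad:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, row_number}

object UpsertRankingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("upsert-ranking-sketch").getOrCreate()
    import spark.implicits._

    // Hypothetical delta records: (customer_id, version, name)
    val deltaRecords = Seq(
      (1, 1L, "old name"),
      (1, 2L, "new name"),
      (2, 1L, "only version")
    ).toDF("customer_id", "version", "name")

    // Rank records per business key by descending technical key and keep only the top-ranked one
    val partitionWindow = Window.partitionBy(col("customer_id")).orderBy(col("version").desc)
    val latest = deltaRecords
      .withColumn("row_rank", row_number().over(partitionWindow))
      .filter(col("row_rank") === 1)
      .drop("row_rank")

    latest.show() // keeps (1, 2, "new name") and (2, 1, "only version")
    spark.stop()
  }
}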
Example 189
Source File: GzipDecompressor.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.algo import java.util.concurrent.{Executors, TimeUnit} import com.adidas.analytics.algo.GzipDecompressor.{changeFileExtension, compressedExtension, _} import com.adidas.analytics.algo.core.JobRunner import com.adidas.analytics.config.GzipDecompressorConfiguration import com.adidas.analytics.util.DFSWrapper import com.adidas.analytics.util.DFSWrapper._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.IOUtils import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} import scala.concurrent._ import scala.concurrent.duration._ final class GzipDecompressor protected(val spark: SparkSession, val dfs: DFSWrapper, val configLocation: String) extends JobRunner with GzipDecompressorConfiguration { private val hadoopConfiguration: Configuration = spark.sparkContext.hadoopConfiguration private val fileSystem: FileSystem = dfs.getFileSystem(inputDirectoryPath) override def run(): Unit = { //check if directory exists if (!fileSystem.exists(inputDirectoryPath)){ logger.error(s"Input directory: $inputDirectoryPath does not exist.") throw new RuntimeException(s"Directory $inputDirectoryPath does not exist.") } val compressedFilePaths = fileSystem.ls(inputDirectoryPath, recursive) .filterNot(path => fileSystem.isDirectory(path)) .filter(_.getName.toLowerCase.endsWith(compressedExtension)) if (compressedFilePaths.isEmpty) { logger.warn(s"Input directory $inputDirectoryPath does not contain compressed files. Skipping...") } else { implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(threadPoolSize)) Await.result(Future.sequence( compressedFilePaths.map { compressedFilePath => Future { logger.info(s"Decompressing file: $compressedFilePath") val decompressedFileName = changeFileExtension(compressedFilePath.getName, compressedExtension, outputExtension) val decompressedFilePath = new Path(compressedFilePath.getParent, decompressedFileName) val compressionCodecFactory = new CompressionCodecFactory(hadoopConfiguration) val inputCodec = compressionCodecFactory.getCodec(compressedFilePath) val inputStream = inputCodec.createInputStream(fileSystem.open(compressedFilePath)) val output = fileSystem.create(decompressedFilePath) IOUtils.copyBytes(inputStream, output, hadoopConfiguration) logger.info(s"Finished decompressing file: $compressedFilePath") //Delete the compressed file fileSystem.delete(compressedFilePath, false) logger.info(s"Removed file: $compressedFilePath") } } ), Duration(4, TimeUnit.HOURS)) } } } object GzipDecompressor { private val logger: Logger = LoggerFactory.getLogger(this.getClass) private val compressedExtension: String = ".gz" def apply(spark: SparkSession, dfs: DFSWrapper, configLocation: String): GzipDecompressor = { new GzipDecompressor(spark, dfs, configLocation) } private def changeFileExtension(fileName: String, currentExt: String, newExt: String): String = { val newFileName = fileName.substring(0, fileName.lastIndexOf(currentExt)) if (newFileName.endsWith(newExt)) newFileName else newFileName + newExt } }
Example 190
Source File: DataFrameUtils.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row, functions} import org.slf4j.{Logger, LoggerFactory} object DataFrameUtils { private val logger: Logger = LoggerFactory.getLogger(getClass) type FilterFunction = Row => Boolean type PartitionCriteria = Seq[(String, String)] def mapPartitionsToDirectories(partitionCriteria: PartitionCriteria): Seq[String] = { partitionCriteria.map { case (columnName, columnValue) => s"$columnName=$columnValue" } } def buildPartitionsCriteriaMatcherFunc(multiplePartitionsCriteria: Seq[PartitionCriteria], schema: StructType): FilterFunction = { val targetPartitions = multiplePartitionsCriteria.flatten.map(_._1).toSet val fieldNameToMatchFunctionMapping = schema.fields.filter { case StructField(name, _, _, _) => targetPartitions.contains(name) }.map { case StructField(name, _: ByteType, _, _) => name -> ((r: Row, value: String) => r.getAs[Byte](name) == value.toByte) case StructField(name, _: ShortType, _, _) => name -> ((r: Row, value: String) => r.getAs[Short](name) == value.toShort) case StructField(name, _: IntegerType, _, _) => name -> ((r: Row, value: String) => r.getAs[Int](name) == value.toInt) case StructField(name, _: LongType, _, _) => name -> ((r: Row, value: String) => r.getAs[Long](name) == value.toLong) case StructField(name, _: FloatType, _, _) => name -> ((r: Row, value: String) => r.getAs[Float](name) == value.toFloat) case StructField(name, _: DoubleType, _, _) => name -> ((r: Row, value: String) => r.getAs[Double](name) == value.toDouble) case StructField(name, _: BooleanType, _, _) => name -> ((r: Row, value: String) => r.getAs[Boolean](name) == value.toBoolean) case StructField(name, _: StringType, _, _) => name -> ((r: Row, value: String) => r.getAs[String](name) == value) }.toMap def convertPartitionCriteriaToFilterFunctions(partitionCriteria: PartitionCriteria): Seq[FilterFunction] = partitionCriteria.map { case (name, value) => (row: Row) => fieldNameToMatchFunctionMapping(name)(row, value) } def joinSinglePartitionFilterFunctionsWithAnd(partitionFilterFunctions: Seq[FilterFunction]): FilterFunction = partitionFilterFunctions .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) && predicate2(row)) .getOrElse((_: Row) => false) multiplePartitionsCriteria .map(convertPartitionCriteriaToFilterFunctions) .map(joinSinglePartitionFilterFunctionsWithAnd) .reduceOption((predicate1, predicate2) => (row: Row) => predicate1(row) || predicate2(row)) .getOrElse((_: Row) => false) } implicit class DataFrameHelper(df: DataFrame) { def collectPartitions(targetPartitions: Seq[String]): Seq[PartitionCriteria] = { logger.info(s"Collecting unique partitions for partitions columns (${targetPartitions.mkString(", ")})") val partitions = df.selectExpr(targetPartitions: _*).distinct().collect() partitions.map { row => targetPartitions.map { columnName => Option(row.getAs[Any](columnName)) match { case Some(columnValue) => columnName -> columnValue.toString case None => throw new RuntimeException(s"Partition column '$columnName' contains null value") } } } } def addMissingColumns(targetSchema: StructType): DataFrame = { val dataFieldsSet = df.schema.fieldNames.toSet val selectColumns = targetSchema.fields.map { field => if (dataFieldsSet.contains(field.name)) { functions.col(field.name) } else { functions.lit(null).cast(field.dataType).as(field.name) } } df.select(selectColumns: _*) } def isEmpty: Boolean = df.head(1).isEmpty def nonEmpty: Boolean = df.head(1).nonEmpty } }
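A hedged usage sketch of the matcher builder above, assuming DataFrameUtils is on the classpath; the year/month partition columns and their values are made up for illustration:

import org.apache.spark.sql.SparkSession
import com.adidas.analytics.util.DataFrameUtils._

object PartitionMatcherSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("partition-matcher-sketch").getOrCreate()
    import spark.implicits._

    val df = Seq(
      ("a", "2020", "01"),
      ("b", "2020", "02"),
      ("c", "2019", "12")
    ).toDF("id", "year", "month")

    // Match rows belonging to year=2020/month=01 OR year=2020/month=02
    val criteria: Seq[PartitionCriteria] = Seq(
      Seq("year" -> "2020", "month" -> "01"),
      Seq("year" -> "2020", "month" -> "02")
    )
    val matches = buildPartitionsCriteriaMatcherFunc(criteria, df.schema)
    df.filter(row => matches(row)).show() // keeps rows a and b

    // One criteria list maps to partition directory segments
    println(mapPartitionsToDirectories(Seq("year" -> "2020", "month" -> "01"))) // List(year=2020, month=01)
    spark.stop()
  }
}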
Example 191
Source File: DataFormat.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util import org.apache.spark.sql._ import org.apache.spark.sql.types.StructType import org.slf4j.{Logger, LoggerFactory} sealed trait DataFormat { protected val logger: Logger = LoggerFactory.getLogger(getClass) def read(reader: DataFrameReader, locations: String*): DataFrame def write(writer: DataFrameWriter[Row], location: String): Unit } object DataFormat { case class ParquetFormat(optionalSchema: Option[StructType] = None) extends DataFormat { override def read(reader: DataFrameReader, locations: String*): DataFrame = { val filesString = locations.mkString(", ") logger.info(s"Reading Parquet data from $filesString") optionalSchema.fold(reader)(schema => reader.schema(schema)).parquet(locations: _*) } override def write(writer: DataFrameWriter[Row], location: String): Unit = { logger.info(s"Writing Parquet data to $location") writer.parquet(location) } } case class DSVFormat(optionalSchema: Option[StructType] = None) extends DataFormat { override def read(reader: DataFrameReader, locations: String*): DataFrame = { val filesString = locations.mkString(", ") logger.info(s"Reading DSV data from $filesString") optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).csv(locations: _*) } override def write(writer: DataFrameWriter[Row], location: String): Unit = { logger.info(s"Writing DSV data to $location") writer.csv(location) } } case class JSONFormat(optionalSchema: Option[StructType] = None) extends DataFormat { override def read(reader: DataFrameReader, locations: String*): DataFrame = { val filesString = locations.mkString(", ") logger.info(s"Reading JSON data from $filesString") optionalSchema.fold(reader.option("inferSchema", "true"))(schema => reader.schema(schema)).json(locations: _*) } override def write(writer: DataFrameWriter[Row], location: String): Unit = { logger.info(s"Writing JSON data to $location") writer.json(location) } } }
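A hedged sketch of driving these formats from a DataFrameReader/DataFrameWriter; the file paths and the pipe delimiter are placeholders, not values taken from the project:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import com.adidas.analytics.util.DataFormat.{DSVFormat, ParquetFormat}

object DataFormatSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("data-format-sketch").getOrCreate()

    val schema = StructType(Seq(StructField("id", StringType), StructField("name", StringType)))

    // Read a delimiter-separated file with an explicit schema instead of schema inference
    val dsv = DSVFormat(Some(schema))
    val df = dsv.read(spark.read.option("delimiter", "|"), "/tmp/input/data.psv")

    // Write the same data out as Parquet
    ParquetFormat().write(df.write.mode("overwrite"), "/tmp/output/parquet")

    spark.stop()
  }
}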
Example 192
Source File: InputReader.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util import org.apache.spark.sql.{DataFrame, SparkSession} import org.slf4j.{Logger, LoggerFactory} def newTableLocationReader(table: String, format: DataFormat, options: Map[String, String] = Map.empty): TableLocationReader = { TableLocationReader(table, format, options) } case class TableReader(table: String, options: Map[String, String]) extends InputReader { override def read(sparkSession: SparkSession): DataFrame = { logger.info(s"Reading data from table $table") sparkSession.read.options(options).table(table) } } case class FileSystemReader(location: String, format: DataFormat, options: Map[String, String]) extends InputReader { override def read(sparkSession: SparkSession): DataFrame = { logger.info(s"Reading data from location $location") format.read(sparkSession.read.options(options), location) } } case class TableLocationReader(table: String, format: DataFormat, options: Map[String, String]) extends InputReader { override def read(sparkSession: SparkSession): DataFrame = { val location = HiveTableAttributeReader(table, sparkSession).getTableLocation logger.info(s"Reading data from location $location") format.read(sparkSession.read.options(options), location) } } }
Example 193
Source File: ConfigReader.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.analytics.util import java.text.DecimalFormatSymbols import org.slf4j.{Logger, LoggerFactory} import scala.util.parsing.json.{JSON, JSONArray, JSONObject} class ConfigReader(jsonContent: String) extends Serializable { private val logger: Logger = LoggerFactory.getLogger(getClass) private val decimalSeparator: Char = new DecimalFormatSymbols().getDecimalSeparator JSON.globalNumberParser = (in: String) => if (in.contains(decimalSeparator)) in.toDouble else in.toInt private lazy val config = JSON.parseRaw(jsonContent) match { case Some(JSONObject(obj)) => obj case _ => throw new IllegalArgumentException(s"Wrong format of the configuration file: $jsonContent") } def getAsSeq[T](propertyName: String): Seq[T] = { config.get(propertyName) match { case Some(JSONArray(list)) => list.map(_.asInstanceOf[T]) case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName") } } def getAsMap[K, V](propertyName: String): Map[K,V] = { config.get(propertyName) match { case Some(JSONObject(obj)) => obj.asInstanceOf[Map[K,V]] case _ => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName") } } def getAs[T](propertyName: String): T = { config.get(propertyName) match { case Some(property) => property.asInstanceOf[T] case None => throw new IllegalArgumentException(s"Unable to find configuration property $propertyName") } } def getAsOption[T](propertyName: String): Option[T] = { config.get(propertyName).map(property => property.asInstanceOf[T]) } def getAsOptionSeq[T](propertyName: String): Option[Seq[T]] = { config.get(propertyName).map(_ => getAsSeq(propertyName)) } def contains(propertyName: String): Boolean = { config.contains(propertyName) } } object ConfigReader { def apply(jsonContent: String): ConfigReader = new ConfigReader(jsonContent) }
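A hedged usage sketch with an inline, made-up JSON document; the property names are illustrative and not the project's actual configuration keys:

import com.adidas.analytics.util.ConfigReader

object ConfigReaderSketch {
  def main(args: Array[String]): Unit = {
    val json =
      """{
        |  "source_table": "lake.orders",
        |  "target_partitions": ["year", "month"],
        |  "options": {"delimiter": "|"},
        |  "max_files": 10
        |}""".stripMargin

    val config = ConfigReader(json)

    val table: String = config.getAs[String]("source_table")
    val partitions: Seq[String] = config.getAsSeq[String]("target_partitions")
    val options: Map[String, String] = config.getAsMap[String, String]("options")
    val maxFiles: Option[Int] = config.getAsOption[Int]("max_files")   // Some(10), parsed as Int (no decimal separator)
    val missing: Option[String] = config.getAsOption[String]("absent") // None

    println((table, partitions, options, maxFiles, missing))
  }
}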
Example 194
Source File: BaseAlgorithmTest.scala From m3d-engine with Apache License 2.0 | 5 votes |
package com.adidas.utils import java.util.UUID import com.adidas.analytics.util.{DFSWrapper, LoadMode} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.types.StructType import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.io.Source trait BaseAlgorithmTest extends Suite with BeforeAndAfterAll with BeforeAndAfterEach with HDFSSupport with SparkSupport { override val logger: Logger = LoggerFactory.getLogger(getClass) override val testAppId: String = UUID.randomUUID().toString override val localTestDir: String = "target" override val sparkHadoopConf: Option[Configuration] = Some(fs.getConf) val hdfsRootTestPath: Path = new Path("hdfs:///tmp/tests") val dfs: DFSWrapper = DFSWrapper(spark.sparkContext.hadoopConfiguration) override def afterAll(): Unit = { spark.stop() cluster.shutdown(true) } override def beforeEach(): Unit = { fs.delete(hdfsRootTestPath, true) fs.mkdirs(hdfsRootTestPath) } override def afterEach(): Unit = { spark.sqlContext.clearCache() spark.sparkContext.getPersistentRDDs.foreach { case (_, rdd) => rdd.unpersist(true) } } def resolveResource(fileName: String, withProtocol: Boolean = false): String = { val resource = s"${getClass.getSimpleName}/$fileName" logger.info(s"Resolving resource $resource") val location = getClass.getClassLoader.getResource(resource).getPath if (withProtocol) { s"file://$location" } else { location } } def getResourceAsText(fileName: String): String = { val resource = s"${getClass.getSimpleName}/$fileName" logger.info(s"Reading resource $resource") val stream = getClass.getClassLoader.getResourceAsStream(resource) Source.fromInputStream(stream).mkString } def copyResourceFileToHdfs(resource: String, targetPath: Path): Unit = { val localResourceRoot = resolveResource("", withProtocol = true) val sourcePath = new Path(localResourceRoot, resource) logger.info(s"Copying local resource to HDFS $sourcePath -> $targetPath") fs.copyFromLocalFile(sourcePath, targetPath) } def createAndLoadParquetTable(database: String, tableName: String, partitionColumns: Option[Seq[String]] = None, schema: StructType, filePath: String, reader: FileReader): Table = { val table = createParquetTable(database, tableName, partitionColumns, schema) val inputTableDataURI = resolveResource(filePath, withProtocol = true) table.write(Seq(inputTableDataURI), reader, LoadMode.OverwritePartitions) table } }
Example 195
Source File: Retries.scala From http-verbs with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.http

import akka.actor.ActorSystem
import akka.pattern.after
import com.typesafe.config.Config
import java.util.concurrent.TimeUnit
import javax.net.ssl.SSLException
import org.slf4j.LoggerFactory
import scala.collection.JavaConverters._
import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}
import uk.gov.hmrc.play.http.logging.Mdc

trait Retries {

  protected def actorSystem: ActorSystem

  protected def configuration: Option[Config]

  private val logger = LoggerFactory.getLogger("application")

  def retry[A](verb: String, url: String)(block: => Future[A])(implicit ec: ExecutionContext): Future[A] = {
    def loop(remainingIntervals: Seq[FiniteDuration])(mdcData: Map[String, String])(block: => Future[A]): Future[A] =
      // scheduling will lose MDC data. Here we explicitly ensure it is available on block.
      Mdc.withMdc(block, mdcData)
        .recoverWith {
          case ex @ `sslEngineClosedMatcher`() if remainingIntervals.nonEmpty =>
            val delay = remainingIntervals.head
            logger.warn(s"Retrying $verb $url in $delay due to '${ex.getMessage}' error")
            after(delay, actorSystem.scheduler)(loop(remainingIntervals.tail)(mdcData)(block))
        }
    loop(intervals)(Mdc.mdcData)(block)
  }

  private[http] lazy val intervals: Seq[FiniteDuration] = {
    val defaultIntervals = Seq(500.millis, 1.second, 2.seconds, 4.seconds, 8.seconds)
    configuration
      .map { c =>
        val path = "http-verbs.retries.intervals"
        if (c.hasPath(path)) {
          c.getDurationList(path).asScala.map { d =>
            FiniteDuration(d.toMillis, TimeUnit.MILLISECONDS)
          }
        } else {
          defaultIntervals
        }
      }
      .getOrElse(defaultIntervals)
  }

  private lazy val sslEngineClosedMatcher =
    new SSlEngineClosedMatcher(isEnabled("ssl-engine-closed-already"))

  private class SSlEngineClosedMatcher(enabled: Boolean) {
    def unapply(ex: Throwable): Boolean =
      ex match {
        case _: SSLException if ex.getMessage == "SSLEngine closed already" => enabled
        case _ => false
      }
  }

  private def isEnabled(name: String): Boolean =
    configuration.exists { c =>
      val path = s"http-verbs.retries.$name.enabled"
      c.hasPath(path) && c.getBoolean(path)
    }
}
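A minimal sketch of a client mixing in Retries, assuming a made-up connector, URL and configuration; only the abstract members and the retry signature come from the trait above:

import akka.actor.ActorSystem
import com.typesafe.config.{Config, ConfigFactory}
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration._
import uk.gov.hmrc.http.Retries

class SketchConnector(system: ActorSystem, config: Config) extends Retries {
  override protected def actorSystem: ActorSystem = system
  override protected def configuration: Option[Config] = Some(config)

  def get(url: String)(implicit ec: ExecutionContext): Future[String] =
    retry("GET", url) {
      // a real connector would issue the HTTP call here
      Future.successful("response body")
    }
}

object SketchConnectorApp {
  def main(args: Array[String]): Unit = {
    implicit val ec: ExecutionContext = ExecutionContext.global
    val system = ActorSystem("retries-sketch")
    val connector = new SketchConnector(system, ConfigFactory.load())
    println(Await.result(connector.get("https://example.org/resource"), 5.seconds))
    system.terminate()
  }
}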
Example 196
Source File: ConnectionTracing.scala From http-verbs with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.http.logging import org.slf4j.LoggerFactory import uk.gov.hmrc.http.{HttpException, Upstream4xxResponse} import scala.concurrent._ import scala.util.{Failure, Success, Try} trait ConnectionTracing { lazy val connectionLogger = LoggerFactory.getLogger("connector") def withTracing[T](method: String, uri: String)( body: => Future[T])(implicit ld: LoggingDetails, ec: ExecutionContext): Future[T] = { val startAge = ld.age val f = body f.onComplete(logResult(ld, method, uri, startAge)) f } def logResult[A](ld: LoggingDetails, method: String, uri: String, startAge: Long)(result: Try[A]) = result match { case Success(ground) => connectionLogger.debug(formatMessage(ld, method, uri, startAge, "ok")) case Failure(ex: HttpException) if ex.responseCode == 404 => connectionLogger.info(formatMessage(ld, method, uri, startAge, s"failed ${ex.getMessage}")) case Failure(ex: Upstream4xxResponse) if ex.upstreamResponseCode == 404 => connectionLogger.info(formatMessage(ld, method, uri, startAge, s"failed ${ex.getMessage}")) case Failure(ex) => connectionLogger.warn(formatMessage(ld, method, uri, startAge, s"failed ${ex.getMessage}")) } import uk.gov.hmrc.http.logging.ConnectionTracing.formatNs def formatMessage(ld: LoggingDetails, method: String, uri: String, startAge: Long, message: String) = { val requestId = ld.requestId.getOrElse("") val requestChain = ld.requestChain val durationNs = ld.age - startAge s"$requestId:$method:$startAge:${formatNs(startAge)}:$durationNs:${formatNs(durationNs)}:${requestChain.value}:$uri:$message" } } object ConnectionTracing { def formatNs(ns: Long): String = { val nsPart = ns % 1000 val usPart = ns / 1000 % 1000 val msPart = ns / 1000000 % 1000 val sPart = ns / 1000000000 if (sPart > 0) f"${(sPart * 1000 + msPart) / 1000.0}%03.3fs" else if (msPart > 0) f"${(msPart * 1000 + usPart) / 1000.0}%03.3fms" else if (usPart > 0) f"${(usPart * 1000 + nsPart) / 1000.0}%03.3fus" else s"${ns}ns" } }
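The formatter picks the coarsest non-zero unit and keeps three decimals of the next finer one (remaining sub-precision nanoseconds are dropped, not rounded). A small sketch of expected outputs:

import uk.gov.hmrc.http.logging.ConnectionTracing.formatNs

object FormatNsSketch {
  def main(args: Array[String]): Unit = {
    println(formatNs(850L))        // 850ns
    println(formatNs(2500L))       // 2.500us
    println(formatNs(1234567L))    // 1.234ms (the trailing 567ns are dropped)
    println(formatNs(1500000000L)) // 1.500s
  }
}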
Example 197
Source File: Main.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite import org.slf4j.LoggerFactory import yamrcraft.etlite.processors.EtlProcessor import yamrcraft.etlite.utils.{DLock, FakeLock, FileUtils} object Main { val logger = LoggerFactory.getLogger(this.getClass) def main(args: Array[String]): Unit = { if (args.length != 1) { println( s""" |Usage: Main <config> | <config> path to the application configuration file, use 'file:///' prefix in case file located on local file system. """.stripMargin) System.exit(1) } val configPath = args(0) logger.info(s"Configuration file = '$configPath'") val settings = new Settings(FileUtils.readContent(configPath)) val lock = { if (settings.etl.lock.enabled) new DLock(settings.etl.lock.zookeeperConnect, settings.etl.lock.path, settings.etl.lock.waitForLockSeconds) else new FakeLock } if (lock.tryLock()) { EtlProcessor.run(settings) lock.release() } else { logger.error("can't acquire zookeeper lock!") } } }
Example 198
Source File: PartitionProcessor.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.processors import java.io.IOException import org.slf4j.LoggerFactory import yamrcraft.etlite.transformers.InboundMessage import yamrcraft.etlite.writers.{ErrorInfo, ErrorEventWriter} import yamrcraft.etlite.{ErrorType, EtlException, EtlSettings} import scala.util.Try class PartitionProcessor(jobId: Long, partitionId: Int, settings: EtlSettings) { val logger = LoggerFactory.getLogger(this.getClass) val pipeline = settings.pipeline.createFactory.createPipeline(settings.pipeline, jobId, partitionId) val errorsWriter: ErrorEventWriter = new ErrorEventWriter(settings.errorsFolder, jobId, partitionId) def processPartition(partition: Iterator[InboundMessage]): Unit = { logger.info(s"partition processing started [jobId=$jobId, partitionId=$partitionId]") partition foreach { inbound => try { pipeline.processMessage(inbound) } catch { case e@(_: Exception) => logger.error("event processing error", e) val errorType = e match { case ex: EtlException => ex.errorType.toString case _ : IOException => ErrorType.WriteError.toString case _ => ErrorType.SystemError.toString } val cause = Try(e.getCause.getMessage).getOrElse("") val errorInfo = ErrorInfo(errorType, Some(cause)) errorsWriter.write((inbound.msg, errorInfo)) } } pipeline.writer.commit() errorsWriter.commit() logger.info(s"partition processing ended [jobId=$jobId, partitionId=$partitionId]") } }
Example 199
Source File: EtlProcessor.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.processors import kafka.common.TopicAndPartition import kafka.message.MessageAndMetadata import kafka.serializer.DefaultDecoder import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.streaming.kafka._ import org.slf4j.LoggerFactory import yamrcraft.etlite.Settings import yamrcraft.etlite.state.{KafkaOffsetsState, KafkaStateManager} import yamrcraft.etlite.transformers.InboundMessage object EtlProcessor { val logger = LoggerFactory.getLogger(this.getClass) def run(settings: Settings) = { val context = createContext(settings) val stateManager = new KafkaStateManager(settings.etl.state) val lastState = stateManager.readState logger.info(s"last persisted state: $lastState") val currState = stateManager.fetchNextState(lastState, settings) logger.info(s"batch working state: $currState") val rdd = createRDD(context, currState, settings) processRDD(rdd, currState.jobId, settings) logger.info("committing state") stateManager.commitState(currState) } private def createContext(settings: Settings) = { val sparkConf = new SparkConf() .setAppName(settings.spark.appName) .setAll(settings.spark.conf) new SparkContext(sparkConf) } private def createRDD(context: SparkContext, state: KafkaOffsetsState, settings: Settings): RDD[InboundMessage] = { KafkaUtils.createRDD[Array[Byte], Array[Byte], DefaultDecoder, DefaultDecoder, InboundMessage]( context, settings.kafka.properties, state.ranges.toArray, Map[TopicAndPartition, Broker](), (msgAndMeta: MessageAndMetadata[Array[Byte], Array[Byte]]) => { InboundMessage(msgAndMeta.topic, msgAndMeta.key(), msgAndMeta.message()) } ) } private def processRDD(kafkaRDD: RDD[InboundMessage], jobId: Long, settings: Settings) = { // passed to remote workers val etlSettings = settings.etl logger.info(s"RDD processing started [rdd=${kafkaRDD.id}, jobId=$jobId]") val rdd = settings.etl.maxNumOfOutputFiles.map(kafkaRDD.coalesce(_)).getOrElse(kafkaRDD) rdd.foreachPartition { partition => // executed at the worker new PartitionProcessor(jobId, TaskContext.get.partitionId(), etlSettings) .processPartition(partition) } logger.info(s"RDD processing ended [rdd=${kafkaRDD.id}, jobId=$jobId]") } }
Example 200
Source File: DLock.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.utils import java.util.concurrent.TimeUnit import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex import org.apache.curator.framework.{CuratorFramework, CuratorFrameworkFactory} import org.apache.curator.retry.ExponentialBackoffRetry import org.slf4j.LoggerFactory class DLock(zkConnect: String, lockFile: String, waitForLockSeconds: Int) { val logger = LoggerFactory.getLogger(this.getClass) private var zkClient: Option[CuratorFramework] = None private var lock: Option[InterProcessSemaphoreMutex] = None def tryLock(): Boolean = { require(lock.isEmpty, "lock can't be reused") logger.info("acquiring lock...") zkClient = Some(CuratorFrameworkFactory.newClient(zkConnect, new ExponentialBackoffRetry(1000, 3))) zkClient.get.start() lock = Some(new InterProcessSemaphoreMutex(zkClient.get, lockFile)) lock.get.acquire(waitForLockSeconds, TimeUnit.SECONDS) } def release() = { require(lock.nonEmpty, "lock wasn't acquired") logger.info("releasing lock") lock.foreach(_.release()) zkClient.foreach(_.close()) } } class FakeLock extends DLock("", "", 0) { override def tryLock() = true override def release() = {} }
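A hedged usage sketch of the lock; the ZooKeeper connect string, lock path and timeout are placeholders. Wrapping the critical section in try/finally ensures the lock is released even if the job fails:

import yamrcraft.etlite.utils.{DLock, FakeLock}

object DLockSketch {
  def main(args: Array[String]): Unit = {
    val lockingEnabled = true
    val lock =
      if (lockingEnabled) new DLock("localhost:2181", "/locks/etl-job", 30)
      else new FakeLock

    if (lock.tryLock()) {
      try {
        println("lock held - running the job")
      } finally {
        lock.release()
      }
    } else {
      println("another instance holds the lock - exiting")
    }
  }
}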