java.io.StringReader Scala Examples
The following examples show how to use java.io.StringReader.
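All of the examples share one pattern: StringReader wraps an in-memory String so it can be handed to any API that expects a java.io.Reader (parsers, tokenizers, java.util.Properties, and so on). A minimal standalone sketch of that pattern:

import java.io.{BufferedReader, StringReader}

object StringReaderSketch extends App {
  // Wrap an in-memory string so Reader-based APIs can consume it.
  val reader = new BufferedReader(new StringReader("first line\nsecond line"))
  try {
    // Read it back line by line, exactly as if it came from a file.
    Iterator.continually(reader.readLine()).takeWhile(_ != null).foreach(println)
  } finally reader.close()
}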
Example 1
Source File: DummySourceAcl.scala from kafka-security-manager with MIT License
package com.github.simplesteph.ksm.source

import java.io.{Reader, StringReader}

import com.github.simplesteph.ksm.TestFixtures._
import com.github.simplesteph.ksm.parser.CsvAclParser
import com.typesafe.config.Config

class DummySourceAcl extends SourceAcl {

  var noneNext = false
  var errorNext = false
  val csvAclParser: CsvAclParser = new CsvAclParser()

  // initial state
  val sar1 = Set(
    res1 -> acl1,
    res1 -> acl2,
    res2 -> acl3
  )

  // one deletion, one add
  val sar2 = Set(
    res1 -> acl1,
    res2 -> acl3,
    res3 -> acl2
  )

  // all gone
  val sar3 = Set()

  // all state changes
  val sars = List(sar1, sar2, sar3)

  // a states iterator, shifting its position changes current state
  private val sarsIterator = sars.iterator

  override def refresh(): Option[Reader] = {
    if (noneNext) {
      noneNext = false
      None
    } else if (errorNext) {
      errorNext = false
      throw new RuntimeException("triggered error")
    } else {
      Some(
        new StringReader(csvAclParser.formatAcls(sarsIterator.next().toList))
      )
    }
  }

  def setNoneNext(): Unit = {
    noneNext = true
  }

  def setErrorNext(): Unit = {
    errorNext = true
  }

  override def close(): Unit = ()

  override def configure(config: Config): Unit = ()
}
Example 2
Source File: LightWeightJawaParser.scala from Argus-SAF with Apache License 2.0
package org.argus.jawa.core.compiler.parser

import java.io.{LineNumberReader, StringReader}

import org.argus.jawa.core.util.ISet

object LightWeightJawaParser {
  val TITLE = "LightWeightJawaParser"
  val DEBUG = false

  def splitCode(code: String): ISet[String] = {
    code.replaceAll("(record `)", "DELIMITER$1").split("DELIMITER").tail.toSet
  }

  def getCode(recordCode: String, contentSig: String): Option[String] = {
    val lnr = new LineNumberReader(new StringReader(recordCode))
    var lineNo = 0
    var chunkLineNo = 0
    val sb = new StringBuilder
    var lineText = lnr.readLine
    val keywords = Set("record", "global", "procedure")
    var found = false
    import scala.util.control.Breaks._
    breakable {
      while (lineText != null) {
        val word = getFirstWord(lineText)
        if (keywords.contains(word) && found) break
        if (keywords.contains(word)) {
          if (lineText.contains(contentSig)) found = true
          chunkLineNo = lineNo
        }
        if (found) {
          sb.append(lineText)
          sb.append('\n')
        }
        lineNo += 1
        lineText = lnr.readLine
      }
    }
    if (found) Some(sb.toString.intern())
    else None
  }

  def getFirstWord(line: String): String = {
    val size = line.length
    var i = 0
    while (i < size && line.charAt(i).isWhitespace) {
      i += 1
    }
    var j = i
    while (j < size && !line.charAt(j).isWhitespace) {
      j += 1
    }
    if (i < size && j <= size) line.substring(i, j)
    else ""
  }

  def getClassName(line: String): String = {
    val size = line.length
    var i = if (line.contains("record")) line.indexOf("record") + 7 else size
    while (i < size && line.charAt(i).isWhitespace) {
      i += 1
    }
    var j = i
    while (j < size && !line.charAt(j).isWhitespace && !line.charAt(j).equals('@')) {
      j += 1
    }
    if (i < size && j <= size) line.substring(i + 1, j - 1)
    else throw new RuntimeException("Doing " + TITLE + ". Cannot find name from record code: \n" + line)
  }
}
Example 3
Source File: CSVParser.scala from multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging

private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
}
Example 4
Source File: TableRowDiffy.scala from ratatool with Apache License 2.0
package com.spotify.ratatool.diffy

import java.io.StringReader

import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import com.google.api.client.json.JsonObjectParser
import com.google.api.client.json.jackson2.JacksonFactory
import com.google.api.services.bigquery.model.{TableFieldSchema, TableRow, TableSchema}

import scala.jdk.CollectionConverters._
import scala.util.Try

class TableRowDiffy(tableSchema: TableSchema,
                    ignore: Set[String] = Set.empty,
                    unordered: Set[String] = Set.empty,
                    unorderedFieldKeys: Map[String, String] = Map())
  extends Diffy[TableRow](ignore, unordered, unorderedFieldKeys) {

  override def apply(x: TableRow, y: TableRow): Seq[Delta] =
    diff(Option(x), Option(y), schema.getFields.asScala.toList, "")

  private type Record = java.util.Map[String, AnyRef]

  // TableSchema is not serializable
  private val schemaString: String =
    new ObjectMapper().disable(SerializationFeature.FAIL_ON_EMPTY_BEANS)
      .writeValueAsString(tableSchema)
  private lazy val schema: TableSchema =
    new JsonObjectParser(new JacksonFactory)
      .parseAndClose(new StringReader(schemaString), classOf[TableSchema])

  // scalastyle:off cyclomatic.complexity
  private def diff(x: Option[Record], y: Option[Record],
                   fields: Seq[TableFieldSchema], root: String): Seq[Delta] = {
    def getField(f: String)(x: Record): Option[AnyRef] = {
      Option(x.get(f))
    }

    fields.flatMap { f =>
      val name = f.getName
      val fullName = if (root.isEmpty) name else root + "." + name
      if (f.getType == "RECORD" && f.getMode != "REPEATED") {
        val a = x.flatMap(r => getField(name)(r).map(_.asInstanceOf[Record]))
        val b = y.flatMap(r => getField(name)(r).map(_.asInstanceOf[Record]))
        if (a.isEmpty && b.isEmpty) {
          Nil
        } else if (a.isEmpty || b.isEmpty) {
          Seq(Delta(fullName, a, b, UnknownDelta))
        } else {
          diff(a, b, f.getFields.asScala.toList, fullName)
        }
      } else if (f.getMode == "REPEATED" && unordered.contains(fullName)) {
        if (f.getType == "RECORD" && unorderedFieldKeys.contains(fullName)) {
          val l = x
            .flatMap(outer => getField(name)(outer).map(_.asInstanceOf[java.util.List[Record]].asScala.toList))
            .getOrElse(List())
            .flatMap(inner => Try(inner.get(unorderedFieldKeys(fullName))).toOption.map(k => (k, inner))).toMap
          val r = y
            .flatMap(outer => getField(name)(outer).map(_.asInstanceOf[java.util.List[Record]].asScala.toList))
            .getOrElse(List())
            .flatMap(inner => Try(inner.get(unorderedFieldKeys(fullName))).toOption.map(k => (k, inner))).toMap
          (l.keySet ++ r.keySet).flatMap(k =>
            diff(l.get(k), r.get(k), f.getFields.asScala.toList, fullName))
        } else {
          val a = x.flatMap(r => Option(r.get(name).asInstanceOf[java.util.List[AnyRef]]))
            .map(sortList)
          val b = y.flatMap(r => Option(r.get(name).asInstanceOf[java.util.List[AnyRef]]))
            .map(sortList)
          if (a == b) Nil else Seq(Delta(fullName, a, b, delta(a.orNull, b.orNull)))
        }
      } else {
        val a = x.flatMap(r => getField(name)(r))
        val b = y.flatMap(r => getField(name)(r))
        if (a == b) Nil else Seq(Delta(fullName, a, b, delta(a.orNull, b.orNull)))
      }
    }.filter(d => !ignore.contains(d.field))
  }
  // scalastyle:on cyclomatic.complexity
}
Example 5
Source File: LanguageAwareStemmerUtil.scala from pravda-ml with Apache License 2.0
package org.apache.spark.ml.odkl.texts

import java.io.StringReader

import org.apache.lucene.analysis.ar.ArabicAnalyzer
import org.apache.lucene.analysis.bg.BulgarianAnalyzer
import org.apache.lucene.analysis.br.BrazilianAnalyzer
import org.apache.lucene.analysis.ca.CatalanAnalyzer
import org.apache.lucene.analysis.cjk.CJKAnalyzer
import org.apache.lucene.analysis.cz.CzechAnalyzer
import org.apache.lucene.analysis.da.DanishAnalyzer
import org.apache.lucene.analysis.de.GermanAnalyzer
import org.apache.lucene.analysis.el.GreekAnalyzer
import org.apache.lucene.analysis.en.EnglishAnalyzer
import org.apache.lucene.analysis.es.SpanishAnalyzer
import org.apache.lucene.analysis.eu.BasqueAnalyzer
import org.apache.lucene.analysis.fa.PersianAnalyzer
import org.apache.lucene.analysis.fi.FinnishAnalyzer
import org.apache.lucene.analysis.fr.FrenchAnalyzer
import org.apache.lucene.analysis.ga.IrishAnalyzer
import org.apache.lucene.analysis.gl.GalicianAnalyzer
import org.apache.lucene.analysis.hi.HindiAnalyzer
import org.apache.lucene.analysis.hu.HungarianAnalyzer
import org.apache.lucene.analysis.hy.ArmenianAnalyzer
import org.apache.lucene.analysis.id.IndonesianAnalyzer
import org.apache.lucene.analysis.it.ItalianAnalyzer
import org.apache.lucene.analysis.lv.LatvianAnalyzer
import org.apache.lucene.analysis.no.NorwegianAnalyzer
import org.apache.lucene.analysis.ro.RomanianAnalyzer
import org.apache.lucene.analysis.ru.RussianAnalyzer
import org.apache.lucene.analysis.sv.SwedishAnalyzer
import org.apache.lucene.analysis.th.ThaiAnalyzer
import org.apache.lucene.analysis.tokenattributes.{CharTermAttribute, OffsetAttribute}
import org.apache.lucene.analysis.tr.TurkishAnalyzer
import org.apache.lucene.analysis.util.StopwordAnalyzerBase

object LanguageAwareStemmerUtil {

  val languageAnalyzersMap = {
    Map[String, () => StopwordAnalyzerBase](
      "ar" -> { () => new ArabicAnalyzer() },
      "bg" -> { () => new BulgarianAnalyzer() },
      "br" -> { () => new BrazilianAnalyzer() },
      "ca" -> { () => new CatalanAnalyzer() },
      "ch" -> { () => new CJKAnalyzer() }, // Chinese, Japanese, Korean
      "ja" -> { () => new CJKAnalyzer() }, // Chinese, Japanese, Korean
      "ko" -> { () => new CJKAnalyzer() },
      "cz" -> { () => new CzechAnalyzer() },
      "da" -> { () => new DanishAnalyzer() },
      "de" -> { () => new GermanAnalyzer() },
      "el" -> { () => new GreekAnalyzer() },
      "en" -> { () => new EnglishAnalyzer() },
      "es" -> { () => new SpanishAnalyzer() },
      "eu" -> { () => new BasqueAnalyzer() },
      "fa" -> { () => new PersianAnalyzer() },
      "fi" -> { () => new FinnishAnalyzer() },
      "fr" -> { () => new FrenchAnalyzer() },
      "ga" -> { () => new IrishAnalyzer() },
      "gl" -> { () => new GalicianAnalyzer() },
      "hi" -> { () => new HindiAnalyzer() },
      "hu" -> { () => new HungarianAnalyzer() },
      "hy" -> { () => new ArmenianAnalyzer() },
      "id" -> { () => new IndonesianAnalyzer() },
      "it" -> { () => new ItalianAnalyzer() },
      "lv" -> { () => new LatvianAnalyzer() },
      "no" -> { () => new NorwegianAnalyzer() },
      "ro" -> { () => new RomanianAnalyzer() },
      "ru" -> { () => new RussianAnalyzer() },
      "sv" -> { () => new SwedishAnalyzer() },
      "th" -> { () => new ThaiAnalyzer() },
      "tr" -> { () => new TurkishAnalyzer() }
    )
  }

  def stemmString(text: String, analyzer: StopwordAnalyzerBase): Array[String] = {
    val reader = new StringReader(text.toLowerCase)
    val tokens = analyzer.tokenStream("text", reader)
    val charTermAttribute = tokens.addAttribute(classOf[CharTermAttribute])
    tokens.reset()

    var ansList = scala.collection.mutable.ArrayBuffer.empty[String]
    while (tokens.incrementToken()) {
      ansList.append(charTermAttribute.toString)
    }
    tokens.close()
    ansList.toArray[String]
  }

  def instantiateMap = languageAnalyzersMap.mapValues(_.apply())
}
Example 6
Source File: URLElimminatorUtil.scala from pravda-ml with Apache License 2.0
package org.apache.spark.ml.odkl.texts

import java.io.StringReader

import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer
import org.apache.lucene.analysis.tokenattributes.{CharTermAttribute, TypeAttribute}
import org.apache.lucene.util.AttributeFactory

object URLElimminatorUtil {

  def geURLTokenizer() = new UAX29URLEmailTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)

  def filterText(text: String, tokenizer: UAX29URLEmailTokenizer) = {
    val stringBuilder = new StringBuilder
    val reader = new StringReader(text.toLowerCase)
    tokenizer.clearAttributes()
    tokenizer.setReader(reader)
    var charTermAttribute = tokenizer.addAttribute(classOf[CharTermAttribute])
    var typeAttribute = tokenizer.addAttribute(classOf[TypeAttribute])
    tokenizer.reset()
    while (tokenizer.incrementToken()) {
      if (typeAttribute.`type`() != UAX29URLEmailTokenizer.TOKEN_TYPES(UAX29URLEmailTokenizer.URL)) {
        stringBuilder.++=(" " + charTermAttribute.toString)
      }
    }
    tokenizer.close()
    reader.close()
    stringBuilder.toString.trim
  }
}
Example 7
Source File: ExpressionOptimizerSpec.scala from mimir with Apache License 2.0
package mimir.optimizer;

import java.io.{StringReader, FileReader}

import org.specs2.mutable._

import mimir._
import mimir.parser._
import mimir.algebra._
import mimir.sql._
import mimir.test.RASimplify
import mimir.optimizer.expression._
import mimir.optimizer.operator._

object ExpressionOptimizerSpec extends Specification with RASimplify {

  def typechecker = new Typechecker
  def expr = ExpressionParser.expr _

  def conditionals(x: String) =
    simplify(PropagateConditions(expr(x)))

  def booleanOpts(x: String) =
    new FlattenBooleanConditionals(typechecker)(PullUpBranches(expr(x)))

  "Propagate Conditions" should {

    "Simplify Redundant Expressions" >> {
      conditionals("((A = 2) AND (A = 2))") must be equalTo expr("A = 2")
    }
    "Simplify Redundant Falsehoods" >> {
      conditionals("((A = 2) AND (A = 3))") must be equalTo expr("FALSE")
      conditionals("((A = 2) AND (A != 2))") must be equalTo expr("FALSE")
    }
    "Simplify If Statements" >> {
      conditionals("((A = 2) AND (CASE WHEN A = 2 THEN 5 ELSE 6 END))") must be equalTo expr("(A = 2) AND 5")
      conditionals("((A = 3) AND (CASE WHEN A = 2 THEN 5 ELSE 6 END))") must be equalTo expr("(A = 3) AND 6")
      conditionals("((A IS NULL) AND (CASE WHEN A IS NULL THEN 5 ELSE 6 END))") must be equalTo expr("(A IS NULL) AND 5")
    }
    "Simplify Negation" >> {
      conditionals("((A IS NOT NULL) AND (A IS NULL))") must be equalTo expr("FALSE")
      conditionals("((A IS NOT NULL) AND (CASE WHEN A IS NULL THEN 5 ELSE 6 END))") must be equalTo expr("(A IS NOT NULL) AND 6")
    }
  }

  "PullUpBranches" should {
    "Simplify Arithmetic" >> {
      booleanOpts("(CASE WHEN A = 1 THEN B ELSE C END) + 2") must be equalTo expr("""
        CASE WHEN A = 1 THEN B + 2 ELSE C + 2 END
      """)
    }
    "Flatten Boolean Expressions" >> {
      booleanOpts("(CASE WHEN A = 1 THEN B ELSE C END) = 2") must be equalTo expr("""
        ((A = 1) AND (B = 2)) OR ((A != 1) AND (C = 2))
      """)
    }
  }
}
Example 8
Source File: DetectSeriesSpec.scala from mimir with Apache License 2.0
package mimir.statistics

import java.io.{BufferedReader, File, FileReader, StringReader}
import java.sql.SQLException

import scala.collection.JavaConversions._

import org.specs2.mutable._
import org.specs2.matcher.FileMatchers

import mimir._
import mimir.sql._
import mimir.parser._
import mimir.algebra._
import mimir.optimizer._
import mimir.ctables._
import mimir.exec._
import mimir.util._
import mimir.test._
import mimir.statistics._

object DetectSeriesSpec extends SQLTestSpecification("DetectSeriesTest") {
  sequential

  def testDetectSeriesof(oper: Operator) = {
    val (schema, df) = SparkUtils.getDataFrameWithProvFromQuery(db, oper)
    DetectSeries.seriesOf(df, schema, 0.1).collect().toSeq
  }

  "The DetectSeriesSpec" should {

    "Be able to load DetectSeriesTest1" >> {
      db.loader.loadTable("test/data/DetectSeriesTest1.csv"); ok
    }

    "Be able to detect Date and Timestamp type" >> {
      val queryOper = select("SELECT * FROM DetectSeriesTest1")
      val colSeq: Seq[String] = testDetectSeriesof(queryOper).map{_.columnName.toString}

      colSeq must have size(4)
      colSeq must contain("TRAN_TS","EXP_DT", "JOIN_DT", "DOB")
    }

    "Be able to create a new schema and detect Date and Timestamp type" >> {
      db.catalog.materializedTableProvider().createStoredTableAs(
        HardTable(Seq(
          ID("JN_DT") -> TDate(),
          ID("JN_TS") -> TTimestamp()
        ), Seq()),
        ID("DetectSeriesTest3"),
        db
      )

      val queryOper = select("SELECT * FROM DetectSeriesTest3")
      val colSeq: Seq[String] = testDetectSeriesof(queryOper).map{_.columnName.toString}

      colSeq must have size(2)
      colSeq must contain("JN_DT", "JN_TS")
    }

    "Be able to load DetectSeriesTest2" >> {
      db.loader.loadTable("test/data/DetectSeriesTest2.csv"); ok
    }

    "Be able to detect Date, Timestamp and increasing-decreasing Numeric type" >> {
      val queryOper = select("SELECT * FROM DetectSeriesTest2")
      val colSeq: Seq[String] = testDetectSeriesof(queryOper).map{_.columnName.toString}

      colSeq must have size(6)
      colSeq must contain("TRAN_TS","EXP_DT", "JOIN_DT", "DOB", "ROW_ID", "QUALITY")
    }
  }
}
Example 9
Source File: GitLabSourceAcl.scala from kafka-security-manager with MIT License
package com.github.simplesteph.ksm.source

import java.io.{Reader, StringReader}
import java.nio.charset.Charset
import java.util.Base64

import com.fasterxml.jackson.databind.ObjectMapper
import com.typesafe.config.Config
import org.slf4j.LoggerFactory
import skinny.http.{HTTP, HTTPException, Request, Response}

class GitLabSourceAcl extends SourceAcl {

  private val log = LoggerFactory.getLogger(classOf[GitLabSourceAcl])

  override val CONFIG_PREFIX: String = "gitlab"

  final val REPOID_CONFIG = "repoid"
  final val FILEPATH_CONFIG = "filepath"
  final val BRANCH_CONFIG = "branch"
  final val HOSTNAME_CONFIG = "hostname"
  final val ACCESSTOKEN_CONFIG = "accesstoken"

  var lastModified: Option[String] = None

  val objectMapper = new ObjectMapper()
  var repoid: String = _
  var filepath: String = _
  var branch: String = _
  var hostname: String = _
  var accessToken: String = _

  override def close(): Unit = {
    // HTTP
  }
}
Example 10
Source File: BitbucketServerSourceAcl.scala from kafka-security-manager with MIT License
package com.github.simplesteph.ksm.source

import java.io.{Reader, StringReader}
import java.nio.charset.Charset
import java.util.Base64

import com.fasterxml.jackson.databind.ObjectMapper
import com.typesafe.config.Config
import org.slf4j.LoggerFactory
import skinny.http.{HTTP, HTTPException, Request, Response}

class BitbucketServerSourceAcl extends SourceAcl {

  private val log = LoggerFactory.getLogger(classOf[BitbucketServerSourceAcl])

  override val CONFIG_PREFIX: String = "bitbucket-server"

  final val HOSTNAME_CONFIG = "hostname"
  final val PORT_CONFIG = "port"
  final val PROTOCOL_CONFIG = "protocol"
  final val PROJECT_CONFIG = "project"
  final val REPO_CONFIG = "repo"
  final val FILEPATH_CONFIG = "filepath"
  final val AUTH_USERNAME_CONFIG = "auth.username"
  final val AUTH_PASSWORD_CONFIG = "auth.password"
  final val BRANCH_CONFIG = "branch"

  var lastCommit: Option[String] = None

  val objectMapper = new ObjectMapper()
  var http: HTTP = HTTP

  var hostname: String = _
  var port: String = _
  var protocol: String = _
  var project: String = _
  var repo: String = _
  var filePath: String = _
  var username: String = _
  var password: String = _
  var branch: Option[String] = _

  override def close(): Unit = {
    // HTTP
  }
}
Example 11
Source File: GitHubSourceAcl.scala from kafka-security-manager with MIT License
package com.github.simplesteph.ksm.source

import java.io.{Reader, StringReader}
import java.nio.charset.Charset
import java.util.Base64

import com.fasterxml.jackson.databind.ObjectMapper
import com.typesafe.config.Config
import org.slf4j.LoggerFactory
import skinny.http.{HTTP, HTTPException, Request, Response}

import scala.util.Try

class GitHubSourceAcl extends SourceAcl {

  private val log = LoggerFactory.getLogger(classOf[GitHubSourceAcl])

  override val CONFIG_PREFIX: String = "github"

  final val USER_CONFIG = "user"
  final val REPO_CONFIG = "repo"
  final val FILEPATH_CONFIG = "filepath"
  final val BRANCH_CONFIG = "branch"
  final val HOSTNAME_CONFIG = "hostname"
  final val AUTH_BASIC_CONFIG = "auth.basic"
  final val AUTH_TOKEN_CONFIG = "auth.token"

  var lastModified: Option[String] = None

  val objectMapper = new ObjectMapper()
  var user: String = _
  var repo: String = _
  var filepath: String = _
  var branch: String = _
  var hostname: String = _
  var basicOpt: Option[String] = _
  var tokenOpt: Option[String] = _

  override def close(): Unit = {
    // HTTP
  }
}
Example 12
Source File: LibraryAPISummary.scala from Argus-SAF with Apache License 2.0
package org.argus.jawa.core

import java.io.{BufferedReader, FileReader, StringReader}

import org.argus.jawa.core.elements.JawaType
import org.argus.jawa.core.util._

trait LibraryAPISummary {
  def isLibraryClass: JawaType => Boolean
}

class NoneLibraryAPISummary extends LibraryAPISummary {
  private val appPackages: MSet[String] = msetEmpty
  private val appPackagePrefixes: MSet[String] = msetEmpty

  private def doLoad(rdr: BufferedReader): Unit = {
    var line = Option(rdr.readLine())
    while (line.isDefined) {
      line match {
        case Some(str) =>
          if (str.endsWith(".*"))
            appPackagePrefixes += str.substring(0, str.length - 2)
          else appPackages += str
        case None =>
      }
      line = Option(rdr.readLine())
    }
  }

  def load(filePath: String): Unit = {
    val rdr: BufferedReader = new BufferedReader(new FileReader(filePath))
    doLoad(rdr)
    rdr.close()
  }

  def loadFromString(str: String): Unit = {
    val rdr: BufferedReader = new BufferedReader(new StringReader(str))
    doLoad(rdr)
    rdr.close()
  }

  override def isLibraryClass: JawaType => Boolean = { typ =>
    !appPackages.contains(typ.getPackageName) &&
      !appPackagePrefixes.exists(typ.getPackageName.startsWith)
  }
}

object NoLibraryAPISummary extends LibraryAPISummary {
  override def isLibraryClass: JawaType => Boolean = _ => false
}
Example 13
Source File: SparkILoop.scala from BigDatalog with Apache License 2.0
package org.apache.spark.repl

import java.io.{BufferedReader, FileReader}

import Predef.{println => _, _}
import scala.util.Properties.{jdkHome, javaVersion, versionString, javaVmName}

import scala.tools.nsc.interpreter.{JPrintWriter, ILoop}
import scala.tools.nsc.Settings
import scala.tools.nsc.util.stringFromStream

// The enclosing declaration is elided in the original listing; in the Spark
// sources these helpers live in the SparkILoop companion object.
object SparkILoop {

  def run(code: String, sets: Settings = new Settings): String = {
    import java.io.{ BufferedReader, StringReader, OutputStreamWriter }

    stringFromStream { ostream =>
      Console.withOut(ostream) {
        val input = new BufferedReader(new StringReader(code))
        val output = new JPrintWriter(new OutputStreamWriter(ostream), true)
        val repl = new SparkILoop(input, output)

        if (sets.classpath.isDefault)
          sets.classpath.value = sys.props("java.class.path")

        repl process sets
      }
    }
  }
  def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString)
}
Example 14
Source File: Tokenizer.scala from Mastering-Spark-for-Data-Science with MIT License
package io.gzet.story.util

import java.io.StringReader

import org.apache.lucene.analysis.en.EnglishAnalyzer
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute

object Tokenizer {

  lazy private val replacePunc = """\\W""".r
  lazy private val replaceDigitOnly = """\\s\\d+\\s""".r

  def lucene(url: Traversable[String]): Traversable[Seq[String]] = {
    val analyzer = new EnglishAnalyzer
    url.map({ text =>
      lucene(text, analyzer)
    })
  }

  def lucene(line: String, analyzer: EnglishAnalyzer = new EnglishAnalyzer()): Seq[String] = {
    val content1 = replacePunc.replaceAllIn(line, " ")
    val content = replaceDigitOnly.replaceAllIn(content1, " ")
    val tReader = new StringReader(content)
    val tStream = analyzer.tokenStream("contents", tReader)
    val term = tStream.addAttribute(classOf[CharTermAttribute])
    tStream.reset()
    val terms = collection.mutable.MutableList[String]()
    while (tStream.incrementToken) {
      if (!term.toString.matches(".*\\d.*") && term.toString.length > 3) {
        terms += term.toString
      }
    }
    tStream.close()
    terms.toSeq
  }
}
Example 15
Source File: EventDrivenFetcher.scala from Mycat-spider with Apache License 2.0
package turbo.crawler.power

import java.io.IOException
import java.io.StringReader
import java.net.BindException
import java.net.SocketException
import java.net.SocketTimeoutException

import org.apache.commons.httpclient.ConnectTimeoutException
import org.apache.commons.httpclient.Header
import org.cyberneko.html.parsers.DOMParser
import org.w3c.dom.Document
import org.xml.sax.InputSource

import turbo.crawler.FetchRejectedException
import turbo.crawler.Fetchable
import turbo.crawler.IO
import turbo.crawler.Logable
import turbo.crawler.ResourceHasAlreadyBeenFetchedException
import turbo.crawler.StringAdapter
import turbo.crawler.io.HttpReturns

/**
 * Event driven fetcher
 * @author mclaren
 */
class EventDrivenFetcher[T <: Fetchable](eventId: String) extends Logable with MessageDriven with IO with StringAdapter {

  // The last two curried parameter lists are truncated in the original
  // listing; they are reconstructed here from the recursive call below.
  def fetch(fetchUrl: String,
            contentFilter: String => String,
            parseDocument: Document => List[T])(hasRejected: Document => Boolean)(howToContinue: (String, turbo.crawler.io.Proxy) => Unit)(referer: String => String): Unit = {
    val _retry = (msg: String) => {
      logger.info("Retry " + msg)
      Thread.sleep(3000)
      this.fetch(fetchUrl, contentFilter, parseDocument)(hasRejected)(howToContinue)(referer)
    }

    var httpReturns: HttpReturns = null
    try {
      val dom = new DOMParser
      httpReturns = this.fromUrl(fetchUrl, Array[Header](new Header("Referer", referer(fetchUrl))))
      dom.parse(new InputSource(new StringReader(contentFilter(httpReturns.body))))
      var document = dom.getDocument

      // check whether we have been blocked by the remote site
      if (hasRejected(document)) throw new FetchRejectedException(fetchUrl)

      parseDocument(document).foreach(x => fireEvent(new Evt(eventId + "_COMPLETION", x)))
    } catch {
      case e: SocketTimeoutException => _retry(e.getMessage)
      case e: SocketException => _retry(e.getMessage)
      case e: ConnectTimeoutException => _retry(e.getMessage)
      case e: IOException => {
        logger.info("Network error with proxy: " + httpReturns.proxy.ip + ":" + httpReturns.proxy.port)
        howToContinue(fetchUrl, httpReturns.proxy)
        // only one redial is allowed within 10 seconds
        _retry(e.getMessage)
      }
      case e: BindException => _retry(e.getMessage)
      case e: FetchRejectedException => {
        logger.info("Blocked by the remote site")
        howToContinue(e.getFetchUrl, httpReturns.proxy)
        // only one redial is allowed within 10 seconds
        _retry(e.getMessage)
      }
      case e: ResourceHasAlreadyBeenFetchedException =>
      case e: Exception => {
        logger.error("Unknown exception has been occurred", e)
      }
    }
  }
}
Example 16
Source File: Pagination.scala from Mycat-spider with Apache License 2.0
package turbo.crawler.power

import java.io.IOException
import java.io.StringReader
import java.net.SocketException
import java.net.SocketTimeoutException

import org.apache.commons.httpclient.ConnectTimeoutException
import org.apache.commons.httpclient.Header
import org.cyberneko.html.parsers.DOMParser
import org.w3c.dom.Document
import org.xml.sax.InputSource

import turbo.crawler.FetchRejectedException
import turbo.crawler.Logable
import turbo.crawler.ResourceHasAlreadyBeenFetchedException
import turbo.crawler.io.HttpReturns
import turbo.crawler.io.InternetIO

/**
 * Pagination support
 * @author mclaren
 */
object pages extends Logable with InternetIO {

  def apply(fetchUrl: String, contentFilter: String => String, checkBoundary: Document => Int, urlFactory: (String, Int) => String)(hasRejected: Document => Boolean)(howToContinue: (String, turbo.crawler.io.Proxy) => Unit): List[String] = {
    var value = new ValueRef[Int](0)
    resetBoundary(fetchUrl, value, contentFilter, checkBoundary, urlFactory)(hasRejected)(howToContinue)
    var rts = List[String]()
    value.get
    for (i <- 1 to value.get) {
      rts = rts.+:(urlFactory(fetchUrl, i))
    }
    rts
  }

  // The trailing parameter list is truncated in the original listing;
  // it is reconstructed here from the call sites above and below.
  private def resetBoundary(fetchUrl: String, lastPage: ValueRef[Int], contentFilter: String => String = x => x, checkBoundary: Document => Int, urlFactory: (String, Int) => String)(hasRejected: Document => Boolean)(howToContinue: (String, turbo.crawler.io.Proxy) => Unit): Unit = {
    val _retry = (() => {
      Thread.sleep(3000)
      resetBoundary(fetchUrl, lastPage, contentFilter, checkBoundary, urlFactory)(hasRejected)(howToContinue)
    })

    var httpReturns: HttpReturns = null
    try {
      var domp = new DOMParser
      httpReturns = this.fromUrl(fetchUrl, Array[Header]())
      domp.parse(new InputSource(new StringReader(contentFilter(httpReturns.body))))
      var document = domp.getDocument

      if (hasRejected(document)) throw new FetchRejectedException(fetchUrl, httpReturns.proxy)
      lastPage.set(checkBoundary(document))
    } catch {
      case e: SocketTimeoutException => _retry()
      case e: SocketException => _retry()
      case e: ConnectTimeoutException => _retry()
      case e: IOException => _retry()
      case e: FetchRejectedException => {
        logger.info("Blocked by the remote site")
        howToContinue(e.getFetchUrl, httpReturns.proxy)
        _retry()
      }
      case e: ResourceHasAlreadyBeenFetchedException =>
      case e: Exception => {
        logger.error("Unknown exception has been occurred", e)
      }
    }
  }
}

class ValueRef[M](v: M) {
  var value = v
  def set(vv: M) = this.value = vv
  def get = value
}
Example 17
Source File: StaxDecoder.scala from scalaz-deriving with GNU Lesser General Public License v3.0
// Copyright: 2017 - 2020 Sam Halliday
// License: http://www.gnu.org/licenses/lgpl-3.0.en.html

package xmlformat
package stax

import java.io.StringReader

import javax.xml.stream.{ XMLInputFactory, XMLStreamConstants, XMLStreamReader }
import scalaz._, Scalaz._

import com.ctc.wstx.stax.WstxInputFactory

// scalafix:off DisableSyntax.keywords.while,DisableSyntax.keywords.var
object StaxDecoder {
  // must not escape the code in this module
  private[this] val factory = new ThreadLocal[XMLInputFactory] {
    override def initialValue: WstxInputFactory = {
      val f = new com.ctc.wstx.stax.WstxInputFactory
      f.configureForSpeed()
      f
    }
  }

  import XMLStreamConstants._

  def parse(txt: String): String \/ XTag = {
    val reader = factory.get.createXMLStreamReader(new StringReader(txt))
    try {
      reader.nextTag()
      parseTag(reader).right
    } catch {
      case e: Exception =>
        s"parser error: ${e.getMessage} ${e.getClass}".left
    } finally reader.close()
  }

  private[this] def parseTag(x: XMLStreamReader): XTag = {
    val name = x.getName.getLocalPart()
    val attrs = 0.until(x.getAttributeCount).toList.map { i =>
      XAttr(
        x.getAttributeLocalName(i),
        XString(x.getAttributeValue(i))
      )
    }
    var children = IList.empty[XTag]
    var content = IList.empty[String]
    x.next()
    while (x.getEventType() != END_ELEMENT) {
      x.getEventType() match {
        case START_ELEMENT =>
          children = parseTag(x) :: children
        case CHARACTERS | CDATA =>
          val text = x.getText().trim
          if (!text.isEmpty) content = text :: content
        case _ =>
      }
      x.next()
    }
    val body = content.toNel.map(t => XString(t.reverse.fold)).toMaybe
    XTag(name, attrs.toIList, children.reverse, body)
  }
}
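A hypothetical round trip through the decoder above, assuming the surrounding xmlformat model types (XTag, XAttr, XString) are on the classpath; parse returns a scalaz disjunction with the parsed XTag on the right:

import scalaz.{-\/, \/-}

StaxDecoder.parse("""<root attr="1">hello</root>""") match {
  case \/-(tag) => println(tag.name) // prints "root"
  case -\/(err) => println(err)      // parser error message
}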
Example 18
Source File: PMMLUtils.scala from sona with Apache License 2.0
package com.tencent.angel.sona.ml.util

import java.io.StringReader

import org.dmg.pmml._
import org.jpmml.model.{ImportFilter, JAXBUtil}
import org.xml.sax.InputSource

/**
 * Testing utils for working with PMML.
 * Predictive Model Markup Language (PMML) is an XML-based file format
 * developed by the Data Mining Group (www.dmg.org).
 */
object PMMLUtils {
  /**
   * :: Experimental ::
   * Load a PMML model from a string. Note: for testing only, PMML model evaluation is supported
   * through external spark-packages.
   */
  def loadFromString(input: String): PMML = {
    val is = new StringReader(input)
    val transformed = ImportFilter.apply(new InputSource(is))
    JAXBUtil.unmarshalPMML(transformed)
  }
}
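A hedged usage sketch for the helper above: read a PMML document into a string (the file path here is hypothetical) and unmarshal it.

import com.tencent.angel.sona.ml.util.PMMLUtils
import org.dmg.pmml.PMML

val pmmlXml: String = scala.io.Source.fromFile("model.pmml").mkString // hypothetical path
val pmml: PMML = PMMLUtils.loadFromString(pmmlXml)
println(pmml.getVersion)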
Example 19
Source File: PropertiesApiSuite.scala from iep-apps with Apache License 2.0
package com.netflix.iep.archaius

import java.io.StringReader
import java.util.Properties

import akka.http.scaladsl.model.HttpResponse
import akka.http.scaladsl.model.MediaTypes
import akka.http.scaladsl.model.StatusCode
import akka.http.scaladsl.model.StatusCodes
import akka.http.scaladsl.model.headers._
import akka.http.scaladsl.testkit.RouteTestTimeout
import akka.http.scaladsl.testkit.ScalatestRouteTest
import com.netflix.atlas.akka.RequestHandler
import com.netflix.atlas.json.Json
import com.netflix.spectator.api.DefaultRegistry
import com.netflix.spectator.api.ManualClock
import org.scalatest.funsuite.AnyFunSuite

class PropertiesApiSuite extends AnyFunSuite with ScalatestRouteTest {
  import scala.concurrent.duration._

  implicit val routeTestTimeout = RouteTestTimeout(5.second)

  val clock = new ManualClock()
  val registry = new DefaultRegistry(clock)
  val propContext = new PropertiesContext(registry)
  val endpoint = new PropertiesApi(propContext, system)
  val routes = RequestHandler.standardOptions(endpoint.routes)

  private def assertJsonContentType(response: HttpResponse): Unit = {
    assert(response.entity.contentType.mediaType === MediaTypes.`application/json`)
  }

  private def assertResponse(response: HttpResponse, expected: StatusCode): Unit = {
    assert(response.status === expected)
    assertJsonContentType(response)
  }

  test("no asg") {
    Get("/api/v1/property") ~> routes ~> check {
      assert(response.status === StatusCodes.BadRequest)
    }
  }

  test("empty") {
    propContext.update(Nil)
    Get("/api/v1/property?asg=foo-main-v001") ~>
      addHeader(Accept(MediaTypes.`application/json`)) ~>
      routes ~> check {
      assertResponse(response, StatusCodes.OK)
      assert(responseAs[String] === "[]")
    }
  }

  test("properties response") {
    propContext.update(
      List(
        PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L),
        PropertiesApi.Property("foo-main::1", "foo-main", "1", "2", 12345L),
        PropertiesApi.Property("bar-main::c", "bar-main", "c", "d", 12345L)
      )
    )
    Get("/api/v1/property?asg=foo-main-v001") ~> routes ~> check {
      assert(response.status === StatusCodes.OK)
      val props = new Properties
      props.load(new StringReader(responseAs[String]))
      assert(props.size === 2)
      assert(props.getProperty("a") === "b")
      assert(props.getProperty("1") === "2")
    }
  }

  test("json response") {
    propContext.update(
      List(
        PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L)
      )
    )
    Get("/api/v1/property?asg=foo-main-v001") ~>
      addHeader(Accept(MediaTypes.`application/json`)) ~>
      routes ~> check {
      assertResponse(response, StatusCodes.OK)
      val props = Json.decode[List[PropertiesApi.Property]](responseAs[String])
      assert(props === List(PropertiesApi.Property("foo-main::a", "foo-main", "a", "b", 12345L)))
    }
  }
}
Example 20
Source File: BeamTypeCoders.scala from scio with Apache License 2.0
package com.spotify.scio.coders.instances

import java.io.StringReader

import com.google.api.client.json.GenericJson
import com.google.api.client.json.jackson2.JacksonFactory
import com.google.api.client.json.JsonObjectParser
import com.google.api.services.bigquery.model.TableRow
import com.spotify.scio.coders.Coder
import com.spotify.scio.util.ScioUtil
import org.apache.beam.sdk.coders.RowCoder
import org.apache.beam.sdk.io.FileIO.ReadableFile
import org.apache.beam.sdk.io.fs.{MatchResult, MetadataCoderV2}
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder
import org.apache.beam.sdk.io.gcp.pubsub.{PubsubMessage, PubsubMessageWithAttributesCoder}
import org.apache.beam.sdk.io.ReadableFileCoder
import org.apache.beam.sdk.schemas.{Schema => BSchema}
import org.apache.beam.sdk.transforms.windowing.{BoundedWindow, IntervalWindow, PaneInfo}
import org.apache.beam.sdk.values.{KV, Row}

import scala.reflect.ClassTag

trait BeamTypeCoders {
  import BeamTypeCoders._

  implicit def intervalWindowCoder: Coder[IntervalWindow] = Coder.beam(IntervalWindow.getCoder)

  implicit def boundedWindowCoder: Coder[BoundedWindow] = Coder.kryo[BoundedWindow]

  implicit def paneInfoCoder: Coder[PaneInfo] = Coder.beam(PaneInfo.PaneInfoCoder.of())

  implicit def tableRowCoder: Coder[TableRow] = Coder.beam(TableRowJsonCoder.of())

  def row(schema: BSchema): Coder[Row] = Coder.beam(RowCoder.of(schema))

  implicit def messageCoder: Coder[PubsubMessage] =
    Coder.beam(PubsubMessageWithAttributesCoder.of())

  implicit def beamKVCoder[K: Coder, V: Coder]: Coder[KV[K, V]] = Coder.kv(Coder[K], Coder[V])

  implicit def readableFileCoder: Coder[ReadableFile] = Coder.beam(new ReadableFileCoder())

  implicit def matchResultMetadataCoder: Coder[MatchResult.Metadata] =
    Coder.beam(MetadataCoderV2.of())

  implicit def genericJsonCoder[T <: GenericJson: ClassTag]: Coder[T] =
    Coder.xmap(Coder[String])(
      str => DefaultJsonObjectParser.parseAndClose(new StringReader(str), ScioUtil.classOf[T]),
      DefaultJsonObjectParser.getJsonFactory().toString(_)
    )
}

private[coders] object BeamTypeCoders extends BeamTypeCoders {
  private lazy val DefaultJsonObjectParser = new JsonObjectParser(new JacksonFactory)
}
Example 21
Source File: Frontin.scala from paradox with Apache License 2.0
package com.lightbend.paradox.markdown

import java.io.{ File, StringReader }
import collection.JavaConverters._

case class Frontin(header: Map[String, String], body: String)

object Frontin {
  val separator = "---"

  def separates(str: String): Boolean =
    (str.trim == separator) && (str startsWith separator)

  def apply(file: File): Frontin =
    apply(scala.io.Source.fromFile(file)("UTF-8").getLines.mkString("\n"))

  def apply(str: String): Frontin =
    str.linesWithSeparators.toList match {
      case Nil => Frontin(Map.empty[String, String], "")
      case x :: xs if separates(x) =>
        xs span { !separates(_) } match {
          case (h, b) =>
            Frontin(loadProperties(Some(h.mkString(""))), if (b.isEmpty) "" else b.tail.mkString(""))
        }
      case _ => Frontin(Map.empty[String, String], str)
    }

  def loadProperties(str: Option[String]): Map[String, String] = str match {
    case None => Map.empty[String, String]
    case Some(s) =>
      val p = new java.util.Properties
      p.load(new StringReader(s))
      p.asScala.toMap
  }
}
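Tracing Frontin.apply on a small document makes the split concrete: the lines between the two "---" separators are parsed as java.util.Properties (via a StringReader), and the rest becomes the body. A small sketch, assuming the class above is in scope:

// Header lines use Properties syntax (key=value), per loadProperties above.
val page = Frontin("---\ntitle=My Page\n---\nHello, world")
assert(page.header == Map("title" -> "My Page"))
assert(page.body == "Hello, world")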
Example 22
Source File: GloveUnitTest.scala from random-projections-at-berlinbuzzwords with Apache License 2.0
package com.stefansavev.fuzzysearchtest

import java.io.StringReader

import com.stefansavev.TemporaryFolderFixture
import com.stefansavev.core.serialization.TupleSerializers._
import org.junit.runner.RunWith
import org.scalatest.{FunSuite, Matchers}
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class GloveUnitTest extends FunSuite with TemporaryFolderFixture with Matchers {

  def readResource(name: String): String = {
    val stream = getClass.getResourceAsStream(name)
    val lines = scala.io.Source.fromInputStream(stream).getLines
    lines.mkString("\n")
  }

  def parameterizedTest(inputTextFile: String, indexFile: String, numTrees: Int, expectedResultsName: String): Unit = {
    val expectedResults = readResource(expectedResultsName).trim
    val queryResults = GloveTest.run(inputTextFile, indexFile, numTrees).trim
    assertResult(expectedResults)(queryResults)
  }

  //manually download http://nlp.stanford.edu/data/glove.6B.zip and unzip into test/resources/glove
  //then enable the test
  ignore("test glove num trees 1") {
    val numTrees: Int = 1
    val inputTextFile: String = "src/test/resources/glove/glove.6B.100d.txt"
    val index = temporaryFolder.newFolder("index").getAbsolutePath
    val expectedResultsResouceName = "/glove/expected_results_num_trees_1.txt"
    parameterizedTest(inputTextFile, index, numTrees, expectedResultsResouceName)
  }

  ignore("test glove num trees 150") {
    val numTrees: Int = 150
    val inputTextFile: String = "src/test/resources/glove/glove.6B.100d.txt"
    val index = temporaryFolder.newFolder("index").getAbsolutePath
    val expectedResultsResouceName = "/glove/expected_results_num_trees_150.txt"
    parameterizedTest(inputTextFile, index, numTrees, expectedResultsResouceName)
  }
}
Example 23
Source File: SparkILoop.scala from drizzle-spark with Apache License 2.0
package org.apache.spark.repl

import java.io.BufferedReader

import scala.Predef.{println => _, _}
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.{ILoop, JPrintWriter}
import scala.tools.nsc.util.stringFromStream
import scala.util.Properties.{javaVersion, javaVmName, versionString}

// The enclosing declaration is elided in the original listing; in the Spark
// sources these helpers live in the SparkILoop companion object.
object SparkILoop {

  def run(code: String, sets: Settings = new Settings): String = {
    import java.io.{ BufferedReader, StringReader, OutputStreamWriter }

    stringFromStream { ostream =>
      Console.withOut(ostream) {
        val input = new BufferedReader(new StringReader(code))
        val output = new JPrintWriter(new OutputStreamWriter(ostream), true)
        val repl = new SparkILoop(input, output)

        if (sets.classpath.isDefault) {
          sets.classpath.value = sys.props("java.class.path")
        }
        repl process sets
      }
    }
  }
  def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString)
}
Example 24
Source File: CSVParser.scala from drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging

private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
}
Example 25
Source File: package.scala from spark-lucenerdd with Apache License 2.0
package org.zouzias.spark.lucenerdd.spatial

import java.io.StringReader

import org.locationtech.jts.geom.{Coordinate, GeometryFactory}
import org.locationtech.spatial4j.shape.Shape
import org.zouzias.spark.lucenerdd.spatial.shape.context.ContextLoader

package object shape extends ContextLoader {

  private val GeometryFactory = new GeometryFactory()

  implicit def convertToPoint(point: (Double, Double)): Shape = {
    ctx.makePoint(point._1, point._2)
  }

  implicit def WKTToShape(shapeAsString: String): Shape = {
    try {
      shapeReader.read(new StringReader(shapeAsString))
    } catch {
      case e: Exception => ctx.makePoint(0.0, 0.0)
    }
  }

  implicit def rectangleToShape(rect: (Double, Double, Double, Double)): Shape = {
    val minX = rect._1
    val maxX = rect._2
    val minY = rect._3
    val maxY = rect._4
    ctx.makeRectangle(minX, maxX, minY, maxY)
  }

  implicit def circleToShape(circle: ((Double, Double), Double)): Shape = {
    val x = circle._1._1
    val y = circle._1._2
    val radius = circle._2
    ctx.makeCircle(x, y, radius)
  }

  implicit def listPolygonToShape(rect: List[(Double, Double)]): Shape = {
    val coordinates = rect.map(p => new Coordinate(p._1, p._2)).toArray
    val polygon = GeometryFactory.createPolygon(coordinates)
    ctx.makeShape(polygon)
  }

  implicit def arrayPolygonToShape(rect: Array[(Double, Double)]): Shape = {
    val coordinates = rect.map(p => new Coordinate(p._1, p._2))
    val polygon = GeometryFactory.createPolygon(coordinates)
    ctx.makeShape(polygon)
  }
}
Example 26
Source File: LuceneDocToSparkRowpec.scala from spark-lucenerdd with Apache License 2.0
package org.zouzias.spark.lucenerdd

import java.io.{Reader, StringReader}

import org.apache.lucene.document.{Document, DoublePoint, Field, FloatPoint, IntPoint, LongPoint, StoredField, TextField}
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}
import org.zouzias.spark.lucenerdd.models.SparkScoreDoc
import org.zouzias.spark.lucenerdd.models.SparkScoreDoc.{DocIdField, ScoreField, ShardField}

import scala.collection.JavaConverters._

class LuceneDocToSparkRowpec extends FlatSpec
  with Matchers
  with BeforeAndAfterEach {

  val (score: Float, docId: Int, shardIndex: Int) = (1.0f, 1, 2)
  val float: Float = 20.001f
  val double: Double = 10.1000000001D

  def generate_doc(): Document = {
    val doc = new Document()

    // Add long field
    doc.add(new LongPoint("longField", 10))
    doc.add(new StoredField("longField", 10))

    doc.add(new FloatPoint("floatField", float))
    doc.add(new StoredField("floatField", float))

    doc.add(new IntPoint("intField", 9))
    doc.add(new StoredField("intField", 9))

    doc.add(new DoublePoint("doubleField", double))
    doc.add(new StoredField("doubleField", double))

    doc.add(new TextField("textField", "hello world", Field.Store.NO))
    doc.add(new StoredField("textField", "hello world"))

    doc
  }

  private val doc: Document = generate_doc()

  val sparkScoreDoc = SparkScoreDoc(score, docId, shardIndex, doc)

  "SparkScoreDoc.toRow" should "return correct score" in {
    val row = sparkScoreDoc.toRow()
    row.getFloat(row.fieldIndex(ScoreField)) should equal(score)
  }

  "SparkScoreDoc.toRow" should "return correct docId" in {
    val row = sparkScoreDoc.toRow()
    row.getInt(row.fieldIndex(DocIdField)) should equal(docId)
  }

  "SparkScoreDoc.toRow" should "return correct shard number" in {
    val row = sparkScoreDoc.toRow()
    row.getInt(row.fieldIndex(ShardField)) should equal(shardIndex)
  }

  "SparkScoreDoc.toRow" should "return correct number of fields" in {
    val row = sparkScoreDoc.toRow()
    row.getFields().asScala.count(_.fieldType().stored()) should equal(8)
  }

  "SparkScoreDoc.toRow" should "set correctly DoublePoint" in {
    val row = sparkScoreDoc.toRow()
    row.getDouble(row.fieldIndex("doubleField")) should equal(double)
  }

  "SparkScoreDoc.toRow" should "set correctly FloatPoint" in {
    val row = sparkScoreDoc.toRow()
    row.getFloat(row.fieldIndex("floatField")) should equal(float)
  }
}
Example 27
Source File: SequenceSupport.scala from eel-sdk with Apache License 2.0
package io.eels.component.sequence

import java.io.StringReader
import java.nio.charset.Charset

import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import io.eels.component.csv.{CsvFormat, CsvSupport}
import io.eels.schema.{Field, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}

object SequenceSupport extends Logging with Using {

  def createReader(path: Path)(implicit conf: Configuration): SequenceFile.Reader =
    new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))

  def toValues(v: BytesWritable): Array[String] = toValues(new String(v.copyBytes(), Charset.forName("UTF8")))

  def toValues(str: String): Array[String] = {
    val parser = CsvSupport.createParser(CsvFormat(), false, false, false, null, null)
    parser.beginParsing(new StringReader(str))
    val record = parser.parseNext()
    parser.stopParsing()
    record
  }

  def schema(path: Path)(implicit conf: Configuration): StructType = {
    logger.debug(s"Fetching sequence schema for $path")
    using(createReader(path)) { it =>
      val k = new IntWritable()
      val v = new BytesWritable()
      val fields: Array[Field] = {
        it.next(k, v)
        toValues(v).map { it => new Field(it) }
      }
      StructType(fields.toList)
    }
  }
}
Example 28
Source File: StaxXmlParserUtils.scala from spark-xml with Apache License 2.0
package com.databricks.spark.xml.parsers

import java.io.StringReader

import javax.xml.stream.{EventFilter, XMLEventReader, XMLInputFactory, XMLStreamConstants}
import javax.xml.stream.events._

import scala.annotation.tailrec
import scala.collection.JavaConverters._

import com.databricks.spark.xml.XmlOptions

private[xml] object StaxXmlParserUtils {

  private val factory: XMLInputFactory = {
    val factory = XMLInputFactory.newInstance()
    factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false)
    factory.setProperty(XMLInputFactory.IS_COALESCING, true)
    factory
  }

  def filteredReader(xml: String): XMLEventReader = {
    val filter = new EventFilter {
      override def accept(event: XMLEvent): Boolean =
        // Ignore comments and processing instructions
        event.getEventType match {
          case XMLStreamConstants.COMMENT | XMLStreamConstants.PROCESSING_INSTRUCTION => false
          case _ => true
        }
    }
    // It does not have to skip for white space, since `XmlInputFormat`
    // always finds the root tag without a heading space.
    val eventReader = factory.createXMLEventReader(new StringReader(xml))
    factory.createFilteredReader(eventReader, filter)
  }

  def gatherRootAttributes(parser: XMLEventReader): Array[Attribute] = {
    val rootEvent =
      StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.START_ELEMENT)
    rootEvent.asStartElement.getAttributes.asScala.map(_.asInstanceOf[Attribute]).toArray
  }

  def skipChildren(parser: XMLEventReader): Unit = {
    var shouldStop = checkEndElement(parser)
    while (!shouldStop) {
      parser.nextEvent match {
        case _: StartElement =>
          val e = parser.peek
          if (e.isCharacters && e.asCharacters.isWhiteSpace) {
            // There can be a `Characters` event between `StartElement`s.
            // So, we need to check further to decide if this is a data or just
            // a whitespace between them.
            parser.next
          }
          if (parser.peek.isStartElement) {
            skipChildren(parser)
          }
        case _: EndElement =>
          shouldStop = checkEndElement(parser)
        case _: XMLEvent => // do nothing
      }
    }
  }
}
Example 29
Source File: StaxXmlParserUtilsSuite.scala from spark-xml with Apache License 2.0
package com.databricks.spark.xml.parsers

import java.io.StringReader

import javax.xml.stream.events.Attribute
import javax.xml.stream.{XMLInputFactory, XMLStreamConstants}

import scala.collection.JavaConverters._

import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite

import com.databricks.spark.xml.XmlOptions

final class StaxXmlParserUtilsSuite extends AnyFunSuite with BeforeAndAfterAll {

  private val factory = XMLInputFactory.newInstance()
  factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, false)
  factory.setProperty(XMLInputFactory.IS_COALESCING, true)

  test("Test if elements are skipped until the given event type") {
    val input = <ROW><id>2</id><name>Sam Mad Dog Smith</name><amount>93</amount></ROW>
    val parser = factory.createXMLEventReader(new StringReader(input.toString))
    val event = StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.END_DOCUMENT)
    assert(event.isEndDocument)
  }

  test("Check the end of element") {
    val input = <ROW><id>2</id></ROW>
    val parser = factory.createXMLEventReader(new StringReader(input.toString))
    // Skip until </id>
    StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.END_ELEMENT)
    assert(StaxXmlParserUtils.checkEndElement(parser))
  }

  test("Convert attributes to a map with keys and values") {
    val input = <ROW id="2"></ROW>
    val parser = factory.createXMLEventReader(new StringReader(input.toString))
    val event = StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.START_ELEMENT)
    val attributes =
      event.asStartElement().getAttributes.asScala.map(_.asInstanceOf[Attribute]).toArray
    val valuesMap = StaxXmlParserUtils.convertAttributesToValuesMap(attributes, new XmlOptions())
    assert(valuesMap === Map(s"${XmlOptions.DEFAULT_ATTRIBUTE_PREFIX}id" -> "2"))
  }

  test("Convert current structure to string") {
    val input = <ROW><id>2</id><info>
      <name>Sam Mad Dog Smith</name><amount><small>1</small><large>9</large></amount></info></ROW>
    val parser = factory.createXMLEventReader(new StringReader(input.toString))
    // Skip until </id>
    StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.END_ELEMENT)
    val xmlString = StaxXmlParserUtils.currentStructureAsString(parser)
    val expected = <info>
      <name>Sam Mad Dog Smith</name><amount><small>1</small><large>9</large></amount></info>
    assert(xmlString === expected.toString())
  }

  test("Skip XML children") {
    val input = <ROW><info>
      <name>Sam Mad Dog Smith</name><amount><small>1</small>
        <large>9</large></amount></info><abc>2</abc><test>2</test></ROW>
    val parser = factory.createXMLEventReader(new StringReader(input.toString))
    // We assume here it's reading the value within `id` field.
    StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.CHARACTERS)
    StaxXmlParserUtils.skipChildren(parser)
    assert(parser.nextEvent().asEndElement().getName.getLocalPart === "info")
    parser.next()
    StaxXmlParserUtils.skipChildren(parser)
    assert(parser.nextEvent().asEndElement().getName.getLocalPart === "abc")
    parser.next()
    StaxXmlParserUtils.skipChildren(parser)
    assert(parser.nextEvent().asEndElement().getName.getLocalPart === "test")
  }
}
Example 30
Source File: ArtifactSourceBackedMustacheResolver.scala from rug with GNU General Public License v3.0
package com.atomist.project.common.template

import java.io.{Reader, StringReader}

import com.atomist.source.ArtifactSource
import com.github.mustachejava.resolver.DefaultResolver
import com.typesafe.scalalogging.LazyLogging

class ArtifactSourceBackedMustacheResolver(artifactSource: ArtifactSource)
  extends DefaultResolver
    with LazyLogging {

  override def getReader(resourceName: String): Reader = {
    logger.debug(s"Need to return Reader for $resourceName")
    artifactSource.findFile(resourceName) match {
      case Some(f) => new StringReader(f.content)
      case _ => new StringReader(resourceName)
    }
  }
}
Example 31
Source File: PropertiesMutableView.scala from rug with GNU General Public License v3.0
package com.atomist.rug.kind.properties

import java.io.StringReader
import java.util.Properties

import com.atomist.rug.kind.core.{LazyFileArtifactBackedMutableView, ProjectMutableView}
import com.atomist.rug.spi.{ExportFunction, ExportFunctionParameterDescription, TerminalView}
import com.atomist.source.FileArtifact

import scala.collection.JavaConverters._

class PropertiesMutableView(
                             originalBackingObject: FileArtifact,
                             parent: ProjectMutableView)
  extends LazyFileArtifactBackedMutableView(originalBackingObject, parent)
    with TerminalView[FileArtifact] {

  private var properties = originalBackingObject.content

  override protected def currentContent: String = properties

  @ExportFunction(readOnly = true, description = "Return the content of this property")
  def getValue(@ExportFunctionParameterDescription(name = "key",
    description = "The name of the simple node") key: String): String = {
    val regexp = s"$key=(.*)".r
    val matched = regexp.findFirstMatchIn(properties)
    if (matched.isDefined) {
      val rm = regexp.findFirstMatchIn(properties).get
      rm.group(1)
    } else {
      ""
    }
  }

  @ExportFunction(readOnly = false,
    description = "Set the value of the specified property, creating a property if not present")
  def setProperty(@ExportFunctionParameterDescription(name = "key",
    description = "The key of the property being set") key: String,
                  @ExportFunctionParameterDescription(name = "value",
                    description = "The value of the property") newValue: String): Unit = {
    if (content.contains(key)) {
      val regexp = s"$key=(.*)"
      this.properties = content.replaceFirst(regexp, s"$key=$newValue")
    } else {
      val newPropertyString = s"$key=$newValue\n"
      this.properties = content.concat(newPropertyString)
    }
  }

  @ExportFunction(readOnly = true, description = "Return whether a property key exists in this file or not")
  def containsKey(@ExportFunctionParameterDescription(name = "key",
    description = "The key of the property being searched for") key: String): Boolean = {
    val properties = new Properties()
    properties.load(new StringReader(content))
    properties.containsKey(key)
  }

  @ExportFunction(readOnly = true, description = "Return whether a property value exists in this file or not")
  def containsValue(@ExportFunctionParameterDescription(name = "value",
    description = "The value being searched for") value: String): Boolean = {
    val properties = new Properties()
    properties.load(new StringReader(content))
    properties.containsValue(value)
  }

  @ExportFunction(readOnly = true, description = "Return a list of the supported keys")
  def keys: List[Any] = {
    val properties = new Properties()
    properties.load(new StringReader(content))
    properties.propertyNames().asScala.toList
  }
}
Example 32
Source File: BatchProducerSpec.scala from Scala-Programming-Projects with MIT License
package coinyser

import java.io.{BufferedOutputStream, StringReader}
import java.nio.CharBuffer
import java.sql.Timestamp

import cats.effect.IO
import org.apache.spark.sql._
import org.apache.spark.sql.test.SharedSparkSession
import org.scalatest.{Matchers, WordSpec}

class BatchProducerSpec extends WordSpec with Matchers with SharedSparkSession {

  val httpTransaction1 = HttpTransaction("1532365695", "70683282", "7740.00", "0", "0.10041719")
  val httpTransaction2 = HttpTransaction("1532365693", "70683281", "7739.99", "0", "0.00148564")

  "BatchProducer.jsonToHttpTransaction" should {
    "create a Dataset[HttpTransaction] from a Json string" in {
      val json =
        """[{"date": "1532365695", "tid": "70683282", "price": "7740.00", "type": "0", "amount": "0.10041719"},
          |{"date": "1532365693", "tid": "70683281", "price": "7739.99", "type": "0", "amount": "0.00148564"}]""".stripMargin
      val ds: Dataset[HttpTransaction] = BatchProducer.jsonToHttpTransactions(json)
      ds.collect() should contain theSameElementsAs Seq(httpTransaction1, httpTransaction2)
    }
  }

  "BatchProducer.httpToDomainTransactions" should {
    "transform a Dataset[HttpTransaction] into a Dataset[Transaction]" in {
      import testImplicits._
      val source: Dataset[HttpTransaction] = Seq(httpTransaction1, httpTransaction2).toDS()
      val target: Dataset[Transaction] = BatchProducer.httpToDomainTransactions(source)

      val transaction1 = Transaction(timestamp = new Timestamp(1532365695000L), tid = 70683282,
        price = 7740.00, sell = false, amount = 0.10041719)
      val transaction2 = Transaction(timestamp = new Timestamp(1532365693000L), tid = 70683281,
        price = 7739.99, sell = false, amount = 0.00148564)
      target.collect() should contain theSameElementsAs Seq(transaction1, transaction2)
    }
  }
}
Example 33
Source File: CSVParser.scala from sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging

private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
}
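The class wraps univocity's CsvWriter around an in-memory CharArrayWriter so that each flush() yields the CSV text produced so far. A minimal sketch of that underlying pattern, using default writer settings rather than Spark's CSVOptions:

import java.io.CharArrayWriter
import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}

val buffer = new CharArrayWriter()
val writer = new CsvWriter(buffer, new CsvWriterSettings())
writer.writeRow("a", "b", "c")
writer.flush()
println(buffer.toString.stripLineEnd) // a,b,c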
Example 34
Source File: ScalaObjectHandlerTest.scala From fintrospect with Apache License 2.0 | 5 votes |
package io.fintrospect.templating

import java.io.{StringReader, StringWriter}
import java.util.concurrent.Callable

import com.github.mustachejava.DefaultMustacheFactory
import org.scalatest.{FunSpec, Matchers}

class ScalaObjectHandlerTest extends FunSpec with Matchers {

  describe("ScalaObjectHandler") {
    it("maps") {
      render("{{#map}}{{test}}{{test2}}{{/map}}", Map("map" -> Map("test" -> "fred"))) shouldBe "fred"
    }

    it("handler") {
      val model = new {
        val list = Seq(new {
          lazy val optionalHello = Some("Hello")
          val futureWorld = new Callable[String] {
            def call(): String = "world"
          }
          val test = true
          val num = 0
        }, new {
          val optionalHello = Some("Goodbye")
          val futureWorld = new Callable[String] {
            def call(): String = "thanks for all the fish"
          }
          lazy val test = false
          val map = Map("value" -> "test")
          val num = 1
        })
      }

      render("{{#list}}{{optionalHello}}, {{futureWorld}}!" +
        "{{#test}}?{{/test}}{{^test}}!{{/test}}{{#num}}?{{/num}}{{^num}}!{{/num}}" +
        "{{#map}}{{value}}{{/map}}\n{{/list}}", model) shouldBe "Hello, world!?!\nGoodbye, thanks for all the fish!!?test\n"
    }

    it("streams") {
      val model = new {
        val stream = Stream(
          new { val value = "hello" },
          new { val value = "world" })
      }
      render("{{#stream}}{{value}}{{/stream}}", model) shouldBe "helloworld"
    }

    it("unit") {
      val model = new { val test = if (false) "test" }
      render("{{test}}", model) shouldBe ""
    }

    it("options") {
      val model = new {
        val foo = Some("Hello")
        val bar = None
      }
      render("{{foo}}{{bar}}", model) shouldBe "Hello"
    }
  }

  private def render(template: String, model: Any): String = {
    val mf = new DefaultMustacheFactory()
    mf.setObjectHandler(new ScalaObjectHandler)
    val m = mf.compile(new StringReader(template), "name")
    val sw = new StringWriter
    m.execute(sw, model).close()
    sw.toString
  }
}
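The private render helper shows the essential mustache.java flow: compile a template from a StringReader, then execute it against a model. Stripped of the custom ScalaObjectHandler, the same flow looks like the sketch below, using a Java map, which the default object handler already understands:

import java.io.{StringReader, StringWriter}
import java.util.Collections

import com.github.mustachejava.DefaultMustacheFactory

val mf = new DefaultMustacheFactory()
val template = mf.compile(new StringReader("Hello {{name}}!"), "greeting")
val out = new StringWriter()
template.execute(out, Collections.singletonMap("name", "world")).close()
println(out.toString) // Hello world!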
Example 35
Source File: Decoding.scala From kantan.csv with Apache License 2.0 | 5 votes |
package kantan.csv
package benchmark

import com.univocity.parsers.csv.CsvParserSettings
import engine.ReaderEngine
import java.io.StringReader
import java.util.concurrent.TimeUnit
import org.openjdk.jmh.annotations._

@State(Scope.Thread)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class Decoding {
  @Benchmark
  def kantanInternal: List[CsvEntry] = Decoding.kantan(strData)

  @Benchmark
  def kantanJackson: List[CsvEntry] = Decoding.kantan(strData)(kantan.csv.engine.jackson.jacksonCsvReaderEngine)

  @Benchmark
  def kantanCommons: List[CsvEntry] = Decoding.kantan(strData)(kantan.csv.engine.commons.commonsCsvReaderEngine)

  @Benchmark
  def opencsv: List[CsvEntry] = Decoding.opencsv(strData)

  @Benchmark
  def commons: List[CsvEntry] = Decoding.commons(strData)

  @Benchmark
  def jackson: List[CsvEntry] = Decoding.jackson(strData)

  @Benchmark
  def univocity: List[CsvEntry] = Decoding.univocity(strData)

  @Benchmark
  def scalaCsv: List[CsvEntry] = Decoding.scalaCsv(strData)
}

object Decoding {
  // - Helpers ---------------------------------------------------------------
  // -------------------------------------------------------------------------
  @SuppressWarnings(Array("org.wartremover.warts.Var"))
  class CsvIterator[A](iterator: A)(f: A => Array[String]) extends Iterator[CsvEntry] {
    private var n = f(iterator)

    override def hasNext: Boolean = n != null

    override def next(): CsvEntry = {
      val temp = n
      n = f(iterator)
      toTuple(temp)
    }
  }

  def toTuple(row: Array[String]): CsvEntry = (row(0).toInt, row(1), row(2).toBoolean, row(3).toFloat)

  // - Benchmarks ------------------------------------------------------------
  // -------------------------------------------------------------------------
  def kantan(str: String)(implicit e: ReaderEngine): List[CsvEntry] =
    CsvSource[String].unsafeReader[CsvEntry](str, rfc).toList

  def opencsv(str: String): List[CsvEntry] =
    new CsvIterator(new com.opencsv.CSVReader(new StringReader(str)))(_.readNext()).toList

  def commons(str: String): List[CsvEntry] = {
    val csv = org.apache.commons.csv.CSVFormat.RFC4180.parse(new StringReader(str)).iterator()
    new Iterator[CsvEntry] {
      override def hasNext: Boolean = csv.hasNext
      override def next(): CsvEntry = {
        val n = csv.next()
        (n.get(0).toInt, n.get(1), n.get(2).toBoolean, n.get(3).toFloat)
      }
    }.toList
  }

  @SuppressWarnings(Array("org.wartremover.warts.Null"))
  def jackson(str: String): List[CsvEntry] =
    new CsvIterator(engine.jackson.defaultMappingIteratorBuilder(new StringReader(str), rfc))({ it =>
      if (it.hasNext) it.next() else null
    }).toList

  val univocitySettings: CsvParserSettings = {
    val settings = new com.univocity.parsers.csv.CsvParserSettings
    settings.setReadInputOnSeparateThread(false)
    settings.setInputBufferSize(2048)
    settings.setIgnoreLeadingWhitespaces(false)
    settings
  }

  def univocity(str: String): List[CsvEntry] = {
    val parser = new com.univocity.parsers.csv.CsvParser(univocitySettings)
    parser.beginParsing(new StringReader(str))
    new CsvIterator(parser)(_.parseNext()).toList
  }

  def scalaCsv(str: String): List[CsvEntry] = {
    import com.github.tototoshi.csv._
    CSVReader.open(new StringReader(str)).iterator.map(r => (r(0).toInt, r(1), r(2).toBoolean, r(3).toFloat)).toList
  }
}
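All of these decoders share one trick worth noting: CsvIterator adapts a pull-style parser that signals end of input by returning null into a Scala Iterator. The opencsv reader can be adapted the same way without the helper class; a minimal sketch over hypothetical two-row input:

import java.io.StringReader
import com.opencsv.CSVReader

val reader = new CSVReader(new StringReader("1,foo\n2,bar"))
val rows = Iterator.continually(reader.readNext()).takeWhile(_ != null).toList
rows.foreach(row => println(row.mkString("|"))) // prints 1|foo then 2|bar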
Example 36
Source File: package.scala From tethys with Apache License 2.0 | 5 votes |
import java.io.{Reader, StringReader, StringWriter, Writer}

import tethys.readers.{FieldName, ReaderError}
import tethys.readers.tokens.{TokenIterator, TokenIteratorProducer}
import tethys.writers.tokens.{TokenWriter, TokenWriterProducer}

import scala.Specializable.Group

package object tethys {
  final val specializations = new Group((Short, Int, Long, Float, Double, Boolean))

  implicit class JsonWriterOps[A](val a: A) extends AnyVal {
    def asJson(implicit jsonWriter: JsonWriter[A], tokenWriterProducer: TokenWriterProducer): String = {
      val stringWriter = new StringWriter()
      writeJson(tokenWriterProducer.forWriter(stringWriter))
      stringWriter.toString
    }

    def asJsonWith(jsonWriter: JsonWriter[A])(implicit tokenWriterProducer: TokenWriterProducer): String = {
      asJson(jsonWriter, tokenWriterProducer)
    }

    def writeJson(tokenWriter: TokenWriter)(implicit jsonWriter: JsonWriter[A]): Unit = {
      try jsonWriter.write(a, tokenWriter)
      finally {
        tokenWriter.flush()
      }
    }
  }

  implicit class WriterOps(val w: Writer) extends AnyVal {
    def toTokenWriter(implicit tokenWriterProducer: TokenWriterProducer): TokenWriter =
      tokenWriterProducer.forWriter(w)
  }

  implicit class StringReaderOps(val json: String) extends AnyVal {
    def jsonAs[A](implicit jsonReader: JsonReader[A], producer: TokenIteratorProducer): Either[ReaderError, A] = {
      new StringReader(json).readJson[A]
    }

    def toTokenIterator(implicit producer: TokenIteratorProducer): Either[ReaderError, TokenIterator] = {
      new StringReader(json).toTokenIterator
    }
  }

  implicit class ReaderReaderOps(val reader: Reader) extends AnyVal {
    def readJson[A](implicit jsonReader: JsonReader[A], producer: TokenIteratorProducer): Either[ReaderError, A] = {
      implicit val root: FieldName = FieldName()
      producer.fromReader(reader).right.flatMap(_.readJson[A])
    }

    def readJsonWith[A](jsonReader: JsonReader[A])(implicit producer: TokenIteratorProducer): Either[ReaderError, A] = {
      readJson[A](jsonReader, producer)
    }

    def toTokenIterator(implicit producer: TokenIteratorProducer): Either[ReaderError, TokenIterator] = {
      producer.fromReader(reader)
    }
  }

  implicit class TokenIteratorOps(val tokenIterator: TokenIterator) extends AnyVal {
    def readJson[A](implicit jsonReader: JsonReader[A]): Either[ReaderError, A] = {
      implicit val fieldName: FieldName = FieldName()
      ReaderError.catchNonFatal(jsonReader.read(tokenIterator))
    }
  }
}
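Together these implicit classes give the json.jsonAs[A] and value.asJson syntax, provided a backend supplies the producer instances. A hedged usage sketch, assuming the tethys-jackson module is on the classpath to provide those implicits and that default reader/writer instances for Map are available:

import tethys._
import tethys.jackson._ // assumed backend module supplying the producer implicits

val decoded = """{"k":"v"}""".jsonAs[Map[String, String]] // Either[ReaderError, Map[String, String]]
val encoded = Map("k" -> "v").asJson                      // {"k":"v"}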
Example 37
Source File: CSVReader.scala From spark-csv-es with Apache License 2.0 | 5 votes |
package com.esri

import java.io.StringReader

import com.univocity.parsers.csv.{CsvParser, CsvParserSettings}

class CSVReader(parser: CsvParser) extends Serializable {
  def parseCSV(line: String): Array[String] = {
    parser.beginParsing(new StringReader(line))
    val parsed = parser.parseNext()
    parser.stopParsing()
    parsed
  }
}

object CSVReader {
  def apply(fieldSep: Char = ',',
            lineSep: String = "\n",
            quote: Char = '"',
            escape: Char = '\\',
            ignoreLeadingSpace: Boolean = true,
            ignoreTrailingSpace: Boolean = true,
            inputBufSize: Int = 1024,
            maxCols: Int = 20480): CSVReader = {
    val settings = new CsvParserSettings()
    val format = settings.getFormat
    format.setDelimiter(fieldSep)
    format.setLineSeparator(lineSep)
    format.setQuote(quote)
    format.setQuoteEscape(escape)
    settings.setIgnoreLeadingWhitespaces(ignoreLeadingSpace)
    settings.setIgnoreTrailingWhitespaces(ignoreTrailingSpace)
    settings.setReadInputOnSeparateThread(false)
    settings.setInputBufferSize(inputBufSize)
    settings.setMaxColumns(maxCols)
    settings.setNullValue("")
    settings.setEmptyValue("")
    new CSVReader(new CsvParser(settings))
  }
}
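Because the parser is built once and each parseCSV call re-points it at a fresh StringReader, this reader is suited to parsing one line at a time. A minimal usage sketch with the default settings:

val reader = CSVReader() // defaults: comma delimiter, double-quote, backslash escape
val fields = reader.parseCSV("a,\"b,c\",d")
// fields: Array("a", "b,c", "d") -- the quoted field keeps its embedded comma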
Example 38
Source File: CSVReaderOld.scala From spark-csv-es with Apache License 2.0 | 5 votes |
package com.esri

import java.io.StringReader

import com.univocity.parsers.csv.{CsvParser, CsvParserSettings}

class CSVReaderOld(fieldSep: Char = ',',
                   lineSep: String = "\n",
                   quote: Char = '"',
                   escape: Char = '\\',
                   ignoreLeadingSpace: Boolean = true,
                   ignoreTrailingSpace: Boolean = true,
                   inputBufSize: Int = 128,
                   maxCols: Int = 20480) extends Serializable {

  lazy val parser = {
    val settings = new CsvParserSettings()
    val format = settings.getFormat
    format.setDelimiter(fieldSep)
    format.setLineSeparator(lineSep)
    format.setQuote(quote)
    format.setQuoteEscape(escape)
    settings.setIgnoreLeadingWhitespaces(ignoreLeadingSpace)
    settings.setIgnoreTrailingWhitespaces(ignoreTrailingSpace)
    settings.setReadInputOnSeparateThread(false)
    settings.setInputBufferSize(inputBufSize)
    settings.setMaxColumns(maxCols)
    settings.setNullValue("")
    settings.setEmptyValue("")
    new CsvParser(settings)
  }

  def parseCSV(line: String): Array[String] = {
    parser.beginParsing(new StringReader(line))
    val parsed = parser.parseNext()
    parser.stopParsing()
    parsed
  }
}
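A brief design note: the two readers differ mainly in when the univocity parser is constructed. CSVReaderOld stores only primitive settings and builds its CsvParser in a lazy val, so the parser is recreated on first use after deserialization rather than serialized with the instance; the CSVReader of Example 37 instead receives a pre-built parser from its companion apply.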
Example 39
Source File: FileCredentials.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.credentials

import java.io.{File, FileInputStream, StringReader}
import java.nio.charset.Charset
import java.nio.file.{Files, Paths}
import java.util.Properties

import dataclass.data

import scala.collection.JavaConverters._

@data class FileCredentials(
  path: String,
  optional: Boolean = true
) extends Credentials {

  def get(): Seq[DirectCredentials] = {
    val f = Paths.get(path)
    if (Files.isRegularFile(f)) {
      val content = new String(Files.readAllBytes(f), Charset.defaultCharset())
      FileCredentials.parse(content, path)
    } else if (optional)
      Nil
    else
      throw new Exception(s"Credential file $path not found")
  }
}

object FileCredentials {

  def parse(content: String, origin: String): Seq[DirectCredentials] = {
    val props = new Properties
    props.load(new StringReader(content))

    val userProps = props
      .propertyNames()
      .asScala
      .map(_.asInstanceOf[String])
      .filter(_.endsWith(".username"))
      .toVector

    userProps.map { userProp =>
      val prefix = userProp.stripSuffix(".username")
      val user = props.getProperty(userProp)
      val password = Option(props.getProperty(s"$prefix.password")).getOrElse {
        throw new Exception(s"Property $prefix.password not found in $origin")
      }
      val host = Option(props.getProperty(s"$prefix.host")).getOrElse {
        throw new Exception(s"Property $prefix.host not found in $origin")
      }
      val realmOpt = Option(props.getProperty(s"$prefix.realm")) // filter if empty?
      val matchHost = Option(props.getProperty(s"$prefix.auto")).fold(DirectCredentials.defaultMatchHost)(_.toBoolean)
      val httpsOnly = Option(props.getProperty(s"$prefix.https-only")).fold(DirectCredentials.defaultHttpsOnly)(_.toBoolean)
      val passOnRedirect = Option(props.getProperty(s"$prefix.pass-on-redirect")).fold(false)(_.toBoolean)

      DirectCredentials(host, user, password)
        .withRealm(realmOpt)
        .withMatchHost(matchHost)
        .withHttpsOnly(httpsOnly)
        .withPassOnRedirect(passOnRedirect)
    }
  }
}
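parse expects Java properties entries grouped by an arbitrary prefix, with username, password, and host mandatory per prefix. A hedged sketch of an input it would accept; the prefix and values are illustrative only:

import coursier.credentials.FileCredentials

val content =
  """example.username=alice
    |example.password=s3cret
    |example.host=repo.example.com
    |example.https-only=true""".stripMargin
val creds = FileCredentials.parse(content, "in-memory")
// one DirectCredentials entry for repo.example.com, user alice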
Example 40
Source File: ScannerBenchmark.scala From better-files with MIT License | 5 votes |
package better.files.benchmarks

import better.files._

import java.io.{BufferedReader, StringReader}

class ScannerBenchmark extends Benchmark {
  val file = File.newTemporaryFile()
  val n = 1000
  repeat(n) {
    file
      .appendLine(-n to n mkString " ")
      .appendLine("hello " * n)
      .appendLine("world " * n)
  }

  val scanners: Seq[BufferedReader => AbstractScanner] = Seq(
    new JavaScanner(_),
    new StringBuilderScanner(_),
    new CharBufferScanner(_),
    new StreamingScanner(_),
    new IterableScanner(_),
    new IteratorScanner(_),
    new BetterFilesScanner(_),
    new ArrayBufferScanner(_),
    new FastJavaIOScanner2(_),
    new FastJavaIOScanner(_)
  )

  def runTest(scanner: AbstractScanner) = {
    val (_, time) = profile(run(scanner))
    scanner.close()
    info(f"${scanner.getClass.getSimpleName.padTo(25, ' ')}: $time%4d ms")
  }

  def run(scanner: AbstractScanner): Unit =
    repeat(n) {
      assert(scanner.hasNext)
      val ints = List.fill(2 * n + 1)(scanner.nextInt())
      val line = "" // scanner.nextLine()
      val words = IndexedSeq.fill(2 * n)(scanner.next())
      (line, ints, words)
    }

  test("scanner") {
    info("Warming up ...")
    scanners foreach { scannerBuilder =>
      val canaryData =
        """
          |10 -23
          |Hello World
          |Hello World
          |19
        """.stripMargin
      val scanner = scannerBuilder(new BufferedReader(new StringReader(canaryData)))
      info(s"Testing ${scanner.getClass.getSimpleName} for correctness")
      assert(scanner.hasNext)
      assert(scanner.nextInt() == 10)
      assert(scanner.nextInt() == -23)
      assert(scanner.next() == "Hello")
      assert(scanner.next() == "World")
      val l = scanner.nextLine()
      assert(l == "Hello World", l)
      assert(scanner.nextInt() == 19)
      //assert(!scanner.hasNext)
    }

    info("Running benchmark ...")
    scanners foreach { scanner =>
      runTest(scanner(file.newBufferedReader))
    }
  }
}
Example 41
Source File: SummaryParser.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jawa.flow.summary.susaf.parser

import java.io.StringReader

import org.antlr.v4.runtime.misc.ParseCancellationException
import org.antlr.v4.runtime.{BailErrorStrategy, CharStreams, CommonTokenStream, NoViableAltException}
import org.argus.jawa.flow.summary.susaf.rule.HeapSummaryFile
import org.argus.jawa.flow.summary.grammar.{SafsuLexer, SafsuParser}

object SummaryParser {
  def apply(source: String): HeapSummaryFile = parse(source)

  @throws[SummaryParserException]
  def parse(source: String): HeapSummaryFile = {
    val reader = new StringReader(source)
    val input = CharStreams.fromReader(reader)
    val lexer = new SafsuLexer(input)
    val cts = new CommonTokenStream(lexer)
    val parser = new SafsuParser(cts)
    parser.setErrorHandler(new BailErrorStrategy)
    try {
      SummaryParserVisitor(parser.summaryFile())
    } catch {
      case oie: IndexOutOfBoundsException => throw SummaryParserException(oie)
      case nvae: NoViableAltException => throw SummaryParserException(nvae)
      case pce: ParseCancellationException => throw SummaryParserException(pce.getCause)
    }
  }
}

case class SummaryParserException(cause: Throwable) extends Exception(cause.getMessage)
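This is the standard ANTLR 4 bail-out pipeline: StringReader to CharStream to lexer to token stream to parser, with BailErrorStrategy converting any syntax error into an exception that parse then wraps. A hedged usage sketch; the summary text below is hypothetical and only gestures at the safsu style, so consult the grammar for real syntax:

// the summary source is a made-up placeholder, not verified against SafsuParser
val src = """`Lcom/example/Foo;.bar:()V`: ret = "hello" """
try {
  val summaryFile = SummaryParser.parse(src)
  println("parsed OK")
} catch {
  case e: SummaryParserException => println(s"rejected: ${e.getMessage}")
}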