java.io.CharArrayWriter Scala Examples
The following examples show how to use java.io.CharArrayWriter.
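java.io.CharArrayWriter is an in-memory Writer backed by a growable char buffer: you write into it, read the accumulated characters back with toString (or writeTo), and clear it with reset() so the same buffer can be reused. A minimal, standalone sketch of that cycle (hypothetical demo code, not taken from the projects below):

import java.io.CharArrayWriter

object CharArrayWriterDemo {
  def main(args: Array[String]): Unit = {
    val buffer = new CharArrayWriter()
    buffer.write("first record")
    println(buffer.toString)   // read back the accumulated characters
    buffer.reset()             // clear the buffer for reuse
    buffer.write("second record")
    println(buffer.toString)
  }
}

All of the examples below follow this same write / toString / reset pattern.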
Example 1
Source File: CSVParser.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging

private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
}
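LineCsvWriter uses the CharArrayWriter as the in-memory sink for univocity's CsvWriter, so each batch of rows is rendered in memory and handed back as a String. A standalone sketch of the same idea, assuming illustrative settings rather than Spark's CSVOptions:

import java.io.CharArrayWriter

import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}

object InMemoryCsvDemo {
  def main(args: Array[String]): Unit = {
    val settings = new CsvWriterSettings()
    settings.getFormat.setDelimiter(',')   // illustrative; Spark reads this from CSVOptions
    settings.setHeaders("id", "name")

    val buffer = new CharArrayWriter()
    val writer = new CsvWriter(buffer, settings)

    writer.writeHeaders()
    writer.writeRow("1", "alice")
    writer.writeRow("2", "bob")
    writer.flush()

    val csv = buffer.toString.stripLineEnd   // rendered CSV without the trailing newline
    buffer.reset()                           // buffer is ready for the next batch
    println(csv)
  }
}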
Example 2
Source File: XmlFile.scala From spark-xml with Apache License 2.0
package com.databricks.spark.xml.util

import java.io.CharArrayWriter
import java.nio.charset.Charset
import javax.xml.stream.XMLOutputFactory

import scala.collection.Map

import com.databricks.spark.xml.parsers.StaxXmlGenerator
import com.sun.xml.txw2.output.IndentingXMLStreamWriter
import org.apache.hadoop.io.{Text, LongWritable}
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.apache.spark.sql.DataFrame
import com.databricks.spark.xml.{XmlOptions, XmlInputFormat}

private[xml] object XmlFile {
  val DEFAULT_INDENT = " "

  def withCharset(
      context: SparkContext,
      location: String,
      charset: String,
      rowTag: String): RDD[String] = {
    // This just checks the charset's validity early, to keep behavior consistent.
    Charset.forName(charset)
    context.hadoopConfiguration.set(XmlInputFormat.START_TAG_KEY, s"<$rowTag>")
    context.hadoopConfiguration.set(XmlInputFormat.END_TAG_KEY, s"</$rowTag>")
    context.hadoopConfiguration.set(XmlInputFormat.ENCODING_KEY, charset)
    context.newAPIHadoopFile(location,
      classOf[XmlInputFormat],
      classOf[LongWritable],
      classOf[Text]).map { case (_, text) =>
        new String(text.getBytes, 0, text.getLength, charset)
      }
  }

  def saveAsXmlFile(
      dataFrame: DataFrame,
      path: String,
      parameters: Map[String, String] = Map()): Unit = {
    val options = XmlOptions(parameters.toMap)
    val codecClass = CompressionCodecs.getCodecClass(options.codec)
    val rowSchema = dataFrame.schema
    val indent = XmlFile.DEFAULT_INDENT

    val xmlRDD = dataFrame.rdd.mapPartitions { iter =>
      val factory = XMLOutputFactory.newInstance()
      val writer = new CharArrayWriter()
      val xmlWriter = factory.createXMLStreamWriter(writer)
      val indentingXmlWriter = new IndentingXMLStreamWriter(xmlWriter)
      indentingXmlWriter.setIndentStep(indent)

      new Iterator[String] {
        var firstRow: Boolean = true
        var lastRow: Boolean = true

        override def hasNext: Boolean = iter.hasNext || firstRow || lastRow

        override def next: String = {
          if (iter.nonEmpty) {
            if (firstRow) {
              indentingXmlWriter.writeStartElement(options.rootTag)
              firstRow = false
            }
            val xml = {
              StaxXmlGenerator(
                rowSchema,
                indentingXmlWriter,
                options)(iter.next())
              indentingXmlWriter.flush()
              writer.toString
            }
            writer.reset()
            xml
          } else {
            if (!firstRow) {
              lastRow = false
              indentingXmlWriter.writeEndElement()
              indentingXmlWriter.close()
              writer.toString
            } else {
              // This means the iterator was initially empty.
              firstRow = false
              lastRow = false
              ""
            }
          }
        }
      }
    }
    codecClass match {
      case null => xmlRDD.saveAsTextFile(path)
      case codec => xmlRDD.saveAsTextFile(path, codec)
    }
  }
}
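The key pattern in saveAsXmlFile is that the StAX writer targets a CharArrayWriter: each row is serialized into memory, read back with toString, and the buffer is cleared with reset() before the next row. The same mechanism using only the standard javax.xml.stream API (a hedged sketch, independent of spark-xml's StaxXmlGenerator):

import java.io.CharArrayWriter
import javax.xml.stream.XMLOutputFactory

object InMemoryXmlDemo {
  def main(args: Array[String]): Unit = {
    val buffer = new CharArrayWriter()
    val xmlWriter = XMLOutputFactory.newInstance().createXMLStreamWriter(buffer)

    xmlWriter.writeStartElement("row")
    xmlWriter.writeStartElement("a")
    xmlWriter.writeCharacters("1")
    xmlWriter.writeEndElement()
    xmlWriter.writeEndElement()
    xmlWriter.flush()

    println(buffer.toString)   // <row><a>1</a></row>
    buffer.reset()             // reuse the buffer for the next record
  }
}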
Example 3
Source File: JacksonGeneratorSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.json

import java.io.CharArrayWriter

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData}
import org.apache.spark.sql.types._

class JacksonGeneratorSuite extends SparkFunSuite {

  val gmtId = DateTimeUtils.TimeZoneGMT.getID
  val option = new JSONOptions(Map.empty, gmtId)

  test("initial with StructType and write out a row") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = InternalRow(1)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """{"a":1}""")
  }

  test("initial with StructType and write out rows") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(InternalRow(1) :: InternalRow(2) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{"a":1},{"a":2}]""")
  }

  test("initial with StructType and write out an array with single empty row") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(InternalRow(null) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{}]""")
  }

  test("initial with StructType and write out an empty array") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[]""")
  }

  test("initial with Map and write out a map data") {
    val dataType = MapType(StringType, IntegerType)
    val input = ArrayBasedMapData(Map("a" -> 1))
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """{"a":1}""")
  }

  test("initial with Map and write out an array of maps") {
    val dataType = MapType(StringType, IntegerType)
    val input = new GenericArrayData(
      ArrayBasedMapData(Map("a" -> 1)) :: ArrayBasedMapData(Map("b" -> 2)) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{"a":1},{"b":2}]""")
  }

  test("error handling: initial with StructType but error calling write a map") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = ArrayBasedMapData(Map("a" -> 1))
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    intercept[UnsupportedOperationException] {
      gen.write(input)
    }
  }

  test("error handling: initial with MapType and write out a row") {
    val dataType = MapType(StringType, IntegerType)
    val input = InternalRow(1)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    intercept[UnsupportedOperationException] {
      gen.write(input)
    }
  }
}
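Spark's JacksonGenerator is an internal class, but the buffering pattern this suite exercises is plain Jackson: a JsonGenerator writes into the CharArrayWriter, flush() pushes any buffered output through, and toString yields the JSON text. A rough equivalent using only Jackson's public API (illustrative, not Spark code):

import java.io.CharArrayWriter

import com.fasterxml.jackson.core.JsonFactory

object InMemoryJsonDemo {
  def main(args: Array[String]): Unit = {
    val writer = new CharArrayWriter()
    val gen = new JsonFactory().createGenerator(writer)

    gen.writeStartObject()
    gen.writeNumberField("a", 1)
    gen.writeEndObject()
    gen.flush()

    assert(writer.toString == """{"a":1}""")
    writer.reset()   // the same buffer can back the next value
  }
}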
Example 4
Source File: CSVParser.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging

private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
}
Example 5
Source File: SparkLogDivertAppender.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver.multitenancy

import java.io.CharArrayWriter

import scala.collection.JavaConverters._

import org.apache.hadoop.hive.ql.session.OperationLog
import org.apache.log4j._
import org.apache.log4j.spi.{Filter, LoggingEvent}

import org.apache.spark.internal.Logging

class SparkLogDivertAppender extends WriterAppender with Logging {

  private var operationManager: ThriftServerOperationManager = _

  // In-memory buffer the appender writes into and subAppend drains below.
  private val writer = new CharArrayWriter()

  private class NameFilter(
      var operationManager: ThriftServerOperationManager) extends Filter {
    override def decide(ev: LoggingEvent): Int = {
      val log = operationManager.getOperationLog
      if (log == null) return Filter.DENY
      val currentLoggingMode = log.getOpLoggingLevel
      // If logging is disabled, deny everything.
      if (currentLoggingMode == OperationLog.LoggingLevel.NONE) return Filter.DENY
      Filter.NEUTRAL
    }
  }

  override protected def subAppend(event: LoggingEvent): Unit = {
    super.subAppend(event)
    // That should've gone into our writer. Notify the LogContext.
    val logOutput = writer.toString
    writer.reset()

    val log = operationManager.getOperationLog
    if (log == null) {
      logDebug(" ---+++=== Dropped log event from thread " + event.getThreadName)
      return
    }
    log.writeOperationLog(logOutput)
  }
}
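This works because log4j 1.x's WriterAppender writes each formatted event to whatever Writer it is given; subAppend then drains the CharArrayWriter with toString and reset(). A minimal, hypothetical capture-buffer sketch without the Thrift-server plumbing:

import java.io.CharArrayWriter

import org.apache.log4j.{Logger, PatternLayout, WriterAppender}

object LogCaptureDemo {
  def main(args: Array[String]): Unit = {
    val buffer = new CharArrayWriter()
    val appender = new WriterAppender(new PatternLayout("%p %m%n"), buffer)

    val logger = Logger.getLogger("demo")
    logger.addAppender(appender)
    logger.warn("hello")

    println(buffer.toString)   // formatted event, e.g. "WARN hello"
    buffer.reset()             // discard the captured output
  }
}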
Example 6
Source File: CSVParser.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging

private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
}
Example 7
Source File: JacksonGeneratorSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.json

import java.io.CharArrayWriter

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData}
import org.apache.spark.sql.types._

class JacksonGeneratorSuite extends SparkFunSuite {

  val gmtId = DateTimeUtils.TimeZoneGMT.getID
  val option = new JSONOptions(Map.empty, gmtId)

  test("initial with StructType and write out a row") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = InternalRow(1)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """{"a":1}""")
  }

  test("initial with StructType and write out rows") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(InternalRow(1) :: InternalRow(2) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{"a":1},{"a":2}]""")
  }

  test("initial with StructType and write out an array with single empty row") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(InternalRow(null) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{}]""")
  }

  test("initial with StructType and write out an empty array") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[]""")
  }

  test("initial with Map and write out a map data") {
    val dataType = MapType(StringType, IntegerType)
    val input = ArrayBasedMapData(Map("a" -> 1))
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """{"a":1}""")
  }

  test("initial with Map and write out an array of maps") {
    val dataType = MapType(StringType, IntegerType)
    val input = new GenericArrayData(
      ArrayBasedMapData(Map("a" -> 1)) :: ArrayBasedMapData(Map("b" -> 2)) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{"a":1},{"b":2}]""")
  }

  test("error handling: initial with StructType but error calling write a map") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = ArrayBasedMapData(Map("a" -> 1))
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    intercept[UnsupportedOperationException] {
      gen.write(input)
    }
  }

  test("error handling: initial with MapType and write out a row") {
    val dataType = MapType(StringType, IntegerType)
    val input = InternalRow(1)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    intercept[UnsupportedOperationException] {
      gen.write(input)
    }
  }
}