java.io.CharArrayWriter Scala Examples

The following examples show how to use java.io.CharArrayWriter, a Writer that collects its output in an in-memory char buffer which can be read back with toString and reused via reset(). The source project and file are noted above each example.
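Before the real-world examples, here is a minimal self-contained sketch of the API itself. It uses only standard java.io; the object name is just for illustration.

import java.io.CharArrayWriter

object CharArrayWriterPrimer {
  def main(args: Array[String]): Unit = {
    val buffer = new CharArrayWriter()
    buffer.write("hello, ")
    buffer.write("world")
    println(buffer.toString)  // "hello, world"; reading does not close the writer
    println(buffer.size())    // 12 characters currently buffered
    buffer.reset()            // discard contents; the buffer can be reused
    println(buffer.size())    // 0
  }
}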
Example 1
Source File: CSVParser.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.execution.datasources.csv

import java.io.{CharArrayWriter, StringReader}

import com.univocity.parsers.csv._

import org.apache.spark.internal.Logging


/**
 * Buffers CSV output in a CharArrayWriter: each writeRow() appends one line,
 * and flush() returns the accumulated text and resets the buffer for reuse.
 */
private[csv] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
  private val writerSettings = new CsvWriterSettings
  private val format = writerSettings.getFormat

  format.setDelimiter(params.delimiter)
  format.setQuote(params.quote)
  format.setQuoteEscape(params.escape)
  format.setComment(params.comment)

  writerSettings.setNullValue(params.nullValue)
  writerSettings.setEmptyValue(params.nullValue)
  writerSettings.setSkipEmptyLines(true)
  writerSettings.setQuoteAllFields(params.quoteAll)
  writerSettings.setHeaders(headers: _*)
  writerSettings.setQuoteEscapingEnabled(params.escapeQuotes)

  private val buffer = new CharArrayWriter()
  private val writer = new CsvWriter(buffer, writerSettings)

  def writeRow(row: Seq[String], includeHeader: Boolean): Unit = {
    if (includeHeader) {
      writer.writeHeaders()
    }
    writer.writeRow(row.toArray: _*)
  }

  def flush(): String = {
    writer.flush()
    val lines = buffer.toString.stripLineEnd
    buffer.reset()
    lines
  }

  def close(): Unit = {
    writer.close()
  }
} 
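LineCsvWriter is private[csv], so it cannot be called from user code. The sketch below reproduces its buffer-and-flush pattern against the uniVocity API alone; the header names and row values are made up for illustration.

import java.io.CharArrayWriter

import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}

object CsvBufferSketch {
  def main(args: Array[String]): Unit = {
    val settings = new CsvWriterSettings
    settings.setHeaders("name", "age")   // hypothetical header names

    val buffer = new CharArrayWriter()
    val writer = new CsvWriter(buffer, settings)

    writer.writeHeaders()
    writer.writeRow("alice", "30")
    writer.flush()

    // Drain the buffer and reset it, mirroring what LineCsvWriter.flush() does.
    val csv = buffer.toString.stripLineEnd
    buffer.reset()
    println(csv)   // name,age  then  alice,30

    writer.close()
  }
}

Routing the CsvWriter through a CharArrayWriter rather than a file lets the caller emit one string per row or batch, which is why Spark's CSV writer uses this shape inside a record-at-a-time output pipeline.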
Example 2
Source File: XmlFile.scala    From spark-xml   with Apache License 2.0
package com.databricks.spark.xml.util

import java.io.CharArrayWriter
import java.nio.charset.Charset
import javax.xml.stream.XMLOutputFactory

import scala.collection.Map

import com.databricks.spark.xml.parsers.StaxXmlGenerator
import com.sun.xml.txw2.output.IndentingXMLStreamWriter
import org.apache.hadoop.io.{Text, LongWritable}

import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.apache.spark.sql.DataFrame
import com.databricks.spark.xml.{XmlOptions, XmlInputFormat}

private[xml] object XmlFile {
  val DEFAULT_INDENT = "    "

  def withCharset(
      context: SparkContext,
      location: String,
      charset: String,
      rowTag: String): RDD[String] = {
    // This just checks the charset's validity early, so an invalid name fails fast.
    Charset.forName(charset)
    context.hadoopConfiguration.set(XmlInputFormat.START_TAG_KEY, s"<$rowTag>")
    context.hadoopConfiguration.set(XmlInputFormat.END_TAG_KEY, s"</$rowTag>")
    context.hadoopConfiguration.set(XmlInputFormat.ENCODING_KEY, charset)
    context.newAPIHadoopFile(location,
      classOf[XmlInputFormat],
      classOf[LongWritable],
      classOf[Text]).map { case (_, text) => new String(text.getBytes, 0, text.getLength, charset) }
  }

  
  /**
   * Saves the DataFrame as XML at `path`, serializing each partition's rows
   * through a single reusable CharArrayWriter.
   */
  def saveAsXmlFile(
      dataFrame: DataFrame,
      path: String,
      parameters: Map[String, String] = Map()): Unit = {
    val options = XmlOptions(parameters.toMap)
    val codecClass = CompressionCodecs.getCodecClass(options.codec)
    val rowSchema = dataFrame.schema
    val indent = XmlFile.DEFAULT_INDENT

    val xmlRDD = dataFrame.rdd.mapPartitions { iter =>
      val factory = XMLOutputFactory.newInstance()
      val writer = new CharArrayWriter()
      val xmlWriter = factory.createXMLStreamWriter(writer)
      val indentingXmlWriter = new IndentingXMLStreamWriter(xmlWriter)
      indentingXmlWriter.setIndentStep(indent)

      new Iterator[String] {
        var firstRow: Boolean = true
        var lastRow: Boolean = true

        override def hasNext: Boolean = iter.hasNext || firstRow || lastRow

        override def next: String = {
          if (iter.nonEmpty) {
            if (firstRow) {
              indentingXmlWriter.writeStartElement(options.rootTag)
              firstRow = false
            }
            val xml = {
              StaxXmlGenerator(
                rowSchema,
                indentingXmlWriter,
                options)(iter.next())
              indentingXmlWriter.flush()
              writer.toString
            }
            writer.reset()
            xml
          } else {
            if (!firstRow) {
              lastRow = false
              indentingXmlWriter.writeEndElement()
              indentingXmlWriter.close()
              writer.toString
            } else {
              // This means the iterator was initially empty.
              firstRow = false
              lastRow = false
              ""
            }
          }
        }
      }
    }

    codecClass match {
      case null => xmlRDD.saveAsTextFile(path)
      case codec => xmlRDD.saveAsTextFile(path, codec)
    }
  }
} 
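The core of saveAsXmlFile is one CharArrayWriter shared across a partition: each record is serialized, the buffer is drained with toString, and reset() prepares it for the next record. Below is a standalone sketch of that loop using only the StAX API; Spark and the IndentingXMLStreamWriter wrapper are omitted, and the element name and values are illustrative.

import java.io.CharArrayWriter
import javax.xml.stream.XMLOutputFactory

object XmlBufferSketch {
  def main(args: Array[String]): Unit = {
    val buffer = new CharArrayWriter()
    val xmlWriter = XMLOutputFactory.newInstance().createXMLStreamWriter(buffer)

    // Write one "row" element per value, then drain and reset the buffer,
    // mirroring what XmlFile does once per record.
    for (value <- Seq("a", "b")) {
      xmlWriter.writeStartElement("ROW")
      xmlWriter.writeCharacters(value)
      xmlWriter.writeEndElement()
      xmlWriter.flush()
      println(buffer.toString)  // <ROW>a</ROW>, then <ROW>b</ROW>
      buffer.reset()
    }
    xmlWriter.close()
  }
}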
Example 3
Source File: JacksonGeneratorSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.catalyst.json

import java.io.CharArrayWriter

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData}
import org.apache.spark.sql.types._

class JacksonGeneratorSuite extends SparkFunSuite {

  val gmtId = DateTimeUtils.TimeZoneGMT.getID
  val option = new JSONOptions(Map.empty, gmtId)

  test("initial with StructType and write out a row") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = InternalRow(1)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """{"a":1}""")
  }

  test("initial with StructType and write out rows") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(InternalRow(1) :: InternalRow(2) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{"a":1},{"a":2}]""")
  }

  test("initial with StructType and write out an array with single empty row") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(InternalRow(null) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{}]""")
  }

  test("initial with StructType and write out an empty array") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = new GenericArrayData(Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[]""")
  }

  test("initial with Map and write out a map data") {
    val dataType = MapType(StringType, IntegerType)
    val input = ArrayBasedMapData(Map("a" -> 1))
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """{"a":1}""")
  }

  test("initial with Map and write out an array of maps") {
    val dataType = MapType(StringType, IntegerType)
    val input = new GenericArrayData(
      ArrayBasedMapData(Map("a" -> 1)) :: ArrayBasedMapData(Map("b" -> 2)) :: Nil)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    gen.write(input)
    gen.flush()
    assert(writer.toString === """[{"a":1},{"b":2}]""")
  }

  test("error handling: initial with StructType but error calling write a map") {
    val dataType = StructType(StructField("a", IntegerType) :: Nil)
    val input = ArrayBasedMapData(Map("a" -> 1))
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    intercept[UnsupportedOperationException] {
      gen.write(input)
    }
  }

  test("error handling: initial with MapType and write out a row") {
    val dataType = MapType(StringType, IntegerType)
    val input = InternalRow(1)
    val writer = new CharArrayWriter()
    val gen = new JacksonGenerator(dataType, writer, option)
    intercept[UnsupportedOperationException] {
      gen.write(input)
    }
  }

} 
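JacksonGenerator is Spark-internal, but it delegates to Jackson's streaming JsonGenerator, and the CharArrayWriter plays the same role as in the tests above. The first test's assertion can be reproduced with jackson-core directly; this is a sketch using only public Jackson API, not Spark's wrapper.

import java.io.CharArrayWriter

import com.fasterxml.jackson.core.JsonFactory

object JsonBufferSketch {
  def main(args: Array[String]): Unit = {
    val writer = new CharArrayWriter()
    val gen = new JsonFactory().createGenerator(writer)

    gen.writeStartObject()
    gen.writeNumberField("a", 1)
    gen.writeEndObject()
    gen.flush()   // push buffered JSON into the CharArrayWriter

    assert(writer.toString == """{"a":1}""")
    gen.close()
  }
}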
Example 4
Source File: CSVParser.scala    From sparkoscope   with Apache License 2.0
This project's LineCsvWriter is identical, line for line, to Example 1 above, so the code is not repeated here.
Example 5
Source File: SparkLogDivertAppender.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver.multitenancy

import java.io.CharArrayWriter

import scala.collection.JavaConverters._

import org.apache.hadoop.hive.ql.session.OperationLog
import org.apache.log4j._
import org.apache.log4j.spi.{Filter, LoggingEvent}

import org.apache.spark.internal.Logging

class SparkLogDivertAppender extends WriterAppender with Logging {

  private var operationManager: ThriftServerOperationManager = _

  private class NameFilter(
       var operationManager: ThriftServerOperationManager) extends Filter {
    override def decide(ev: LoggingEvent): Int = {
      val log = operationManager.getOperationLog
      if (log == null) return Filter.DENY
      val currentLoggingMode = log.getOpLoggingLevel
      // If logging is disabled, deny everything.
      if (currentLoggingMode == OperationLog.LoggingLevel.NONE) return Filter.DENY
      Filter.NEUTRAL
    }
  }

  
  // Log events are diverted into this buffer. The excerpt elides the
  // initialization code, which also calls setWriter(writer), sets a layout,
  // and installs the NameFilter defined above.
  private val writer = new CharArrayWriter()

  override protected def subAppend(event: LoggingEvent): Unit = {
    super.subAppend(event)
    // That should've gone into our writer. Notify the LogContext.
    val logOutput = writer.toString
    writer.reset()
    val log = operationManager.getOperationLog
    if (log == null) {
      logDebug(" ---+++=== Dropped log event from thread " + event.getThreadName)
      return
    }
    log.writeOperationLog(logOutput)
  }

} 
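Stripped of the Thrift-server plumbing, the divert pattern is: give the WriterAppender a private CharArrayWriter, let super.subAppend layout-format each event into it, then drain and reset the buffer and forward the text. A minimal hypothetical sketch against the log4j 1.x API follows; println stands in for the OperationLog target.

import java.io.CharArrayWriter

import org.apache.log4j.{Logger, PatternLayout, WriterAppender}
import org.apache.log4j.spi.LoggingEvent

// Minimal divert appender: every layout-formatted event lands in the
// CharArrayWriter, is drained in subAppend, and the buffer is reused.
class DivertAppender extends WriterAppender {
  private val buffer = new CharArrayWriter()
  setLayout(new PatternLayout("%p %m%n"))
  setWriter(buffer)

  override protected def subAppend(event: LoggingEvent): Unit = {
    super.subAppend(event)
    val formatted = buffer.toString
    buffer.reset()
    // Placeholder target; SparkLogDivertAppender writes to an OperationLog.
    print(s"[diverted] $formatted")
  }
}

object DivertDemo {
  def main(args: Array[String]): Unit = {
    val log = Logger.getLogger("demo")
    log.addAppender(new DivertAppender)
    log.info("hello")   // prints "[diverted] INFO hello"
  }
}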
Example 6
Source File: CSVParser.scala    From multi-tenancy-spark   with Apache License 2.0
This project's LineCsvWriter is likewise identical to Example 1 above; the code is not repeated.
Example 7
Source File: JacksonGeneratorSuite.scala    From Spark-2.3.1   with Apache License 2.0
This suite is identical, line for line, to Example 3 above, so the code is not repeated here.