org.apache.spark.util.JsonProtocol Scala Examples
The following examples show how to use org.apache.spark.util.JsonProtocol.
Each example is drawn from an open-source project; the source file, originating project, and license are noted above the code.
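Before the project-specific examples, a minimal sketch of the core JsonProtocol round trip may help: serialize a listener event to JSON, then rebuild the typed event from that JSON. It assumes the Spark 2.x-era API used in the examples below, where sparkEventToJson and sparkEventFromJson work on json4s JValues; the event chosen here, SparkListenerApplicationEnd, is only an illustration.

import org.json4s.jackson.JsonMethods.{compact, parse, render}

import org.apache.spark.scheduler.{SparkListenerApplicationEnd, SparkListenerEvent}
import org.apache.spark.util.JsonProtocol

object JsonProtocolRoundTrip {
  def main(args: Array[String]): Unit = {
    // Serialize a simple event to a json4s JValue, then to a compact JSON string,
    // the same one-event-per-line representation Spark writes to its event logs.
    val event = SparkListenerApplicationEnd(1234L)
    val jsonString = compact(render(JsonProtocol.sparkEventToJson(event)))

    // Parse the string back and rebuild the typed event.
    val restored: SparkListenerEvent = JsonProtocol.sparkEventFromJson(parse(jsonString))
    assert(restored == event)
  }
}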
Example 1
Source File: SourceStatus.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import java.{util => ju}

import scala.collection.JavaConverters._

import org.json4s._
import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.streaming.StreamingQueryStatus.indent
import org.apache.spark.util.JsonProtocol

private[sql] object SourceStatus {

  def apply(
      desc: String,
      offsetDesc: String,
      inputRate: Double,
      processingRate: Double,
      triggerDetails: Map[String, String]): SourceStatus = {
    new SourceStatus(desc, offsetDesc, inputRate, processingRate, triggerDetails.asJava)
  }
}
Example 2
Source File: ReplayListenerBus.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.ReplayListenerBus._
import org.apache.spark.util.JsonProtocol

// Replays JSON-encoded SparkListenerEvents from an event log back onto the listener bus.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false,
      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 0
    try {
      val lineEntries = Source.fromInputStream(logData)
        .getLines()
        .zipWithIndex
        .filter { case (line, _) => eventsFilter(line) }
      while (lineEntries.hasNext) {
        try {
          val entry = lineEntries.next()
          currentLine = entry._1
          lineNumber = entry._2 + 1
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated.
            // The last entry may not be the very last line in the event log, but we treat it
            // as such in a best effort to replay the given input.
            if (!maybeTruncated || lineEntries.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }
}

private[spark] object ReplayListenerBus {

  type ReplayEventsFilter = (String) => Boolean

  // utility filter that selects all event logs during replay
  val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true }
}
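The pattern above is simply "one JSON-encoded event per line". A standalone sketch of the same idea, outside the private[spark] bus machinery and without the truncation handling, might look like this; readEvents and its path parameter are illustrative names, not part of the Spark API.

import scala.io.Source

import org.json4s.jackson.JsonMethods.parse

import org.apache.spark.scheduler.SparkListenerEvent
import org.apache.spark.util.JsonProtocol

object EventLogReader {
  // Parse every line of an event log into a typed SparkListenerEvent.
  // Event logs written by Spark contain exactly one JSON event per line.
  def readEvents(path: String): Seq[SparkListenerEvent] = {
    val source = Source.fromFile(path)
    try {
      source.getLines().map(line => JsonProtocol.sparkEventFromJson(parse(line))).toList
    } finally {
      source.close()
    }
  }
}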
Example 3
Source File: SQLJsonProtocolSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution

import org.json4s.jackson.JsonMethods.parse

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart
import org.apache.spark.util.JsonProtocol

class SQLJsonProtocolSuite extends SparkFunSuite {

  test("SparkPlanGraph backward compatibility: metadata") {
    val SQLExecutionStartJsonString =
      """
        |{
        |  "Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart",
        |  "executionId":0,
        |  "description":"test desc",
        |  "details":"test detail",
        |  "physicalPlanDescription":"test plan",
        |  "sparkPlanInfo": {
        |    "nodeName":"TestNode",
        |    "simpleString":"test string",
        |    "children":[],
        |    "metadata":{},
        |    "metrics":[]
        |  },
        |  "time":0
        |}
      """.stripMargin
    val reconstructedEvent = JsonProtocol.sparkEventFromJson(parse(SQLExecutionStartJsonString))
    val expectedEvent = SparkListenerSQLExecutionStart(0, "test desc", "test detail", "test plan",
      new SparkPlanInfo("TestNode", "test string", Nil, Map(), Nil), 0)
    assert(reconstructedEvent == expectedEvent)
  }
}
Example 4
Source File: UnifiedSparkListener.scala From spark-monitoring with MIT License
package org.apache.spark.listeners

import java.time.Instant

import org.apache.spark.{SparkConf, SparkException, SparkInformation}
import org.apache.spark.internal.Logging
import org.apache.spark.listeners.sink.SparkListenerSink
import org.apache.spark.scheduler._
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.JsonProtocol
import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}

import scala.util.control.NonFatal

class UnifiedSparkListener(override val conf: SparkConf)
  extends UnifiedSparkListenerHandler
    with Logging
    with SparkListenerHandlers
    with StreamingListenerHandlers
    with StreamingQueryListenerHandlers {

  private val listenerSink = this.createSink(this.conf)

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    // All events in Spark that are not specific to SparkListener go through
    // this method.  The typed ListenerBus implementations intercept and forward to
    // their "local" listeners.
    // We will just handle everything here so we only have to have one listener.
    // The advantage is that this can be registered in extraListeners, so no
    // code change is required to add listener support.
    event match {
      // We will use the ClassTag for the private wrapper class to match
      case this.streamingListenerEventClassTag(e) =>
        this.onStreamingListenerEvent(e)
      case streamingQueryListenerEvent: StreamingQueryListener.Event =>
        this.onStreamingQueryListenerEvent(streamingQueryListenerEvent)
      case sparkListenerEvent: SparkListenerEvent =>
        if (sparkListenerEvent.logEvent) {
          logSparkListenerEvent(sparkListenerEvent)
        }
    }
  }

  private def createSink(conf: SparkConf): SparkListenerSink = {
    val sink = conf.getOption("spark.unifiedListener.sink") match {
      case Some(listenerSinkClassName) => listenerSinkClassName
      case None =>
        throw new SparkException("spark.unifiedListener.sink setting is required")
    }
    logInfo(s"Creating listener sink: ${sink}")
    org.apache.spark.util.Utils.loadExtensions(
      classOf[SparkListenerSink],
      Seq(sink),
      conf).head
  }

  protected def logSparkListenerEvent(
      event: SparkListenerEvent,
      getTimestamp: () => Instant = () => Instant.now()): Unit = {
    val json = try {
      // Add a well-known time field.
      Some(
        JsonProtocol.sparkEventToJson(event)
          .merge(render(
            SparkInformation.get() + ("SparkEventTime" -> getTimestamp().toString)
          ))
      )
    } catch {
      case NonFatal(e) =>
        logError(s"Error serializing SparkListenerEvent to JSON: $event", e)
        None
    }
    sendToSink(json)
  }

  private[spark] def sendToSink(json: Option[JValue]): Unit = {
    try {
      json match {
        case Some(j) => {
          logDebug(s"Sending event to listener sink: ${compact(j)}")
          this.listenerSink.logEvent(json)
        }
        case None => {
          logWarning("json value was None")
        }
      }
    } catch {
      case NonFatal(e) =>
        logError(s"Error sending to listener sink: $e")
    }
  }
}
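As the comment in onOtherEvent notes, this listener is meant to be attached through Spark's standard spark.extraListeners mechanism, and createSink requires spark.unifiedListener.sink to name a sink implementation. A hedged sketch of that wiring follows; the sink class name is a placeholder, not a real class from the project.

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object UnifiedListenerWiring {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      // Register the listener without changing application code.
      .set("spark.extraListeners", "org.apache.spark.listeners.UnifiedSparkListener")
      // Required by createSink above; this class name is a placeholder.
      .set("spark.unifiedListener.sink", "com.example.MyListenerSink")

    val spark = SparkSession.builder()
      .appName("unified-listener-example")
      .master("local[*]")
      .config(conf)
      .getOrCreate()

    spark.range(10).count()  // generates SparkListenerEvents that flow through the listener
    spark.stop()
  }
}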
Example 5
Source File: ReplayListenerBus.scala From SparkCore with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// Replays JSON-encoded SparkListenerEvents from an event log back onto the listener bus.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(logData: InputStream, sourceName: String): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      lines.foreach { line =>
        currentLine = line
        postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }
}
Example 6
Source File: MergeIntoAccumulatorSuite.scala From delta with Apache License 2.0
package org.apache.spark.sql.delta

import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.sql.delta.commands.MergeIntoCommand
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.status.TaskDataWrapper
import org.apache.spark.util.JsonProtocol

class MergeIntoAccumulatorSuite
  extends QueryTest
  with SharedSparkSession
  with DeltaSQLCommandTest {

  import testImplicits._

  private def runTestMergeCommand(): Unit = {
    // Run a simple merge command
    withTempView("source") {
      withTempDir { tempDir =>
        val tempPath = tempDir.getCanonicalPath
        Seq((1, 1), (0, 3)).toDF("key", "value").createOrReplaceTempView("source")
        Seq((2, 2), (1, 4)).toDF("key", "value").write.format("delta").save(tempPath)
        spark.sql(
          s"""
             |MERGE INTO delta.`$tempPath` target
             |USING source src
             |ON src.key = target.key
             |WHEN MATCHED THEN UPDATE SET *
             |WHEN NOT MATCHED THEN INSERT *
             |""".stripMargin)
      }
    }
  }

  test("accumulators used by MERGE should not be tracked by Spark UI") {
    runTestMergeCommand()

    // Make sure all Spark events generated by the above command have been processed
    spark.sparkContext.listenerBus.waitUntilEmpty(30000)

    val store = spark.sparkContext.statusStore.store
    val iter = store.view(classOf[TaskDataWrapper]).closeableIterator()
    try {
      // Collect all accumulator names tracked by Spark UI.
      val accumNames = iter.asScala.toVector.flatMap { task =>
        task.accumulatorUpdates.map(_.name)
      }.toSet
      // Verify accumulators used by MergeIntoCommand are not tracked.
      assert(!accumNames.contains(MergeIntoCommand.TOUCHED_FILES_ACCUM_NAME))
    } finally {
      iter.close()
    }
  }
}
Example 7
Source File: ReplayListenerBus.scala From iolap with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// Replays JSON-encoded SparkListenerEvents from an event log back onto the listener bus.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }
}
Example 8
Source File: ReplayListenerBus.scala From spark1.52 with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// Replays JSON-encoded SparkListenerEvents from an event log back onto the listener bus.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }
}
Example 9
Source File: SQLJsonProtocolSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution

import org.json4s.jackson.JsonMethods.parse

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart
import org.apache.spark.util.JsonProtocol

class SQLJsonProtocolSuite extends SparkFunSuite {

  test("SparkPlanGraph backward compatibility: metadata") {
    val SQLExecutionStartJsonString =
      """
        |{
        |  "Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart",
        |  "executionId":0,
        |  "description":"test desc",
        |  "details":"test detail",
        |  "physicalPlanDescription":"test plan",
        |  "sparkPlanInfo": {
        |    "nodeName":"TestNode",
        |    "simpleString":"test string",
        |    "children":[],
        |    "metadata":{},
        |    "metrics":[]
        |  },
        |  "time":0
        |}
      """.stripMargin
    val reconstructedEvent = JsonProtocol.sparkEventFromJson(parse(SQLExecutionStartJsonString))
    val expectedEvent = SparkListenerSQLExecutionStart(0, "test desc", "test detail", "test plan",
      new SparkPlanInfo("TestNode", "test string", Nil, Nil), 0)
    assert(reconstructedEvent == expectedEvent)
  }
}
Example 10
Source File: ReplayListenerBus.scala From BigDatalog with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// Replays JSON-encoded SparkListenerEvents from an event log back onto the listener bus.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      while (lines.hasNext) {
        currentLine = lines.next()
        try {
          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            if (!maybeTruncated || lines.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }
}