org.apache.spark.sql.catalyst.util.CaseInsensitiveMap Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.util.CaseInsensitiveMap.
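CaseInsensitiveMap wraps an ordinary Map[String, T] so that key lookups ignore case while the original keys and values are preserved. A minimal sketch of that behavior (the option name and value below are placeholders):

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

val options = CaseInsensitiveMap(Map("brokerUrl" -> "tcp://localhost:1883"))
// Lookups succeed regardless of the casing used by the caller.
assert(options.get("brokerurl") == Some("tcp://localhost:1883"))
assert(options.get("BROKERURL") == Some("tcp://localhost:1883"))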
Example 1
Source File: MQTTUtils.scala From bahir with Apache License 2.0
package org.apache.bahir.sql.streaming.mqtt

import java.util.Properties

import org.eclipse.paho.client.mqttv3.{MqttClient, MqttClientPersistence, MqttConnectOptions}
import org.eclipse.paho.client.mqttv3.persist.{MemoryPersistence, MqttDefaultFilePersistence}

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

import org.apache.bahir.utils.Logging

object MQTTUtils extends Logging {
  // Since data source configuration properties are case-insensitive,
  // we have to introduce our own keys. Also, good for vendor independence.
  private[mqtt] val sslParamMapping = Map(
    "ssl.protocol" -> "com.ibm.ssl.protocol",
    "ssl.key.store" -> "com.ibm.ssl.keyStore",
    "ssl.key.store.password" -> "com.ibm.ssl.keyStorePassword",
    "ssl.key.store.type" -> "com.ibm.ssl.keyStoreType",
    "ssl.key.store.provider" -> "com.ibm.ssl.keyStoreProvider",
    "ssl.trust.store" -> "com.ibm.ssl.trustStore",
    "ssl.trust.store.password" -> "com.ibm.ssl.trustStorePassword",
    "ssl.trust.store.type" -> "com.ibm.ssl.trustStoreType",
    "ssl.trust.store.provider" -> "com.ibm.ssl.trustStoreProvider",
    "ssl.ciphers" -> "com.ibm.ssl.enabledCipherSuites",
    "ssl.key.manager" -> "com.ibm.ssl.keyManager",
    "ssl.trust.manager" -> "com.ibm.ssl.trustManager"
  )

  def parseConfigParams(config: Map[String, String]):
      (String, String, String, MqttClientPersistence, MqttConnectOptions, Int, Long, Long, Int) = {
    def e(s: String) = new IllegalArgumentException(s)
    val parameters = CaseInsensitiveMap(config)

    val brokerUrl: String = parameters.getOrElse("brokerUrl", parameters.getOrElse("path",
      throw e("Please provide a `brokerUrl` by specifying path or .options(\"brokerUrl\",...)")))

    val persistence: MqttClientPersistence = parameters.get("persistence") match {
      case Some("memory") => new MemoryPersistence()
      case _ =>
        val localStorage: Option[String] = parameters.get("localStorage")
        localStorage match {
          case Some(x) => new MqttDefaultFilePersistence(x)
          case None => new MqttDefaultFilePersistence()
        }
    }

    // if default is subscribe everything, it leads to getting lot unwanted system messages.
    val topic: String = parameters.getOrElse("topic",
      throw e("Please specify a topic, by .options(\"topic\",...)"))

    val clientId: String = parameters.getOrElse("clientId", {
      log.warn("If `clientId` is not set, a random value is picked up."
        + "\nRecovering from failure is not supported in such a case.")
      MqttClient.generateClientId()})

    val username: Option[String] = parameters.get("username")
    val password: Option[String] = parameters.get("password")
    val connectionTimeout: Int = parameters.getOrElse("connectionTimeout",
      MqttConnectOptions.CONNECTION_TIMEOUT_DEFAULT.toString).toInt
    val keepAlive: Int = parameters.getOrElse("keepAlive", MqttConnectOptions
      .KEEP_ALIVE_INTERVAL_DEFAULT.toString).toInt
    val mqttVersion: Int = parameters.getOrElse("mqttVersion", MqttConnectOptions
      .MQTT_VERSION_DEFAULT.toString).toInt
    val cleanSession: Boolean = parameters.getOrElse("cleanSession", "false").toBoolean
    val qos: Int = parameters.getOrElse("QoS", "1").toInt
    val autoReconnect: Boolean = parameters.getOrElse("autoReconnect", "false").toBoolean
    val maxInflight: Int = parameters.getOrElse("maxInflight", "60").toInt
    val maxBatchMessageNum = parameters.getOrElse("maxBatchMessageNum", s"${Long.MaxValue}").toLong
    val maxBatchMessageSize = parameters.getOrElse("maxBatchMessageSize", s"${Long.MaxValue}").toLong
    val maxRetryNumber = parameters.getOrElse("maxRetryNum", "3").toInt

    val mqttConnectOptions: MqttConnectOptions = new MqttConnectOptions()
    mqttConnectOptions.setAutomaticReconnect(autoReconnect)
    mqttConnectOptions.setCleanSession(cleanSession)
    mqttConnectOptions.setConnectionTimeout(connectionTimeout)
    mqttConnectOptions.setKeepAliveInterval(keepAlive)
    mqttConnectOptions.setMqttVersion(mqttVersion)
    mqttConnectOptions.setMaxInflight(maxInflight)
    (username, password) match {
      case (Some(u: String), Some(p: String)) =>
        mqttConnectOptions.setUserName(u)
        mqttConnectOptions.setPassword(p.toCharArray)
      case _ =>
    }
    val sslProperties = new Properties()
    config.foreach(e => {
      if (e._1.startsWith("ssl.")) {
        sslProperties.setProperty(sslParamMapping(e._1), e._2)
      }
    })
    mqttConnectOptions.setSSLProperties(sslProperties)

    (brokerUrl, clientId, topic, persistence, mqttConnectOptions, qos,
      maxBatchMessageNum, maxBatchMessageSize, maxRetryNumber)
  }
}
Example 2
Source File: HiveAcidSinkOptions.scala From spark-acid with Apache License 2.0
package com.qubole.spark.hiveacid.streaming

import java.util.concurrent.TimeUnit

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

import scala.util.Try

class HiveAcidSinkOptions(parameters: CaseInsensitiveMap[String]) {

  import HiveAcidSinkOptions._

  def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))

  val tableName = parameters.get("table").getOrElse{
    throw new IllegalArgumentException("Table Name is not specified")
  }

  val fileCleanupDelayMs = withLongParameter(CLEANUP_DELAY_KEY, DEFAULT_CLEANUP_DELAY)
  val isDeletingExpiredLog = withBooleanParameter(LOG_DELETION_KEY, DEFAULT_LOG_DELETION)
  val compactInterval = withIntParameter(COMPACT_INTERVAL_KEY, DEFAULT_COMPACT_INTERVAL)
  val minBatchesToRetain = withIntParameter(MIN_BATCHES_TO_RETAIN_KEY, DEFAULT_MIN_BATCHES_TO_RETAIN)
  val metadataDir = parameters.get(METADATA_DIR_KEY)

  private def withIntParameter(name: String, default: Int): Int = {
    parameters.get(name).map { str =>
      Try(str.toInt).toOption.filter(_ > 0).getOrElse {
        throw new IllegalArgumentException(
          s"Invalid value '$str' for option '$name', must be a positive integer")
      }
    }.getOrElse(default)
  }

  private def withLongParameter(name: String, default: Long): Long = {
    parameters.get(name).map { str =>
      Try(str.toLong).toOption.filter(_ >= 0).getOrElse {
        throw new IllegalArgumentException(
          s"Invalid value '$str' for option '$name', must be a positive integer")
      }
    }.getOrElse(default)
  }

  private def withBooleanParameter(name: String, default: Boolean): Boolean = {
    parameters.get(name).map { str =>
      try {
        str.toBoolean
      } catch {
        case _: IllegalArgumentException =>
          throw new IllegalArgumentException(
            s"Invalid value '$str' for option '$name', must be true or false")
      }
    }.getOrElse(default)
  }
}

object HiveAcidSinkOptions {

  val DEFAULT_CLEANUP_DELAY = TimeUnit.MINUTES.toMillis(10)
  val DEFAULT_LOG_DELETION = true
  val DEFAULT_COMPACT_INTERVAL = 10
  val DEFAULT_MIN_BATCHES_TO_RETAIN = 100

  val CLEANUP_DELAY_KEY = "spark.acid.streaming.log.cleanupDelayMs"
  val LOG_DELETION_KEY = "spark.acid.streaming.log.deletion"
  val COMPACT_INTERVAL_KEY = "spark.acid.streaming.log.compactInterval"
  val MIN_BATCHES_TO_RETAIN_KEY = "spark.acid.streaming.log.minBatchesToRetain"
  val METADATA_DIR_KEY = "spark.acid.streaming.log.metadataDir"
}
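Since the constructor accepts a plain Map and wraps it in a CaseInsensitiveMap, the spark.acid.streaming.log.* keys and the table option can be given in any casing. A small sketch (the table name and value are placeholders):

val sinkOptions = new HiveAcidSinkOptions(Map(
  "TABLE" -> "default.acid_events",                    // matches the "table" key case-insensitively
  "spark.acid.streaming.log.compactInterval" -> "20"
))
assert(sinkOptions.tableName == "default.acid_events")
assert(sinkOptions.compactInterval == 20)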
Example 3
Source File: FileStreamOptions.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import scala.util.Try

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.util.Utils

  val fileNameOnly: Boolean = withBooleanParameter("fileNameOnly", false)

  private def withBooleanParameter(name: String, default: Boolean) = {
    parameters.get(name).map { str =>
      try {
        str.toBoolean
      } catch {
        case _: IllegalArgumentException =>
          throw new IllegalArgumentException(
            s"Invalid value '$str' for option '$name', must be 'true' or 'false'")
      }
    }.getOrElse(default)
  }
}
Example 4
Source File: ParquetOptions.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources.parquet

import java.util.Locale

import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.metadata.CompressionCodecName

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.internal.SQLConf

  val mergeSchema: Boolean = parameters
    .get(MERGE_SCHEMA)
    .map(_.toBoolean)
    .getOrElse(sqlConf.isParquetSchemaMergingEnabled)
}

object ParquetOptions {
  val MERGE_SCHEMA = "mergeSchema"

  // The parquet compression short names
  private val shortParquetCompressionCodecNames = Map(
    "none" -> CompressionCodecName.UNCOMPRESSED,
    "uncompressed" -> CompressionCodecName.UNCOMPRESSED,
    "snappy" -> CompressionCodecName.SNAPPY,
    "gzip" -> CompressionCodecName.GZIP,
    "lzo" -> CompressionCodecName.LZO)

  def getParquetCompressionCodecName(name: String): String = {
    shortParquetCompressionCodecNames(name).name()
  }
}
Example 5
Source File: OrcOptions.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources.orc

import java.util.Locale

import org.apache.orc.OrcConf.COMPRESS

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.internal.SQLConf

  val compressionCodec: String = {
    // `compression`, `orc.compress`(i.e., OrcConf.COMPRESS), and `spark.sql.orc.compression.codec`
    // are in order of precedence from highest to lowest.
    val orcCompressionConf = parameters.get(COMPRESS.getAttribute)
    val codecName = parameters
      .get("compression")
      .orElse(orcCompressionConf)
      .getOrElse(sqlConf.orcCompressionCodec)
      .toLowerCase(Locale.ROOT)
    if (!shortOrcCompressionCodecNames.contains(codecName)) {
      val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT))
      throw new IllegalArgumentException(s"Codec [$codecName] " +
        s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
    }
    shortOrcCompressionCodecNames(codecName)
  }
}

object OrcOptions {
  // The ORC compression short names
  private val shortOrcCompressionCodecNames = Map(
    "none" -> "NONE",
    "uncompressed" -> "NONE",
    "snappy" -> "SNAPPY",
    "zlib" -> "ZLIB",
    "lzo" -> "LZO")

  def getORCCompressionCodecName(name: String): String = shortOrcCompressionCodecNames(name)
}
Example 6
Source File: LibSVMOptions.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.ml.source.libsvm

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

  val numFeatures = parameters.get(NUM_FEATURES).map(_.toInt).filter(_ > 0)

  val isSparse = parameters.getOrElse(VECTOR_TYPE, SPARSE_VECTOR_TYPE) match {
    case SPARSE_VECTOR_TYPE => true
    case DENSE_VECTOR_TYPE => false
    case o => throw new IllegalArgumentException(s"Invalid value `$o` for parameter " +
      s"`$VECTOR_TYPE`. Expected types are `sparse` and `dense`.")
  }
}

private[libsvm] object LibSVMOptions {
  val NUM_FEATURES = "numFeatures"
  val VECTOR_TYPE = "vectorType"
  val DENSE_VECTOR_TYPE = "dense"
  val SPARSE_VECTOR_TYPE = "sparse"
}
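These keys surface as read options of Spark's built-in libsvm data source, where they are matched case-insensitively. A hedged usage sketch (the path is a placeholder, and an active SparkSession named spark is assumed):

val df = spark.read
  .format("libsvm")
  .option("numFeatures", "780")      // NUM_FEATURES key from LibSVMOptions
  .option("vectorType", "sparse")    // VECTOR_TYPE key; "dense" is the other accepted value
  .load("/tmp/sample_libsvm_data.txt")  // placeholder path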
Example 7
Source File: FileStreamOptions.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import scala.util.Try

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.util.Utils

  val latestFirst: Boolean = parameters.get("latestFirst").map { str =>
    try {
      str.toBoolean
    } catch {
      case _: IllegalArgumentException =>
        throw new IllegalArgumentException(
          s"Invalid value '$str' for option 'latestFirst', must be 'true' or 'false'")
    }
  }.getOrElse(false)
}
Example 8
Source File: ParquetOptions.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.parquet.hadoop.metadata.CompressionCodecName

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.internal.SQLConf

  val mergeSchema: Boolean = parameters
    .get(MERGE_SCHEMA)
    .map(_.toBoolean)
    .getOrElse(sqlConf.isParquetSchemaMergingEnabled)
}

object ParquetOptions {
  val MERGE_SCHEMA = "mergeSchema"

  // The parquet compression short names
  private val shortParquetCompressionCodecNames = Map(
    "none" -> CompressionCodecName.UNCOMPRESSED,
    "uncompressed" -> CompressionCodecName.UNCOMPRESSED,
    "snappy" -> CompressionCodecName.SNAPPY,
    "gzip" -> CompressionCodecName.GZIP,
    "lzo" -> CompressionCodecName.LZO)
}
Example 9
Source File: OrcOptions.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.orc

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

  val compressionCodec: String = {
    // `orc.compress` is a ORC configuration. So, here we respect this as an option but
    // `compression` has higher precedence than `orc.compress`. It means if both are set,
    // we will use `compression`.
    val orcCompressionConf = parameters.get(OrcRelation.ORC_COMPRESSION)
    val codecName = parameters
      .get("compression")
      .orElse(orcCompressionConf)
      .getOrElse("snappy").toLowerCase
    if (!shortOrcCompressionCodecNames.contains(codecName)) {
      val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase)
      throw new IllegalArgumentException(s"Codec [$codecName] " +
        s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
    }
    shortOrcCompressionCodecNames(codecName)
  }
}

private[orc] object OrcOptions {
  // The ORC compression short names
  private val shortOrcCompressionCodecNames = Map(
    "none" -> "NONE",
    "uncompressed" -> "NONE",
    "snappy" -> "SNAPPY",
    "zlib" -> "ZLIB",
    "lzo" -> "LZO")
}
Example 10
Source File: DeltaGenerateCommand.scala From delta with Apache License 2.0
package org.apache.spark.sql.delta.commands

import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier}
import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.command.RunnableCommand

case class DeltaGenerateCommand(modeName: String, tableId: TableIdentifier)
  extends RunnableCommand {

  import DeltaGenerateCommand._

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (!modeNameToGenerationFunc.contains(modeName)) {
      throw DeltaErrors.unsupportedGenerateModeException(modeName)
    }

    val tablePath = DeltaTableIdentifier(sparkSession, tableId) match {
      case Some(id) if id.path.isDefined =>
        new Path(id.path.get)
      case _ =>
        new Path(sparkSession.sessionState.catalog.getTableMetadata(tableId).location)
    }

    val deltaLog = DeltaLog.forTable(sparkSession, tablePath)
    if (deltaLog.snapshot.version < 0) {
      throw DeltaErrors.notADeltaTableException("GENERATE")
    }
    val generationFunc = modeNameToGenerationFunc(modeName)
    generationFunc(sparkSession, deltaLog)
    Seq.empty
  }
}

object DeltaGenerateCommand {
  val modeNameToGenerationFunc = CaseInsensitiveMap(
    Map[String, (SparkSession, DeltaLog) => Unit](
      "symlink_format_manifest" -> GenerateSymlinkManifest.generateFullManifest
  ))
}
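Here CaseInsensitiveMap backs the mode-name lookup itself, so the GENERATE mode is accepted in any casing. A short illustration based only on the map defined above:

import DeltaGenerateCommand.modeNameToGenerationFunc

// Both spellings resolve to the same generation function.
assert(modeNameToGenerationFunc.contains("symlink_format_manifest"))
assert(modeNameToGenerationFunc.contains("SYMLINK_FORMAT_MANIFEST"))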
Example 11
Source File: OrcOptions.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources.orc

import java.util.Locale

import org.apache.orc.OrcConf.COMPRESS

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.internal.SQLConf

  val compressionCodec: String = {
    // `compression`, `orc.compress`(i.e., OrcConf.COMPRESS), and `spark.sql.orc.compression.codec`
    // are in order of precedence from highest to lowest.
    val orcCompressionConf = parameters.get(COMPRESS.getAttribute)
    val codecName = parameters
      .get("compression")
      .orElse(orcCompressionConf)
      .getOrElse(sqlConf.orcCompressionCodec)
      .toLowerCase(Locale.ROOT)
    if (!shortOrcCompressionCodecNames.contains(codecName)) {
      val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT))
      throw new IllegalArgumentException(s"Codec [$codecName] " +
        s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
    }
    shortOrcCompressionCodecNames(codecName)
  }
}

object OrcOptions {
  // The ORC compression short names
  private val shortOrcCompressionCodecNames = Map(
    "none" -> "NONE",
    "uncompressed" -> "NONE",
    "snappy" -> "SNAPPY",
    "zlib" -> "ZLIB",
    "lzo" -> "LZO")

  def getORCCompressionCodecName(name: String): String = shortOrcCompressionCodecNames(name)
}
Example 12
Source File: DefaultSource.scala From memsql-spark-connector with Apache License 2.0
package com.memsql.spark

import org.apache.spark.TaskContext
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.metrics.source.MetricsHandler
import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider}
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}

object DefaultSource {
  val MEMSQL_SOURCE_NAME = "com.memsql.spark"
  val MEMSQL_SOURCE_NAME_SHORT = "memsql"
  val MEMSQL_GLOBAL_OPTION_PREFIX = "spark.datasource.memsql."
}

class DefaultSource
    extends RelationProvider
    with DataSourceRegister
    with CreatableRelationProvider
    with LazyLogging {

  override def shortName: String = DefaultSource.MEMSQL_SOURCE_NAME_SHORT

  private def includeGlobalParams(sqlContext: SQLContext,
                                  params: Map[String, String]): Map[String, String] =
    sqlContext.getAllConfs.foldLeft(params)({
      case (params, (k, v)) if k.startsWith(DefaultSource.MEMSQL_GLOBAL_OPTION_PREFIX) =>
        params + (k.stripPrefix(DefaultSource.MEMSQL_GLOBAL_OPTION_PREFIX) -> v)
      case (params, _) => params
    })

  override def createRelation(sqlContext: SQLContext,
                              parameters: Map[String, String]): BaseRelation = {
    val params = CaseInsensitiveMap(includeGlobalParams(sqlContext, parameters))
    val options = MemsqlOptions(params)
    if (options.disablePushdown) {
      SQLPushdownRule.ensureRemoved(sqlContext.sparkSession)
      MemsqlReaderNoPushdown(MemsqlOptions.getQuery(params), options, sqlContext)
    } else {
      SQLPushdownRule.ensureInjected(sqlContext.sparkSession)
      MemsqlReader(MemsqlOptions.getQuery(params), Nil, options, sqlContext)
    }
  }

  override def createRelation(sqlContext: SQLContext,
                              mode: SaveMode,
                              parameters: Map[String, String],
                              data: DataFrame): BaseRelation = {
    val opts = CaseInsensitiveMap(includeGlobalParams(sqlContext, parameters))
    val conf = MemsqlOptions(opts)

    val table = MemsqlOptions
      .getTable(opts)
      .getOrElse(
        throw new IllegalArgumentException(
          s"To write a dataframe to MemSQL you must specify a table name via the '${MemsqlOptions.TABLE_NAME}' parameter"
        )
      )
    JdbcHelpers.prepareTableForWrite(conf, table, mode, data.schema)
    val isReferenceTable = JdbcHelpers.isReferenceTable(conf, table)
    val partitionWriterFactory =
      if (conf.onDuplicateKeySQL.isEmpty) {
        new LoadDataWriterFactory(table, conf)
      } else {
        new BatchInsertWriterFactory(table, conf)
      }

    val schema = data.schema
    var totalRowCount = 0L
    data.foreachPartition(partition => {
      val writer = partitionWriterFactory.createDataWriter(
        schema, TaskContext.getPartitionId(), 0, isReferenceTable, mode)
      try {
        partition.foreach(record => {
          writer.write(record)
          totalRowCount += 1
        })
        writer.commit()
        MetricsHandler.setRecordsWritten(totalRowCount)
      } catch {
        case e: Exception => {
          writer.abort()
          throw e
        }
      }
    })

    createRelation(sqlContext, parameters)
  }
}
Example 13
Source File: FileStreamOptions.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import scala.util.Try

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.util.Utils

  val latestFirst: Boolean = parameters.get("latestFirst").map { str =>
    try {
      str.toBoolean
    } catch {
      case _: IllegalArgumentException =>
        throw new IllegalArgumentException(
          s"Invalid value '$str' for option 'latestFirst', must be 'true' or 'false'")
    }
  }.getOrElse(false)
}
Example 14
Source File: ParquetOptions.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.datasources.parquet

import org.apache.parquet.hadoop.metadata.CompressionCodecName

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.internal.SQLConf

  val mergeSchema: Boolean = parameters
    .get(MERGE_SCHEMA)
    .map(_.toBoolean)
    .getOrElse(sqlConf.isParquetSchemaMergingEnabled)
}

object ParquetOptions {
  val MERGE_SCHEMA = "mergeSchema"

  // The parquet compression short names
  private val shortParquetCompressionCodecNames = Map(
    "none" -> CompressionCodecName.UNCOMPRESSED,
    "uncompressed" -> CompressionCodecName.UNCOMPRESSED,
    "snappy" -> CompressionCodecName.SNAPPY,
    "gzip" -> CompressionCodecName.GZIP,
    "lzo" -> CompressionCodecName.LZO)
}
Example 15
Source File: OrcOptions.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.orc

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

  val compressionCodec: String = {
    // `orc.compress` is a ORC configuration. So, here we respect this as an option but
    // `compression` has higher precedence than `orc.compress`. It means if both are set,
    // we will use `compression`.
    val orcCompressionConf = parameters.get(OrcRelation.ORC_COMPRESSION)
    val codecName = parameters
      .get("compression")
      .orElse(orcCompressionConf)
      .getOrElse("snappy").toLowerCase
    if (!shortOrcCompressionCodecNames.contains(codecName)) {
      val availableCodecs = shortOrcCompressionCodecNames.keys.map(_.toLowerCase)
      throw new IllegalArgumentException(s"Codec [$codecName] " +
        s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
    }
    shortOrcCompressionCodecNames(codecName)
  }
}

private[orc] object OrcOptions {
  // The ORC compression short names
  private val shortOrcCompressionCodecNames = Map(
    "none" -> "NONE",
    "uncompressed" -> "NONE",
    "snappy" -> "SNAPPY",
    "zlib" -> "ZLIB",
    "lzo" -> "LZO")
}
Example 16
Source File: ArrowOptions.scala From OAP with Apache License 2.0
package com.intel.oap.spark.sql.execution.datasources.v2.arrow

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap

class ArrowOptions(@transient private val parameters: CaseInsensitiveMap[String])
  extends Serializable {

  def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))

  val originalFormat = parameters
    .get(ArrowOptions.KEY_ORIGINAL_FORMAT)
    .getOrElse(ArrowOptions.DEFAULT_ORIGINAL_FORMAT)
  val filesystem = parameters
    .get(ArrowOptions.KEY_FILESYSTEM)
    .getOrElse(ArrowOptions.DEFAULT_FILESYSTEM)
}

object ArrowOptions {
  val KEY_ORIGINAL_FORMAT = "originalFormat"
  val DEFAULT_ORIGINAL_FORMAT = "parquet"
  val KEY_FILESYSTEM = "filesystem"
  val DEFAULT_FILESYSTEM = "hdfs"
}
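ArrowOptions resolves its two settings case-insensitively against the wrapped map and falls back to the defaults defined in its companion object. A small sketch using only the keys shown above:

val arrowOptions = new ArrowOptions(Map("FILESYSTEM" -> "hdfs"))
assert(arrowOptions.filesystem == "hdfs")          // "FILESYSTEM" matches the "filesystem" key
assert(arrowOptions.originalFormat == "parquet")   // falls back to DEFAULT_ORIGINAL_FORMAT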
Example 17
Source File: FileStreamOptions.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import scala.util.Try

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.util.Utils

  val fileNameOnly: Boolean = withBooleanParameter("fileNameOnly", false)

  private def withBooleanParameter(name: String, default: Boolean) = {
    parameters.get(name).map { str =>
      try {
        str.toBoolean
      } catch {
        case _: IllegalArgumentException =>
          throw new IllegalArgumentException(
            s"Invalid value '$str' for option '$name', must be 'true' or 'false'")
      }
    }.getOrElse(default)
  }
}
Example 18
Source File: ParquetOptions.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources.parquet

import java.util.Locale

import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.parquet.hadoop.metadata.CompressionCodecName

import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.internal.SQLConf

  val mergeSchema: Boolean = parameters
    .get(MERGE_SCHEMA)
    .map(_.toBoolean)
    .getOrElse(sqlConf.isParquetSchemaMergingEnabled)
}

object ParquetOptions {
  val MERGE_SCHEMA = "mergeSchema"

  // The parquet compression short names
  private val shortParquetCompressionCodecNames = Map(
    "none" -> CompressionCodecName.UNCOMPRESSED,
    "uncompressed" -> CompressionCodecName.UNCOMPRESSED,
    "snappy" -> CompressionCodecName.SNAPPY,
    "gzip" -> CompressionCodecName.GZIP,
    "lzo" -> CompressionCodecName.LZO,
    "lz4" -> CompressionCodecName.LZ4,
    "brotli" -> CompressionCodecName.BROTLI,
    "zstd" -> CompressionCodecName.ZSTD)

  def getParquetCompressionCodecName(name: String): String = {
    shortParquetCompressionCodecNames(name).name()
  }
}
Example 19
Source File: TextOptions.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources.text

import java.nio.charset.{Charset, StandardCharsets}

import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs}

  val wholeText = parameters.getOrElse(WHOLETEXT, "false").toBoolean

  val encoding: Option[String] = parameters.get(ENCODING)

  val lineSeparator: Option[String] = parameters.get(LINE_SEPARATOR).map { lineSep =>
    require(lineSep.nonEmpty, s"'$LINE_SEPARATOR' cannot be an empty string.")
    lineSep
  }

  // Note that the option 'lineSep' uses a different default value in read and write.
  val lineSeparatorInRead: Option[Array[Byte]] = lineSeparator.map { lineSep =>
    lineSep.getBytes(encoding.map(Charset.forName(_)).getOrElse(StandardCharsets.UTF_8))
  }
  val lineSeparatorInWrite: Array[Byte] =
    lineSeparatorInRead.getOrElse("\n".getBytes(StandardCharsets.UTF_8))
}

private[datasources] object TextOptions {
  val COMPRESSION = "compression"
  val WHOLETEXT = "wholetext"
  val ENCODING = "encoding"
  val LINE_SEPARATOR = "lineSep"
}
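The wholetext and lineSep keys defined here are the option names accepted by Spark's text data source, matched case-insensitively through the wrapped map. A hedged usage sketch (the path is a placeholder, and an active SparkSession named spark is assumed):

// Each input file becomes a single row because of `wholetext`;
// "wholeText" or "WHOLETEXT" would be matched the same way.
val df = spark.read
  .option("wholetext", "true")
  .text("/tmp/example-logs")  // placeholder path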
Example 20
Source File: JSONOptions.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.json

import java.util.Locale

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs, ParseModes}

  def setJacksonOptions(factory: JsonFactory): Unit = {
    factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)
    factory.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, allowUnquotedFieldNames)
    factory.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, allowSingleQuotes)
    factory.configure(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS, allowNumericLeadingZeros)
    factory.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, allowNonNumericNumbers)
    factory.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
      allowBackslashEscapingAnyCharacter)
  }
}
Example 21
Source File: JSONOptions.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.catalyst.json

import java.util.Locale

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs, ParseModes}

  def setJacksonOptions(factory: JsonFactory): Unit = {
    factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)
    factory.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, allowUnquotedFieldNames)
    factory.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, allowSingleQuotes)
    factory.configure(JsonParser.Feature.ALLOW_NUMERIC_LEADING_ZEROS, allowNumericLeadingZeros)
    factory.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, allowNonNumericNumbers)
    factory.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER,
      allowBackslashEscapingAnyCharacter)
  }
}