org.apache.hadoop.fs.permission.FsPermission Scala Examples
The following examples show how to use org.apache.hadoop.fs.permission.FsPermission.
Each example is taken from an open-source project; the project, source file and license are noted in the header above each example.
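Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the path is a placeholder) of the FsPermission constructions that recur throughout these examples:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.{FsAction, FsPermission}

object FsPermissionSketch extends App {
  // The examples below construct FsPermission in several equivalent ways:
  val fromOctal    = new FsPermission("700")                                          // octal string (Example 6)
  val fromActions  = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)       // rwxrwxrwx (Example 8)
  val immutable644 = FsPermission.createImmutable(Integer.parseInt("644", 8).toShort) // rw-r--r-- (Examples 9-10)
  val fromUnixStr  = FsPermission.valueOf("-rwxrwxrwx")                               // ls-style string (Examples 12-14)

  // Applying a permission to an existing file; the path is just a placeholder.
  val fs = FileSystem.get(new Configuration())
  fs.setPermission(new Path("/tmp/fs-permission-sketch"), fromOctal)
}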
Example 1
Source File: AvroSink.scala From eel-sdk with Apache License 2.0
package io.eels.component.avro

import java.io.File

import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}

case class AvroSink(path: Path,
                    overwrite: Boolean = false,
                    permission: Option[FsPermission] = None,
                    inheritPermissions: Option[Boolean] = None)
                   (implicit conf: Configuration, fs: FileSystem) extends Sink {

  def withOverwrite(overwrite: Boolean): AvroSink = copy(overwrite = overwrite)
  def withPermission(permission: FsPermission): AvroSink = copy(permission = Option(permission))
  def withInheritPermission(inheritPermissions: Boolean): AvroSink = copy(inheritPermissions = Option(inheritPermissions))

  override def open(schema: StructType): SinkWriter = new SinkWriter {

    private val writer = new AvroWriter(schema, fs.create(path, overwrite))

    override def write(row: Row): Unit = writer.write(row)

    override def close(): Unit = {
      writer.close()
      permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (inheritPermissions.getOrElse(false)) {
            val permission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, permission)
          }
      }
    }
  }
}

object AvroSink {
  def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSink =
    AvroSink(new Path(file.getAbsoluteFile.toString))
  def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSink =
    apply(path.toFile)
}
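For orientation, a hedged usage sketch of the sink above, showing its two permission modes; the output path is a placeholder and the default Hadoop configuration is assumed:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import io.eels.component.avro.AvroSink

object AvroSinkPermissionSketch extends App {
  implicit val conf: Configuration = new Configuration()
  implicit val fs: FileSystem = FileSystem.get(conf)

  // Explicit permission: the writer calls fs.setPermission(path, perm) after close().
  val explicit = AvroSink(new Path("/tmp/out.avro")).withOverwrite(true).withPermission(new FsPermission("640"))

  // Inherited permission: after close() the file copies its parent directory's permission.
  val inherited = AvroSink(new Path("/tmp/out.avro")).withInheritPermission(true)
}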
Example 2
Source File: ParquetSink.scala From eel-sdk with Apache License 2.0
package io.eels.component.parquet

import com.sksamuel.exts.Logging
import com.sksamuel.exts.OptionImplicits._
import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.math.BigDecimal.RoundingMode.RoundingMode

case class ParquetWriteOptions(overwrite: Boolean = false,
                               permission: Option[FsPermission] = None,
                               dictionary: Boolean = true,
                               inheritPermissions: Option[Boolean] = None,
                               roundingMode: RoundingMode = RoundingMode.UNNECESSARY,
                               metadata: Map[String, String] = Map.empty) {

  def withOverwrite(overwrite: Boolean): ParquetWriteOptions = copy(overwrite = overwrite)
  def withDictionary(dictionary: Boolean): ParquetWriteOptions = copy(dictionary = dictionary)
  def withMetaData(map: Map[String, String]): ParquetWriteOptions = copy(metadata = map)
  def withPermission(permission: FsPermission): ParquetWriteOptions = copy(permission = permission.some)
  def withInheritPermission(inheritPermissions: Boolean): ParquetWriteOptions = copy(inheritPermissions = inheritPermissions.some)
  def withRoundingMode(mode: RoundingMode): ParquetWriteOptions = copy(roundingMode = mode)
}

case class ParquetSink(path: Path, options: ParquetWriteOptions = ParquetWriteOptions())
                      (implicit fs: FileSystem) extends Sink with Logging {

  // -- convenience methods --
  def withOverwrite(overwrite: Boolean): ParquetSink = copy(options = options.withOverwrite(overwrite))
  def withDictionary(dictionary: Boolean): ParquetSink = copy(options = options.copy(dictionary = dictionary))
  def withMetaData(map: Map[String, String]): ParquetSink = copy(options = options.copy(metadata = map))
  def withPermission(permission: FsPermission): ParquetSink = copy(options = options.copy(permission = permission.some))
  def withInheritPermission(inheritPermissions: Boolean): ParquetSink = copy(options = options.copy(inheritPermissions = inheritPermissions.some))
  def withRoundingMode(mode: RoundingMode): ParquetSink = copy(options = options.copy(roundingMode = mode))

  private def create(schema: StructType, path: Path): SinkWriter = new SinkWriter {

    if (options.overwrite && fs.exists(path))
      fs.delete(path, false)

    val writer = RowParquetWriterFn(path, schema, options.metadata, options.dictionary, options.roundingMode, fs.getConf)

    override def write(row: Row): Unit = {
      writer.write(row)
    }

    override def close(): Unit = {
      writer.close()
      options.permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (options.inheritPermissions.getOrElse(false)) {
            val permission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, permission)
          }
      }
    }
  }

  override def open(schema: StructType, n: Int): Seq[SinkWriter] = {
    if (n == 1) Seq(create(schema, path))
    else List.tabulate(n) { k => create(schema, new Path(path.getParent, path.getName + "_" + k)) }
  }

  override def open(schema: StructType): SinkWriter = create(schema, path)
}

object ParquetSink {
  def apply(path: String)(implicit fs: FileSystem): ParquetSink = ParquetSink(new Path(path))
}
Example 3
Source File: ParquetHiveDialect.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive.dialect

import java.util.concurrent.atomic.AtomicInteger

import com.sksamuel.exts.Logging
import com.sksamuel.exts.OptionImplicits._
import com.sksamuel.exts.io.Using
import io.eels.component.hive.{HiveDialect, HiveOps, HiveOutputStream}
import io.eels.component.parquet._
import io.eels.component.parquet.util.{ParquetIterator, ParquetLogMute}
import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import io.eels.{Predicate, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
import org.apache.hadoop.hive.ql.io.parquet.{MapredParquetInputFormat, MapredParquetOutputFormat}

import scala.math.BigDecimal.RoundingMode.RoundingMode

case class ParquetHiveDialect(options: ParquetWriteOptions = ParquetWriteOptions()) extends HiveDialect with Logging with Using {

  override val serde: String = classOf[ParquetHiveSerDe].getCanonicalName
  override val inputFormat: String = classOf[MapredParquetInputFormat].getCanonicalName
  override val outputFormat: String = classOf[MapredParquetOutputFormat].getCanonicalName

  override def input(path: Path,
                     ignore: StructType,
                     projectionSchema: StructType,
                     predicate: Option[Predicate])
                    (implicit fs: FileSystem, conf: Configuration): Publisher[Seq[Row]] = new Publisher[Seq[Row]] {

    val client = new HiveMetaStoreClient(new HiveConf)
    val ops = new HiveOps(client)

    override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
      // convert the eel projection schema into a parquet schema which will be used by the native parquet reader
      try {
        val parquetProjectionSchema = ParquetSchemaFns.toParquetMessageType(projectionSchema)
        using(RowParquetReaderFn(path, predicate, parquetProjectionSchema.some, true)) { reader =>
          val subscription = new Subscription {
            override def cancel(): Unit = reader.close()
          }
          subscriber.subscribed(subscription)
          ParquetIterator(reader).grouped(DataStream.DefaultBatchSize).foreach(subscriber.next)
          subscriber.completed()
        }
      } catch {
        case t: Throwable => subscriber.error(t)
      }
    }
  }

  override def output(schema: StructType,
                      path: Path,
                      permission: Option[FsPermission],
                      roundingMode: RoundingMode,
                      metadata: Map[String, String])
                     (implicit fs: FileSystem, conf: Configuration): HiveOutputStream = {
    val path_x = path
    new HiveOutputStream {
      ParquetLogMute()

      private val _records = new AtomicInteger(0)
      logger.debug(s"Creating parquet writer at $path")
      private val writer = RowParquetWriterFn(path, schema, metadata, true, roundingMode, fs.getConf)

      override def write(row: Row): Unit = {
        require(row.values.nonEmpty, "Attempting to write an empty row")
        writer.write(row)
        _records.incrementAndGet()
      }

      override def close(): Unit = {
        logger.debug(s"Closing hive parquet writer $path")
        writer.close()
        // after the files are closed, we should set permissions if we've been asked to, this allows
        // all the files we create to stay consistent
        permission.foreach(fs.setPermission(path, _))
      }

      override def records: Int = _records.get()
      override def path: Path = path_x
    }
  }
}
Example 4
Source File: OrcHiveDialect.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive.dialect

import com.sksamuel.exts.Logging
import io.eels.component.hive.{HiveDialect, HiveOutputStream}
import io.eels.component.orc.{OrcPublisher, OrcWriteOptions, OrcWriter}
import io.eels.datastream.{Publisher, Subscriber}
import io.eels.schema.StructType
import io.eels.{Predicate, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSerde}

import scala.math.BigDecimal.RoundingMode.RoundingMode

case class OrcHiveDialect(options: OrcWriteOptions = OrcWriteOptions()) extends HiveDialect with Logging {

  override val serde: String = classOf[OrcSerde].getCanonicalName
  override val inputFormat: String = classOf[OrcInputFormat].getCanonicalName
  override val outputFormat: String = classOf[OrcOutputFormat].getCanonicalName

  override def input(path: Path,
                     metastoreSchema: StructType,
                     projectionSchema: StructType,
                     predicate: Option[Predicate])
                    (implicit fs: FileSystem, conf: Configuration): Publisher[Seq[Row]] = new Publisher[Seq[Row]] {
    override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
      new OrcPublisher(path, projectionSchema.fieldNames(), predicate).subscribe(subscriber)
    }
  }

  override def output(schema: StructType,
                      path: Path,
                      permission: Option[FsPermission],
                      roundingMode: RoundingMode,
                      metadata: Map[String, String])(implicit fs: FileSystem, conf: Configuration): HiveOutputStream = {

    val path_x = path
    val writer = new OrcWriter(path, schema, options)

    new HiveOutputStream {

      override def write(row: Row): Unit = {
        require(row.values.nonEmpty, "Attempting to write an empty row")
        writer.write(row)
      }

      override def close(): Unit = {
        writer.close()
        permission.foreach(fs.setPermission(path, _))
      }

      override def records: Int = writer.records
      override def path: Path = path_x
    }
  }
}
Example 5
Source File: HiveDialect.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive

import com.sksamuel.exts.Logging
import io.eels.component.hive.dialect.{OrcHiveDialect, ParquetHiveDialect}
import io.eels.datastream.Publisher
import io.eels.schema.StructType
import io.eels.{Predicate, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hive.metastore.api.Table

import scala.math.BigDecimal.RoundingMode.RoundingMode

trait HiveDialect extends Logging {

  def serde: String
  def inputFormat: String
  def outputFormat: String

  def output(schema: StructType, // schema without partition information
             path: Path,
             permission: Option[FsPermission],
             roundingMode: RoundingMode,
             metadata: Map[String, String])
            (implicit fs: FileSystem, conf: Configuration): HiveOutputStream

  def stats(getPath: Path)(implicit fs: FileSystem): Long = throw new UnsupportedOperationException
}

object HiveDialect extends Logging {

  def apply(format: String): HiveDialect = format match {
    case input if input.contains("ParquetInputFormat") => ParquetHiveDialect()
    case input if input.contains("OrcInputFormat") => OrcHiveDialect()
    //case input if input.contains("AvroHiveDialect") || input.contains("AvroContainerInputFormat") => AvroHiveDialect
    // "org.apache.hadoop.mapred.TextInputFormat" -> TextHiveDialect
    case _ => throw new UnsupportedOperationException(s"Unknown hive input format $format")
  }

  def apply(table: Table): HiveDialect = {
    val format = table.getSd.getInputFormat
    logger.debug(s"Table format is $format")
    val dialect = HiveDialect(format)
    logger.debug(s"HiveDialect is $dialect")
    dialect
  }
}
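As a rough illustration (not taken from the eel-sdk sources), the factory above only needs the marker substring to appear somewhere in the input-format class name:

import io.eels.component.hive.HiveDialect

object HiveDialectSketch extends App {
  // Both format strings contain the substring matched in HiveDialect.apply.
  val parquet = HiveDialect("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")
  val orc = HiveDialect("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")
  println(parquet) // ParquetHiveDialect(...)
  println(orc)     // OrcHiveDialect(...)
}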
Example 6
Source File: HiveSpeedTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import org.apache.hadoop.fs.permission.FsPermission

import scala.util.Random

object HiveSpeedTest extends App with Timed {

  import HiveConfig._

  val Database = "sam"
  val Table = "speedtest"

  val schema = StructType("artist", "album", "year")
  val data = Array(
    Vector("elton", "yellow brick road ", "1972"),
    Vector("elton", "tumbleweed connection", "1974"),
    Vector("elton", "empty sky", "1969"),
    Vector("beatles", "white album", "1969"),
    Vector("beatles", "tumbleweed connection", "1966"),
    Vector("pinkfloyd", "the wall", "1979"),
    Vector("pinkfloyd", "dark side of the moon", "1974"),
    Vector("pinkfloyd", "emily", "1966")
  )

  def createRow = Row(schema, data(Random.nextInt(data.length)))

  val size = 10000000

  while (true) {

    val ds = DataStream.fromRowIterator(schema, Iterator.continually(createRow).take(size))
      .addField("bibble", "myvalue")
      .addField("timestamp", System.currentTimeMillis.toString)

    println(ds.schema.show())

    HiveTable(Database, Table).drop(true)

    ops.createTable(
      Database,
      Table,
      ds.schema,
      List("artist"),
      overwrite = true
    )

    timed("writing data") {
      val sink = HiveSink(Database, Table).withPermission(new FsPermission("700"))
      ds.to(sink, 4)
      logger.info("Write complete")
    }

    timed("reading data") {
      val source = HiveSource(Database, Table)
      source.toDataStream().size
      logger.info("Read complete")
    }

    Thread.sleep(5000)
  }
}
Example 7
Source File: OrcSink.scala From eel-sdk with Apache License 2.0
package io.eels.component.orc

import com.sksamuel.exts.Logging
import com.sksamuel.exts.OptionImplicits._
import com.sksamuel.exts.config.ConfigSupport
import com.typesafe.config.ConfigFactory
import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.orc.OrcFile.{CompressionStrategy, EncodingStrategy}
import org.apache.orc.OrcProto.CompressionKind

case class OrcWriteOptions(overwrite: Boolean = false,
                           compressionKind: CompressionKind,
                           compressionStrategy: CompressionStrategy,
                           compressionBufferSize: Option[Int],
                           encodingStrategy: Option[EncodingStrategy],
                           bloomFilterColumns: Seq[String] = Nil,
                           permission: Option[FsPermission] = None,
                           inheritPermissions: Option[Boolean] = None,
                           rowIndexStride: Option[Int] = None) {
  def withCompressionKind(kind: CompressionKind): OrcWriteOptions = copy(compressionKind = kind)
  def withCompressionStrategy(strategy: CompressionStrategy): OrcWriteOptions = copy(compressionStrategy = strategy)
  def withCompressionBufferSize(size: Int): OrcWriteOptions = copy(compressionBufferSize = size.some)
  def withEncodingStrategy(strategy: EncodingStrategy): OrcWriteOptions = copy(encodingStrategy = strategy.some)
  def withBloomFilterColumns(bloomFilterColumns: Seq[String]): OrcWriteOptions = copy(bloomFilterColumns = bloomFilterColumns)
  def withRowIndexStride(stride: Int): OrcWriteOptions = copy(rowIndexStride = stride.some)
  def withOverwrite(overwrite: Boolean): OrcWriteOptions = copy(overwrite = overwrite)
  def withPermission(permission: FsPermission): OrcWriteOptions = copy(permission = permission.some)
  def withInheritPermission(inheritPermissions: Boolean): OrcWriteOptions = copy(inheritPermissions = inheritPermissions.some)
}

object OrcWriteOptions extends ConfigSupport {

  // creates a config from the typesafe reference.confs
  def apply(): OrcWriteOptions = {
    val config = ConfigFactory.load()
    OrcWriteOptions(
      false,
      CompressionKind valueOf config.getString("eel.orc.writer.compression-kind"),
      CompressionStrategy valueOf config.getString("eel.orc.writer.compression-strategy"),
      config.getIntOpt("eel.orc.writer.compression-buffer-size"),
      config.getStringOpt("eel.orc.writer.encoding-strategy").map(EncodingStrategy.valueOf)
    )
  }
}

case class OrcSink(path: Path, options: OrcWriteOptions = OrcWriteOptions())
                  (implicit fs: FileSystem, conf: Configuration) extends Sink with Logging {

  // -- convenience options --
  def withCompressionKind(kind: CompressionKind): OrcSink = copy(options = options.copy(compressionKind = kind))
  def withCompressionStrategy(strategy: CompressionStrategy): OrcSink = copy(options = options.copy(compressionStrategy = strategy))
  def withCompressionBufferSize(size: Int): OrcSink = copy(options = options.copy(compressionBufferSize = size.some))
  def withEncodingStrategy(strategy: EncodingStrategy): OrcSink = copy(options = options.copy(encodingStrategy = strategy.some))
  def withBloomFilterColumns(bloomFilterColumns: Seq[String]): OrcSink = copy(options = options.copy(bloomFilterColumns = bloomFilterColumns))
  def withRowIndexStride(stride: Int): OrcSink = copy(options = options.copy(rowIndexStride = stride.some))
  def withOverwrite(overwrite: Boolean): OrcSink = copy(options = options.copy(overwrite = overwrite))
  def withPermission(permission: FsPermission): OrcSink = copy(options = options.copy(permission = permission.some))
  def withInheritPermission(inheritPermissions: Boolean): OrcSink =
    copy(options = options.copy(inheritPermissions = inheritPermissions.some))

  override def open(schema: StructType, n: Int): Seq[SinkWriter] = {
    if (n == 1) Seq(create(schema, path))
    else List.tabulate(n) { k => create(schema, new Path(path.getParent, path.getName + "_" + k)) }
  }

  override def open(schema: StructType): SinkWriter = create(schema, path)

  private def create(schema: StructType, path: Path): SinkWriter = new SinkWriter {

    if (options.overwrite && fs.exists(path))
      fs.delete(path, false)

    val writer = new OrcWriter(path, schema, options)

    override def write(row: Row): Unit = writer.write(row)

    override def close(): Unit = {
      writer.close()
      options.permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (options.inheritPermissions.getOrElse(false)) {
            val permission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, permission)
          }
      }
    }
  }
}
Example 8
Source File: DFSJarStore.scala From incubator-retired-gearpump with Apache License 2.0
package org.apache.gearpump.jarstore.dfs

import java.io.{InputStream, OutputStream}

import org.apache.gearpump.util.Constants
import org.apache.gearpump.jarstore.JarStore
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import com.typesafe.config.Config
import org.apache.hadoop.fs.permission.{FsAction, FsPermission}

  // Excerpt: the enclosing JarStore implementation (class declaration and remaining members)
  // is not included in this listing.

  override def getFile(fileName: String): InputStream = {
    val filePath = new Path(rootPath, fileName)
    val fs = filePath.getFileSystem(new Configuration())
    fs.open(filePath)
  }

  private def createDirIfNotExists(path: Path): Unit = {
    val fs = path.getFileSystem(new Configuration())
    if (!fs.exists(path)) {
      // FsAction.ALL for user, group and other: rwxrwxrwx (octal 777)
      fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL))
    }
  }
}
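A small sketch (not part of Gearpump) confirming that the FsAction triple used above encodes the same mode as the octal string "777":

import org.apache.hadoop.fs.permission.{FsAction, FsPermission}

object WorldWritableSketch extends App {
  val fromActions = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)
  val fromOctal = new FsPermission("777")
  // Both representations encode rwxrwxrwx.
  assert(fromActions.toShort == fromOctal.toShort)
  println(fromActions) // rwxrwxrwx
}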
Example 9
Source File: JsonFileReporter.scala From kyuubi with Apache License 2.0
package yaooqinn.kyuubi.metrics

import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter}
import java.util.{Timer, TimerTask}
import java.util.concurrent.TimeUnit

import scala.util.Try
import scala.util.control.NonFatal

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kyuubi.Logging
import org.apache.spark.{KyuubiSparkUtil, SparkConf}
import org.apache.spark.KyuubiConf._

private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry)
  extends Closeable with Logging {

  private val jsonMapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false))
  private val timer = new Timer(true)
  private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL))
  private val path = conf.get(METRICS_REPORT_LOCATION)
  private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf)

  def start(): Unit = {
    timer.schedule(new TimerTask {
      var bw: BufferedWriter = _

      override def run(): Unit = try {
        val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry)
        val tmpPath = new Path(path + ".tmp")
        val tmpPathUri = tmpPath.toUri
        val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) {
          FileSystem.getLocal(hadoopConf)
        } else {
          FileSystem.get(tmpPathUri, hadoopConf)
        }
        fs.delete(tmpPath, true)
        bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true)))
        bw.write(json)
        bw.close()
        // octal 644 -> rw-r--r--
        fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
        val finalPath = new Path(path)
        fs.rename(tmpPath, finalPath)
        fs.setPermission(finalPath,
          FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
      } catch {
        case NonFatal(e) => error("Error writing metrics to json file: " + path, e)
      } finally {
        if (bw != null) {
          Try(bw.close())
        }
      }
    }, 0, interval)
  }

  override def close(): Unit = {
    timer.cancel()
  }
}
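A short sketch (independent of Kyuubi) of what the createImmutable call above produces; the octal string is parsed to a short before being wrapped in an immutable permission:

import org.apache.hadoop.fs.permission.FsPermission

object Immutable644Sketch extends App {
  // Integer.parseInt("644", 8) == 420 decimal; createImmutable wraps it as a read-only FsPermission.
  val perm = FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)
  println(perm)         // rw-r--r--
  println(perm.toShort) // 420
}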
Example 10
Source File: package.scala From kyuubi with Apache License 2.0
package yaooqinn.kyuubi

import scala.collection.mutable.HashMap

import org.apache.hadoop.fs.permission.FsPermission

package object yarn {

  type EnvMap = HashMap[String, String]

  val KYUUBI_YARN_APP_NAME = "KYUUBI SERVER"
  val KYUUBI_YARN_APP_TYPE = "KYUUBI"

  // Staging directory for any temporary jars or files
  val KYUUBI_STAGING: String = ".kyuubiStaging"

  // Staging directory is private! -> rwx------
  val STAGING_DIR_PERMISSION: FsPermission =
    FsPermission.createImmutable(Integer.parseInt("700", 8).toShort)

  // App files are world-wide readable and owner writable -> rw-r--r--
  val APP_FILE_PERMISSION: FsPermission =
    FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)

  val SPARK_CONF_DIR = "__spark_conf__"
  val SPARK_CONF_FILE = "__spark_conf__.properties"

  // Subdirectory in the conf directory containing Hadoop config files.
  val HADOOP_CONF_DIR = "__hadoop_conf__"

  // File containing the conf archive in the AM. See prepareLocalResources().
  val SPARK_CONF_ARCHIVE: String = SPARK_CONF_DIR + ".zip"

  val SPARK_LIB_DIR = "__spark_libs__"
  val LOCAL_SCHEME = "local"
}
Example 11
Source File: SparkHadoopUtilSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.deploy

import java.security.PrivilegedExceptionAction

import scala.util.Random

import org.apache.hadoop.fs.FileStatus
import org.apache.hadoop.fs.permission.{FsAction, FsPermission}
import org.apache.hadoop.security.UserGroupInformation
import org.scalatest.Matchers

import org.apache.spark.SparkFunSuite

class SparkHadoopUtilSuite extends SparkFunSuite with Matchers {

  test("check file permission") {
    import FsAction._
    val testUser = s"user-${Random.nextInt(100)}"
    val testGroups = Array(s"group-${Random.nextInt(100)}")
    val testUgi = UserGroupInformation.createUserForTesting(testUser, testGroups)

    testUgi.doAs(new PrivilegedExceptionAction[Void] {
      override def run(): Void = {
        val sparkHadoopUtil = new SparkHadoopUtil

        // If file is owned by user and user has access permission
        var status = fileStatus(testUser, testGroups.head, READ_WRITE, READ_WRITE, NONE)
        sparkHadoopUtil.checkAccessPermission(status, READ) should be(true)
        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true)

        // If file is owned by user but user has no access permission
        status = fileStatus(testUser, testGroups.head, NONE, READ_WRITE, NONE)
        sparkHadoopUtil.checkAccessPermission(status, READ) should be(false)
        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false)

        val otherUser = s"test-${Random.nextInt(100)}"
        val otherGroup = s"test-${Random.nextInt(100)}"

        // If file is owned by user's group and user's group has access permission
        status = fileStatus(otherUser, testGroups.head, NONE, READ_WRITE, NONE)
        sparkHadoopUtil.checkAccessPermission(status, READ) should be(true)
        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true)

        // If file is owned by user's group but user's group has no access permission
        status = fileStatus(otherUser, testGroups.head, READ_WRITE, NONE, NONE)
        sparkHadoopUtil.checkAccessPermission(status, READ) should be(false)
        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false)

        // If file is owned by other user and this user has access permission
        status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, READ_WRITE)
        sparkHadoopUtil.checkAccessPermission(status, READ) should be(true)
        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true)

        // If file is owned by other user but this user has no access permission
        status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, NONE)
        sparkHadoopUtil.checkAccessPermission(status, READ) should be(false)
        sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false)

        null
      }
    })
  }

  private def fileStatus(
      owner: String,
      group: String,
      userAction: FsAction,
      groupAction: FsAction,
      otherAction: FsAction): FileStatus = {
    new FileStatus(0L, false, 0, 0L, 0L, 0L,
      new FsPermission(userAction, groupAction, otherAction),
      owner, group, null)
  }
}
Example 12
Source File: ParquetHiveFormat.scala From stream-reactor with Apache License 2.0
package com.landoop.streamreactor.connect.hive.formats

import com.landoop.streamreactor.connect.hive.Serde
import com.landoop.streamreactor.connect.hive.parquet.ParquetSinkConfig
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.parquet.hadoop.ParquetWriter

import scala.util.Try

object ParquetHiveFormat extends HiveFormat {
  private val logger = org.slf4j.LoggerFactory.getLogger(getClass.getName)

  override def serde = Serde(
    "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
    "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
    "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
    Map("serialization.format" -> "1")
  )

  override def writer(path: Path, schema: Schema)
                     (implicit fs: FileSystem): HiveWriter = new HiveWriter {

    logger.debug(s"Creating parquet writer at $path")

    val writer: ParquetWriter[Struct] =
      com.landoop.streamreactor.connect.hive.parquet.parquetWriter(path, schema, ParquetSinkConfig(overwrite = true))
    Try(fs.setPermission(path, FsPermission.valueOf("-rwxrwxrwx")))

    val createdTime: Long = System.currentTimeMillis()
    var lastKnownFileSize: Long = fs.getFileStatus(path).getLen
    var readFileSize = false
    var count = 0

    override def write(struct: Struct): Long = {
      writer.write(struct)
      count = count + 1
      readFileSize = true
      count
    }

    override def close(): Unit = {
      logger.debug(s"Closing writer at path $path")
      writer.close()
    }

    override def currentCount: Long = count
    override def file: Path = path
    override def fileSize: Long = {
      if (readFileSize) {
        lastKnownFileSize = fs.getFileStatus(path).getLen
        readFileSize = false
      }
      lastKnownFileSize
    }
  }

  override def reader(path: Path, startAt: Int, schema: Schema)
                     (implicit fs: FileSystem): HiveReader = new HiveReader {

    logger.debug(s"Creating parquet reader for $path with offset $startAt")

    val reader = com.landoop.streamreactor.connect.hive.parquet.parquetReader(path)
    var offset = startAt

    override def iterator: Iterator[Record] =
      Iterator.continually(reader.read).takeWhile(_ != null).drop(startAt).map { struct =>
        val record = Record(struct, path, offset)
        offset = offset + 1
        record
      }

    override def close(): Unit = reader.close()
  }
}
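A brief sketch (not from stream-reactor) of the valueOf form used above; it expects a 10-character ls-style string, a type flag followed by three rwx triples, and the writer wraps setPermission in Try so a failure does not abort the write:

import org.apache.hadoop.fs.permission.FsPermission

object ValueOfSketch extends App {
  val open = FsPermission.valueOf("-rwxrwxrwx")
  println(Integer.toOctalString(open.toShort)) // 777
}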
Example 13
Source File: ParquetHiveFormat.scala From stream-reactor with Apache License 2.0
package com.landoop.streamreactor.connect.hive.formats

import com.landoop.streamreactor.connect.hive.Serde
import com.landoop.streamreactor.connect.hive.parquet.ParquetSinkConfig
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.parquet.hadoop.ParquetWriter

import scala.util.Try

object ParquetHiveFormat extends HiveFormat {
  private val logger = org.slf4j.LoggerFactory.getLogger(getClass.getName)

  override def serde = Serde(
    "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
    "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
    "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
    Map("serialization.format" -> "1")
  )

  override def writer(path: Path, schema: Schema)
                     (implicit fs: FileSystem): HiveWriter = new HiveWriter {

    logger.debug(s"Creating parquet writer at $path")

    val writer: ParquetWriter[Struct] =
      com.landoop.streamreactor.connect.hive.parquet.parquetWriter(path, schema, ParquetSinkConfig(overwrite = true))
    Try(fs.setPermission(path, FsPermission.valueOf("-rwxrwxrwx")))

    val createdTimestamp: Long = System.currentTimeMillis()
    var lastKnownFileSize: Long = fs.getFileStatus(path).getLen
    var readFileSize = false
    var count = 0

    override def write(struct: Struct): Long = {
      writer.write(struct)
      count = count + 1
      readFileSize = true
      count
    }

    override def close(): Unit = {
      logger.debug(s"Closing writer at path $path")
      writer.close()
    }

    override def currentCount: Long = count
    override def file: Path = path
    override def createdTime: Long = createdTimestamp
    override def fileSize: Long = {
      if (readFileSize) {
        lastKnownFileSize = fs.getFileStatus(path).getLen
        readFileSize = false
      }
      lastKnownFileSize
    }
  }

  override def reader(path: Path, startAt: Int, schema: Schema)
                     (implicit fs: FileSystem): HiveReader = new HiveReader {

    logger.debug(s"Creating parquet reader for $path with offset $startAt")

    val reader = com.landoop.streamreactor.connect.hive.parquet.parquetReader(path)
    var offset = startAt

    override def iterator: Iterator[Record] =
      Iterator.continually(reader.read).takeWhile(_ != null).drop(startAt).map { struct =>
        val record = Record(struct, path, offset)
        offset = offset + 1
        record
      }

    override def close(): Unit = reader.close()
  }
}
Example 14
Source File: OrcHiveFormat.scala From stream-reactor with Apache License 2.0
package com.landoop.streamreactor.connect.hive.formats

import com.landoop.streamreactor.connect.hive.{OrcSinkConfig, OrcSourceConfig, Serde}
import com.landoop.streamreactor.connect.hive.orc.OrcSink
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kafka.connect.data.{Schema, Struct}

import scala.util.Try

object OrcHiveFormat extends HiveFormat {
  private val logger = org.slf4j.LoggerFactory.getLogger(getClass.getName)

  override def serde = Serde(
    "org.apache.hadoop.hive.ql.io.orc.OrcSerde",
    "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
    "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
    Map("org.apache.hadoop.hive.ql.io.orc.OrcSerde" -> "1")
  )

  override def writer(path: Path, schema: Schema)
                     (implicit fs: FileSystem): HiveWriter = new HiveWriter {

    logger.debug(s"Creating orc writer at $path")

    val sink: OrcSink = com.landoop.streamreactor.connect.hive.orc.sink(path, schema, OrcSinkConfig(overwrite = true))
    Try(fs.setPermission(path, FsPermission.valueOf("-rwxrwxrwx")))

    val createdTimestamp: Long = System.currentTimeMillis()
    var lastKnownFileSize: Long = fs.getFileStatus(path).getLen
    var readFileSize = false
    var count = 0

    override def write(struct: Struct): Long = {
      sink.write(struct)
      count = count + 1
      readFileSize = true
      count
    }

    override def close(): Unit = {
      logger.debug(s"Closing orc writer at path $path")
      sink.close()
    }

    override def file: Path = path
    override def currentCount: Long = count
    override def createdTime: Long = createdTimestamp
    override def fileSize: Long = {
      if (readFileSize) {
        lastKnownFileSize = fs.getFileStatus(path).getLen
        readFileSize = false
      }
      lastKnownFileSize
    }
  }

  override def reader(path: Path, startAt: Int, schema: Schema)
                     (implicit fs: FileSystem): HiveReader = new HiveReader {

    logger.debug(s"Creating orc reader for $path with offset $startAt")

    val reader = com.landoop.streamreactor.connect.hive.orc.source(path, OrcSourceConfig())
    var offset = startAt

    override def iterator: Iterator[Record] = reader.iterator.map { struct =>
      val record = Record(struct, path, offset)
      offset = offset + 1
      record
    }

    override def close(): Unit = reader.close()
  }
}