org.apache.hadoop.fs.PathFilter Scala Examples
The following examples show how to use org.apache.hadoop.fs.PathFilter.
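Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of what a PathFilter is: an interface with a single accept(Path): Boolean method that Hadoop's FileSystem.listStatus and the FileInputFormat family consult when deciding which paths to include. The directory path in the usage comment is an arbitrary example.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path, PathFilter}

// A PathFilter with a single accept method; this one skips metadata and
// hidden files such as _SUCCESS, _metadata and anything starting with ".".
object VisibleFilesFilter extends PathFilter {
  override def accept(path: Path): Boolean = {
    val name = path.getName
    !name.startsWith("_") && !name.startsWith(".")
  }
}

// Usage sketch ("/tmp/data" is a made-up example directory):
// val fs = FileSystem.get(new Configuration())
// val visible = fs.listStatus(new Path("/tmp/data"), VisibleFilesFilter)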
Example 1
Source File: ParquetCompatibilityTest.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.execution.datasources.parquet

import scala.collection.JavaConversions._

import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.QueryTest

private[sql] abstract class ParquetCompatibilityTest extends QueryTest with ParquetTest {

  protected def readParquetSchema(path: String): MessageType = {
    readParquetSchema(path, { path => !path.getName.startsWith("_") })
  }

  // Read the Parquet schema
  protected def readParquetSchema(path: String, pathFilter: Path => Boolean): MessageType = {
    val fsPath = new Path(path)
    val fs = fsPath.getFileSystem(configuration)
    val parquetFiles = fs.listStatus(fsPath, new PathFilter {
      override def accept(path: Path): Boolean = pathFilter(path)
    }).toSeq

    val footers = ParquetFileReader.readAllFootersInParallel(configuration, parquetFiles, true)
    footers.head.getParquetMetadata.getFileMetaData.getSchema
  }

  protected def logParquetSchema(path: String): Unit = {
    logInfo(
      // Schema of the Parquet file written by parquet-avro
      s"""Schema of the Parquet file written by parquet-avro:
         |${readParquetSchema(path)}
       """.stripMargin)
  }
}

// Composite Parquet compatibility test helpers
object ParquetCompatibilityTest {
  def makeNullable[T <: AnyRef](i: Int)(f: => T): T = {
    if (i % 3 == 0) null.asInstanceOf[T] else f
  }
}
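The pattern worth noting above is the inline adapter from a plain Scala predicate (Path => Boolean) to Hadoop's PathFilter, which lets listStatus skip _SUCCESS and _metadata files before the Parquet footers are read. Below is a standalone sketch of that same adapter, assuming a local or HDFS directory exists at whatever path you pass in; the path in the comment is made up.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path, PathFilter}

// Wrap a Scala predicate as a PathFilter, the same trick readParquetSchema
// uses before handing the matching files to ParquetFileReader.
def listWith(dir: String, pred: Path => Boolean): Seq[FileStatus] = {
  val fsPath = new Path(dir)
  val fs = fsPath.getFileSystem(new Configuration())
  fs.listStatus(fsPath, new PathFilter {
    override def accept(path: Path): Boolean = pred(path)
  }).toSeq
}

// e.g. keep only real data files ("/tmp/parquet-compat" is a made-up path):
// listWith("/tmp/parquet-compat", p => !p.getName.startsWith("_"))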
Example 2
Source File: HadoopUtils.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.ml.image

import scala.language.existentials
import scala.util.Random

import org.apache.commons.io.FilenameUtils
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import org.apache.spark.sql.SparkSession

private object RecursiveFlag {

  def withPathFilter[T](
      sampleRatio: Double,
      spark: SparkSession,
      seed: Long)(f: => T): T = {
    val sampleImages = sampleRatio < 1
    if (sampleImages) {
      val flagName = FileInputFormat.PATHFILTER_CLASS
      val hadoopConf = spark.sparkContext.hadoopConfiguration
      val old = Option(hadoopConf.getClass(flagName, null))
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio)
      hadoopConf.setLong(SamplePathFilter.seedParam, seed)
      hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter])
      try f finally {
        hadoopConf.unset(SamplePathFilter.ratioParam)
        hadoopConf.unset(SamplePathFilter.seedParam)
        old match {
          case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
          case None => hadoopConf.unset(flagName)
        }
      }
    } else {
      f
    }
  }
}
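withPathFilter is a loan pattern: it registers SamplePathFilter under FileInputFormat.PATHFILTER_CLASS (together with its ratio and seed parameters) only while the block runs, then restores whatever filter was configured before. The object is private to Spark's ml.image package, so ordinary applications cannot call it directly; the sketch below only illustrates the intent. It assumes spark is an existing SparkSession, uses sparkContext.binaryFiles as a stand-in for the image-loading code that normally runs inside the block, and "/data/images" is a made-up path.

// Hypothetical caller inside the org.apache.spark.ml.image package:
// roughly 10% of the files should pass SamplePathFilter while this block runs.
val sampledCount = RecursiveFlag.withPathFilter(sampleRatio = 0.1, spark, seed = 42L) {
  // Reads that go through the new-API FileInputFormat pick up the installed filter.
  spark.sparkContext.binaryFiles("/data/images").count()
}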
Example 3
Source File: ParquetCompatibilityTest.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution.datasources.parquet

import scala.collection.JavaConverters.{collectionAsScalaIterableConverter, mapAsJavaMapConverter, seqAsJavaListConverter}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetWriter}
import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.{MessageType, MessageTypeParser}

import org.apache.spark.sql.QueryTest

  // NOTE: the enclosing object declaration (and the DirectWriteSupport class
  // it contains) is elided in this excerpt; only writeDirect is shown.
  def writeDirect(
      path: String,
      schema: String,
      metadata: Map[String, String],
      recordWriters: (RecordConsumer => Unit)*): Unit = {
    val messageType = MessageTypeParser.parseMessageType(schema)
    val writeSupport = new DirectWriteSupport(messageType, metadata)
    val parquetWriter = new ParquetWriter[RecordConsumer => Unit](new Path(path), writeSupport)
    try recordWriters.foreach(parquetWriter.write) finally parquetWriter.close()
  }
}
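writeDirect drives Parquet at the lowest level: each record writer receives the RecordConsumer and emits one record field by field. Below is a hedged sketch of a call, assuming the excerpt's elided enclosing object (with its DirectWriteSupport class) is in scope; the output path, schema string and metadata entry are made-up examples.

// Write two single-column records by hand.
writeDirect(
  "/tmp/direct.parquet",                     // assumed output path
  "message root { required int32 id; }",     // minimal Parquet message type
  Map("created.by" -> "example"),            // extra key/value file metadata
  { rc =>
    rc.startMessage()
    rc.startField("id", 0); rc.addInteger(1); rc.endField("id", 0)
    rc.endMessage()
  },
  { rc =>
    rc.startMessage()
    rc.startField("id", 0); rc.addInteger(2); rc.endField("id", 0)
    rc.endMessage()
  })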
Example 4
Source File: HadoopUtils.scala From spark-images with Apache License 2.0
package org.apache.spark.image

import java.nio.file.Paths

import scala.language.existentials
import scala.sys.process._
import scala.util.Random

import org.apache.commons.io.FilenameUtils
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import org.apache.spark.sql.SparkSession

object RecursiveFlag {

  def setPathFilter(
      value: Option[Class[_]],
      sampleRatio: Option[Double] = None,
      spark: SparkSession): Option[Class[_]] = {
    val flagName = FileInputFormat.PATHFILTER_CLASS
    val hadoopConf = spark.sparkContext.hadoopConfiguration
    val old = Option(hadoopConf.getClass(flagName, null))

    if (sampleRatio.isDefined) {
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio.get)
    } else {
      hadoopConf.unset(SamplePathFilter.ratioParam)
      None
    }

    value match {
      case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
      case None => hadoopConf.unset(flagName)
    }

    old
  }
}
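Unlike the loan-pattern helper in Example 2, setPathFilter simply mutates the Hadoop configuration and returns the previously configured filter class; the caller is responsible for restoring it afterwards. A hypothetical call sequence is sketched below, assuming spark is an existing SparkSession and that the sampling ratio and directory path are arbitrary examples.

// Install SamplePathFilter with a 20% sampling ratio, remembering the old filter.
val previous = RecursiveFlag.setPathFilter(Some(classOf[SamplePathFilter]), Some(0.2), spark)
try {
  // Reads that go through FileInputFormat now see only the sampled files (assumed path).
  spark.sparkContext.binaryFiles("/data/images").count()
} finally {
  // Put back whatever filter (possibly none) was configured before.
  RecursiveFlag.setPathFilter(previous, None, spark)
}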