org.apache.commons.io.FilenameUtils Scala Examples
The following examples show how to use org.apache.commons.io.FilenameUtils in Scala. Each snippet is taken from an open-source project; the source file, project, and license are noted above the code.
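Most of the snippets lean on just a few FilenameUtils calls, chiefly getExtension and concat. As a quick orientation, here is a minimal, self-contained sketch of those calls; the file names are made up for illustration:

import org.apache.commons.io.FilenameUtils

object FilenameUtilsBasics extends App {
  // getExtension returns the text after the last dot, or "" if there is none.
  println(FilenameUtils.getExtension("data/train.libsvm"))   // libsvm
  // getBaseName strips both the directory part and the extension.
  println(FilenameUtils.getBaseName("data/train.libsvm"))    // train
  // concat joins a base directory and a relative path and normalizes the result.
  println(FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "DL4JEarlyStoppingExample/"))
}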
Example 1
Source File: AttributesComputation.scala From nexus with Apache License 2.0
package ch.epfl.bluebrain.nexus.storage.attributes

import java.nio.file.{Files, Path}
import java.security.MessageDigest

import akka.http.scaladsl.model.HttpCharsets.`UTF-8`
import akka.http.scaladsl.model.MediaTypes.{`application/octet-stream`, `application/x-tar`}
import akka.http.scaladsl.model.{ContentType, MediaType, MediaTypes}
import akka.stream.Materializer
import akka.stream.scaladsl.{Keep, Sink}
import akka.util.ByteString
import cats.effect.Effect
import cats.implicits._
import ch.epfl.bluebrain.nexus.storage.File.{Digest, FileAttributes}
import ch.epfl.bluebrain.nexus.storage.StorageError.InternalError
import ch.epfl.bluebrain.nexus.storage._
import org.apache.commons.io.FilenameUtils

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success, Try}

trait AttributesComputation[F[_], Source] {

  // The trait's abstract apply method and helpers such as folderSource, fileSource,
  // sinkSize, sinkDigest and detectMediaType are defined in the full source file
  // but omitted from this excerpt.
  implicit def akkaAttributes[F[_]](implicit
      ec: ExecutionContext,
      mt: Materializer,
      F: Effect[F]
  ): AttributesComputation[F, AkkaSource] =
    (path: Path, algorithm: String) => {
      if (!Files.exists(path)) F.raiseError(InternalError(s"Path not found '$path'"))
      else
        Try(MessageDigest.getInstance(algorithm)) match {
          case Success(msgDigest) =>
            val isDir  = Files.isDirectory(path)
            val source = if (isDir) folderSource(path) else fileSource(path)
            source
              .alsoToMat(sinkSize)(Keep.right)
              .toMat(sinkDigest(msgDigest)) { (bytesF, digestF) =>
                (bytesF, digestF).mapN {
                  case (bytes, digest) =>
                    FileAttributes(path.toAkkaUri, bytes, digest, detectMediaType(path, isDir))
                }
              }
              .run()
              .to[F]
          case Failure(_) =>
            F.raiseError(InternalError(s"Invalid algorithm '$algorithm'."))
        }
    }
}
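This excerpt relies on helpers that are not shown, including detectMediaType, which is where FilenameUtils comes in. Purely as an illustration of that idea, the following hypothetical, self-contained lookup maps an extension to a MIME string; the object name and the table are assumptions, not the project's code (the real helper presumably goes through the Akka HTTP MediaTypes registry imported above):

import org.apache.commons.io.FilenameUtils

object MediaTypeSketch {
  // Hypothetical extension-to-MIME table, for illustration only.
  private val byExtension = Map(
    "json" -> "application/json",
    "txt"  -> "text/plain",
    "tar"  -> "application/x-tar"
  )

  // Directories map to application/x-tar here, echoing the `application/x-tar` import in the excerpt.
  def detectMediaType(fileName: String, isDir: Boolean): String =
    if (isDir) "application/x-tar"
    else byExtension.getOrElse(
      FilenameUtils.getExtension(fileName).toLowerCase,
      "application/octet-stream"
    )
}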
Example 2
Source File: CCUtil.scala From reforest with Apache License 2.0
package reforest.util

import org.apache.commons.io.FilenameUtils
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.{SparkConf, SparkContext}
import reforest.TypeInfo
import reforest.data.load.{ARFFUtil, DataLoad, LibSVMUtil}
import reforest.rf.RFCategoryInfo
import reforest.rf.parameter.RFParameter

import scala.reflect.ClassTag

  // The enclosing class declaration is omitted from this excerpt.
  def getDataLoader[T: ClassTag, U: ClassTag](property: RFParameter,
                                              typeInfo: Broadcast[TypeInfo[T]],
                                              instrumented: Broadcast[GCInstrumented],
                                              categoryInfo: Broadcast[RFCategoryInfo]): DataLoad[T, U] = {
    // The extension is derived from the dataset path, but this excerpt
    // dispatches on the explicitly configured property.fileType.
    val extension = FilenameUtils.getExtension(property.dataset).toUpperCase()

    property.fileType match {
      case "LIBSVM" => new LibSVMUtil(typeInfo, instrumented, categoryInfo)
      case "SVM"    => new LibSVMUtil(typeInfo, instrumented, categoryInfo)
      case "ARFF"   => new ARFFUtil(typeInfo, instrumented, categoryInfo)
      case _        => new LibSVMUtil(typeInfo, instrumented, categoryInfo)
    }
  }
}
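The excerpt computes the upper-cased extension via FilenameUtils but then dispatches on the configured property.fileType. A minimal standalone sketch of dispatching on the extension alone could look like the following; the Format values are placeholders, not reforest's loader classes:

import org.apache.commons.io.FilenameUtils

object LoaderByExtension {
  sealed trait Format
  case object LibSvm extends Format
  case object Arff   extends Format

  def formatOf(datasetPath: String): Format =
    FilenameUtils.getExtension(datasetPath).toUpperCase match {
      case "ARFF"           => Arff
      case "LIBSVM" | "SVM" => LibSvm
      case _                => LibSvm // default, mirroring the example's fallback
    }
}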
Example 3
Source File: Watcher.scala From seed with Apache License 2.0
package seed.cli.util

import java.nio.file.{Files, Path, StandardWatchEventKinds, WatchEvent}

import org.slf4j.LoggerFactory
import zio._
import zio.stream._
import io.methvin.watcher.DirectoryChangeEvent
import io.methvin.watcher.DirectoryChangeEvent.EventType
import io.methvin.watcher.DirectoryChangeListener
import io.methvin.watcher.DirectoryWatcher
import io.methvin.watcher.hashing.FileHasher
import org.apache.commons.io.FilenameUtils
import org.slf4j.Logger

import scala.collection.JavaConverters
import scala.concurrent.ExecutionContext

object Watcher {
  val Extensions = Array("scala", "java")

  // System.setProperty(org.slf4j.impl.SimpleLogger.DEFAULT_LOG_LEVEL_KEY, "TRACE")

  def watchPaths(
    paths: List[Path],
    onStarted: () => Unit = () => ()
  ): Stream[Throwable, Unit] =
    Stream.effectAsyncM[Throwable, Unit] { e =>
      val logger  = LoggerFactory.getLogger("watcher")
      val (p, f)  = paths.partition(Files.isDirectory(_))
      val watcher = new CustomRecursiveFileMonitor(p, f, logger = logger) {
        override def onCreate(file: Path, count: Int): Unit =
          if (Extensions.contains(FilenameUtils.getExtension(file.toString)))
            e(Task.succeed(()))
        override def onModify(file: Path, count: Int): Unit =
          if (Extensions.contains(FilenameUtils.getExtension(file.toString)))
            e(Task.succeed(()))
        override def onDelete(file: Path, count: Int): Unit = {}
      }

      Task.descriptorWith { d =>
        val ec = d.executor.asEC
        UIO {
          watcher.start()(ec)
          onStarted()
        }.onTermination(_ => UIO(watcher.close()))
      }
    }
}

abstract class CustomRecursiveFileMonitor(
  val paths: List[Path],
  val files: List[Path],
  val fileHasher: Option[FileHasher] = Some(FileHasher.DEFAULT_FILE_HASHER),
  val logger: Logger
) {
  protected[this] val watcher: DirectoryWatcher = DirectoryWatcher.builder
    .paths(JavaConverters.seqAsJavaListConverter(paths).asJava)
    .files(JavaConverters.seqAsJavaListConverter(files).asJava)
    .listener(new DirectoryChangeListener {
      override def onEvent(event: DirectoryChangeEvent): Unit =
        event.eventType match {
          case EventType.OVERFLOW =>
          case et =>
            CustomRecursiveFileMonitor.this.onEvent(
              et.getWatchEventKind.asInstanceOf[WatchEvent.Kind[Path]],
              event.path,
              event.count
            )
        }
      override def onException(e: Exception): Unit = e.printStackTrace()
    })
    .fileHasher(fileHasher.orNull)
    .logger(logger)
    .build()

  def onEvent(eventType: WatchEvent.Kind[Path], file: Path, count: Int): Unit =
    eventType match {
      case StandardWatchEventKinds.ENTRY_CREATE => onCreate(file, count)
      case StandardWatchEventKinds.ENTRY_MODIFY => onModify(file, count)
      case StandardWatchEventKinds.ENTRY_DELETE => onDelete(file, count)
    }

  def start()(implicit executionContext: ExecutionContext): Unit =
    executionContext.execute(() => watcher.watch())

  def close(): Unit = watcher.close()

  def onCreate(file: Path, count: Int): Unit
  def onModify(file: Path, count: Int): Unit
  def onDelete(file: Path, count: Int): Unit
}
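The extension check inside onCreate and onModify can also be expressed with FilenameUtils.isExtension, which tests a name against a whole array of extensions in one call. A small equivalent sketch:

import java.nio.file.Path
import org.apache.commons.io.FilenameUtils

object SourceFileFilter {
  private val Extensions = Array("scala", "java")

  // Same predicate as Extensions.contains(FilenameUtils.getExtension(file.toString)).
  def isSourceFile(file: Path): Boolean =
    FilenameUtils.isExtension(file.toString, Extensions)
}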
Example 4
Source File: HadoopUtils.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.ml.image

import scala.language.existentials
import scala.util.Random

import org.apache.commons.io.FilenameUtils
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

import org.apache.spark.sql.SparkSession

private object RecursiveFlag {

  def withPathFilter[T](
      sampleRatio: Double,
      spark: SparkSession,
      seed: Long)(f: => T): T = {
    val sampleImages = sampleRatio < 1
    if (sampleImages) {
      val flagName = FileInputFormat.PATHFILTER_CLASS
      val hadoopConf = spark.sparkContext.hadoopConfiguration
      val old = Option(hadoopConf.getClass(flagName, null))
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio)
      hadoopConf.setLong(SamplePathFilter.seedParam, seed)
      hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter])
      try f finally {
        hadoopConf.unset(SamplePathFilter.ratioParam)
        hadoopConf.unset(SamplePathFilter.seedParam)
        old match {
          case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
          case None => hadoopConf.unset(flagName)
        }
      }
    } else {
      f
    }
  }
}
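withPathFilter only wires SamplePathFilter (not shown) into the Hadoop configuration; the FilenameUtils import serves that filter. As a rough sketch of how such a sampling filter could use FilenameUtils, here is a hypothetical stand-in, not Spark's actual SamplePathFilter implementation:

import scala.util.Random
import org.apache.commons.io.FilenameUtils
import org.apache.hadoop.fs.{Path, PathFilter}

// Hypothetical sampling filter: paths without an extension (typically directories)
// always pass so traversal continues; paths with an extension pass with probability `ratio`.
class SamplingByExtensionFilter(ratio: Double, seed: Long) extends PathFilter {
  private val random = new Random(seed)

  override def accept(path: Path): Boolean = {
    val looksLikeFile = FilenameUtils.getExtension(path.toString).nonEmpty
    !looksLikeFile || random.nextDouble() < ratio
  }
}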
Example 5
Source File: EarlyStoppingMNIST.scala From dl4scala with MIT License
package org.dl4scala.examples.misc.earlystopping

import java.util.Collections
import java.util.concurrent.TimeUnit

import org.apache.commons.io.FilenameUtils
import org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator
import org.deeplearning4j.earlystopping.EarlyStoppingConfiguration
import org.deeplearning4j.earlystopping.saver.LocalFileModelSaver
import org.deeplearning4j.earlystopping.scorecalc.DataSetLossCalculator
import org.deeplearning4j.earlystopping.termination.{MaxEpochsTerminationCondition, MaxTimeIterationTerminationCondition}
import org.deeplearning4j.earlystopping.trainer.EarlyStoppingTrainer
import org.deeplearning4j.nn.api.OptimizationAlgorithm
import org.deeplearning4j.nn.conf.inputs.InputType
import org.deeplearning4j.nn.conf.layers.{ConvolutionLayer, DenseLayer, OutputLayer, SubsamplingLayer}
import org.deeplearning4j.nn.conf.{NeuralNetConfiguration, Updater}
import org.deeplearning4j.nn.weights.WeightInit
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.lossfunctions.LossFunctions

import scala.collection.JavaConverters._
import java.util

object EarlyStoppingMNIST {
  def main(args: Array[String]): Unit = {
    // Configure network:
    val nChannels = 1
    val outputNum = 10
    val batchSize = 25
    val iterations = 1
    val seed = 123

    val configuration = new NeuralNetConfiguration.Builder()
      .seed(seed)
      .iterations(iterations)
      .regularization(true).l2(0.0005)
      .learningRate(0.02)
      .weightInit(WeightInit.XAVIER)
      .activation(Activation.RELU)
      .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
      .updater(Updater.NESTEROVS)
      .list()
      .layer(0, new ConvolutionLayer.Builder(5, 5)
        .nIn(nChannels)
        .stride(1, 1)
        .nOut(20).dropOut(0.5)
        .build())
      .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
        .kernelSize(2, 2)
        .stride(2, 2)
        .build())
      .layer(2, new DenseLayer.Builder()
        .nOut(500).build())
      .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .nOut(outputNum)
        .activation(Activation.SOFTMAX)
        .build())
      .setInputType(InputType.convolutionalFlat(28, 28, 1)) // See note in LenetMnistExample
      .backprop(true).pretrain(false).build()

    // Get data:
    val mnistTrain1024 = new MnistDataSetIterator(batchSize, 1024, false, true, true, 12345)
    val mnistTest512 = new MnistDataSetIterator(batchSize, 512, false, false, true, 12345)

    val tempDir = System.getProperty("java.io.tmpdir")
    val exampleDirectory = FilenameUtils.concat(tempDir, "DL4JEarlyStoppingExample/")
    val saver = new LocalFileModelSaver(exampleDirectory)

    val esConf = new EarlyStoppingConfiguration.Builder()
      .epochTerminationConditions(new MaxEpochsTerminationCondition(50)) // Max of 50 epochs
      .evaluateEveryNEpochs(1)
      .iterationTerminationConditions(new MaxTimeIterationTerminationCondition(20, TimeUnit.MINUTES)) // Max of 20 minutes
      .scoreCalculator(new DataSetLossCalculator(mnistTest512, true)) // Calculate test set score
      .modelSaver(saver)
      .build()

    val trainer = new EarlyStoppingTrainer(esConf, configuration, mnistTrain1024)

    // Conduct early stopping training:
    val result = trainer.fit()
    println("Termination reason: " + result.getTerminationReason)
    println("Termination details: " + result.getTerminationDetails)
    println("Total epochs: " + result.getTotalEpochs)
    println("Best epoch number: " + result.getBestModelEpoch)
    println("Score at best epoch: " + result.getBestModelScore)

    // Print score vs. epoch
    val scoreVsEpoch = result.getScoreVsEpoch
    val list = new util.ArrayList[Integer](scoreVsEpoch.keySet)
    Collections.sort(list)
    System.out.println("Score vs. Epoch:")
    for (i <- list.asScala) {
      println(i + "\t" + scoreVsEpoch.get(i))
    }
  }
}
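FilenameUtils.concat is what builds the model-saving directory here. A tiny standalone sketch of that step, with a directory-creation call added in case the saver expects the path to exist (an assumption, not something this excerpt shows):

import java.io.File
import org.apache.commons.io.FilenameUtils

object ModelDirSketch extends App {
  val tempDir = System.getProperty("java.io.tmpdir")
  // concat inserts the separator between the base dir and the relative path and normalizes it.
  val exampleDirectory = FilenameUtils.concat(tempDir, "DL4JEarlyStoppingExample/")
  new File(exampleDirectory).mkdirs()
  println(s"Models would be saved under: $exampleDirectory")
}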
Example 6
Source File: HadoopUtils.scala From spark-images with Apache License 2.0
package org.apache.spark.image

import java.nio.file.Paths

import org.apache.commons.io.FilenameUtils

import scala.sys.process._
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.spark.sql.SparkSession

import scala.language.existentials
import scala.util.Random

object RecursiveFlag {

  def setPathFilter(value: Option[Class[_]], sampleRatio: Option[Double] = None, spark: SparkSession)
    : Option[Class[_]] = {
    val flagName = FileInputFormat.PATHFILTER_CLASS
    val hadoopConf = spark.sparkContext.hadoopConfiguration
    val old = Option(hadoopConf.getClass(flagName, null))
    if (sampleRatio.isDefined) {
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio.get)
    } else {
      hadoopConf.unset(SamplePathFilter.ratioParam)
      None
    }

    value match {
      case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
      case None => hadoopConf.unset(flagName)
    }
    old
  }
}
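Unlike Example 4, which restores the previous filter itself, this variant returns the previously configured PathFilter class and leaves restoration to the caller. A brief usage sketch of that save/restore contract, assuming an existing SparkSession named spark:

// Install the sampling filter, read, then restore whatever was configured before.
val previous = RecursiveFlag.setPathFilter(Some(classOf[SamplePathFilter]), Some(0.1), spark)
try {
  // ... read the sampled files here ...
} finally {
  RecursiveFlag.setPathFilter(previous, None, spark)
}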
Example 7
Source File: SparkSqlRunner.scala From amaterasu with Apache License 2.0
package org.apache.amaterasu.executor.execution.actions.runners.spark.SparkSql

import java.io.File

import org.apache.amaterasu.common.execution.actions.Notifier
import org.apache.amaterasu.common.logging.Logging
import org.apache.amaterasu.common.runtime.Environment
import org.apache.commons.io.FilenameUtils
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, SparkSession}

  // The enclosing SparkSqlRunner class declaration is omitted from this excerpt.
  def findFileType(folderName: File): Array[String] = {
    // get all the files from a directory
    val files: Array[File] = folderName.listFiles()
    val extensions: Array[String] = files.map(file => FilenameUtils.getExtension(file.toString))
    extensions
  }
}

object SparkSqlRunner {

  def apply(env: Environment,
            jobId: String,
            actionName: String,
            notifier: Notifier,
            sc: SparkContext): SparkSqlRunner = {

    val sparkSqlRunnerObj = new SparkSqlRunner
    sparkSqlRunnerObj.env = env
    sparkSqlRunnerObj.jobId = jobId
    sparkSqlRunnerObj.actionName = actionName
    sparkSqlRunnerObj.notifier = notifier
    sparkSqlRunnerObj.sc = sc
    sparkSqlRunnerObj.spark = SparkSession.builder().config(sc.getConf).enableHiveSupport().getOrCreate()
    sparkSqlRunnerObj
  }
}
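findFileType returns one extension per file and assumes listFiles() never returns null. A standalone variant of the same idea that collects the distinct extensions and guards against a missing directory (a sketch, not amaterasu's code):

import java.io.File
import org.apache.commons.io.FilenameUtils

object ExtensionScan {
  def distinctExtensions(folder: File): Set[String] =
    Option(folder.listFiles())
      .getOrElse(Array.empty[File])
      .filter(_.isFile)
      .map(f => FilenameUtils.getExtension(f.getName).toLowerCase)
      .toSet
}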