org.apache.commons.compress.archivers.tar.TarArchiveInputStream Scala Examples
The following examples show how to use org.apache.commons.compress.archivers.tar.TarArchiveInputStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: ImageLoaderUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.loaders

import java.awt.image.BufferedImage
import java.io.{InputStream, ByteArrayInputStream}
import java.net.URI
import javax.imageio.ImageIO

import keystoneml.loaders.VOCLoader._
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import keystoneml.pipelines.Logging
import keystoneml.utils._

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

// NOTE(review): the listing this was taken from had one more bare `import` whose target was
// lost in extraction; nothing in the code below needs it, so it has not been guessed at.

object ImageLoaderUtils extends Logging {

  /**
   * Loads labeled images from a set of tar archives.
   *
   * Every URI in `filePathsRDD` is opened as a tar archive and each matching entry is decoded
   * into an image.
   *
   * @param filePathsRDD URIs of the tar archives to read
   * @param labelsMap    maps a tar entry name to its label
   * @param imageBuilder builds the result object from (image, label, entry name)
   * @param namePrefix   when defined, only entries whose name starts with it are loaded
   * @return one element per entry that `ImageUtils.loadImage` could decode
   */
  def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag](
      filePathsRDD: RDD[URI],
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits.
      namePrefix: Option[String] = None): RDD[I] = {
    filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix))
  }

  /**
   * Reads a single tar archive and returns the images built from its entries.
   *
   * All entries are buffered in memory before the iterator is returned, so the underlying
   * stream can be (and is) closed before this method returns.
   */
  private def loadFile[L, I <: AbstractLabeledImage[L]](
      fileUri: URI,
      labelsMap: String => L,
      imageBuilder: (Image, L, Option[String]) => I,
      namePrefix: Option[String]): Iterator[I] = {
    val filePath = new Path(fileUri)
    val conf = new Configuration(true)
    val fs = FileSystem.get(filePath.toUri(), conf)
    // NOTE(review): the right-hand side was lost in extraction; `fs.open(filePath)` is the
    // Hadoop call that yields an input stream for a path — confirm against the project.
    val fStream = fs.open(filePath)

    try {
      val tarStream = new ArchiveStreamFactory().createArchiveInputStream(
        "tar", fStream).asInstanceOf[TarArchiveInputStream]

      val imgs = new ArrayBuffer[I]
      var entry = tarStream.getNextTarEntry()
      while (entry != null) {
        val name = entry.getName
        // An absent prefix accepts every entry.
        if (!entry.isDirectory && namePrefix.forall(prefix => name.startsWith(prefix))) {
          val content = new Array[Byte](entry.getSize().toInt)
          var offset = 0
          var ret = 0
          // A single read may return fewer bytes than requested; loop until the entry is
          // fully read or the stream signals end-of-data.
          while (ret >= 0 && offset != entry.getSize()) {
            ret = tarStream.read(content, offset, content.length - offset)
            if (ret >= 0) {
              offset += ret
            }
          }

          val bais = new ByteArrayInputStream(content)
          val image = ImageUtils.loadImage(bais).map { img =>
            imageBuilder(img, labelsMap(name), Some(name))
          }
          imgs ++= image
        }
        entry = tarStream.getNextTarEntry()
      }

      imgs.iterator
    } finally {
      // Closing the underlying stream releases the file handle even when reading fails.
      fStream.close()
    }
  }
}
Example 2
Source File: TarFlowSpec.scala From nexus with Apache License 2.0 | 5 votes |
/*
 * TarFlowSpec: writes three files with generated content into two sub-directories ("one",
 * "two") of a temporary "tarflow" directory, streams `Directory.walk(basePath)` through
 * `TarFlow.writer(basePath)`, reduces the emitted ByteStrings into one, and reads the result
 * back with TarArchiveInputStream. The (name, content) pairs read back are compared with the
 * written files plus three directory entries whose content is "". `afterAll` empties the
 * temporary directory.
 *
 * NOTE(review): this listing is extraction-damaged and is kept verbatim because the missing
 * parts cannot all be recovered from what is visible here:
 *   - the package name and several import targets are missing (bare `import`, `import{...}`);
 *   - in `readEntries`, `data` is allocated but no read into it is visible before it is
 *     decoded — presumably a read of the entry bytes from `tar` was dropped; confirm;
 *   - `val expected = { case ... } ++ directories` has no receiver for the partial function —
 *     presumably a map over `files`; confirm against the original project.
 */
package import import java.nio.file.{Files, Path, Paths} import import import{FileIO, Source} import akka.testkit.TestKit import akka.util.ByteString import{EitherValues, IOEitherValues, Randomness} import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpecLike import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues} import scala.annotation.tailrec class TarFlowSpec extends TestKit(ActorSystem("TarFlowSpec")) with AnyWordSpecLike with Matchers with IOEitherValues with Randomness with EitherValues with OptionValues with Inspectors with BeforeAndAfterAll { val basePath = Files.createTempDirectory("tarflow") val dir1 = basePath.resolve("one") val dir2 = basePath.resolve("two") override def afterAll(): Unit = { super.afterAll() FileUtils.cleanDirectory(basePath.toFile) () } type PathAndContent = (Path, String) "A TarFlow" should { Files.createDirectories(dir1) Files.createDirectories(dir2) def relativize(path: Path): String = basePath.getParent().relativize(path).toString "generate the byteString for a tar file correctly" in { val file1 = dir1.resolve("file1.txt") val file1Content = genString() val file2 = dir1.resolve("file3.txt") val file2Content = genString() val file3 = dir2.resolve("file3.txt") val file3Content = genString() val files = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content) forAll(files) { case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue } val byteString = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue val bytes = new ByteArrayInputStream(byteString.toArray) val tar = new TarArchiveInputStream(bytes) @tailrec def readEntries( tar: TarArchiveInputStream, entries: List[PathAndContent] = Nil ): List[PathAndContent] = { val entry = tar.getNextTarEntry if (entry == null) entries else { val data = 
Array.ofDim[Byte](entry.getSize.toInt) readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries) } } val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "") val untarred = readEntries(tar).map { case (path, content) => path.toString -> content } val expected = { case (path, content) => relativize(path) -> content } ++ directories untarred should contain theSameElementsAs expected } } }
Example 3
Source File: UtilCommands.scala From CM-Well with Apache License 2.0 | 5 votes |
import java.io.{BufferedInputStream, File}
import java.nio.file.{Files, Paths}
import java.security.MessageDigest

import scala.util.control.Breaks._
import javax.xml.bind.DatatypeConverter
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
// NOTE(review): the original import target for IOUtils was lost in extraction; the
// commons-compress variant is used because that library is already a dependency of this file.
import org.apache.commons.compress.utils.IOUtils

/** Helpers for locating `sshpass` and for verifying configuration files packed in component archives. */
object UtilCommands {
  val OSX_NAME = "Mac OS X"

  val linuxSshpass = if (Files.exists(Paths.get("bin/utils/sshpass"))) "bin/utils/sshpass" else "sshpass"
  val osxSshpass = "/usr/local/bin/sshpass"

  val sshpass = if (isOSX) osxSshpass else linuxSshpass

  // NOTE(review): the property key was lost in extraction (the listing shows an empty string);
  // "os.name" is the JVM property that reports "Mac OS X" — confirm.
  def isOSX = System.getProperty("os.name") == OSX_NAME

  /**
   * Extracts `configFilePath` from the component's archive under "./components" and checks
   * its MD5 digest against `expectedHash`.
   */
  def verifyComponentConfNotChanged(componentName:String, configFilePath:String, expectedHash:String) = {
    val confContent = UtilCommands.unTarGz("./components", componentName, configFilePath)
    UtilCommands.checksum(componentName, configFilePath, confContent, expectedHash)
  }

  /**
   * Compares the MD5 digest of `confContent` (hex, case-insensitive) with `expectedHash`.
   *
   * @throws Exception when the content is missing or the digests differ
   */
  def checksum(componentName:String, configFilePath:String, confContent:Array[Byte], expectedHash:String) = {
    // unTarGz returns null when the entry is absent; report that instead of failing inside digest().
    if (confContent == null)
      throw new Exception(s"$componentName configuration file $configFilePath was not found in the component archive")

    val actualHash = MessageDigest.getInstance("MD5").digest(confContent)
    val actualHashStr = DatatypeConverter.printHexBinary(actualHash)
    if (!expectedHash.equalsIgnoreCase(actualHashStr))
      throw new Exception(s"$componentName configuration file $configFilePath has been changed, please change the template accordingly " +
        s"(the new digest is $actualHashStr)")
  }

  /**
   * Reads one file out of a gzip-compressed tar archive.
   *
   * The archive is the first file in `rootFolder` whose name contains `componentName`. The
   * top-level folder name is taken from the first entry (after skipping a leading "./" entry),
   * and the entry named `<top-level folder>/<configFilePath>` is returned.
   *
   * @return the bytes of the first matching entry, or `null` when the archive has no such entry
   * @throws java.io.FileNotFoundException when no archive for `componentName` exists in `rootFolder`
   */
  def unTarGz(rootFolder:String, componentName: String, configFilePath:String):Array[Byte] = {
    def stripDotSlash(entry: ArchiveEntry): String = entry.getName.replaceAll("^\\./","")

    // listFiles() returns null when the folder is missing or unreadable.
    val archive = Option(new File(rootFolder).listFiles())
      .getOrElse(Array.empty[File])
      .find(file => file.getName.contains(componentName))
      .getOrElse(throw new java.io.FileNotFoundException(
        s"no archive containing '$componentName' in its name was found under $rootFolder"))

    val fileIn = Files.newInputStream(Paths.get(archive.getAbsolutePath))
    try {
      val tarIn = new TarArchiveInputStream(new GzipCompressorInputStream(new BufferedInputStream(fileIn)))
      try {
        val firstEntry: Option[ArchiveEntry] = Option(tarIn.getNextEntry: ArchiveEntry).flatMap { entry =>
          if (entry.getName == "./") Option(tarIn.getNextEntry: ArchiveEntry) else Some(entry)
        }

        firstEntry match {
          case None => null // empty archive
          case Some(first) =>
            val extractFolder = stripDotSlash(first).split("/")(0)
            val wanted = s"$extractFolder/$configFilePath"
            // The iterator is lazy: once `find` succeeds no further entry is requested, so the
            // tar stream is still positioned on the matching entry when its bytes are read.
            Iterator.iterate[ArchiveEntry](first)(_ => tarIn.getNextEntry)
              .takeWhile(_ != null)
              .find(entry => stripDotSlash(entry) == wanted)
              .map(_ => IOUtils.toByteArray(tarIn))
              .orNull
        }
      } finally {
        tarIn.close()
      }
    } finally {
      // Covers the case where building the gzip/tar wrappers fails; a second close is harmless.
      fileIn.close()
    }
  }
}
Example 4
Source File: CompressedFiles.scala From tensorflow_scala with Apache License 2.0 | 5 votes |
// NOTE(review): the package name that followed `package` was lost in extraction; restore the
// package clause from the original project before compiling this file in place.

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.utils.IOUtils

import java.io.{File, FileOutputStream, InputStream}
import java.nio.file.{Files, Path}
import java.util.zip.GZIPInputStream

/** Helpers for unpacking `.tar` and `.tar.gz` archives onto the local file system. */
object CompressedFiles {

  /** Unpacks the gzip-compressed tar file at `tgzFilePath` into `destinationPath`. */
  def decompressTGZ(tgzFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    val fileStream = Files.newInputStream(tgzFilePath)
    // The stream is closed here as well so that it is released even if wrapping it fails.
    try decompressTGZStream(fileStream, destinationPath, bufferSize)
    finally fileStream.close()
  }

  /** Unpacks the tar file at `tarFilePath` into `destinationPath`. */
  def decompressTar(tarFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    val fileStream = Files.newInputStream(tarFilePath)
    try decompressTarStream(fileStream, destinationPath, bufferSize)
    finally fileStream.close()
  }

  /** Unpacks a gzip-compressed tar stream into `destinationPath`; the stream is closed afterwards. */
  def decompressTGZStream(tgzStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    decompressTarStream(new GZIPInputStream(tgzStream), destinationPath, bufferSize)
  }

  /**
   * Unpacks a tar stream into `destinationPath`.
   *
   * Directory entries are skipped; parent directories of file entries are created on demand.
   * The stream is closed when extraction finishes or fails.
   *
   * @param tarStream       uncompressed tar data
   * @param destinationPath directory that receives the extracted files
   * @param bufferSize      size in bytes of the copy buffer
   * @throws java.io.IOException when an entry name resolves outside `destinationPath`
   */
  def decompressTarStream(tarStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = {
    val root = destinationPath.toAbsolutePath.normalize
    val inputStream = new TarArchiveInputStream(tarStream)
    try {
      var entry = inputStream.getNextTarEntry
      while (entry != null) {
        if (!entry.isDirectory) {
          val currentFile = new File(root.toString, entry.getName)
          // Reject names such as "../../x" that would write outside the destination.
          if (!currentFile.toPath.normalize.startsWith(root))
            throw new java.io.IOException(s"Tar entry '${entry.getName}' resolves outside of '$root'.")
          val parentFile = currentFile.getParentFile
          if (!parentFile.exists)
            parentFile.mkdirs()
          val output = new FileOutputStream(currentFile)
          try IOUtils.copy(inputStream, output, bufferSize)
          finally output.close()
        }
        entry = inputStream.getNextTarEntry
      }
    } finally {
      inputStream.close()
    }
  }
}
Example 5
Source File: FileUtils.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.util

import java.lang.reflect.Method
import java.net.URL
import java.io.{File, FileOutputStream, BufferedOutputStream, InputStream}
import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import java.io.BufferedInputStream
import java.nio.file.Files

object FileUtils {

  /**
   * Lists the regular files directly inside `dir`.
   *
   * @return the files, or an empty list when `dir` does not exist or is not a directory
   */
  def getListOfFiles(dir: String):List[File] = {
    val d = new File(dir)
    if (d.exists && d.isDirectory) {
      d.listFiles.filter(_.isFile).toList
    } else {
      List[File]()
    }
  }

  /**
   * Adds `jar` to the system class loader by reflectively invoking `addURL`.
   *
   * The method is looked up on the superclass of the system class loader's class, so this
   * only works when that superclass declares `addURL(URL)`.
   */
  def addJarToClasspath(jar: File): Unit = {
    // Get the ClassLoader class
    val cl: ClassLoader = ClassLoader.getSystemClassLoader
    val clazz: Class[_] = cl.getClass

    // Get the protected addURL method from the parent URLClassLoader class
    val method: Method = clazz.getSuperclass.getDeclaredMethod("addURL", Seq(classOf[URL]):_*)

    // Run protected addURL method to add JAR to classpath
    method.setAccessible(true)
    method.invoke(cl, Seq(jar.toURI().toURL()):_*)
  }

  /**
   * Unpacks a gzip-compressed tar stream into `destinationDir`.
   *
   * @param in             gzip-compressed tar data; closed when extraction finishes or fails
   * @param destinationDir directory to extract into; created (with parents) when missing
   * @return the destination directory
   * @throws java.io.IOException when an entry name resolves outside `destinationDir`
   */
  def untar(in:InputStream, destinationDir: String): File = {
    val dest = new File(destinationDir)
    dest.mkdirs()
    val destRoot = dest.getCanonicalFile.toPath

    val tarIn = new TarArchiveInputStream(
      new GzipCompressorInputStream(
        new BufferedInputStream(in)))
    try {
      var tarEntry = tarIn.getNextTarEntry
      while (tarEntry != null) {
        // create a file with the same name as the tarEntry
        val destPath = new File(dest, tarEntry.getName)
        // Reject names such as "../../x" that would write outside the destination.
        if (!destPath.getCanonicalFile.toPath.startsWith(destRoot))
          throw new java.io.IOException(s"Tar entry '${tarEntry.getName}' resolves outside of '$destRoot'")

        if (tarEntry.isDirectory) {
          destPath.mkdirs()
        } else {
          // Create any necessary parent dirs
          val parent = destPath.getParentFile
          if (!Files.exists(parent.toPath)) {
            parent.mkdirs()
          }

          val btoRead = new Array[Byte](1024)
          val bout = new BufferedOutputStream(new FileOutputStream(destPath))
          try {
            // Reading from the tar stream yields only the bytes of the current entry.
            var len = tarIn.read(btoRead)
            while (len != -1) {
              bout.write(btoRead, 0, len)
              len = tarIn.read(btoRead)
            }
          } finally {
            bout.close()
          }
        }
        tarEntry = tarIn.getNextTarEntry
      }
    } finally {
      tarIn.close()
    }
    dest
  }
}
Example 6
Source File: ArchiveUtils.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.util import org.slf4j.LoggerFactory import org.apache.commons.compress.archivers.tar.TarArchiveEntry import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream import import import import import tarIn.close() } else if (file.endsWith(".gz")) { val is2 = new GZIPInputStream(fin) val extracted = new File(target.getParent, target.getName.replace(".gz", "")) if (extracted.exists) extracted.delete extracted.createNewFile val fos = FileUtils.openOutputStream(extracted) IOUtils.copyLarge(is2, fos) is2.close() fos.flush() fos.close() } target.delete } }
Example 7
Source File: DataUtilities.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.examples.utilities

import java.io.{BufferedInputStream, BufferedOutputStream, File, FileInputStream, FileOutputStream, IOException}

import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.slf4j.{Logger, LoggerFactory}

object DataUtilities {
  val logger: Logger = LoggerFactory.getLogger(DataUtilities.getClass)
  private val BUFFER_SIZE = 4096

  /**
   * Extracts a gzip-compressed tar file into `outputPath`.
   *
   * Directory entries are created as directories; every other entry is written as a file,
   * with missing parent directories created first.
   *
   * @param filePath   path of the `.tar.gz` file to read
   * @param outputPath directory that receives the extracted content
   */
  @throws(classOf[IOException])
  def extractTarGz(filePath: String, outputPath: String): Unit = {
    var fileCount = 0
    // NOTE(review): the logging calls were lost in extraction (only their string arguments
    // survived); `logger.info` is assumed for both — confirm the intended level.
    logger.info("Extracting files")

    val fileIn = new FileInputStream(filePath)
    try {
      val tais = new TarArchiveInputStream(new GzipCompressorInputStream(
        new BufferedInputStream(fileIn)))
      try {
        // Read the tar entries using the getNextEntry method
        Iterator.continually(tais.getNextTarEntry).takeWhile(_ != null).foreach { entry =>
          val target = new File(outputPath + "/" + entry.getName)
          // Create directories as required
          if (entry.isDirectory) {
            target.mkdirs
          } else {
            // Archives are not required to carry an entry for every parent directory.
            Option(target.getParentFile).foreach(_.mkdirs())

            val data = new Array[Byte](BUFFER_SIZE)
            val dest = new BufferedOutputStream(new FileOutputStream(target), BUFFER_SIZE)
            try {
              // Reading from the tar stream yields only the bytes of the current entry.
              Iterator.continually(tais.read(data, 0, BUFFER_SIZE)).takeWhile(_ != -1).foreach { count =>
                dest.write(data, 0, count)
              }
            } finally {
              dest.close()
            }
            fileCount = fileCount + 1
          }
          if (fileCount % 1000 == 0) logger.info(".")
        }
      } finally {
        tais.close()
      }
    } finally {
      // Covers the case where building the gzip/tar wrappers fails; a second close is harmless.
      fileIn.close()
    }
  }
}
Example 8
Source File: TgzTransformerSpec.scala From releaser with Apache License 2.0 | 5 votes |
/*
 * TgzTransformerSpec: before each test the fixture archive (a help-frontend .tgz resolved from
 * the class path) is copied into a fresh temporary directory, which is deleted after the test.
 * The single test lists the entry names of the input archive, runs the transformer with the
 * candidate version "1.26.0-3-gd7ed03c" and the release version "1.4.0", and then checks that
 * the output archive contains "./help-frontend-1.4.0/" instead of
 * "./help-frontend-1.26.0-3-gd7ed03c/" and that the "./" entry has mode 493 (octal 755).
 *
 * `listTgzEntries` collects every entry of a .tgz file into a list; `assertTarEntry` checks the
 * presence or absence of an entry by name and, optionally, its mode.
 *
 * NOTE(review): this listing is extraction-damaged and is kept verbatim because the package
 * name, which the same-package project types used here depend on, cannot be recovered from
 * what is visible:
 *   - the package name and two import targets are missing (bare `import`);
 *   - in `listTgzEntries` the `bytes` buffer is allocated but never used, and `tarIn` is not
 *     closed if reading an entry fails.
 */
package import import java.nio.file.{Files, Path} import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream} import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream import import org.scalatest._ import scala.collection.mutable.ListBuffer import scala.util.{Failure, Success} class TgzTransformerSpec extends WordSpec with Matchers with BeforeAndAfterEach with OptionValues with TryValues{ val tgzPath = new File(this.getClass.getResource("/help-frontend/uk/gov/hmrc/help-frontend_2.11/1.26.0-3-gd7ed03c/help-frontend_2.11-1.26.0-3-gd7ed03c.tgz").toURI).toPath var transformer:TgzTransformer = _ val candidate_1_26_0_3_gd7ed03c = ReleaseCandidateVersion("1.26.0-3-gd7ed03c") val release_1_4_0 = ReleaseVersion("1.4.0") var tmpDir:Path = _ override def beforeEach(){ tmpDir = Files.createTempDirectory("tmp") transformer = new TgzTransformer() FileUtils.copyFileToDirectory(tgzPath.toFile, tmpDir.toFile) } override def afterEach(){ FileUtils.deleteDirectory(tmpDir.toFile) } "the transformer" should { "decompress the tgz, rename the main folder and compress it back" in { val inFile = new File(tmpDir.toFile, tgzPath.getFileName.toString).toPath val targetFilePath = tmpDir.resolve("help-frontend-1.4.0.tgz") val originalTarEntries = listTgzEntries(inFile) assertTarEntry(originalTarEntries, "./help-frontend-1.26.0-3-gd7ed03c/") assertTarEntry(originalTarEntries, "./help-frontend-1.4.0/", exists = false) assertTarEntry(originalTarEntries, "./", mode = Some(493)) val outFileTry = transformer(inFile, "help-frontend", candidate_1_26_0_3_gd7ed03c, release_1_4_0, targetFilePath) outFileTry match { case Success(outFile) => val tarEntries = listTgzEntries(targetFilePath) assertTarEntry(tarEntries, "./help-frontend-1.26.0-3-gd7ed03c/", exists = false) assertTarEntry(tarEntries, "./help-frontend-1.4.0/") assertTarEntry(tarEntries, "./", mode = Some(493)) case Failure(e) => fail("Caught exception: " + e.getMessage, e) } } } private def 
listTgzEntries(localTgzFile: Path) : List[TarArchiveEntry] = { val bytes = new Array[Byte](2048) val fin = new BufferedInputStream(new FileInputStream(localTgzFile.toFile)) val gzIn = new GzipCompressorInputStream(fin) val tarIn = new TarArchiveInputStream(gzIn) val entries = ListBuffer[TarArchiveEntry]() Iterator continually tarIn.getNextTarEntry takeWhile (null !=) foreach { tarEntry => entries += tarEntry } tarIn.close() entries.toList } private def assertTarEntry(tarEntries: List[TarArchiveEntry], entryName: String, exists: Boolean = true, mode: Option[Int] = None) = { val entryOption = tarEntries.find(_.getName == entryName) entryOption match { case Some(entry) => exists shouldBe true mode.foreach { m => m shouldBe entry.getMode} case None => exists shouldBe false } } }
Example 9
Source File: Tar.scala From libisabelle with Apache License 2.0 | 5 votes |
package info.hupel.isabelle.setup

import java.net.URL
import java.nio.file._
import java.nio.file.attribute.PosixFilePermissions

import scala.util.Try

import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream

import org.apache.commons.lang3.SystemUtils

/** Download and extraction of gzip-compressed tar archives. */
object Tar {

  /** Permissions applied to extracted files whose tar mode has its lowest bit set. */
  val execPermissions = PosixFilePermissions.fromString("rwxr-xr-x")

  /**
   * Opens `url` and wraps the response as a gzip-compressed tar stream.
   *
   * @return the tar stream, or a failure when the URL cannot be opened or is not gzip data
   */
  def download(url: URL): Try[TarArchiveInputStream] =
    Try {
      val raw = url.openStream()
      try new TarArchiveInputStream(new GzipCompressorInputStream(raw))
      catch {
        // Do not leak the connection when the gzip header is invalid.
        case scala.util.control.NonFatal(e) =>
          raw.close()
          throw e
      }
    }

  /**
   * Extracts all entries of `tar` below `path`.
   *
   * Entries that would land outside `path`, that already exist, or that are hard links to a
   * target outside `path` abort the extraction. The archive must create exactly one top-level
   * directory, which is returned.
   *
   * @param path directory to extract into
   * @param tar  archive to read; it is not closed by this method
   * @return the single root directory created by the archive, or a failure
   */
  def extractTo(path: Path, tar: TarArchiveInputStream): Try[Path] = Try {
    def next() = Option(tar.getNextTarEntry())

    @annotation.tailrec
    def go(entry: Option[TarArchiveEntry], paths: List[Path]): List[Path] = entry match {
      case None =>
        paths.reverse
      case Some(entry) =>
        val name = entry.getName
        val subpath = path.resolve(name).normalize

        if (subpath.startsWith(path) && !Files.exists(subpath, LinkOption.NOFOLLOW_LINKS)) {
          Files.createDirectories(subpath.getParent)
          if (entry.isDirectory)
            Files.createDirectory(subpath)
          else if (entry.isSymbolicLink)
            Files.createSymbolicLink(subpath, Paths.get(entry.getLinkName))
          else if (entry.isLink) {
            val target = path.resolve(Paths.get(entry.getLinkName)).normalize
            // A hard link to a file outside the extraction root would expose that file.
            if (!target.startsWith(path))
              sys.error("malicious tar file: hard link target outside of extraction directory")
            Files.createLink(subpath, target)
          }
          else if (entry.isFile) {
            // Copying from the tar stream reads only the bytes of the current entry.
            Files.copy(tar, subpath)
            if (!SystemUtils.IS_OS_WINDOWS && (entry.getMode % 2 == 1))
              Files.setPosixFilePermissions(subpath, execPermissions)
          }
          else
            sys.error("unknown tar file entry")
        }
        else
          sys.error("malicious tar file or file already exists")

        val p = if (entry.isDirectory) List(subpath) else Nil

        go(next(), p ::: paths)
    }

    // Keep only directories that are not nested inside an already collected root.
    go(next(), Nil).foldLeft(List.empty[Path]) { (roots, path) =>
      if (roots.exists(path.startsWith)) roots else path :: roots
    } match {
      case List(root) => root
      case _ => sys.error("untarring created more than one root directory")
    }
  }
}