Java Code Examples for org.apache.hadoop.fs.FileSystem#copyFromLocalFile()
The following examples show how to use org.apache.hadoop.fs.FileSystem#copyFromLocalFile(). Each example comes from an open-source project; the source file, project, and license are noted above the code.
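Before the project examples, here is a minimal, self-contained sketch of the call itself. Everything in it — the class name, the paths, and the fs.defaultFS value — is a placeholder for illustration, not taken from any project below. The two-argument overload copies a local file into the target FileSystem; the four-argument overload adds explicit delSrc and overwrite flags.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
  public static void main(String[] args) throws Exception {
    // Placeholder paths -- substitute real locations for your environment.
    Path localFile = new Path("/tmp/data.txt");
    Path hdfsTarget = new Path("/user/example/data.txt");

    Configuration conf = new Configuration();
    // fs.defaultFS is normally picked up from core-site.xml; set here only for illustration.
    conf.set("fs.defaultFS", "hdfs://localhost:8020");

    try (FileSystem fs = FileSystem.get(conf)) {
      // Simple form: copy the local file to the target path, keeping the local copy.
      fs.copyFromLocalFile(localFile, hdfsTarget);

      // Explicit form: delSrc = false (keep the local file), overwrite = true.
      fs.copyFromLocalFile(false, true, localFile, hdfsTarget);
    }
  }
}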
Example 1
Source File: JobFilePartitioner.java From hraven with Apache License 2.0
/**
 * @param hdfs
 *          FileSystem handle
 * @param f
 *          file to copy to HDFS
 * @param outputPath
 * @param skipExisting
 *          skip if the file already exists in the target. File will be
 *          overwritten if already there and this argument is false.
 * @throws IOException
 *           if target directory cannot be created or file cannot be copied to
 *           target directory.
 */
private void processPlainFile(FileSystem hdfs, File f, Path outputPath,
    boolean skipExisting) throws IOException {
  long fileModTime = f.lastModified();
  Path targetDir = getTargetDirectory(hdfs, outputPath, fileModTime);

  boolean doCopy = true;
  Path sourceFile = new Path(f.getPath());
  if (skipExisting) {
    Path target = new Path(targetDir, sourceFile.getName());
    if (hdfs.exists(target)) {
      doCopy = false;
    }
  }
  if (doCopy) {
    hdfs.copyFromLocalFile(sourceFile, targetDir);
  }
}
Example 2
Source File: HdfsDeployer.java From celos with Apache License 2.0
public void deploy() throws Exception {
  FileSystem fs = context.getFileSystem();

  final String hdfsDirLocalPath = String.format(LOCAL_HDFS_PATTERN, context.getDeployDir());
  final File hdfsDirLocal = new File(hdfsDirLocalPath);
  if (!hdfsDirLocal.exists()) {
    throw new IllegalStateException(hdfsDirLocalPath + " not found local FS");
  }

  undeploy();

  Path dst = getDestinationHdfsPath();
  fs.mkdirs(dst);

  String[] childFiles = hdfsDirLocal.list();
  for (String child : childFiles) {
    fs.copyFromLocalFile(new Path(hdfsDirLocalPath, child), dst);
  }
}
Example 3
Source File: AbstractSolrSentryTestBase.java From incubator-sentry with Apache License 2.0
public static File setupSentry() throws Exception {
  File sentrySite = File.createTempFile("sentry-site", "xml");
  sentrySite.deleteOnExit();

  File authProviderDir = new File(RESOURCES_DIR, "sentry");
  String authProviderName = "test-authz-provider.ini";
  FileSystem clusterFs = dfsCluster.getFileSystem();
  clusterFs.copyFromLocalFile(false,
      new Path(authProviderDir.toString(), authProviderName),
      new Path(authProviderName));

  // need to write sentry-site at execution time because we don't know
  // the location of sentry.solr.provider.resource beforehand
  StringBuilder sentrySiteData = new StringBuilder();
  sentrySiteData.append("<configuration>\n");
  addPropertyToSentry(sentrySiteData, "sentry.provider",
      "org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider");
  addPropertyToSentry(sentrySiteData, "sentry.solr.provider.resource",
      clusterFs.getWorkingDirectory() + File.separator + authProviderName);
  sentrySiteData.append("</configuration>\n");
  FileUtils.writeStringToFile(sentrySite, sentrySiteData.toString());
  return sentrySite;
}
Example 4
Source File: FileSystemOperations.java From submarine with Apache License 2.0
public Path uploadToRemoteFile(Path stagingDir, String fileToUpload) throws IOException {
  FileSystem fs = remoteDirectoryManager.getDefaultFileSystem();

  // Upload to remote FS under staging area
  File localFile = new File(fileToUpload);
  if (!localFile.exists()) {
    throw new FileNotFoundException(
        "Trying to upload file " + localFile.getAbsolutePath()
            + " to remote, but could not find local file!");
  }
  String filename = localFile.getName();

  Path uploadedFilePath = new Path(stagingDir, filename);
  if (!uploadedFiles.contains(uploadedFilePath)) {
    if (SubmarineLogs.isVerbose()) {
      LOG.info("Copying local file " + fileToUpload + " to remote " + uploadedFilePath);
    }
    fs.copyFromLocalFile(new Path(fileToUpload), uploadedFilePath);
    uploadedFiles.add(uploadedFilePath);
  }
  return uploadedFilePath;
}
Example 5
Source File: DistributedCacheUtilImpl.java From pentaho-hadoop-shims with Apache License 2.0
/**
 * Stages the source file or folder to a Hadoop file system and sets their permission and replication value
 * appropriately to be used with the Distributed Cache. WARNING: This will delete the contents of dest before
 * staging the archive.
 *
 * @param source    File or folder to copy to the file system. If it is a folder all contents will be copied into
 *                  dest.
 * @param fs        Hadoop file system to store the contents of the archive in
 * @param dest      Destination to copy source into. If source is a file, the new file name will be exactly dest.
 *                  If source is a folder its contents will be copied into dest. For more info see
 *                  {@link FileSystem#copyFromLocalFile(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path)}.
 * @param overwrite Should an existing file or folder be overwritten? If not an exception will be thrown.
 * @throws IOException         Destination exists and is not a directory
 * @throws KettleFileException Source does not exist or destination exists and overwrite is false.
 */
public void stageForCache( FileObject source, FileSystem fs, Path dest, boolean overwrite, boolean isPublic )
  throws IOException, KettleFileException {
  if ( !source.exists() ) {
    throw new KettleFileException( BaseMessages
      .getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.SourceDoesNotExist", source ) );
  }

  if ( fs.exists( dest ) ) {
    if ( overwrite ) {
      // It is a directory, clear it out
      fs.delete( dest, true );
    } else {
      throw new KettleFileException( BaseMessages
        .getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.DestinationExists",
          dest.toUri().getPath() ) );
    }
  }

  // Use the same replication we'd use for submitting jobs
  short replication = (short) fs.getConf().getInt( "mapred.submit.replication", 10 );

  if ( source.getURL().toString().endsWith( CONFIG_PROPERTIES ) ) {
    copyConfigProperties( source, fs, dest );
  } else {
    Path local = new Path( source.getURL().getPath() );
    fs.copyFromLocalFile( local, dest );
  }

  if ( isPublic ) {
    fs.setPermission( dest, PUBLIC_CACHED_FILE_PERMISSION );
  } else {
    fs.setPermission( dest, CACHED_FILE_PERMISSION );
  }
  fs.setReplication( dest, replication );
}
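The staging method above only puts the file into the cluster file system with the right permissions and replication; registering it with a job is a separate step. Below is a minimal, hypothetical sketch of that follow-up step using the standard mapreduce Job API — it is not taken from pentaho-hadoop-shims, and the job name and path are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class CacheRegistrationSketch {
  // Hypothetical follow-up to stageForCache(): make the staged HDFS path visible to job tasks.
  public static Job registerStagedFile(Configuration conf, Path dest) throws Exception {
    Job job = Job.getInstance(conf, "example-job");
    // Distribute the staged file to every task via the distributed cache.
    job.addCacheFile(dest.toUri());
    // For jars that must also be on the task classpath, this is the usual companion call.
    job.addFileToClassPath(dest);
    return job;
  }
}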
Example 6
Source File: Client.java From hadoop with Apache License 2.0
private void addToLocalResources(FileSystem fs, String fileSrcPath,
    String fileDstPath, String appId, Map<String, LocalResource> localResources,
    String resources) throws IOException {
  String suffix = appName + "/" + appId + "/" + fileDstPath;
  Path dst = new Path(fs.getHomeDirectory(), suffix);
  if (fileSrcPath == null) {
    FSDataOutputStream ostream = null;
    try {
      ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
      ostream.writeUTF(resources);
    } finally {
      IOUtils.closeQuietly(ostream);
    }
  } else {
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
  }
  FileStatus scFileStatus = fs.getFileStatus(dst);
  LocalResource scRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromURI(dst.toUri()),
      LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
      scFileStatus.getLen(), scFileStatus.getModificationTime());
  localResources.put(fileDstPath, scRsrc);
}
Example 7
Source File: Utils.java From stratosphere with Apache License 2.0
/**
 * @return Path to remote file (usually hdfs)
 * @throws IOException
 */
public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId,
    Path localRsrcPath, LocalResource appMasterJar, Path homedir) throws IOException {
  // copy to HDFS
  String suffix = ".stratosphere/" + appId + "/" + localRsrcPath.getName();
  Path dst = new Path(homedir, suffix);

  LOG.info("Copying from " + localRsrcPath + " to " + dst);
  fs.copyFromLocalFile(localRsrcPath, dst);
  registerLocalResource(fs, dst, appMasterJar);
  return dst;
}
Example 8
Source File: TestPigServerWithMacros.java From spork with Apache License 2.0
@Test
public void testRegisterRemoteMacro() throws Throwable {
  PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties());

  String macroName = "util.pig";
  File macroFile = File.createTempFile("tmp", "");
  PrintWriter pw = new PrintWriter(new FileWriter(macroFile));
  pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
  pw.close();

  FileSystem fs = cluster.getFileSystem();
  fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName));

  // find the absolute path for the directory so that it does not
  // depend on configuration
  String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString();

  Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[]{"1", "2"});
  pig.registerQuery("import '" + absPath + "';");
  pig.registerQuery("a = load 'testRegisterRemoteMacro_input';");
  pig.registerQuery("b = row_count(a);");
  Iterator<Tuple> iter = pig.openIterator("b");

  assertEquals(2L, ((Long) iter.next().get(0)).longValue());

  pig.shutdown();
}
Example 9
Source File: StramClientUtils.java From attic-apex-core with Apache License 2.0
public static void copyFromLocalFileNoChecksum(FileSystem fs, File fromLocal, Path toDFS) throws IOException {
  // This is to avoid the hadoop FileSystem API performing a checksum on the local file.
  // This "feature" has caused a lot of headache because the local file can be copied from HDFS and modified,
  // and the checksum will fail if the file is again copied to HDFS
  try {
    new File(fromLocal.getParentFile(), "." + fromLocal.getName() + ".crc").delete();
  } catch (Exception ex) {
    // ignore
  }
  fs.copyFromLocalFile(new Path(fromLocal.toURI()), toDFS);
}
Example 10
Source File: Client.java From metron with Apache License 2.0
private Path addToLocalResources(FileSystem fs, String fileSrcPath,
    String fileDstPath, String appId, Map<String, LocalResource> localResources,
    String resources) throws IOException {
  String suffix = appName + "/" + appId + "/" + fileDstPath;
  Path dst = new Path(fs.getHomeDirectory(), suffix);
  if (fileSrcPath == null) {
    FSDataOutputStream ostream = null;
    try {
      ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
      ostream.writeUTF(resources);
    } finally {
      IOUtils.closeQuietly(ostream);
    }
  } else {
    fs.copyFromLocalFile(new Path(fileSrcPath), dst);
  }
  fs.setPermission(dst, new FsPermission((short) 0755));
  FileStatus scFileStatus = fs.getFileStatus(dst);
  LocalResource scRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromURI(dst.toUri()),
      LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
      scFileStatus.getLen(), scFileStatus.getModificationTime());
  localResources.put(fileDstPath, scRsrc);
  return dst;
}
Example 11
Source File: MapReduceJobConfiguration.java From datawave with Apache License 2.0
protected void addSingleFile(File source, Path destination, String jobId, Job job, FileSystem fs)
    throws IOException {
  Path jarPath = new Path(source.getAbsolutePath());
  try {
    fs.copyFromLocalFile(false, false, jarPath, destination);
  } catch (IOException e) {
    // If the file already exists, ignore error
    if (!e.getMessage().endsWith("already exists"))
      throw e;
  }
  log.trace("Adding {} to the classpath for job {}.", jarPath, jobId);
  job.addFileToClassPath(destination);
}
Example 12
Source File: HadoopFileUtils.java From mrgeo with Apache License 2.0
public static void copyToHdfs(Path fromDir, Path toDir, String fileName) throws IOException {
  FileSystem fs = getFileSystem(toDir);
  fs.mkdirs(toDir);
  fs.copyFromLocalFile(false, true, new Path(fromDir, fileName), new Path(toDir, fileName));
}
Example 13
Source File: HdfsUtil.java From spring-boot-tutorial with Creative Commons Attribution Share Alike 4.0 International
/**
 * Upload a file.
 *
 * @param sourcePath source file path
 * @param targetPath target path
 * @throws IOException
 */
public void uploadFile(@NotBlank String sourcePath, @NotBlank String targetPath) throws Exception {
  FileSystem fileSystem = null;
  try {
    fileSystem = this.hdfsPool.borrowObject();
    // Call the file system's copy method. The first argument controls whether the
    // source file is deleted after the copy (true to delete); it defaults to false.
    fileSystem.copyFromLocalFile(false, new Path(sourcePath), new Path(targetPath));
  } catch (Exception e) {
    log.error("upload failed", e);
    throw e;
  } finally {
    if (fileSystem != null) {
      this.hdfsPool.returnObject(fileSystem);
    }
  }
}
Example 14
Source File: TestPigServer.java From spork with Apache License 2.0
@Test
public void testRegisterRemoteScript() throws Throwable {
  String scriptName = "script.py";
  File scriptFile = File.createTempFile("tmp", "");
  PrintWriter pw = new PrintWriter(new FileWriter(scriptFile));
  pw.println("@outputSchema(\"word:chararray\")\ndef helloworld():\n return 'Hello, World'");
  pw.close();

  FileSystem fs = cluster.getFileSystem();
  fs.copyFromLocalFile(new Path(scriptFile.getAbsolutePath()), new Path(scriptName));

  // find the absolute path for the directory so that it does not
  // depend on configuration
  String absPath = fs.getFileStatus(new Path(scriptName)).getPath().toString();

  Util.createInputFile(cluster, "testRegisterRemoteScript_input", new String[]{"1", "2"});

  PigServer pig = new PigServer(cluster.getExecType(), properties);
  pig.registerCode(absPath, "jython", "pig");
  pig.registerQuery("a = load 'testRegisterRemoteScript_input';");
  pig.registerQuery("b = foreach a generate pig.helloworld($0);");
  Iterator<Tuple> iter = pig.openIterator("b");

  assertTrue(iter.hasNext());
  Tuple t = iter.next();
  assertTrue(t.size() > 0);
  assertEquals("Hello, World", t.get(0));

  assertTrue(iter.hasNext());
  t = iter.next();
  assertTrue(t.size() > 0);
  assertEquals("Hello, World", t.get(0));

  assertFalse(iter.hasNext());
}
Example 15
Source File: WcsGeneratorTestAbstract.java From mrgeo with Apache License 2.0
protected static void copyInputData() throws IOException {
  final FileSystem fileSystem = HadoopFileUtils.getFileSystem(inputHdfs);
  Properties mrgeoProperties = MrGeoProperties.getInstance();
  mrgeoProperties.put(MrGeoConstants.MRGEO_COMMON_HOME, inputHdfs.toString());
  mrgeoProperties.put(MrGeoConstants.MRGEO_HDFS_IMAGE, inputHdfs.toString());
  mrgeoProperties.put(MrGeoConstants.MRGEO_HDFS_COLORSCALE, inputHdfs.toString());
  mrgeoProperties.put("base.path", inputHdfs.toString());

  fileSystem.copyFromLocalFile(false, true, new Path(input, "IslandsElevation-v2"), inputHdfs);
}
Example 16
Source File: MapReduceBackupMergeJob.java From hbase with Apache License 2.0
/**
 * Copy file in DFS from p to newPath
 * @param fs file system
 * @param p old path
 * @param newPath new path
 * @throws IOException exception
 */
protected void copyFile(FileSystem fs, Path p, Path newPath) throws IOException {
  File f = File.createTempFile("data", "meta");
  Path localPath = new Path(f.getAbsolutePath());
  fs.copyToLocalFile(p, localPath);
  fs.copyFromLocalFile(localPath, newPath);
  boolean exists = fs.exists(newPath);
  if (!exists) {
    throw new IOException("Failed to copy meta file to: " + newPath);
  }
}
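The method above round-trips through a local temp file (copyToLocalFile followed by copyFromLocalFile). For comparison, a copy between two paths on the same DFS can also be done without the local hop. The sketch below uses org.apache.hadoop.fs.FileUtil.copy and is only an illustration of that alternative, not how the HBase backup code actually does it; the class and method names are placeholders.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class DfsCopySketch {
  // Copy p to newPath within the same FileSystem without a local temp file.
  public static void copyWithinDfs(FileSystem fs, Path p, Path newPath, Configuration conf)
      throws IOException {
    boolean copied = FileUtil.copy(fs, p, fs, newPath, false /* deleteSource */, conf);
    if (!copied) {
      throw new IOException("Failed to copy " + p + " to " + newPath);
    }
  }
}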
Example 17
Source File: BulkIngestMapFileLoader.java From datawave with Apache License 2.0
private void writeStats(Path[] jobDirectories) throws IOException {
  if (!INGEST_METRICS) {
    log.info("ingest metrics disabled");
  } else {
    long now = System.currentTimeMillis();
    for (Path p : jobDirectories)
      reporter.getCounter("MapFileLoader.EndTimes", p.getName()).increment(now);
    // Write out the metrics.
    // We are going to serialize the counters into a file in HDFS.
    // The context was set in the processKeyValues method below, and should not be null.
    // We'll guard against NPE anyway.
    FileSystem fs = getFileSystem(seqFileHdfs);
    RawLocalFileSystem rawFS = new RawLocalFileSystem();
    rawFS.setConf(conf);
    CompressionCodec cc = new GzipCodec();
    CompressionType ct = CompressionType.BLOCK;

    Counters c = reporter.getCounters();
    if (null != c && c.countCounters() > 0) {
      // Serialize the counters to a file in HDFS.
      Path src = new Path(File.createTempFile("MapFileLoader", ".metrics").getAbsolutePath());
      Writer writer = SequenceFile.createWriter(conf, Writer.file(rawFS.makeQualified(src)),
          Writer.keyClass(NullWritable.class), Writer.valueClass(Counters.class),
          Writer.compression(ct, cc));
      writer.append(NullWritable.get(), c);
      writer.close();

      // Now we will try to move the file to HDFS.
      // Copy the file to the temp dir
      try {
        Path mDir = new Path(workDir, "MapFileLoaderMetrics");
        if (!fs.exists(mDir))
          fs.mkdirs(mDir);
        Path dst = new Path(mDir, src.getName());
        log.info("Copying file " + src + " to " + dst);
        fs.copyFromLocalFile(false, true, src, dst);
        // If this worked, then remove the local file
        rawFS.delete(src, false);
        // also remove the residual crc file
        rawFS.delete(getCrcFile(src), false);
      } catch (IOException e) {
        // If an error occurs in the copy, then the file will remain in the local metrics directory.
        log.error("Error copying metrics file into HDFS, will remain in metrics directory.");
      }

      // reset reporter so that old metrics don't persist over time
      this.reporter = new StandaloneStatusReporter();
    }
  }
}
Example 18
Source File: OozieLocalServerIntegrationTest.java From hadoop-mini-clusters with Apache License 2.0
@Test
public void testSubmitWorkflow() throws Exception {
  LOG.info("OOZIE: Test Submit Workflow Start");

  FileSystem hdfsFs = hdfsLocalCluster.getHdfsFileSystemHandle();
  OozieClient oozie = oozieLocalServer.getOozieClient();

  Path appPath = new Path(hdfsFs.getHomeDirectory(), "testApp");
  hdfsFs.mkdirs(new Path(appPath, "lib"));
  Path workflow = new Path(appPath, "workflow.xml");

  // Setup input directory and file
  hdfsFs.mkdirs(new Path(TEST_INPUT_DIR));
  hdfsFs.copyFromLocalFile(
      new Path(getClass().getClassLoader().getResource(TEST_INPUT_FILE).toURI()),
      new Path(TEST_INPUT_DIR));

  // write workflow.xml
  String wfApp =
      "<workflow-app name=\"sugar-option-decision\" xmlns=\"uri:oozie:workflow:0.5\">\n" +
      " <global>\n" +
      " <job-tracker>${jobTracker}</job-tracker>\n" +
      " <name-node>${nameNode}</name-node>\n" +
      " <configuration>\n" +
      " <property>\n" +
      " <name>mapreduce.output.fileoutputformat.outputdir</name>\n" +
      " <value>" + TEST_OUTPUT_DIR + "</value>\n" +
      " </property>\n" +
      " <property>\n" +
      " <name>mapreduce.input.fileinputformat.inputdir</name>\n" +
      " <value>" + TEST_INPUT_DIR + "</value>\n" +
      " </property>\n" +
      " </configuration>\n" +
      " </global>\n" +
      " <start to=\"first\"/>\n" +
      " <action name=\"first\">\n" +
      " <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
      " <ok to=\"decision-second-option\"/>\n" +
      " <error to=\"kill\"/>\n" +
      " </action>\n" +
      " <decision name=\"decision-second-option\">\n" +
      " <switch>\n" +
      " <case to=\"option\">${doOption}</case>\n" +
      " <default to=\"second\"/>\n" +
      " </switch>\n" +
      " </decision>\n" +
      " <action name=\"option\">\n" +
      " <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
      " <ok to=\"second\"/>\n" +
      " <error to=\"kill\"/>\n" +
      " </action>\n" +
      " <action name=\"second\">\n" +
      " <map-reduce> <prepare><delete path=\"" + TEST_OUTPUT_DIR + "\"/></prepare></map-reduce>\n" +
      " <ok to=\"end\"/>\n" +
      " <error to=\"kill\"/>\n" +
      " </action>\n" +
      " <kill name=\"kill\">\n" +
      " <message>\n" +
      " Failed to workflow, error message[${wf:errorMessage(wf:lastErrorNode())}]\n" +
      " </message>\n" +
      " </kill>\n" +
      " <end name=\"end\"/>\n" +
      "</workflow-app>";

  Writer writer = new OutputStreamWriter(hdfsFs.create(workflow));
  writer.write(wfApp);
  writer.close();

  // write job.properties
  Properties conf = oozie.createConfiguration();
  conf.setProperty(OozieClient.APP_PATH, workflow.toString());
  conf.setProperty(OozieClient.USER_NAME, UserGroupInformation.getCurrentUser().getUserName());
  conf.setProperty("nameNode", "hdfs://localhost:" + hdfsLocalCluster.getHdfsNamenodePort());
  conf.setProperty("jobTracker", mrLocalCluster.getResourceManagerAddress());
  conf.setProperty("doOption", "true");

  // submit and check
  final String jobId = oozie.run(conf);
  WorkflowJob wf = oozie.getJobInfo(jobId);
  assertNotNull(wf);
  assertEquals(WorkflowJob.Status.RUNNING, wf.getStatus());

  while (true) {
    Thread.sleep(1000);
    wf = oozie.getJobInfo(jobId);
    if (wf.getStatus() == WorkflowJob.Status.FAILED || wf.getStatus() == WorkflowJob.Status.KILLED
        || wf.getStatus() == WorkflowJob.Status.PREP || wf.getStatus() == WorkflowJob.Status.SUCCEEDED) {
      break;
    }
  }

  wf = oozie.getJobInfo(jobId);
  assertEquals(WorkflowJob.Status.SUCCEEDED, wf.getStatus());
  LOG.info("OOZIE: Workflow: {}", wf.toString());

  hdfsFs.close();
}
Example 19
Source File: Cluster.java From spork with Apache License 2.0
public void copyFromLocalFile(Path local, Path destination, boolean overwrite) throws IOException {
  FileSystem fs = local.getFileSystem(configuration);
  fs.copyFromLocalFile(false, overwrite, local, destination);
}
Example 20
Source File: HadoopPopularWords.java From ignite with Apache License 2.0
/**
 * Prepare job's data: clean up result directories that might have been left over
 * after previous runs, copy input files from the local file system into DFS.
 *
 * @param fs Distributed file system to use in job.
 * @throws IOException If failed.
 */
private void prepareDirectories(FileSystem fs) throws IOException {
  X.println(">>> Cleaning up DFS result directory: " + RESULT_DFS_DIR);
  fs.delete(RESULT_DFS_DIR, true);

  X.println(">>> Cleaning up DFS input directory: " + BOOKS_DFS_DIR);
  fs.delete(BOOKS_DFS_DIR, true);

  X.println(">>> Copy local files into DFS input directory: " + BOOKS_DFS_DIR);
  fs.copyFromLocalFile(BOOKS_LOCAL_DIR, BOOKS_DFS_DIR);
}