Java Code Examples for org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#setupJob()
The following examples show how to use
org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#setupJob() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0 | 5 votes |
private void testCommitterInternal(int version) throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output validateContent(outDir); FileUtil.fullyDelete(new File(outDir.toString())); }
Example 2
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0 | 5 votes |
private void testMapFileOutputCommitterInternal(int version) throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output MapFileOutputFormat theOutputFormat = new MapFileOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeMapFileOutput(theRecordWriter, tContext); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output validateMapFileOutputContent(FileSystem.get(job.getConfiguration()), outDir); FileUtil.fullyDelete(new File(outDir.toString())); }
Example 3
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0 | 5 votes |
private void testSafety(int commitVersion) throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, commitVersion); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // close the job prior to committing task (leaving files in temporary dir try { committer.commitJob(jContext); Assert.fail("Expected commit job to fail"); } catch (Exception e) { committer.commitTask(tContext); committer.commitJob(jContext); } validateContent(outDir); FileUtil.fullyDelete(new File(outDir.toString())); }
Example 4
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0 | 4 votes |
private void testRecoveryInternal(int commitVersion, int recoveryVersion) throws Exception { Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, commitVersion); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new SafeFileOutputCommitter(outDir, tContext); // setup committer.setupJob(jContext); committer.setupTask(tContext); // write output TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext); writeOutput(theRecordWriter, tContext); // do commit committer.commitTask(tContext); Path jobTempDir1 = committer.getCommittedTaskPath(tContext); File jtd = new File(jobTempDir1.toUri().getPath()); if (commitVersion == 1 || !patched) { assertTrue("Version 1 commits to temporary dir " + jtd, jtd.exists()); validateContent(jtd); } else { assertFalse("Version 2 commits to output dir " + jtd, jtd.exists()); } // now while running the second app attempt, // recover the task output from first attempt Configuration conf2 = job.getConfiguration(); conf2.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf2.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 2); conf2.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, recoveryVersion); JobContext jContext2 = new JobContextImpl(conf2, taskID.getJobID()); TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID); FileOutputCommitter committer2 = new SafeFileOutputCommitter(outDir, tContext2); committer2.setupJob(tContext2); Path jobTempDir2 = committer2.getCommittedTaskPath(tContext2); File jtd2 = new File(jobTempDir2.toUri().getPath()); committer2.recoverTask(tContext2); if (recoveryVersion == 1 || !patched) { assertTrue("Version 1 recovers to " + jtd2, jtd2.exists()); validateContent(jtd2); } else { assertFalse("Version 2 commits to output dir " + jtd2, jtd2.exists()); if (commitVersion == 1 || !patched) { assertEquals("Version 2 recovery moves to output dir from " + jtd, 0, jtd.list().length); } } committer2.commitJob(jContext2); validateContent(outDir); FileUtil.fullyDelete(new File(outDir.toString())); }
Example 5
Source File: SafeFileOutputCommitterTest.java From datawave with Apache License 2.0 | 4 votes |
private void testConcurrentCommitTaskWithSubDir(int version) throws Exception { final Job job = Job.getInstance(); FileOutputFormat.setOutputPath(job, outDir); final Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version); conf.setClass("fs.file.impl", RLFS.class, FileSystem.class); FileSystem.closeAll(); final JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); final FileOutputCommitter amCommitter = new SafeFileOutputCommitter(outDir, jContext); amCommitter.setupJob(jContext); final TaskAttemptContext[] taCtx = new TaskAttemptContextImpl[2]; taCtx[0] = new TaskAttemptContextImpl(conf, taskID); taCtx[1] = new TaskAttemptContextImpl(conf, taskID1); final TextOutputFormat[] tof = new TextOutputFormat[2]; for (int i = 0; i < tof.length; i++) { tof[i] = new TextOutputFormat() { @Override public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException { final FileOutputCommitter foc = (FileOutputCommitter) getOutputCommitter(context); return new Path(new Path(foc.getWorkPath(), SUB_DIR), getUniqueFile(context, getOutputName(context), extension)); } }; } final ExecutorService executor = Executors.newFixedThreadPool(2); try { for (int i = 0; i < taCtx.length; i++) { final int taskIdx = i; executor.submit((Callable<Void>) () -> { final OutputCommitter outputCommitter = tof[taskIdx].getOutputCommitter(taCtx[taskIdx]); outputCommitter.setupTask(taCtx[taskIdx]); final RecordWriter rw = tof[taskIdx].getRecordWriter(taCtx[taskIdx]); writeOutput(rw, taCtx[taskIdx]); outputCommitter.commitTask(taCtx[taskIdx]); return null; }); } } finally { executor.shutdown(); while (!executor.awaitTermination(1, TimeUnit.SECONDS)) { LOG.info("Awaiting thread termination!"); } } amCommitter.commitJob(jContext); final RawLocalFileSystem lfs = new RawLocalFileSystem(); lfs.setConf(conf); assertFalse("Must not end up with sub_dir/sub_dir", lfs.exists(new Path(OUT_SUB_DIR, SUB_DIR))); // validate output validateContent(OUT_SUB_DIR); FileUtil.fullyDelete(new File(outDir.toString())); }