org.apache.hadoop.tools.CopyListing Java Examples
The following examples show how to use
org.apache.hadoop.tools.CopyListing.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CircusTrainCopyListing.java From circus-train with Apache License 2.0 | 4 votes |
static void setAsCopyListingClass(Configuration conf) { conf.setClass(CONF_LABEL_COPY_LISTING_CLASS, CircusTrainCopyListing.class, CopyListing.class); }
Example #2
Source File: TestUniformSizeInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
public void testGetSplits(int nMaps) throws Exception { DistCpOptions options = getOptions(nMaps); Configuration configuration = new Configuration(); configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps())); Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq"); CopyListing.getCopyListing(configuration, CREDENTIALS, options). buildListing(listFile, options); JobContext jobContext = new JobContextImpl(configuration, new JobID()); UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat(); List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext); int sizePerMap = totalFileSize/nMaps; checkSplits(listFile, splits); int doubleCheckedTotalSize = 0; int previousSplitSize = -1; for (int i=0; i<splits.size(); ++i) { InputSplit split = splits.get(i); int currentSplitSize = 0; RecordReader<Text, CopyListingFileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split, null); StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, 0); final TaskAttemptContext taskAttemptContext = stubContext.getContext(); recordReader.initialize(split, taskAttemptContext); while (recordReader.nextKeyValue()) { Path sourcePath = recordReader.getCurrentValue().getPath(); FileSystem fs = sourcePath.getFileSystem(configuration); FileStatus fileStatus [] = fs.listStatus(sourcePath); if (fileStatus.length > 1) { continue; } currentSplitSize += fileStatus[0].getLen(); } Assert.assertTrue( previousSplitSize == -1 || Math.abs(currentSplitSize - previousSplitSize) < 0.1*sizePerMap || i == splits.size()-1); doubleCheckedTotalSize += currentSplitSize; } Assert.assertEquals(totalFileSize, doubleCheckedTotalSize); }
Example #3
Source File: TestCopyCommitter.java From hadoop with Apache License 2.0 | 4 votes |
@Test public void testPreserveStatus() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); FsPermission sourcePerm = new FsPermission((short) 511); FsPermission initialPerm = new FsPermission((short) 448); sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm); targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.preserve(FileAttribute.PERMISSION); options.appendToConf(conf); options.setTargetPathExists(false); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); committer.commitJob(jobContext); if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) { Assert.fail("Permission don't match"); } //Test for idempotent commit committer.commitJob(jobContext); if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) { Assert.fail("Permission don't match"); } } catch (IOException e) { LOG.error("Exception encountered while testing for preserve status", e); Assert.fail("Preserve status failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS); } }
Example #4
Source File: TestCopyCommitter.java From hadoop with Apache License 2.0 | 4 votes |
@Test public void testDeleteMissing() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); fs.rename(new Path(targetBaseAdd), new Path(targetBase)); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.setSyncFolder(true); options.setDeleteMissing(true); options.appendToConf(conf); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) { Assert.fail("Source and target folders are not in sync"); } //Test for idempotent commit committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) { Assert.fail("Source and target folders are not in sync"); } } catch (Throwable e) { LOG.error("Exception encountered while testing for delete missing", e); Assert.fail("Delete missing failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false"); } }
Example #5
Source File: TestCopyCommitter.java From hadoop with Apache License 2.0 | 4 votes |
@Test public void testDeleteMissingFlatInterleavedFiles() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); sourceBase = "/tmp1/" + String.valueOf(rand.nextLong()); targetBase = "/tmp1/" + String.valueOf(rand.nextLong()); TestDistCpUtils.createFile(fs, sourceBase + "/1"); TestDistCpUtils.createFile(fs, sourceBase + "/3"); TestDistCpUtils.createFile(fs, sourceBase + "/4"); TestDistCpUtils.createFile(fs, sourceBase + "/5"); TestDistCpUtils.createFile(fs, sourceBase + "/7"); TestDistCpUtils.createFile(fs, sourceBase + "/8"); TestDistCpUtils.createFile(fs, sourceBase + "/9"); TestDistCpUtils.createFile(fs, targetBase + "/2"); TestDistCpUtils.createFile(fs, targetBase + "/4"); TestDistCpUtils.createFile(fs, targetBase + "/5"); TestDistCpUtils.createFile(fs, targetBase + "/7"); TestDistCpUtils.createFile(fs, targetBase + "/9"); TestDistCpUtils.createFile(fs, targetBase + "/A"); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.setSyncFolder(true); options.setDeleteMissing(true); options.appendToConf(conf); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4); //Test for idempotent commit committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4); } catch (IOException e) { LOG.error("Exception encountered while testing for delete missing", e); Assert.fail("Delete missing failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false"); } }
Example #6
Source File: TestDynamicInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
@Test public void testGetSplits() throws Exception { DistCpOptions options = getOptions(); Configuration configuration = new Configuration(); configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps())); CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing( new Path(cluster.getFileSystem().getUri().toString() +"/tmp/testDynInputFormat/fileList.seq"), options); JobContext jobContext = new JobContextImpl(configuration, new JobID()); DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<Text, CopyListingFileStatus>(); List<InputSplit> splits = inputFormat.getSplits(jobContext); int nFiles = 0; int taskId = 0; for (InputSplit split : splits) { RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, null); StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, taskId); final TaskAttemptContext taskAttemptContext = stubContext.getContext(); recordReader.initialize(splits.get(0), taskAttemptContext); float previousProgressValue = 0f; while (recordReader.nextKeyValue()) { CopyListingFileStatus fileStatus = recordReader.getCurrentValue(); String source = fileStatus.getPath().toString(); System.out.println(source); Assert.assertTrue(expectedFilePaths.contains(source)); final float progress = recordReader.getProgress(); Assert.assertTrue(progress >= previousProgressValue); Assert.assertTrue(progress >= 0.0f); Assert.assertTrue(progress <= 1.0f); previousProgressValue = progress; ++nFiles; } Assert.assertTrue(recordReader.getProgress() == 1.0f); ++taskId; } Assert.assertEquals(expectedFilePaths.size(), nFiles); }
Example #7
Source File: TestUniformSizeInputFormat.java From big-c with Apache License 2.0 | 4 votes |
public void testGetSplits(int nMaps) throws Exception { DistCpOptions options = getOptions(nMaps); Configuration configuration = new Configuration(); configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps())); Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq"); CopyListing.getCopyListing(configuration, CREDENTIALS, options). buildListing(listFile, options); JobContext jobContext = new JobContextImpl(configuration, new JobID()); UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat(); List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext); int sizePerMap = totalFileSize/nMaps; checkSplits(listFile, splits); int doubleCheckedTotalSize = 0; int previousSplitSize = -1; for (int i=0; i<splits.size(); ++i) { InputSplit split = splits.get(i); int currentSplitSize = 0; RecordReader<Text, CopyListingFileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split, null); StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, 0); final TaskAttemptContext taskAttemptContext = stubContext.getContext(); recordReader.initialize(split, taskAttemptContext); while (recordReader.nextKeyValue()) { Path sourcePath = recordReader.getCurrentValue().getPath(); FileSystem fs = sourcePath.getFileSystem(configuration); FileStatus fileStatus [] = fs.listStatus(sourcePath); if (fileStatus.length > 1) { continue; } currentSplitSize += fileStatus[0].getLen(); } Assert.assertTrue( previousSplitSize == -1 || Math.abs(currentSplitSize - previousSplitSize) < 0.1*sizePerMap || i == splits.size()-1); doubleCheckedTotalSize += currentSplitSize; } Assert.assertEquals(totalFileSize, doubleCheckedTotalSize); }
Example #8
Source File: TestCopyCommitter.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testPreserveStatus() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); FsPermission sourcePerm = new FsPermission((short) 511); FsPermission initialPerm = new FsPermission((short) 448); sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm); targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.preserve(FileAttribute.PERMISSION); options.appendToConf(conf); options.setTargetPathExists(false); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); committer.commitJob(jobContext); if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) { Assert.fail("Permission don't match"); } //Test for idempotent commit committer.commitJob(jobContext); if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) { Assert.fail("Permission don't match"); } } catch (IOException e) { LOG.error("Exception encountered while testing for preserve status", e); Assert.fail("Preserve status failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS); } }
Example #9
Source File: TestCopyCommitter.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testDeleteMissing() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault()); fs.rename(new Path(targetBaseAdd), new Path(targetBase)); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.setSyncFolder(true); options.setDeleteMissing(true); options.appendToConf(conf); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) { Assert.fail("Source and target folders are not in sync"); } //Test for idempotent commit committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) { Assert.fail("Source and target folders are not in sync"); } } catch (Throwable e) { LOG.error("Exception encountered while testing for delete missing", e); Assert.fail("Delete missing failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false"); } }
Example #10
Source File: TestCopyCommitter.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testDeleteMissingFlatInterleavedFiles() { TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config); JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID()); Configuration conf = jobContext.getConfiguration(); String sourceBase; String targetBase; FileSystem fs = null; try { OutputCommitter committer = new CopyCommitter(null, taskAttemptContext); fs = FileSystem.get(conf); sourceBase = "/tmp1/" + String.valueOf(rand.nextLong()); targetBase = "/tmp1/" + String.valueOf(rand.nextLong()); TestDistCpUtils.createFile(fs, sourceBase + "/1"); TestDistCpUtils.createFile(fs, sourceBase + "/3"); TestDistCpUtils.createFile(fs, sourceBase + "/4"); TestDistCpUtils.createFile(fs, sourceBase + "/5"); TestDistCpUtils.createFile(fs, sourceBase + "/7"); TestDistCpUtils.createFile(fs, sourceBase + "/8"); TestDistCpUtils.createFile(fs, sourceBase + "/9"); TestDistCpUtils.createFile(fs, targetBase + "/2"); TestDistCpUtils.createFile(fs, targetBase + "/4"); TestDistCpUtils.createFile(fs, targetBase + "/5"); TestDistCpUtils.createFile(fs, targetBase + "/7"); TestDistCpUtils.createFile(fs, targetBase + "/9"); TestDistCpUtils.createFile(fs, targetBase + "/A"); DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out")); options.setSyncFolder(true); options.setDeleteMissing(true); options.appendToConf(conf); CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS); Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong())); listing.buildListing(listingFile, options); conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase); conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase); committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4); //Test for idempotent commit committer.commitJob(jobContext); if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) { Assert.fail("Source and target folders are not in sync"); } Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4); } catch (IOException e) { LOG.error("Exception encountered while testing for delete missing", e); Assert.fail("Delete missing failure"); } finally { TestDistCpUtils.delete(fs, "/tmp1"); conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false"); } }
Example #11
Source File: TestDynamicInputFormat.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testGetSplits() throws Exception { DistCpOptions options = getOptions(); Configuration configuration = new Configuration(); configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps())); CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing( new Path(cluster.getFileSystem().getUri().toString() +"/tmp/testDynInputFormat/fileList.seq"), options); JobContext jobContext = new JobContextImpl(configuration, new JobID()); DynamicInputFormat<Text, CopyListingFileStatus> inputFormat = new DynamicInputFormat<Text, CopyListingFileStatus>(); List<InputSplit> splits = inputFormat.getSplits(jobContext); int nFiles = 0; int taskId = 0; for (InputSplit split : splits) { RecordReader<Text, CopyListingFileStatus> recordReader = inputFormat.createRecordReader(split, null); StubContext stubContext = new StubContext(jobContext.getConfiguration(), recordReader, taskId); final TaskAttemptContext taskAttemptContext = stubContext.getContext(); recordReader.initialize(splits.get(0), taskAttemptContext); float previousProgressValue = 0f; while (recordReader.nextKeyValue()) { CopyListingFileStatus fileStatus = recordReader.getCurrentValue(); String source = fileStatus.getPath().toString(); System.out.println(source); Assert.assertTrue(expectedFilePaths.contains(source)); final float progress = recordReader.getProgress(); Assert.assertTrue(progress >= previousProgressValue); Assert.assertTrue(progress >= 0.0f); Assert.assertTrue(progress <= 1.0f); previousProgressValue = progress; ++nFiles; } Assert.assertTrue(recordReader.getProgress() == 1.0f); ++taskId; } Assert.assertEquals(expectedFilePaths.size(), nFiles); }