org.apache.hadoop.fs.HdfsBlockLocation Java Examples
The following examples show how to use
org.apache.hadoop.fs.HdfsBlockLocation.
Each example is taken from an open-source project; the source file, originating project, and license are listed above the code.
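Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the usual pattern: on an HDFS-backed FileSystem, the BlockLocation array returned by getFileBlockLocations() can be downcast to HdfsBlockLocation to reach the underlying LocatedBlock. The file path used here is a placeholder.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HdfsBlockLocation;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;

public class HdfsBlockLocationSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);      // assumed to be backed by HDFS
    Path file = new Path("/tmp/example.txt");  // hypothetical path
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] locs = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation loc : locs) {
      if (loc instanceof HdfsBlockLocation) {
        // HdfsBlockLocation adds access to the HDFS-specific LocatedBlock
        LocatedBlock lb = ((HdfsBlockLocation) loc).getLocatedBlock();
        System.out.println("block " + lb.getBlock().getBlockId()
            + " offset=" + loc.getOffset() + " len=" + loc.getLength());
      }
    }
    fs.close();
  }
}

Note that the downcast only succeeds when the FileSystem is actually a DistributedFileSystem; other implementations return plain BlockLocation objects.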
Example #1
Source File: DFSClient.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Get block location info about file
 *
 * getBlockLocations() returns a list of hostnames that store
 * data for a specific file region. It returns a set of hostnames
 * for every block within the indicated region.
 *
 * This function is very useful when writing code that considers
 * data-placement when performing operations. For example, the
 * MapReduce system tries to schedule tasks on the same machines
 * as the data-block the task processes.
 */
public BlockLocation[] getBlockLocations(String src, long start,
    long length) throws IOException, UnresolvedLinkException {
  TraceScope scope = getPathTraceScope("getBlockLocations", src);
  try {
    LocatedBlocks blocks = getLocatedBlocks(src, start, length);
    BlockLocation[] locations = DFSUtil.locatedBlocks2Locations(blocks);
    HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
    for (int i = 0; i < locations.length; i++) {
      hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
    }
    return hdfsLocations;
  } finally {
    scope.close();
  }
}
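The Javadoc above highlights the data-placement use case: a scheduler can prefer machines that already hold a block's replicas. A small sketch of that idea, using only the public FileSystem and BlockLocation APIs (the path and the scheduling decision are hypothetical, not part of the example above):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PlacementAwareSketch {
  // Returns the hostnames that store the block covering the given offset,
  // so a caller could prefer scheduling work on one of those machines.
  static String[] hostsForOffset(FileSystem fs, Path file, long offset)
      throws IOException {
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] locs = fs.getFileBlockLocations(status, offset, 1);
    return locs.length > 0 ? locs[0].getHosts() : new String[0];
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path file = new Path("/tmp/example.txt"); // hypothetical path
    for (String host : hostsForOffset(fs, file, 0)) {
      System.out.println("block replica on " + host);
    }
    fs.close();
  }
}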
Example #2
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 6 votes |
@Test(timeout=60000)
public void testPageRounder() throws Exception {
  // Write a small file
  Path fileName = new Path("/testPageRounder");
  final int smallBlocks = 512; // This should be smaller than the page size
  assertTrue("Page size should be greater than smallBlocks!",
      PAGE_SIZE > smallBlocks);
  final int numBlocks = 5;
  final int fileLen = smallBlocks * numBlocks;
  FSDataOutputStream out =
      fs.create(fileName, false, 4096, (short)1, smallBlocks);
  out.write(new byte[fileLen]);
  out.close();
  HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
      fileName, 0, fileLen);
  // Cache the file and check the sizes match the page size
  setHeartbeatResponse(cacheBlocks(locs));
  DFSTestUtil.verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks, fsd);
  // Uncache and check that it decrements by the page size too
  setHeartbeatResponse(uncacheBlocks(locs));
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
}
Example #3
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 6 votes |
@Test(timeout=60000)
public void testUncacheUnknownBlock() throws Exception {
  // Create a file
  Path fileName = new Path("/testUncacheUnknownBlock");
  int fileLen = 4096;
  DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
  HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
      fileName, 0, fileLen);

  // Try to uncache it without caching it first
  setHeartbeatResponse(uncacheBlocks(locs));

  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      return fsd.getNumBlocksFailedToUncache() > 0;
    }
  }, 100, 10000);
}
Example #4
Source File: DFSClient.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Get block location info about file
 *
 * getBlockLocations() returns a list of hostnames that store
 * data for a specific file region. It returns a set of hostnames
 * for every block within the indicated region.
 *
 * This function is very useful when writing code that considers
 * data-placement when performing operations. For example, the
 * MapReduce system tries to schedule tasks on the same machines
 * as the data-block the task processes.
 */
public BlockLocation[] getBlockLocations(String src, long start,
    long length) throws IOException, UnresolvedLinkException {
  TraceScope scope = getPathTraceScope("getBlockLocations", src);
  try {
    LocatedBlocks blocks = getLocatedBlocks(src, start, length);
    BlockLocation[] locations = DFSUtil.locatedBlocks2Locations(blocks);
    HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
    for (int i = 0; i < locations.length; i++) {
      hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
    }
    return hdfsLocations;
  } finally {
    scope.close();
  }
}
Example #5
Source File: TestCombineFileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
@Override
public BlockLocation[] getFileBlockLocations(
    FileStatus stat, long start, long len) throws IOException {
  if (stat.isDir()) {
    return null;
  }
  System.out.println("File " + stat.getPath());
  String name = stat.getPath().toUri().getPath();
  BlockLocation[] locs =
      super.getFileBlockLocations(stat, start, len);
  if (name.equals(fileWithMissingBlocks)) {
    System.out.println("Returning missing blocks for " + fileWithMissingBlocks);
    locs[0] = new HdfsBlockLocation(new BlockLocation(new String[0],
        new String[0], locs[0].getOffset(), locs[0].getLength()), null);
  }
  return locs;
}
Example #6
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 6 votes |
@Test(timeout=60000)
public void testPageRounder() throws Exception {
  // Write a small file
  Path fileName = new Path("/testPageRounder");
  final int smallBlocks = 512; // This should be smaller than the page size
  assertTrue("Page size should be greater than smallBlocks!",
      PAGE_SIZE > smallBlocks);
  final int numBlocks = 5;
  final int fileLen = smallBlocks * numBlocks;
  FSDataOutputStream out =
      fs.create(fileName, false, 4096, (short)1, smallBlocks);
  out.write(new byte[fileLen]);
  out.close();
  HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
      fileName, 0, fileLen);
  // Cache the file and check the sizes match the page size
  setHeartbeatResponse(cacheBlocks(locs));
  DFSTestUtil.verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks, fsd);
  // Uncache and check that it decrements by the page size too
  setHeartbeatResponse(uncacheBlocks(locs));
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
}
Example #7
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 6 votes |
@Test(timeout=60000)
public void testUncacheUnknownBlock() throws Exception {
  // Create a file
  Path fileName = new Path("/testUncacheUnknownBlock");
  int fileLen = 4096;
  DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
  HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
      fileName, 0, fileLen);

  // Try to uncache it without caching it first
  setHeartbeatResponse(uncacheBlocks(locs));

  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      return fsd.getNumBlocksFailedToUncache() > 0;
    }
  }, 100, 10000);
}
Example #8
Source File: TestCombineFileInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
@Override
public BlockLocation[] getFileBlockLocations(
    FileStatus stat, long start, long len) throws IOException {
  if (stat.isDir()) {
    return null;
  }
  System.out.println("File " + stat.getPath());
  String name = stat.getPath().toUri().getPath();
  BlockLocation[] locs =
      super.getFileBlockLocations(stat, start, len);
  if (name.equals(fileWithMissingBlocks)) {
    System.out.println("Returning missing blocks for " + fileWithMissingBlocks);
    locs[0] = new HdfsBlockLocation(new BlockLocation(new String[0],
        new String[0], locs[0].getOffset(), locs[0].getLength()), null);
  }
  return locs;
}
Example #9
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Creates a cache or uncache DatanodeCommand from an array of locations
 */
private static DatanodeCommand getResponse(HdfsBlockLocation[] locs,
    int action) {
  String bpid = locs[0].getLocatedBlock().getBlock().getBlockPoolId();
  long[] blocks = new long[locs.length];
  for (int i=0; i<locs.length; i++) {
    blocks[i] = locs[i].getLocatedBlock().getBlock().getBlockId();
  }
  return new BlockIdCommand(action, bpid, blocks);
}
Example #10
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Creates a cache or uncache DatanodeCommand from an array of locations
 */
private static DatanodeCommand getResponse(HdfsBlockLocation[] locs,
    int action) {
  String bpid = locs[0].getLocatedBlock().getBlock().getBlockPoolId();
  long[] blocks = new long[locs.length];
  for (int i=0; i<locs.length; i++) {
    blocks[i] = locs[i].getLocatedBlock().getBlock().getBlockId();
  }
  return new BlockIdCommand(action, bpid, blocks);
}
Example #11
Source File: MergeSortRowIdMatcher.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
private static String getFirstBlockId(FileSystem fileSystem, Path realFile)
    throws IOException {
  FileStatus fileStatus = fileSystem.getFileStatus(realFile);
  BlockLocation[] locations = fileSystem.getFileBlockLocations(fileStatus, 0, 1);
  HdfsBlockLocation location = (HdfsBlockLocation) locations[0];
  LocatedBlock locatedBlock = location.getLocatedBlock();
  ExtendedBlock block = locatedBlock.getBlock();
  return toNiceString(block.getBlockId());
}
Example #12
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 4 votes |
@Test(timeout=600000)
public void testFilesExceedMaxLockedMemory() throws Exception {
  LOG.info("beginning testFilesExceedMaxLockedMemory");

  // Create some test files that will exceed total cache capacity
  final int numFiles = 5;
  final long fileSize = CACHE_CAPACITY / (numFiles-1);

  final Path[] testFiles = new Path[numFiles];
  final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
  final long[] fileSizes = new long[numFiles];
  for (int i=0; i<numFiles; i++) {
    testFiles[i] = new Path("/testFilesExceedMaxLockedMemory-" + i);
    DFSTestUtil.createFile(fs, testFiles[i], fileSize, (short)1, 0xDFAl);
    fileLocs[i] = (HdfsBlockLocation[])fs.getFileBlockLocations(
        testFiles[i], 0, fileSize);
    // Get the file size (sum of blocks)
    long[] sizes = getBlockSizes(fileLocs[i]);
    for (int j=0; j<sizes.length; j++) {
      fileSizes[i] += sizes[j];
    }
  }

  // Cache the first n-1 files
  long total = 0;
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
  for (int i=0; i<numFiles-1; i++) {
    setHeartbeatResponse(cacheBlocks(fileLocs[i]));
    total = DFSTestUtil.verifyExpectedCacheUsage(
        rounder.round(total + fileSizes[i]), 4 * (i + 1), fsd);
  }

  // nth file should hit a capacity exception
  final LogVerificationAppender appender = new LogVerificationAppender();
  final Logger logger = Logger.getRootLogger();
  logger.addAppender(appender);
  setHeartbeatResponse(cacheBlocks(fileLocs[numFiles-1]));

  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      int lines = appender.countLinesWithMessage(
          "more bytes in the cache: " +
          DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
      return lines > 0;
    }
  }, 500, 30000);

  // Also check the metrics for the failure
  assertTrue("Expected more than 0 failed cache attempts",
      fsd.getNumBlocksFailedToCache() > 0);

  // Uncache the n-1 files
  int curCachedBlocks = 16;
  for (int i=0; i<numFiles-1; i++) {
    setHeartbeatResponse(uncacheBlocks(fileLocs[i]));
    long uncachedBytes = rounder.round(fileSizes[i]);
    total -= uncachedBytes;
    curCachedBlocks -= uncachedBytes / BLOCK_SIZE;
    DFSTestUtil.verifyExpectedCacheUsage(total, curCachedBlocks, fsd);
  }
  LOG.info("finishing testFilesExceedMaxLockedMemory");
}
Example #13
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] uncacheBlocks(HdfsBlockLocation[] locs) {
  return new DatanodeCommand[] {
      getResponse(locs, DatanodeProtocol.DNA_UNCACHE)
  };
}
Example #14
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] uncacheBlock(HdfsBlockLocation loc) {
  return uncacheBlocks(new HdfsBlockLocation[] {loc});
}
Example #15
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] cacheBlocks(HdfsBlockLocation[] locs) {
  return new DatanodeCommand[] {
      getResponse(locs, DatanodeProtocol.DNA_CACHE)
  };
}
Example #16
Source File: TestFsDatasetCache.java From hadoop with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] cacheBlock(HdfsBlockLocation loc) {
  return cacheBlocks(new HdfsBlockLocation[] {loc});
}
Example #17
Source File: DFSClient.java From big-c with Apache License 2.0 | 4 votes |
/**
 * Get block location information about a list of {@link HdfsBlockLocation}.
 * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to
 * get {@link BlockStorageLocation}s for blocks returned by
 * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}.
 *
 * This is done by making a round of RPCs to the associated datanodes, asking
 * the volume of each block replica. The returned array of
 * {@link BlockStorageLocation} expose this information as a
 * {@link VolumeId}.
 *
 * @param blockLocations
 *          target blocks on which to query volume location information
 * @return volumeBlockLocations original block array augmented with additional
 *         volume location information for each replica.
 */
public BlockStorageLocation[] getBlockStorageLocations(
    List<BlockLocation> blockLocations) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  if (!getConf().getHdfsBlocksMetadataEnabled) {
    throw new UnsupportedOperationException("Datanode-side support for " +
        "getVolumeBlockLocations() must also be enabled in the client " +
        "configuration.");
  }
  // Downcast blockLocations and fetch out required LocatedBlock(s)
  List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
  for (BlockLocation loc : blockLocations) {
    if (!(loc instanceof HdfsBlockLocation)) {
      throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
          "expected to be passed HdfsBlockLocations");
    }
    HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc;
    blocks.add(hdfsLoc.getLocatedBlock());
  }

  // Re-group the LocatedBlocks to be grouped by datanodes, with the values
  // a list of the LocatedBlocks on the datanode.
  Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks =
      new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
  for (LocatedBlock b : blocks) {
    for (DatanodeInfo info : b.getLocations()) {
      if (!datanodeBlocks.containsKey(info)) {
        datanodeBlocks.put(info, new ArrayList<LocatedBlock>());
      }
      List<LocatedBlock> l = datanodeBlocks.get(info);
      l.add(b);
    }
  }

  // Make RPCs to the datanodes to get volume locations for its replicas
  TraceScope scope =
      Trace.startSpan("getBlockStorageLocations", traceSampler);
  Map<DatanodeInfo, HdfsBlocksMetadata> metadatas;
  try {
    metadatas = BlockStorageLocationUtil.
        queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks,
            getConf().getFileBlockStorageLocationsNumThreads,
            getConf().getFileBlockStorageLocationsTimeoutMs,
            getConf().connectToDnViaHostname);
    if (LOG.isTraceEnabled()) {
      LOG.trace("metadata returned: " +
          Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
    }
  } finally {
    scope.close();
  }

  // Regroup the returned VolumeId metadata to again be grouped by
  // LocatedBlock rather than by datanode
  Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtil
      .associateVolumeIdsWithBlocks(blocks, metadatas);

  // Combine original BlockLocations with new VolumeId information
  BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtil
      .convertToVolumeBlockLocations(blocks, blockVolumeIds);

  return volumeBlockLocations;
}
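For context, a hypothetical caller-side sketch of how this client method was reached through DistributedFileSystem#getFileBlockStorageLocations in the Hadoop 2.x line these examples come from (the API has since been removed). It assumes dfs.datanode.hdfs-blocks-metadata.enabled is set to true on both the client and the datanodes, and the path is a placeholder.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStorageLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class VolumeLocationSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("dfs.datanode.hdfs-blocks-metadata.enabled", true);
    // Assumes fs.defaultFS points at an HDFS cluster, so the cast succeeds
    DistributedFileSystem dfs =
        (DistributedFileSystem) new Path("/").getFileSystem(conf);
    Path file = new Path("/tmp/example.txt"); // hypothetical path
    FileStatus status = dfs.getFileStatus(file);
    List<BlockLocation> locs = Arrays.asList(
        dfs.getFileBlockLocations(status, 0, status.getLen()));
    // Each BlockStorageLocation augments the BlockLocation with a VolumeId
    // per replica, identifying the disk that holds it on each datanode.
    for (BlockStorageLocation loc : dfs.getFileBlockStorageLocations(locs)) {
      System.out.println("offset " + loc.getOffset() + " volumes "
          + Arrays.toString(loc.getVolumeIds()));
    }
    dfs.close();
  }
}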
Example #18
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] cacheBlock(HdfsBlockLocation loc) {
  return cacheBlocks(new HdfsBlockLocation[] {loc});
}
Example #19
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] cacheBlocks(HdfsBlockLocation[] locs) {
  return new DatanodeCommand[] {
      getResponse(locs, DatanodeProtocol.DNA_CACHE)
  };
}
Example #20
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] uncacheBlock(HdfsBlockLocation loc) {
  return uncacheBlocks(new HdfsBlockLocation[] {loc});
}
Example #21
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 4 votes |
private static DatanodeCommand[] uncacheBlocks(HdfsBlockLocation[] locs) {
  return new DatanodeCommand[] {
      getResponse(locs, DatanodeProtocol.DNA_UNCACHE)
  };
}
Example #22
Source File: TestFsDatasetCache.java From big-c with Apache License 2.0 | 4 votes |
@Test(timeout=600000)
public void testFilesExceedMaxLockedMemory() throws Exception {
  LOG.info("beginning testFilesExceedMaxLockedMemory");

  // Create some test files that will exceed total cache capacity
  final int numFiles = 5;
  final long fileSize = CACHE_CAPACITY / (numFiles-1);

  final Path[] testFiles = new Path[numFiles];
  final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
  final long[] fileSizes = new long[numFiles];
  for (int i=0; i<numFiles; i++) {
    testFiles[i] = new Path("/testFilesExceedMaxLockedMemory-" + i);
    DFSTestUtil.createFile(fs, testFiles[i], fileSize, (short)1, 0xDFAl);
    fileLocs[i] = (HdfsBlockLocation[])fs.getFileBlockLocations(
        testFiles[i], 0, fileSize);
    // Get the file size (sum of blocks)
    long[] sizes = getBlockSizes(fileLocs[i]);
    for (int j=0; j<sizes.length; j++) {
      fileSizes[i] += sizes[j];
    }
  }

  // Cache the first n-1 files
  long total = 0;
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);
  for (int i=0; i<numFiles-1; i++) {
    setHeartbeatResponse(cacheBlocks(fileLocs[i]));
    total = DFSTestUtil.verifyExpectedCacheUsage(
        rounder.round(total + fileSizes[i]), 4 * (i + 1), fsd);
  }

  // nth file should hit a capacity exception
  final LogVerificationAppender appender = new LogVerificationAppender();
  final Logger logger = Logger.getRootLogger();
  logger.addAppender(appender);
  setHeartbeatResponse(cacheBlocks(fileLocs[numFiles-1]));

  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      int lines = appender.countLinesWithMessage(
          "more bytes in the cache: " +
          DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
      return lines > 0;
    }
  }, 500, 30000);

  // Also check the metrics for the failure
  assertTrue("Expected more than 0 failed cache attempts",
      fsd.getNumBlocksFailedToCache() > 0);

  // Uncache the n-1 files
  int curCachedBlocks = 16;
  for (int i=0; i<numFiles-1; i++) {
    setHeartbeatResponse(uncacheBlocks(fileLocs[i]));
    long uncachedBytes = rounder.round(fileSizes[i]);
    total -= uncachedBytes;
    curCachedBlocks -= uncachedBytes / BLOCK_SIZE;
    DFSTestUtil.verifyExpectedCacheUsage(total, curCachedBlocks, fsd);
  }
  LOG.info("finishing testFilesExceedMaxLockedMemory");
}
Example #23
Source File: DFSClient.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Get block location information about a list of {@link HdfsBlockLocation}.
 * Used by {@link DistributedFileSystem#getFileBlockStorageLocations(List)} to
 * get {@link BlockStorageLocation}s for blocks returned by
 * {@link DistributedFileSystem#getFileBlockLocations(org.apache.hadoop.fs.FileStatus, long, long)}.
 *
 * This is done by making a round of RPCs to the associated datanodes, asking
 * the volume of each block replica. The returned array of
 * {@link BlockStorageLocation} expose this information as a
 * {@link VolumeId}.
 *
 * @param blockLocations
 *          target blocks on which to query volume location information
 * @return volumeBlockLocations original block array augmented with additional
 *         volume location information for each replica.
 */
public BlockStorageLocation[] getBlockStorageLocations(
    List<BlockLocation> blockLocations) throws IOException,
    UnsupportedOperationException, InvalidBlockTokenException {
  if (!getConf().getHdfsBlocksMetadataEnabled) {
    throw new UnsupportedOperationException("Datanode-side support for " +
        "getVolumeBlockLocations() must also be enabled in the client " +
        "configuration.");
  }
  // Downcast blockLocations and fetch out required LocatedBlock(s)
  List<LocatedBlock> blocks = new ArrayList<LocatedBlock>();
  for (BlockLocation loc : blockLocations) {
    if (!(loc instanceof HdfsBlockLocation)) {
      throw new ClassCastException("DFSClient#getVolumeBlockLocations " +
          "expected to be passed HdfsBlockLocations");
    }
    HdfsBlockLocation hdfsLoc = (HdfsBlockLocation) loc;
    blocks.add(hdfsLoc.getLocatedBlock());
  }

  // Re-group the LocatedBlocks to be grouped by datanodes, with the values
  // a list of the LocatedBlocks on the datanode.
  Map<DatanodeInfo, List<LocatedBlock>> datanodeBlocks =
      new LinkedHashMap<DatanodeInfo, List<LocatedBlock>>();
  for (LocatedBlock b : blocks) {
    for (DatanodeInfo info : b.getLocations()) {
      if (!datanodeBlocks.containsKey(info)) {
        datanodeBlocks.put(info, new ArrayList<LocatedBlock>());
      }
      List<LocatedBlock> l = datanodeBlocks.get(info);
      l.add(b);
    }
  }

  // Make RPCs to the datanodes to get volume locations for its replicas
  TraceScope scope =
      Trace.startSpan("getBlockStorageLocations", traceSampler);
  Map<DatanodeInfo, HdfsBlocksMetadata> metadatas;
  try {
    metadatas = BlockStorageLocationUtil.
        queryDatanodesForHdfsBlocksMetadata(conf, datanodeBlocks,
            getConf().getFileBlockStorageLocationsNumThreads,
            getConf().getFileBlockStorageLocationsTimeoutMs,
            getConf().connectToDnViaHostname);
    if (LOG.isTraceEnabled()) {
      LOG.trace("metadata returned: " +
          Joiner.on("\n").withKeyValueSeparator("=").join(metadatas));
    }
  } finally {
    scope.close();
  }

  // Regroup the returned VolumeId metadata to again be grouped by
  // LocatedBlock rather than by datanode
  Map<LocatedBlock, List<VolumeId>> blockVolumeIds = BlockStorageLocationUtil
      .associateVolumeIdsWithBlocks(blocks, metadatas);

  // Combine original BlockLocations with new VolumeId information
  BlockStorageLocation[] volumeBlockLocations = BlockStorageLocationUtil
      .convertToVolumeBlockLocations(blocks, blockVolumeIds);

  return volumeBlockLocations;
}