Java Code Examples for org.apache.hadoop.io.MapFile#Reader
The following examples show how to use org.apache.hadoop.io.MapFile#Reader.
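As a quick orientation before the examples: a MapFile.Reader opens a MapFile directory (which contains "data" and "index" files), supports lookups of sorted keys via get(), and must be closed when done. The sketch below is a minimal, hypothetical usage; the path, key, and Text key/value types are placeholder assumptions, so substitute the Writable types your MapFile was actually written with.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.MapFile;
  import org.apache.hadoop.io.Text;

  Configuration conf = new Configuration();
  // "/tmp/my-mapfile" is a placeholder MapFile directory
  MapFile.Reader reader = new MapFile.Reader(new Path("/tmp/my-mapfile"), conf);
  try {
    Text value = new Text();
    // get() fills 'value' and returns it if the key exists, otherwise returns null
    if (reader.get(new Text("some-key"), value) != null) {
      System.out.println("some-key -> " + value);
    }
  } finally {
    reader.close();
  }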
Example 1
Source File: TestCodec.java From hadoop with Apache License 2.0
private void codecTestMapFile(Class<? extends CompressionCodec> clazz,
    CompressionType type, int records) throws Exception {

  FileSystem fs = FileSystem.get(conf);
  LOG.info("Creating MapFiles with " + records +
      " records using codec " + clazz.getSimpleName());
  Path path = new Path(new Path(
      System.getProperty("test.build.data", "/tmp")),
      clazz.getSimpleName() + "-" + type + "-" + records);

  LOG.info("Writing " + path);
  createMapFile(conf, fs, path, clazz.newInstance(), type, records);

  MapFile.Reader reader = new MapFile.Reader(path, conf);
  Text key1 = new Text("002");
  assertNotNull(reader.get(key1, new Text()));
  Text key2 = new Text("004");
  assertNotNull(reader.get(key2, new Text()));
}
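The createMapFile helper called above is not part of this snippet. For completeness, here is a hedged sketch of what such a helper could look like: it writes zero-padded Text keys with a MapFile.Writer so that keys such as "002" and "004" exist for the assertions. The signature and body are assumptions inferred from the call site, not the project's verbatim code.

  private static void createMapFile(Configuration conf, FileSystem fs, Path path,
      CompressionCodec codec, CompressionType type, int records) throws IOException {
    // Option-based MapFile.Writer constructor (Hadoop 2.x+)
    MapFile.Writer writer = new MapFile.Writer(conf, path,
        MapFile.Writer.keyClass(Text.class),
        MapFile.Writer.valueClass(Text.class),
        MapFile.Writer.compression(type, codec));
    try {
      Text key = new Text();
      for (int j = 0; j < records; j++) {
        key.set(String.format("%03d", j)); // zero-padded, so "002" and "004" are present
        writer.append(key, key); // the key doubles as the value in this sketch
      }
    } finally {
      writer.close();
    }
  }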
Example 2
Source File: MapFileReader.java From deeplearning4j with Apache License 2.0
public MapFileReader(List<String> paths, IndexToKey indexToKey,
    Class<? extends Writable> recordClass) throws IOException {

  this.indexToKey = indexToKey;
  this.recordClass = recordClass;
  this.readers = new MapFile.Reader[paths.size()];

  SequenceFile.Reader.Option[] opts = new SequenceFile.Reader.Option[0];

  Configuration config = new Configuration();
  for (int i = 0; i < paths.size(); i++) {
    readers[i] = new MapFile.Reader(new Path(paths.get(i)), config, opts);
    if (readers[i].getValueClass() != recordClass) {
      throw new UnsupportedOperationException("MapFile value class: "
          + readers[i].getValueClass() + ", but requested record class: "
          + recordClass + ", path = " + paths.get(i));
    }
  }

  recordIndexesEachReader = indexToKey.initialize(readers, recordClass);
}
Example 3
Source File: MapFileOutputFormat.java From RDFS with Apache License 2.0
/** Open the output generated by this format. */
public static MapFile.Reader[] getReaders(FileSystem ignored, Path dir,
    Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));

  // sort names, so that hash partitioning works
  Arrays.sort(names);

  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
Example 4
Source File: MapFileOutputFormat.java From hadoop with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable> Writable getEntry(
    MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
    throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 5
Source File: LinkDumper.java From anthelion with Apache License 2.0
public static void main(String[] args) throws Exception {

  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println(" " + node.getUrl() + " - "
        + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
Example 6
Source File: MapFileOutputFormat.java From hadoop-gpu with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable> Writable getEntry(
    MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
    throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 7
Source File: SegmentHandler.java From anthelion with Apache License 2.0
/** Open the output generated by this format. */
private MapFile.Reader[] getReaders(String subDir) throws IOException {
  Path dir = new Path(segmentDir, subDir);
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir,
      SegmentPathFilter.INSTANCE));

  // sort names, so that hash partitioning works
  Arrays.sort(names);

  MapFile.Reader[] parts = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return parts;
}
Example 8
Source File: TestSegmentMergerCrawlDatums.java From nutch-htmlunit with Apache License 2.0
/**
 * Checks the merged segment and removes the test data afterwards.
 *
 * @param testDir the test directory
 * @param mergedSegment the merged segment
 * @return the final status
 */
protected byte checkMergedSegment(Path testDir, Path mergedSegment)
    throws Exception {
  // Get a MapFile reader for the <Text,CrawlDatum> pairs
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs,
      new Path(mergedSegment, CrawlDatum.FETCH_DIR_NAME), conf);

  Text key = new Text();
  CrawlDatum value = new CrawlDatum();
  byte finalStatus = 0x0;

  for (MapFile.Reader reader : readers) {
    while (reader.next(key, value)) {
      LOG.info("Reading status for: " + key.toString() + " > "
          + CrawlDatum.getStatusName(value.getStatus()));

      // Only consider fetch status
      if (CrawlDatum.hasFetchStatus(value)
          && key.toString().equals("http://nutch.apache.org/")) {
        finalStatus = value.getStatus();
      }
    }

    // Close the reader again
    reader.close();
  }

  // Remove the test directory again
  fs.delete(testDir, true);

  LOG.info("Final fetch status for: http://nutch.apache.org/ > "
      + CrawlDatum.getStatusName(finalStatus));

  // Return the final status
  return finalStatus;
}
Example 9
Source File: MapFileOutputFormat.java From big-c with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable> Writable getEntry(
    MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
    throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 10
Source File: MapFileOutputFormat.java From big-c with Apache License 2.0
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable, V extends Writable> Writable getEntry(
    MapFile.Reader[] readers, Partitioner<K, V> partitioner, K key, V value)
    throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 11
Source File: FSUtils.java From nutch-htmlunit with Apache License 2.0
/**
 * Closes a group of MapFile readers.
 *
 * @param readers The MapFile readers to close.
 * @throws IOException If an error occurs while closing a reader.
 */
public static void closeReaders(MapFile.Reader[] readers) throws IOException {
  // loop through the readers, closing one by one
  if (readers != null) {
    for (int i = 0; i < readers.length; i++) {
      MapFile.Reader reader = readers[i];
      if (reader != null) {
        reader.close();
      }
    }
  }
}
Example 12
Source File: LinkDumper.java From nutch-htmlunit with Apache License 2.0
public static void main(String[] args) throws Exception {

  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println(" " + node.getUrl() + " - "
        + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
Example 13
Source File: TestSequenceFile.java From compiler with Apache License 2.0
private static void closeMap(MapFile.Reader map) {
  if (map != null)
    try {
      map.close();
    } catch (final IOException e) {
      e.printStackTrace();
    }
  map = null;
}
Example 14
Source File: FileSplit.java From mrgeo with Apache License 2.0
public void generateSplits(Path parent, Configuration conf) throws IOException {
  List<FileSplitInfo> list = new ArrayList<>();

  // get a Hadoop file system handle
  FileSystem fs = getFileSystem(parent);

  // get the list of paths of the subdirectories of the parent
  Path[] paths = FileUtil.stat2Paths(fs.listStatus(parent));
  Arrays.sort(paths);

  int partition = 0;

  // look inside each subdirectory for a data dir and keep track
  for (Path p : paths) {
    Path mapfile = null;
    FileStatus[] dirFiles = fs.listStatus(p);
    for (FileStatus dirFile : dirFiles) {
      if (dirFile.getPath().getName().equals("data")) {
        mapfile = dirFile.getPath().getParent();
        break;
      }
    }

    if (mapfile != null) {
      RasterWritable val = new RasterWritable();
      MapFile.Reader reader = createMapFileReader(conf, mapfile);
      TileIdWritable firstKey =
          (TileIdWritable) reader.getClosest(new TileIdWritable(0), val);
      TileIdWritable lastKey =
          (TileIdWritable) reader.getClosest(new TileIdWritable(Long.MAX_VALUE), val, true);
      if (firstKey != null && lastKey != null) {
        list.add(new FileSplitInfo(firstKey.get(), lastKey.get(),
            mapfile.getName(), partition));
      }
      partition++;
    }
  }

  splits = list.toArray(new FileSplitInfo[list.size()]);
}
Example 15
Source File: FileSplitTest.java From mrgeo with Apache License 2.0
@Test
@Category(UnitTest.class)
public void testGenerateSplitsFromPath() throws Exception {
  // Setup a mock directory structure
  Path rootPath = new Path(FileSplitTest.class.getName() + "-testRootPath");
  Path path1 = new Path(rootPath, FileSplitTest.class.getName() + "-testPath1");
  Path path2 = new Path(rootPath, FileSplitTest.class.getName() + "-testPath2");
  Path path3 = new Path(rootPath, FileSplitTest.class.getName() + "-testPath3");
  Path path1_1 = new Path(path1, "notDataDir");
  Path path1_2 = new Path(path1, "data");
  Path path2_1 = new Path(path2, "data");
  Path path3_1 = new Path(path3, "notDataDir");

  // Setup the FileSystem
  FileSystem mockFS = new FileSystemBuilder()
      .fileStatus(rootPath, new FileStatusBuilder().path(path1).build())
      .fileStatus(rootPath, new FileStatusBuilder().path(path2).build())
      .fileStatus(rootPath, new FileStatusBuilder().path(path3).build())
      .fileStatus(path1, new FileStatusBuilder().path(path1_1).build())
      .fileStatus(path1, new FileStatusBuilder().path(path1_2).build())
      .fileStatus(path2, new FileStatusBuilder().path(path2_1).build())
      .fileStatus(path3, new FileStatusBuilder().path(path3_1).build())
      .build();

  // setup map file readers for each of the data directories
  RasterWritable mockValue = new RasterWritable();
  TileIdWritable[] path1Keys = {new TileIdWritable(2L), new TileIdWritable(4L),
      new TileIdWritable(6L)};
  RasterWritable[] path1Values = {mockValue, mockValue, mockValue};
  TileIdWritable[] path2Keys = {new TileIdWritable(5L), new TileIdWritable(6L),
      new TileIdWritable(7L)};
  RasterWritable[] path2Values = {mockValue, mockValue, mockValue};
  MapFile.Reader mockMapFileReaderPath1 = new MapFileReaderBuilder()
      .keyClass(TileIdWritable.class)
      .valueClass(RasterWritable.class)
      .keys(path1Keys)
      .values(path1Values)
      .build();
  MapFile.Reader mockMapFileReaderPath2 = new MapFileReaderBuilder()
      .keyClass(TileIdWritable.class)
      .valueClass(RasterWritable.class)
      .keys(path2Keys)
      .values(path2Values)
      .build();

  // Setup a Configuration
  Configuration mockConfiguration = new ConfigurationBuilder().build();

  FileSplit spySubject = new FileSplit();
  subject = spy(spySubject);
  doReturn(mockFS).when(subject).getFileSystem(rootPath);
  doReturn(mockMapFileReaderPath1).when(subject)
      .createMapFileReader(mockConfiguration, path1);
  doReturn(mockMapFileReaderPath2).when(subject)
      .createMapFileReader(mockConfiguration, path2);

  subject.generateSplits(rootPath, mockConfiguration);

  // Verify we got splits for path 1 and 2
  SplitInfo[] splits = subject.getSplits();
  Assert.assertEquals(2, splits.length);
  verifySplit(path1, path1Keys, splits, 0);
  verifySplit(path2, path2Keys, splits, 1);
}
Example 16
Source File: MapFileReader.java From DataVec with Apache License 2.0
@Override
public void close() throws IOException {
  for (MapFile.Reader r : readers) {
    r.close();
  }
}
Example 17
Source File: LongIndexToKey.java From deeplearning4j with Apache License 2.0
@Override
public List<Pair<Long, Long>> initialize(MapFile.Reader[] readers,
    Class<? extends Writable> valueClass) throws IOException {

  List<Pair<Long, Long>> l = new ArrayList<>(readers.length);
  for (MapFile.Reader r : readers) {
    // Get the first and last keys:
    long first = -1;
    long last = -1;

    // First key: no method for this, for some inexplicable reason :/
    LongWritable k = new LongWritable();
    Writable v = ReflectionUtils.newInstance(valueClass, null);
    boolean hasNext = r.next(k, v);
    if (!hasNext) {
      // This map file is empty - no data
      l.add(new Pair<>(-1L, -1L));
      continue;
    }
    first = k.get();

    // Last key: easy
    r.reset();
    r.finalKey(k);
    last = k.get();

    l.add(new Pair<>(first, last));
  }

  // Check that things are actually contiguous:
  List<Pair<Long, Long>> sorted = new ArrayList<>(l.size());
  for (Pair<Long, Long> p : l) {
    if (p.getLeft() >= 0) {
      sorted.add(p);
    }
  }
  Collections.sort(sorted, new Comparator<Pair<Long, Long>>() {
    @Override
    public int compare(Pair<Long, Long> o1, Pair<Long, Long> o2) {
      return Long.compare(o1.getFirst(), o2.getFirst());
    }
  });

  if (sorted.size() == 0) {
    throw new IllegalStateException("Map file is empty - no data available");
  }
  if (sorted.get(0).getFirst() != 0L) {
    throw new UnsupportedOperationException("Minimum key value is not 0: got "
        + sorted.get(0).getFirst());
  }

  for (int i = 0; i < sorted.size() - 1; i++) {
    long currLast = sorted.get(i).getSecond();
    long nextFirst = sorted.get(i + 1).getFirst();
    if (nextFirst == -1) {
      // Skip empty map file
      continue;
    }
    if (currLast + 1 != nextFirst) {
      throw new IllegalStateException(
          "Keys are not contiguous between readers: first/last indices (inclusive) "
              + "are " + sorted
              + ".\n LongIndexKey assumes unique and contiguous LongWritable keys");
    }
  }

  readerIndices = l;
  return readerIndices;
}
Example 18
Source File: SegmentHandler.java From nutch-htmlunit with Apache License 2.0
private Writable getEntry(MapFile.Reader[] readers, Text url,
    Writable entry) throws IOException {
  return MapFileOutputFormat.getEntry(readers, PARTITIONER, url, entry);
}
Example 19
Source File: FileSplit.java From mrgeo with Apache License 2.0
protected MapFile.Reader createMapFileReader(Configuration conf, Path mapfile)
    throws IOException {
  return new MapFile.Reader(mapfile, conf);
}
Example 20
Source File: SegmentHandler.java From anthelion with Apache License 2.0
private void closeReaders(MapFile.Reader[] readers) throws IOException {
  for (int i = 0; i < readers.length; i++) {
    readers[i].close();
  }
}