org.apache.hadoop.mapred.lib.HashPartitioner Java Examples
The following examples show how to use org.apache.hadoop.mapred.lib.HashPartitioner. Each example lists the open-source project and source file it was taken from.
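For context, HashPartitioner is the default partitioner of the old org.apache.hadoop.mapred API: it spreads map output keys across reducers purely by hash code and needs no configuration of its own. Its logic is essentially the following (paraphrased from the Hadoop source; the class and method names match the examples below):

public class HashPartitioner<K2, V2> implements Partitioner<K2, V2> {

  public void configure(JobConf job) {}

  /** Use {@link Object#hashCode()} to partition; the mask keeps the result non-negative. */
  public int getPartition(K2 key, V2 value, int numReduceTasks) {
    return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
  }
}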
Example #1
Source File: SortValidator.java From hadoop-gpu with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition = Integer.valueOf(
                    inputFile.substring(inputFile.lastIndexOf("part") + 5)
                  ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
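The snippet above only records which part file is being read (partition) and how many reducers the sort job used (noSortReducers). The later per-record check that SortValidator performs boils down to something like the following simplified sketch (not the exact project code), using the fields set in configure():

// Simplified sketch: confirm the key really belongs to the partition this
// 'part-*' file was written by, using the same HashPartitioner the sort used.
int expected = partitioner.getPartition(key, value, noSortReducers);
if (expected != partition) {
  throw new IOException("Partitions do not match for record: " + key);
}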
Example #2
Source File: SortValidator.java From big-c with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
                    inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
Example #3
Source File: NodeReader.java From nutch-htmlunit with Apache License 2.0
/**
 * Prints the content of the Node represented by the url to system out.
 *
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 *
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, WebGraph.NODE_DIR), getConf());

  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
      new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println(" inlink score: " + node.getInlinkScore());
  System.out.println(" outlink score: " + node.getOutlinkScore());
  System.out.println(" num inlinks: " + node.getNumInlinks());
  System.out.println(" num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
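The Nutch readers in this and the following examples rely on MapFileOutputFormat.getEntry to route the lookup through the same HashPartitioner that wrote the data, so only one part file is consulted. A hedged sketch of that lookup, written in the style of the example above and assuming the reader array is ordered by partition number as MapFileOutputFormat.getReaders returns it:

// Sketch of the lookup performed by MapFileOutputFormat.getEntry:
// hash the key to a partition, then query only that reader.
public static Node lookupNode(MapFile.Reader[] readers, String url)
    throws IOException {
  Text key = new Text(url);
  Node value = new Node();
  int part = new HashPartitioner<Text, Node>()
      .getPartition(key, value, readers.length);
  return (Node) readers[part].get(key, value); // null if the url is absent
}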
Example #4
Source File: LoopReader.java From anthelion with Apache License 2.0
/**
 * Prints loopset for a single url. The loopset information will show any
 * outlink url that eventually forms a link cycle.
 *
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 *
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, Loops.LOOPS_DIR), getConf());

  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
      new HashPartitioner<Text, LoopSet>(), key, loop);

  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println("  " + loopUrl);
  }

  // close the readers
  FSUtils.closeReaders(loopReaders);
}
Example #5
Source File: NodeReader.java From anthelion with Apache License 2.0
/**
 * Prints the content of the Node represented by the url to system out.
 *
 * @param webGraphDb The webgraph from which to get the node.
 * @param url The url of the node.
 *
 * @throws IOException If an error occurs while getting the node.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

  fs = FileSystem.get(getConf());
  nodeReaders = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, WebGraph.NODE_DIR), getConf());

  // open the readers, get the node, print out the info, and close the readers
  Text key = new Text(url);
  Node node = new Node();
  MapFileOutputFormat.getEntry(nodeReaders,
      new HashPartitioner<Text, Node>(), key, node);
  System.out.println(url + ":");
  System.out.println(" inlink score: " + node.getInlinkScore());
  System.out.println(" outlink score: " + node.getOutlinkScore());
  System.out.println(" num inlinks: " + node.getNumInlinks());
  System.out.println(" num outlinks: " + node.getNumOutlinks());
  FSUtils.closeReaders(nodeReaders);
}
Example #6
Source File: SortValidator.java From hadoop with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get(JobContext.MAP_INPUT_FILE));
      String inputFile = inputURI.getPath();
      // part file is of the form part-r-xxxxx
      partition = Integer.valueOf(inputFile.substring(
                    inputFile.lastIndexOf("part") + 7)).intValue();
      noSortReducers = job.getInt(SORT_REDUCES, -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
Example #7
Source File: LoopReader.java From nutch-htmlunit with Apache License 2.0
/**
 * Prints loopset for a single url. The loopset information will show any
 * outlink url that eventually forms a link cycle.
 *
 * @param webGraphDb The WebGraph to check for loops
 * @param url The url to check.
 *
 * @throws IOException If an error occurs while printing loopset information.
 */
public void dumpUrl(Path webGraphDb, String url) throws IOException {

  // open the readers
  fs = FileSystem.get(getConf());
  loopReaders = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, Loops.LOOPS_DIR), getConf());

  // get the loopset for a given url, if any
  Text key = new Text(url);
  LoopSet loop = new LoopSet();
  MapFileOutputFormat.getEntry(loopReaders,
      new HashPartitioner<Text, LoopSet>(), key, loop);

  // print out each loop url in the set
  System.out.println(url + ":");
  for (String loopUrl : loop.getLoopSet()) {
    System.out.println("  " + loopUrl);
  }

  // close the readers
  FSUtils.closeReaders(loopReaders);
}
Example #8
Source File: SortValidator.java From RDFS with Apache License 2.0
public void configure(JobConf job) {
  // 'key' == sortInput for sort-input; key == sortOutput for sort-output
  key = deduceInputFile(job);

  if (key == sortOutput) {
    partitioner = new HashPartitioner<WritableComparable, Writable>();

    // Figure the 'current' partition and no. of reduces of the 'sort'
    try {
      URI inputURI = new URI(job.get("map.input.file"));
      String inputFile = inputURI.getPath();
      partition = Integer.valueOf(
                    inputFile.substring(inputFile.lastIndexOf("part") + 5)
                  ).intValue();
      noSortReducers = job.getInt("sortvalidate.sort.reduce.tasks", -1);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      System.exit(-1);
    }
  }
}
Example #9
Source File: LinkDumper.java From nutch-htmlunit with Apache License 2.0
public static void main(String[] args) throws Exception {

  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
        + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
Example #10
Source File: CrawlDbReader.java From anthelion with Apache License 2.0
public CrawlDatum get(String crawlDb, String url, Configuration config)
    throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
Example #11
Source File: LinkDumper.java From anthelion with Apache License 2.0
public static void main(String[] args) throws Exception {

  if (args == null || args.length < 2) {
    System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
    return;
  }

  // open the readers for the linkdump directory
  Configuration conf = NutchConfiguration.create();
  FileSystem fs = FileSystem.get(conf);
  Path webGraphDb = new Path(args[0]);
  String url = args[1];
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs,
      new Path(webGraphDb, DUMP_DIR), conf);

  // get the link nodes for the url
  Text key = new Text(url);
  LinkNodes nodes = new LinkNodes();
  MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, LinkNodes>(), key, nodes);

  // print out the link nodes
  LinkNode[] linkNodesAr = nodes.getLinks();
  System.out.println(url + ":");
  for (LinkNode node : linkNodesAr) {
    System.out.println("  " + node.getUrl() + " - "
        + node.getNode().toString());
  }

  // close the readers
  FSUtils.closeReaders(readers);
}
Example #12
Source File: CrawlDbReader.java From nutch-htmlunit with Apache License 2.0
public CrawlDatum get(String crawlDb, String url, Configuration config)
    throws IOException {
  Text key = new Text(url);
  CrawlDatum val = new CrawlDatum();
  openReaders(crawlDb, config);
  CrawlDatum res = (CrawlDatum) MapFileOutputFormat.getEntry(readers,
      new HashPartitioner<Text, CrawlDatum>(), key, val);
  return res;
}
Example #13
Source File: Submitter.java From RDFS with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner",
                       HashPartitioner.class,
                       Partitioner.class);
}
Example #14
Source File: Submitter.java From hadoop-gpu with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass("hadoop.pipes.partitioner",
                       HashPartitioner.class,
                       Partitioner.class);
}
Example #15
Source File: Submitter.java From big-c with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER,
                       HashPartitioner.class,
                       Partitioner.class);
}
Example #16
Source File: Submitter.java From hadoop with Apache License 2.0
/**
 * Get the user's original partitioner.
 * @param conf the configuration to look in
 * @return the class that the user submitted
 */
static Class<? extends Partitioner> getJavaPartitioner(JobConf conf) {
  return conf.getClass(Submitter.PARTITIONER,
                       HashPartitioner.class,
                       Partitioner.class);
}
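getJavaPartitioner only reads the configuration; a pipes job that wants a custom Java-side partitioner records it under the same key. A minimal sketch, where MyPartitioner is a hypothetical stand-in for your own Partitioner implementation:

JobConf conf = new JobConf();
// Older Submitter versions read the literal key "hadoop.pipes.partitioner";
// newer ones expose the same key as the constant Submitter.PARTITIONER.
conf.setClass("hadoop.pipes.partitioner", MyPartitioner.class, Partitioner.class);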
Example #17
Source File: JobConf.java From RDFS with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
 * to be sent to the {@link Reducer}s.
 *
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class,
                  Partitioner.class);
}
Example #18
Source File: JobConf.java From big-c with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
 * to be sent to the {@link Reducer}s.
 *
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class,
                  Partitioner.class);
}
Example #19
Source File: JobConf.java From hadoop with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
 * to be sent to the {@link Reducer}s.
 *
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class,
                  Partitioner.class);
}
Example #20
Source File: JobConf.java From hadoop-gpu with Apache License 2.0
/**
 * Get the {@link Partitioner} used to partition {@link Mapper}-outputs
 * to be sent to the {@link Reducer}s.
 *
 * @return the {@link Partitioner} used to partition map-outputs.
 */
public Class<? extends Partitioner> getPartitionerClass() {
  return getClass("mapred.partitioner.class",
                  HashPartitioner.class,
                  Partitioner.class);
}
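getPartitionerClass falls back to HashPartitioner whenever the job did not set its own class. The usual way to override the default on the old API is the matching setter, sketched below with a hypothetical MyPartitioner:

JobConf conf = new JobConf();
conf.setPartitionerClass(MyPartitioner.class); // stored under mapred.partitioner.class
conf.setNumReduceTasks(4);                     // the count passed to getPartition()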