Java Code Examples for org.apache.hadoop.mapred.JobConf#getOutputKeyComparator()
The following examples show how to use org.apache.hadoop.mapred.JobConf#getOutputKeyComparator(). This method returns the RawComparator that the MapReduce framework uses to sort a job's map output keys: the comparator class registered via JobConf#setOutputKeyComparatorClass(), or, if no class is set, the WritableComparator registered for the map output key class. All examples are drawn from open-source projects; the original source file and project are noted above each example.
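Before the examples, a minimal sketch (not taken from any of the projects below) of how a custom comparator can be registered so that getOutputKeyComparator() returns it. The DescendingTextComparator class is hypothetical and assumes Text map output keys:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical comparator that inverts the natural ordering of Text keys.
public class DescendingTextComparator extends WritableComparator {
  public DescendingTextComparator() {
    super(Text.class, true); // true: instantiate keys so the object-level compare works
  }

  @Override
  @SuppressWarnings("rawtypes")
  public int compare(WritableComparable a, WritableComparable b) {
    return -super.compare(a, b); // negate for descending order
  }

  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setMapOutputKeyClass(Text.class);
    job.setOutputKeyComparatorClass(DescendingTextComparator.class);
    // getOutputKeyComparator() now returns a DescendingTextComparator instance.
    System.out.println(job.getOutputKeyComparator().getClass().getName());
  }
}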
Example 1
Source File: TotalOrderPartitioner.java From RDFS with Apache License 2.0 (an identical copy appears in hadoop-gpu)
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void configure(JobConf job) {
  try {
    String parts = getPartitionFile(job);
    final Path partFile = new Path(parts);
    final FileSystem fs = (DEFAULT_PATH.equals(parts))
      ? FileSystem.getLocal(job)     // assume in DistributedCache
      : partFile.getFileSystem(job);

    Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
    K[] splitPoints = readPartitions(fs, partFile, keyClass, job);
    if (splitPoints.length != job.getNumReduceTasks() - 1) {
      throw new IOException("Wrong number of partitions in keyset");
    }
    RawComparator<K> comparator =
      (RawComparator<K>) job.getOutputKeyComparator();
    for (int i = 0; i < splitPoints.length - 1; ++i) {
      if (comparator.compare(splitPoints[i], splitPoints[i+1]) >= 0) {
        throw new IOException("Split points are out of order");
      }
    }
    boolean natOrder =
      job.getBoolean("total.order.partitioner.natural.order", true);
    if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
      partitions = buildTrie((BinaryComparable[]) splitPoints, 0,
          splitPoints.length, new byte[0],
          job.getInt("total.order.partitioner.max.trie.depth", 2));
    } else {
      partitions = new BinarySearchNode(splitPoints, comparator);
    }
  } catch (IOException e) {
    throw new IllegalArgumentException("Can't read partitions file", e);
  }
}
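For context, a job that drives this configure() method might be set up as follows. This is a minimal sketch, not taken from the project; the partition-file path is illustrative, and the two tuning properties are the ones read in configure() above:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

public class TotalOrderSetup {
  public static JobConf configureJob() {
    JobConf job = new JobConf();
    job.setNumReduceTasks(4);             // partition file must then hold 4 - 1 = 3 keys
    job.setMapOutputKeyClass(Text.class); // BinaryComparable, so the trie path is taken
    job.setPartitionerClass(TotalOrderPartitioner.class);

    // Point the partitioner at a pre-built partition file, sorted with
    // the same comparator that getOutputKeyComparator() will return.
    TotalOrderPartitioner.setPartitionFile(job, new Path("/tmp/_partitions"));

    // Optional tuning, matching the properties read in configure() above.
    job.setBoolean("total.order.partitioner.natural.order", true);
    job.setInt("total.order.partitioner.max.trie.depth", 2);
    return job;
  }
}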
Example 2
Source File: InputSampler.java From RDFS with Apache License 2.0 (an identical copy appears in hadoop-gpu)
/**
 * Write a partition file for the given job, using the Sampler provided.
 * Queries the sampler for a sample keyset, sorts by the output key
 * comparator, selects the keys for each rank, and writes to the destination
 * returned from {@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K,V> void writePartitionFile(JobConf job, Sampler<K,V> sampler)
    throws IOException {
  final InputFormat<K,V> inf = (InputFormat<K,V>) job.getInputFormat();
  int numPartitions = job.getNumReduceTasks();
  K[] samples = sampler.getSample(inf, job);
  LOG.info("Using " + samples.length + " samples");
  RawComparator<K> comparator =
    (RawComparator<K>) job.getOutputKeyComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job));
  FileSystem fs = dst.getFileSystem(job);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, dst,
      job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
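A typical call sequence for this method, sketched under the assumption of a SequenceFile input with Text keys; the paths and sampling parameters (10% selection probability, up to 10000 samples from at most 10 splits) are illustrative:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.InputSampler;
import org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

public class PartitionFileDemo {
  public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    job.setNumReduceTasks(4);
    job.setMapOutputKeyClass(Text.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/data/input")); // illustrative path

    // Sample each record with probability 0.1, up to 10000 keys from at most 10 splits.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10);

    // writePartitionFile() sorts the sample with job.getOutputKeyComparator()
    // and writes numReduceTasks - 1 split keys to this file.
    TotalOrderPartitioner.setPartitionFile(job, new Path("/tmp/_partitions"));
    InputSampler.writePartitionFile(job, sampler);
  }
}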
Example 3
Source File: BasicTypeSorterBase.java From hadoop with Apache License 2.0 (identical copies appear in big-c, RDFS, and hadoop-gpu)
public void configure(JobConf conf) {
  comparator = conf.getOutputKeyComparator();
}
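getOutputKeyComparator() is typed as a RawComparator, so callers like this sort buffer can compare keys in their serialized byte form without deserializing them. Below is a sketch of such a byte-level comparator for Text keys, assuming Text's standard serialization (a vint length prefix followed by UTF-8 bytes); the RawTextComparator name is illustrative, and Hadoop's built-in Text.Comparator implements essentially the same logic:

import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableUtils;

// Illustrative raw comparator: orders serialized Text keys lexicographically
// by comparing their UTF-8 bytes directly, skipping the vint length prefix.
public class RawTextComparator implements RawComparator<Text> {
  @Override
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    int n1 = WritableUtils.decodeVIntSize(b1[s1]); // size of the length prefix
    int n2 = WritableUtils.decodeVIntSize(b2[s2]);
    return WritableComparator.compareBytes(b1, s1 + n1, l1 - n1,
                                           b2, s2 + n2, l2 - n2);
  }

  @Override
  public int compare(Text a, Text b) {
    return a.compareTo(b); // object-level fallback
  }
}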