org.apache.hadoop.io.RawComparator Java Examples
The following examples show how to use
org.apache.hadoop.io.RawComparator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ValuesIterator.java From incubator-tez with Apache License 2.0 | 6 votes |
/**
 * Creates a values iterator on top of a raw key/value iterator.
 *
 * <p>Stores the supplied collaborators, then obtains key and value
 * deserializers from a {@link SerializationFactory} built for {@code conf}
 * and opens them on this object's {@code keyIn} / {@code valueIn} buffers.
 *
 * @param in                 raw key/value iterator to read from
 * @param comparator         comparator kept for use by this iterator
 * @param keyClass           class used to look up the key deserializer
 * @param valClass           class used to look up the value deserializer
 * @param conf               configuration handed to the serialization factory
 * @param inputKeyCounter    counter kept in the field of the same name
 * @param inputValueCounter  counter kept in the field of the same name
 * @throws IOException declared because opening a deserializer may fail
 */
public ValuesIterator(TezRawKeyValueIterator in,
                      RawComparator<KEY> comparator,
                      Class<KEY> keyClass,
                      Class<VALUE> valClass,
                      Configuration conf,
                      TezCounter inputKeyCounter,
                      TezCounter inputValueCounter) throws IOException {
  // Plain collaborators first.
  this.in = in;
  this.comparator = comparator;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;

  // One factory serves both deserializers.
  final SerializationFactory factory = new SerializationFactory(conf);

  this.keyDeserializer = factory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);

  this.valDeserializer = factory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
}
Example #2
Source File: Merger.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Merges the given input paths by building a {@code MergeQueue} over them
 * and delegating to its {@code merge} method.
 *
 * @return the iterator produced by {@code MergeQueue.merge}
 * @throws IOException propagated from queue construction or the merge itself
 */
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator,
                          Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter)
    throws IOException {
  final MergeQueue<K, V> queue =
      new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec,
                           comparator, reporter);
  return queue.merge(keyClass, valueClass, mergeFactor, tmpDir,
                     readsCounter, writesCounter);
}
Example #3
Source File: Merger.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Merges the given input paths by building a {@code MergeQueue} (tagged
 * with {@code TaskType.REDUCE}) and delegating to its {@code merge} method.
 *
 * @return the iterator produced by {@code MergeQueue.merge}
 * @throws IOException propagated from queue construction or the merge itself
 */
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator,
                          Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Counters.Counter mergedMapOutputsCounter,
                          Progress mergePhase)
    throws IOException {
  final MergeQueue<K, V> queue =
      new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec,
                           comparator, reporter, mergedMapOutputsCounter,
                           TaskType.REDUCE);
  return queue.merge(keyClass, valueClass, mergeFactor, tmpDir,
                     readsCounter, writesCounter, mergePhase);
}
Example #4
Source File: TestValuesIterator.java From tez with Apache License 2.0 | 6 votes |
/**
 * Maps a {@code TestWithComparator} constant to a freshly created raw
 * comparator instance.
 *
 * @param comparator which comparator flavour to create
 * @return a new comparator for the constant, or {@code null} when the
 *         constant is not one of the handled cases
 */
private RawComparator getComparator(TestWithComparator comparator) {
  final RawComparator selected;
  switch (comparator) {
    case LONG:
      selected = new LongWritable.Comparator();
      break;
    case INT:
      selected = new IntWritable.Comparator();
      break;
    case BYTES:
      selected = new BytesWritable.Comparator();
      break;
    case TEZ_BYTES:
      selected = new TezBytesComparator();
      break;
    case TEXT:
      selected = new Text.Comparator();
      break;
    case CUSTOM:
      selected = new CustomKey.Comparator();
      break;
    default:
      selected = null;
      break;
  }
  return selected;
}
Example #5
Source File: Merger.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Merges the given input paths by building a {@code MergeQueue} and
 * delegating to its {@code merge} method. The queue is created with a
 * {@code null} merged-map-outputs counter and {@code TaskType.REDUCE}.
 *
 * @return the iterator produced by {@code MergeQueue.merge}
 * @throws IOException propagated from queue construction or the merge itself
 */
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          Path[] inputs, boolean deleteInputs,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator,
                          Progressable reporter,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase)
    throws IOException {
  final MergeQueue<K, V> queue =
      new MergeQueue<K, V>(conf, fs, inputs, deleteInputs, codec,
                           comparator, reporter, null, TaskType.REDUCE);
  return queue.merge(keyClass, valueClass, mergeFactor, tmpDir,
                     readsCounter, writesCounter, mergePhase);
}
Example #6
Source File: WeightedRangePartitioner.java From spork with Apache License 2.0 | 6 votes |
/**
 * Chooses the partition for {@code key}.
 *
 * <p>Keys present in {@code weightedParts} take their partition from the
 * associated sample generator. All other keys are located in
 * {@code quantiles} by binary search: an exact hit at position {@code i}
 * yields {@code i + 1}, a miss yields the insertion point, and the result
 * is capped at {@code numPartitions - 1}.
 *
 * @param key           key to partition
 * @param value         unused by this method
 * @param numPartitions total number of partitions
 * @return the chosen partition index
 */
@SuppressWarnings("unchecked")
@Override
public int getPartition(PigNullableWritable key, Writable value,
                        int numPartitions) {
  // Lazy one-time initialisation.
  if (!inited) {
    init();
  }
  if (comparator == null) {
    comparator = (RawComparator<PigNullableWritable>)
        PigMapReduce.sJobContext.getSortComparator();
  }

  // Weighted keys: defer to their generator.
  if (weightedParts.containsKey(key)) {
    DiscreteProbabilitySampleGenerator generator = weightedParts.get(key);
    return generator.getNext();
  }

  // Everything else: position within the quantile boundaries.
  final int found = Arrays.binarySearch(quantiles, key, comparator);
  final int partition = (found < 0) ? (-found - 1) : (found + 1);
  return Math.min(partition, numPartitions - 1);
}
Example #7
Source File: ReduceTask.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Creates a reduce values iterator that can skip record ranges.
 *
 * <p>After delegating to the superclass constructor it looks up the
 * skipped-group and skipped-record counters on {@code reporter}, decides
 * whether skipped records are to be written (only when
 * {@code toWriteSkipRecs()} is true and a skip output path is configured),
 * obtains the skip-range iterator and finally calls {@code mayBeSkip()}.
 *
 * @throws IOException propagated from the superclass constructor or the
 *         initial {@code mayBeSkip()} call
 */
public SkippingReduceValuesIterator(RawKeyValueIterator in,
                                    RawComparator<KEY> comparator,
                                    Class<KEY> keyClass,
                                    Class<VALUE> valClass,
                                    Configuration conf,
                                    TaskReporter reporter,
                                    TaskUmbilicalProtocol umbilical)
    throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);

  this.umbilical = umbilical;

  this.skipGroupCounter =
      reporter.getCounter(Counter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter =
      reporter.getCounter(Counter.REDUCE_SKIPPED_RECORDS);

  // Short-circuit kept: the output path is only consulted when
  // toWriteSkipRecs() is true.
  this.toWriteSkipRecs =
      toWriteSkipRecs() && (SkipBadRecords.getSkipOutputPath(conf) != null);

  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;

  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Example #8
Source File: Merger.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public MergeQueue(Configuration conf, FileSystem fs, Path[] inputs, boolean deleteInputs, CompressionCodec codec, RawComparator<K> comparator, Progressable reporter) throws IOException { this.conf = conf; this.fs = fs; this.codec = codec; this.comparator = comparator; this.reporter = reporter; for (Path file : inputs) { segments.add(new Segment<K, V>(conf, fs, file, codec, !deleteInputs)); } // Sort segments on file-lengths Collections.sort(segments, segmentComparator); }
Example #9
Source File: TezMerger.java From tez with Apache License 2.0 | 6 votes |
/**
 * Merges pre-built segments by constructing a {@code MergeQueue} over them
 * (its last constructor argument is {@code false}) and delegating to the
 * queue's {@code merge} method.
 *
 * @return the iterator produced by {@code MergeQueue.merge}
 * @throws IOException          propagated from the merge
 * @throws InterruptedException propagated from the merge
 */
public static <K extends Object, V extends Object>
TezRawKeyValueIterator merge(Configuration conf, FileSystem fs,
                             Class keyClass, Class valueClass,
                             List<Segment> segments,
                             int mergeFactor, Path tmpDir,
                             RawComparator comparator,
                             Progressable reporter,
                             boolean sortSegments,
                             TezCounter readsCounter,
                             TezCounter writesCounter,
                             TezCounter bytesReadCounter,
                             Progress mergePhase)
    throws IOException, InterruptedException {
  final MergeQueue queue =
      new MergeQueue(conf, fs, segments, comparator, reporter,
                     sortSegments, false);
  return queue.merge(keyClass, valueClass, mergeFactor, tmpDir,
                     readsCounter, writesCounter, bytesReadCounter,
                     mergePhase);
}
Example #10
Source File: ValuesIterator.java From tez with Apache License 2.0 | 6 votes |
/**
 * Sets up iteration over the values delivered by {@code in}.
 *
 * <p>The constructor records its arguments and prepares one deserializer
 * for keys and one for values. Both come from a single
 * {@link SerializationFactory} created from {@code conf} and are opened on
 * the {@code keyIn} and {@code valueIn} members respectively.
 *
 * @param in                 source of raw key/value pairs
 * @param comparator         comparator retained by this iterator
 * @param keyClass           key type used to select the key deserializer
 * @param valClass           value type used to select the value deserializer
 * @param conf               configuration for the serialization factory
 * @param inputKeyCounter    counter retained by this iterator
 * @param inputValueCounter  counter retained by this iterator
 * @throws IOException declared because opening a deserializer may fail
 */
public ValuesIterator(TezRawKeyValueIterator in,
                      RawComparator<KEY> comparator,
                      Class<KEY> keyClass,
                      Class<VALUE> valClass,
                      Configuration conf,
                      TezCounter inputKeyCounter,
                      TezCounter inputValueCounter)
    throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.inputKeyCounter = inputKeyCounter;
  this.inputValueCounter = inputValueCounter;

  final SerializationFactory serializers = new SerializationFactory(conf);

  // Key side.
  this.keyDeserializer = serializers.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);

  // Value side.
  this.valDeserializer = serializers.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
}
Example #11
Source File: Task.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Creates a values iterator and positions it on the first key.
 *
 * <p>After wiring up the key/value deserializers it calls
 * {@code readNextKey()}, moves the key just read from {@code nextKey} into
 * {@code key}, clears {@code nextKey}, and copies {@code more} into
 * {@code hasNext}.
 *
 * @param in         raw key/value iterator to read from
 * @param comparator comparator retained by this iterator
 * @param keyClass   key type used to select the key deserializer
 * @param valClass   value type used to select the value deserializer
 * @param conf       configuration for the serialization factory
 * @param reporter   progress reporter retained by this iterator
 * @throws IOException propagated from deserializer setup or the first read
 */
public ValuesIterator(RawKeyValueIterator in,
                      RawComparator<KEY> comparator,
                      Class<KEY> keyClass,
                      Class<VALUE> valClass,
                      Configuration conf,
                      Progressable reporter)
    throws IOException {
  this.in = in;
  this.comparator = comparator;
  this.reporter = reporter;

  final SerializationFactory factory = new SerializationFactory(conf);

  this.keyDeserializer = factory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);

  this.valDeserializer = factory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);

  // Prime the iterator with the first key.
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
Example #12
Source File: TezMerger.java From incubator-tez with Apache License 2.0 | 6 votes |
/**
 * Merges pre-built segments by constructing a {@code MergeQueue} over them
 * (its last constructor argument is {@code false}) and delegating to the
 * queue's {@code merge} method.
 *
 * @return the iterator produced by {@code MergeQueue.merge}
 * @throws IOException propagated from the merge
 */
public static <K extends Object, V extends Object>
TezRawKeyValueIterator merge(Configuration conf, FileSystem fs,
                             Class keyClass, Class valueClass,
                             List<Segment> segments,
                             int mergeFactor, Path tmpDir,
                             RawComparator comparator,
                             Progressable reporter,
                             boolean sortSegments,
                             TezCounter readsCounter,
                             TezCounter writesCounter,
                             TezCounter bytesReadCounter,
                             Progress mergePhase)
    throws IOException {
  final MergeQueue queue =
      new MergeQueue(conf, fs, segments, comparator, reporter,
                     sortSegments, false);
  return queue.merge(keyClass, valueClass, mergeFactor, tmpDir,
                     readsCounter, writesCounter, bytesReadCounter,
                     mergePhase);
}
Example #13
Source File: ReduceTask.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Builds a reduce-side values iterator with support for skipping ranges
 * of records.
 *
 * <p>Steps, in order: superclass construction; lookup of the
 * {@code REDUCE_SKIPPED_GROUPS} and {@code REDUCE_SKIPPED_RECORDS} counters;
 * computation of the write-skipped-records flag (requires both
 * {@code toWriteSkipRecs()} and a non-null skip output path); capture of
 * the key/value classes and reporter; creation of the skip-range iterator;
 * and an initial {@code mayBeSkip()} call.
 *
 * @throws IOException propagated from the superclass constructor or the
 *         initial {@code mayBeSkip()} call
 */
public SkippingReduceValuesIterator(RawKeyValueIterator in,
                                    RawComparator<KEY> comparator,
                                    Class<KEY> keyClass,
                                    Class<VALUE> valClass,
                                    Configuration conf,
                                    TaskReporter reporter,
                                    TaskUmbilicalProtocol umbilical)
    throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);

  this.umbilical = umbilical;
  this.skipGroupCounter =
      reporter.getCounter(Counter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter =
      reporter.getCounter(Counter.REDUCE_SKIPPED_RECORDS);

  // The skip output path is only checked when toWriteSkipRecs() is true.
  this.toWriteSkipRecs =
      toWriteSkipRecs() && (SkipBadRecords.getSkipOutputPath(conf) != null);

  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;

  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Example #14
Source File: Merger.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Merges an existing list of segments by building a {@code MergeQueue}
 * for the given task type and delegating to its {@code merge} method.
 *
 * @return the iterator produced by {@code MergeQueue.merge}
 * @throws IOException propagated from queue construction or the merge itself
 */
public static <K extends Object, V extends Object>
RawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class<K> keyClass, Class<V> valueClass,
                          CompressionCodec codec,
                          List<Segment<K, V>> segments,
                          int mergeFactor, Path tmpDir,
                          RawComparator<K> comparator,
                          Progressable reporter,
                          boolean sortSegments,
                          Counters.Counter readsCounter,
                          Counters.Counter writesCounter,
                          Progress mergePhase,
                          TaskType taskType)
    throws IOException {
  final MergeQueue<K, V> queue =
      new MergeQueue<K, V>(conf, fs, segments, comparator, reporter,
                           sortSegments, codec, taskType);
  return queue.merge(keyClass, valueClass, mergeFactor, tmpDir,
                     readsCounter, writesCounter, mergePhase);
}
Example #15
Source File: TotalOrderPartitioner.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** * Read in the partition file and build indexing data structures. * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and * <tt>total.order.partitioner.natural.order</tt> is not false, a trie * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes * will be built. Otherwise, keys will be located using a binary search of * the partition keyset using the {@link org.apache.hadoop.io.RawComparator} * defined for this job. The input file must be sorted with the same * comparator and contain {@link org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys. */ @SuppressWarnings("unchecked") // keytype from conf not static public void configure(JobConf job) { try { String parts = getPartitionFile(job); final Path partFile = new Path(parts); final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(job) // assume in DistributedCache : partFile.getFileSystem(job); Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass(); K[] splitPoints = readPartitions(fs, partFile, keyClass, job); if (splitPoints.length != job.getNumReduceTasks() - 1) { throw new IOException("Wrong number of partitions in keyset"); } RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator(); for (int i = 0; i < splitPoints.length - 1; ++i) { if (comparator.compare(splitPoints[i], splitPoints[i+1]) >= 0) { throw new IOException("Split points are out of order"); } } boolean natOrder = job.getBoolean("total.order.partitioner.natural.order", true); if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) { partitions = buildTrie((BinaryComparable[])splitPoints, 0, splitPoints.length, new byte[0], job.getInt("total.order.partitioner.max.trie.depth", 2)); } else { partitions = new BinarySearchNode(splitPoints, comparator); } } catch (IOException e) { throw new IllegalArgumentException("Can't read partitions file", e); } }
Example #16
Source File: Task.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Creates a runner for an old-API combiner class.
 *
 * <p>Records the combiner class and reads the map-output key class, the
 * map-output value class and the combiner key grouping comparator from
 * the {@code job} member.
 *
 * @param cls          combiner implementation to run
 * @param conf         job configuration passed to the superclass
 * @param inputCounter counter passed to the superclass
 * @param reporter     reporter passed to the superclass
 */
@SuppressWarnings("unchecked")
protected OldCombinerRunner(Class<? extends Reducer<K,V,K,V>> cls,
                            JobConf conf,
                            Counters.Counter inputCounter,
                            TaskReporter reporter) {
  super(inputCounter, conf, reporter);
  combinerClass = cls;

  // Types and comparator come from the job member, not the conf argument.
  keyClass = (Class<K>) job.getMapOutputKeyClass();
  valueClass = (Class<V>) job.getMapOutputValueClass();
  comparator = (RawComparator<K>) job.getCombinerKeyGroupingComparator();
}
Example #17
Source File: ConfigUtils.java From incubator-tez with Apache License 2.0 | 5 votes |
/**
 * Returns the secondary grouping comparator for input keys.
 *
 * <p>If a class is configured under
 * {@code TezJobConfig.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS} it is
 * instantiated reflectively; otherwise the intermediate input key
 * comparator is returned.
 *
 * @param conf configuration to consult
 * @return the configured comparator, or the intermediate input key
 *         comparator when none is configured
 */
public static <V> RawComparator<V> getInputKeySecondaryGroupingComparator(
    Configuration conf) {
  Class<? extends RawComparator> configured = conf.getClass(
      TezJobConfig.TEZ_RUNTIME_KEY_SECONDARY_COMPARATOR_CLASS,
      null,
      RawComparator.class);
  if (configured != null) {
    return ReflectionUtils.newInstance(configured, conf);
  }
  return getIntermediateInputKeyComparator(conf);
}
Example #18
Source File: ReduceTask.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Creates a reduce values iterator; all arguments are forwarded unchanged
 * to the superclass constructor.
 *
 * @throws IOException propagated from the superclass constructor
 */
public ReduceValuesIterator(RawKeyValueIterator in,
                            RawComparator<KEY> comparator,
                            Class<KEY> keyClass,
                            Class<VALUE> valClass,
                            Configuration conf,
                            Progressable reporter)
    throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
}
Example #19
Source File: JobConf.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Returns the user-defined {@link WritableComparable} comparator used to
 * group keys of the inputs to the reduce.
 *
 * <p>When no class is configured under
 * {@code JobContext.GROUP_COMPARATOR_CLASS}, the output key comparator is
 * returned instead.
 *
 * @return comparator set by the user for grouping values.
 * @see #setOutputValueGroupingComparator(Class) for details.
 */
public RawComparator getOutputValueGroupingComparator() {
  Class<? extends RawComparator> configured = getClass(
      JobContext.GROUP_COMPARATOR_CLASS, null, RawComparator.class);
  if (configured != null) {
    return ReflectionUtils.newInstance(configured, this);
  }
  return getOutputKeyComparator();
}
Example #20
Source File: ShuffledMergedInput.java From incubator-tez with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) protected synchronized void createValuesIterator() throws IOException { // Not used by ReduceProcessor vIter = new ValuesIterator(rawIter, (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(conf), ConfigUtils.getIntermediateInputKeyClass(conf), ConfigUtils.getIntermediateInputValueClass(conf), conf, inputKeyCounter, inputValueCounter); }
Example #21
Source File: TestTezMerger.java From tez with Apache License 2.0 | 5 votes |
/**
 * Merges the data sets stored at the given paths.
 *
 * @param pathList files to merge
 * @param rc       comparator to merge with; when {@code null} the
 *                 {@code comparator} member is used instead
 * @return iterator over the merged records
 * @throws IOException          propagated from the merger
 * @throws InterruptedException propagated from the merger
 */
private TezRawKeyValueIterator merge(List<Path> pathList, RawComparator rc)
    throws IOException, InterruptedException {
  TezMerger merger = new TezMerger();

  Path[] inputs = pathList.toArray(new Path[pathList.size()]);
  // Unique scratch directory per invocation.
  Path tmpDir = new Path(workDir, "tmp_" + System.nanoTime());

  return merger.merge(defaultConf, localFs,
      IntWritable.class, LongWritable.class,
      null, false, 0, 1024,
      inputs, true, 4, tmpDir,
      ((rc == null) ? comparator : rc),
      new Reporter(), null, null, null, new Progress());
}
Example #22
Source File: TFile.java From RDFS with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") static BytesComparator makeComparator(String comparator) { if (comparator.length() == 0) { // unsorted keys return null; } if (comparator.equals(COMPARATOR_MEMCMP)) { // default comparator return new BytesComparator(new MemcmpRawComparator()); } else if (comparator.startsWith(COMPARATOR_JCLASS)) { String compClassName = comparator.substring(COMPARATOR_JCLASS.length()).trim(); try { Class compClass = Class.forName(compClassName); // use its default ctor to create an instance return new BytesComparator((RawComparator<Object>) compClass .newInstance()); } catch (Exception e) { throw new IllegalArgumentException( "Failed to instantiate comparator: " + comparator + "(" + e.toString() + ")"); } } else { throw new IllegalArgumentException("Unsupported comparator: " + comparator); } }
Example #23
Source File: Task.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/**
 * Creates a runner for an old-API combiner class.
 *
 * <p>Records the combiner class and reads the map-output key class, the
 * map-output value class and the output key comparator from the
 * {@code job} member.
 *
 * @param cls          combiner implementation to run
 * @param conf         job configuration passed to the superclass
 * @param inputCounter counter passed to the superclass
 * @param reporter     reporter passed to the superclass
 */
protected OldCombinerRunner(Class<? extends Reducer<K,V,K,V>> cls,
                            JobConf conf,
                            Counters.Counter inputCounter,
                            TaskReporter reporter) {
  super(inputCounter, conf, reporter);
  combinerClass = cls;

  // Types and comparator come from the job member, not the conf argument.
  keyClass = (Class<K>) job.getMapOutputKeyClass();
  valueClass = (Class<V>) job.getMapOutputValueClass();
  comparator = (RawComparator<K>) job.getOutputKeyComparator();
}
Example #24
Source File: JobConf.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Returns the user-defined {@link WritableComparable} comparator used to
 * group keys of the inputs to the combiner.
 *
 * <p>When no class is configured under
 * {@code JobContext.COMBINER_GROUP_COMPARATOR_CLASS}, the output key
 * comparator is returned instead.
 *
 * @return comparator set by the user for grouping values.
 * @see #setCombinerKeyGroupingComparator(Class) for details.
 */
public RawComparator getCombinerKeyGroupingComparator() {
  Class<? extends RawComparator> configured = getClass(
      JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
  if (configured != null) {
    return ReflectionUtils.newInstance(configured, this);
  }
  return getOutputKeyComparator();
}
Example #25
Source File: JobConf.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Returns the user-defined {@link WritableComparable} comparator used to
 * group keys of the inputs to the reduce.
 *
 * <p>Falls back to the output key comparator when nothing is configured
 * under {@code JobContext.GROUP_COMPARATOR_CLASS}.
 *
 * @return comparator set by the user for grouping values.
 * @see #setOutputValueGroupingComparator(Class) for details.
 */
public RawComparator getOutputValueGroupingComparator() {
  Class<? extends RawComparator> groupingClass = getClass(
      JobContext.GROUP_COMPARATOR_CLASS, null, RawComparator.class);
  if (groupingClass != null) {
    return ReflectionUtils.newInstance(groupingClass, this);
  }
  return getOutputKeyComparator();
}
Example #26
Source File: Task.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Builds the {@code Reducer.Context} handed to a new-API reducer.
 *
 * <p>A {@code ReduceContextImpl} is created from the supplied arguments and
 * then wrapped via {@code WrappedReducer.getReducerContext}.
 *
 * @return the wrapped reducer context
 * @throws IOException          propagated from context construction
 * @throws InterruptedException propagated from context construction
 */
@SuppressWarnings("unchecked")
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE>
org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE>.Context
createReduceContext(
    org.apache.hadoop.mapreduce.Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
    Configuration job,
    org.apache.hadoop.mapreduce.TaskAttemptID taskId,
    RawKeyValueIterator rIter,
    org.apache.hadoop.mapreduce.Counter inputKeyCounter,
    org.apache.hadoop.mapreduce.Counter inputValueCounter,
    org.apache.hadoop.mapreduce.RecordWriter<OUTKEY,OUTVALUE> output,
    org.apache.hadoop.mapreduce.OutputCommitter committer,
    org.apache.hadoop.mapreduce.StatusReporter reporter,
    RawComparator<INKEY> comparator,
    Class<INKEY> keyClass,
    Class<INVALUE> valueClass)
    throws IOException, InterruptedException {
  // Underlying context implementation.
  org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE>
      baseContext =
          new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
              job, taskId, rIter, inputKeyCounter, inputValueCounter,
              output, committer, reporter, comparator, keyClass, valueClass);

  // Expose it through the Reducer.Context type.
  return new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>()
      .getReducerContext(baseContext);
}
Example #27
Source File: InputSampler.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** * Write a partition file for the given job, using the Sampler provided. * Queries the sampler for a sample keyset, sorts by the output key * comparator, selects the keys for each rank, and writes to the destination * returned from {@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}. */ @SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator public static <K,V> void writePartitionFile(JobConf job, Sampler<K,V> sampler) throws IOException { final InputFormat<K,V> inf = (InputFormat<K,V>) job.getInputFormat(); int numPartitions = job.getNumReduceTasks(); K[] samples = sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(job)); FileSystem fs = dst.getFileSystem(job); if (fs.exists(dst)) { fs.delete(dst, false); } SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; int last = -1; for(int i = 1; i < numPartitions; ++i) { int k = Math.round(stepSize * i); while (last >= k && comparator.compare(samples[last], samples[k]) == 0) { ++k; } writer.append(samples[k], nullValue); last = k; } writer.close(); }
Example #28
Source File: Task.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/**
 * Creates a values iterator for the combiner. All iteration arguments are
 * forwarded to the superclass; the combine input counter is retained in
 * the field of the same name.
 *
 * @param combineInputCounter counter retained by this iterator
 * @throws IOException propagated from the superclass constructor
 */
public CombineValuesIterator(RawKeyValueIterator in,
                             RawComparator<KEY> comparator,
                             Class<KEY> keyClass,
                             Class<VALUE> valClass,
                             Configuration conf,
                             Reporter reporter,
                             Counters.Counter combineInputCounter)
    throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.combineInputCounter = combineInputCounter;
}
Example #29
Source File: Merger.java From big-c with Apache License 2.0 | 5 votes |
public MergeQueue(Configuration conf, FileSystem fs, Path[] inputs, boolean deleteInputs, CompressionCodec codec, RawComparator<K> comparator, Progressable reporter, Counters.Counter mergedMapOutputsCounter, TaskType taskType) throws IOException { this.conf = conf; this.fs = fs; this.codec = codec; this.comparator = comparator; this.reporter = reporter; if (taskType == TaskType.MAP) { considerFinalMergeForProgress(); } for (Path file : inputs) { LOG.debug("MergeQ: adding: " + file); segments.add(new Segment<K, V>(conf, fs, file, codec, !deleteInputs, (file.toString().endsWith( Task.MERGED_OUTPUT_PREFIX) ? null : mergedMapOutputsCounter))); } // Sort segments on file-lengths Collections.sort(segments, segmentComparator); }
Example #30
Source File: SequenceFile.java From gemfirexd-oss with Apache License 2.0 | 5 votes |
/** Sort and merge using an arbitrary {@link RawComparator}. */ public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf, Metadata metadata) { this.fs = fs; this.comparator = comparator; this.keyClass = keyClass; this.valClass = valClass; this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024; this.factor = conf.getInt("io.sort.factor", 100); this.conf = conf; this.metadata = metadata; }