org.apache.hadoop.mapreduce.Reducer Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.Reducer.
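Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the typical pattern: subclass Reducer with the four key/value types, override reduce(), and write the aggregated result through the Context.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/** Sums the integer values seen for each key, as in a word-count job. */
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

  private final IntWritable result = new IntWritable();

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    result.set(sum);
    context.write(key, result); // one (key, sum) pair per distinct key
  }
}

A reducer like this is registered on a job with job.setReducerClass(IntSumReducer.class); because summation is associative and commutative, the same class can typically also serve as a combiner via job.setCombinerClass.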
Example #1
Source File: FlowPartitionReducer.java From xxhadoop with Apache License 2.0 | 6 votes |
@Override
protected void reduce(Text key, Iterable<FlowBean> values,
    Reducer<Text, FlowBean, Text, FlowBean>.Context context)
    throws IOException, InterruptedException {
  //super.reduce(arg0, arg1, arg2);
  long upFlow = 0;
  long downFlow = 0;
  //long flowSum = 0;
  for (FlowBean flowBean : values) {
    upFlow += flowBean.getUpFlow();
    downFlow += flowBean.getDownFlow();
    //flowSum += flowBean.getSumFlow();
  }
  result.setPhoneNum(key.toString());
  result.setUpFlow(upFlow);
  result.setDownFlow(downFlow);
  //result.setSumFlow(flowSum);
  result.setSumFlow(upFlow + downFlow);
  context.write(key, result);
}
Example #2
Source File: CountPlan.java From rya with Apache License 2.0 | 6 votes |
@Override
public void reduce(final IntermediateProspect prospect, final Iterable<LongWritable> counts,
    final Date timestamp, final Reducer.Context context)
    throws IOException, InterruptedException {
  long sum = 0;
  for (final LongWritable count : counts) {
    sum += count.get();
  }

  final String indexType = prospect.getTripleValueType().getIndexType();

  // not sure if this is the best idea..
  if ((sum >= 0) || indexType.equals(TripleValueType.PREDICATE.getIndexType())) {
    final Mutation m = new Mutation(indexType + DELIM + prospect.getData() + DELIM
        + ProspectorUtils.getReverseIndexDateTime(timestamp));

    final String dataType = prospect.getDataType();
    final ColumnVisibility visibility = new ColumnVisibility(prospect.getVisibility());
    final Value sumValue = new Value(("" + sum).getBytes(StandardCharsets.UTF_8));
    m.put(COUNT, prospect.getDataType(), visibility, timestamp.getTime(), sumValue);

    context.write(null, m);
  }
}
Example #3
Source File: MapReduceTestUtil.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Creates a simple kill job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {

  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Kill-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(KillMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
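Note that the stock Reducer class passed to setReducerClass here acts as an identity reducer, forwarding each key/value pair unchanged; and because setNumReduceTasks(0) makes the job map-only, the reduce phase is skipped entirely in this example.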
Example #4
Source File: StoreCopyReducer.java From geowave with Apache License 2.0 | 6 votes |
@Override
protected void reduceNativeValues(
    final GeoWaveInputKey key,
    final Iterable<Object> values,
    final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, Object>.Context context)
    throws IOException, InterruptedException {
  final Iterator<Object> objects = values.iterator();
  while (objects.hasNext()) {
    final AdapterToIndexMapping mapping =
        store.getIndicesForAdapter(key.getInternalAdapterId());
    context.write(
        new GeoWaveOutputKey<>(
            internalAdapterStore.getTypeName(mapping.getAdapterId()),
            mapping.getIndexNames()),
        objects.next());
  }
}
Example #5
Source File: Chain.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Add reducer that reads from context and writes to a queue
 */
@SuppressWarnings("unchecked")
void addReducer(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> outputQueue) throws IOException,
    InterruptedException {
  Class<?> keyOutClass = rConf.getClass(REDUCER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = rConf.getClass(REDUCER_OUTPUT_VALUE_CLASS,
      Object.class);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass,
      outputQueue, rConf);
  Reducer.Context reducerContext = createReduceContext(rw,
      (ReduceContext) inputContext, rConf);
  ReduceRunner runner = new ReduceRunner(reducerContext, reducer, rw);
  threads.add(runner);
}
Example #6
Source File: MapReduceUtil.java From kylin with Apache License 2.0 | 6 votes |
private static int getReduceTaskNum(double totalSizeInM, KylinConfig kylinConfig) {
  double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
  double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();

  // number of reduce tasks
  int numReduceTasks = (int) Math.round(totalSizeInM / perReduceInputMB * reduceCountRatio);

  // at least 1 reducer by default
  numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
  // no more than 500 reducer by default
  numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

  logger.info("Having total map input MB " + Math.round(totalSizeInM));
  logger.info("Having per reduce MB " + perReduceInputMB);
  logger.info("Setting " + Reducer.Context.NUM_REDUCES + "=" + numReduceTasks);
  return numReduceTasks;
}
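To illustrate the formula with hypothetical numbers: 10,000 MB of total map input, 500 MB per reducer, and a count ratio of 1.0 give round(10000 / 500 * 1.0) = 20 reduce tasks, which is then clamped to the configured minimum (at least 1 by default) and maximum (500 by default).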
Example #7
Source File: DataValidationReducer.java From jumbune with GNU Lesser General Public License v3.0 | 6 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) protected void setup(Reducer.Context context) throws IOException, InterruptedException { super.setup(context); maxViolationsInReport = context.getConfiguration().getInt(DataValidationConstants.DV_NUM_REPORT_VIOLATION, 1000); String dir = context.getConfiguration().get(SLAVE_FILE_LOC); dirPath = JobUtil.getAndReplaceHolders(dir); fileHandlerMap = new DVLRUCache(DataValidationConstants.TEN); offsetLinesMap = new TreeMap<>(); ViolationPersistenceBean bean = new ViolationPersistenceBean(); bean.setLineNum(Integer.MAX_VALUE); nullMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); dataTypeMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); regexMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); numFieldsMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); fileNames = new HashSet<String>(); }
Example #8
Source File: Chain.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Add reducer that reads from context and writes to a queue
 */
@SuppressWarnings("unchecked")
void addReducer(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> outputQueue) throws IOException,
    InterruptedException {
  Class<?> keyOutClass = rConf.getClass(REDUCER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = rConf.getClass(REDUCER_OUTPUT_VALUE_CLASS,
      Object.class);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass,
      outputQueue, rConf);
  Reducer.Context reducerContext = createReduceContext(rw,
      (ReduceContext) inputContext, rConf);
  ReduceRunner runner = new ReduceRunner(reducerContext, reducer, rw);
  threads.add(runner);
}
Example #9
Source File: MapReduceUtil.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
private static int getReduceTaskNum(double totalSizeInM, KylinConfig kylinConfig) {
  double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
  double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();

  // number of reduce tasks
  int numReduceTasks = (int) Math.round(totalSizeInM / perReduceInputMB * reduceCountRatio);

  // at least 1 reducer by default
  numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
  // no more than 500 reducer by default
  numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

  logger.info("Having total map input MB " + Math.round(totalSizeInM));
  logger.info("Having per reduce MB " + perReduceInputMB);
  logger.info("Setting " + Reducer.Context.NUM_REDUCES + "=" + numReduceTasks);
  return numReduceTasks;
}
Example #10
Source File: MapReduceTestUtil.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {

  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
Example #11
Source File: KMeansDriver.java From flink-perf with Apache License 2.0 | 6 votes |
public static void convertCentersSequenceFileToText(Configuration conf, FileSystem fs,
    String seqFilePath, String outputPath) throws Exception {
  Path seqFile = new Path(seqFilePath);
  Path output = new Path(outputPath);
  if (fs.exists(output)) {
    fs.delete(output, true);
  }
  Job job = Job.getInstance(conf);
  job.setMapperClass(CenterSequenceToTextConverter.class);
  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.addInputPath(job, seqFile);
  FileOutputFormat.setOutputPath(job, output);
  job.waitForCompletion(true);
}
Example #12
Source File: UpdateCentroidCostMapReduce.java From geowave with Apache License 2.0 | 6 votes |
@Override
public void reduce(
    final GroupIDText key,
    final Iterable<CountofDoubleWritable> values,
    final Reducer<GroupIDText, CountofDoubleWritable, GroupIDText, CountofDoubleWritable>.Context context)
    throws IOException, InterruptedException {
  double expectation = 0;
  double ptCount = 0;
  for (final CountofDoubleWritable value : values) {
    expectation += value.getValue();
    ptCount += value.getCount();
  }
  outputValue.set(expectation, ptCount);
  context.write(key, outputValue);
}
Example #13
Source File: FlowSumReducer.java From xxhadoop with Apache License 2.0 | 6 votes |
@Override
protected void reduce(Text key, Iterable<FlowBean> values,
    Reducer<Text, FlowBean, Text, FlowBean>.Context context)
    throws IOException, InterruptedException {
  //super.reduce(arg0, arg1, arg2);
  long upFlow = 0;
  long downFlow = 0;
  //long flowSum = 0;
  for (FlowBean flowBean : values) {
    upFlow += flowBean.getUpFlow();
    downFlow += flowBean.getDownFlow();
    //flowSum += flowBean.getSumFlow();
  }
  result.setPhoneNum(key.toString());
  result.setUpFlow(upFlow);
  result.setDownFlow(downFlow);
  //result.setSumFlow(flowSum);
  result.setSumFlow(upFlow + downFlow);
  context.write(key, result);
}
Example #14
Source File: Chain.java From hadoop with Apache License 2.0 | 5 votes |
ReduceRunner(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context,
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reducer,
    RecordWriter<KEYOUT, VALUEOUT> rw) throws IOException,
    InterruptedException {
  this.reducer = reducer;
  this.chainContext = context;
  this.rw = rw;
}
Example #15
Source File: JobContextImpl.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Get the {@link Reducer} class for the job.
 *
 * @return the {@link Reducer} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Reducer<?,?,?,?>> getReducerClass()
    throws ClassNotFoundException {
  return (Class<? extends Reducer<?,?,?,?>>)
      conf.getClass(REDUCE_CLASS_ATTR, Reducer.class);
}
Example #16
Source File: JobContextImpl.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Get the combiner class for the job.
 *
 * @return the combiner class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Reducer<?,?,?,?>> getCombinerClass()
    throws ClassNotFoundException {
  return (Class<? extends Reducer<?,?,?,?>>)
      conf.getClass(COMBINE_CLASS_ATTR, null);
}
Example #17
Source File: TestLineRecordReaderJobs.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(TestLineRecordReaderJobs.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  FileInputFormat.addInputPath(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.waitForCompletion(true);
}
Example #18
Source File: GeoWaveWritableOutputReducer.java From geowave with Apache License 2.0 | 5 votes |
protected void reduceWritableValues(
    final KEYIN key,
    final Iterable<VALUEIN> values,
    final Reducer<KEYIN, VALUEIN, GeoWaveInputKey, ObjectWritable>.Context context)
    throws IOException, InterruptedException {
  reduceNativeValues(key, values, new NativeReduceContext(context, serializationTool));
}
Example #19
Source File: DataProfNoCriteriaReducer.java From jumbune with GNU Lesser General Public License v3.0 | 5 votes |
@Override
protected void setup(Reducer<Text, IntWritable, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
  sorted = new DataProfNoCritBean[1000];
  DataProfNoCritBean dataProfNoCritBean = new DataProfNoCritBean();
  dataProfNoCritBean.setKey(INITIAL_KEY);
  dataProfNoCritBean.setValue(Integer.MIN_VALUE);
  sorted[0] = dataProfNoCritBean;
}
Example #20
Source File: GeoWaveWritableOutputReducer.java From geowave with Apache License 2.0 | 5 votes |
@Override
protected void reduce(
    final KEYIN key,
    final Iterable<VALUEIN> values,
    final Reducer<KEYIN, VALUEIN, GeoWaveInputKey, ObjectWritable>.Context context)
    throws IOException, InterruptedException {
  reduceWritableValues(key, values, context);
}
Example #21
Source File: Upgrade322Tool.java From rya with Apache License 2.0 | 5 votes |
@Override
public int run(String[] strings) throws Exception {
  conf.set(MRUtils.JOB_NAME_PROP, "Upgrade to Rya 3.2.2");
  //faster
  init();

  Job job = new Job(conf);
  job.setJarByClass(Upgrade322Tool.class);
  setupAccumuloInput(job);

  AccumuloInputFormat.setInputTableName(job,
      MRUtils.getTablePrefix(conf) + TBL_OSP_SUFFIX);

  //we do not need to change any row that is a string, custom, or iri type
  IteratorSetting regex = new IteratorSetting(30, "regex", RegExFilter.class);
  RegExFilter.setRegexs(regex,
      "\\w*" + TYPE_DELIM + "[\u0003|\u0008|\u0002]", null, null, null, false);
  RegExFilter.setNegate(regex, true);

  // set input output of the particular job
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);

  setupAccumuloOutput(job, MRUtils.getTablePrefix(conf) + TBL_SPO_SUFFIX);

  // set mapper and reducer classes
  job.setMapperClass(Upgrade322Mapper.class);
  job.setReducerClass(Reducer.class);

  // Submit the job
  return job.waitForCompletion(true) ? 0 : 1;
}
Example #22
Source File: Chain.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Create a reduce context that is based on ChainMapContext and the given
 * record writer
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
        RecordWriter<KEYOUT, VALUEOUT> rw,
        ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
        Configuration conf) {
  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reduceContext =
      new ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          context, rw, conf);
  Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(reduceContext);
  return reducerContext;
}
Example #23
Source File: TestLineRecordReaderJobs.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(TestLineRecordReaderJobs.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  FileInputFormat.addInputPath(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.waitForCompletion(true);
}
Example #24
Source File: Chain.java From big-c with Apache License 2.0 | 5 votes |
ReduceRunner(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context,
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reducer,
    RecordWriter<KEYOUT, VALUEOUT> rw) throws IOException,
    InterruptedException {
  this.reducer = reducer;
  this.chainContext = context;
  this.rw = rw;
}
Example #25
Source File: Chain.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Create a reduce context that is based on ChainMapContext and the given
 * record writer
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
        RecordWriter<KEYOUT, VALUEOUT> rw,
        ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
        Configuration conf) {
  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reduceContext =
      new ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          context, rw, conf);
  Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(reduceContext);
  return reducerContext;
}
Example #26
Source File: PutSortReducer.java From hbase with Apache License 2.0 | 5 votes |
@Override
protected void setup(
    Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.kvCreator = new CellCreator(conf);
}
Example #27
Source File: GATKTools.java From halvade with GNU General Public License v3.0 | 5 votes |
public void setContext(Reducer.Context context) {
  this.context = context;
  // mem = context.getConfiguration().get("mapreduce.reduce.java.opts");
  mem = "-Xmx"
      + (int) (0.8 * Integer.parseInt(context.getConfiguration().get("mapreduce.reduce.memory.mb")))
      + "m";
  String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "java", "");
  if (customArgs != null)
    java.add(customArgs);
}
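For example, with a hypothetical setting of mapreduce.reduce.memory.mb=4096, the heap flag built above becomes -Xmx3276m, i.e. 80% of the reducer container memory, truncated to a whole number of megabytes.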
Example #28
Source File: InputToOutputKeyReducer.java From geowave with Apache License 2.0 | 5 votes |
@Override
protected void reduceNativeValues(
    final GeoWaveInputKey key,
    final Iterable<Object> values,
    final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, Object>.Context context)
    throws IOException, InterruptedException {
  outputKey.setTypeName(internalAdapterStore.getTypeName(key.getInternalAdapterId()));
  for (final Object value : values) {
    context.write(outputKey, value);
  }
}
Example #29
Source File: TransformBaseRunner.java From BigDataPlatform with GNU General Public License v3.0 | 5 votes |
public void setupRunner(String jobName, Class<?> runnerClass,
    Class<? extends TableMapper<?, ?>> mapperClass,
    Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
    Class<? extends WritableComparable<?>> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Class<? extends OutputFormat<?, ?>> outputFormatClass) {
  this.setupRunner(jobName, runnerClass, mapperClass, reducerClass,
      outputKeyClass, outputValueClass,
      outputKeyClass, outputValueClass,
      outputFormatClass);
}
Example #30
Source File: BulkIngestInputGenerationIT.java From geowave with Apache License 2.0 | 5 votes |
@Override
public int run(final String[] args) throws Exception {

  final Configuration conf = getConf();
  conf.set("fs.defaultFS", "file:///");

  final Job job = Job.getInstance(conf, JOB_NAME);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, new Path(TEST_DATA_LOCATION));
  FileOutputFormat.setOutputPath(job, cleanPathForReuse(conf, OUTPUT_PATH));

  job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
  job.setReducerClass(Reducer.class); // (Identity Reducer)

  job.setInputFormatClass(GeonamesDataFileInputFormat.class);
  job.setOutputFormatClass(AccumuloFileOutputFormat.class);

  job.setMapOutputKeyClass(Key.class);
  job.setMapOutputValueClass(Value.class);
  job.setOutputKeyClass(Key.class);
  job.setOutputValueClass(Value.class);

  job.setNumReduceTasks(1);
  job.setSpeculativeExecution(false);

  final boolean result = job.waitForCompletion(true);

  mapInputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_INPUT_RECORDS).getValue();
  mapOutputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_OUTPUT_RECORDS).getValue();

  return result ? 0 : 1;
}