Java Code Examples for org.apache.hadoop.mapred.JobConf#setStrings()
The following examples show how to use
org.apache.hadoop.mapred.JobConf#setStrings().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TableMapReduceUtil.java From hbase with Apache License 2.0 | 6 votes |
/**
 * Prepares a {@link JobConf} for a TableReduce job. Call this before the job
 * is submitted.
 *
 * @param table The output table.
 * @param reducer The reducer class to use.
 * @param job The current job configuration to adjust.
 * @param partitioner Partitioner to use. Pass <code>null</code> to keep the
 *          default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *          job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer,
    JobConf job, Class partitioner, boolean addDependencyJars) throws IOException {
  // Output side of the job: format, target table, key/value types, reducer.
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);

  // Append the HBase serializations to whatever is currently configured.
  // NOTE(review): the current value is forwarded as-is; if the key is unset
  // this may pass a null entry along -- confirm how setStrings treats it.
  String configuredSerializations = job.get("io.serializations");
  String mutationSerialization = MutationSerialization.class.getName();
  String resultSerialization = ResultSerialization.class.getName();
  job.setStrings("io.serializations", configuredSerializations, mutationSerialization,
      resultSerialization);

  if (partitioner != null) {
    job.setPartitionerClass(partitioner);
    if (partitioner == HRegionPartitioner.class) {
      // With the region partitioner, cap the reducer count at the number of
      // regions of the output table.
      int regionCount = MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job),
          TableName.valueOf(table));
      if (regionCount < job.getNumReduceTasks()) {
        job.setNumReduceTasks(regionCount);
      }
    }
  }

  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
Example 2
Source File: TableMapReduceUtil.java From hbase with Apache License 2.0 | 5 votes |
/** * Use this before submitting a TableMap job. It will * appropriately set up the JobConf. * * @param table The table name to read from. * @param columns The columns to scan. * @param mapper The mapper class to use. * @param outputKeyClass The class of the output key. * @param outputValueClass The class of the output value. * @param job The current job configuration to adjust. * @param addDependencyJars upload HBase jars and jars for any of the configured * job classes via the distributed cache (tmpjars). */ public static void initTableMapJob(String table, String columns, Class<? extends TableMap> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, JobConf job, boolean addDependencyJars, Class<? extends InputFormat> inputFormat) { job.setInputFormat(inputFormat); job.setMapOutputValueClass(outputValueClass); job.setMapOutputKeyClass(outputKeyClass); job.setMapperClass(mapper); job.setStrings("io.serializations", job.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName()); FileInputFormat.addInputPaths(job, table); job.set(TableInputFormat.COLUMN_LIST, columns); if (addDependencyJars) { try { addDependencyJars(job); } catch (IOException e) { LOG.error("IOException encountered while adding dependency jars", e); } } try { initCredentials(job); } catch (IOException ioe) { // just spit out the stack trace? really? LOG.error("IOException encountered while initializing credentials", ioe); } }
Example 3
Source File: SleepJob.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link JobConf} for a sleep job.
 *
 * <p>SleepJob itself is registered as mapper, reducer and partitioner; input
 * comes from SleepInputFormat (with a placeholder input path) and output goes
 * to NullOutputFormat.
 *
 * @param numMapper number of map tasks to request
 * @param numReducer number of reduce tasks to request
 * @param mapSleepTime value stored under {@code sleep.job.map.sleep.time}
 * @param mapSleepCount value stored under {@code sleep.job.map.sleep.count}
 * @param reduceSleepTime value stored under {@code sleep.job.reduce.sleep.time}
 * @param reduceSleepCount value stored under {@code sleep.job.reduce.sleep.count}
 * @param doSpeculation whether speculative execution is enabled for the job
 * @param slowMaps entries stored under {@code SLOW_MAPS}
 * @param slowReduces entries stored under {@code SLOW_REDUCES}
 * @param slowRatio value stored under {@code SLOW_RATIO}
 * @param countersPerTask value stored under {@code sleep.job.counters.per.task}
 * @param hosts entries stored under {@code HOSTS_FOR_LOCALITY}
 * @param hostsPerSplit value stored under {@code HOSTS_PER_SPLIT}
 * @param setup whether job setup/cleanup is needed
 * @return the configured job
 */
public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime,
    int mapSleepCount, long reduceSleepTime, int reduceSleepCount, boolean doSpeculation,
    List<String> slowMaps, List<String> slowReduces, int slowRatio, int countersPerTask,
    List<String> hosts, int hostsPerSplit, boolean setup) {
  JobConf job = new JobConf(getConf(), SleepJob.class);
  job.setNumMapTasks(numMapper);
  job.setNumReduceTasks(numReducer);
  job.setMapperClass(SleepJob.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(SleepJob.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setJobSetupCleanupNeeded(setup);
  job.setInputFormat(SleepInputFormat.class);
  job.setPartitionerClass(SleepJob.class);
  job.setJobName("Sleep job");
  // Placeholder path; the literal name "ignored" is what the job is given.
  FileInputFormat.addInputPath(job, new Path("ignored"));
  job.setLong("sleep.job.map.sleep.time", mapSleepTime);
  job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
  job.setInt("sleep.job.map.sleep.count", mapSleepCount);
  job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
  job.setSpeculativeExecution(doSpeculation);
  job.setInt(SLOW_RATIO, slowRatio);
  job.setStrings(SLOW_MAPS, slowMaps.toArray(new String[slowMaps.size()]));
  // Fixed: this previously copied slowMaps into SLOW_REDUCES, so the
  // slowReduces argument was ignored except for sizing the array.
  job.setStrings(SLOW_REDUCES, slowReduces.toArray(new String[slowReduces.size()]));
  job.setInt("sleep.job.counters.per.task", countersPerTask);
  job.setStrings(HOSTS_FOR_LOCALITY, hosts.toArray(new String[hosts.size()]));
  job.setInt(HOSTS_PER_SPLIT, hostsPerSplit);
  return job;
}