Java Code Examples for org.apache.hadoop.mapred.JobConf#setReduceSpeculativeExecution()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setReduceSpeculativeExecution(). Each example is taken from an open-source project; its source file and license are noted above the code.
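For context, setReduceSpeculativeExecution(false) tells the framework not to launch backup ("speculative") attempts of slow reduce tasks, which matters whenever reducers have side effects such as writing to a database or an external index. The minimal driver sketch below is illustrative only (the class name and argument paths are placeholders, not from any of the projects above); it disables speculation for both task types before submitting the job:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SpeculationDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical driver: runs an identity map/reduce over text input.
        JobConf job = new JobConf(SpeculationDemo.class);
        job.setJobName("speculation-demo");

        // Reduce-side speculation is controlled independently of map-side.
        job.setReduceSpeculativeExecution(false);
        job.setMapSpeculativeExecution(false);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        JobClient.runJob(job);
    }
}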
Example 1
Source File: ReadExistingDataJob.java From tracing-framework with BSD 3-Clause "New" or "Revised" License
public void configure(JobConf job) {
    // Set the mapper and reducers
    job.setMapperClass(ReadDataJob.TestMapper.class);

    // Make sure this jar is included
    job.setJarByClass(ReadDataJob.TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_path));
}
Example 2
Source File: ReadDataJob.java From tracing-framework with BSD 3-Clause "New" or "Revised" License
public void configure(JobConf job) {
    // Set the mapper and reducers
    job.setMapperClass(TestMapper.class);
    // job.setReducerClass(TestReducer.class);

    // Set the output types of the mapper and reducer
    // job.setMapOutputKeyClass(IntWritable.class);
    // job.setMapOutputValueClass(NullWritable.class);
    // job.setOutputKeyClass(NullWritable.class);
    // job.setOutputValueClass(NullWritable.class);

    // Make sure this jar is included
    job.setJarByClass(TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
Example 3
Source File: DistCp.java From RDFS with Apache License 2.0
private static JobConf createJobConf(Configuration conf, boolean useFastCopy) {
    Class<? extends InputFormat> inputFormat =
        (useFastCopy) ? FastCopyInputFormat.class : CopyInputFormat.class;
    JobConf jobconf = new JobConf(conf, DistCp.class);
    jobconf.setJobName(NAME);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setReduceSpeculativeExecution(false);
    jobconf.setMapOutputKeyClass(FilePairComparable.class);
    jobconf.setMapOutputValueClass(Text.class);
    jobconf.setOutputKeyClass(FilePairComparable.class);
    jobconf.setOutputValueClass(Text.class);

    jobconf.setInputFormat(inputFormat);
    jobconf.setMapperClass(CopyFilesTask.class);
    jobconf.setReducerClass(CopyFilesTask.class);

    jobconf.setNumReduceTasks(conf.getInt(MAX_REDUCE_LABEL, 1));
    // Prevent the reducer from starting until all maps are done.
    jobconf.setInt("mapred.job.rushreduce.reduce.threshold", 0);
    jobconf.setFloat("mapred.reduce.slowstart.completed.maps", 1.0f);
    return jobconf;
}
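Note the last two settings: pinning mapred.reduce.slowstart.completed.maps at 1.0 (together with the RDFS-specific rush-reduce threshold) holds every reducer back until all maps have finished. Both measures serve the goal stated in the comment above them: avoiding multiple concurrent writers to the same destination file.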
Example 4
Source File: DBOutputFormat.java From SpyGlass with Apache License 2.0
/**
 * Initializes the reduce-part of the job with the appropriate output settings.
 *
 * @param job The job
 * @param dbOutputFormatClass The output format class to use; DBOutputFormat itself if null
 * @param tableName The table to insert data into
 * @param fieldNames The field names in the table. If unknown, supply the appropriate
 *                   number of nulls.
 * @param updateFields The field names to match existing rows on, or null for inserts only
 * @param batchSize The number of statements to batch together, or -1 for the default
 */
public static void setOutput(JobConf job, Class<? extends DBOutputFormat> dbOutputFormatClass,
        String tableName, String[] fieldNames, String[] updateFields, int batchSize) {
    if (dbOutputFormatClass == null) {
        job.setOutputFormat(DBOutputFormat.class);
    } else {
        job.setOutputFormat(dbOutputFormatClass);
    }

    // writing doesn't always happen in reduce
    job.setReduceSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);

    dbConf.setOutputTableName(tableName);
    dbConf.setOutputFieldNames(fieldNames);

    if (updateFields != null) {
        dbConf.setOutputUpdateFieldNames(updateFields);
    }

    if (batchSize != -1) {
        dbConf.setBatchStatementsNum(batchSize);
    }
}
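A hypothetical call site for this overload might look like the following (the driver class, table, and column names are invented for illustration; passing null for the format class falls back to DBOutputFormat itself, as the code above shows):

JobConf job = new JobConf(MyDriver.class);
// Write to "events", matching update rows on "id", batching 100 statements.
DBOutputFormat.setOutput(job, null, "events",
    new String[] { "id", "payload" },   // fieldNames
    new String[] { "id" },              // updateFields
    100);                               // batchSize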
Example 5
Source File: DBOutputFormat.java From hadoop with Apache License 2.0
private static DBConfiguration setOutput(JobConf job, String tableName) {
    job.setOutputFormat(DBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setOutputTableName(tableName);
    return dbConf;
}
Example 6
Source File: DBOutputFormat.java From big-c with Apache License 2.0
private static DBConfiguration setOutput(JobConf job, String tableName) {
    job.setOutputFormat(DBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setOutputTableName(tableName);
    return dbConf;
}
Example 7
Source File: SolrIndexer.java From anthelion with Apache License 2.0
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
        List<Path> segments, boolean noCommit, boolean deleteGone,
        String solrParams, boolean filter, boolean normalize) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("SolrIndexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("index-solr " + solrUrl);

    LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
    LOG.info("SolrIndexer: URL filtering: " + filter);
    LOG.info("SolrIndexer: URL normalizing: " + normalize);

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    job.set(SolrConstants.SERVER_URL, solrUrl);
    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

    if (solrParams != null) {
        job.set(SolrConstants.PARAMS, solrParams);
    }

    NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);

    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
        new Random().nextInt());

    FileOutputFormat.setOutputPath(job, tmp);
    try {
        JobClient.runJob(job);
        // do the commits once and for all the reducers in one go
        SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);

        if (!noCommit) {
            solr.commit();
        }
        long end = System.currentTimeMillis();
        LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " +
            TimingUtil.elapsedTime(start, end));
    } catch (Exception e) {
        LOG.error(e.toString());
    } finally {
        FileSystem.get(job).delete(tmp, true);
    }
}
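Speculation is presumably disabled here because each reduce task pushes documents directly to the Solr server as a side effect; a speculative duplicate of a reducer would submit the same documents a second time. The commit is then issued once, after the job completes, rather than per reducer.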
Example 8
Source File: IndexingJob.java From nutch-htmlunit with Apache License 2.0
public void index(Path crawlDb, Path linkDb, List<Path> segments,
        boolean noCommit, boolean deleteGone, String params,
        boolean filter, boolean normalize) throws IOException {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Indexer: starting at " + sdf.format(start));

    final JobConf job = new NutchJob(getConf());
    job.setJobName("Indexer");

    LOG.info("Indexer: deleting gone documents: " + deleteGone);
    LOG.info("Indexer: URL filtering: " + filter);
    LOG.info("Indexer: URL normalizing: " + normalize);

    IndexWriters writers = new IndexWriters(getConf());
    LOG.info(writers.describe());

    IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

    // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
    // job.set(SolrConstants.SERVER_URL, solrUrl);

    job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
    job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
    job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

    if (params != null) {
        job.set(IndexerMapReduce.INDEXER_PARAMS, params);
    }

    job.setReduceSpeculativeExecution(false);

    final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
        new Random().nextInt());

    FileOutputFormat.setOutputPath(job, tmp);
    try {
        JobClient.runJob(job);
        // do the commits once and for all the reducers in one go
        if (!noCommit) {
            writers.open(job, "commit");
            writers.commit();
        }
        long end = System.currentTimeMillis();
        LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: " +
            TimingUtil.elapsedTime(start, end));
    } finally {
        FileSystem.get(job).delete(tmp, true);
    }
}
Example 9
Source File: DBOutputFormat.java From RDFS with Apache License 2.0
/**
 * Initializes the reduce-part of the job with the appropriate output settings.
 *
 * @param job The job
 * @param tableName The table to insert data into
 * @param fieldNames The field names in the table. If unknown, supply the appropriate
 *                   number of nulls.
 */
public static void setOutput(JobConf job, String tableName, String... fieldNames) {
    job.setOutputFormat(DBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setOutputTableName(tableName);
    dbConf.setOutputFieldNames(fieldNames);
}
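For completeness, a minimal, hypothetical invocation of this varargs overload (the driver class, table, and column names are placeholders):

JobConf job = new JobConf(MyDriver.class);
// Configure DB output for a three-column table; speculation is disabled inside.
DBOutputFormat.setOutput(job, "employees", "id", "name", "salary");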
Example 10
Source File: DBOutputFormat.java From hadoop-gpu with Apache License 2.0
/**
 * Initializes the reduce-part of the job with the appropriate output settings.
 *
 * @param job The job
 * @param tableName The table to insert data into
 * @param fieldNames The field names in the table. If unknown, supply the appropriate
 *                   number of nulls.
 */
public static void setOutput(JobConf job, String tableName, String... fieldNames) {
    job.setOutputFormat(DBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    DBConfiguration dbConf = new DBConfiguration(job);
    dbConf.setOutputTableName(tableName);
    dbConf.setOutputFieldNames(fieldNames);
}