org.apache.hadoop.mapred.SkipBadRecords Java Examples
The following examples show how to use org.apache.hadoop.mapred.SkipBadRecords, the utility class for MapReduce's skip-bad-records feature.
Each example is taken from an open-source project; the source file and license are noted above the code.
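Before the examples, here is a minimal sketch of how an old-API (JobConf) driver might enable record skipping through this class. The class name, skip-output path, and the concrete limits are hypothetical; the SkipBadRecords setters themselves are part of the org.apache.hadoop.mapred API.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SkipBadRecords;

public class SkipModeDriver {
  // Sketch: enable skip mode on a JobConf (values and path are illustrative).
  public static JobConf configureSkipping(JobConf conf) {
    // Start skipping only after two failed attempts of the same task.
    SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
    // Tolerate skipping at most 100 records around a bad map record
    // and at most 10 groups around a bad reduce group.
    SkipBadRecords.setMapperMaxSkipRecords(conf, 100L);
    SkipBadRecords.setReducerMaxSkipGroups(conf, 10L);
    // Write skipped records here for later inspection (hypothetical path).
    SkipBadRecords.setSkipOutputPath(conf, new Path("/tmp/skipped-records"));
    return conf;
  }
}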
Example #1
Source File: MultithreadedMapRunner.java From RDFS with Apache License 2.0

@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt("mapred.map.multithreadedrunner.threads", 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job) > 0 &&
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue(numberOfThreads));
}
Example #2
Source File: TestStreamingBadRecords.java From hadoop-gpu with Apache License 2.0

public App(String[] args) throws Exception {
  if (args.length > 0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if (isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if (count >= 10) {
      System.err.println("reporter:counter:" + SkipBadRecords.COUNTER_GROUP +
          "," + counter + "," + count);
      count = 0;
    }
  }
}
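The System.err line above uses the Hadoop Streaming counter protocol: a task updates a counter by writing "reporter:counter:&lt;group&gt;,&lt;counter&gt;,&lt;amount&gt;" to stderr, which is exactly what this App does every ten records. A hypothetical helper that builds such a line:

// Hypothetical helper: formats a counter update in the Streaming
// stderr protocol shown in the example above.
static String counterUpdate(String group, String counter, long amount) {
  return "reporter:counter:" + group + "," + counter + "," + amount;
}

// e.g. counterUpdate(SkipBadRecords.COUNTER_GROUP,
//                    SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS, 10)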
Example #3
Source File: PipeMapper.java From hadoop-gpu with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  String inputFormatClassName =
    job.getClass("mapred.input.format.class", TextInputFormat.class)
       .getCanonicalName();
  ignoreKey =
    inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

  try {
    mapOutputFieldSeparator =
      job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator =
      job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #4
Source File: PipeReducer.java From hadoop-gpu with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);

  try {
    reduceOutFieldSeparator =
      job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator =
      job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields =
      job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #5
Source File: MultithreadedMapRunner.java From hadoop-gpu with Apache License 2.0

@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt("mapred.map.multithreadedrunner.threads", 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job) > 0 &&
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue(numberOfThreads));
}
Example #6
Source File: TestStreamingBadRecords.java From RDFS with Apache License 2.0

public App(String[] args) throws Exception {
  if (args.length > 0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if (isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if (count >= 10) {
      System.err.println("reporter:counter:" + SkipBadRecords.COUNTER_GROUP +
          "," + counter + "," + count);
      count = 0;
    }
  }
}
Example #7
Source File: PipeMapper.java From RDFS with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  String inputFormatClassName =
    job.getClass("mapred.input.format.class", TextInputFormat.class)
       .getCanonicalName();
  ignoreKey =
    inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

  try {
    mapOutputFieldSeparator =
      job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator =
      job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #8
Source File: PipeReducer.java From RDFS with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);

  try {
    reduceOutFieldSeparator =
      job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator =
      job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields =
      job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #9
Source File: TestStreamingBadRecords.java From big-c with Apache License 2.0

public App(String[] args) throws Exception {
  if (args.length > 0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if (isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if (count >= 10) {
      System.err.println("reporter:counter:" + SkipBadRecords.COUNTER_GROUP +
          "," + counter + "," + count);
      count = 0;
    }
  }
}
Example #10
Source File: PipeMapper.java From big-c with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName()
      .equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName =
      job.getClass("mapred.input.format.class", TextInputFormat.class)
         .getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
        inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator =
      job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator =
      job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #11
Source File: MultithreadedMapRunner.java From big-c with Apache License 2.0

@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job) > 0 &&
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue(numberOfThreads));
}
Example #12
Source File: MultithreadedMapRunner.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  int numberOfThreads =
    jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Configuring jobConf " + jobConf.getJobName() +
              " to use " + numberOfThreads + " threads");
  }

  this.job = jobConf;
  //increment processed counter only if skipping feature is enabled
  this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job) > 0 &&
    SkipBadRecords.getAutoIncrMapperProcCount(job);
  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

  // Creating a threadpool of the configured size to execute the Mapper
  // map method in parallel.
  executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
                                           0L, TimeUnit.MILLISECONDS,
                                           new BlockingArrayQueue(numberOfThreads));
}
Example #13
Source File: PipeReducer.java From hadoop with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator =
      job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator =
      job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields =
      job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #14
Source File: PipeMapper.java From hadoop with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName()
      .equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName =
      job.getClass("mapred.input.format.class", TextInputFormat.class)
         .getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey",
        inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator =
      job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator =
      job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #15
Source File: TestStreamingBadRecords.java From hadoop with Apache License 2.0

public App(String[] args) throws Exception {
  if (args.length > 0) {
    isReducer = Boolean.parseBoolean(args[0]);
  }
  String counter = SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS;
  if (isReducer) {
    counter = SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS;
  }
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  String line;
  int count = 0;
  while ((line = in.readLine()) != null) {
    processLine(line);
    count++;
    if (count >= 10) {
      System.err.println("reporter:counter:" + SkipBadRecords.COUNTER_GROUP +
          "," + counter + "," + count);
      count = 0;
    }
  }
}
Example #16
Source File: PipeReducer.java From big-c with Apache License 2.0

public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of
  //processed records could be different(equal or less) than the no of
  //records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator =
      job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator =
      job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields =
      job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #17
Source File: PipesMapRunner.java From big-c with Apache License 2.0

/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
Example #18
Source File: PipesReducer.java From big-c with Apache License 2.0

public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
}
Example #19
Source File: TestStreamingBadRecords.java From RDFS with Apache License 2.0

public void testSkip() throws Exception {
  JobConf clusterConf = createJobConf();
  createInput();
  int attSkip = 0;
  SkipBadRecords.setAttemptsToStartSkipping(clusterConf, attSkip);
  //the no of attempts to successfully complete the task depends
  //on the no of bad records.
  int mapperAttempts = attSkip + 1 + MAPPER_BAD_RECORDS.size();
  int reducerAttempts = attSkip + 1 + REDUCER_BAD_RECORDS.size();

  String[] args = new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping=" + attSkip,
    "-jobconf", "mapred.skip.out.dir=none",
    "-jobconf", "mapred.map.max.attempts=" + mapperAttempts,
    "-jobconf", "mapred.reduce.max.attempts=" + reducerAttempts,
    "-jobconf", "mapred.skip.map.max.skip.records=" + Long.MAX_VALUE,
    "-jobconf", "mapred.skip.reduce.max.skip.groups=" + Long.MAX_VALUE,
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name=" + clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker=" + clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                + clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp")
  };
  StreamJob job = new StreamJob(args, false);
  job.go();
  validateOutput(job.running_, false);
  //validate that there is no skip directory as it has been set to "none"
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_) == null);
}
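The skip-related -jobconf flags in this test correspond directly to SkipBadRecords setters (plus the raw "mapred.skip.out.dir" key the test itself uses). A sketch of the same configuration done programmatically on clusterConf:

// Equivalent programmatic setup for the skip settings passed via -jobconf above.
SkipBadRecords.setAttemptsToStartSkipping(clusterConf, 0);
SkipBadRecords.setMapperMaxSkipRecords(clusterConf, Long.MAX_VALUE);
SkipBadRecords.setReducerMaxSkipGroups(clusterConf, Long.MAX_VALUE);
// "none" disables writing skipped records to a skip output directory,
// which is what the final assertTrue verifies.
clusterConf.set("mapred.skip.out.dir", "none");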
Example #20
Source File: TestStreamingBadRecords.java From RDFS with Apache License 2.0

public void testNarrowDown() throws Exception {
  createInput();
  JobConf clusterConf = createJobConf();
  String[] args = new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping=1",
    //actually fewer attempts are required than specified
    //but to cater to the case of slow processed counter update, need to
    //have more attempts
    "-jobconf", "mapred.map.max.attempts=20",
    "-jobconf", "mapred.reduce.max.attempts=15",
    "-jobconf", "mapred.skip.map.max.skip.records=1",
    "-jobconf", "mapred.skip.reduce.max.skip.groups=1",
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name=" + clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker=" + clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                + clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp")
  };
  StreamJob job = new StreamJob(args, false);
  job.go();
  validateOutput(job.running_, true);
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_) != null);
}
Example #21
Source File: PipesReducer.java From hadoop with Apache License 2.0

public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
}
Example #22
Source File: PipesGPUMapRunner.java From hadoop-gpu with Apache License 2.0

/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
Example #23
Source File: PipesReducer.java From hadoop-gpu with Apache License 2.0

public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
}
Example #24
Source File: PipesMapRunner.java From hadoop-gpu with Apache License 2.0

/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
Example #25
Source File: PipesMapRunner.java From RDFS with Apache License 2.0

/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}
Example #26
Source File: PipesReducer.java From RDFS with Apache License 2.0

public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
}
Example #27
Source File: TestStreamingBadRecords.java From hadoop-gpu with Apache License 2.0

public void testSkip() throws Exception {
  JobConf clusterConf = createJobConf();
  createInput();
  int attSkip = 0;
  SkipBadRecords.setAttemptsToStartSkipping(clusterConf, attSkip);
  //the no of attempts to successfully complete the task depends
  //on the no of bad records.
  int mapperAttempts = attSkip + 1 + MAPPER_BAD_RECORDS.size();
  int reducerAttempts = attSkip + 1 + REDUCER_BAD_RECORDS.size();

  String[] args = new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping=" + attSkip,
    "-jobconf", "mapred.skip.out.dir=none",
    "-jobconf", "mapred.map.max.attempts=" + mapperAttempts,
    "-jobconf", "mapred.reduce.max.attempts=" + reducerAttempts,
    "-jobconf", "mapred.skip.map.max.skip.records=" + Long.MAX_VALUE,
    "-jobconf", "mapred.skip.reduce.max.skip.groups=" + Long.MAX_VALUE,
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name=" + clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker=" + clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                + clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp")
  };
  StreamJob job = new StreamJob(args, false);
  job.go();
  validateOutput(job.running_, false);
  //validate that there is no skip directory as it has been set to "none"
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_) == null);
}
Example #28
Source File: TestStreamingBadRecords.java From hadoop-gpu with Apache License 2.0

public void testNarrowDown() throws Exception {
  createInput();
  JobConf clusterConf = createJobConf();
  String[] args = new String[] {
    "-input", (new Path(getInputDir(), "text.txt")).toString(),
    "-output", getOutputDir().toString(),
    "-mapper", badMapper,
    "-reducer", badReducer,
    "-verbose",
    "-inputformat", "org.apache.hadoop.mapred.KeyValueTextInputFormat",
    "-jobconf", "mapred.skip.attempts.to.start.skipping=1",
    //actually fewer attempts are required than specified
    //but to cater to the case of slow processed counter update, need to
    //have more attempts
    "-jobconf", "mapred.map.max.attempts=20",
    "-jobconf", "mapred.reduce.max.attempts=15",
    "-jobconf", "mapred.skip.map.max.skip.records=1",
    "-jobconf", "mapred.skip.reduce.max.skip.groups=1",
    "-jobconf", "mapred.map.tasks=1",
    "-jobconf", "mapred.reduce.tasks=1",
    "-jobconf", "fs.default.name=" + clusterConf.get("fs.default.name"),
    "-jobconf", "mapred.job.tracker=" + clusterConf.get("mapred.job.tracker"),
    "-jobconf", "mapred.job.tracker.http.address="
                + clusterConf.get("mapred.job.tracker.http.address"),
    "-jobconf", "stream.debug=set",
    "-jobconf", "keep.failed.task.files=true",
    "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp")
  };
  StreamJob job = new StreamJob(args, false);
  job.go();
  validateOutput(job.running_, true);
  assertTrue(SkipBadRecords.getSkipOutputPath(job.jobConf_) != null);
}
Example #29
Source File: PipesMapRunner.java From hadoop with Apache License 2.0

/**
 * Get the new configuration.
 * @param job the job's configuration
 */
public void configure(JobConf job) {
  this.job = job;
  //disable the auto increment of the counter. For pipes, no of processed
  //records could be different(equal or less) than the no of records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
}