Java Code Examples for org.apache.hadoop.mapred.JobConf#getInt()
The following examples show how to use org.apache.hadoop.mapred.JobConf#getInt().
They are drawn from a range of open-source projects; each example notes its source file, the project it comes from, and that project's license.
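Before the project examples, here is a minimal, self-contained sketch of the basic pattern. JobConf#getInt(name, defaultValue) returns the value of the named configuration property parsed as an int, or the supplied default when the property is not set. The property name "example.num.threads" below is made up purely for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetIntSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // The property has not been set, so the supplied default (4) is returned.
    int threads = conf.getInt("example.num.threads", 4);
    System.out.println("threads = " + threads); // prints: threads = 4

    // After the property is set, getInt() parses and returns the stored value.
    conf.setInt("example.num.threads", 16);
    System.out.println(conf.getInt("example.num.threads", 4)); // prints: 16
  }
}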
Example 1
Source File: LinkRank.java From anthelion with Apache License 2.0
/**
 * Configures the job, sets the damping factor, rank one score, and other
 * needed values for analysis.
 */
public void configure(JobConf conf) {
  try {
    this.conf = conf;
    this.dampingFactor = conf.getFloat("link.analyze.damping.factor", 0.85f);
    this.rankOne = conf.getFloat("link.analyze.rank.one", 0.0f);
    this.itNum = conf.getInt("link.analyze.iteration", 0);
    limitPages = conf.getBoolean("link.ignore.limit.page", true);
    limitDomains = conf.getBoolean("link.ignore.limit.domain", true);
  } catch (Exception e) {
    LOG.error(StringUtils.stringifyException(e));
    throw new IllegalArgumentException(e);
  }
}
Example 2
Source File: AvroAsJsonOutputFormat.java From iow-hadoop-streaming with Apache License 2.0
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
    throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
        org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(job.getInt(
      org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
  }
}
Example 3
Source File: ImportRecordReaderFactory.java From emr-dynamodb-connector with Apache License 2.0
static RecordReader<NullWritable, DynamoDBItemWritable> getRecordReader(
    InputSplit inputSplit, JobConf job, Reporter reporter) throws IOException {
  // CombineFileSplit indicates the new export format which includes a manifest file
  if (inputSplit instanceof CombineFileSplit) {
    int version = job.getInt(DynamoDBConstants.EXPORT_FORMAT_VERSION, -1);
    if (version != ExportManifestRecordWriter.FORMAT_VERSION) {
      throw new IOException("Unknown version: " + job.get(DynamoDBConstants
          .EXPORT_FORMAT_VERSION));
    }
    return new ImportCombineFileRecordReader((CombineFileSplit) inputSplit, job, reporter);
  } else if (inputSplit instanceof FileSplit) {
    // FileSplit indicates the old data pipeline format which doesn't include a manifest file
    Path path = ((FileSplit) inputSplit).getPath();
    return new ImportRecordReader(job, path);
  } else {
    throw new IOException("Expecting CombineFileSplit or FileSplit but the input split type is:"
        + " " + inputSplit.getClass());
  }
}
Example 4
Source File: DistCp.java From hadoop-gpu with Apache License 2.0
/**
 * Mapper configuration.
 * Extracts source and destination file system, as well as
 * top-level paths on source and destination directories.
 * Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job) {
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  this.job = job;
}
Example 5
Source File: MultithreadedMapRunner.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") public void configure(JobConf jobConf) { int numberOfThreads = jobConf.getInt("mapred.map.multithreadedrunner.threads", 10); if (LOG.isDebugEnabled()) { LOG.debug("Configuring jobConf " + jobConf.getJobName() + " to use " + numberOfThreads + " threads"); } this.job = jobConf; //increment processed counter only if skipping feature is enabled this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 && SkipBadRecords.getAutoIncrMapperProcCount(job); this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf); // Creating a threadpool of the configured size to execute the Mapper // map method in parallel. executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 0L, TimeUnit.MILLISECONDS, new BlockingArrayQueue (numberOfThreads)); }
Example 6
Source File: MultithreadedMapRunner.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") public void configure(JobConf jobConf) { int numberOfThreads = jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10); if (LOG.isDebugEnabled()) { LOG.debug("Configuring jobConf " + jobConf.getJobName() + " to use " + numberOfThreads + " threads"); } this.job = jobConf; //increment processed counter only if skipping feature is enabled this.incrProcCount = SkipBadRecords.getMapperMaxSkipRecords(job)>0 && SkipBadRecords.getAutoIncrMapperProcCount(job); this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf); // Creating a threadpool of the configured size to execute the Mapper // map method in parallel. executorService = new ThreadPoolExecutor(numberOfThreads, numberOfThreads, 0L, TimeUnit.MILLISECONDS, new BlockingArrayQueue (numberOfThreads)); }
Example 7
Source File: ValueAggregatorJobBase.java From big-c with Apache License 2.0
private static ArrayList<ValueAggregatorDescriptor> getAggregatorDescriptors(JobConf job) {
  String advn = "aggregator.descriptor";
  int num = job.getInt(advn + ".num", 0);
  ArrayList<ValueAggregatorDescriptor> retv = new ArrayList<ValueAggregatorDescriptor>(num);
  for (int i = 0; i < num; i++) {
    String spec = job.get(advn + "." + i);
    ValueAggregatorDescriptor ad = getValueAggregatorDescriptor(spec, job);
    if (ad != null) {
      retv.add(ad);
    }
  }
  return retv;
}
Example 8
Source File: RandomWriter.java From hadoop-gpu with Apache License 2.0
/**
 * Save the values out of the configuration that we need to write
 * the data.
 */
@Override
public void configure(JobConf job) {
  numBytesToWrite = job.getLong("test.randomwrite.bytes_per_map",
      1 * 1024 * 1024 * 1024);
  minKeySize = job.getInt("test.randomwrite.min_key", 10);
  keySizeRange = job.getInt("test.randomwrite.max_key", 1000) - minKeySize;
  minValueSize = job.getInt("test.randomwrite.min_value", 0);
  valueSizeRange = job.getInt("test.randomwrite.max_value", 20000) - minValueSize;
}
Example 9
Source File: FreeGenerator.java From nutch-htmlunit with Apache License 2.0
@Override
public void configure(JobConf job) {
  super.configure(job);
  defaultInterval = job.getInt("db.fetch.interval.default", 0);
  scfilters = new ScoringFilters(job);
  if (job.getBoolean(FILTER_KEY, false)) {
    filters = new URLFilters(job);
  }
  if (job.getBoolean(NORMALIZE_KEY, false)) {
    normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_INJECT);
  }
}
Example 10
Source File: DistCp.java From RDFS with Apache License 2.0
/**
 * Produce splits such that each is no greater than the quotient of the
 * total size and the number of splits requested.
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcFileList = job.get(SRC_LIST_LABEL, "");
  Path srcFileListPath = new Path(srcFileList);
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcFileList)) {
    throw new RuntimeException("Invalid metadata: #files(" + cnfiles +
        ") total_size(" + cbsize + ") src_chunk_file_list_uri(" +
        srcFileList + ")");
  }

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  SequenceFile.Reader sl = null;

  String splitList = job.get(SPLIT_LIST_LABEL, "");
  if ("".equals(splitList)) {
    throw new RuntimeException("Invalid metadata: split_list_uri(" +
        srcFileList + ")");
  }

  // split file list which contains start pos and split length pairs;
  // they are used to split srcChunkFileList
  Path splitListPath = new Path(splitList);
  FileSystem splitListFs = splitListPath.getFileSystem(job);
  try {
    sl = new SequenceFile.Reader(splitListFs, splitListPath, job);
    LongWritable startpos = new LongWritable();
    LongWritable length = new LongWritable();
    while (sl.next(startpos, length)) {
      splits.add(new FileSplit(srcFileListPath, startpos.get(),
          length.get(), (String[]) null));
    }
  } finally {
    checkAndClose(sl);
  }
  return splits.toArray(new FileSplit[splits.size()]);
}
Example 11
Source File: RandomWriter.java From hadoop-book with Apache License 2.0
/**
 * Save the values out of the configuration that we need to write the
 * data.
 */
@Override
public void configure(JobConf job) {
  numBytesToWrite = job.getLong("test.randomwrite.bytes_per_map",
      1 * 1024 * 1024 * 1024);
  minKeySize = job.getInt("test.randomwrite.min_key", 10);
  keySizeRange = job.getInt("test.randomwrite.max_key", 1000) - minKeySize;
  minValueSize = job.getInt("test.randomwrite.min_value", 0);
  valueSizeRange = job.getInt("test.randomwrite.max_value", 20000) - minValueSize;
}
Example 12
Source File: AvroRecordWriter.java From spork with Apache License 2.0
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
    JobConf job) throws UnsupportedEncodingException {
  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  // Do max as core-default.xml has io.file.buffer.size as 4K
  writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY,
      Math.max(job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL),
          DEFAULT_SYNC_INTERVAL)));

  // copy metadata from job
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
  }
}
Example 13
Source File: RandomWriter.java From RDFS with Apache License 2.0
/**
 * Save the values out of the configuration that we need to write
 * the data.
 */
@Override
public void configure(JobConf job) {
  numBytesToWrite = job.getLong("test.randomwrite.bytes_per_map",
      1 * 1024 * 1024 * 1024);
  minKeySize = job.getInt("test.randomwrite.min_key", 10);
  keySizeRange = job.getInt("test.randomwrite.max_key", 1000) - minKeySize;
  minValueSize = job.getInt("test.randomwrite.min_value", 0);
  valueSizeRange = job.getInt("test.randomwrite.max_value", 20000) - minValueSize;
}
Example 14
Source File: NLineInputFormat.java From big-c with Apache License 2.0
public void configure(JobConf conf) {
  N = conf.getInt("mapreduce.input.lineinputformat.linespermap", 1);
}
Example 15
Source File: AbstractMROldApiSaveTest.java From elasticsearch-hadoop with Apache License 2.0
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  return super.getSplits(job, job.getInt("actual.splits", 3));
}
Example 16
Source File: GenerateDistCacheData.java From big-c with Apache License 2.0
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
      DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {
      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[]) null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[]) null));
  }

  return splits;
}
Example 17
Source File: DistRaid.java From RDFS with Apache License 2.0
/**
 * Produce splits such that each is no greater than the quotient of the
 * total size and the number of splits requested.
 *
 * @param job
 *          The handle to the JobConf object
 * @param numSplits
 *          Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  final int srcCount = job.getInt(OP_COUNT_LABEL, -1);
  final int targetcount = srcCount / numSplits;
  String srclist = job.get(OP_LIST_LABEL, "");
  if (srcCount < 0 || "".equals(srclist)) {
    throw new RuntimeException("Invalid metadata: #files(" + srcCount
        + ") listuri(" + srclist + ")");
  }
  Path srcs = new Path(srclist);
  FileSystem fs = srcs.getFileSystem(job);

  List<FileSplit> splits = new ArrayList<FileSplit>(numSplits);

  Text key = new Text();
  PolicyInfo value = new PolicyInfo();
  SequenceFile.Reader in = null;
  long prev = 0L;
  int count = 0; // count src
  try {
    for (in = new SequenceFile.Reader(fs, srcs, job); in.next(key, value);) {
      long curr = in.getPosition();
      long delta = curr - prev;
      if (++count > targetcount) {
        count = 0;
        splits.add(new FileSplit(srcs, prev, delta, (String[]) null));
        prev = curr;
      }
    }
  } finally {
    in.close();
  }
  long remaining = fs.getFileStatus(srcs).getLen() - prev;
  if (remaining != 0) {
    splits.add(new FileSplit(srcs, prev, remaining, (String[]) null));
  }
  LOG.info("jobname= " + jobName + " numSplits=" + numSplits +
      ", splits.size()=" + splits.size());
  return splits.toArray(new FileSplit[splits.size()]);
}
Example 18
Source File: RegexMapper.java From big-c with Apache License 2.0
public void configure(JobConf job) {
  pattern = Pattern.compile(job.get(org.apache.hadoop.mapreduce.lib.map.
      RegexMapper.PATTERN));
  group = job.getInt(org.apache.hadoop.mapreduce.lib.map.
      RegexMapper.GROUP, 0);
}
Example 19
Source File: DistCp.java From RDFS with Apache License 2.0
/**
 * Produce splits such that each is no greater than the quotient of the
 * total size and the number of splits requested.
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  long blocks = job.getLong(TOTAL_BLOCKS_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || blocks < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException("Invalid metadata: #files(" + cnfiles +
        ") total_size(" + cbsize + ") listuri(" + srcfilelist + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePairComparable value = new FilePairComparable();
  final long targetsize = getTargetSize(job, numSplits);
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  SequenceFile.Reader sl = null;
  try {
    sl = new SequenceFile.Reader(fs, src, job);
    for (; sl.next(key, value); last = sl.getPosition()) {
      // if adding this split would put this split past the target size,
      // cut the last split and put this next file in the next split.
      long increment = getIncrement(key, value);
      if (acc + increment > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += increment;
    }
  } finally {
    checkAndClose(sl);
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
  }

  return splits.toArray(new FileSplit[splits.size()]);
}
Example 20
Source File: TaskCalculator.java From emr-dynamodb-connector with Apache License 2.0
public int getMaxMapTasks() throws IOException {
  JobConf conf = (JobConf) jobClient.getConf();

  // Total number of nodes in the cluster
  int nodes = jobClient.getClusterStatus().getTaskTrackers();
  log.info("Cluster has " + nodes + " active nodes.");
  if (nodes == 0) {
    log.warn("Cluster doesn't have any nodes");
    return 0;
  }

  // Memory per slot
  int slotMemory = conf.getInt("yarn.scheduler.minimum-allocation-mb", 1024); // Default value
                                                                              // from yarn-default.xml

  // Number of slots in a core node
  int nodeMemory = nodeCapacityProvider.getCoreNodeMemoryMB();
  int nodeSlots = nodeMemory / slotMemory;

  // Number of slots for a mapper
  int mapMemory = conf.getInt(MRJobConfig.MAP_MEMORY_MB, MRJobConfig.DEFAULT_MAP_MEMORY_MB);
  int mapSlots = (int) Math.ceil((double) mapMemory / slotMemory);

  // Number of slots for an application master
  int amMemory = conf.getInt(MRJobConfig.MR_AM_VMEM_MB, MRJobConfig.DEFAULT_MR_AM_VMEM_MB);
  int appMasterSlots = (int) Math.ceil((double) amMemory / slotMemory);

  // Number of slots for a reducer
  int reduceMemory = conf.getInt(MRJobConfig.REDUCE_MEMORY_MB, MRJobConfig
      .DEFAULT_REDUCE_MEMORY_MB);
  int reduceSlots = (int) Math.ceil((double) reduceMemory / slotMemory);

  // Number of reducers
  int reducers = conf.getNumReduceTasks();

  // Calculate the number of mappers
  int mappers = yarnContainerAllocator.getMaxMappers(nodes, reducers, nodeSlots,
      appMasterSlots, mapSlots, reduceSlots);

  log.info("Slot size: " + slotMemory + "MB.");
  log.info("Node manager can allocate " + nodeMemory + "MB (" + nodeSlots + " slots) for "
      + "containers on each node.");
  log.info("Each mapper needs: " + mapMemory + "MB. (" + mapSlots + " slots)");
  log.info("Each reducer needs: " + reduceMemory + "MB. (" + reduceSlots + " slots)");
  log.info("MapReduce Application Manager needs: " + amMemory + " MB. (" + appMasterSlots + " "
      + "slots)");
  log.info("Number of reducers: " + reducers);
  log.info("Max number of cluster map tasks: " + mappers);

  if (mappers < 1) {
    log.warn("The calculated max number of concurrent map tasks is less than 1. Use 1 instead.");
    mappers = 1;
  }
  return mappers;
}