org.apache.hadoop.mapred.JobConf Java Examples
The following examples show how to use
org.apache.hadoop.mapred.JobConf.
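Before the examples, here is a minimal sketch of the typical JobConf workflow in the classic mapred API: build a JobConf, set the mapper/reducer, key/value types, and I/O formats, then submit with JobClient.runJob. It uses the library classes TokenCountMapper and LongSumReducer from org.apache.hadoop.mapred.lib so that it compiles without extra code; the class name WordCountDriver and the command-line argument layout are illustrative assumptions, not part of any project shown below.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.LongSumReducer;
import org.apache.hadoop.mapred.lib.TokenCountMapper;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    // Job-wide configuration lives in the JobConf.
    JobConf conf = new JobConf(WordCountDriver.class);
    conf.setJobName("wordcount");

    // Key/value types produced by the job.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    // Library mapper/reducer: tokenize input lines, sum the counts per token.
    conf.setMapperClass(TokenCountMapper.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);

    // Input/output formats and paths (args[0] = input dir, args[1] = output dir).
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // Submit the job and block until it finishes.
    JobClient.runJob(conf);
  }
}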
Example #1
Source File: SliveMapper.java From hadoop with Apache License 2.0 | 6 votes |
@Override // MapReduceBase
public void configure(JobConf conf) {
  try {
    config = new ConfigExtractor(conf);
    ConfigExtractor.dumpOptions(config);
    filesystem = config.getBaseDirectory().getFileSystem(conf);
  } catch (Exception e) {
    LOG.error("Unable to setup slive " + StringUtils.stringifyException(e));
    throw new RuntimeException("Unable to setup slive configuration", e);
  }
  if (conf.get(MRJobConfig.TASK_ATTEMPT_ID) != null) {
    this.taskId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID))
        .getTaskID().getId();
  } else {
    // So that branch-1/0.20 can run this same code as well
    this.taskId = TaskAttemptID.forName(conf.get("mapred.task.id"))
        .getTaskID().getId();
  }
}
Example #2
Source File: TestCombineFileInputFormat.java From RDFS with Apache License 2.0 | 6 votes |
@Override
protected LocatedFileStatus[] listLocatedStatus(JobConf job) throws IOException {
  Path[] files = getInputPaths(job);
  LocatedFileStatus[] results = new LocatedFileStatus[files.length];
  for (int i = 0; i < files.length; i++) {
    Path p = files[i];
    FileSystem fs = p.getFileSystem(job);
    FileStatus stat = fs.getFileStatus(p);
    if (stat.isDir()) {
      results[i] = new LocatedFileStatus(stat, null);
    } else {
      results[i] = new LocatedFileStatus(stat,
          fs.getFileBlockLocations(stat, 0, stat.getLen()));
    }
  }
  return results;
}
Example #3
Source File: UtilsForTests.java From hadoop-gpu with Apache License 2.0 | 6 votes |
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
    throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      break;
    }
  }

  return job;
}
Example #4
Source File: TestCombineFileInputFormat.java From RDFS with Apache License 2.0 | 6 votes |
private void splitRealFiles(String[] args) throws IOException {
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  int blockSize = conf.getInt("dfs.block.size", 128 * 1024 * 1024);

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    inFormat.addInputPaths(conf, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  InputSplit[] splits = inFormat.getSplits(conf, 1);
  System.out.println("Total number of splits " + splits.length);
  for (int i = 0; i < splits.length; ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
Example #5
Source File: HadoopReduceCombineFunction.java From flink with Apache License 2.0 | 6 votes |
/**
 * Maps two Hadoop Reducers (mapred API) to a combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
 * @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
 * @param conf The JobConf that is used to configure both Hadoop Reducers.
 */
public HadoopReduceCombineFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer,
    Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN> hadoopCombiner, JobConf conf) {
  if (hadoopReducer == null) {
    throw new NullPointerException("Reducer may not be null.");
  }
  if (hadoopCombiner == null) {
    throw new NullPointerException("Combiner may not be null.");
  }
  if (conf == null) {
    throw new NullPointerException("JobConf may not be null.");
  }

  this.reducer = hadoopReducer;
  this.combiner = hadoopCombiner;
  this.jobConf = conf;
}
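For context, a minimal sketch of how this wrapper might be wired into a Flink DataSet program, assuming the flink-hadoop-compatibility module is on the classpath (package paths vary by Flink version); the Hadoop LongSumReducer is used as both reducer and combiner purely for illustration:
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.LongSumReducer;

public class HadoopReducerOnFlink {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Some (key, count) pairs in Hadoop Writable types.
    DataSet<Tuple2<Text, LongWritable>> words = env.fromElements(
        Tuple2.of(new Text("hadoop"), new LongWritable(1L)),
        Tuple2.of(new Text("flink"), new LongWritable(1L)),
        Tuple2.of(new Text("hadoop"), new LongWritable(1L)));

    // Wrap the Hadoop reducer and combiner; the JobConf configures both.
    DataSet<Tuple2<Text, LongWritable>> counts = words
        .groupBy(0)
        .reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(
            new LongSumReducer<Text>(), new LongSumReducer<Text>(), new JobConf()));

    counts.print();
  }
}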
Example #6
Source File: HadoopUtils.java From incubator-hivemall with Apache License 2.0 | 6 votes |
public static int getTaskId() {
  MapredContext ctx = MapredContextAccessor.get();
  if (ctx == null) {
    throw new IllegalStateException("MapredContext is not set");
  }
  JobConf jobconf = ctx.getJobConf();
  if (jobconf == null) {
    throw new IllegalStateException("JobConf is not set");
  }
  int taskid = jobconf.getInt("mapred.task.partition", -1);
  if (taskid == -1) {
    taskid = jobconf.getInt("mapreduce.task.partition", -1);
    if (taskid == -1) {
      throw new IllegalStateException(
          "Both mapred.task.partition and mapreduce.task.partition are not set: "
              + toString(jobconf));
    }
  }
  return taskid;
}
Example #7
Source File: AvroAsJsonOutputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 6 votes |
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
    throws UnsupportedEncodingException {

  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
        org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(job.getInt(
      org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    }
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}
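As a usage note, every setting this helper reads comes from the JobConf. A minimal sketch of populating those settings before the output format runs is shown below; it relies on the same Avro constants referenced in the example above, while the class name AvroOutputConfig and the chosen values (deflate level 6, 1 MB sync interval) are illustrative assumptions.
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class AvroOutputConfig {
  static JobConf newAvroOutputConf() {
    JobConf job = new JobConf();
    // Turn on output compression so configureDataFileWriter picks a codec.
    FileOutputFormat.setCompressOutput(job, true);
    // Select the Avro codec by name ("deflate", "snappy", ...) and a deflate level.
    job.set(AvroJob.OUTPUT_CODEC, "deflate");
    job.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 6);
    // A larger sync interval means fewer, larger Avro blocks in the output file.
    job.setInt(AvroOutputFormat.SYNC_INTERVAL_KEY, 1 << 20);
    return job;
  }
}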
Example #8
Source File: GridmixJob.java From hadoop with Apache License 2.0 | 6 votes |
@SuppressWarnings("deprecation")
protected static void configureTaskJVMOptions(Configuration originalJobConf,
    Configuration simulatedJobConf) {
  // Get the heap related java opts used for the original job and set the
  // same for the simulated job.
  // set task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf,
      JobConf.MAPRED_TASK_JAVA_OPTS);
  // set map task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf,
      MRJobConfig.MAP_JAVA_OPTS);
  // set reduce task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf,
      MRJobConfig.REDUCE_JAVA_OPTS);
}
Example #9
Source File: TokenUtils.java From incubator-gobblin with Apache License 2.0 | 6 votes |
private static void getJtToken(Credentials cred) throws IOException {
  try {
    JobConf jobConf = new JobConf();
    JobClient jobClient = new JobClient(jobConf);
    LOG.info("Pre-fetching JT token from JobTracker");

    Token<DelegationTokenIdentifier> mrdt =
        jobClient.getDelegationToken(getMRTokenRenewerInternal(jobConf));
    if (mrdt == null) {
      LOG.error("Failed to fetch JT token");
      throw new IOException("Failed to fetch JT token.");
    }
    LOG.info("Created JT token: " + mrdt.toString());
    LOG.info("Token kind: " + mrdt.getKind());
    LOG.info("Token id: " + Arrays.toString(mrdt.getIdentifier()));
    LOG.info("Token service: " + mrdt.getService());
    cred.addToken(mrdt.getService(), mrdt);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
Example #10
Source File: PipeReducer.java From hadoop with Apache License 2.0 | 6 votes |
public void configure(JobConf job) {
  super.configure(job);
  // Disable the auto increment of the counter. For streaming, the number of
  // processed records could be different (equal or less) than the number of
  // input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator =
        job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator =
        job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields =
        job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #11
Source File: TestDatamerge.java From big-c with Apache License 2.0 | 6 votes |
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose(jointype, SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example #12
Source File: TestDFSIO.java From big-c with Apache License 2.0 | 6 votes |
@Override // Mapper
public void configure(JobConf conf) {
  super.configure(conf);

  // grab compression
  String compression = getConf().get("test.io.compression.class", null);
  Class<? extends CompressionCodec> codec;

  // try to initialize codec
  try {
    codec = (compression == null) ? null
        : Class.forName(compression).asSubclass(CompressionCodec.class);
  } catch (Exception e) {
    throw new RuntimeException("Compression codec not found: ", e);
  }

  if (codec != null) {
    compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codec, getConf());
  }
}
Example #13
Source File: DFSGeneralTest.java From RDFS with Apache License 2.0 | 6 votes |
public void control(JobConf fsConfig, String fileName) throws IOException {
  String name = fileName;
  FileSystem fs = FileSystem.get(fsConfig);

  SequenceFile.Writer write = null;
  for (int i = 0; i < nmaps; i++) {
    try {
      Path controlFile = new Path(dfs_input, name + i);
      write = SequenceFile.createWriter(fs, fsConfig, controlFile,
          Text.class, Text.class, CompressionType.NONE);
      write.append(new Text(name + i), new Text(workdir));
    } finally {
      if (write != null)
        write.close();
      write = null;
    }
  }
}
Example #14
Source File: ReaderTextCellParallel.java From systemds with Apache License 2.0 | 5 votes |
public ReadTask(InputSplit split, TextInputFormat informat, JobConf job,
    MatrixBlock dest, long rlen, long clen, boolean mm, FileFormatPropertiesMM mmProps) {
  _split = split;
  _sparse = dest.isInSparseFormat();
  _informat = informat;
  _job = job;
  _dest = dest;
  _rlen = rlen;
  _clen = clen;
  _matrixMarket = mm;
  _mmProps = mmProps;
}
Example #15
Source File: MapTaskImpl.java From big-c with Apache License 2.0 | 5 votes |
public MapTaskImpl(JobId jobId, int partition, EventHandler eventHandler,
    Path remoteJobConfFile, JobConf conf,
    TaskSplitMetaInfo taskSplitMetaInfo,
    TaskAttemptListener taskAttemptListener,
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    int appAttemptId, MRAppMetrics metrics, AppContext appContext) {
  super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile,
      conf, taskAttemptListener, jobToken, credentials, clock,
      appAttemptId, metrics, appContext);
  this.taskSplitMetaInfo = taskSplitMetaInfo;
}
Example #16
Source File: FrameReaderTextCSV.java From systemds with Apache License 2.0 | 5 votes |
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema,
    String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
  // prepare file access
  JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
  Path path = new Path(fname);
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  FileInputFormat.addInputPath(job, path);

  // check existence and non-empty file
  checkValidInputFile(fs, path);

  // compute size if necessary
  if (rlen <= 0 || clen <= 0) {
    Pair<Integer, Integer> size = computeCSVSize(path, job, fs);
    rlen = size.getKey();
    clen = size.getValue();
  }

  // allocate output frame block
  ValueType[] lschema = createOutputSchema(schema, clen);
  String[] lnames = createOutputNames(names, clen);
  FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);

  // core read (sequential/parallel)
  readCSVFrameFromHDFS(path, job, fs, ret, lschema, lnames, rlen, clen);

  return ret;
}
Example #17
Source File: ParquetFileWriterFactory.java From presto with Apache License 2.0 | 5 votes |
private static CompressionCodecName getCompression(JobConf configuration) {
  String compressionName = configuration.get(ParquetOutputFormat.COMPRESSION);
  if (compressionName == null) {
    return CompressionCodecName.GZIP;
  }
  return CompressionCodecName.valueOf(compressionName);
}
Example #18
Source File: CombineFileSplit.java From RDFS with Apache License 2.0 | 5 votes |
public CombineFileSplit(JobConf job, Path[] files, long[] lengths) {
  long[] startoffset = new long[files.length];
  for (int i = 0; i < startoffset.length; i++) {
    startoffset[i] = 0;
  }
  String[] locations = new String[files.length];
  for (int i = 0; i < locations.length; i++) {
    locations[i] = "";
  }
  initSplit(job, files, startoffset, lengths, locations);
}
Example #19
Source File: KafkaInputFormat.java From HiveKa with Apache License 2.0 | 5 votes |
private Set<String> getMoveToLatestTopicsSet(JobConf conf) {
  Set<String> topics = new HashSet<String>();

  String[] arr = getMoveToLatestTopics(conf);
  if (arr != null) {
    for (String topic : arr) {
      topics.add(topic);
    }
  }

  return topics;
}
Example #20
Source File: DistCpV1.java From big-c with Apache License 2.0 | 5 votes |
public void setConf(Configuration conf) {
  if (conf instanceof JobConf) {
    this.conf = (JobConf) conf;
  } else {
    this.conf = new JobConf(conf);
  }
}
Example #21
Source File: TestCLI.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void setUp() throws Exception {
  // Read the testConfig.xml file
  readTestConfigFile();

  // Start up the mini dfs cluster
  boolean success = false;
  conf = new Configuration();
  conf.setClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
      HadoopPolicyProvider.class, PolicyProvider.class);
  conf.setBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, true);

  dfsCluster = new MiniDFSCluster(conf, 1, true, null);
  namenode = conf.get("fs.default.name", "file:///");
  clitestDataDir = new File(TEST_CACHE_DATA_DIR).toURI().toString().replace(' ', '+');
  username = System.getProperty("user.name");

  FileSystem fs = dfsCluster.getFileSystem();
  assertTrue("Not a HDFS: " + fs.getUri(), fs instanceof DistributedFileSystem);
  dfs = (DistributedFileSystem) fs;

  // Start up mini mr cluster
  JobConf mrConf = new JobConf(conf);
  mrCluster = new MiniMRCluster(1, dfsCluster.getFileSystem().getUri().toString(),
      1, null, null, mrConf);
  jobtracker = mrCluster.createJobConf().get("mapred.job.tracker", "local");

  success = true;
  assertTrue("Error setting up Mini DFS & MR clusters", success);
}
Example #22
Source File: DummyInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  InputSplit[] splits = new InputSplit[numSplits];
  for (int i = 0; i < splits.length; ++i) {
    splits[i] = new EmptySplit();
  }
  return splits;
}
Example #23
Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0 | 5 votes |
public void setupJob(JobConf conf) throws IOException {
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  if (outputPath != null) {
    Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
    FileSystem fileSys = tmpDir.getFileSystem(conf);
    if (!fileSys.mkdirs(tmpDir)) {
      LOG.error("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}
Example #24
Source File: GrokHelper.java From hadoop-solr with Apache License 2.0 | 5 votes |
public static String readConfiguration(String path, JobConf conf) {
  String response = "";
  Path p = new Path(path);
  try {
    FileSystem fs = p.getFileSystem(conf);
    FSDataInputStream inputStream = fs.open(p);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    IOUtils.copyBytes(inputStream, out, conf);
    response = out.toString();
    fs.close();
  } catch (IOException e) {
    log.error("Unable to read " + path + " from HDFS", e);
  }
  return response;
}
Example #25
Source File: HiveExcelRowFileOutputFormat.java From hadoopoffice with Apache License 2.0 | 5 votes |
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass,
    boolean isCompressed, Properties tableProperties, Progressable progress)
    throws IOException {
  FileSystem fs = finalOutPath.getFileSystem(jc);
  HiveExcelRowFileOutputFormat.setOutputPath(jc, finalOutPath);
  RecordWriter<?, ?> recordWriter = this.getRecordWriter(fs, jc, null, progress);
  return new HivePassThroughRecordWriter(recordWriter);
}
Example #26
Source File: TestMRAppWithCombiner.java From big-c with Apache License 2.0 | 5 votes |
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  JobConf conf = new JobConf(mrCluster.getConfig());
  int numMaps = 5;
  int numReds = 2;
  Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, numMaps);
  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  //conf.setJarByClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  runJob(conf);
}
Example #27
Source File: TestMiniCoronaRunJob.java From RDFS with Apache License 2.0 | 5 votes |
private void runSleepJob(JobConf conf, int maps, int reduces) throws Exception {
  String[] args = {"-m", maps + "", "-r", reduces + "", "-mt", "1", "-rt", "1"};
  ToolRunner.run(conf, new SleepJob(), args);
  // This sleep is here to wait for the JobTracker to go down completely
  TstUtils.reliableSleep(1000);
}
Example #28
Source File: WriterMatrixMarketParallel.java From systemds with Apache License 2.0 | 5 votes |
public WriteMMTask(Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int ru) {
  _path = path;
  _job = job;
  _fs = fs;
  _src = src;
  _rl = rl;
  _ru = ru;
}
Example #29
Source File: HiveTableSink.java From flink with Apache License 2.0 | 5 votes |
public HiveTableSink(JobConf jobConf, ObjectPath tablePath, CatalogTable table) {
  this.jobConf = jobConf;
  this.tablePath = tablePath;
  this.catalogTable = table;
  hiveVersion = Preconditions.checkNotNull(
      jobConf.get(HiveCatalogValidator.CATALOG_HIVE_VERSION),
      "Hive version is not defined");
  tableSchema = table.getSchema();
}
Example #30
Source File: ReaderTextLIBSVMParallel.java From systemds with Apache License 2.0 | 5 votes |
private void readLIBSVMMatrixFromHDFS(InputSplit[] splits, Path path, JobConf job,
    MatrixBlock dest, long rlen, long clen, int blen) throws IOException {
  FileInputFormat.addInputPath(job, path);
  TextInputFormat informat = new TextInputFormat();
  informat.configure(job);

  ExecutorService pool = CommonThreadPool.get(_numThreads);
  try {
    // create read tasks for all splits
    ArrayList<LIBSVMReadTask> tasks = new ArrayList<>();
    int splitCount = 0;
    for (InputSplit split : splits) {
      tasks.add(new LIBSVMReadTask(split, _offsets, informat, job,
          dest, rlen, clen, splitCount++));
    }
    pool.invokeAll(tasks);
    pool.shutdown();

    // check return codes and aggregate nnz
    long lnnz = 0;
    for (LIBSVMReadTask rt : tasks) {
      lnnz += rt.getPartialNnz();
      if (!rt.getReturnCode()) {
        Exception err = rt.getException();
        throw new IOException("Read task for libsvm input failed: " + err.toString(), err);
      }
    }
    dest.setNonZeros(lnnz);
  } catch (Exception e) {
    throw new IOException("Threadpool issue, while parallel read.", e);
  }
}