org.apache.hadoop.mapred.JobConf Java Examples
The following examples show how to use
org.apache.hadoop.mapred.JobConf.
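Before the examples, here is a minimal sketch of the typical JobConf workflow in the classic mapred API: build a JobConf, set the mapper/reducer, key/value types, and I/O formats, then submit with JobClient.runJob. It uses the library classes TokenCountMapper and LongSumReducer from org.apache.hadoop.mapred.lib so that it compiles without extra code; the class name WordCountDriver and the command-line argument layout are illustrative assumptions, not part of any project shown below.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.LongSumReducer;
import org.apache.hadoop.mapred.lib.TokenCountMapper;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    // Job-wide configuration lives in the JobConf.
    JobConf conf = new JobConf(WordCountDriver.class);
    conf.setJobName("wordcount");

    // Key/value types produced by the job.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    // Library mapper/reducer: tokenize input lines, sum the counts per token.
    conf.setMapperClass(TokenCountMapper.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);

    // Input/output formats and paths (args[0] = input dir, args[1] = output dir).
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // Submit the job and block until it finishes.
    JobClient.runJob(conf);
  }
}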
Example #1
Source File: SliveMapper.java From hadoop with Apache License 2.0 | 6 votes |
@Override // MapReduceBase
public void configure(JobConf conf) {
  try {
    config = new ConfigExtractor(conf);
    ConfigExtractor.dumpOptions(config);
    filesystem = config.getBaseDirectory().getFileSystem(conf);
  } catch (Exception e) {
    LOG.error("Unable to setup slive " + StringUtils.stringifyException(e));
    throw new RuntimeException("Unable to setup slive configuration", e);
  }
  if (conf.get(MRJobConfig.TASK_ATTEMPT_ID) != null) {
    this.taskId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID))
        .getTaskID().getId();
  } else {
    // So that branch-1/0.20 can run this same code as well
    this.taskId = TaskAttemptID.forName(conf.get("mapred.task.id"))
        .getTaskID().getId();
  }
}
Example #2
Source File: TestCombineFileInputFormat.java From RDFS with Apache License 2.0 | 6 votes |
@Override
protected LocatedFileStatus[] listLocatedStatus(JobConf job) throws IOException {
  Path[] files = getInputPaths(job);
  LocatedFileStatus[] results = new LocatedFileStatus[files.length];
  for (int i = 0; i < files.length; i++) {
    Path p = files[i];
    FileSystem fs = p.getFileSystem(job);
    FileStatus stat = fs.getFileStatus(p);
    if (stat.isDir()) {
      results[i] = new LocatedFileStatus(stat, null);
    } else {
      results[i] = new LocatedFileStatus(stat,
          fs.getFileBlockLocations(stat, 0, stat.getLen()));
    }
  }
  return results;
}
Example #3
Source File: UtilsForTests.java From hadoop-gpu with Apache License 2.0 | 6 votes |
static RunningJob runJobFail(JobConf conf, Path inDir, Path outDir)
    throws IOException {
  conf.setJobName("test-job-fail");
  conf.setMapperClass(FailMapper.class);
  conf.setReducerClass(IdentityReducer.class);

  RunningJob job = UtilsForTests.runJob(conf, inDir, outDir);
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      break;
    }
  }

  return job;
}
Example #4
Source File: TestCombineFileInputFormat.java From RDFS with Apache License 2.0 | 6 votes |
private void splitRealFiles(String[] args) throws IOException {
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.get(conf);
  if (!(fs instanceof DistributedFileSystem)) {
    throw new IOException("Wrong file system: " + fs.getClass().getName());
  }
  int blockSize = conf.getInt("dfs.block.size", 128 * 1024 * 1024);

  DummyInputFormat inFormat = new DummyInputFormat();
  for (int i = 0; i < args.length; i++) {
    inFormat.addInputPaths(conf, args[i]);
  }
  inFormat.setMinSplitSizeRack(blockSize);
  inFormat.setMaxSplitSize(10 * blockSize);

  InputSplit[] splits = inFormat.getSplits(conf, 1);
  System.out.println("Total number of splits " + splits.length);
  for (int i = 0; i < splits.length; ++i) {
    CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
    System.out.println("Split[" + i + "] " + fileSplit);
  }
}
Example #5
Source File: HadoopReduceCombineFunction.java From flink with Apache License 2.0 | 6 votes |
/**
 * Maps two Hadoop Reducers (mapred API) to a combinable Flink GroupReduceFunction.
 *
 * @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
 * @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
 * @param conf The JobConf that is used to configure both Hadoop Reducers.
 */
public HadoopReduceCombineFunction(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopReducer,
    Reducer<KEYIN, VALUEIN, KEYIN, VALUEIN> hadoopCombiner, JobConf conf) {
  if (hadoopReducer == null) {
    throw new NullPointerException("Reducer may not be null.");
  }
  if (hadoopCombiner == null) {
    throw new NullPointerException("Combiner may not be null.");
  }
  if (conf == null) {
    throw new NullPointerException("JobConf may not be null.");
  }

  this.reducer = hadoopReducer;
  this.combiner = hadoopCombiner;
  this.jobConf = conf;
}
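For context, a minimal sketch of how this wrapper might be wired into a Flink DataSet program, assuming the flink-hadoop-compatibility module is on the classpath (package paths vary by Flink version); the Hadoop LongSumReducer is used as both reducer and combiner purely for illustration:
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.LongSumReducer;

public class HadoopReducerOnFlink {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Some (key, count) pairs in Hadoop Writable types.
    DataSet<Tuple2<Text, LongWritable>> words = env.fromElements(
        Tuple2.of(new Text("hadoop"), new LongWritable(1L)),
        Tuple2.of(new Text("flink"), new LongWritable(1L)),
        Tuple2.of(new Text("hadoop"), new LongWritable(1L)));

    // Wrap the Hadoop reducer and combiner; the JobConf configures both.
    DataSet<Tuple2<Text, LongWritable>> counts = words
        .groupBy(0)
        .reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(
            new LongSumReducer<Text>(), new LongSumReducer<Text>(), new JobConf()));

    counts.print();
  }
}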
Example #6
Source File: HadoopUtils.java From incubator-hivemall with Apache License 2.0 | 6 votes |
public static int getTaskId() {
  MapredContext ctx = MapredContextAccessor.get();
  if (ctx == null) {
    throw new IllegalStateException("MapredContext is not set");
  }
  JobConf jobconf = ctx.getJobConf();
  if (jobconf == null) {
    throw new IllegalStateException("JobConf is not set");
  }
  int taskid = jobconf.getInt("mapred.task.partition", -1);
  if (taskid == -1) {
    taskid = jobconf.getInt("mapreduce.task.partition", -1);
    if (taskid == -1) {
      throw new IllegalStateException(
          "Both mapred.task.partition and mapreduce.task.partition are not set: "
              + toString(jobconf));
    }
  }
  return taskid;
}
Example #7
Source File: AvroAsJsonOutputFormat.java From iow-hadoop-streaming with Apache License 2.0 | 6 votes |
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
    throws UnsupportedEncodingException {

  if (FileOutputFormat.getCompressOutput(job)) {
    int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
        org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
    String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    writer.setCodec(factory);
  }

  writer.setSyncInterval(job.getInt(
      org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

  // copy metadata from job
  for (Map.Entry<String, String> e : job) {
    if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
    }
    if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) {
      writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
          URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
  }
}
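As a usage note, every setting this helper reads comes from the JobConf. A minimal sketch of populating those settings before the output format runs is shown below; it relies on the same Avro constants referenced in the example above, while the class name AvroOutputConfig and the chosen values (deflate level 6, 1 MB sync interval) are illustrative assumptions.
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class AvroOutputConfig {
  static JobConf newAvroOutputConf() {
    JobConf job = new JobConf();
    // Turn on output compression so configureDataFileWriter picks a codec.
    FileOutputFormat.setCompressOutput(job, true);
    // Select the Avro codec by name ("deflate", "snappy", ...) and a deflate level.
    job.set(AvroJob.OUTPUT_CODEC, "deflate");
    job.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 6);
    // A larger sync interval means fewer, larger Avro blocks in the output file.
    job.setInt(AvroOutputFormat.SYNC_INTERVAL_KEY, 1 << 20);
    return job;
  }
}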
Example #8
Source File: GridmixJob.java From hadoop with Apache License 2.0 | 6 votes |
@SuppressWarnings("deprecation")
protected static void configureTaskJVMOptions(Configuration originalJobConf,
    Configuration simulatedJobConf) {
  // Get the heap related java opts used for the original job and set the
  // same for the simulated job.
  // set task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf,
      JobConf.MAPRED_TASK_JAVA_OPTS);
  // set map task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf,
      MRJobConfig.MAP_JAVA_OPTS);
  // set reduce task heap options
  configureTaskJVMMaxHeapOptions(originalJobConf, simulatedJobConf,
      MRJobConfig.REDUCE_JAVA_OPTS);
}
Example #9
Source File: TokenUtils.java From incubator-gobblin with Apache License 2.0 | 6 votes |
private static void getJtToken(Credentials cred) throws IOException {
  try {
    JobConf jobConf = new JobConf();
    JobClient jobClient = new JobClient(jobConf);
    LOG.info("Pre-fetching JT token from JobTracker");

    Token<DelegationTokenIdentifier> mrdt =
        jobClient.getDelegationToken(getMRTokenRenewerInternal(jobConf));
    if (mrdt == null) {
      LOG.error("Failed to fetch JT token");
      throw new IOException("Failed to fetch JT token.");
    }
    LOG.info("Created JT token: " + mrdt.toString());
    LOG.info("Token kind: " + mrdt.getKind());
    LOG.info("Token id: " + Arrays.toString(mrdt.getIdentifier()));
    LOG.info("Token service: " + mrdt.getService());
    cred.addToken(mrdt.getService(), mrdt);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  }
}
Example #10
Source File: PipeReducer.java From hadoop with Apache License 2.0 | 6 votes |
public void configure(JobConf job) {
  super.configure(job);
  // Disable the auto increment of the counter. For streaming, the number of
  // processed records could be different (equal or less) than the number of
  // input records.
  SkipBadRecords.setAutoIncrReducerProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  try {
    reduceOutFieldSeparator =
        job_.get("stream.reduce.output.field.separator", "\t").getBytes("UTF-8");
    reduceInputFieldSeparator =
        job_.get("stream.reduce.input.field.separator", "\t").getBytes("UTF-8");
    this.numOfReduceOutputKeyFields =
        job_.getInt("stream.num.reduce.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Example #11
Source File: TestDatamerge.java From big-c with Apache License 2.0 | 6 votes |
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose(jointype, SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example #12
Source File: TestDFSIO.java From big-c with Apache License 2.0 | 6 votes |
@Override // Mapper
public void configure(JobConf conf) {
  super.configure(conf);

  // grab compression
  String compression = getConf().get("test.io.compression.class", null);
  Class<? extends CompressionCodec> codec;

  // try to initialize codec
  try {
    codec = (compression == null) ? null
        : Class.forName(compression).asSubclass(CompressionCodec.class);
  } catch (Exception e) {
    throw new RuntimeException("Compression codec not found: ", e);
  }

  if (codec != null) {
    compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codec, getConf());
  }
}
Example #13
Source File: DFSGeneralTest.java From RDFS with Apache License 2.0 | 6 votes |
public void control(JobConf fsConfig, String fileName) throws IOException {
  String name = fileName;
  FileSystem fs = FileSystem.get(fsConfig);

  SequenceFile.Writer write = null;
  for (int i = 0; i < nmaps; i++) {
    try {
      Path controlFile = new Path(dfs_input, name + i);
      write = SequenceFile.createWriter(fs, fsConfig, controlFile,
          Text.class, Text.class, CompressionType.NONE);
      write.append(new Text(name + i), new Text(workdir));
    } finally {
      if (write != null)
        write.close();
      write = null;
    }
  }
}
Example #14
Source File: ReaderTextCellParallel.java From systemds with Apache License 2.0 | 5 votes |
public ReadTask(InputSplit split, TextInputFormat informat, JobConf job,
    MatrixBlock dest, long rlen, long clen, boolean mm, FileFormatPropertiesMM mmProps) {
  _split = split;
  _sparse = dest.isInSparseFormat();
  _informat = informat;
  _job = job;
  _dest = dest;
  _rlen = rlen;
  _clen = clen;
  _matrixMarket = mm;
  _mmProps = mmProps;
}
Example #15
Source File: MapTaskImpl.java From big-c with Apache License 2.0 | 5 votes |
public MapTaskImpl(JobId jobId, int partition, EventHandler eventHandler,
    Path remoteJobConfFile, JobConf conf,
    TaskSplitMetaInfo taskSplitMetaInfo,
    TaskAttemptListener taskAttemptListener,
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    int appAttemptId, MRAppMetrics metrics, AppContext appContext) {
  super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile,
      conf, taskAttemptListener, jobToken, credentials, clock,
      appAttemptId, metrics, appContext);
  this.taskSplitMetaInfo = taskSplitMetaInfo;
}
Example #16
Source File: FrameReaderTextCSV.java From systemds with Apache License 2.0 | 5 votes |
@Override
public final FrameBlock readFrameFromHDFS(String fname, ValueType[] schema,
    String[] names, long rlen, long clen) throws IOException, DMLRuntimeException {
  // prepare file access
  JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
  Path path = new Path(fname);
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  FileInputFormat.addInputPath(job, path);

  // check existence and non-empty file
  checkValidInputFile(fs, path);

  // compute size if necessary
  if (rlen <= 0 || clen <= 0) {
    Pair<Integer, Integer> size = computeCSVSize(path, job, fs);
    rlen = size.getKey();
    clen = size.getValue();
  }

  // allocate output frame block
  ValueType[] lschema = createOutputSchema(schema, clen);
  String[] lnames = createOutputNames(names, clen);
  FrameBlock ret = createOutputFrameBlock(lschema, lnames, rlen);

  // core read (sequential/parallel)
  readCSVFrameFromHDFS(path, job, fs, ret, lschema, lnames, rlen, clen);

  return ret;
}
Example #17
Source File: ParquetFileWriterFactory.java From presto with Apache License 2.0 | 5 votes |
private static CompressionCodecName getCompression(JobConf configuration) {
  String compressionName = configuration.get(ParquetOutputFormat.COMPRESSION);
  if (compressionName == null) {
    return CompressionCodecName.GZIP;
  }
  return CompressionCodecName.valueOf(compressionName);
}
Example #18
Source File: CombineFileSplit.java From RDFS with Apache License 2.0 | 5 votes |
public CombineFileSplit(JobConf job, Path[] files, long[] lengths) {
  long[] startoffset = new long[files.length];
  for (int i = 0; i < startoffset.length; i++) {
    startoffset[i] = 0;
  }
  String[] locations = new String[files.length];
  for (int i = 0; i < locations.length; i++) {
    locations[i] = "";
  }
  initSplit(job, files, startoffset, lengths, locations);
}
Example #19
Source File: KafkaInputFormat.java From HiveKa with Apache License 2.0 | 5 votes |
private Set<String> getMoveToLatestTopicsSet(JobConf conf) {
  Set<String> topics = new HashSet<String>();

  String[] arr = getMoveToLatestTopics(conf);
  if (arr != null) {
    for (String topic : arr) {
      topics.add(topic);
    }
  }

  return topics;
}
Example #20
Source File: DistCpV1.java From big-c with Apache License 2.0 | 5 votes |
public void setConf(Configuration conf) {
  if (conf instanceof JobConf) {
    this.conf = (JobConf) conf;
  } else {
    this.conf = new JobConf(conf);
  }
}
Example #21
Source File: TestCLI.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void setUp() throws Exception {
  // Read the testConfig.xml file
  readTestConfigFile();

  // Start up the mini dfs cluster
  boolean success = false;
  conf = new Configuration();
  conf.setClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
      HadoopPolicyProvider.class, PolicyProvider.class);
  conf.setBoolean(ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, true);

  dfsCluster = new MiniDFSCluster(conf, 1, true, null);
  namenode = conf.get("fs.default.name", "file:///");
  clitestDataDir = new File(TEST_CACHE_DATA_DIR).toURI().toString().replace(' ', '+');
  username = System.getProperty("user.name");

  FileSystem fs = dfsCluster.getFileSystem();
  assertTrue("Not a HDFS: " + fs.getUri(), fs instanceof DistributedFileSystem);
  dfs = (DistributedFileSystem) fs;

  // Start up mini mr cluster
  JobConf mrConf = new JobConf(conf);
  mrCluster = new MiniMRCluster(1, dfsCluster.getFileSystem().getUri().toString(),
      1, null, null, mrConf);
  jobtracker = mrCluster.createJobConf().get("mapred.job.tracker", "local");

  success = true;
  assertTrue("Error setting up Mini DFS & MR clusters", success);
}
Example #22
Source File: DummyInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  InputSplit[] splits = new InputSplit[numSplits];
  for (int i = 0; i < splits.length; ++i) {
    splits[i] = new EmptySplit();
  }
  return splits;
}
Example #23
Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0 | 5 votes |
public void setupJob(JobConf conf) throws IOException {
  Path outputPath = FileOutputFormat.getOutputPath(conf);
  if (outputPath != null) {
    Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
    FileSystem fileSys = tmpDir.getFileSystem(conf);
    if (!fileSys.mkdirs(tmpDir)) {
      LOG.error("Mkdirs failed to create " + tmpDir.toString());
    }
  }
}
Example #24
Source File: GrokHelper.java From hadoop-solr with Apache License 2.0 | 5 votes |
public static String readConfiguration(String path, JobConf conf) {
  String response = "";
  Path p = new Path(path);
  try {
    FileSystem fs = p.getFileSystem(conf);
    FSDataInputStream inputStream = fs.open(p);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    IOUtils.copyBytes(inputStream, out, conf);
    response = out.toString();
    fs.close();
  } catch (IOException e) {
    log.error("Unable to read " + path + " from HDFS", e);
  }
  return response;
}
Example #25
Source File: HiveExcelRowFileOutputFormat.java From hadoopoffice with Apache License 2.0 | 5 votes |
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass,
    boolean isCompressed, Properties tableProperties, Progressable progress)
    throws IOException {
  FileSystem fs = finalOutPath.getFileSystem(jc);
  HiveExcelRowFileOutputFormat.setOutputPath(jc, finalOutPath);
  RecordWriter<?, ?> recordWriter = this.getRecordWriter(fs, jc, null, progress);
  return new HivePassThroughRecordWriter(recordWriter);
}
Example #26
Source File: TestMRAppWithCombiner.java From big-c with Apache License 2.0 | 5 votes |
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  JobConf conf = new JobConf(mrCluster.getConfig());
  int numMaps = 5;
  int numReds = 2;
  Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, numMaps);
  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  //conf.setJarByClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  runJob(conf);
}
Example #27
Source File: TestMiniCoronaRunJob.java From RDFS with Apache License 2.0 | 5 votes |
private void runSleepJob(JobConf conf, int maps, int reduces) throws Exception {
  String[] args = {"-m", maps + "", "-r", reduces + "", "-mt", "1", "-rt", "1"};
  ToolRunner.run(conf, new SleepJob(), args);
  // This sleep is here to wait for the JobTracker to go down completely
  TstUtils.reliableSleep(1000);
}
Example #28
Source File: WriterMatrixMarketParallel.java From systemds with Apache License 2.0 | 5 votes |
public WriteMMTask(Path path, JobConf job, FileSystem fs, MatrixBlock src, int rl, int ru) {
  _path = path;
  _job = job;
  _fs = fs;
  _src = src;
  _rl = rl;
  _ru = ru;
}
Example #29
Source File: HiveTableSink.java From flink with Apache License 2.0 | 5 votes |
public HiveTableSink(JobConf jobConf, ObjectPath tablePath, CatalogTable table) {
  this.jobConf = jobConf;
  this.tablePath = tablePath;
  this.catalogTable = table;
  hiveVersion = Preconditions.checkNotNull(
      jobConf.get(HiveCatalogValidator.CATALOG_HIVE_VERSION),
      "Hive version is not defined");
  tableSchema = table.getSchema();
}
Example #30
Source File: ReaderTextLIBSVMParallel.java From systemds with Apache License 2.0 | 5 votes |
private void readLIBSVMMatrixFromHDFS(InputSplit[] splits, Path path, JobConf job,
    MatrixBlock dest, long rlen, long clen, int blen) throws IOException {
  FileInputFormat.addInputPath(job, path);
  TextInputFormat informat = new TextInputFormat();
  informat.configure(job);

  ExecutorService pool = CommonThreadPool.get(_numThreads);
  try {
    // create read tasks for all splits
    ArrayList<LIBSVMReadTask> tasks = new ArrayList<>();
    int splitCount = 0;
    for (InputSplit split : splits) {
      tasks.add(new LIBSVMReadTask(split, _offsets, informat, job,
          dest, rlen, clen, splitCount++));
    }
    pool.invokeAll(tasks);
    pool.shutdown();

    // check return codes and aggregate nnz
    long lnnz = 0;
    for (LIBSVMReadTask rt : tasks) {
      lnnz += rt.getPartialNnz();
      if (!rt.getReturnCode()) {
        Exception err = rt.getException();
        throw new IOException("Read task for libsvm input failed: " + err.toString(), err);
      }
    }
    dest.setNonZeros(lnnz);
  } catch (Exception e) {
    throw new IOException("Threadpool issue, while parallel read.", e);
  }
}