Java Code Examples for org.apache.hadoop.mapred.JobConf#getUseNewMapper()
The following examples show how to use org.apache.hadoop.mapred.JobConf#getUseNewMapper().
You can go to the original project or source file by following the reference above each example.
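For context, JobConf#getUseNewMapper() reads the boolean job property mapred.mapper.new-api (also mentioned in the javadoc of Examples 6 and 8 below), which records whether the job's mapper was written against the new org.apache.hadoop.mapreduce API or the old org.apache.hadoop.mapred API. Below is a minimal sketch of setting and reading the flag directly via JobConf#setUseNewMapper; the class name is made up for illustration, and submitting through org.apache.hadoop.mapreduce.Job will normally set this property for you during submission.

import org.apache.hadoop.mapred.JobConf;

public class UseNewMapperSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Unset, the flag defaults to false, i.e. the old mapred.Mapper API.
    System.out.println(conf.getUseNewMapper()); // prints "false"

    // Declare that the job's mapper uses the new mapreduce API.
    conf.setUseNewMapper(true);
    System.out.println(conf.getUseNewMapper()); // prints "true"
  }
}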
Example 1
Source File: JobSubmitter.java From hadoop with Apache License 2.0
private void checkSpecs(Job job) throws ClassNotFoundException,
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification
  if (jConf.getNumReduceTasks() == 0 ?
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
        ReflectionUtils.newInstance(job.getOutputFormatClass(),
            job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
Example 2
Source File: JobSubmitter.java From hadoop with Apache License 2.0
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
    Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf)job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
Example 3
Source File: JobSubmitter.java From big-c with Apache License 2.0
private void checkSpecs(Job job) throws ClassNotFoundException,
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification
  if (jConf.getNumReduceTasks() == 0 ?
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
        ReflectionUtils.newInstance(job.getOutputFormatClass(),
            job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
Example 4
Source File: JobSubmitter.java From big-c with Apache License 2.0
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
    Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf)job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
Example 5
Source File: HadoopV2TaskContext.java From ignite with Apache License 2.0
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
Example 6
Source File: MRHelpers.java From incubator-tez with Apache License 2.0
/**
 * Helper API to generate splits.
 *
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static InputSplitInfoDisk generateInputSplits(Configuration conf,
    Path inputSplitsDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  Job job = Job.getInstance(conf);
  JobConf jobConf = new JobConf(conf);
  conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  if (jobConf.getUseNewMapper()) {
    LOG.info("Generating new input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeNewSplits(job, inputSplitsDir);
  } else {
    LOG.info("Generating old input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeOldSplits(jobConf, inputSplitsDir);
  }
}
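As a usage note for Example 6, the javadoc above spells out the minimum configuration generateInputSplits expects. The sketch below shows one way to prepare that configuration for the new-API path; the input and splits paths are hypothetical, the driver class is made up, and the Tez imports assume the incubator-tez org.apache.tez.mapreduce.hadoop package layout.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.tez.mapreduce.hadoop.InputSplitInfoDisk;
import org.apache.tez.mapreduce.hadoop.MRHelpers;

public class GenerateSplitsSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());

    // Sets mapreduce.job.inputformat.class, one of the two required keys.
    job.setInputFormatClass(TextInputFormat.class);
    // FileInputFormat also needs its input directories (hypothetical path).
    FileInputFormat.addInputPath(job, new Path("/data/in"));
    // Take the new-API branch inside generateInputSplits.
    job.getConfiguration().setBoolean("mapred.mapper.new-api", true);

    InputSplitInfoDisk splitInfo = MRHelpers.generateInputSplits(
        job.getConfiguration(), new Path("/tmp/job-splits")); // hypothetical splits dir
    System.out.println("Generated " + splitInfo.getNumTasks() + " splits");
  }
}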
Example 7
Source File: MRInputSplitDistributor.java From incubator-tez with Apache License 2.0
@Override
public List<Event> initialize(TezRootInputInitializerContext rootInputContext)
    throws IOException {
  Stopwatch sw = null;
  if (LOG.isDebugEnabled()) {
    sw = new Stopwatch().start();
  }
  MRInputUserPayloadProto userPayloadProto =
      MRHelpers.parseMRInputPayload(rootInputContext.getUserPayload());
  if (LOG.isDebugEnabled()) {
    sw.stop();
    LOG.debug("Time to parse MRInput payload into proto: " + sw.elapsedMillis());
  }
  Configuration conf = MRHelpers.createConfFromByteString(
      userPayloadProto.getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();

  MRInputUserPayloadProto.Builder updatedPayloadBuilder =
      MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events =
      Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  RootInputUpdatePayloadEvent updatePayloadEvent = new RootInputUpdatePayloadEvent(
      updatedPayloadBuilder.build().toByteArray());
  events.add(updatePayloadEvent);

  int count = 0;
  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {
    RootInputDataInformationEvent diEvent;
    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = new RootInputDataInformationEvent(count++, mrSplit.toByteArray());
    } else {
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit =
            MRInputUtils.getNewSplitDetailsFromEvent(mrSplit, conf);
        diEvent = new RootInputDataInformationEvent(count++, newInputSplit);
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit =
            MRInputUtils.getOldSplitDetailsFromEvent(mrSplit, conf);
        diEvent = new RootInputDataInformationEvent(count++, oldInputSplit);
      }
    }
    events.add(diEvent);
  }
  return events;
}
Example 8
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * Helper API to generate splits.
 *
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk generateInputSplits(Configuration conf,
    Path inputSplitsDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  Job job = Job.getInstance(conf);
  JobConf jobConf = new JobConf(conf);
  conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  if (jobConf.getUseNewMapper()) {
    LOG.info("Generating new input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeNewSplits(job, inputSplitsDir);
  } else {
    LOG.info("Generating old input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeOldSplits(jobConf, inputSplitsDir);
  }
}
Example 9
Source File: MRInputSplitDistributor.java From tez with Apache License 2.0
@Override
public List<Event> initialize() throws IOException {
  StopWatch sw = new StopWatch().start();
  MRInputUserPayloadProto userPayloadProto =
      MRInputHelpers.parseMRInputPayload(getContext().getInputUserPayload());
  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to parse MRInput payload into proto: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  Configuration conf = TezUtils.createConfFromByteString(
      userPayloadProto.getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();

  MRInputUserPayloadProto.Builder updatedPayloadBuilder =
      MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events =
      Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  InputUpdatePayloadEvent updatePayloadEvent = InputUpdatePayloadEvent.create(
      updatedPayloadBuilder.build().toByteString().asReadOnlyByteBuffer());
  events.add(updatePayloadEvent);

  int count = 0;
  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {
    InputDataInformationEvent diEvent;
    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = InputDataInformationEvent.createWithSerializedPayload(count++,
          mrSplit.toByteString().asReadOnlyByteBuffer());
    } else {
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit =
            MRInputUtils.getNewSplitDetailsFromEvent(mrSplit, conf);
        diEvent = InputDataInformationEvent.createWithObjectPayload(count++, newInputSplit);
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit =
            MRInputUtils.getOldSplitDetailsFromEvent(mrSplit, conf);
        diEvent = InputDataInformationEvent.createWithObjectPayload(count++, oldInputSplit);
      }
    }
    events.add(diEvent);
  }
  return events;
}