org.apache.hadoop.mapred.JobContextImpl Java Examples
The following examples show how to use org.apache.hadoop.mapred.JobContextImpl.
The project, source file, and license are noted above each example.
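Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: a JobConf and a JobID are wrapped in a JobContextImpl, which is then handed to the job's OutputCommitter for setup and commit. This sketch is illustrative only; the output path is a placeholder, and it assumes a Hadoop version in which the JobContextImpl constructor is accessible from user code, as it is in the projects below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapreduce.JobID;

public class JobContextImplSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder configuration; point the output path at a real directory for your job.
        JobConf jobConf = new JobConf();
        FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/job-output"));

        // The old-API JobContextImpl is built from a JobConf and a JobID.
        // Assumes the constructor is accessible in the Hadoop version in use.
        JobContext jobContext = new JobContextImpl(jobConf, new JobID());

        // Typical lifecycle around the committer configured in the JobConf
        // (FileOutputCommitter by default):
        OutputCommitter committer = jobConf.getOutputCommitter();
        committer.setupJob(jobContext);   // before tasks write output
        // ... tasks run and write their output ...
        committer.commitJob(jobContext);  // after all tasks succeed
    }
}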
Example #1
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        // build an attempt ID of the form "attempt__0000_r_<task number zero-padded to six digits>_0"
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example #2
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

    try {
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example #3
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        // build an attempt ID of the form "attempt__0000_r_<task number zero-padded to six digits>_0"
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example #4
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

    try {
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example #5
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0
private void commitJob(String location) throws IOException {
    jobConf.set(OUTDIR, location);
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
}
Example #6
Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0
public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTaskAttemptPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getTaskAttemptPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir " + expectedFile + " still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #7
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0
public void testAbort() throws IOException {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTaskAttemptPath(tContext));

    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getTaskAttemptPath(tContext), file).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());

    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString());
    assertFalse("job temp dir " + expectedFile + " still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #8
Source File: HadoopV2TaskContext.java From ignite with Apache License 2.0
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set the new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
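The constructor above deserializes the JobConf from a DataInput with readFields before wrapping it in a JobContextImpl. For illustration, the following hypothetical round-trip shows how such a stream can be produced; it relies only on the Writable methods that JobConf inherits from Configuration (write and readFields), and the property name is made up for the example.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.mapred.JobConf;

public class JobConfRoundTrip {
    public static void main(String[] args) throws Exception {
        // Serialize a JobConf; Configuration implements Writable, so a JobConf
        // can be written to any DataOutput.
        JobConf original = new JobConf();
        original.set("example.key", "example.value"); // hypothetical property, for illustration only

        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh JobConf, the same way HadoopV2TaskContext
        // reads it from jobConfDataInput.
        JobConf copy = new JobConf();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy.get("example.key")); // prints "example.value"
    }
}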
Example #9
Source File: HadoopV2JobResourceManager.java From ignite with Apache License 2.0
/**
 * Creates new instance.
 *
 * @param jobId Job ID.
 * @param ctx Hadoop job context.
 * @param log Logger.
 * @param job Hadoop job.
 */
public HadoopV2JobResourceManager(HadoopJobId jobId, JobContextImpl ctx, IgniteLogger log, HadoopV2Job job) {
    this.jobId = jobId;
    this.ctx = ctx;
    this.log = log.getLogger(HadoopV2JobResourceManager.class);
    this.job = job;
}
Example #10
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        // build an attempt ID of the form "attempt__0000_r_<task number zero-padded to six digits>_0"
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example #11
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

    try {
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example #12
Source File: HbaseClient.java From presto-connectors with Apache License 2.0
/**
 * Fetches the TabletSplitMetadata for a query against an Hbase table.
 * <p>
 * Does a whole bunch of fun stuff! Splitting on row ID ranges, applying secondary indexes, column pruning,
 * all sorts of sweet optimizations. What you have here is an important method.
 *
 * @param session Current session
 * @param schema Schema name
 * @param table Table Name
 * @param rowIdDomain Domain for the row ID
 * @param constraints Column constraints for the query
 * @return List of TabletSplitMetadata objects for Presto
 */
public List<TabletSplitMetadata> getTabletSplits(
        ConnectorSession session,
        String schema,
        String table,
        Optional<Domain> rowIdDomain,
        List<HbaseColumnConstraint> constraints) //HbaseRowSerializer serializer
{
    try {
        TableName tableName = TableName.valueOf(schema, table);
        LOG.debug("Getting tablet splits for table %s", tableName);

        // Get the initial Range based on the row ID domain
        Collection<Range> rowIdRanges = getRangesFromDomain(rowIdDomain); //serializer

        // Split the ranges on tablet boundaries, if enabled
        // Create TabletSplitMetadata objects for each range
        boolean fetchTabletLocations = HbaseSessionProperties.isOptimizeLocalityEnabled(session);
        LOG.debug("Fetching tablet locations: %s", fetchTabletLocations);

        ImmutableList.Builder<TabletSplitMetadata> builder = ImmutableList.builder();

        if (rowIdRanges.size() == 0) {
            // no rowkey filter in this request
            LOG.warn("This request has no rowkey filter");
        }
        List<Scan> rowIdScans = rowIdRanges.size() == 0
                ? Arrays.asList(new Scan())
                : rowIdRanges.stream().map(HbaseClient::getScanFromPrestoRange).collect(Collectors.toList());

        for (Scan scan : rowIdScans) {
            TableInputFormat tableInputFormat = getNewTableInputFormat(connection, tableName);
            tableInputFormat.setConf(connection.getConfiguration());
            tableInputFormat.setScan(scan);

            JobContext context = new JobContextImpl(new JobConf(), null);
            List<TableSplit> splits = tableInputFormat.getSplits(context)
                    .stream().map(x -> (TableSplit) x).collect(Collectors.toList());

            for (TableSplit split : splits) {
                TabletSplitMetadata metadata = new TabletSplitMetadata(
                        split.getTable().getName(),
                        split.getStartRow(),
                        split.getEndRow(),
                        TabletSplitMetadata.convertScanToString(split.getScan()),
                        split.getRegionLocation(),
                        split.getLength());
                builder.add(metadata);
            }
        }
        List<TabletSplitMetadata> tabletSplits = builder.build();

        // Log some fun stuff and return the tablet splits
        LOG.debug("Number of splits for table %s is %d with %d ranges", tableName, tabletSplits.size(), rowIdRanges.size());
        return tabletSplits;
    }
    catch (Exception e) {
        throw new PrestoException(UNEXPECTED_HBASE_ERROR, "Failed to get splits from Hbase", e);
    }
}
Example #13
Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTaskAttemptPath(tContext));
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);

    // validate output
    File expectedFile = new File(new Path(outDir, file).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    assertEquals(output, expectedOutput.toString());

    FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #14
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
    JobConf job = new JobConf();
    setConfForFileOutputCommitter(job);
    JobContext jContext = new JobContextImpl(job, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();
    FileOutputFormat.setWorkOutputPath(job, committer.getTaskAttemptPath(tContext));
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    String file = "test.txt";

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    // write output
    FileSystem localFs = FileSystem.getLocal(job);
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter);
    writeOutput(theRecordWriter, reporter);

    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);

    // validate output
    File expectedFile = new File(new Path(outDir, file).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    assertEquals(output, expectedOutput.toString());

    FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #15
Source File: HadoopV2TaskContext.java From ignite with Apache License 2.0
/**
 * Gets job context of the task.
 *
 * @return Job context.
 */
public JobContextImpl jobContext() {
    return jobCtx;
}