org.apache.hadoop.mapred.JobContext Java Examples
The following examples show how to use
org.apache.hadoop.mapred.JobContext.
Each example notes its source project, file, and license.
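As background for the examples, JobContext in the mapred package is what the framework hands to an OutputCommitter during job setup and teardown, and it exposes the job's JobConf. The committer below is a minimal, hypothetical sketch (the class name and printed output are illustrative, not from any project cited here); it only shows where a JobContext appears and what can be read from it.

import java.io.IOException;

import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContext;

// Hypothetical committer: demonstrates the JobContext callback surface.
public class LoggingOutputCommitter extends OutputCommitter {

  @Override
  public void setupJob(JobContext jobContext) throws IOException {
    // JobContext exposes the job's JobConf (and, via getProgressible(),
    // a progress-reporting hook).
    String jobName = jobContext.getJobConf().getJobName();
    System.out.println("setting up job: " + jobName);
  }

  @Override
  public void setupTask(TaskAttemptContext taskContext) throws IOException { }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
    return false; // nothing to commit in this sketch
  }

  @Override
  public void commitTask(TaskAttemptContext taskContext) throws IOException { }

  @Override
  public void abortTask(TaskAttemptContext taskContext) throws IOException { }
}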
Example #1
Source File: HadoopOutputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testOpen() throws Exception {

  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

  HadoopOutputFormat<String, Long> outputFormat =
      new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.open(1, 1);

  verify(jobConf, times(2)).getOutputCommitter();
  verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
  verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class),
      any(JobConf.class), anyString(), any(Progressable.class));
}
Example #2
Source File: HadoopOutputFormatTest.java From flink with Apache License 2.0
@Test
public void testOpen() throws Exception {

  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

  HadoopOutputFormat<String, Long> outputFormat =
      new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.open(1, 1);

  verify(jobConf, times(2)).getOutputCommitter();
  verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
  verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class),
      any(JobConf.class), anyString(), any(Progressable.class));
}
Example #3
Source File: HadoopV1CleanupTask.java From ignite with Apache License 2.0
/** {@inheritDoc} */
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
  HadoopV2TaskContext ctx = (HadoopV2TaskContext) taskCtx;

  JobContext jobCtx = ctx.jobContext();

  try {
    OutputCommitter committer = jobCtx.getJobConf().getOutputCommitter();

    if (abort)
      committer.abortJob(jobCtx, JobStatus.State.FAILED);
    else
      committer.commitJob(jobCtx);
  }
  catch (IOException e) {
    throw new IgniteCheckedException(e);
  }
}
Example #4
Source File: HdfsHelper.java From DataLink with Apache License 2.0
TextWriterProxy(Configuration config, String fileName) throws IOException {
  fieldDelimiter = config.getChar(Key.FIELD_DELIMITER);
  columns = config.getListConfiguration(Key.COLUMN);
  String compress = config.getString(Key.COMPRESS, null);

  SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
  String attempt = "attempt_" + dateFormat.format(new Date()) + "_0001_m_000000_0";
  Path outputPath = new Path(fileName);
  // TODO: the TASK_ATTEMPT_ID still needs to be pinned down
  conf.set(JobContext.TASK_ATTEMPT_ID, attempt);

  FileOutputFormat outFormat = new TextOutputFormat();
  outFormat.setOutputPath(conf, outputPath);
  outFormat.setWorkOutputPath(conf, outputPath);
  if (null != compress) {
    Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
    if (null != codecClass) {
      outFormat.setOutputCompressorClass(conf, codecClass);
    }
  }

  writer = outFormat.getRecordWriter(fileSystem, conf, outputPath.toString(), Reporter.NULL);
}
Example #5
Source File: TestJobEndNotifier.java From big-c with Apache License 2.0
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
      .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down. Unregistration fails, so isLastAMRetry is recalculated.
  app.shutDownJob();
  // This is not the last AM attempt, so the user should see the job as still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
Example #6
Source File: TestJobEndNotifier.java From big-c with Apache License 2.0
@Test
public void testNotificationOnLastRetryNormalShutdown() throws Exception {
  HttpServer2 server = startHttpServer();
  // Act like it is the second attempt. Default max attempts is 2.
  MRApp app = spy(new MRAppWithCustomContainerAllocator(
      2, 2, true, this.getClass().getName(), true, 2, true));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForInternalState(job, JobStateInternal.SUCCEEDED);
  // Unregistration succeeds: successfullyUnregistered is set.
  app.shutDownJob();
  Assert.assertTrue(app.isLastAMRetry());
  Assert.assertEquals(1, JobEndServlet.calledTimes);
  Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED",
      JobEndServlet.requestUri.getQuery());
  Assert.assertEquals(JobState.SUCCEEDED.toString(),
      JobEndServlet.foundJobState);
  server.stop();
}
Example #7
Source File: TestJobEndNotifier.java From hadoop with Apache License 2.0
@Test
public void testAbsentNotificationOnNotLastRetryUnregistrationFailure()
    throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 1, false));
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
      .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down. Unregistration fails, so isLastAMRetry is recalculated.
  app.shutDownJob();
  // This is not the last AM attempt, so the user should see the job as still running.
  app.waitForState(job, JobState.RUNNING);
  Assert.assertFalse(app.isLastAMRetry());
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
Example #8
Source File: HadoopSegmentPreprocessingJob.java From incubator-pinot with Apache License 2.0
private void setHadoopJobConfigs(Job job, int numInputPaths) {
  job.getConfiguration().set(JobContext.JOB_NAME, this.getClass().getName());
  // Turn this on so that class paths the user specifies are always used first.
  job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, "true");
  // Turn this off since we don't need an empty file in the output directory.
  job.getConfiguration().set(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, "false");

  job.setJarByClass(HadoopSegmentPreprocessingJob.class);

  String hadoopTokenFileLocation = System.getenv(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION);
  if (hadoopTokenFileLocation != null) {
    job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocation);
  }

  // Mapper configs.
  job.setMapperClass(SegmentPreprocessingMapper.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);
  job.getConfiguration().setInt(JobContext.NUM_MAPS, numInputPaths);

  // Reducer configs.
  job.setReducerClass(SegmentPreprocessingReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
}
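A side note on this example: JobContext is used here purely as a source of configuration-key constants (inherited from MRJobConfig), not as a runtime context object. The tiny sketch below just prints the raw keys behind the two constants used above; on Hadoop 2.x these resolve, to the best of my knowledge, to "mapreduce.job.name" and "mapreduce.job.maps".

import org.apache.hadoop.mapred.JobContext;

// Prints the raw configuration keys behind the constants used above.
public class JobContextKeys {
  public static void main(String[] args) {
    System.out.println(JobContext.JOB_NAME); // expected: mapreduce.job.name
    System.out.println(JobContext.NUM_MAPS); // expected: mapreduce.job.maps
  }
}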
Example #9
Source File: HadoopOutputFormatTest.java From flink with Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {

  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

  HadoopOutputFormat<String, Long> outputFormat =
      new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.finalizeGlobal(1);

  verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
Example #10
Source File: HadoopOutputFormatTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testFinalizeGlobal() throws Exception {

  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

  HadoopOutputFormat<String, Long> outputFormat =
      new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.finalizeGlobal(1);

  verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
Example #11
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0
/**
 * Create the temporary output file for the hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {

    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
    this.outputCommitter = this.jobConf.getOutputCommitter();

    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

    this.outputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
        Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
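The zero-padding gymnastics in the TaskAttemptID construction above amount to formatting the 1-based task number as a six-digit, zero-padded field. A small equivalent sketch (the helper name is made up for illustration):

import org.apache.hadoop.mapred.TaskAttemptID;

// Equivalent, more direct construction of the same dummy attempt ID.
public class AttemptIds {

  static String buildAttemptId(int taskNumber) {
    return String.format("attempt__0000_r_%06d_0", taskNumber + 1);
  }

  public static void main(String[] args) {
    TaskAttemptID id = TaskAttemptID.forName(buildAttemptId(0));
    System.out.println(id); // attempt__0000_r_000001_0
  }
}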
Example #12
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

  try {
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Example #13
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0
private void commitJob(String location) throws IOException {
  jobConf.set(OUTDIR, location);
  JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
  OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
  // finalize HDFS output format
  outputCommitter.commitJob(jobContext);
}
Example #14
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

  try {
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Example #15
Source File: TestJobEndNotifier.java From hadoop with Apache License 2.0
@Test
public void testNotificationOnLastRetryUnregistrationFailure() throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently isLastRetry is always false at the start of the MRAppMaster,
  // unless the staging area already exists or a commit has already started.
  // Manually set isLastRetry to true here; it should reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
      .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down; the user should see the FAILED state.
  // Unregistration fails and isLastAMRetry is recalculated. The reboot stops
  // the service internally, so there is no need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since this is not the last retry, JobEndServlet was never called.
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
Example #16
Source File: TestGridMixClasses.java From hadoop with Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test(timeout = 30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext =
      new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context =
      new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // map() should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
Example #17
Source File: TestJobEndNotifier.java From big-c with Apache License 2.0
@Test
public void testNotificationOnLastRetryUnregistrationFailure() throws Exception {
  HttpServer2 server = startHttpServer();
  MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false,
      this.getClass().getName(), true, 2, false));
  // Currently isLastRetry is always false at the start of the MRAppMaster,
  // unless the staging area already exists or a commit has already started.
  // Manually set isLastRetry to true here; it should reset to false when
  // unregistration fails.
  app.isLastAMRetry = true;
  doNothing().when(app).sysexit();
  JobConf conf = new JobConf();
  conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
      JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
  JobImpl job = (JobImpl) app.submit(conf);
  app.waitForState(job, JobState.RUNNING);
  app.getContext().getEventHandler()
      .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
  app.waitForInternalState(job, JobStateInternal.REBOOT);
  // Now shut down; the user should see the FAILED state.
  // Unregistration fails and isLastAMRetry is recalculated. The reboot stops
  // the service internally, so there is no need to shut down twice.
  app.waitForServiceToStop(10000);
  Assert.assertFalse(app.isLastAMRetry());
  // Since this is not the last retry, JobEndServlet was never called.
  Assert.assertEquals(0, JobEndServlet.calledTimes);
  Assert.assertNull(JobEndServlet.requestUri);
  Assert.assertNull(JobEndServlet.foundJobState);
  server.stop();
}
Example #18
Source File: TestGridMixClasses.java From big-c with Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test(timeout = 10000)
public void testLoadMapper() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext =
      new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx =
      new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, clean
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());
}
Example #19
Source File: TestGridMixClasses.java From big-c with Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
@Test(timeout = 30000)
public void testSleepMapper() throws Exception {
  SleepJob.SleepMapper test = new SleepJob.SleepMapper();

  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  TaskAttemptID taskId = new TaskAttemptID();
  FakeRecordLLReader reader = new FakeRecordLLReader();
  LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter();
  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  SleepSplit split = getSleepSplit();
  MapContext<LongWritable, LongWritable, GridmixKey, NullWritable> mapcontext =
      new MapContextImpl<LongWritable, LongWritable, GridmixKey, NullWritable>(
          conf, taskId, reader, writer, committer, reporter, split);
  Context context =
      new WrappedMapper<LongWritable, LongWritable, GridmixKey, NullWritable>()
          .getMapContext(mapcontext);

  long start = System.currentTimeMillis();
  LOG.info("start:" + start);
  LongWritable key = new LongWritable(start + 2000);
  LongWritable value = new LongWritable(start + 2000);
  // map() should sleep for 2 seconds
  test.map(key, value, context);
  LOG.info("finish:" + System.currentTimeMillis());
  assertTrue(System.currentTimeMillis() >= (start + 2000));

  test.cleanup(context);
  assertEquals(1, writer.getData().size());
}
Example #20
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * Create the temporary output file for the hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {

    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
    this.outputCommitter = this.jobConf.getOutputCommitter();

    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

    this.outputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
        Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
Example #21
Source File: TestGridMixClasses.java From hadoop with Apache License 2.0
@SuppressWarnings({"rawtypes", "unchecked"})
@Test(timeout = 10000)
public void testLoadMapper() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(JobContext.NUM_REDUCES, 2);

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  TaskAttemptID taskId = new TaskAttemptID();
  RecordReader<NullWritable, GridmixRecord> reader = new FakeRecordReader();

  LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter();

  OutputCommitter committer = new CustomOutputCommitter();
  StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter();
  LoadSplit split = getLoadSplit();

  MapContext<NullWritable, GridmixRecord, GridmixKey, GridmixRecord> mapContext =
      new MapContextImpl<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>(
          conf, taskId, reader, writer, committer, reporter, split);
  // context
  Context ctx =
      new WrappedMapper<NullWritable, GridmixRecord, GridmixKey, GridmixRecord>()
          .getMapContext(mapContext);

  reader.initialize(split, ctx);
  ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
  CompressionEmulationUtil.setCompressionEmulationEnabled(ctx.getConfiguration(), true);

  LoadJob.LoadMapper mapper = new LoadJob.LoadMapper();
  // setup, map, clean
  mapper.run(ctx);

  Map<GridmixKey, GridmixRecord> data = writer.getData();
  // check result
  assertEquals(2, data.size());
}
Example #22
Source File: KafkaInputFormat.java From HiveKa with Apache License 2.0
public static void setKafkaClientTimeout(JobContext job, int val) {
  job.getConfiguration().setInt(KAFKA_CLIENT_SO_TIMEOUT, val);
}
Example #23
Source File: BlurHiveMRLoaderOutputCommitter.java From incubator-retired-blur with Apache License 2.0
@Override
public void commitJob(JobContext context) throws IOException {
  finishBulkJob(context, true);
}
Example #24
Source File: KafkaInputFormat.java From HiveKa with Apache License 2.0
public static int getKafkaClientTimeout(JobContext job) {
  return job.getConfiguration().getInt(KAFKA_CLIENT_SO_TIMEOUT, 60000);
}
Example #25
Source File: KafkaInputFormat.java From HiveKa with Apache License 2.0
public static void setKafkaMaxPullHrs(JobContext job, int val) {
  job.getConfiguration().setInt(KAFKA_MAX_PULL_HRS, val);
}
Example #26
Source File: KafkaInputFormat.java From HiveKa with Apache License 2.0
public static int getKafkaMaxPullHrs(JobContext job) {
  return job.getConfiguration().getInt(KAFKA_MAX_PULL_HRS, -1);
}
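Taken together, the KafkaInputFormat helpers above treat the JobContext's configuration as a typed property bag. Below is a hypothetical driver sketch: it constructs the context directly, as the Flink examples earlier on this page do, the timeout value is illustrative, and the import for KafkaInputFormat itself is omitted since its package is not shown in these excerpts.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;

// Hypothetical usage of the static helpers above.
public class KafkaJobSetup {
  public static void main(String[] args) {
    JobConf jobConf = new JobConf();
    JobContext jobContext = new JobContextImpl(jobConf, new JobID());

    // Set a 30-second Kafka socket timeout, then read it back.
    KafkaInputFormat.setKafkaClientTimeout(jobContext, 30000);
    System.out.println(KafkaInputFormat.getKafkaClientTimeout(jobContext)); // 30000

    // Unset values fall back to the defaults seen above (-1 for max pull hours).
    System.out.println(KafkaInputFormat.getKafkaMaxPullHrs(jobContext)); // -1
  }
}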
Example #27
Source File: BlurHiveOutputCommitter.java From incubator-retired-blur with Apache License 2.0
@Override
public void setupJob(JobContext jobContext) throws IOException {
}