org.apache.hadoop.mapred.TaskAttemptID Java Examples
The following examples show how to use
org.apache.hadoop.mapred.TaskAttemptID.
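Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the TaskAttemptID operations the examples rely on: building an id by hand, parsing one back with forName(), and navigating to the owning TaskID and JobID. The "jt" job-tracker identifier and the index values are arbitrary placeholders.

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdSketch {
    public static void main(String[] args) {
        // Build an attempt id directly: job tracker id "jt", job 0, map task 3, first attempt.
        TaskAttemptID attempt = new TaskAttemptID("jt", 0, TaskType.MAP, 3, 0);
        System.out.println(attempt); // e.g. attempt_jt_0000_m_000003_0

        // Round-trip through the canonical string form.
        TaskAttemptID parsed = TaskAttemptID.forName(attempt.toString());

        // Navigate back to the owning task and job, as several examples below do.
        TaskID task = parsed.getTaskID();
        System.out.println(task + " belongs to " + parsed.getJobID());
    }
}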
Example #1
Source File: HadoopCfgUtils.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
public static TaskID getTaskID(Configuration cfg) {
    // first try with the attempt since some Hadoop versions mix the two
    String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
    if (StringUtils.hasText(taskAttemptId)) {
        try {
            return TaskAttemptID.forName(taskAttemptId).getTaskID();
        } catch (IllegalArgumentException ex) {
            // the task attempt is invalid (Tez in particular uses the wrong string - see #346)
            // try to fallback to task id
            return parseTaskIdFromTaskAttemptId(taskAttemptId);
        }
    }
    String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
    // double-check task id bug in Hadoop 2.5.x
    if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
        return TaskID.forName(taskIdProp);
    }
    return null;
}
Example #2
Source File: ContentIndexingColumnBasedHandlerTest.java From datawave with Apache License 2.0 | 6 votes |
@Before
public void setUp() throws Exception {
    conf = new Configuration();
    conf.addResource("config/all-config.xml");
    ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ctx.getConfiguration().setInt(ContentIndexingColumnBasedHandler.NUM_SHARDS, 131);
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_TNAME, "shard");
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_GIDX_TNAME, "shardIndex");
    ctx.getConfiguration().set(ContentIndexingColumnBasedHandler.SHARD_GRIDX_TNAME, "shardIndex");
    ctx.getConfiguration().set(TypeRegistry.INGEST_DATA_TYPES, "test");
    ctx.getConfiguration().set("data.name", "test");
    ctx.getConfiguration().set("test.data.auth.id.mode", "NEVER");
    ctx.getConfiguration().set("test" + BaseIngestHelper.DEFAULT_TYPE, LcNoDiacriticsType.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.HANDLER_CLASSES, TestContentIndexingColumnBasedHandler.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.RAW_READER, TestEventRecordReader.class.getName());
    ctx.getConfiguration().set("test" + TypeRegistry.INGEST_HELPER, TestContentBaseIngestHelper.class.getName());
    ctx.getConfiguration().set(TypeRegistry.EXCLUDED_HANDLER_CLASSES, "FAKE_HANDLER_CLASS"); // it will die if this field is not faked

    helper = new TestContentBaseIngestHelper();
    colVis = new ColumnVisibility("");
}
Example #3
Source File: SplitBasedHashPartitionerTest.java From datawave with Apache License 2.0 | 6 votes |
private TaskInputOutputContextImpl getTaskInputOutputContext(final String testFilePath, final Configuration conf) {
    return new TaskInputOutputContextImpl(conf, new TaskAttemptID(), null, null, null) {
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            throw new UnsupportedOperationException();
        }

        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            throw new UnsupportedOperationException();
        }

        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            throw new UnsupportedOperationException();
        }

        @Deprecated
        public Path[] getLocalCacheFiles() throws IOException {
            return new Path[] {new Path(testFilePath)};
        }
    };
}
Example #4
Source File: HadoopSource.java From twister2 with Apache License 2.0 | 6 votes |
@Override
public boolean hasNext() {
    if (currentReader != null) {
        try {
            boolean current = currentReader.nextKeyValue();
            while (!current && consumingSplit < assignedSplits.size() - 1) {
                TaskID taskID = new TaskID(context.getId(), context.getIndex(),
                    TaskType.MAP, context.getIndex());
                TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
                consumingSplit++;
                TaskAttemptContextImpl taskAttemptContext =
                    new TaskAttemptContextImpl(jconf, taskAttemptID);
                currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
                    taskAttemptContext);
                currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
                current = currentReader.nextKeyValue();
            }
            return current;
        } catch (IOException | InterruptedException e) {
            throw new RuntimeException("Failed to read the next key value", e);
        }
    }
    return false;
}
Example #5
Source File: HadoopV1OutputCollector.java From ignite with Apache License 2.0 | 6 votes |
/**
 * @param jobConf Job configuration.
 * @param taskCtx Task context.
 * @param directWrite Direct write flag.
 * @param fileName File name.
 * @throws IOException In case of IO exception.
 */
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
    @Nullable String fileName, TaskAttemptID attempt) throws IOException {
    this.jobConf = jobConf;
    this.taskCtx = taskCtx;
    this.attempt = attempt;

    if (directWrite) {
        jobConf.set("mapreduce.task.attempt.id", attempt.toString());

        OutputFormat outFormat = jobConf.getOutputFormat();

        writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
    }
    else
        writer = null;
}
Example #6
Source File: HadoopSourceWithMap.java From twister2 with Apache License 2.0 | 6 votes |
@Override
public boolean hasNext() {
    if (currentReader != null) {
        try {
            boolean current = currentReader.nextKeyValue();
            while (!current && consumingSplit < assignedSplits.size() - 1) {
                TaskID taskID = new TaskID(context.getId(), context.getIndex(),
                    TaskType.MAP, context.getIndex());
                TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
                consumingSplit++;
                TaskAttemptContextImpl taskAttemptContext =
                    new TaskAttemptContextImpl(jconf, taskAttemptID);
                currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
                    taskAttemptContext);
                currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
                current = currentReader.nextKeyValue();
            }
            return current;
        } catch (IOException | InterruptedException e) {
            throw new RuntimeException("Failed to read the next key value", e);
        }
    }
    return false;
}
Example #7
Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0 | 6 votes |
public void commitTask(JobConf conf, TaskAttemptID taskAttemptID) throws IOException {
    Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
    if (taskOutputPath != null) {
        FileSystem fs = taskOutputPath.getFileSystem(conf);
        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();
            // Move the task outputs to their final place
            moveTaskOutputs(conf, taskAttemptID, fs, jobOutputPath, taskOutputPath);
            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true)) {
                LOG.info("Failed to delete the temporary output" +
                    " directory of task: " + taskAttemptID + " - " + taskOutputPath);
            }
            LOG.info("Saved output of task '" + taskAttemptID + "' to " + jobOutputPath);
        }
    }
}
Example #8
Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0 | 6 votes |
public boolean needsTaskCommit(JobConf conf, TaskAttemptID taskAttemptID) throws IOException {
    try {
        Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
        if (taskOutputPath != null) {
            // Get the file-system for the task output directory
            FileSystem fs = taskOutputPath.getFileSystem(conf);
            // since task output path is created on demand,
            // if it exists, task needs a commit
            if (fs.exists(taskOutputPath)) {
                return true;
            }
        }
    } catch (IOException ioe) {
        throw ioe;
    }
    return false;
}
Example #9
Source File: FileOutputCommitterWrapper.java From stratosphere with Apache License 2.0 | 6 votes |
public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
    Path outputPath = FileOutputFormat.getOutputPath(conf);
    if (outputPath != null) {
        Path p = new Path(outputPath,
            (FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR + "_" + taskAttemptID.toString()));
        try {
            FileSystem fs = p.getFileSystem(conf);
            return p.makeQualified(fs);
        } catch (IOException ie) {
            LOG.warn(StringUtils.stringifyException(ie));
            return p;
        }
    }
    return null;
}
Example #10
Source File: TestStreamingStatus.java From big-c with Apache License 2.0 | 5 votes |
void validateTaskStderr(StreamJob job, TaskType type) throws IOException {
    TaskAttemptID attemptId =
        new TaskAttemptID(new TaskID(job.jobId_, type, 0), 0);

    String log = MapReduceTestUtil.readTaskLog(TaskLog.LogName.STDERR,
        attemptId, false);

    // trim() is called on expectedStderr here because the method
    // MapReduceTestUtil.readTaskLog() returns trimmed String.
    assertTrue(log.equals(expectedStderr.trim()));
}
Example #11
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1)
            + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
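The zero-padding expression above can be hard to read at a glance. The hypothetical helper below (not part of Flink) produces the same synthetic reduce-attempt string with a single format specifier, assuming the 1-based task number is positive and at most six digits.

// Hypothetical equivalent of the attempt-id string built in open() above:
// left-pads the 1-based task number to six digits,
// e.g. taskNumber 7 -> "attempt__0000_r_000008_0".
static String syntheticReduceAttempt(int taskNumber) {
    return "attempt__0000_r_" + String.format("%06d", taskNumber + 1) + "_0";
}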
Example #12
Source File: TestEventFetcher.java From big-c with Apache License 2.0 | 5 votes |
private MapTaskCompletionEventsUpdate getMockedCompletionEventsUpdate(
    int startIdx, int numEvents) {
    ArrayList<TaskCompletionEvent> tceList =
        new ArrayList<TaskCompletionEvent>(numEvents);
    for (int i = 0; i < numEvents; ++i) {
        int eventIdx = startIdx + i;
        TaskCompletionEvent tce = new TaskCompletionEvent(eventIdx,
            new TaskAttemptID("12345", 1, TaskType.MAP, eventIdx, 0),
            eventIdx, true, TaskCompletionEvent.Status.SUCCEEDED,
            "http://somehost:8888");
        tceList.add(tce);
    }
    TaskCompletionEvent[] events = {};
    return new MapTaskCompletionEventsUpdate(tceList.toArray(events), false);
}
Example #13
Source File: TestShuffleScheduler.java From big-c with Apache License 2.0 | 5 votes |
@SuppressWarnings("rawtypes") @Test public void testTipFailed() throws Exception { JobConf job = new JobConf(); job.setNumMapTasks(2); TaskStatus status = new TaskStatus() { @Override public boolean getIsMap() { return false; } @Override public void addFetchFailedMap(TaskAttemptID mapTaskId) { } }; Progress progress = new Progress(); TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE, 0, 0); ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status, reduceId, null, progress, null, null, null); JobID jobId = new JobID(); TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1); scheduler.tipFailed(taskId1); Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(), 0.0f); Assert.assertFalse(scheduler.waitUntilDone(1)); TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0); scheduler.tipFailed(taskId0); Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(), 0.0f); Assert.assertTrue(scheduler.waitUntilDone(1)); }
Example #14
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1)
            + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example #15
Source File: HadoopSource.java From twister2 with Apache License 2.0 | 5 votes |
@Override
public void prepare(TSetContext ctx) {
    this.context = ctx;
    Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
    jconf = new JobConf(hadoopConf);
    try {
        format = inputClazz.newInstance();
        JobContext jobContext = new JobContextImpl(hadoopConf,
            new JobID(context.getId(), context.getIndex()));
        List<InputSplit> splits = format.getSplits(jobContext);
        for (int i = 0; i < splits.size(); i++) {
            if (i % context.getParallelism() == context.getIndex()) {
                assignedSplits.add(splits.get(i));
            }
        }

        if (assignedSplits.size() > 0) {
            TaskID taskID = new TaskID(context.getId(), context.getIndex(),
                TaskType.MAP, context.getIndex());
            TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
            TaskAttemptContextImpl taskAttemptContext =
                new TaskAttemptContextImpl(jconf, taskAttemptID);
            currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
                taskAttemptContext);
            currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        }
    } catch (InstantiationException | IllegalAccessException
        | InterruptedException | IOException e) {
        throw new RuntimeException("Failed to initialize hadoop input", e);
    }
}
Example #16
Source File: HadoopSourceWithMap.java From twister2 with Apache License 2.0 | 5 votes |
@Override
public void prepare(TSetContext ctx) {
    this.context = ctx;
    Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
    jconf = new JobConf(hadoopConf);
    try {
        format = inputClazz.newInstance();
        JobContext jobContext = new JobContextImpl(hadoopConf,
            new JobID(context.getId(), context.getIndex()));
        List<InputSplit> splits = format.getSplits(jobContext);
        for (int i = 0; i < splits.size(); i++) {
            if (i % context.getParallelism() == context.getIndex()) {
                assignedSplits.add(splits.get(i));
            }
        }

        if (assignedSplits.size() > 0) {
            TaskID taskID = new TaskID(context.getId(), context.getIndex(),
                TaskType.MAP, context.getIndex());
            TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
            TaskAttemptContextImpl taskAttemptContext =
                new TaskAttemptContextImpl(jconf, taskAttemptID);
            currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
                taskAttemptContext);
            currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        }
    } catch (InstantiationException | IllegalAccessException
        | InterruptedException | IOException e) {
        throw new RuntimeException("Failed to initialize hadoop input", e);
    }
}
Example #17
Source File: TestPipeApplication.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * clean previous std error and outs
 */
private void initStdOut(JobConf configuration) {
    TaskAttemptID taskId = TaskAttemptID.forName(configuration
        .get(MRJobConfig.TASK_ATTEMPT_ID));
    File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
    File stdErr = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDERR);
    // prepare folder
    if (!stdOut.getParentFile().exists()) {
        stdOut.getParentFile().mkdirs();
    } else {
        // clean logs
        stdOut.deleteOnExit();
        stdErr.deleteOnExit();
    }
}
Example #18
Source File: TestPipeApplication.java From hadoop with Apache License 2.0 | 5 votes |
private String readStdOut(JobConf conf) throws Exception {
    TaskAttemptID taskId = TaskAttemptID.forName(conf
        .get(MRJobConfig.TASK_ATTEMPT_ID));
    File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);

    return readFile(stdOut);
}
Example #19
Source File: TestEventFetcher.java From hadoop with Apache License 2.0 | 5 votes |
private MapTaskCompletionEventsUpdate getMockedCompletionEventsUpdate(
    int startIdx, int numEvents) {
    ArrayList<TaskCompletionEvent> tceList =
        new ArrayList<TaskCompletionEvent>(numEvents);
    for (int i = 0; i < numEvents; ++i) {
        int eventIdx = startIdx + i;
        TaskCompletionEvent tce = new TaskCompletionEvent(eventIdx,
            new TaskAttemptID("12345", 1, TaskType.MAP, eventIdx, 0),
            eventIdx, true, TaskCompletionEvent.Status.SUCCEEDED,
            "http://somehost:8888");
        tceList.add(tce);
    }
    TaskCompletionEvent[] events = {};
    return new MapTaskCompletionEventsUpdate(tceList.toArray(events), false);
}
Example #20
Source File: TestShuffleScheduler.java From hadoop with Apache License 2.0 | 5 votes |
@SuppressWarnings("rawtypes") @Test public void testTipFailed() throws Exception { JobConf job = new JobConf(); job.setNumMapTasks(2); TaskStatus status = new TaskStatus() { @Override public boolean getIsMap() { return false; } @Override public void addFetchFailedMap(TaskAttemptID mapTaskId) { } }; Progress progress = new Progress(); TaskAttemptID reduceId = new TaskAttemptID("314159", 0, TaskType.REDUCE, 0, 0); ShuffleSchedulerImpl scheduler = new ShuffleSchedulerImpl(job, status, reduceId, null, progress, null, null, null); JobID jobId = new JobID(); TaskID taskId1 = new TaskID(jobId, TaskType.REDUCE, 1); scheduler.tipFailed(taskId1); Assert.assertEquals("Progress should be 0.5", 0.5f, progress.getProgress(), 0.0f); Assert.assertFalse(scheduler.waitUntilDone(1)); TaskID taskId0 = new TaskID(jobId, TaskType.REDUCE, 0); scheduler.tipFailed(taskId0); Assert.assertEquals("Progress should be 1.0", 1.0f, progress.getProgress(), 0.0f); Assert.assertTrue(scheduler.waitUntilDone(1)); }
Example #21
Source File: MneMapredChunkDataTest.java From mnemonic with Apache License 2.0 | 5 votes |
@BeforeClass
public void setUp() throws Exception {
    m_workdir = new Path(
        System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    unsafe = Utils.getUnsafe();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.CHUNK});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.CHUNK});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{});
}
Example #22
Source File: MneMapredLongDataTest.java From mnemonic with Apache License 2.0 | 5 votes |
@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(
        System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "long-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.LONG});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 2);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.LONG});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{});
}
Example #23
Source File: MneMapredPersonDataTest.java From mnemonic with Apache License 2.0 | 5 votes |
@BeforeClass
public void setUp() throws IOException {
    m_workdir = new Path(
        System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();

    try {
        m_fs = FileSystem.getLocal(m_conf).getRaw();
        m_fs.delete(m_workdir, true);
        m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
        throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "person-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.DURABLE});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{PersonListEFProxy.class});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.DURABLE});
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{PersonListEFProxy.class});
}
Example #24
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1)
            + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example #25
Source File: TestVCFOutputFormat.java From Hadoop-BAM with MIT License | 5 votes |
@Before
public void setup() throws IOException, NoSuchMethodException, IllegalAccessException,
    InvocationTargetException, InstantiationException {
    test_vcf_output = File.createTempFile("test_vcf_output", "");
    test_vcf_output.delete();
    writable = new VariantContextWritable();
    Configuration conf = new Configuration();
    conf.set("hadoopbam.vcf.output-format", "VCF");
    KeyIgnoringVCFOutputFormat<Long> outputFormat = new KeyIgnoringVCFOutputFormat<Long>(conf);
    outputFormat.setHeader(readHeader());
    taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    writer = outputFormat.getRecordWriter(taskAttemptContext,
        new Path("file://" + test_vcf_output));
}
Example #26
Source File: RunningJobProxyV2.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
/**
 * Retrieve the diagnostic messages for a given task attempt.
 *
 * @param taskAttemptId Identifier of the task
 * @return an array of diagnostic messages for the task attempt with the id provided.
 * @throws java.io.IOException
 */
@Override
public String[] getTaskDiagnostics( Object taskAttemptId ) throws IOException {
    TaskAttemptID id = (TaskAttemptID) taskAttemptId;
    try {
        return delegateJob.getTaskDiagnostics( id );
    } catch ( InterruptedException e ) {
        throw new RuntimeException( e );
    }
}
Example #27
Source File: HadoopOutputFormatWrapper.java From stratosphere with Apache License 2.0 | 5 votes |
/**
 * commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    this.recordWriter.close(new DummyHadoopReporter());
    if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf,
            TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
        this.fileOutputCommitterWrapper.commitTask(this.jobConf,
            TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
    }
    //TODO: commitjob when all the tasks are finished
}
Example #28
Source File: TaskCompletionEventProxyTest.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
@Test
public void getTaskAttemptId() {
    final TaskAttemptID id = new TaskAttemptID( new TaskID(), 0 );
    org.apache.hadoop.mapred.TaskCompletionEvent delegate =
        new org.apache.hadoop.mapred.TaskCompletionEvent() {
            public org.apache.hadoop.mapred.TaskAttemptID getTaskAttemptId() {
                return id;
            }
        };
    TaskCompletionEventProxy proxy = new TaskCompletionEventProxy( delegate );

    assertEquals( id, proxy.getTaskAttemptId() );
}
Example #29
Source File: HadoopOutputFormatWrapper.java From stratosphere with Apache License 2.0 | 5 votes |
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    this.fileOutputCommitterWrapper.setupJob(this.jobConf);
    if (Integer.toString(taskNumber + 1).length() <= 6) {
        this.jobConf.set("mapred.task.id", "attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1)
            + "_0");
        //compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
        this.jobConf.set("mapreduce.task.output.dir",
            this.fileOutputCommitterWrapper.getTempTaskOutputPath(this.jobConf,
                TaskAttemptID.forName(this.jobConf.get("mapred.task.id"))).toString());
    } else {
        throw new IOException("task id too large");
    }
    this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf,
        Integer.toString(taskNumber + 1), new DummyHadoopProgressable());
}
Example #30
Source File: MRInputBase.java From incubator-tez with Apache License 2.0 | 5 votes |
public List<Event> initialize() throws IOException { getContext().requestInitialMemory(0l, null); // mandatory call MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload = MRHelpers.parseMRInputPayload(getContext().getUserPayload()); Preconditions.checkArgument(mrUserPayload.hasSplits() == false, "Split information not expected in " + this.getClass().getName()); Configuration conf = MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes()); this.jobConf = new JobConf(conf); // Add tokens to the jobConf - in case they are accessed within the RR / IF jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials()); TaskAttemptID taskAttemptId = new TaskAttemptID( new TaskID( Long.toString(getContext().getApplicationId().getClusterTimestamp()), getContext().getApplicationId().getId(), TaskType.MAP, getContext().getTaskIndex()), getContext().getTaskAttemptNumber()); jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString()); jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber()); this.inputRecordCounter = getContext().getCounters().findCounter( TaskCounter.INPUT_RECORDS_PROCESSED); useNewApi = this.jobConf.getUseNewMapper(); return null; }