Java Code Examples for org.apache.hadoop.mapred.TaskAttemptID#forName()
The following examples show how to use org.apache.hadoop.mapred.TaskAttemptID#forName().
Each example links back to its original project and source file.
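forName() is the inverse of TaskAttemptID.toString(): it parses the canonical attempt-id string, attempt_<jtIdentifier>_<jobNumber>_<taskType>_<taskNumber>_<attemptNumber> (for example attempt_200707121733_0003_m_000005_0), back into a typed TaskAttemptID, and throws IllegalArgumentException for malformed input. Before the project examples, here is a minimal standalone sketch of the call; the attempt string, class name, and printed values are illustrative and assume a Hadoop 2.x-era API:

import org.apache.hadoop.mapred.TaskAttemptID;

public class TaskAttemptIdForNameDemo {
    public static void main(String[] args) {
        // Parse a canonical attempt-id string (illustrative value).
        TaskAttemptID attempt =
                TaskAttemptID.forName("attempt_200707121733_0003_m_000005_0");

        // The parsed object exposes its components and round-trips back to the same string.
        System.out.println(attempt.getJobID());          // job_200707121733_0003
        System.out.println(attempt.getTaskType());       // MAP
        System.out.println(attempt.getTaskID().getId()); // 5
        System.out.println(attempt);                     // attempt_200707121733_0003_m_000005_0
    }
}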
Example 1
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(
                null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
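The string manipulation above left-pads the 1-based task number to six digits so the synthetic string still parses as a well-formed attempt id (the guard at the top rejects task numbers that would not fit). If you only need an equivalent synthetic id, the same value can be produced with a single zero-padding format specifier; a minimal sketch under that assumption (a fragment using the same imports as the example above), not the code Flink actually ships:

// Equivalent construction of the synthetic attempt id built above (illustrative).
int taskNumber = 7; // 0-based subtask index, as passed to open()
TaskAttemptID taskAttemptID = TaskAttemptID.forName(
        String.format("attempt__0000_r_%06d_0", taskNumber + 1));
// taskAttemptID.toString() -> "attempt__0000_r_000008_0"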
Example 2
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(
                null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example 3
Source File: TestPipeApplication.java From hadoop with Apache License 2.0
/**
 * clean previous std error and outs
 */
private void initStdOut(JobConf configuration) {
  TaskAttemptID taskId = TaskAttemptID.forName(configuration
      .get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
  File stdErr = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDERR);
  // prepare folder
  if (!stdOut.getParentFile().exists()) {
    stdOut.getParentFile().mkdirs();
  } else {
    // clean logs
    stdOut.deleteOnExit();
    stdErr.deleteOnExit();
  }
}
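Examples 3 through 6 show the usual pattern inside a running task: the framework publishes the current attempt id under MRJobConfig.TASK_ATTEMPT_ID, forName() turns it back into a typed id, and TaskLog uses that id to locate the attempt's log files (the boolean passed to getTaskLogFile() is the cleanup-attempt flag, as the comment in Example 10 spells out). A minimal sketch of the round trip, as a fragment with the property set by hand and an illustrative attempt string, since outside a task the framework does not set it for you:

JobConf conf = new JobConf();
// In a real task the framework sets this property; here it is set by hand for illustration.
conf.set(MRJobConfig.TASK_ATTEMPT_ID, "attempt_200707121733_0003_r_000002_1");

TaskAttemptID taskId = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
System.out.println(taskId.getTaskType()); // REDUCE
System.out.println(taskId.getJobID());    // job_200707121733_0003
System.out.println(taskId.getId());       // 1 (the attempt number)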
Example 4
Source File: TestPipeApplication.java From hadoop with Apache License 2.0
private String readStdOut(JobConf conf) throws Exception {
  TaskAttemptID taskId = TaskAttemptID.forName(conf
      .get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
  return readFile(stdOut);
}
Example 5
Source File: TestPipeApplication.java From big-c with Apache License 2.0
/**
 * clean previous std error and outs
 */
private void initStdOut(JobConf configuration) {
  TaskAttemptID taskId = TaskAttemptID.forName(configuration
      .get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
  File stdErr = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDERR);
  // prepare folder
  if (!stdOut.getParentFile().exists()) {
    stdOut.getParentFile().mkdirs();
  } else {
    // clean logs
    stdOut.deleteOnExit();
    stdErr.deleteOnExit();
  }
}
Example 6
Source File: TestPipeApplication.java From big-c with Apache License 2.0
private String readStdOut(JobConf conf) throws Exception {
  TaskAttemptID taskId = TaskAttemptID.forName(conf
      .get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdOut = TaskLog.getTaskLogFile(taskId, false, TaskLog.LogName.STDOUT);
  return readFile(stdOut);
}
Example 7
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(
                null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
Example 8
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Object serdeLib = Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
        Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
                "Expect a SerDe lib implementing both Serializer and Deserializer, but actually got "
                        + serdeLib.getClass().getName());
        recordSerDe = (Serializer) serdeLib;
        ReflectionUtils.setConf(recordSerDe, jobConf);
        // TODO: support partition properties, for now assume they're same as table properties
        SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);
        outputClass = recordSerDe.getSerializedClass();
    } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Error initializing Hive serializer", e);
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
            + taskNumber
            + "_0");
    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber);
    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    if (!isDynamicPartition) {
        staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
    } else {
        dynamicPartitionOffset = fieldNames.length - partitionColumns.size()
                + hiveTablePartition.getPartitionSpec().size();
    }

    numNonPartitionColumns = isPartitioned ? fieldNames.length - partitionColumns.size() : fieldNames.length;
    hiveConversions = new HiveObjectConversion[numNonPartitionColumns];
    List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
    for (int i = 0; i < numNonPartitionColumns; i++) {
        ObjectInspector objectInspector = HiveInspectors.getObjectInspector(fieldTypes[i]);
        objectInspectors.add(objectInspector);
        hiveConversions[i] = HiveInspectors.getConversion(objectInspector, fieldTypes[i].getLogicalType());
    }

    if (!isPartitioned) {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(fieldNames),
                objectInspectors);
    } else {
        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList(fieldNames).subList(0, fieldNames.length - partitionColumns.size()),
                objectInspectors);
        defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
    }
}
Example 9
Source File: ContainerHeader.java From garmadon with Apache License 2.0
private void setFrameworkComponent() {
    String[] commands = HeaderUtils.getArrayJavaCommandLine();
    mainClass = commands[0];
    switch (mainClass) {
        // MAPREDUCE
        case "org.apache.hadoop.mapreduce.v2.app.MRAppMaster":
            framework = Framework.MAPREDUCE;
            component = Component.APP_MASTER;
            break;
        case "org.apache.hadoop.mapred.YarnChild":
            framework = Framework.MAPREDUCE;
            if (commands.length > 4) {
                final TaskAttemptID firstTaskid = TaskAttemptID.forName(commands[3]);
                try {
                    component = Component.valueOf(firstTaskid.getTaskType().name());
                } catch (IllegalArgumentException ex) {
                    LOGGER.debug("Unknown component {}", firstTaskid.getTaskType().name());
                }
            }
            break;
        // SPARK
        case "org.apache.spark.deploy.yarn.ApplicationMaster":
            framework = Framework.SPARK;
            component = Component.APP_MASTER;
            break;
        case "org.apache.spark.deploy.yarn.ExecutorLauncher":
            framework = Framework.SPARK;
            component = Component.APP_MASTER;
            break;
        case "org.apache.spark.executor.CoarseGrainedExecutorBackend":
            framework = Framework.SPARK;
            component = Component.EXECUTOR;
            try {
                for (int i = 1; i < commands.length; i++) {
                    if (commands[i].equals("--executor-id")) {
                        executorId = commands[i + 1];
                        break;
                    }
                }
            } catch (Exception e) {
                LOGGER.debug("Failed to get executor id from command line", e);
            }
            break;
        // FLINK
        case "org.apache.flink.yarn.YarnApplicationMasterRunner":
            framework = Framework.FLINK;
            component = Component.APP_MASTER;
            break;
        case "org.apache.flink.yarn.entrypoint.YarnJobClusterEntrypoint":
            framework = Framework.FLINK;
            component = Component.APP_MASTER;
            break;
        case "org.apache.flink.yarn.entrypoint.YarnSessionClusterEntrypoint":
            framework = Framework.FLINK;
            component = Component.APP_MASTER;
            break;
        case "org.apache.flink.yarn.YarnTaskManager":
            framework = Framework.FLINK;
            component = Component.TASK_MANAGER;
            break;
        case "org.apache.flink.yarn.YarnTaskExecutorRunner":
            framework = Framework.FLINK;
            component = Component.TASK_MANAGER;
            break;
        // YARN
        default:
            break;
    }
}
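The branch of interest here is org.apache.hadoop.mapred.YarnChild: the task attempt id that YARN passes on the child JVM's command line is parsed with forName(), and the name of its TaskType is reused as the component name. A standalone sketch of just that mapping, with an illustrative attempt string standing in for the real command-line argument:

// Illustrative stand-in for the attempt id found on a YarnChild command line.
String attemptArg = "attempt_1558100000000_0042_m_000017_0";

TaskAttemptID firstTaskid = TaskAttemptID.forName(attemptArg);
String componentName = firstTaskid.getTaskType().name(); // "MAP"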
Example 10
Source File: Application.java From hadoop with Apache License 2.0
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf,
            RecordReader<FloatWritable, NullWritable> recordReader,
            OutputCollector<K2, V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  Map<String, String> env = new HashMap<String, String>();
  // add TMPDIR environment variable with the value of java.io.tmpdir
  env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
  env.put(Submitter.PORT,
          Integer.toString(serverSocket.getLocalPort()));

  //Add token to the environment if security is enabled
  Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf
      .getCredentials());
  // This password is used as shared secret key between this application and
  // child pipes process
  byte[] password = jobToken.getPassword();
  String localPasswordFile = new File(".") + Path.SEPARATOR
      + "jobTokenPassword";
  writePasswordToLocalFile(localPasswordFile, password, conf);
  env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

  List<String> cmd = new ArrayList<String>();
  String interpretor = conf.get(Submitter.INTERPRETOR);
  if (interpretor != null) {
    cmd.add(interpretor);
  }
  String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
  if (!FileUtil.canExecute(new File(executable))) {
    // LinuxTaskController sets +x permissions on all distcache files already.
    // In case of DefaultTaskController, set permissions here.
    FileUtil.chmod(executable, "u+x");
  }
  cmd.add(executable);
  // wrap the command in a stdout/stderr capture
  // we are starting map/reduce task of the pipes job. this is not a cleanup
  // attempt.
  TaskAttemptID taskid =
      TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
  File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
  long logLength = TaskLog.getTaskLogLength(conf);
  cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
                                   false);

  process = runClient(cmd, env);
  clientSocket = serverSocket.accept();

  String challenge = getSecurityChallenge();
  String digestToSend = createDigest(password, challenge);
  String digestExpected = createDigest(password, digestToSend);

  handler = new OutputHandler<K2, V2>(output, reporter, recordReader,
      digestExpected);
  K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
  V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
  downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler,
      outputKey, outputValue, conf);

  downlink.authenticate(digestToSend, challenge);
  waitForAuthentication();
  LOG.debug("Authentication succeeded");
  downlink.start();
  downlink.setJobConf(conf);
}
Example 11
Source File: Application.java From big-c with Apache License 2.0
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf,
            RecordReader<FloatWritable, NullWritable> recordReader,
            OutputCollector<K2, V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  Map<String, String> env = new HashMap<String, String>();
  // add TMPDIR environment variable with the value of java.io.tmpdir
  env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
  env.put(Submitter.PORT,
          Integer.toString(serverSocket.getLocalPort()));

  //Add token to the environment if security is enabled
  Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf
      .getCredentials());
  // This password is used as shared secret key between this application and
  // child pipes process
  byte[] password = jobToken.getPassword();
  String localPasswordFile = new File(".") + Path.SEPARATOR
      + "jobTokenPassword";
  writePasswordToLocalFile(localPasswordFile, password, conf);
  env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

  List<String> cmd = new ArrayList<String>();
  String interpretor = conf.get(Submitter.INTERPRETOR);
  if (interpretor != null) {
    cmd.add(interpretor);
  }
  String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
  if (!FileUtil.canExecute(new File(executable))) {
    // LinuxTaskController sets +x permissions on all distcache files already.
    // In case of DefaultTaskController, set permissions here.
    FileUtil.chmod(executable, "u+x");
  }
  cmd.add(executable);
  // wrap the command in a stdout/stderr capture
  // we are starting map/reduce task of the pipes job. this is not a cleanup
  // attempt.
  TaskAttemptID taskid =
      TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));
  File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
  File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
  long logLength = TaskLog.getTaskLogLength(conf);
  cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
                                   false);

  process = runClient(cmd, env);
  clientSocket = serverSocket.accept();

  String challenge = getSecurityChallenge();
  String digestToSend = createDigest(password, challenge);
  String digestExpected = createDigest(password, digestToSend);

  handler = new OutputHandler<K2, V2>(output, reporter, recordReader,
      digestExpected);
  K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
  V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
  downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler,
      outputKey, outputValue, conf);

  downlink.authenticate(digestToSend, challenge);
  waitForAuthentication();
  LOG.debug("Authentication succeeded");
  downlink.start();
  downlink.setJobConf(conf);
}
Example 12
Source File: Application.java From RDFS with Apache License 2.0
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf,
            RecordReader<FloatWritable, NullWritable> recordReader,
            OutputCollector<K2, V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  Map<String, String> env = new HashMap<String, String>();
  // add TMPDIR environment variable with the value of java.io.tmpdir
  env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
  env.put("hadoop.pipes.command.port",
          Integer.toString(serverSocket.getLocalPort()));
  List<String> cmd = new ArrayList<String>();
  String interpretor = conf.get("hadoop.pipes.executable.interpretor");
  if (interpretor != null) {
    cmd.add(interpretor);
  }
  String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
  FileUtil.chmod(executable, "a+x");
  cmd.add(executable);
  // wrap the command in a stdout/stderr capture
  TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
  File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
  File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
  long logLength = TaskLog.getTaskLogLength(conf);
  cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength,
                                   false);
  process = runClient(cmd, env);
  clientSocket = serverSocket.accept();
  handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
  K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
  V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
  downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler,
      outputKey, outputValue, conf);
  downlink.start();
  downlink.setJobConf(conf);
}
Example 13
Source File: Application.java From hadoop-gpu with Apache License 2.0
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @param runOnGPU
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf,
            RecordReader<FloatWritable, NullWritable> recordReader,
            OutputCollector<K2, V2> output, Reporter reporter,
            Class<? extends K2> outputKeyClass,
            Class<? extends V2> outputValueClass,
            boolean runOnGPU
            ) throws IOException, InterruptedException {
  serverSocket = new ServerSocket(0);
  Map<String, String> env = new HashMap<String, String>();
  // add TMPDIR environment variable with the value of java.io.tmpdir
  env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
  env.put("hadoop.pipes.command.port",
          Integer.toString(serverSocket.getLocalPort()));
  List<String> cmd = new ArrayList<String>();
  String interpretor = conf.get("hadoop.pipes.executable.interpretor");
  if (interpretor != null) {
    cmd.add(interpretor);
  }

  // Check whether the applicaiton will run on GPU
  int i = runOnGPU ? 1 : 0;
  String executable = DistributedCache.getLocalCacheFiles(conf)[i].toString();
  FileUtil.chmod(executable, "a+x");
  cmd.add(executable);
  // wrap the command in a stdout/stderr capture
  TaskAttemptID taskid = TaskAttemptID.forName(conf.get("mapred.task.id"));
  File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
  File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
  long logLength = TaskLog.getTaskLogLength(conf);
  cmd = TaskLog.captureOutAndError(cmd, stdout, stderr, logLength);
  process = runClient(cmd, env);
  clientSocket = serverSocket.accept();
  handler = new OutputHandler<K2, V2>(output, reporter, recordReader);
  K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
  V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
  downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler,
      outputKey, outputValue, conf);
  downlink.start();
  downlink.setJobConf(conf);
}