org.apache.tez.mapreduce.protos.MRRuntimeProtos Java Examples
The following examples show how to use
org.apache.tez.mapreduce.protos.MRRuntimeProtos.
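Before the individual examples, a minimal end-to-end sketch may help: it serializes a new-API split into an MRRuntimeProtos.MRSplitProto and deserializes it back, using the MRInputHelpers methods shown in Examples #2 and #3. The package name org.apache.tez.mapreduce.hadoop for MRInputHelpers and the input path are assumptions for illustration; verify them against your Tez version.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos;

public class MRSplitProtoRoundTrip {
  public static void main(String[] args) throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    SerializationFactory serializationFactory = new SerializationFactory(conf);

    // Serialize a new-API (mapreduce) split into its protobuf representation.
    FileSplit split = new FileSplit(
        new Path("/tmp/input/part-00000"), 0L, 1024L, new String[] {"host1"});
    MRRuntimeProtos.MRSplitProto splitProto =
        MRInputHelpers.createSplitProto(split, serializationFactory);

    // Deserialize it back into an InputSplit instance.
    InputSplit restored =
        MRInputHelpers.createNewFormatSplitFromUserPayload(splitProto, serializationFactory);
    System.out.println("Restored split class: " + restored.getClass().getName());
  }
}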
Example #1
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 6 votes |
/**
 * Create an instance of {@link org.apache.hadoop.mapred.InputSplit} from the
 * {@link org.apache.tez.mapreduce.input.MRInput} representation of a split.
 *
 * @param splitProto The {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                   instance representing the split
 * @param serializationFactory the serialization mechanism used to write out the split
 * @return an instance of the split
 * @throws java.io.IOException
 */
@SuppressWarnings("unchecked")
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive, pig"})
public static InputSplit createOldFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  // This may not need to use serialization factory, since OldFormat
  // always uses Writable to write splits.
  Objects.requireNonNull(splitProto, "splitProto cannot be null");
  String className = splitProto.getSplitClassName();
  Class<InputSplit> clazz;

  try {
    clazz = (Class<InputSplit>) Class.forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }

  Deserializer<InputSplit> deserializer = serializationFactory
      .getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  InputSplit inputSplit = deserializer.deserialize(null);
  deserializer.close();
  return inputSplit;
}
Example #2
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 6 votes |
/**
 * Create an instance of {@link org.apache.hadoop.mapreduce.InputSplit} from the
 * {@link org.apache.tez.mapreduce.input.MRInput} representation of a split.
 *
 * @param splitProto The {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitProto}
 *                   instance representing the split
 * @param serializationFactory the serialization mechanism used to write out the split
 * @return an instance of the split
 * @throws IOException
 */
@InterfaceStability.Evolving
@SuppressWarnings("unchecked")
public static org.apache.hadoop.mapreduce.InputSplit createNewFormatSplitFromUserPayload(
    MRRuntimeProtos.MRSplitProto splitProto, SerializationFactory serializationFactory)
    throws IOException {
  Objects.requireNonNull(splitProto, "splitProto must be specified");
  String className = splitProto.getSplitClassName();
  Class<org.apache.hadoop.mapreduce.InputSplit> clazz;

  try {
    clazz = (Class<org.apache.hadoop.mapreduce.InputSplit>) Class.forName(className);
  } catch (ClassNotFoundException e) {
    throw new IOException("Failed to load InputSplit class: [" + className + "]", e);
  }

  Deserializer<org.apache.hadoop.mapreduce.InputSplit> deserializer = serializationFactory
      .getDeserializer(clazz);
  deserializer.open(splitProto.getSplitBytes().newInput());
  org.apache.hadoop.mapreduce.InputSplit inputSplit = deserializer.deserialize(null);
  deserializer.close();
  return inputSplit;
}
Example #3
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 6 votes |
@InterfaceStability.Evolving
public static <T extends org.apache.hadoop.mapreduce.InputSplit> MRRuntimeProtos.MRSplitProto createSplitProto(
    T newSplit, SerializationFactory serializationFactory)
    throws IOException, InterruptedException {
  MRRuntimeProtos.MRSplitProto.Builder builder = MRRuntimeProtos.MRSplitProto
      .newBuilder();
  builder.setSplitClassName(newSplit.getClass().getName());

  @SuppressWarnings("unchecked")
  Serializer<T> serializer = serializationFactory
      .getSerializer((Class<T>) newSplit.getClass());
  ByteString.Output out = ByteString
      .newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  serializer.open(out);
  serializer.serialize(newSplit);
  // TODO MR Compat: Check against max block locations per split.
  ByteString splitBs = out.toByteString();
  builder.setSplitBytes(splitBs);

  return builder.build();
}
Example #4
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 6 votes |
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive, pig"})
public static MRRuntimeProtos.MRSplitProto createSplitProto(
    org.apache.hadoop.mapred.InputSplit oldSplit) throws IOException {
  MRRuntimeProtos.MRSplitProto.Builder builder = MRRuntimeProtos.MRSplitProto.newBuilder();

  builder.setSplitClassName(oldSplit.getClass().getName());

  ByteString.Output os = ByteString
      .newOutput(SPLIT_SERIALIZED_LENGTH_ESTIMATE);
  oldSplit.write(new NonSyncDataOutputStream(os));
  ByteString splitBs = os.toByteString();
  builder.setSplitBytes(splitBs);

  return builder.build();
}
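This is the old-API (mapred) counterpart of Example #3 and pairs with createOldFormatSplitFromUserPayload from Example #1. A hedged round-trip sketch follows, with an illustrative org.apache.hadoop.mapred.FileSplit and an assumed package of org.apache.tez.mapreduce.hadoop for MRInputHelpers.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos;

public class OldApiSplitProtoRoundTrip {
  public static void main(String[] args) throws IOException {
    // Serialize an old-API (mapred) split; the helper writes it with Writable semantics.
    FileSplit oldSplit =
        new FileSplit(new Path("/tmp/input/part-00000"), 0L, 4096L, new String[] {"host1"});
    MRRuntimeProtos.MRSplitProto splitProto = MRInputHelpers.createSplitProto(oldSplit);

    // Read it back through the deserializer-based helper shown in Example #1.
    SerializationFactory serializationFactory = new SerializationFactory(new Configuration());
    InputSplit restored =
        MRInputHelpers.createOldFormatSplitFromUserPayload(splitProto, serializationFactory);
    System.out.println("Restored split: " + restored.getClass().getName());
  }
}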
Example #5
Source File: MRInputBase.java From incubator-tez with Apache License 2.0 | 5 votes |
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0l, null); // mandatory call
  MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload =
      MRHelpers.parseMRInputPayload(getContext().getUserPayload());
  Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
      "Split information not expected in " + this.getClass().getName());
  Configuration conf = MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes());
  this.jobConf = new JobConf(conf);
  // Add tokens to the jobConf - in case they are accessed within the RR / IF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  TaskAttemptID taskAttemptId = new TaskAttemptID(
      new TaskID(
          Long.toString(getContext().getApplicationId().getClusterTimestamp()),
          getContext().getApplicationId().getId(), TaskType.MAP,
          getContext().getTaskIndex()),
      getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
      getContext().getDAGAttemptNumber());

  this.inputRecordCounter = getContext().getCounters().findCounter(
      TaskCounter.INPUT_RECORDS_PROCESSED);

  useNewApi = this.jobConf.getUseNewMapper();
  return null;
}
Example #6
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 5 votes |
/**
 * When isGrouped is true, it specifies that grouping of input splits be
 * performed by Tez. The conf should have the input format class configuration
 * set to the TezGroupedSplitsInputFormat. The real input format class name
 * should be passed as an argument to this method.
 * <p/>
 * With grouping enabled, the eventual configuration used by the tasks will have
 * the user-specified InputFormat replaced by either {@link org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat}
 * or {@link org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat}
 */
@InterfaceAudience.Private
protected static UserPayload createMRInputPayload(Configuration conf,
    MRRuntimeProtos.MRSplitsProto mrSplitsProto,
    boolean isGrouped, boolean isSorted) throws IOException {
  Preconditions.checkArgument(conf != null, "Configuration must be specified");

  return createMRInputPayload(TezUtils.createByteStringFromConf(conf),
      mrSplitsProto, isGrouped, isSorted);
}
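Since this method is marked @InterfaceAudience.Private, grouping is normally enabled through the public MRInput.createConfigBuilder API instead. Here is a sketch, assuming the default split-generation mode (splits generated at runtime in the AM) so that build() does not need an input path, and using parseMRInputPayload (Example #15) to confirm the flag lands in the proto; package names are assumptions.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.tez.dag.api.DataSourceDescriptor;
import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos;

public class GroupedPayloadCheck {
  public static void main(String[] args) throws IOException {
    // Build a data source with split grouping turned on.
    DataSourceDescriptor dataSource = MRInput
        .createConfigBuilder(new Configuration(false), TextInputFormat.class)
        .groupSplits(true)
        .build();

    // The grouping flag is carried inside the MRInputUserPayloadProto.
    MRRuntimeProtos.MRInputUserPayloadProto proto = MRInputHelpers
        .parseMRInputPayload(dataSource.getInputDescriptor().getUserPayload());
    System.out.println("groupingEnabled = " + proto.getGroupingEnabled());
  }
}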
Example #7
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 5 votes |
private static UserPayload createMRInputPayload(ByteString bytes,
    MRRuntimeProtos.MRSplitsProto mrSplitsProto,
    boolean isGrouped, boolean isSorted) throws IOException {
  MRRuntimeProtos.MRInputUserPayloadProto.Builder userPayloadBuilder =
      MRRuntimeProtos.MRInputUserPayloadProto.newBuilder();
  userPayloadBuilder.setConfigurationBytes(bytes);
  if (mrSplitsProto != null) {
    userPayloadBuilder.setSplits(mrSplitsProto);
  }
  userPayloadBuilder.setGroupingEnabled(isGrouped);
  userPayloadBuilder.setSortSplitsEnabled(isSorted);

  return UserPayload.create(userPayloadBuilder.build()
      .toByteString().asReadOnlyByteBuffer());
}
Example #8
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 4 votes |
@InterfaceAudience.Private
protected static UserPayload createMRInputPayload(Configuration conf,
    MRRuntimeProtos.MRSplitsProto mrSplitsProto) throws IOException {
  return createMRInputPayload(conf, mrSplitsProto, false, true);
}
Example #9
Source File: YARNRunner.java From tez with Apache License 2.0 | 4 votes |
protected static UserPayload createMRInputPayload(Configuration conf,
    MRRuntimeProtos.MRSplitsProto mrSplitsProto) throws IOException {
  return MRInputHelpers.createMRInputPayload(conf, mrSplitsProto, false, true);
}
Example #10
Source File: MRInputBase.java From tez with Apache License 2.0 | 4 votes |
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0l, null); // mandatory call
  MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload =
      MRInputHelpers.parseMRInputPayload(getContext().getUserPayload());
  boolean isGrouped = mrUserPayload.getGroupingEnabled();
  Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
      "Split information not expected in " + this.getClass().getName());

  Configuration conf = new JobConf(getContext().getContainerConfiguration());
  TezUtils.addToConfFromByteString(conf, mrUserPayload.getConfigurationBytes());
  this.jobConf = new JobConf(conf);
  useNewApi = this.jobConf.getUseNewMapper();
  if (isGrouped) {
    if (useNewApi) {
      jobConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
          org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat.class.getName());
    } else {
      jobConf.set("mapred.input.format.class",
          org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName());
    }
  }

  // Add tokens to the jobConf - in case they are accessed within the RR / IF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  TaskAttemptID taskAttemptId = new TaskAttemptID(
      new TaskID(
          Long.toString(getContext().getApplicationId().getClusterTimestamp()),
          getContext().getApplicationId().getId(), TaskType.MAP,
          getContext().getTaskIndex()),
      getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
      getContext().getDAGAttemptNumber());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_DAG_INDEX, getContext().getDagIdentifier());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_VERTEX_INDEX, getContext().getTaskVertexIndex());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_TASK_INDEX, getContext().getTaskIndex());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_TASK_ATTEMPT_INDEX, getContext().getTaskAttemptNumber());
  jobConf.set(MRInput.TEZ_MAPREDUCE_DAG_NAME, getContext().getDAGName());
  jobConf.set(MRInput.TEZ_MAPREDUCE_VERTEX_NAME, getContext().getTaskVertexName());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_INPUT_INDEX, getContext().getInputIndex());
  jobConf.set(MRInput.TEZ_MAPREDUCE_INPUT_NAME, getContext().getSourceVertexName());
  jobConf.set(MRInput.TEZ_MAPREDUCE_APPLICATION_ID, getContext().getApplicationId().toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_UNIQUE_IDENTIFIER, getContext().getUniqueIdentifier());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER, getContext().getDAGAttemptNumber());

  TezDAGID tezDAGID = TezDAGID.getInstance(getContext().getApplicationId(),
      getContext().getDagIdentifier());
  TezVertexID tezVertexID = TezVertexID.getInstance(tezDAGID, getContext().getTaskVertexIndex());
  TezTaskID tezTaskID = TezTaskID.getInstance(tezVertexID, getContext().getTaskIndex());
  TezTaskAttemptID tezTaskAttemptID = TezTaskAttemptID.getInstance(tezTaskID,
      getContext().getTaskAttemptNumber());
  jobConf.set(MRInput.TEZ_MAPREDUCE_DAG_ID, tezDAGID.toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_VERTEX_ID, tezVertexID.toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_TASK_ID, tezTaskID.toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_TASK_ATTEMPT_ID, tezTaskAttemptID.toString());

  this.inputRecordCounter = getContext().getCounters().findCounter(
      TaskCounter.INPUT_RECORDS_PROCESSED);

  return null;
}
Example #11
Source File: MRInput.java From tez with Apache License 2.0 | 4 votes |
protected static UserPayload createMRInputPayload(Configuration conf,
    MRRuntimeProtos.MRSplitsProto mrSplitsProto) throws IOException {
  return MRInputHelpers.createMRInputPayload(conf, mrSplitsProto, false, true);
}
Example #12
Source File: TestMapProcessor.java From tez with Apache License 2.0 | 4 votes |
@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");

  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
      1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
      new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec), sharedExecutor);

  task.initialize();
  task.run();
  task.close();
  sharedExecutor.shutdownNow();

  OutputContext outputContext = task.getOutputContexts().iterator().next();
  TezTaskOutput mapOutputs = new TezTaskOutputFiles(
      jobConf, outputContext.getUniqueIdentifier(),
      outputContext.getDagIdentifier());

  // TODO NEWTEZ FIXME OutputCommitter verification
  // MRTask mrTask = (MRTask)t.getProcessor();
  // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
  //     .getCommitter().getClass().getName());
  // t.close();

  Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
  LOG.info("mapOutputFile = " + mapOutputFile);
  IFile.Reader reader =
      new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
  LongWritable key = new LongWritable();
  Text value = new Text();
  DataInputBuffer keyBuf = new DataInputBuffer();
  DataInputBuffer valueBuf = new DataInputBuffer();
  long prev = Long.MIN_VALUE;
  while (reader.nextRawKey(keyBuf)) {
    reader.nextRawValue(valueBuf);
    key.readFields(keyBuf);
    value.readFields(valueBuf);
    if (prev != Long.MIN_VALUE) {
      assert (prev <= key.get());
      prev = key.get();
    }
    LOG.info("key = " + key.get() + "; value = " + value);
  }

  reader.close();
}
Example #13
Source File: TestMapProcessor.java From tez with Apache License 2.0 | 4 votes |
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");

  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
      1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir,
      jobConf, 0, new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  Thread monitorProgress = new Thread(new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if (prog > 0.0f && prog < 1.0f) {
        progressUpdate = prog;
      }
    }
  });

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
  task.run();
  Assert.assertTrue("Progress Updates should be captured!",
      progressUpdate > 0.0f && progressUpdate < 1.0f);
  task.close();
  sharedExecutor.shutdownNow();
}
Example #14
Source File: TestMRInput.java From tez with Apache License 2.0 | 4 votes |
@Test(timeout = 5000)
public void testAttributesInJobConf() throws Exception {
  InputContext inputContext = mock(InputContext.class);
  doReturn(TEST_ATTRIBUTES_DAG_INDEX).when(inputContext).getDagIdentifier();
  doReturn(TEST_ATTRIBUTES_VERTEX_INDEX).when(inputContext).getTaskVertexIndex();
  doReturn(TEST_ATTRIBUTES_TASK_INDEX).when(inputContext).getTaskIndex();
  doReturn(TEST_ATTRIBUTES_TASK_ATTEMPT_INDEX).when(inputContext).getTaskAttemptNumber();
  doReturn(TEST_ATTRIBUTES_INPUT_INDEX).when(inputContext).getInputIndex();
  doReturn(TEST_ATTRIBUTES_DAG_ATTEMPT_NUMBER).when(inputContext).getDAGAttemptNumber();
  doReturn(TEST_ATTRIBUTES_DAG_NAME).when(inputContext).getDAGName();
  doReturn(TEST_ATTRIBUTES_VERTEX_NAME).when(inputContext).getTaskVertexName();
  doReturn(TEST_ATTRIBUTES_INPUT_NAME).when(inputContext).getSourceVertexName();
  doReturn(TEST_ATTRIBUTES_APPLICATION_ID).when(inputContext).getApplicationId();
  doReturn(TEST_ATTRIBUTES_UNIQUE_IDENTIFIER).when(inputContext).getUniqueIdentifier();
  doReturn(new Configuration(false)).when(inputContext).getContainerConfiguration();

  DataSourceDescriptor dsd = MRInput.createConfigBuilder(new Configuration(false),
      TestInputFormat.class).groupSplits(false).build();

  doReturn(dsd.getInputDescriptor().getUserPayload()).when(inputContext).getUserPayload();
  doReturn(new TezCounters()).when(inputContext).getCounters();

  MRInput mrInput = new MRInput(inputContext, 1);

  mrInput.initialize();

  MRRuntimeProtos.MRSplitProto splitProto = MRRuntimeProtos.MRSplitProto.newBuilder()
      .setSplitClassName(TestInputSplit.class.getName())
      .build();
  InputDataInformationEvent diEvent = InputDataInformationEvent
      .createWithSerializedPayload(0, splitProto.toByteString().asReadOnlyByteBuffer());

  List<Event> events = new LinkedList<>();
  events.add(diEvent);
  mrInput.handleEvents(events);
  TezCounter counter = mrInput.getContext().getCounters()
      .findCounter(TaskCounter.INPUT_SPLIT_LENGTH_BYTES);
  assertEquals(counter.getValue(), TestInputSplit.length);
  assertTrue(TestInputFormat.invoked.get());
}
Example #15
Source File: MRInputHelpers.java From tez with Apache License 2.0 | 3 votes |
/**
 * Parse the payload used by MRInputPayload
 *
 * @param payload the {@link org.apache.tez.dag.api.UserPayload} instance
 * @return an instance of {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRInputUserPayloadProto},
 *         which provides access to the underlying configuration bytes
 * @throws IOException
 */
@InterfaceStability.Evolving
@InterfaceAudience.LimitedPrivate({"hive, pig"})
public static MRRuntimeProtos.MRInputUserPayloadProto parseMRInputPayload(UserPayload payload)
    throws IOException {
  return MRRuntimeProtos.MRInputUserPayloadProto.parseFrom(ByteString.copyFrom(payload.getPayload()));
}
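parseMRInputPayload is the read-side counterpart of the createMRInputPayload variants in Examples #6 through #11. A small helper sketch follows, assuming TezUtils.createConfFromByteString from org.apache.tez.common is available to turn the embedded configuration bytes back into a Hadoop Configuration; the helper class itself is illustrative, not part of Tez.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.common.TezUtils;
import org.apache.tez.dag.api.UserPayload;
import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
import org.apache.tez.mapreduce.protos.MRRuntimeProtos;

public final class MRInputPayloadUtil {
  private MRInputPayloadUtil() {}

  // Recover the Hadoop Configuration carried inside an MRInput user payload.
  public static Configuration extractConf(UserPayload payload) throws IOException {
    MRRuntimeProtos.MRInputUserPayloadProto proto =
        MRInputHelpers.parseMRInputPayload(payload);
    return TezUtils.createConfFromByteString(proto.getConfigurationBytes());
  }
}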