org.apache.tez.mapreduce.hadoop.MRHelpers Java Examples
The following examples show how to use
org.apache.tez.mapreduce.hadoop.MRHelpers.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MRInput.java From tez with Apache License 2.0 | 6 votes |
/**
 * Builds the {@link DataSourceDescriptor} whose input initializer is
 * {@link MRInputAMSplitGenerator}.
 *
 * <p>The shared {@code conf} is first passed through {@code setupBasicConf} and
 * {@code MRHelpers.translateMRConfToTez}, then serialized into the input payload together
 * with the {@code groupSplitsInAM} and {@code sortSplitsInAM} flags.
 *
 * @return the data source descriptor, with history text and credential URIs attached
 *     when they apply
 * @throws IOException propagated from the helpers invoked here
 */
private DataSourceDescriptor createGeneratorDataSource() throws IOException {
  setupBasicConf(conf);
  MRHelpers.translateMRConfToTez(conf);

  Collection<URI> credentialUris = maybeGetURIsForCredentials();
  UserPayload inputPayload =
      MRInputHelpersInternal.createMRInputPayload(conf, groupSplitsInAM, sortSplitsInAM);

  InputDescriptor inputDescriptor =
      InputDescriptor.create(inputClassName).setUserPayload(inputPayload);
  InputInitializerDescriptor splitGenerator =
      InputInitializerDescriptor.create(MRInputAMSplitGenerator.class.getName());
  DataSourceDescriptor dataSource =
      DataSourceDescriptor.create(inputDescriptor, splitGenerator, null);

  // History text is optional and controlled by a runtime configuration flag.
  boolean publishHistoryText = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT);
  if (publishHistoryText) {
    dataSource.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  if (credentialUris != null) {
    dataSource.addURIsForCredentials(credentialUris);
  }
  return dataSource;
}
Example #2
Source File: MRInput.java From tez with Apache License 2.0 | 6 votes |
/**
 * Builds the {@link DataSourceDescriptor} that uses the caller-supplied
 * {@code customInitializerDescriptor} as its input initializer.
 *
 * <p>The shared {@code conf} is first passed through {@code setupBasicConf} and
 * {@code MRHelpers.translateMRConfToTez}, then serialized into the input payload together
 * with the {@code groupSplitsInAM} and {@code sortSplitsInAM} flags.
 *
 * @return the data source descriptor, with history text and credential URIs attached
 *     when they apply
 * @throws IOException propagated from the helpers invoked here
 */
private DataSourceDescriptor createCustomDataSource() throws IOException {
  setupBasicConf(conf);
  MRHelpers.translateMRConfToTez(conf);

  Collection<URI> credentialUris = maybeGetURIsForCredentials();
  UserPayload inputPayload =
      MRInputHelpersInternal.createMRInputPayload(conf, groupSplitsInAM, sortSplitsInAM);

  InputDescriptor inputDescriptor =
      InputDescriptor.create(inputClassName).setUserPayload(inputPayload);
  DataSourceDescriptor dataSource =
      DataSourceDescriptor.create(inputDescriptor, customInitializerDescriptor, null);

  // History text is optional and controlled by a runtime configuration flag.
  boolean publishHistoryText = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT);
  if (publishHistoryText) {
    dataSource.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  if (credentialUris != null) {
    dataSource.addURIsForCredentials(credentialUris);
  }
  return dataSource;
}
Example #3
Source File: MROutput.java From tez with Apache License 2.0 | 6 votes |
/** * Creates the user payload to be set on the OutputDescriptor for MROutput */ private UserPayload createUserPayload() { // set which api is being used always conf.setBoolean(MRJobConfig.NEW_API_REDUCER_CONFIG, useNewApi); conf.setBoolean(MRJobConfig.NEW_API_MAPPER_CONFIG, useNewApi); if (outputFormatProvided) { if (useNewApi) { conf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormat.getName()); } else { conf.set("mapred.output.format.class", outputFormat.getName()); } } MRHelpers.translateMRConfToTez(conf); try { return TezUtils.createUserPayloadFromConf(conf); } catch (IOException e) { throw new TezUncheckedException(e); } }
Example #4
Source File: MRInput.java From incubator-tez with Apache License 2.0 | 6 votes |
/**
 * Helper API to generate the user payload for the MRInput and
 * MRInputAMSplitGenerator (if used). The InputFormat will be invoked by Tez
 * at DAG runtime to generate the input splits.
 *
 * <p>When grouping is requested, the configuration names
 * {@link TezGroupedSplitsInputFormat} as the input format and the caller's input format
 * class name is passed to the grouping payload helper instead.
 *
 * @param conf
 *          Configuration for the InputFormat
 * @param inputFormatClassName
 *          Name of the class of the InputFormat
 * @param useNewApi
 *          use new mapreduce API or old mapred API
 * @param groupSplitsInAM
 *          do grouping of splits in the AM. If true then splits generated by
 *          the InputFormat will be grouped in the AM based on available
 *          resources, locality etc. This option may be set to true only when
 *          using MRInputAMSplitGenerator as the initializer class in
 *          {@link Vertex#addInput(String, org.apache.tez.dag.api.InputDescriptor, Class)}
 * @return the user payload to be set on the InputDescriptor of MRInput
 * @throws IOException propagated from the MRHelpers calls made here
 */
public static byte[] createUserPayload(Configuration conf,
    String inputFormatClassName, boolean useNewApi, boolean groupSplitsInAM)
    throws IOException {
  Configuration inputConf = new JobConf(conf);

  String configuredInputFormat = groupSplitsInAM
      ? TezGroupedSplitsInputFormat.class.getName()
      : inputFormatClassName;
  inputConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, configuredInputFormat);
  inputConf.setBoolean("mapred.mapper.new-api", useNewApi);

  MRHelpers.translateVertexConfToTez(inputConf);
  MRHelpers.doJobClientMagic(inputConf);

  if (!groupSplitsInAM) {
    return MRHelpers.createMRInputPayload(inputConf, null);
  }
  return MRHelpers.createMRInputPayloadWithGrouping(inputConf, inputFormatClassName);
}
Example #5
Source File: TestMRRJobsDAGApi.java From incubator-tez with Apache License 2.0 | 6 votes |
/**
 * Looks up the relocalization test class and, if it can be loaded, creates a marker
 * directory before delegating to the parent initializer.
 *
 * @param rootInputContext initializer context supplying the MRInput user payload
 * @return whatever the parent {@code initialize} returns
 * @throws Exception propagated from payload parsing, file system access or the parent
 */
public List<Event> initialize(TezRootInputInitializerContext rootInputContext)
    throws Exception {
  MRInputUserPayloadProto payloadProto =
      MRHelpers.parseMRInputPayload(rootInputContext.getUserPayload());
  Configuration conf =
      MRHelpers.createConfFromByteString(payloadProto.getConfigurationBytes());

  try {
    ReflectionUtils.getClazz(RELOCALIZATION_TEST_CLASS_NAME);
    LOG.info("Class found");
    // Marker directory is only created after the class lookup succeeded.
    FileSystem.get(conf).mkdirs(new Path("/tmp/relocalizationfilefound"));
  } catch (TezUncheckedException e) {
    LOG.info("Class not found");
  }

  return super.initialize(rootInputContext);
}
Example #6
Source File: WordCount.java From incubator-tez with Apache License 2.0 | 5 votes |
/**
 * Assembles the two-vertex WordCount DAG: a "tokenizer" vertex reading through MRInput
 * and a "summer" vertex writing through MROutput, joined by an ordered partitioned
 * key/value edge.
 *
 * @param fs not used by this method
 * @param tezConf base configuration copied for the input and output sides
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath value stored under {@code FileInputFormat.INPUT_DIR}
 * @param outputPath value stored under {@code FileOutputFormat.OUTDIR}
 * @return the assembled DAG named "WordCount"
 * @throws IOException propagated from payload creation
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {

  // Input side: text input with new-API and AM split grouping both enabled.
  Configuration sourceConf = new Configuration(tezConf);
  sourceConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor inputDescriptor = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(
          sourceConf, TextInputFormat.class.getName(), true, true));

  // Output side: text output with new-API enabled.
  Configuration sinkConf = new Configuration(tezConf);
  sinkConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor outputDescriptor = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(MROutput.createUserPayload(
          sinkConf, TextOutputFormat.class.getName(), true));

  Vertex tokenizerVertex = new Vertex(
      "tokenizer",
      new ProcessorDescriptor(TokenProcessor.class.getName()),
      -1,
      MRHelpers.getMapResource(tezConf));
  tokenizerVertex.addInput("MRInput", inputDescriptor, MRInputAMSplitGenerator.class);

  Vertex summerVertex = new Vertex(
      "summer",
      new ProcessorDescriptor(SumProcessor.class.getName()),
      1,
      MRHelpers.getReduceResource(tezConf));
  summerVertex.addOutput("MROutput", outputDescriptor, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(
          Text.class.getName(),
          IntWritable.class.getName(),
          HashPartitioner.class.getName(),
          null)
      .build();

  DAG dag = new DAG("WordCount");
  dag.addVertex(tokenizerVertex)
      .addVertex(summerVertex)
      .addEdge(new Edge(tokenizerVertex, summerVertex,
          edgeConf.createDefaultEdgeProperty()));
  return dag;
}
Example #7
Source File: IntersectDataGen.java From incubator-tez with Apache License 2.0 | 5 votes |
/**
 * Assembles the single-vertex "IntersectDataGen" DAG whose "datagen" vertex writes three
 * MROutput sinks (stream, hash and expected).
 *
 * @param tezConf configuration used for the output payloads and the task resource
 * @param largeOutPath destination of the stream output
 * @param smallOutPath destination of the hash output
 * @param expectedOutputPath destination of the expected output
 * @param numTasks vertex parallelism; also the divisor for the per-task sizes, so a
 *     value of 0 raises {@link ArithmeticException}
 * @param largeOutSize total size divided across tasks for the large output
 * @param smallOutSize total size divided across tasks for the small output
 * @return the assembled DAG
 * @throws IOException propagated from payload creation
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  long perTaskLargeSize = largeOutSize / numTasks;
  long perTaskSmallSize = smallOutSize / numTasks;

  DAG dag = new DAG("IntersectDataGen");

  byte[] streamPayload = createPayloadForOutput(largeOutPath, tezConf);
  byte[] hashPayload = createPayloadForOutput(smallOutPath, tezConf);
  byte[] expectedPayload = createPayloadForOutput(expectedOutputPath, tezConf);

  Vertex genDataVertex = new Vertex(
      "datagen",
      new ProcessorDescriptor(GenDataProcessor.class.getName())
          .setUserPayload(
              GenDataProcessor.createConfiguration(perTaskLargeSize, perTaskSmallSize)),
      numTasks,
      MRHelpers.getMapResource(tezConf));

  // One MROutput sink per generated data set, all using the same committer.
  OutputDescriptor streamOutput =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(streamPayload);
  genDataVertex.addOutput(STREAM_OUTPUT_NAME, streamOutput, MROutputCommitter.class);

  OutputDescriptor hashOutput =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(hashPayload);
  genDataVertex.addOutput(HASH_OUTPUT_NAME, hashOutput, MROutputCommitter.class);

  OutputDescriptor expectedOutput =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(expectedPayload);
  genDataVertex.addOutput(EXPECTED_OUTPUT_NAME, expectedOutput, MROutputCommitter.class);

  dag.addVertex(genDataVertex);
  return dag;
}
Example #8
Source File: MROutput.java From incubator-tez with Apache License 2.0 | 5 votes |
/**
 * Creates the user payload to be set on the OutputDescriptor for MROutput.
 *
 * <p>The supplied configuration is copied into a {@link JobConf}; the copy, not
 * {@code conf} itself, receives the output format and API settings before being
 * serialized.
 *
 * @param conf Configuration for the OutputFormat
 * @param outputFormatName Name of the class of the OutputFormat
 * @param useNewApi Use new mapreduce API or old mapred API
 * @return the serialized configuration produced by
 *     {@code TezUtils.createUserPayloadFromConf}
 * @throws IOException propagated from the helper calls made here
 */
public static byte[] createUserPayload(Configuration conf,
    String outputFormatName, boolean useNewApi) throws IOException {
  Configuration payloadConf = new JobConf(conf);

  payloadConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatName);
  payloadConf.setBoolean("mapred.mapper.new-api", useNewApi);

  MRHelpers.translateVertexConfToTez(payloadConf);
  MRHelpers.doJobClientMagic(payloadConf);

  return TezUtils.createUserPayloadFromConf(payloadConf);
}
Example #9
Source File: YARNRunner.java From incubator-tez with Apache License 2.0 | 5 votes |
/**
 * Checks the user and admin Java options of the given task type via
 * {@code warnForJavaLibPath}, then lets {@code MRHelpers} update the task environment.
 *
 * @param jobConf configuration the Java options are read from
 * @param environment environment map handed to
 *     {@code MRHelpers.updateEnvironmentForMRTasks}
 * @param isMap {@code true} to use the map-side keys, {@code false} for the reduce-side
 * @throws IOException propagated from the helpers invoked here
 */
private void setupMapReduceEnv(Configuration jobConf,
    Map<String, String> environment, boolean isMap) throws IOException {

  // Select the configuration keys once instead of duplicating the calls per branch.
  final String component = isMap ? "map" : "reduce";
  final String userOptsKey =
      isMap ? MRJobConfig.MAP_JAVA_OPTS : MRJobConfig.REDUCE_JAVA_OPTS;
  final String userEnvKey = isMap ? MRJobConfig.MAP_ENV : MRJobConfig.REDUCE_ENV;
  final String adminOptsKey = isMap
      ? MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS
      : MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS;

  warnForJavaLibPath(jobConf.get(userOptsKey, ""), component, userOptsKey, userEnvKey);
  warnForJavaLibPath(jobConf.get(adminOptsKey, ""), component, adminOptsKey,
      MRJobConfig.MAPRED_ADMIN_USER_ENV);

  MRHelpers.updateEnvironmentForMRTasks(jobConf, environment, isMap);
}
Example #10
Source File: MRInput.java From tez with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link DataSourceDescriptor} that uses {@link MRInputSplitDistributor} as
 * its input initializer, with the input splits generated here and embedded in the
 * payload.
 *
 * @return the data source descriptor carrying the task count, optional credentials and
 *     location hints taken from the generated split info
 * @throws IOException propagated from the helpers invoked here
 * @throws TezUncheckedException wrapping any exception raised by split generation
 */
private DataSourceDescriptor createDistributorDataSource() throws IOException {
  setupBasicConf(conf);

  InputSplitInfo splitInfo;
  try {
    splitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, true, 0);
  } catch (Exception e) {
    throw new TezUncheckedException(e);
  }

  // Translation happens only after the splits were generated from the MR-style conf.
  MRHelpers.translateMRConfToTez(conf);
  UserPayload inputPayload =
      MRInputHelpersInternal.createMRInputPayload(conf, splitInfo.getSplitsProto());

  Credentials sourceCredentials =
      (getCredentialsForSourceFilesystem && splitInfo.getCredentials() != null)
          ? splitInfo.getCredentials()
          : null;

  DataSourceDescriptor dataSource = DataSourceDescriptor.create(
      InputDescriptor.create(inputClassName).setUserPayload(inputPayload),
      InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
      splitInfo.getNumTasks(),
      sourceCredentials,
      VertexLocationHint.create(splitInfo.getTaskLocationHints()),
      null);

  boolean publishHistoryText = conf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT);
  if (publishHistoryText) {
    dataSource.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }
  return dataSource;
}
Example #11
Source File: YARNRunner.java From tez with Apache License 2.0 | 5 votes |
/**
 * Checks the user and admin Java options of the given task type via
 * {@code warnForJavaLibPath}, then lets {@code MRHelpers} update the task environment.
 *
 * @param jobConf configuration the Java options are read from
 * @param environment environment map handed to
 *     {@code MRHelpers.updateEnvBasedOnMRTaskEnv}
 * @param isMap {@code true} to use the map-side keys, {@code false} for the reduce-side
 * @throws IOException propagated from the helpers invoked here
 */
private void setupMapReduceEnv(Configuration jobConf,
    Map<String, String> environment, boolean isMap) throws IOException {

  // Resolve the task-type specific keys up front; the two checks are otherwise identical.
  final String taskKind;
  final String javaOptsKey;
  final String envKey;
  final String adminJavaOptsKey;
  if (isMap) {
    taskKind = "map";
    javaOptsKey = MRJobConfig.MAP_JAVA_OPTS;
    envKey = MRJobConfig.MAP_ENV;
    adminJavaOptsKey = MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS;
  } else {
    taskKind = "reduce";
    javaOptsKey = MRJobConfig.REDUCE_JAVA_OPTS;
    envKey = MRJobConfig.REDUCE_ENV;
    adminJavaOptsKey = MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS;
  }

  warnForJavaLibPath(jobConf.get(javaOptsKey, ""), taskKind, javaOptsKey, envKey);
  warnForJavaLibPath(jobConf.get(adminJavaOptsKey, ""), taskKind, adminJavaOptsKey,
      MRJobConfig.MAPRED_ADMIN_USER_ENV);

  MRHelpers.updateEnvBasedOnMRTaskEnv(jobConf, environment, isMap);
}
Example #12
Source File: MRInputBase.java From incubator-tez with Apache License 2.0 | 5 votes |
public List<Event> initialize() throws IOException { getContext().requestInitialMemory(0l, null); // mandatory call MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload = MRHelpers.parseMRInputPayload(getContext().getUserPayload()); Preconditions.checkArgument(mrUserPayload.hasSplits() == false, "Split information not expected in " + this.getClass().getName()); Configuration conf = MRHelpers.createConfFromByteString(mrUserPayload.getConfigurationBytes()); this.jobConf = new JobConf(conf); // Add tokens to the jobConf - in case they are accessed within the RR / IF jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials()); TaskAttemptID taskAttemptId = new TaskAttemptID( new TaskID( Long.toString(getContext().getApplicationId().getClusterTimestamp()), getContext().getApplicationId().getId(), TaskType.MAP, getContext().getTaskIndex()), getContext().getTaskAttemptNumber()); jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString()); jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber()); this.inputRecordCounter = getContext().getCounters().findCounter( TaskCounter.INPUT_RECORDS_PROCESSED); useNewApi = this.jobConf.getUseNewMapper(); return null; }
Example #13
Source File: TestMRInputSplitDistributor.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * With serialized event payloads enabled, the distributor must emit one payload-update
 * event followed by one data-information event per split, each carrying only the
 * serialized split bytes.
 */
@Test
public void testSerializedPayload() throws IOException {
  Configuration conf = new Configuration(false);
  conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, true);
  ByteString confByteString = MRHelpers.createByteStringFromConf(conf);

  InputSplit split1 = new InputSplitForTest(1);
  InputSplit split2 = new InputSplitForTest(2);
  MRSplitProto proto1 = MRHelpers.createSplitProto(split1);
  MRSplitProto proto2 = MRHelpers.createSplitProto(split2);

  MRSplitsProto splitsProto = MRSplitsProto.newBuilder()
      .addSplits(proto1)
      .addSplits(proto2)
      .build();
  byte[] userPayload = MRInputUserPayloadProto.newBuilder()
      .setSplits(splitsProto)
      .setConfigurationBytes(confByteString)
      .build()
      .toByteArray();

  TezRootInputInitializerContext context =
      new TezRootInputInitializerContextForTest(userPayload);
  MRInputSplitDistributor splitDist = new MRInputSplitDistributor();

  List<Event> events = splitDist.initialize(context);

  assertEquals(3, events.size());
  assertTrue(events.get(0) instanceof RootInputUpdatePayloadEvent);
  assertTrue(events.get(1) instanceof RootInputDataInformationEvent);
  assertTrue(events.get(2) instanceof RootInputDataInformationEvent);

  RootInputDataInformationEvent firstSplitEvent =
      (RootInputDataInformationEvent) (events.get(1));
  RootInputDataInformationEvent secondSplitEvent =
      (RootInputDataInformationEvent) (events.get(2));

  // Serialized mode: raw bytes present, no deserialized object attached.
  assertNull(firstSplitEvent.getDeserializedUserPayload());
  assertNull(secondSplitEvent.getDeserializedUserPayload());
  assertNotNull(firstSplitEvent.getUserPayload());
  assertNotNull(secondSplitEvent.getUserPayload());

  MRSplitProto firstEventProto = MRSplitProto.parseFrom(firstSplitEvent.getUserPayload());
  InputSplit firstSplit =
      MRInputUtils.getOldSplitDetailsFromEvent(firstEventProto, new Configuration());
  assertTrue(firstSplit instanceof InputSplitForTest);
  assertEquals(1, ((InputSplitForTest) firstSplit).identifier);

  MRSplitProto secondEventProto = MRSplitProto.parseFrom(secondSplitEvent.getUserPayload());
  InputSplit secondSplit =
      MRInputUtils.getOldSplitDetailsFromEvent(secondEventProto, new Configuration());
  assertTrue(secondSplit instanceof InputSplitForTest);
  assertEquals(2, ((InputSplitForTest) secondSplit).identifier);
}
Example #14
Source File: MRInputSplitDistributor.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Parses the MRInput user payload and converts the splits it carries into initializer
 * events.
 *
 * <p>The returned list starts with a {@link RootInputUpdatePayloadEvent} holding the
 * payload with its splits cleared, followed by one
 * {@link RootInputDataInformationEvent} per split, numbered from 0 in payload order.
 * Depending on {@code MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD}, each split is
 * attached either as serialized bytes or as a deserialized old/new-API split object.
 *
 * @param rootInputContext initializer context supplying the user payload
 * @return the payload-update event followed by the per-split events
 * @throws IOException propagated from payload parsing or split deserialization
 */
@Override
public List<Event> initialize(TezRootInputInitializerContext rootInputContext)
    throws IOException {
  // The stopwatch exists only when debug logging was enabled at this point. Later code
  // keys off the stopwatch itself rather than re-querying the logger, so a log-level
  // change in between cannot lead to a null dereference.
  Stopwatch sw = LOG.isDebugEnabled() ? new Stopwatch().start() : null;

  MRInputUserPayloadProto userPayloadProto =
      MRHelpers.parseMRInputPayload(rootInputContext.getUserPayload());

  if (sw != null) {
    sw.stop();
    LOG.debug("Time to parse MRInput payload into prot: " + sw.elapsedMillis());
  }

  Configuration conf =
      MRHelpers.createConfFromByteString(userPayloadProto.getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();

  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();

  // The updated payload sent to tasks no longer needs to carry the splits.
  MRInputUserPayloadProto.Builder updatedPayloadBuilder =
      MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events =
      Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  RootInputUpdatePayloadEvent updatePayloadEvent =
      new RootInputUpdatePayloadEvent(updatedPayloadBuilder.build().toByteArray());
  events.add(updatePayloadEvent);

  int count = 0;
  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {
    RootInputDataInformationEvent diEvent;
    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = new RootInputDataInformationEvent(count++, mrSplit.toByteArray());
    } else if (useNewApi) {
      org.apache.hadoop.mapreduce.InputSplit newInputSplit =
          MRInputUtils.getNewSplitDetailsFromEvent(mrSplit, conf);
      diEvent = new RootInputDataInformationEvent(count++, newInputSplit);
    } else {
      org.apache.hadoop.mapred.InputSplit oldInputSplit =
          MRInputUtils.getOldSplitDetailsFromEvent(mrSplit, conf);
      diEvent = new RootInputDataInformationEvent(count++, oldInputSplit);
    }
    events.add(diEvent);
  }

  return events;
}
Example #15
Source File: TestMRInputSplitDistributor.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * With serialized event payloads disabled, the distributor must emit one payload-update
 * event followed by one data-information event per split, each carrying the
 * deserialized split object and no raw bytes.
 */
@Test
public void testDeserializedPayload() throws IOException {
  Configuration conf = new Configuration(false);
  conf.setBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD, false);
  ByteString confByteString = MRHelpers.createByteStringFromConf(conf);

  InputSplit split1 = new InputSplitForTest(1);
  InputSplit split2 = new InputSplitForTest(2);
  MRSplitProto proto1 = MRHelpers.createSplitProto(split1);
  MRSplitProto proto2 = MRHelpers.createSplitProto(split2);

  MRSplitsProto splitsProto = MRSplitsProto.newBuilder()
      .addSplits(proto1)
      .addSplits(proto2)
      .build();
  byte[] userPayload = MRInputUserPayloadProto.newBuilder()
      .setSplits(splitsProto)
      .setConfigurationBytes(confByteString)
      .build()
      .toByteArray();

  TezRootInputInitializerContext context =
      new TezRootInputInitializerContextForTest(userPayload);
  MRInputSplitDistributor splitDist = new MRInputSplitDistributor();

  List<Event> events = splitDist.initialize(context);

  assertEquals(3, events.size());
  assertTrue(events.get(0) instanceof RootInputUpdatePayloadEvent);
  assertTrue(events.get(1) instanceof RootInputDataInformationEvent);
  assertTrue(events.get(2) instanceof RootInputDataInformationEvent);

  RootInputDataInformationEvent firstSplitEvent =
      (RootInputDataInformationEvent) (events.get(1));
  RootInputDataInformationEvent secondSplitEvent =
      (RootInputDataInformationEvent) (events.get(2));

  // Deserialized mode: split object attached, no raw bytes.
  assertNull(firstSplitEvent.getUserPayload());
  assertNull(secondSplitEvent.getUserPayload());
  assertNotNull(firstSplitEvent.getDeserializedUserPayload());
  assertNotNull(secondSplitEvent.getDeserializedUserPayload());

  assertTrue(firstSplitEvent.getDeserializedUserPayload() instanceof InputSplitForTest);
  assertEquals(1,
      ((InputSplitForTest) firstSplitEvent.getDeserializedUserPayload()).identifier);

  assertTrue(secondSplitEvent.getDeserializedUserPayload() instanceof InputSplitForTest);
  assertEquals(2,
      ((InputSplitForTest) secondSplitEvent.getDeserializedUserPayload()).identifier);
}
Example #16
Source File: YARNRunner.java From tez with Apache License 2.0 | 4 votes |
private Vertex createVertexForStage(Configuration stageConf, Map<String, LocalResource> jobLocalResources, List<TaskLocationHint> locations, int stageNum, int totalStages) throws IOException { // stageNum starts from 0, goes till numStages - 1 boolean isMap = false; if (stageNum == 0) { isMap = true; } int numTasks = isMap ? stageConf.getInt(MRJobConfig.NUM_MAPS, 0) : stageConf.getInt(MRJobConfig.NUM_REDUCES, 0); String processorName = isMap ? MapProcessor.class.getName() : ReduceProcessor.class.getName(); String vertexName = null; if (isMap) { vertexName = MultiStageMRConfigUtil.getInitialMapVertexName(); } else { if (stageNum == totalStages - 1) { vertexName = MultiStageMRConfigUtil.getFinalReduceVertexName(); } else { vertexName = MultiStageMRConfigUtil .getIntermediateStageVertexName(stageNum); } } Resource taskResource = isMap ? MRHelpers.getResourceForMRMapper(stageConf) : MRHelpers.getResourceForMRReducer(stageConf); stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part"); UserPayload vertexUserPayload = TezUtils.createUserPayloadFromConf(stageConf); Vertex vertex = Vertex.create(vertexName, ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload), numTasks, taskResource); if (stageConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) { vertex.getProcessorDescriptor().setHistoryText(TezUtils.convertToHistoryText(stageConf)); } if (isMap) { vertex.addDataSource("MRInput", configureMRInputWithLegacySplitsGenerated(stageConf, true)); } // Map only jobs. 
if (stageNum == totalStages -1) { OutputDescriptor od = OutputDescriptor.create(MROutputLegacy.class.getName()) .setUserPayload(vertexUserPayload); if (stageConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT, TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) { od.setHistoryText(TezUtils.convertToHistoryText(stageConf)); } vertex.addDataSink("MROutput", DataSinkDescriptor.create(od, OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null)); } Map<String, String> taskEnv = new HashMap<String, String>(); setupMapReduceEnv(stageConf, taskEnv, isMap); Map<String, LocalResource> taskLocalResources = new TreeMap<String, LocalResource>(); // PRECOMMIT Remove split localization for reduce tasks if it's being set // here taskLocalResources.putAll(jobLocalResources); String taskJavaOpts = isMap ? MRHelpers.getJavaOptsForMRMapper(stageConf) : MRHelpers.getJavaOptsForMRReducer(stageConf); vertex.setTaskEnvironment(taskEnv) .addTaskLocalFiles(taskLocalResources) .setLocationHint(VertexLocationHint.create(locations)) .setTaskLaunchCmdOpts(taskJavaOpts); if (!isMap) { vertex.setVertexManagerPlugin((ShuffleVertexManager.createConfigBuilder(stageConf).build())); } if (LOG.isDebugEnabled()) { LOG.debug("Adding vertex to DAG" + ", vertexName=" + vertex.getName() + ", processor=" + vertex.getProcessorDescriptor().getClassName() + ", parallelism=" + vertex.getParallelism() + ", javaOpts=" + vertex.getTaskLaunchCmdOpts() + ", resources=" + vertex.getTaskResource() // TODO Add localResources and Environment ); } return vertex; }
Example #17
Source File: TestMapProcessor.java From tez with Apache License 2.0 | 4 votes |
/**
 * Runs a single map task end to end over a generated 10-record input and verifies that
 * the keys in the resulting map output file are in non-decreasing order.
 */
@Test(timeout = 5000)
public void testMapProcessor() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");

  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
      new Path(workDir, "map0"),
      new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec), sharedExecutor);

  task.initialize();
  task.run();
  task.close();
  sharedExecutor.shutdownNow();

  OutputContext outputContext = task.getOutputContexts().iterator().next();
  TezTaskOutput mapOutputs = new TezTaskOutputFiles(
      jobConf, outputContext.getUniqueIdentifier(),
      outputContext.getDagIdentifier());

  // TODO NEWTEZ FIXME OutputCommitter verification
  // MRTask mrTask = (MRTask)t.getProcessor();
  // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
  //     .getCommitter().getClass().getName());
  // t.close();

  Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
  LOG.info("mapOutputFile = " + mapOutputFile);
  IFile.Reader reader =
      new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
  try {
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    // Long.MIN_VALUE compares <= every key, so the first record needs no special case.
    // Updating prev on every record is what makes the ordering check effective; it was
    // previously only updated inside a branch that could never be entered.
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
      reader.nextRawValue(valueBuf);
      key.readFields(keyBuf);
      value.readFields(valueBuf);
      assert (prev <= key.get());
      prev = key.get();
      LOG.info("key = " + key.get() + "; value = " + value);
    }
  } finally {
    // Close the reader even when a read or the ordering check fails.
    reader.close();
  }
}
Example #18
Source File: TestMapProcessor.java From tez with Apache License 2.0 | 4 votes |
/**
 * Runs a map task over a generated 100000-record input while a scheduled monitor polls
 * the task's progress every millisecond, and verifies that at least one intermediate
 * progress value strictly between 0 and 1 was observed.
 */
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");

  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(
      localFs, workDir, jobConf, 0,
      new Path(workDir, "map0"),
      new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  // A plain Runnable is all the scheduler needs; the pool supplies the thread.
  Runnable monitorProgress = new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if (prog > 0.0f && prog < 1.0f) {
        progressUpdate = prog;
      }
    }
  };

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
  try {
    task.run();
    Assert.assertTrue("Progress Updates should be captured!",
        progressUpdate > 0.0f && progressUpdate < 1.0f);
    task.close();
    sharedExecutor.shutdownNow();
  } finally {
    // Stop the polling thread on every path; otherwise the pool's non-daemon thread
    // keeps calling into the task after the test has finished.
    scheduler.shutdownNow();
  }
}
Example #19
Source File: TestMultiMRInput.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * A {@link MultiMRInput} configured with one physical input must reject a second
 * data-information event with an "Unexpected event" error.
 */
@Test(timeout = 5000)
public void testExtraEvents() throws Exception {
  Path workDir = new Path(TEST_ROOT_DIR, "testExtraEvents");

  JobConf jobConf = new JobConf(defaultConf);
  jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(jobConf, workDir);

  byte[] payload = MRInputUserPayloadProto.newBuilder()
      .setInputFormatName(SequenceFileInputFormat.class.getName())
      .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
      .build()
      .toByteArray();

  TezInputContext inputContext = createTezInputContext(payload);

  MultiMRInput input = new MultiMRInput();
  input.setNumPhysicalInputs(1);
  input.initialize(inputContext);
  List<Event> eventList = new ArrayList<Event>();

  String file1 = "file1";
  createInputData(localFs, workDir, jobConf, file1, 0, 10);
  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(jobConf, 1);
  assertEquals(1, splits.length);

  // Two events built from the same split: one more than the single physical input allows.
  MRSplitProto splitProto = MRHelpers.createSplitProto(splits[0]);
  RootInputDataInformationEvent firstEvent =
      new RootInputDataInformationEvent(0, splitProto.toByteArray());
  RootInputDataInformationEvent extraEvent =
      new RootInputDataInformationEvent(1, splitProto.toByteArray());

  eventList.clear();
  eventList.add(firstEvent);
  eventList.add(extraEvent);
  try {
    input.handleEvents(eventList);
    fail("Expecting Exception due to too many events");
  } catch (Exception e) {
    assertTrue(e.getMessage().contains(
        "Unexpected event. All physical sources already initialized"));
  }
}
Example #20
Source File: TestMultiMRInput.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Feeds a {@code MultiMRInput} configured with two physical inputs one event per split of a
 * two-file input directory, then reads every record back through the exposed readers and
 * matches it against the data written. Exactly two readers are expected.
 *
 * @throws Exception if the input cannot be set up or read
 */
@Test(timeout = 5000)
public void testMultipleSplits() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testMultipleSplits");
  JobConf jobConf = new JobConf(defaultConf);
  jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inputDir);

  // Serialized user payload: input format name plus the job configuration.
  byte[] payload = MRInputUserPayloadProto.newBuilder()
      .setInputFormatName(SequenceFileInputFormat.class.getName())
      .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
      .build()
      .toByteArray();

  MultiMRInput input = new MultiMRInput();
  input.setNumPhysicalInputs(2);
  input.initialize(createTezInputContext(payload));

  // Union of the records written to both files; entries are removed as they are read back.
  LinkedHashMap<LongWritable, Text> expected = new LinkedHashMap<LongWritable, Text>();
  expected.putAll(createInputData(localFs, inputDir, jobConf, "file1", 0, 10));
  expected.putAll(createInputData(localFs, inputDir, jobConf, "file2", 10, 20));

  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(jobConf, 2);
  assertEquals(2, splits.length);

  // One event per split; both events carry index 0, as in the original test.
  List<Event> events = new ArrayList<Event>();
  for (InputSplit split : splits) {
    MRSplitProto proto = MRHelpers.createSplitProto(split);
    events.add(new RootInputDataInformationEvent(0, proto.toByteArray()));
  }
  input.handleEvents(events);

  int readersSeen = 0;
  for (KeyValueReader reader : input.getKeyValueReaders()) {
    readersSeen++;
    while (reader.next()) {
      if (expected.size() == 0) {
        fail("Found more records than expected");
      }
      Object key = reader.getCurrentKey();
      Object val = reader.getCurrentValue();
      assertEquals(val, expected.remove(key));
    }
  }
  assertEquals(2, readersSeen);
}
Example #21
Source File: TestMultiMRInput.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Feeds a {@code MultiMRInput} configured with one physical input a single split event, then
 * reads every record back through the exposed reader and matches it against the data written.
 * Exactly one reader is expected.
 *
 * @throws Exception if the input cannot be set up or read
 */
@Test(timeout = 5000)
public void testSingleSplit() throws Exception {
  Path inputDir = new Path(TEST_ROOT_DIR, "testSingleSplit");
  JobConf jobConf = new JobConf(defaultConf);
  jobConf.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(jobConf, inputDir);

  // Serialized user payload: input format name plus the job configuration.
  byte[] payload = MRInputUserPayloadProto.newBuilder()
      .setInputFormatName(SequenceFileInputFormat.class.getName())
      .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
      .build()
      .toByteArray();

  MultiMRInput input = new MultiMRInput();
  input.setNumPhysicalInputs(1);
  input.initialize(createTezInputContext(payload));

  // Records written to the input file; entries are removed as they are read back.
  LinkedHashMap<LongWritable, Text> expected =
      createInputData(localFs, inputDir, jobConf, "file1", 0, 10);

  SequenceFileInputFormat<LongWritable, Text> format =
      new SequenceFileInputFormat<LongWritable, Text>();
  InputSplit[] splits = format.getSplits(jobConf, 1);
  assertEquals(1, splits.length);

  MRSplitProto splitProto = MRHelpers.createSplitProto(splits[0]);
  List<Event> events = new ArrayList<Event>();
  events.add(new RootInputDataInformationEvent(0, splitProto.toByteArray()));
  input.handleEvents(events);

  int readersSeen = 0;
  for (KeyValueReader reader : input.getKeyValueReaders()) {
    readersSeen++;
    while (reader.next()) {
      if (expected.size() == 0) {
        fail("Found more records than expected");
      }
      Object key = reader.getCurrentKey();
      Object val = reader.getCurrentValue();
      assertEquals(val, expected.remove(key));
    }
  }
  assertEquals(1, readersSeen);
}
Example #22
Source File: TestMapProcessor.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Runs a single map task over a generated input split, then reads the task's output file back
 * and checks that the keys appear in non-decreasing order.
 *
 * <p>The ordering check uses the Java {@code assert} statement, so it is only active when
 * assertions are enabled in the JVM running the test.
 *
 * @throws Exception if setting up or running the task, or reading its output, fails
 */
@Test
public void testMapProcessor() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateVertexConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");
  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      new InputDescriptor(MRInputLegacy.class.getName())
          .setUserPayload(MRHelpers.createMRInputPayload(jobConf, null)),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      new OutputDescriptor(LocalOnFileSorterOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
      1);

  LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
      new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec));

  task.initialize();
  task.run();
  task.close();

  TezInputContext inputContext = task.getInputContexts().iterator().next();
  TezTaskOutput mapOutputs = new TezLocalTaskOutputFiles(jobConf,
      inputContext.getUniqueIdentifier());

  // TODO NEWTEZ FIXME OutputCommitter verification
  // MRTask mrTask = (MRTask)t.getProcessor();
  // Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
  //     .getCommitter().getClass().getName());
  // t.close();

  Path mapOutputFile = mapOutputs.getInputFile(new InputAttemptIdentifier(0, 0));
  LOG.info("mapOutputFile = " + mapOutputFile);
  IFile.Reader reader =
      new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
  try {
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    // Long.MIN_VALUE is <= every possible key, so the first record always passes the check.
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
      reader.nextRawValue(valueBuf);
      key.readFields(keyBuf);
      value.readFields(valueBuf);
      assert prev <= key.get()
          : "map output not sorted: " + prev + " precedes " + key.get();
      // Always remember the last key; previously this was only updated inside a guard that
      // could never become true, so the ordering check was dead code.
      prev = key.get();
      LOG.info("key = " + key.get() + "; value = " + value);
    }
  } finally {
    // Release the reader even if a record fails to deserialize or the ordering check trips.
    reader.close();
  }
}
Example #23
Source File: MRInputUtils.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Rebuilds an old-format input split from its serialized proto form.
 *
 * @param splitProto the serialized split
 * @param conf configuration used to create the {@code SerializationFactory} for deserialization
 * @return the split produced by {@code MRHelpers.createOldFormatSplitFromUserPayload}
 * @throws IOException if the split cannot be deserialized
 */
@Private
public static InputSplit getOldSplitDetailsFromEvent(MRSplitProto splitProto,
    Configuration conf) throws IOException {
  return MRHelpers.createOldFormatSplitFromUserPayload(
      splitProto, new SerializationFactory(conf));
}
Example #24
Source File: MRInputUtils.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Rebuilds a new-format ({@code org.apache.hadoop.mapreduce}) input split from its serialized
 * proto form.
 *
 * @param splitProto the serialized split
 * @param conf configuration used to create the {@code SerializationFactory} for deserialization
 * @return the split produced by {@code MRHelpers.createNewFormatSplitFromUserPayload}
 * @throws IOException if the split cannot be deserialized
 */
public static org.apache.hadoop.mapreduce.InputSplit getNewSplitDetailsFromEvent(
    MRSplitProto splitProto, Configuration conf) throws IOException {
  return MRHelpers.createNewFormatSplitFromUserPayload(
      splitProto, new SerializationFactory(conf));
}
Example #25
Source File: YARNRunner.java From incubator-tez with Apache License 2.0 | 4 votes |
private Vertex createVertexForStage(Configuration stageConf, Map<String, LocalResource> jobLocalResources, List<TaskLocationHint> locations, int stageNum, int totalStages) throws IOException { // stageNum starts from 0, goes till numStages - 1 boolean isMap = false; if (stageNum == 0) { isMap = true; } int numTasks = isMap ? stageConf.getInt(MRJobConfig.NUM_MAPS, 0) : stageConf.getInt(MRJobConfig.NUM_REDUCES, 0); String processorName = isMap ? MapProcessor.class.getName() : ReduceProcessor.class.getName(); String vertexName = null; if (isMap) { vertexName = MultiStageMRConfigUtil.getInitialMapVertexName(); } else { if (stageNum == totalStages - 1) { vertexName = MultiStageMRConfigUtil.getFinalReduceVertexName(); } else { vertexName = MultiStageMRConfigUtil .getIntermediateStageVertexName(stageNum); } } Resource taskResource = isMap ? MRHelpers.getMapResource(stageConf) : MRHelpers.getReduceResource(stageConf); stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part"); byte[] vertexUserPayload = MRHelpers.createUserPayloadFromConf(stageConf); Vertex vertex = new Vertex(vertexName, new ProcessorDescriptor(processorName). setUserPayload(vertexUserPayload), numTasks, taskResource); if (isMap) { byte[] mapInputPayload = MRHelpers.createMRInputPayload(vertexUserPayload, null); MRHelpers.addMRInput(vertex, mapInputPayload, null); } // Map only jobs. if (stageNum == totalStages -1) { MRHelpers.addMROutputLegacy(vertex, vertexUserPayload); } Map<String, String> taskEnv = new HashMap<String, String>(); setupMapReduceEnv(stageConf, taskEnv, isMap); Map<String, LocalResource> taskLocalResources = new TreeMap<String, LocalResource>(); // PRECOMMIT Remove split localization for reduce tasks if it's being set // here taskLocalResources.putAll(jobLocalResources); String taskJavaOpts = isMap ? 
MRHelpers.getMapJavaOpts(stageConf) : MRHelpers.getReduceJavaOpts(stageConf); vertex.setTaskEnvironment(taskEnv) .setTaskLocalFiles(taskLocalResources) .setTaskLocationsHint(locations) .setTaskLaunchCmdOpts(taskJavaOpts); if (!isMap) { vertex.setVertexManagerPlugin(new VertexManagerPluginDescriptor( ShuffleVertexManager.class.getName())); } if (LOG.isDebugEnabled()) { LOG.debug("Adding vertex to DAG" + ", vertexName=" + vertex.getName() + ", processor=" + vertex.getProcessorDescriptor().getClassName() + ", parallelism=" + vertex.getParallelism() + ", javaOpts=" + vertex.getTaskLaunchCmdOpts() + ", resources=" + vertex.getTaskResource() // TODO Add localResources and Environment ); } return vertex; }
Example #26
Source File: UnionExample.java From incubator-tez with Apache License 2.0 | 4 votes |
/**
 * Builds the "UnionExample" DAG: three map vertices that share one MR input descriptor, a
 * vertex group over the first two, and a checker vertex fed by the group and by the third map
 * vertex.
 *
 * <p>Three MR outputs are registered: "union" and "all-parts" on the checker vertex (writing to
 * {@code outputPath} and {@code outputPath + "-all-parts"}) and "parts" on the vertex group
 * (writing to {@code outputPath + "-parts"}).
 *
 * @param fs not used by this method
 * @param tezConf base configuration copied for each input/output
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath input directory read by all three map vertices
 * @param outputPath base output directory
 * @return the assembled DAG
 * @throws IOException if a user payload cannot be created
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = new DAG("UnionExample");

  // -1 parallelism for all three map vertices.
  final int numMaps = -1;

  // One input descriptor, reused by every map vertex.
  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  byte[] inputPayload =
      MRInput.createUserPayload(inputConf, TextInputFormat.class.getName(), true, true);
  InputDescriptor id = new InputDescriptor(MRInput.class.getName());
  id = id.setUserPayload(inputPayload);

  Vertex mapVertex1 = new Vertex("map1",
      new ProcessorDescriptor(TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex1.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex2 = new Vertex("map2",
      new ProcessorDescriptor(TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex2.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex3 = new Vertex("map3",
      new ProcessorDescriptor(TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex3.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex checkerVertex = new Vertex("checker",
      new ProcessorDescriptor(UnionProcessor.class.getName()),
      1, MRHelpers.getReduceResource(tezConf));

  // Output "union" on the checker vertex -> outputPath.
  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  byte[] unionPayload =
      MROutput.createUserPayload(outputConf, TextOutputFormat.class.getName(), true);
  OutputDescriptor od =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(unionPayload);
  checkerVertex.addOutput("union", od, MROutputCommitter.class);

  // Output "all-parts" on the checker vertex -> outputPath-all-parts.
  Configuration allPartsConf = new Configuration(tezConf);
  allPartsConf.set(FileOutputFormat.OUTDIR, outputPath + "-all-parts");
  byte[] allPartsPayload =
      MROutput.createUserPayload(allPartsConf, TextOutputFormat.class.getName(), true);
  OutputDescriptor od2 =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(allPartsPayload);
  checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);

  // Output "parts" on the vertex group of map1 and map2 -> outputPath-parts.
  Configuration partsConf = new Configuration(tezConf);
  partsConf.set(FileOutputFormat.OUTDIR, outputPath + "-parts");
  VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  byte[] partsPayload =
      MROutput.createUserPayload(partsConf, TextOutputFormat.class.getName(), true);
  OutputDescriptor od1 =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(partsPayload);
  unionVertex.addOutput("parts", od1, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null)
      .build();

  dag.addVertex(mapVertex1);
  dag.addVertex(mapVertex2);
  dag.addVertex(mapVertex3);
  dag.addVertex(checkerVertex);
  dag.addEdge(new Edge(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()));
  dag.addEdge(new GroupInputEdge(unionVertex, checkerVertex,
      edgeConf.createDefaultEdgeProperty(),
      new InputDescriptor(ConcatenatedMergedKeyValuesInput.class.getName())));
  return dag;
}
Example #27
Source File: BroadcastAndOneToOneExample.java From incubator-tez with Apache License 2.0 | 4 votes |
private DAG createDAG(FileSystem fs, TezConfiguration tezConf, Path stagingDir, boolean doLocalityCheck) throws IOException, YarnException { JobConf mrConf = new JobConf(tezConf); int numBroadcastTasks = 2; int numOneToOneTasks = 3; if (doLocalityCheck) { YarnClient yarnClient = YarnClient.createYarnClient(); yarnClient.init(tezConf); yarnClient.start(); int numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size(); yarnClient.stop(); // create enough 1-1 tasks to run in parallel numOneToOneTasks = numNMs - numBroadcastTasks - 1;// 1 AM if (numOneToOneTasks < 1) { numOneToOneTasks = 1; } } byte[] procPayload = {(byte) (doLocalityCheck ? 1 : 0), 1}; System.out.println("Using " + numOneToOneTasks + " 1-1 tasks"); Vertex broadcastVertex = new Vertex("Broadcast", new ProcessorDescriptor( InputProcessor.class.getName()), numBroadcastTasks, MRHelpers.getMapResource(mrConf)); Vertex inputVertex = new Vertex("Input", new ProcessorDescriptor( InputProcessor.class.getName()).setUserPayload(procPayload), numOneToOneTasks, MRHelpers.getMapResource(mrConf)); Vertex oneToOneVertex = new Vertex("OneToOne", new ProcessorDescriptor( OneToOneProcessor.class.getName()).setUserPayload(procPayload), -1, MRHelpers.getReduceResource(mrConf)); oneToOneVertex.setVertexManagerPlugin( new VertexManagerPluginDescriptor(InputReadyVertexManager.class.getName())); UnorderedUnpartitionedKVEdgeConfigurer edgeConf = UnorderedUnpartitionedKVEdgeConfigurer .newBuilder(Text.class.getName(), IntWritable.class.getName()).build(); DAG dag = new DAG("BroadcastAndOneToOneExample"); dag.addVertex(inputVertex) .addVertex(broadcastVertex) .addVertex(oneToOneVertex) .addEdge( new Edge(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty())) .addEdge( new Edge(broadcastVertex, oneToOneVertex, edgeConf.createDefaultBroadcastEdgeProperty())); return dag; }
Example #28
Source File: IntersectExample.java From incubator-tez with Apache License 2.0 | 4 votes |
private DAG createDag(TezConfiguration tezConf, Path streamPath, Path hashPath, Path outPath, int numPartitions) throws IOException { DAG dag = new DAG("IntersectExample"); // Configuration for src1 Configuration streamInputConf = new Configuration(tezConf); streamInputConf.set(FileInputFormat.INPUT_DIR, streamPath.toUri().toString()); byte[] streamInputPayload = MRInput.createUserPayload(streamInputConf, TextInputFormat.class.getName(), true, false); // Configuration for src2 Configuration hashInputConf = new Configuration(tezConf); hashInputConf.set(FileInputFormat.INPUT_DIR, hashPath.toUri().toString()); byte[] hashInputPayload = MRInput.createUserPayload(hashInputConf, TextInputFormat.class.getName(), true, false); // Configuration for intermediate output - shared by Vertex1 and Vertex2 // This should only be setting selective keys from the underlying conf. Fix after there's a // better mechanism to configure the IOs. UnorderedPartitionedKVEdgeConfigurer edgeConf = UnorderedPartitionedKVEdgeConfigurer .newBuilder(Text.class.getName(), NullWritable.class.getName(), HashPartitioner.class.getName(), null).build(); Configuration finalOutputConf = new Configuration(tezConf); finalOutputConf.set(FileOutputFormat.OUTDIR, outPath.toUri().toString()); byte[] finalOutputPayload = MROutput.createUserPayload(finalOutputConf, TextOutputFormat.class.getName(), true); // Change the way resources are setup - no MRHelpers Vertex streamFileVertex = new Vertex("partitioner1", new ProcessorDescriptor(ForwardingProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf)).addInput("streamfile", new InputDescriptor(MRInput.class.getName()) .setUserPayload(streamInputPayload), MRInputAMSplitGenerator.class); Vertex hashFileVertex = new Vertex("partitioner2", new ProcessorDescriptor( ForwardingProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf)).addInput("hashfile", new InputDescriptor(MRInput.class.getName()) .setUserPayload(hashInputPayload), 
MRInputAMSplitGenerator.class); Vertex intersectVertex = new Vertex("intersect", new ProcessorDescriptor( IntersectProcessor.class.getName()), numPartitions, MRHelpers.getReduceResource(tezConf)).addOutput("finalOutput", new OutputDescriptor(MROutput.class.getName()) .setUserPayload(finalOutputPayload), MROutputCommitter.class); Edge e1 = new Edge(streamFileVertex, intersectVertex, edgeConf.createDefaultEdgeProperty()); Edge e2 = new Edge(hashFileVertex, intersectVertex, edgeConf.createDefaultEdgeProperty()); dag.addVertex(streamFileVertex).addVertex(hashFileVertex).addVertex(intersectVertex) .addEdge(e1).addEdge(e2); return dag; }
Example #29
Source File: IntersectValidate.java From incubator-tez with Apache License 2.0 | 4 votes |
private DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions) throws IOException { DAG dag = new DAG("IntersectValidate"); // Configuration for src1 Configuration lhsInputConf = new Configuration(tezConf); lhsInputConf.set(FileInputFormat.INPUT_DIR, lhs.toUri().toString()); byte[] streamInputPayload = MRInput.createUserPayload(lhsInputConf, TextInputFormat.class.getName(), true, false); // Configuration for src2 Configuration rhsInputConf = new Configuration(tezConf); rhsInputConf.set(FileInputFormat.INPUT_DIR, rhs.toUri().toString()); byte[] hashInputPayload = MRInput.createUserPayload(rhsInputConf, TextInputFormat.class.getName(), true, false); // Configuration for intermediate output - shared by Vertex1 and Vertex2 // This should only be setting selective keys from the underlying conf. Fix after there's a // better mechanism to configure the IOs. OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer .newBuilder(Text.class.getName(), NullWritable.class.getName(), HashPartitioner.class.getName(), null).build(); // Change the way resources are setup - no MRHelpers Vertex lhsVertex = new Vertex(LHS_INPUT_NAME, new ProcessorDescriptor( ForwardingProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf)).addInput("lhs", new InputDescriptor( MRInput.class.getName()).setUserPayload(streamInputPayload), MRInputAMSplitGenerator.class); Vertex rhsVertex = new Vertex(RHS_INPUT_NAME, new ProcessorDescriptor( ForwardingProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf)).addInput("rhs", new InputDescriptor( MRInput.class.getName()).setUserPayload(hashInputPayload), MRInputAMSplitGenerator.class); Vertex intersectValidateVertex = new Vertex("intersectvalidate", new ProcessorDescriptor(IntersectValidateProcessor.class.getName()), numPartitions, MRHelpers.getReduceResource(tezConf)); Edge e1 = new Edge(lhsVertex, intersectValidateVertex, edgeConf.createDefaultEdgeProperty()); Edge e2 = new Edge(rhsVertex, 
intersectValidateVertex, edgeConf.createDefaultEdgeProperty()); dag.addVertex(lhsVertex).addVertex(rhsVertex).addVertex(intersectValidateVertex).addEdge(e1) .addEdge(e2); return dag; }