Java Code Examples for org.apache.tez.dag.api.OutputDescriptor#create()
The following examples show how to use
org.apache.tez.dag.api.OutputDescriptor#create().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CartesianProduct.java From tez with Apache License 2.0 | 5 votes |
/**
 * Assembles the "CrossProduct" DAG: VERTEX1 and VERTEX2 each read from the same fake
 * data source and are both connected to VERTEX3, which writes to a fake data sink.
 * The cartesian product configuration is serialised once and handed to both the
 * vertex manager plugin of VERTEX3 and the edge manager plugin used by the edges.
 *
 * @param tezConf configuration used when converting the cartesian product config
 *                into a user payload
 * @return the DAG holding the three vertices and the two edges into VERTEX3
 * @throws IOException propagated from the payload/configuration calls in the body
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // One data source descriptor, shared by both upstream vertices.
  DataSourceDescriptor source = DataSourceDescriptor.create(
      InputDescriptor.create(FakeInput.class.getName()),
      InputInitializerDescriptor.create(FakeInputInitializer.class.getName()),
      null);

  Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v1.addDataSource(INPUT, source);
  Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v2.addDataSource(INPUT, source);

  // Sink attached to the downstream (join) vertex.
  DataSinkDescriptor sink = DataSinkDescriptor.create(
      OutputDescriptor.create(FakeOutput.class.getName()),
      OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName()),
      null);

  // The same serialised config goes to the vertex manager and the edge manager.
  CartesianProductConfig productConfig =
      new CartesianProductConfig(Arrays.asList(sourceVertices));
  UserPayload productPayload = productConfig.toUserPayload(tezConf);

  Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  v3.addDataSink(OUTPUT, sink);
  v3.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
          .setUserPayload(productPayload));

  EdgeManagerPluginDescriptor edgeManager =
      EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  edgeManager.setUserPayload(productPayload);

  UnorderedPartitionedKVEdgeConfig kvEdgeConfig = UnorderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          RoundRobinPartitioner.class.getName())
      .build();
  EdgeProperty customEdge = kvEdgeConfig.createDefaultCustomEdgeProperty(edgeManager);

  Edge firstToJoin = Edge.create(v1, v3, customEdge);
  Edge secondToJoin = Edge.create(v2, v3, customEdge);

  return DAG.create("CrossProduct")
      .addVertex(v1)
      .addVertex(v2)
      .addVertex(v3)
      .addEdge(firstToJoin)
      .addEdge(secondToJoin);
}
Example 2
Source File: TestDAGRecovery2.java From tez with Apache License 2.0 | 5 votes |
/**
 * Adds a data sink named "FailingOutput" to vertex "v3" whose output descriptor carries
 * a {@code FailingOutputCommitterConfig(true)} payload and whose committer is
 * {@code MultiAttemptDAG.FailingOutputCommitter}, then runs the DAG expecting
 * {@code State.FAILED}.
 */
@Test(timeout=120000)
public void testFailingCommitter() throws Exception {
  DAG dag = SimpleVTestDAG.createDAG("FailingCommitterDAG", null);

  OutputDescriptor outputDescriptor =
      OutputDescriptor.create(MultiAttemptDAG.NoOpOutput.class.getName());
  byte[] committerConfigBytes =
      new MultiAttemptDAG.FailingOutputCommitter.FailingOutputCommitterConfig(true)
          .toUserPayload();
  outputDescriptor.setUserPayload(UserPayload.create(ByteBuffer.wrap(committerConfigBytes)));

  OutputCommitterDescriptor committerDescriptor =
      OutputCommitterDescriptor.create(MultiAttemptDAG.FailingOutputCommitter.class.getName());

  DataSinkDescriptor failingSink =
      DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null);
  dag.getVertex("v3").addDataSink("FailingOutput", failingSink);

  runDAGAndVerify(dag, State.FAILED);
}
Example 3
Source File: TestOutput.java From tez with Apache License 2.0 | 5 votes |
/**
 * Creates an {@link OutputDescriptor} for {@code TestOutput}.
 *
 * @param payload user payload to attach; when {@code null} the descriptor is
 *                returned without a payload being set
 * @return the newly created descriptor
 */
public static OutputDescriptor getOutputDesc(UserPayload payload) {
  OutputDescriptor descriptor = OutputDescriptor.create(TestOutput.class.getName());
  if (payload == null) {
    return descriptor;
  }
  descriptor.setUserPayload(payload);
  return descriptor;
}
Example 4
Source File: TezDagBuilder.java From spork with Apache License 2.0 | 4 votes |
/** * Return EdgeProperty that connects two vertices. * * @param from * @param to * @return EdgeProperty * @throws IOException */ private EdgeProperty newEdge(TezOperator from, TezOperator to) throws IOException { TezEdgeDescriptor edge = to.inEdges.get(from.getOperatorKey()); PhysicalPlan combinePlan = edge.combinePlan; InputDescriptor in = InputDescriptor.create(edge.inputClassName); OutputDescriptor out = OutputDescriptor.create(edge.outputClassName); Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties(), false); if (!combinePlan.isEmpty()) { addCombiner(combinePlan, to, conf); } List<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(from.plan, POLocalRearrangeTez.class); for (POLocalRearrangeTez lr : lrs) { if (lr.getOutputKey().equals(to.getOperatorKey().toString())) { byte keyType = lr.getKeyType(); setIntermediateOutputKeyValue(keyType, conf, to, lr.isConnectedToPackage()); // In case of secondary key sort, main key type is the actual key type conf.set("pig.reduce.key.type", Byte.toString(lr.getMainKeyType())); break; } } conf.setIfUnset(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, MRPartitioner.class.getName()); if (edge.getIntermediateOutputKeyClass() != null) { conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, edge.getIntermediateOutputKeyClass()); } if (edge.getIntermediateOutputValueClass() != null) { conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS, edge.getIntermediateOutputValueClass()); } if (edge.getIntermediateOutputKeyComparatorClass() != null) { conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, edge.getIntermediateOutputKeyComparatorClass()); } conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true); conf.set("pig.pigContext", ObjectSerializer.serialize(pc)); conf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList())); if(to.isGlobalSort() || to.isLimitAfterSort()){ conf.set("pig.sortOrder", ObjectSerializer.serialize(to.getSortOrder())); } if 
(edge.isUseSecondaryKey()) { conf.set("pig.secondarySortOrder", ObjectSerializer.serialize(edge.getSecondarySortOrder())); conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR, SecondaryKeyPartitioner.class.getName()); // These needs to be on the vertex as well for POShuffleTezLoad to pick it up. // Tez framework also expects this to be per vertex and not edge. IFile.java picks // up keyClass and valueClass from vertex config. TODO - check with Tez folks // In MR - job.setSortComparatorClass() or MRJobConfig.KEY_COMPARATOR conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_COMPARATOR_CLASS, PigSecondaryKeyComparator.class.getName()); // In MR - job.setOutputKeyClass() or MRJobConfig.OUTPUT_KEY_CLASS conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS, NullableTuple.class.getName()); setGroupingComparator(conf, PigSecondaryKeyGroupComparator.class.getName()); } if (edge.partitionerClass != null) { conf.set(org.apache.hadoop.mapreduce.MRJobConfig.PARTITIONER_CLASS_ATTR, edge.partitionerClass.getName()); } conf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList())); MRToTezHelper.processMRSettings(conf, globalConf); String historyString = convertToHistoryText("", conf); in.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString); out.setUserPayload(TezUtils.createUserPayloadFromConf(conf)).setHistoryText(historyString); if (edge.dataMovementType!=DataMovementType.BROADCAST && to.getEstimatedParallelism()!=-1 && (to.isGlobalSort()||to.isSkewedJoin())) { // Use custom edge return EdgeProperty.create((EdgeManagerPluginDescriptor)null, edge.dataSourceType, edge.schedulingType, out, in); } return EdgeProperty.create(edge.dataMovementType, edge.dataSourceType, edge.schedulingType, out, in); }
Example 5
Source File: TestTaskSpec.java From tez with Apache License 2.0 | 4 votes |
/**
 * Writes a {@code TaskSpec} to a byte array, reads it back into a fresh instance and
 * checks that the DAG name, vertex name, vertex parallelism, input/output counts,
 * group inputs, first input/output vertex names and the "foo" task configuration
 * entry match the original.
 */
@Test (timeout = 5000)
public void testSerDe() throws IOException {
  ByteBuffer emptyPayload = null;
  ProcessorDescriptor processor = ProcessorDescriptor.create("proc")
      .setUserPayload(UserPayload.create(emptyPayload))
      .setHistoryText("historyText");

  List<InputSpec> inputs = new ArrayList<>();
  inputs.add(new InputSpec("src1", InputDescriptor.create("inputClass"), 10));

  List<OutputSpec> outputs = new ArrayList<>();
  outputs.add(new OutputSpec("dest1", OutputDescriptor.create("outputClass"), 999));

  List<GroupInputSpec> groupInputs = null;

  Configuration taskConf = new Configuration(false);
  taskConf.set("foo", "bar");

  // Build the attempt id from the DAG id downwards.
  TezDAGID dagId = TezDAGID.getInstance("1234", 1, 1);
  TezVertexID vertexId = TezVertexID.getInstance(dagId, 1);
  TezTaskID taskId = TezTaskID.getInstance(vertexId, 1);
  TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(taskId, 1);

  TaskSpec original = new TaskSpec(attemptId, "dagName", "vName", -1, processor,
      inputs, outputs, groupInputs, taskConf);

  // Serialise ...
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  original.write(new DataOutputStream(sink));

  // ... and deserialise into a fresh instance.
  TaskSpec restored = new TaskSpec();
  restored.readFields(new DataInputStream(new ByteArrayInputStream(sink.toByteArray())));

  Assert.assertEquals(original.getDAGName(), restored.getDAGName());
  Assert.assertEquals(original.getVertexName(), restored.getVertexName());
  Assert.assertEquals(original.getVertexParallelism(), restored.getVertexParallelism());
  Assert.assertEquals(original.getInputs().size(), restored.getInputs().size());
  Assert.assertEquals(original.getOutputs().size(), restored.getOutputs().size());
  Assert.assertNull(restored.getGroupInputs());
  Assert.assertEquals(
      original.getInputs().get(0).getSourceVertexName(),
      restored.getInputs().get(0).getSourceVertexName());
  Assert.assertEquals(
      original.getOutputs().get(0).getDestinationVertexName(),
      restored.getOutputs().get(0).getDestinationVertexName());
  Assert.assertEquals(taskConf.get("foo"), restored.getTaskConf().get("foo"));
}
Example 6
Source File: TestLogicalIOProcessorRuntimeTask.java From tez with Apache License 2.0 | 4 votes |
/**
 * Builds a one-element list holding an {@code OutputSpec} named "outedge".
 *
 * @param outputClassName class name used to create the output descriptor
 * @return a list created via {@code Lists.newArrayList} containing the single spec
 */
private List<OutputSpec> createOutputSpecList(String outputClassName) {
  return Lists.newArrayList(
      new OutputSpec("outedge", OutputDescriptor.create(outputClassName), 1));
}