Java Code Examples for org.apache.beam.runners.core.construction.ParDoTranslation#getSchemaInformation()
The following examples show how to use the method
org.apache.beam.runners.core.construction.ParDoTranslation#getSchemaInformation().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParDoMultiOverrideFactory.java From beam with Apache License 2.0 | 5 votes |
/**
 * Picks the transform that should stand in for the given applied {@code ParDo}.
 *
 * <ul>
 *   <li>If the DoFn's process-element method is splittable, the result of
 *       {@code SplittableParDo.forAppliedParDo} is returned.
 *   <li>If the DoFn declares state, timers or timer families, a {@code GbkThenStatefulParDo}
 *       is built from the details read through {@code ParDoTranslation}.
 *   <li>Otherwise the application's own transform is returned unchanged.
 * </ul>
 *
 * @param application the applied transform to inspect
 * @return the transform to use for this application
 * @throws IOException if one of the lookups performed here fails with it
 */
@SuppressWarnings("unchecked")
private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication(
    AppliedPTransform<
            PCollection<? extends InputT>,
            PCollectionTuple,
            PTransform<PCollection<? extends InputT>, PCollectionTuple>>
        application)
    throws IOException {
  DoFn<InputT, OutputT> doFn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
  DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());

  // Splittable DoFns take precedence over the stateful check.
  if (doFnSignature.processElement().isSplittable()) {
    return SplittableParDo.forAppliedParDo((AppliedPTransform) application);
  }

  boolean declaresStateOrTimers =
      !doFnSignature.stateDeclarations().isEmpty()
          || !doFnSignature.timerDeclarations().isEmpty()
          || !doFnSignature.timerFamilyDeclarations().isEmpty();
  if (!declaresStateOrTimers) {
    // Nothing to override: keep the original transform.
    return application.getTransform();
  }

  return new GbkThenStatefulParDo(
      doFn,
      ParDoTranslation.getMainOutputTag(application),
      ParDoTranslation.getAdditionalOutputTags(application),
      ParDoTranslation.getSideInputs(application),
      ParDoTranslation.getSchemaInformation(application),
      ParDoTranslation.getSideInputMapping(application));
}
Example 2
Source File: PipelineTranslator.java From incubator-nemo with Apache License 2.0 | 4 votes |
/**
 * Builds the transform that runs the {@link DoFn} attached to a Beam ParDo node.
 *
 * <p>A {@code DoFnTransform} is returned when {@code sideInputMap} is empty; otherwise a
 * {@code PushBackDoFnTransform} that also receives the side input map is returned.
 *
 * @param ctx provides translation context.
 * @param beamNode the beam node to be translated.
 * @param sideInputMap side inputs.
 * @return the created DoFnTransform.
 * @throws RuntimeException wrapping any {@link IOException} caught while reading the ParDo.
 */
private static AbstractDoFnTransform createDoFnTransform(
    final PipelineTranslationContext ctx,
    final TransformHierarchy.Node beamNode,
    final Map<Integer, PCollectionView<?>> sideInputMap) {
  try {
    final AppliedPTransform pTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
    final DoFn doFn = ParDoTranslation.getDoFn(pTransform);
    final TupleTag mainOutputTag = ParDoTranslation.getMainOutputTag(pTransform);
    final TupleTagList additionalOutputTags = ParDoTranslation.getAdditionalOutputTags(pTransform);

    final PCollection<?> mainInput =
        (PCollection<?>) Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(pTransform));

    // Display data consists of a single "name" item carrying the node's full name.
    final HasDisplayData displayData =
        (builder) -> builder.add(DisplayData.item("name", beamNode.getFullName()));

    // Reuse the applied transform built above instead of converting the node a second time.
    final DoFnSchemaInformation doFnSchemaInformation =
        ParDoTranslation.getSchemaInformation(pTransform);

    if (sideInputMap.isEmpty()) {
      return new DoFnTransform(
          doFn,
          mainInput.getCoder(),
          getOutputCoders(pTransform),
          mainOutputTag,
          additionalOutputTags.getAll(),
          mainInput.getWindowingStrategy(),
          ctx.getPipelineOptions(),
          DisplayData.from(displayData),
          doFnSchemaInformation,
          Collections.emptyMap());
    } else {
      return new PushBackDoFnTransform(
          doFn,
          mainInput.getCoder(),
          getOutputCoders(pTransform),
          mainOutputTag,
          additionalOutputTags.getAll(),
          mainInput.getWindowingStrategy(),
          sideInputMap,
          ctx.getPipelineOptions(),
          DisplayData.from(displayData),
          doFnSchemaInformation,
          Collections.emptyMap());
    }
  } catch (final IOException e) {
    throw new RuntimeException(e);
  }
}
Example 3
Source File: ParDoMultiOutputTranslatorBatch.java From twister2 with Apache License 2.0 | 4 votes |
@Override public void translateNode( ParDo.MultiOutput<IT, OT> transform, Twister2BatchTranslationContext context) { DoFn<IT, OT> doFn; doFn = transform.getFn(); BatchTSetImpl<WindowedValue<IT>> inputTTSet = context.getInputDataSet(context.getInput(transform)); WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy(); Coder<IT> inputCoder = (Coder<IT>) context.getInput(transform).getCoder(); Map<TupleTag<?>, PValue> outputs = context.getOutputs(); Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders(); DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass()); DoFnSchemaInformation doFnSchemaInformation; doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform()); TupleTag<OT> mainOutput = transform.getMainOutputTag(); List<TupleTag<?>> additionalOutputTags = new ArrayList<>(outputs.size() - 1); Collection<PCollectionView<?>> sideInputs = transform.getSideInputs(); // construct a map from side input to WindowingStrategy so that // the DoFn runner can map main-input windows to side input windows Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>(); for (PCollectionView<?> sideInput : sideInputs) { sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal()); } TupleTag<?> mainOutputTag; try { mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform()); } catch (IOException e) { throw new RuntimeException(e); } Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap(); // put the main output at index 0, FlinkMultiOutputDoFnFunction expects this outputMap.put(mainOutputTag, 0); int count = 1; for (TupleTag<?> tag : outputs.keySet()) { if (!outputMap.containsKey(tag)) { outputMap.put(tag, count++); } } ComputeTSet<RawUnionValue, Iterator<WindowedValue<IT>>> outputTSet = inputTTSet .direct() .<RawUnionValue>compute( new DoFnFunction<OT, IT>( context, doFn, inputCoder, outputCoders, 
additionalOutputTags, windowingStrategy, sideInputStrategies, mainOutput, doFnSchemaInformation, outputMap)); for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) { ComputeTSet<WindowedValue<OT>, Iterator<RawUnionValue>> tempTSet = outputTSet.direct().compute(new OutputTagFilter(outputMap.get(output.getKey()))); context.setOutputDataSet((PCollection) output.getValue(), tempTSet); } }
Example 4
Source File: ParDoMultiOutputTranslatorBatch.java From beam with Apache License 2.0 | 4 votes |
@Override public void translateNode( ParDo.MultiOutput<InputT, OutputT> transform, Twister2BatchTranslationContext context) { DoFn<InputT, OutputT> doFn; doFn = transform.getFn(); if (DoFnSignatures.signatureForDoFn(doFn).processElement().isSplittable()) { throw new UnsupportedOperationException( String.format( "Not expected to directly translate splittable DoFn, should have been overridden: %s", doFn)); } BatchTSetImpl<WindowedValue<InputT>> inputTTSet = context.getInputDataSet(context.getInput(transform)); WindowingStrategy<?, ?> windowingStrategy = context.getInput(transform).getWindowingStrategy(); Coder<InputT> inputCoder = (Coder<InputT>) context.getInput(transform).getCoder(); Map<String, PCollectionView<?>> sideInputMapping; Map<TupleTag<?>, PValue> outputs = context.getOutputs(); Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputCoders(); // DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass()); DoFnSchemaInformation doFnSchemaInformation; doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform()); sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform()); TupleTag<OutputT> mainOutput = transform.getMainOutputTag(); List<TupleTag<?>> additionalOutputTags = new ArrayList<>(transform.getAdditionalOutputTags().getAll()); Map<String, PCollectionView<?>> sideInputs = transform.getSideInputs(); // TODO : note change from List to map in sideinputs // construct a map from side input to WindowingStrategy so that // the DoFn runner can map main-input windows to side input windows Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>(); for (PCollectionView<?> sideInput : sideInputs.values()) { sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal()); } TupleTag<?> mainOutputTag; try { mainOutputTag = ParDoTranslation.getMainOutputTag(context.getCurrentTransform()); } catch (IOException e) { throw new 
RuntimeException(e); } Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap(); outputMap.put(mainOutputTag, 0); int count = 1; for (TupleTag<?> tag : outputs.keySet()) { if (!outputMap.containsKey(tag)) { outputMap.put(tag, count++); } } ComputeTSet<RawUnionValue, Iterator<WindowedValue<InputT>>> outputTSet = inputTTSet .direct() .<RawUnionValue>compute( new DoFnFunction<OutputT, InputT>( context, doFn, inputCoder, outputCoders, additionalOutputTags, windowingStrategy, sideInputStrategies, mainOutput, doFnSchemaInformation, outputMap, sideInputMapping)); for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) { ComputeTSet<WindowedValue<OutputT>, Iterator<RawUnionValue>> tempTSet = outputTSet.direct().compute(new OutputTagFilter(outputMap.get(output.getKey()))); context.setOutputDataSet((PCollection) output.getValue(), tempTSet); } }
Example 5
Source File: ParDoTranslatorBatch.java From beam with Apache License 2.0 | 4 votes |
@Override public void translateTransform( PTransform<PCollection<InputT>, PCollectionTuple> transform, TranslationContext context) { String stepName = context.getCurrentTransform().getFullName(); // Check for not supported advanced features // TODO: add support of Splittable DoFn DoFn<InputT, OutputT> doFn = getDoFn(context); checkState( !DoFnSignatures.isSplittable(doFn), "Not expected to directly translate splittable DoFn, should have been overridden: %s", doFn); // TODO: add support of states and timers checkState( !DoFnSignatures.isStateful(doFn), "States and timers are not supported for the moment."); checkState( !DoFnSignatures.requiresTimeSortedInput(doFn), "@RequiresTimeSortedInput is not " + "supported for the moment"); DoFnSchemaInformation doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform()); // Init main variables PValue input = context.getInput(); Dataset<WindowedValue<InputT>> inputDataSet = context.getDataset(input); Map<TupleTag<?>, PValue> outputs = context.getOutputs(); TupleTag<?> mainOutputTag = getTupleTag(context); List<TupleTag<?>> outputTags = new ArrayList<>(outputs.keySet()); WindowingStrategy<?, ?> windowingStrategy = ((PCollection<InputT>) input).getWindowingStrategy(); Coder<InputT> inputCoder = ((PCollection<InputT>) input).getCoder(); Coder<? 
extends BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder(); // construct a map from side input to WindowingStrategy so that // the DoFn runner can map main-input windows to side input windows List<PCollectionView<?>> sideInputs = getSideInputs(context); Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>(); for (PCollectionView<?> sideInput : sideInputs) { sideInputStrategies.put(sideInput, sideInput.getPCollection().getWindowingStrategy()); } SideInputBroadcast broadcastStateData = createBroadcastSideInputs(sideInputs, context); Map<TupleTag<?>, Coder<?>> outputCoderMap = context.getOutputCoders(); MetricsContainerStepMapAccumulator metricsAccum = MetricsAccumulator.getInstance(); List<TupleTag<?>> additionalOutputTags = new ArrayList<>(); for (TupleTag<?> tag : outputTags) { if (!tag.equals(mainOutputTag)) { additionalOutputTags.add(tag); } } Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform()); @SuppressWarnings("unchecked") DoFnFunction<InputT, OutputT> doFnWrapper = new DoFnFunction( metricsAccum, stepName, doFn, windowingStrategy, sideInputStrategies, context.getSerializableOptions(), additionalOutputTags, mainOutputTag, inputCoder, outputCoderMap, broadcastStateData, doFnSchemaInformation, sideInputMapping); MultiOuputCoder multipleOutputCoder = MultiOuputCoder.of(SerializableCoder.of(TupleTag.class), outputCoderMap, windowCoder); Dataset<Tuple2<TupleTag<?>, WindowedValue<?>>> allOutputs = inputDataSet.mapPartitions(doFnWrapper, EncoderHelpers.fromBeamCoder(multipleOutputCoder)); if (outputs.entrySet().size() > 1) { allOutputs.persist(); for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) { pruneOutputFilteredByTag(context, allOutputs, output, windowCoder); } } else { Coder<OutputT> outputCoder = ((PCollection<OutputT>) outputs.get(mainOutputTag)).getCoder(); Coder<WindowedValue<?>> windowedValueCoder = 
(Coder<WindowedValue<?>>) (Coder<?>) WindowedValue.getFullCoder(outputCoder, windowCoder); Dataset<WindowedValue<?>> outputDataset = allOutputs.map( (MapFunction<Tuple2<TupleTag<?>, WindowedValue<?>>, WindowedValue<?>>) value -> value._2, EncoderHelpers.fromBeamCoder(windowedValueCoder)); context.putDatasetWildcard(outputs.entrySet().iterator().next().getValue(), outputDataset); } }
Example 6
Source File: ParDoBoundMultiTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <InT, OutT> void doTranslatePortable( PipelineNode.PTransformNode transform, QueryablePipeline pipeline, PortableTranslationContext ctx) { Map<String, String> outputs = transform.getTransform().getOutputsMap(); final RunnerApi.ExecutableStagePayload stagePayload; try { stagePayload = RunnerApi.ExecutableStagePayload.parseFrom( transform.getTransform().getSpec().getPayload()); } catch (IOException e) { throw new RuntimeException(e); } String inputId = stagePayload.getInput(); final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId); // TODO: support side input final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList(); final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>(); final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>(); // first output as the main output final TupleTag<OutT> mainOutputTag = outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next()); AtomicInteger index = new AtomicInteger(0); outputs .keySet() .iterator() .forEachRemaining( outputName -> { TupleTag<?> tupleTag = new TupleTag<>(outputName); tagToIndexMap.put(tupleTag, index.get()); index.incrementAndGet(); String collectionId = outputs.get(outputName); idToTupleTagMap.put(collectionId, tupleTag); }); WindowedValue.WindowedValueCoder<InT> windowedInputCoder = ctx.instantiateCoder(inputId, pipeline.getComponents()); final DoFnSchemaInformation doFnSchemaInformation; doFnSchemaInformation = ParDoTranslation.getSchemaInformation(transform.getTransform()); Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(transform.getTransform()); final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId); final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input); final DoFnOp<InT, OutT, RawUnionValue> op = new DoFnOp<>( mainOutputTag, new NoOpDoFn<>(), null, // key coder not in use 
windowedInputCoder.getValueCoder(), // input coder not in use windowedInputCoder, Collections.emptyMap(), // output coders not in use Collections.emptyList(), // sideInputs not in use until side input support new ArrayList<>(idToTupleTagMap.values()), // used by java runner only SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline), Collections.emptyMap(), // idToViewMap not in use until side input support new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), ctx.getTransformFullName(), ctx.getTransformId(), isBounded, true, stagePayload, idToTupleTagMap, doFnSchemaInformation, sideInputMapping); final MessageStream<OpMessage<InT>> mergedStreams; if (sideInputStreams.isEmpty()) { mergedStreams = inputStream; } else { MessageStream<OpMessage<InT>> mergedSideInputStreams = MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn()); mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams)); } final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream = mergedStreams.flatMap(OpAdapter.adapt(op)); for (int outputIndex : tagToIndexMap.values()) { final MessageStream<OpMessage<OutT>> outputStream = taggedOutputStream .filter( message -> message.getType() != OpMessage.Type.ELEMENT || message.getElement().getValue().getUnionTag() == outputIndex) .flatMap(OpAdapter.adapt(new RawUnionValueToValue())); ctx.registerMessageStream(ctx.getOutputId(transform), outputStream); } }
Example 7
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
private <InputT, OutputT> void translateMultiHelper( ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) { StepTranslationContext stepContext = context.addStep(transform, "ParallelDo"); DoFnSchemaInformation doFnSchemaInformation; doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform()); Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform()); Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputs(transform).entrySet().stream() .collect( Collectors.toMap( Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder())); translateInputs( stepContext, context.getInput(transform), transform.getSideInputs().values(), context); translateOutputs(context.getOutputs(transform), stepContext); String ptransformId = context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform()); translateFn( stepContext, ptransformId, transform.getFn(), context.getInput(transform).getWindowingStrategy(), transform.getSideInputs().values(), context.getInput(transform).getCoder(), context, transform.getMainOutputTag(), outputCoders, doFnSchemaInformation, sideInputMapping); // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is // removed. if (context.isFnApi()) { DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn()); if (signature.processElement().isSplittable()) { DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn()); Coder<?> restrictionAndWatermarkStateCoder = KvCoder.of( doFnInvoker.invokeGetRestrictionCoder( context.getInput(transform).getPipeline().getCoderRegistry()), doFnInvoker.invokeGetWatermarkEstimatorStateCoder( context.getInput(transform).getPipeline().getCoderRegistry())); stepContext.addInput( PropertyNames.RESTRICTION_ENCODING, translateCoder(restrictionAndWatermarkStateCoder, context)); } } }
Example 8
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
private <InputT, OutputT> void translateSingleHelper( ParDoSingle<InputT, OutputT> transform, TranslationContext context) { DoFnSchemaInformation doFnSchemaInformation; doFnSchemaInformation = ParDoTranslation.getSchemaInformation(context.getCurrentTransform()); Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(context.getCurrentTransform()); StepTranslationContext stepContext = context.addStep(transform, "ParallelDo"); Map<TupleTag<?>, Coder<?>> outputCoders = context.getOutputs(transform).entrySet().stream() .collect( Collectors.toMap( Map.Entry::getKey, e -> ((PCollection) e.getValue()).getCoder())); translateInputs( stepContext, context.getInput(transform), transform.getSideInputs().values(), context); stepContext.addOutput( transform.getMainOutputTag().getId(), context.getOutput(transform)); String ptransformId = context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform()); translateFn( stepContext, ptransformId, transform.getFn(), context.getInput(transform).getWindowingStrategy(), transform.getSideInputs().values(), context.getInput(transform).getCoder(), context, transform.getMainOutputTag(), outputCoders, doFnSchemaInformation, sideInputMapping); // TODO: Move this logic into translateFn once the legacy ProcessKeyedElements is // removed. if (context.isFnApi()) { DoFnSignature signature = DoFnSignatures.signatureForDoFn(transform.getFn()); if (signature.processElement().isSplittable()) { DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(transform.getFn()); Coder<?> restrictionAndWatermarkStateCoder = KvCoder.of( doFnInvoker.invokeGetRestrictionCoder( context.getInput(transform).getPipeline().getCoderRegistry()), doFnInvoker.invokeGetWatermarkEstimatorStateCoder( context.getInput(transform).getPipeline().getCoderRegistry())); stepContext.addInput( PropertyNames.RESTRICTION_ENCODING, translateCoder(restrictionAndWatermarkStateCoder, context)); } } }
Example 9
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Adds a "SplittableProcessKeyed" step for a {@code ProcessKeyedElements} transform: translates
 * its inputs, outputs and DoFn, then attaches the restriction coder input built from the
 * transform's restriction coder and watermark estimator state coder.
 *
 * @param transform the splittable process-keyed-elements transform to translate
 * @param context the translation context receiving the new step
 */
private <InputT, OutputT, RestrictionT, WatermarkEstimatorStateT> void translateTyped(
    SplittableParDo.ProcessKeyedElements<InputT, OutputT, RestrictionT, WatermarkEstimatorStateT>
        transform,
    TranslationContext context) {
  DoFnSchemaInformation schemaInformation =
      ParDoTranslation.getSchemaInformation(context.getCurrentTransform());
  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(context.getCurrentTransform());

  StepTranslationContext stepContext = context.addStep(transform, "SplittableProcessKeyed");

  // Look up the coder of every output PCollection, keyed by its tag.
  Map<TupleTag<?>, Coder<?>> outputCoders =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, entry -> ((PCollection) entry.getValue()).getCoder()));

  translateInputs(stepContext, context.getInput(transform), transform.getSideInputs(), context);
  translateOutputs(context.getOutputs(transform), stepContext);

  String ptransformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      stepContext,
      ptransformId,
      transform.getFn(),
      transform.getInputWindowingStrategy(),
      transform.getSideInputs(),
      transform.getElementCoder(),
      context,
      transform.getMainOutputTag(),
      outputCoders,
      schemaInformation,
      sideInputMapping);

  // The restriction coder input pairs the restriction coder with the watermark estimator
  // state coder.
  Coder<?> restrictionAndStateCoder =
      KvCoder.of(transform.getRestrictionCoder(), transform.getWatermarkEstimatorStateCoder());
  stepContext.addInput(
      PropertyNames.RESTRICTION_CODER, translateCoder(restrictionAndStateCoder, context));
}