Java Code Examples for org.apache.beam.model.pipeline.v1.RunnerApi#PCollection
The following examples show how to use
org.apache.beam.model.pipeline.v1.RunnerApi#PCollection.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PCollectionTranslationTest.java From beam with Apache License 2.0 | 6 votes |
@Test public void testEncodeDecodeCycle() throws Exception { // Encode SdkComponents sdkComponents = SdkComponents.create(); sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java")); RunnerApi.PCollection protoCollection = PCollectionTranslation.toProto(testCollection, sdkComponents); RehydratedComponents protoComponents = RehydratedComponents.forComponents(sdkComponents.toComponents()); // Decode Pipeline pipeline = Pipeline.create(); PCollection<?> decodedCollection = PCollectionTranslation.fromProto(protoCollection, pipeline, protoComponents); // Verify assertThat(decodedCollection.getCoder(), equalTo(testCollection.getCoder())); assertThat( decodedCollection.getWindowingStrategy(), equalTo(testCollection.getWindowingStrategy().fixDefaults())); assertThat(decodedCollection.isBounded(), equalTo(testCollection.isBounded())); }
Example 2
Source File: PCollectionTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Encodes {@code testCollection} to proto and then resolves each referenced component (coder,
 * windowing strategy, boundedness) individually, checking each against the original collection.
 */
@Test
public void testEncodeDecodeFields() throws Exception {
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.PCollection encoded = PCollectionTranslation.toProto(testCollection, components);
  RehydratedComponents rehydrated = RehydratedComponents.forComponents(components.toComponents());

  // Resolve the ids stored on the proto back into SDK objects, one field at a time.
  Coder<?> coderFromProto = rehydrated.getCoder(encoded.getCoderId());
  WindowingStrategy<?, ?> strategyFromProto =
      rehydrated.getWindowingStrategy(encoded.getWindowingStrategyId());
  IsBounded boundednessFromProto = PCollectionTranslation.isBounded(encoded);

  assertThat(coderFromProto, equalTo(testCollection.getCoder()));
  assertThat(strategyFromProto, equalTo(testCollection.getWindowingStrategy().fixDefaults()));
  assertThat(boundednessFromProto, equalTo(testCollection.isBounded()));
}
Example 3
Source File: ProcessBundleDescriptors.java From beam with Apache License 2.0 | 6 votes |
/** * Patches the input coder of a stateful transform to ensure that the byte representation of a key * used to partition the input element at the Runner, matches the key byte representation received * for state requests and timers from the SDK Harness. Stateful transforms always have a KvCoder * as input. */ private static void lengthPrefixKeyCoder( String inputColId, Components.Builder componentsBuilder) { RunnerApi.PCollection pcollection = componentsBuilder.getPcollectionsOrThrow(inputColId); RunnerApi.Coder kvCoder = componentsBuilder.getCodersOrThrow(pcollection.getCoderId()); Preconditions.checkState( ModelCoders.KV_CODER_URN.equals(kvCoder.getSpec().getUrn()), "Stateful executable stages must use a KV coder, but is: %s", kvCoder.getSpec().getUrn()); String keyCoderId = ModelCoders.getKvCoderComponents(kvCoder).keyCoderId(); // Retain the original coder, but wrap in LengthPrefixCoder String newKeyCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(keyCoderId, componentsBuilder, false); // Replace old key coder with LengthPrefixCoder<old_key_coder> kvCoder = kvCoder.toBuilder().setComponentCoderIds(0, newKeyCoderId).build(); componentsBuilder.putCoders(pcollection.getCoderId(), kvCoder); }
Example 4
Source File: PCollectionTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Converts a {@link PCollection} into its {@link RunnerApi.PCollection} proto form.
 *
 * <p>The collection's coder and windowing strategy are registered with {@code components}, and
 * the ids returned by that registration are what the proto stores.
 *
 * @param pCollection the collection to translate
 * @param components registry that receives the coder and windowing strategy
 * @return a proto carrying the collection's name, coder id, boundedness and windowing strategy id
 * @throws IOException declared by this method; raised by the component registration calls
 */
public static RunnerApi.PCollection toProto(PCollection<?> pCollection, SdkComponents components)
    throws IOException {
  String coderId = components.registerCoder(pCollection.getCoder());
  String windowingStrategyId =
      components.registerWindowingStrategy(pCollection.getWindowingStrategy());

  RunnerApi.PCollection.Builder proto = RunnerApi.PCollection.newBuilder();
  proto.setUniqueName(pCollection.getName());
  proto.setCoderId(coderId);
  proto.setIsBounded(toProto(pCollection.isBounded()));
  proto.setWindowingStrategyId(windowingStrategyId);
  return proto.build();
}
Example 5
Source File: PCollectionTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Rebuilds a {@link PCollection} inside {@code pipeline} from its proto description.
 *
 * @param pCollection proto holding the coder id, windowing strategy id and boundedness
 * @param pipeline pipeline the new collection is created in
 * @param components lookup used to resolve the coder and windowing strategy ids
 * @return a primitive-output collection carrying the resolved windowing strategy, boundedness and
 *     coder
 * @throws IOException declared by this method; raised by the component lookups
 */
public static PCollection<?> fromProto(
    RunnerApi.PCollection pCollection, Pipeline pipeline, RehydratedComponents components)
    throws IOException {
  // The coder is resolved first; the remaining lookups happen while the arguments are evaluated.
  Coder<?> elementCoder = components.getCoder(pCollection.getCoderId());
  return PCollection.createPrimitiveOutputInternal(
      pipeline,
      components.getWindowingStrategy(pCollection.getWindowingStrategyId()),
      fromProto(pCollection.getIsBounded()),
      (Coder) elementCoder);
}
Example 6
Source File: ParDoTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Looks up the main input PCollection of a ParDo-like transform.
 *
 * @param ptransform transform whose spec URN must be the ParDo URN or one of the splittable URNs
 *     checked below
 * @param components components the main input PCollection is read from
 * @return the PCollection registered under the transform's main input
 * @throws IllegalArgumentException if the transform's URN is not one of the accepted URNs
 * @throws IOException declared by this method
 */
public static RunnerApi.PCollection getMainInput(
    RunnerApi.PTransform ptransform, Components components) throws IOException {
  String urn = ptransform.getSpec().getUrn();
  boolean acceptedUrn =
      PAR_DO_TRANSFORM_URN.equals(urn)
          || SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(urn)
          || SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(urn)
          || SPLITTABLE_PROCESS_ELEMENTS_URN.equals(urn)
          || SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(urn);
  checkArgument(acceptedUrn, "Unexpected payload type %s", urn);

  String mainInputId = ptransform.getInputsOrThrow(getMainInputName(ptransform));
  return components.getPcollectionsOrThrow(mainInputId);
}
Example 7
Source File: GreedyStageFuser.java From beam with Apache License 2.0 | 5 votes |
/**
 * Reports whether at least one input PCollection of {@code consumer} has a non-empty set of
 * singleton consumers in {@code pipeline}.
 *
 * @param consumer transform node whose inputs are inspected
 * @param pipeline pipeline used to resolve PCollections and their singleton consumers
 * @return {@code true} as soon as one such input is found, {@code false} if none is
 */
private static boolean anyInputsSideInputs(PTransformNode consumer, QueryablePipeline pipeline) {
  return consumer.getTransform().getInputsMap().values().stream()
      .anyMatch(
          inputPCollectionId ->
              !pipeline
                  .getSingletonConsumers(
                      PipelineNode.pCollection(
                          inputPCollectionId,
                          pipeline.getComponents().getPcollectionsMap().get(inputPCollectionId)))
                  .isEmpty());
}
Example 8
Source File: RegisterNodeFunction.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds an artificial {@link PCollectionView} that satisfies the API requirements of a {@link
 * SideInputReader} inside the Dataflow runner harness.
 *
 * <p>The view is backed by the runner wire coder of the side input PCollection, i.e. the length
 * prefixed coder variant, so that encoding and decoding values matches the length prefixing that
 * occurred when the side input was materialized.
 *
 * @param pipeline pipeline whose components hold the side input PCollection
 * @param parDoPTransform transform that consumes the side input
 * @param sideInputTag tag under which the side input is registered on the transform
 * @param sideInput side input description; only multimap materializations are accepted
 * @return a view keyed by {@code sideInputTag} using the runner-side coder
 * @throws IllegalArgumentException if the side input is not a multimap materialization
 * @throws IllegalStateException if the runner wire coder cannot be instantiated
 */
public static final PCollectionView<?> transformSideInputForRunner(
    RunnerApi.Pipeline pipeline,
    RunnerApi.PTransform parDoPTransform,
    String sideInputTag,
    RunnerApi.SideInput sideInput) {
  String accessPatternUrn = sideInput.getAccessPattern().getUrn();
  checkArgument(
      Materializations.MULTIMAP_MATERIALIZATION_URN.equals(accessPatternUrn),
      "This handler is only capable of dealing with %s materializations "
          + "but was asked to handle %s for PCollectionView with tag %s.",
      Materializations.MULTIMAP_MATERIALIZATION_URN,
      accessPatternUrn,
      sideInputTag);

  String pCollectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
  RunnerApi.PCollection pCollection =
      pipeline.getComponents().getPcollectionsOrThrow(pCollectionId);

  try {
    FullWindowedValueCoder<KV<Object, Object>> runnerCoder =
        (FullWindowedValueCoder)
            WireCoders.instantiateRunnerWireCoder(
                PipelineNode.pCollection(pCollectionId, pCollection), pipeline.getComponents());
    return DataflowPortabilityPCollectionView.with(new TupleTag<>(sideInputTag), runnerCoder);
  } catch (IOException e) {
    throw new IllegalStateException("Unable to translate proto to coder", e);
  }
}
Example 9
Source File: CombineRunners.java From beam with Apache License 2.0 | 4 votes |
@Override public PrecombineRunner<KeyT, InputT, AccumT> createRunnerForPTransform( PipelineOptions pipelineOptions, BeamFnDataClient beamFnDataClient, BeamFnStateClient beamFnStateClient, BeamFnTimerClient beamFnTimerClient, String pTransformId, PTransform pTransform, Supplier<String> processBundleInstructionId, Map<String, PCollection> pCollections, Map<String, RunnerApi.Coder> coders, Map<String, RunnerApi.WindowingStrategy> windowingStrategies, PCollectionConsumerRegistry pCollectionConsumerRegistry, PTransformFunctionRegistry startFunctionRegistry, PTransformFunctionRegistry finishFunctionRegistry, Consumer<ThrowingRunnable> tearDownFunctions, Consumer<ProgressRequestCallback> addProgressRequestCallback, BundleSplitListener splitListener, BundleFinalizer bundleFinalizer) throws IOException { // Get objects needed to create the runner. RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents( RunnerApi.Components.newBuilder() .putAllCoders(coders) .putAllWindowingStrategies(windowingStrategies) .build()); String mainInputTag = Iterables.getOnlyElement(pTransform.getInputsMap().keySet()); RunnerApi.PCollection mainInput = pCollections.get(pTransform.getInputsOrThrow(mainInputTag)); // Input coder may sometimes be WindowedValueCoder depending on runner, instead of the // expected KvCoder. 
Coder<?> uncastInputCoder = rehydratedComponents.getCoder(mainInput.getCoderId()); KvCoder<KeyT, InputT> inputCoder; if (uncastInputCoder instanceof WindowedValueCoder) { inputCoder = (KvCoder<KeyT, InputT>) ((WindowedValueCoder<KV<KeyT, InputT>>) uncastInputCoder).getValueCoder(); } else { inputCoder = (KvCoder<KeyT, InputT>) rehydratedComponents.getCoder(mainInput.getCoderId()); } Coder<KeyT> keyCoder = inputCoder.getKeyCoder(); CombinePayload combinePayload = CombinePayload.parseFrom(pTransform.getSpec().getPayload()); CombineFn<InputT, AccumT, ?> combineFn = (CombineFn) SerializableUtils.deserializeFromByteArray( combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn"); Coder<AccumT> accumCoder = (Coder<AccumT>) rehydratedComponents.getCoder(combinePayload.getAccumulatorCoderId()); FnDataReceiver<WindowedValue<KV<KeyT, AccumT>>> consumer = (FnDataReceiver) pCollectionConsumerRegistry.getMultiplexingConsumer( Iterables.getOnlyElement(pTransform.getOutputsMap().values())); PrecombineRunner<KeyT, InputT, AccumT> runner = new PrecombineRunner<>(pipelineOptions, combineFn, consumer, keyCoder, accumCoder); // Register the appropriate handlers. startFunctionRegistry.register(pTransformId, runner::startBundle); pCollectionConsumerRegistry.register( Iterables.getOnlyElement(pTransform.getInputsMap().values()), pTransformId, (FnDataReceiver) (FnDataReceiver<WindowedValue<KV<KeyT, InputT>>>) runner::processElement); finishFunctionRegistry.register(pTransformId, runner::finishBundle); return runner; }
Example 10
Source File: PCollectionTranslation.java From beam with Apache License 2.0 | 4 votes |
/**
 * Reads the boundedness recorded on a proto PCollection and converts it to the SDK's {@link
 * IsBounded} value.
 *
 * @param pCollection proto collection to inspect
 * @return the SDK boundedness corresponding to the proto's {@code is_bounded} field
 */
public static IsBounded isBounded(RunnerApi.PCollection pCollection) {
  RunnerApi.IsBounded.Enum protoBoundedness = pCollection.getIsBounded();
  return fromProto(protoBoundedness);
}
Example 11
Source File: ParDoBoundMultiTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <InT, OutT> void doTranslatePortable( PipelineNode.PTransformNode transform, QueryablePipeline pipeline, PortableTranslationContext ctx) { Map<String, String> outputs = transform.getTransform().getOutputsMap(); final RunnerApi.ExecutableStagePayload stagePayload; try { stagePayload = RunnerApi.ExecutableStagePayload.parseFrom( transform.getTransform().getSpec().getPayload()); } catch (IOException e) { throw new RuntimeException(e); } String inputId = stagePayload.getInput(); final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId); // TODO: support side input final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList(); final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>(); final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>(); // first output as the main output final TupleTag<OutT> mainOutputTag = outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next()); AtomicInteger index = new AtomicInteger(0); outputs .keySet() .iterator() .forEachRemaining( outputName -> { TupleTag<?> tupleTag = new TupleTag<>(outputName); tagToIndexMap.put(tupleTag, index.get()); index.incrementAndGet(); String collectionId = outputs.get(outputName); idToTupleTagMap.put(collectionId, tupleTag); }); WindowedValue.WindowedValueCoder<InT> windowedInputCoder = ctx.instantiateCoder(inputId, pipeline.getComponents()); final DoFnSchemaInformation doFnSchemaInformation; doFnSchemaInformation = ParDoTranslation.getSchemaInformation(transform.getTransform()); Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(transform.getTransform()); final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId); final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input); final DoFnOp<InT, OutT, RawUnionValue> op = new DoFnOp<>( mainOutputTag, new NoOpDoFn<>(), null, // key coder not in use 
windowedInputCoder.getValueCoder(), // input coder not in use windowedInputCoder, Collections.emptyMap(), // output coders not in use Collections.emptyList(), // sideInputs not in use until side input support new ArrayList<>(idToTupleTagMap.values()), // used by java runner only SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline), Collections.emptyMap(), // idToViewMap not in use until side input support new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), ctx.getTransformFullName(), ctx.getTransformId(), isBounded, true, stagePayload, idToTupleTagMap, doFnSchemaInformation, sideInputMapping); final MessageStream<OpMessage<InT>> mergedStreams; if (sideInputStreams.isEmpty()) { mergedStreams = inputStream; } else { MessageStream<OpMessage<InT>> mergedSideInputStreams = MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn()); mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams)); } final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream = mergedStreams.flatMap(OpAdapter.adapt(op)); for (int outputIndex : tagToIndexMap.values()) { final MessageStream<OpMessage<OutT>> outputStream = taggedOutputStream .filter( message -> message.getType() != OpMessage.Type.ELEMENT || message.getElement().getValue().getUnionTag() == outputIndex) .flatMap(OpAdapter.adapt(new RawUnionValueToValue())); ctx.registerMessageStream(ctx.getOutputId(transform), outputStream); } }
Example 12
Source File: GroupByKeyTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Translates a portable GroupByKey transform into a Samza message stream and registers the result
 * under the transform's output id.
 *
 * <p>Repartitioning is requested whenever the configured max source parallelism is greater than
 * one. The reduce function is a buffering {@link SystemReduceFn} over the value coder of the KV
 * input.
 *
 * @param transform the GroupByKey transform node
 * @param pipeline pipeline used to resolve components and the windowing strategy
 * @param ctx translation context providing the input stream and receiving the output stream
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  MessageStream<OpMessage<KV<K, InputT>>> inputStream = ctx.getOneInputMessageStream(transform);
  boolean needRepartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;

  WindowingStrategy<?, BoundedWindow> windowingStrategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  Coder<BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

  // Derive the KV coder and the full windowed element coder from the input's wire coder.
  String inputId = ctx.getInputId(transform);
  WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());
  KvCoder<K, InputT> kvInputCoder = (KvCoder<K, InputT>) windowedInputCoder.getValueCoder();
  Coder<WindowedValue<KV<K, InputT>>> elementCoder =
      WindowedValue.FullWindowedValueCoder.of(kvInputCoder, windowCoder);

  String onlyOutputName =
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet());
  TupleTag<KV<K, OutputT>> outputTag = new TupleTag<>(onlyOutputName);

  @SuppressWarnings("unchecked")
  SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvInputCoder.getValueCoder());

  PCollection.IsBounded isBounded =
      SamzaPipelineTranslatorUtils.isBounded(
          pipeline.getComponents().getPcollectionsOrThrow(inputId));

  MessageStream<OpMessage<KV<K, OutputT>>> outputStream =
      doTranslateGBK(
          inputStream,
          needRepartition,
          reduceFn,
          windowingStrategy,
          kvInputCoder,
          elementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          outputTag,
          isBounded);
  ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
}
Example 13
Source File: SamzaPipelineTranslatorUtils.java From beam with Apache License 2.0 | 4 votes |
/**
 * Maps the boundedness recorded on a proto PCollection to the SDK's {@link PCollection.IsBounded}.
 *
 * @param pCollection proto collection to inspect
 * @return {@code BOUNDED} only when the proto value is exactly {@code BOUNDED}; every other proto
 *     value is reported as {@code UNBOUNDED}
 */
public static PCollection.IsBounded isBounded(RunnerApi.PCollection pCollection) {
  if (pCollection.getIsBounded() == RunnerApi.IsBounded.Enum.BOUNDED) {
    return PCollection.IsBounded.BOUNDED;
  }
  return PCollection.IsBounded.UNBOUNDED;
}
Example 14
Source File: RegisterNodeFunction.java From beam with Apache License 2.0 | 4 votes |
/** * Modifies the process bundle descriptor and updates the PTransform that the SDK harness will see * with length prefixed coders used on the side input PCollection and windowing strategy. */ private static final void transformSideInputForSdk( RunnerApi.Pipeline pipeline, RunnerApi.PTransform originalPTransform, String sideInputTag, ProcessBundleDescriptor.Builder processBundleDescriptor, RunnerApi.PTransform.Builder updatedPTransform) { RunnerApi.PCollection sideInputPCollection = pipeline .getComponents() .getPcollectionsOrThrow(originalPTransform.getInputsOrThrow(sideInputTag)); RunnerApi.WindowingStrategy sideInputWindowingStrategy = pipeline .getComponents() .getWindowingStrategiesOrThrow(sideInputPCollection.getWindowingStrategyId()); // TODO: We should not length prefix the window or key for the SDK side since the // key and window are already length delimited via protobuf itself. But we need to // maintain the length prefixing within the Runner harness to match the bytes that were // materialized to the side input sink. // We take the original pipeline coders and add any coders we have added when processing side // inputs before building new length prefixed variants. 
RunnerApi.Components.Builder componentsBuilder = pipeline.getComponents().toBuilder(); componentsBuilder.putAllCoders(processBundleDescriptor.getCodersMap()); String updatedSdkSideInputCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder( sideInputPCollection.getCoderId(), componentsBuilder, false); String updatedSdkSideInputWindowCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder( sideInputWindowingStrategy.getWindowCoderId(), componentsBuilder, false); processBundleDescriptor.putAllCoders(componentsBuilder.getCodersMap()); String updatedSdkWindowingStrategyId = SyntheticComponents.uniqueId( sideInputPCollection.getWindowingStrategyId() + "-runner_generated", processBundleDescriptor.getWindowingStrategiesMap().keySet()::contains); processBundleDescriptor.putWindowingStrategies( updatedSdkWindowingStrategyId, sideInputWindowingStrategy .toBuilder() .setWindowCoderId(updatedSdkSideInputWindowCoderId) .build()); RunnerApi.PCollection updatedSdkSideInputPcollection = sideInputPCollection .toBuilder() .setCoderId(updatedSdkSideInputCoderId) .setWindowingStrategyId(updatedSdkWindowingStrategyId) .build(); // Replace the contents of the PCollection with the updated side input PCollection // specification and insert it into the update PTransform. processBundleDescriptor.putPcollections( originalPTransform.getInputsOrThrow(sideInputTag), updatedSdkSideInputPcollection); updatedPTransform.putInputs(sideInputTag, originalPTransform.getInputsOrThrow(sideInputTag)); }