Java Code Examples for org.apache.beam.model.pipeline.v1.RunnerApi#WindowingStrategy
The following examples show how to use
org.apache.beam.model.pipeline.v1.RunnerApi#WindowingStrategy.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WindowingStrategyTranslation.java From beam with Apache License 2.0 | 6 votes |
/**
 * Translates an SDK {@link WindowingStrategy} into its {@link RunnerApi.WindowingStrategy} proto
 * form.
 *
 * <p>The window fn is translated first, and the window coder is registered with the supplied
 * {@link SdkComponents} while the proto is being populated.
 *
 * @param windowingStrategy the SDK windowing strategy to translate
 * @param components the components used to translate the window fn, register the window coder and
 *     look up the environment id
 * @return the built proto
 * @throws IOException propagated from window fn translation or coder registration
 */
public static RunnerApi.WindowingStrategy toProto(
    WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
  FunctionSpec translatedWindowFn = toProto(windowingStrategy.getWindowFn(), components);

  // The setters are applied in a fixed order so that the side-effecting component calls
  // (coder registration, environment lookup) happen last.
  RunnerApi.WindowingStrategy.Builder builder = RunnerApi.WindowingStrategy.newBuilder();
  builder.setOutputTime(toProto(windowingStrategy.getTimestampCombiner()));
  builder.setAccumulationMode(toProto(windowingStrategy.getMode()));
  builder.setClosingBehavior(toProto(windowingStrategy.getClosingBehavior()));
  builder.setAllowedLateness(windowingStrategy.getAllowedLateness().getMillis());
  builder.setTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger()));
  builder.setWindowFn(translatedWindowFn);
  builder.setAssignsToOneWindow(windowingStrategy.getWindowFn().assignsToOneWindow());
  builder.setOnTimeBehavior(toProto(windowingStrategy.getOnTimeBehavior()));
  builder.setWindowCoderId(
      components.registerCoder(windowingStrategy.getWindowFn().windowCoder()));
  builder.setEnvironmentId(components.getOnlyEnvironmentId());
  return builder.build();
}
Example 2
Source File: PipelineTranslatorUtils.java From beam with Apache License 2.0 | 6 votes |
/**
 * Looks up the windowing strategy attached to a PCollection and rehydrates it into an SDK {@link
 * WindowingStrategy}.
 *
 * @param pCollectionId id of the PCollection whose windowing strategy is wanted
 * @param components pipeline components holding both the PCollection and its windowing strategy
 * @return the rehydrated windowing strategy
 * @throws IllegalStateException if the strategy proto cannot be parsed
 */
public static WindowingStrategy getWindowingStrategy(
    String pCollectionId, RunnerApi.Components components) {
  String strategyId = components.getPcollectionsOrThrow(pCollectionId).getWindowingStrategyId();
  RunnerApi.WindowingStrategy strategyProto =
      components.getWindowingStrategiesOrThrow(strategyId);

  try {
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    return WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
  } catch (InvalidProtocolBufferException e) {
    String message =
        String.format(
            "Unable to hydrate windowing strategy %s for %s.", strategyProto, pCollectionId);
    throw new IllegalStateException(message, e);
  }
}
Example 3
Source File: WindowingStrategyTranslationTest.java From beam with Apache License 2.0 | 6 votes |
/**
 * Round-trips the windowing strategy under test through its proto form and checks that the
 * result equals the original (after {@code fixDefaults()}), that the window coder is resolvable
 * from the rehydrated components, and that the {@code assignsToOneWindow} flag is carried over.
 */
@Test
public void testToProtoAndBackWithComponents() throws Exception {
  WindowingStrategy<?, ?> original = toProtoAndBackSpec.getWindowingStrategy();

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.WindowingStrategy strategyProto =
      WindowingStrategyTranslation.toProto(original, sdkComponents);
  RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(sdkComponents.toComponents());

  // Strategy survives the round trip.
  WindowingStrategy<?, ?> roundTripped =
      WindowingStrategyTranslation.fromProto(strategyProto, rehydrated).fixDefaults();
  WindowingStrategy<?, ?> expected = original.fixDefaults();
  assertThat(roundTripped, equalTo(expected));

  // The window coder must be retrievable from the rehydrated components.
  String windowCoderId = sdkComponents.registerCoder(original.getWindowFn().windowCoder());
  rehydrated.getCoder(windowCoderId);

  assertThat(
      strategyProto.getAssignsToOneWindow(),
      equalTo(original.getWindowFn().assignsToOneWindow()));
}
Example 4
Source File: WindowingStrategyTranslation.java From beam with Apache License 2.0 | 6 votes |
/**
 * Rebuilds the SDK's {@link WindowingStrategy} from a {@link RunnerApi.WindowingStrategy} proto.
 *
 * <p>Each field of the proto is converted individually and then applied to a strategy created
 * from the decoded window fn.
 *
 * @param proto the serialized windowing strategy
 * @param components components available for dereferencing identifiers found in the proto; this
 *     method body does not currently read them
 * @return the reconstructed windowing strategy
 * @throws InvalidProtocolBufferException if a nested payload cannot be parsed
 */
public static WindowingStrategy<?, ?> fromProto(
    RunnerApi.WindowingStrategy proto, RehydratedComponents components)
    throws InvalidProtocolBufferException {
  WindowFn<?, ?> decodedWindowFn = windowFnFromProto(proto.getWindowFn());
  TimestampCombiner combiner = timestampCombinerFromProto(proto.getOutputTime());
  AccumulationMode mode = fromProto(proto.getAccumulationMode());
  Trigger decodedTrigger = TriggerTranslation.fromProto(proto.getTrigger());
  ClosingBehavior closing = fromProto(proto.getClosingBehavior());
  Duration lateness = Duration.millis(proto.getAllowedLateness());
  OnTimeBehavior onTime = fromProto(proto.getOnTimeBehavior());

  return WindowingStrategy.of(decodedWindowFn)
      .withAllowedLateness(lateness)
      .withMode(mode)
      .withTrigger(decodedTrigger)
      .withTimestampCombiner(combiner)
      .withClosingBehavior(closing)
      .withOnTimeBehavior(onTime);
}
Example 5
Source File: SdkComponents.java From beam with Apache License 2.0 | 6 votes |
/**
 * Registers a {@link WindowingStrategy} with this {@link SdkComponents} and returns its unique
 * id. Registering the same {@link WindowingStrategy} again returns the id assigned the first
 * time.
 *
 * @param windowingStrategy the strategy to register
 * @return the id under which the strategy is stored
 * @throws IOException propagated from translating the strategy to its proto form
 */
public String registerWindowingStrategy(WindowingStrategy<?, ?> windowingStrategy)
    throws IOException {
  String knownId = windowingStrategyIds.get(windowingStrategy);
  if (knownId == null) {
    String strategyLabel = NameUtils.approximateSimpleName(windowingStrategy);
    String windowFnLabel = NameUtils.approximateSimpleName(windowingStrategy.getWindowFn());
    String candidate = String.format("%s(%s)", strategyLabel, windowFnLabel);
    String id = uniqify(candidate, windowingStrategyIds.values());

    // The id is recorded before the strategy is translated.
    windowingStrategyIds.put(windowingStrategy, id);
    componentsBuilder.putWindowingStrategies(
        id, WindowingStrategyTranslation.toProto(windowingStrategy, this));
    return id;
  }
  return knownId;
}
Example 6
Source File: FlattenRunner.java From beam with Apache License 2.0 | 5 votes |
@Override public FlattenRunner<InputT> createRunnerForPTransform( PipelineOptions pipelineOptions, BeamFnDataClient beamFnDataClient, BeamFnStateClient beamFnStateClient, BeamFnTimerClient beamFnTimerClient, String pTransformId, RunnerApi.PTransform pTransform, Supplier<String> processBundleInstructionId, Map<String, PCollection> pCollections, Map<String, Coder> coders, Map<String, RunnerApi.WindowingStrategy> windowingStrategies, PCollectionConsumerRegistry pCollectionConsumerRegistry, PTransformFunctionRegistry startFunctionRegistry, PTransformFunctionRegistry finishFunctionRegistry, Consumer<ThrowingRunnable> tearDownFunctions, Consumer<ProgressRequestCallback> addProgressRequestCallback, BundleSplitListener splitListener, BundleFinalizer bundleFinalizer) throws IOException { // Give each input a MultiplexingFnDataReceiver to all outputs of the flatten. String output = getOnlyElement(pTransform.getOutputsMap().values()); FnDataReceiver<WindowedValue<?>> receiver = pCollectionConsumerRegistry.getMultiplexingConsumer(output); FlattenRunner<InputT> runner = new FlattenRunner<>(); for (String pCollectionId : pTransform.getInputsMap().values()) { pCollectionConsumerRegistry.register( pCollectionId, pTransformId, (FnDataReceiver) receiver); } return runner; }
Example 7
Source File: WindowingStrategyTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Wraps the proto form of a {@link WindowingStrategy} in a {@link
 * RunnerApi.MessageWithComponents}.
 *
 * <p>In the returned message, {@link RunnerApi.MessageWithComponents#getWindowingStrategy()} is
 * the {@link RunnerApi.WindowingStrategy} proto for the input strategy, and the components are a
 * snapshot of {@code components} taken after the strategy has been translated.
 *
 * @param windowingStrategy the SDK windowing strategy to translate
 * @param components the components the translation registers into
 * @return the message holding both the strategy proto and the components
 * @throws IOException propagated from translating the strategy
 */
public static RunnerApi.MessageWithComponents toMessageProto(
    WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
  // Translate first: translation may register entries that must appear in toComponents().
  RunnerApi.WindowingStrategy strategyProto = toProto(windowingStrategy, components);

  RunnerApi.MessageWithComponents.Builder message =
      RunnerApi.MessageWithComponents.newBuilder();
  message.setWindowingStrategy(strategyProto);
  message.setComponents(components.toComponents());
  return message.build();
}
Example 8
Source File: BeamFnDataWriteRunner.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link BeamFnDataWriteRunner} and hooks it into the bundle lifecycle: {@code
 * registerForOutput} at bundle start, {@code consume} for each element of the transform's single
 * input, and {@code close} at bundle finish.
 */
@Override
public BeamFnDataWriteRunner<InputT> createRunnerForPTransform(
    PipelineOptions pipelineOptions,
    BeamFnDataClient beamFnDataClient,
    BeamFnStateClient beamFnStateClient,
    BeamFnTimerClient beamFnTimerClient,
    String pTransformId,
    PTransform pTransform,
    Supplier<String> processBundleInstructionId,
    Map<String, PCollection> pCollections,
    Map<String, RunnerApi.Coder> coders,
    Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
    PCollectionConsumerRegistry pCollectionConsumerRegistry,
    PTransformFunctionRegistry startFunctionRegistry,
    PTransformFunctionRegistry finishFunctionRegistry,
    Consumer<ThrowingRunnable> tearDownFunctions,
    Consumer<ProgressRequestCallback> addProgressRequestCallback,
    BundleSplitListener splitListener,
    BundleFinalizer bundleFinalizer)
    throws IOException {
  BeamFnDataWriteRunner<InputT> writeRunner =
      new BeamFnDataWriteRunner<>(
          pTransformId,
          pTransform,
          processBundleInstructionId,
          coders,
          beamFnDataClient,
          beamFnStateClient);

  startFunctionRegistry.register(pTransformId, writeRunner::registerForOutput);

  String inputId = getOnlyElement(pTransform.getInputsMap().values());
  FnDataReceiver<WindowedValue<InputT>> elementConsumer = writeRunner::consume;
  pCollectionConsumerRegistry.register(inputId, pTransformId, (FnDataReceiver) elementConsumer);

  finishFunctionRegistry.register(pTransformId, writeRunner::close);
  return writeRunner;
}
Example 9
Source File: RehydratedComponents.java From beam with Apache License 2.0 | 5 votes |
/**
 * Loads the windowing strategy stored under {@code id} in the serialized components and
 * rehydrates it.
 *
 * @param id the windowing strategy id to resolve
 * @return the rehydrated windowing strategy
 * @throws IllegalStateException if no windowing strategy is stored under {@code id}
 */
@Override
public WindowingStrategy<?, ?> load(String id) throws Exception {
  // A null default lets a missing id be reported with a descriptive message below.
  @Nullable
  RunnerApi.WindowingStrategy storedProto = components.getWindowingStrategiesOrDefault(id, null);

  checkState(
      storedProto != null, "No WindowingStrategy with id '%s' in serialized components", id);

  return WindowingStrategyTranslation.fromProto(storedProto, RehydratedComponents.this);
}
Example 10
Source File: PortableTranslationContext.java From beam with Apache License 2.0 | 5 votes |
/**
 * Resolves the windowing strategy of the single input PCollection of {@code transform} and
 * rehydrates it into an SDK {@link WindowingStrategy}.
 *
 * @param transform the transform node whose only input is inspected
 * @param pipeline the pipeline supplying the components
 * @return the rehydrated strategy, cast to operate on {@link BoundedWindow}
 * @throws IllegalStateException if rehydration fails for any reason; the original failure is kept
 *     as the cause
 */
public WindowingStrategy<?, BoundedWindow> getPortableWindowStrategy(
    PipelineNode.PTransformNode transform, QueryablePipeline pipeline) {
  String onlyInputId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  RehydratedComponents rehydrated = RehydratedComponents.forComponents(pipeline.getComponents());

  String strategyId =
      pipeline.getComponents().getPcollectionsOrThrow(onlyInputId).getWindowingStrategyId();
  RunnerApi.WindowingStrategy strategyProto =
      pipeline.getComponents().getWindowingStrategiesOrThrow(strategyId);

  WindowingStrategy<?, ?> hydrated;
  try {
    hydrated = WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
  } catch (Exception e) {
    String message =
        String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto);
    throw new IllegalStateException(message, e);
  }

  // The local exists only to scope the unchecked-cast suppression to this one statement.
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> boundedWindowStrategy =
      (WindowingStrategy<?, BoundedWindow>) hydrated;
  return boundedWindowStrategy;
}
Example 11
Source File: SamzaPipelineTranslatorUtils.java From beam with Apache License 2.0 | 5 votes |
/**
 * Resolves the windowing strategy of the single input PCollection of {@code transform} and
 * rehydrates it into an SDK {@link WindowingStrategy}.
 *
 * @param transform the transform node whose only input is inspected
 * @param pipeline the pipeline supplying the components
 * @return the rehydrated strategy, cast to operate on {@link BoundedWindow}
 * @throws IllegalStateException if the strategy proto cannot be parsed
 */
public static WindowingStrategy<?, BoundedWindow> getPortableWindowStrategy(
    PipelineNode.PTransformNode transform, QueryablePipeline pipeline) {
  String onlyInputId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  RehydratedComponents rehydrated = RehydratedComponents.forComponents(pipeline.getComponents());

  String strategyId =
      pipeline.getComponents().getPcollectionsOrThrow(onlyInputId).getWindowingStrategyId();
  RunnerApi.WindowingStrategy strategyProto =
      pipeline.getComponents().getWindowingStrategiesOrThrow(strategyId);

  WindowingStrategy<?, ?> hydrated;
  try {
    hydrated = WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
  } catch (InvalidProtocolBufferException e) {
    String message =
        String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto);
    throw new IllegalStateException(message, e);
  }

  // The local exists only to scope the unchecked-cast suppression to this one statement.
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> boundedWindowStrategy =
      (WindowingStrategy<?, BoundedWindow>) hydrated;
  return boundedWindowStrategy;
}
Example 12
Source File: MapFnRunners.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates a {@link Mapper} through {@code mapperFactory} that emits to the consumers of the
 * transform's single output, and registers the mapper's {@code map} method as the consumer of
 * the transform's single input.
 */
@Override
public Mapper<InputT, OutputT> createRunnerForPTransform(
    PipelineOptions pipelineOptions,
    BeamFnDataClient beamFnDataClient,
    BeamFnStateClient beamFnStateClient,
    BeamFnTimerClient beamFnTimerClient,
    String pTransformId,
    PTransform pTransform,
    Supplier<String> processBundleInstructionId,
    Map<String, PCollection> pCollections,
    Map<String, RunnerApi.Coder> coders,
    Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
    PCollectionConsumerRegistry pCollectionConsumerRegistry,
    PTransformFunctionRegistry startFunctionRegistry,
    PTransformFunctionRegistry finishFunctionRegistry,
    Consumer<ThrowingRunnable> tearDownFunctions,
    Consumer<ProgressRequestCallback> addProgressRequestCallback,
    BundleSplitListener splitListener,
    BundleFinalizer bundleFinalizer)
    throws IOException {

  // Downstream side: everything the mapper produces goes to the output's consumers.
  String outputId = getOnlyElement(pTransform.getOutputsMap().values());
  FnDataReceiver<WindowedValue<InputT>> downstream =
      (FnDataReceiver) pCollectionConsumerRegistry.getMultiplexingConsumer(outputId);

  Mapper<InputT, OutputT> createdMapper =
      mapperFactory.create(pTransformId, pTransform, downstream);

  // Upstream side: the mapper itself consumes the transform's input.
  String inputId = Iterables.getOnlyElement(pTransform.getInputsMap().values());
  FnDataReceiver<WindowedValue<InputT>> upstream = createdMapper::map;
  pCollectionConsumerRegistry.register(inputId, pTransformId, (FnDataReceiver) upstream);

  return createdMapper;
}
Example 13
Source File: CombineRunners.java From beam with Apache License 2.0 | 4 votes |
@Override public PrecombineRunner<KeyT, InputT, AccumT> createRunnerForPTransform( PipelineOptions pipelineOptions, BeamFnDataClient beamFnDataClient, BeamFnStateClient beamFnStateClient, BeamFnTimerClient beamFnTimerClient, String pTransformId, PTransform pTransform, Supplier<String> processBundleInstructionId, Map<String, PCollection> pCollections, Map<String, RunnerApi.Coder> coders, Map<String, RunnerApi.WindowingStrategy> windowingStrategies, PCollectionConsumerRegistry pCollectionConsumerRegistry, PTransformFunctionRegistry startFunctionRegistry, PTransformFunctionRegistry finishFunctionRegistry, Consumer<ThrowingRunnable> tearDownFunctions, Consumer<ProgressRequestCallback> addProgressRequestCallback, BundleSplitListener splitListener, BundleFinalizer bundleFinalizer) throws IOException { // Get objects needed to create the runner. RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents( RunnerApi.Components.newBuilder() .putAllCoders(coders) .putAllWindowingStrategies(windowingStrategies) .build()); String mainInputTag = Iterables.getOnlyElement(pTransform.getInputsMap().keySet()); RunnerApi.PCollection mainInput = pCollections.get(pTransform.getInputsOrThrow(mainInputTag)); // Input coder may sometimes be WindowedValueCoder depending on runner, instead of the // expected KvCoder. 
Coder<?> uncastInputCoder = rehydratedComponents.getCoder(mainInput.getCoderId()); KvCoder<KeyT, InputT> inputCoder; if (uncastInputCoder instanceof WindowedValueCoder) { inputCoder = (KvCoder<KeyT, InputT>) ((WindowedValueCoder<KV<KeyT, InputT>>) uncastInputCoder).getValueCoder(); } else { inputCoder = (KvCoder<KeyT, InputT>) rehydratedComponents.getCoder(mainInput.getCoderId()); } Coder<KeyT> keyCoder = inputCoder.getKeyCoder(); CombinePayload combinePayload = CombinePayload.parseFrom(pTransform.getSpec().getPayload()); CombineFn<InputT, AccumT, ?> combineFn = (CombineFn) SerializableUtils.deserializeFromByteArray( combinePayload.getCombineFn().getPayload().toByteArray(), "CombineFn"); Coder<AccumT> accumCoder = (Coder<AccumT>) rehydratedComponents.getCoder(combinePayload.getAccumulatorCoderId()); FnDataReceiver<WindowedValue<KV<KeyT, AccumT>>> consumer = (FnDataReceiver) pCollectionConsumerRegistry.getMultiplexingConsumer( Iterables.getOnlyElement(pTransform.getOutputsMap().values())); PrecombineRunner<KeyT, InputT, AccumT> runner = new PrecombineRunner<>(pipelineOptions, combineFn, consumer, keyCoder, accumCoder); // Register the appropriate handlers. startFunctionRegistry.register(pTransformId, runner::startBundle); pCollectionConsumerRegistry.register( Iterables.getOnlyElement(pTransform.getInputsMap().values()), pTransformId, (FnDataReceiver) (FnDataReceiver<WindowedValue<KV<KeyT, InputT>>>) runner::processElement); finishFunctionRegistry.register(pTransformId, runner::finishBundle); return runner; }
Example 14
Source File: FnApiDoFnRunner.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link FnApiDoFnRunner} by forwarding the factory arguments to its constructor.
 *
 * <p>{@code beamFnDataClient} is the only argument that is not passed on.
 */
@Override
public final FnApiDoFnRunner<InputT, RestrictionT, PositionT, WatermarkEstimatorStateT, OutputT>
    createRunnerForPTransform(
        PipelineOptions pipelineOptions,
        BeamFnDataClient beamFnDataClient,
        BeamFnStateClient beamFnStateClient,
        BeamFnTimerClient beamFnTimerClient,
        String pTransformId,
        PTransform pTransform,
        Supplier<String> processBundleInstructionId,
        Map<String, PCollection> pCollections,
        Map<String, RunnerApi.Coder> coders,
        Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
        PCollectionConsumerRegistry pCollectionConsumerRegistry,
        PTransformFunctionRegistry startFunctionRegistry,
        PTransformFunctionRegistry finishFunctionRegistry,
        Consumer<ThrowingRunnable> tearDownFunctions,
        Consumer<ProgressRequestCallback> addProgressRequestCallback,
        BundleSplitListener splitListener,
        BundleFinalizer bundleFinalizer) {
  return new FnApiDoFnRunner<>(
      pipelineOptions,
      beamFnStateClient,
      beamFnTimerClient,
      pTransformId,
      pTransform,
      processBundleInstructionId,
      pCollections,
      coders,
      windowingStrategies,
      pCollectionConsumerRegistry,
      startFunctionRegistry,
      finishFunctionRegistry,
      tearDownFunctions,
      addProgressRequestCallback,
      splitListener,
      bundleFinalizer);
}
Example 15
Source File: PTransformRunnerFactory.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates and returns a handler for a given PTransform. Note that the handler must support
 * processing multiple bundles. The handler will be discarded if an error is thrown during element
 * processing, or during execution of start/finish.
 *
 * @param pipelineOptions Pipeline options
 * @param beamFnDataClient A client for handling inbound and outbound data streams.
 * @param beamFnStateClient A client for handling state requests.
 * @param beamFnTimerClient A client for handling inbound and outbound timer streams.
 * @param pTransformId The id of the PTransform.
 * @param pTransform The PTransform definition.
 * @param processBundleInstructionId A supplier containing the active process bundle instruction
 *     id.
 * @param pCollections A mapping from PCollection id to PCollection definition.
 * @param coders A mapping from coder id to coder definition.
 * @param windowingStrategies A mapping from windowing strategy id to windowing strategy
 *     definition.
 * @param pCollectionConsumerRegistry A mapping from PCollection id to a collection of consumers.
 *     Note that if this handler is a consumer, it should register itself within this multimap
 *     under the appropriate PCollection ids. Also note that all output consumers needed by this
 *     PTransform (based on the values of the {@link PTransform#getOutputsMap()}) will have
 *     already registered within this multimap.
 * @param startFunctionRegistry A class to register a start bundle handler with.
 * @param finishFunctionRegistry A class to register a finish bundle handler with.
 * @param addTearDownFunction A consumer to register a tear down handler with.
 * @param addProgressRequestCallback A consumer to register a callback whenever progress is being
 *     requested.
 * @param splitListener A listener to be invoked when the PTransform splits itself.
 * @param bundleFinalizer Register callbacks that will be invoked when the runner completes the
 *     bundle. The specified instant provides the timeout on how long the finalization callback is
 *     valid for.
 * @return The handler created for the PTransform.
 * @throws IOException If an implementation fails while constructing the handler.
 */
T createRunnerForPTransform( PipelineOptions pipelineOptions, BeamFnDataClient beamFnDataClient, BeamFnStateClient beamFnStateClient, BeamFnTimerClient beamFnTimerClient, String pTransformId, RunnerApi.PTransform pTransform, Supplier<String> processBundleInstructionId, Map<String, PCollection> pCollections, Map<String, Coder> coders, Map<String, RunnerApi.WindowingStrategy> windowingStrategies, PCollectionConsumerRegistry pCollectionConsumerRegistry, PTransformFunctionRegistry startFunctionRegistry, PTransformFunctionRegistry finishFunctionRegistry, Consumer<ThrowingRunnable> addTearDownFunction, Consumer<ProgressRequestCallback> addProgressRequestCallback, BundleSplitListener splitListener, BundleFinalizer bundleFinalizer) throws IOException;
Example 16
Source File: BeamFnDataReadRunner.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link BeamFnDataReadRunner} that forwards what it reads to the consumers of the
 * transform's single output, and registers {@code registerInputLocation} as the start-bundle
 * hook and {@code blockTillReadFinishes} as the finish-bundle hook.
 */
@Override
public BeamFnDataReadRunner<OutputT> createRunnerForPTransform(
    PipelineOptions pipelineOptions,
    BeamFnDataClient beamFnDataClient,
    BeamFnStateClient beamFnStateClient,
    BeamFnTimerClient beamFnTimerClient,
    String pTransformId,
    PTransform pTransform,
    Supplier<String> processBundleInstructionId,
    Map<String, PCollection> pCollections,
    Map<String, RunnerApi.Coder> coders,
    Map<String, RunnerApi.WindowingStrategy> windowingStrategies,
    PCollectionConsumerRegistry pCollectionConsumerRegistry,
    PTransformFunctionRegistry startFunctionRegistry,
    PTransformFunctionRegistry finishFunctionRegistry,
    Consumer<ThrowingRunnable> tearDownFunctions,
    Consumer<ProgressRequestCallback> addProgressRequestCallback,
    BundleSplitListener splitListener,
    BundleFinalizer bundleFinalizer)
    throws IOException {

  String outputId = getOnlyElement(pTransform.getOutputsMap().values());
  FnDataReceiver<WindowedValue<OutputT>> downstream =
      (FnDataReceiver<WindowedValue<OutputT>>)
          (FnDataReceiver) pCollectionConsumerRegistry.getMultiplexingConsumer(outputId);

  BeamFnDataReadRunner<OutputT> readRunner =
      new BeamFnDataReadRunner<>(
          pTransformId,
          pTransform,
          processBundleInstructionId,
          coders,
          beamFnDataClient,
          beamFnStateClient,
          addProgressRequestCallback,
          downstream);

  startFunctionRegistry.register(pTransformId, readRunner::registerInputLocation);
  finishFunctionRegistry.register(pTransformId, readRunner::blockTillReadFinishes);
  return readRunner;
}
Example 17
Source File: FlinkStreamingPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
private <K, V> void translateGroupByKey( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { RunnerApi.PTransform pTransform = pipeline.getComponents().getTransformsOrThrow(id); String inputPCollectionId = Iterables.getOnlyElement(pTransform.getInputsMap().values()); RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(pipeline.getComponents()); RunnerApi.WindowingStrategy windowingStrategyProto = pipeline .getComponents() .getWindowingStrategiesOrThrow( pipeline .getComponents() .getPcollectionsOrThrow(inputPCollectionId) .getWindowingStrategyId()); WindowingStrategy<?, ?> windowingStrategy; try { windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents); } catch (InvalidProtocolBufferException e) { throw new IllegalStateException( String.format( "Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto), e); } WindowedValueCoder<KV<K, V>> windowedInputCoder = (WindowedValueCoder) instantiateCoder(inputPCollectionId, pipeline.getComponents()); DataStream<WindowedValue<KV<K, V>>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId); SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream = addGBK( inputDataStream, windowingStrategy, windowedInputCoder, pTransform.getUniqueName(), context); // Assign a unique but consistent id to re-map operator state outputDataStream.uid(pTransform.getUniqueName()); context.addDataStream( Iterables.getOnlyElement(pTransform.getOutputsMap().values()), outputDataStream); }
Example 18
Source File: FlinkStreamingPortablePipelineTranslator.java From beam with Apache License 2.0 | 4 votes |
private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> getSideInputIdToPCollectionViewMap( RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) { RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components); LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputs = new LinkedHashMap<>(); // for PCollectionView compatibility, not used to transform materialization ViewFn<Iterable<WindowedValue<?>>, ?> viewFn = (ViewFn) new PCollectionViews.MultimapViewFn<>( (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>) () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {}), (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids); for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId : stagePayload.getSideInputsList()) { // TODO: local name is unique as long as only one transform with side input can be within a // stage String sideInputTag = sideInputId.getLocalName(); String collectionId = components .getTransformsOrThrow(sideInputId.getTransformId()) .getInputsOrThrow(sideInputId.getLocalName()); RunnerApi.WindowingStrategy windowingStrategyProto = components.getWindowingStrategiesOrThrow( components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId()); final WindowingStrategy<?, ?> windowingStrategy; try { windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents); } catch (InvalidProtocolBufferException e) { throw new IllegalStateException( String.format( "Unable to hydrate side input windowing strategy %s.", windowingStrategyProto), e); } Coder<WindowedValue<Object>> coder = instantiateCoder(collectionId, components); // side input materialization via GBK (T -> Iterable<T>) WindowedValueCoder wvCoder = (WindowedValueCoder) coder; coder = wvCoder.withValueCoder(IterableCoder.of(wvCoder.getValueCoder())); sideInputs.put( 
sideInputId, new RunnerPCollectionView<>( null, new TupleTag<>(sideInputTag), viewFn, // TODO: support custom mapping fn windowingStrategy.getWindowFn().getDefaultWindowMappingFn(), windowingStrategy, coder)); } return sideInputs; }
Example 19
Source File: RegisterNodeFunction.java From beam with Apache License 2.0 | 4 votes |
/** * Modifies the process bundle descriptor and updates the PTransform that the SDK harness will see * with length prefixed coders used on the side input PCollection and windowing strategy. */ private static final void transformSideInputForSdk( RunnerApi.Pipeline pipeline, RunnerApi.PTransform originalPTransform, String sideInputTag, ProcessBundleDescriptor.Builder processBundleDescriptor, RunnerApi.PTransform.Builder updatedPTransform) { RunnerApi.PCollection sideInputPCollection = pipeline .getComponents() .getPcollectionsOrThrow(originalPTransform.getInputsOrThrow(sideInputTag)); RunnerApi.WindowingStrategy sideInputWindowingStrategy = pipeline .getComponents() .getWindowingStrategiesOrThrow(sideInputPCollection.getWindowingStrategyId()); // TODO: We should not length prefix the window or key for the SDK side since the // key and window are already length delimited via protobuf itself. But we need to // maintain the length prefixing within the Runner harness to match the bytes that were // materialized to the side input sink. // We take the original pipeline coders and add any coders we have added when processing side // inputs before building new length prefixed variants. 
RunnerApi.Components.Builder componentsBuilder = pipeline.getComponents().toBuilder(); componentsBuilder.putAllCoders(processBundleDescriptor.getCodersMap()); String updatedSdkSideInputCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder( sideInputPCollection.getCoderId(), componentsBuilder, false); String updatedSdkSideInputWindowCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder( sideInputWindowingStrategy.getWindowCoderId(), componentsBuilder, false); processBundleDescriptor.putAllCoders(componentsBuilder.getCodersMap()); String updatedSdkWindowingStrategyId = SyntheticComponents.uniqueId( sideInputPCollection.getWindowingStrategyId() + "-runner_generated", processBundleDescriptor.getWindowingStrategiesMap().keySet()::contains); processBundleDescriptor.putWindowingStrategies( updatedSdkWindowingStrategyId, sideInputWindowingStrategy .toBuilder() .setWindowCoderId(updatedSdkSideInputWindowCoderId) .build()); RunnerApi.PCollection updatedSdkSideInputPcollection = sideInputPCollection .toBuilder() .setCoderId(updatedSdkSideInputCoderId) .setWindowingStrategyId(updatedSdkWindowingStrategyId) .build(); // Replace the contents of the PCollection with the updated side input PCollection // specification and insert it into the update PTransform. processBundleDescriptor.putPcollections( originalPTransform.getInputsOrThrow(sideInputTag), updatedSdkSideInputPcollection); updatedPTransform.putInputs(sideInputTag, originalPTransform.getInputsOrThrow(sideInputTag)); }