Java Code Examples for org.apache.beam.sdk.transforms.Combine#GroupedValues
The following examples show how to use
org.apache.beam.sdk.transforms.Combine#GroupedValues.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CombineTranslation.java From beam with Apache License 2.0 | 6 votes |
@Override public FunctionSpec translate( AppliedPTransform<?, ?, Combine.GroupedValues<?, ?, ?>> transform, SdkComponents components) throws IOException { if (transform.getTransform().getSideInputs().isEmpty()) { GlobalCombineFn<?, ?, ?> combineFn = transform.getTransform().getFn(); Coder<?> accumulatorCoder = extractAccumulatorCoder(combineFn, (AppliedPTransform) transform); return FunctionSpec.newBuilder() .setUrn(getUrn(transform.getTransform())) .setPayload(combinePayload(combineFn, accumulatorCoder, components).toByteString()) .build(); } else { // Combines with side inputs are translated as generic composites, which have a blank // FunctionSpec. return null; } }
Example 2
Source File: CombineTranslation.java From beam with Apache License 2.0 | 6 votes |
/**
 * Derives the accumulator coder of {@code combineFn} from the coder of the transform's main
 * input.
 *
 * <p>The single non-additional input of {@code transform} is cast to a keyed collection of
 * iterables; its coder is unwrapped ({@code KvCoder} → {@code IterableCoder} → element coder)
 * and the element coder is passed, together with the pipeline's coder registry, to
 * {@code combineFn.getAccumulatorCoder}.
 *
 * @param combineFn the combine function whose accumulator coder is requested
 * @param transform the applied grouped-values combine supplying the input and the pipeline
 * @return the accumulator coder reported by {@code combineFn}
 * @throws IOException if a {@code CannotProvideCoderException} is raised; the original
 *     exception is attached as the cause
 */
private static <K, InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<
            PCollection<KV<K, Iterable<InputT>>>, ?, Combine.GroupedValues<K, InputT, ?>>
        transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<KV<K, Iterable<InputT>>> groupedInput =
        (PCollection<KV<K, Iterable<InputT>>>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));

    // Peel the input coder apart: KV coder -> iterable value coder -> element coder.
    KvCoder<K, Iterable<InputT>> groupedCoder =
        (KvCoder<K, Iterable<InputT>>) groupedInput.getCoder();
    IterableCoder<InputT> valuesCoder = (IterableCoder<InputT>) groupedCoder.getValueCoder();

    return combineFn.getAccumulatorCoder(
        transform.getPipeline().getCoderRegistry(), valuesCoder.getElemCoder());
  } catch (CannotProvideCoderException e) {
    throw new IOException("Could not obtain a Coder for the accumulator", e);
  }
}
Example 3
Source File: DataflowPipelineTranslator.java From beam with Apache License 2.0 | 5 votes |
/**
 * Translates a {@code CombineGroupedValues} primitive into a "CombineValues" step.
 *
 * <p>The step receives the transform's inputs and side inputs, the accumulator coder of the
 * applied combine function as an encoding input, a {@code SERIALIZED_FN} input, and the
 * transform's output. When the pipeline's experiments contain {@code "beam_fn_api"}, the
 * {@code SERIALIZED_FN} value is the PTransform id of the current parent; otherwise it is the
 * applied combine function serialized to bytes and rendered through
 * {@code byteArrayToJsonString}.
 *
 * @param primitiveTransform the primitive wrapping the original {@code Combine.GroupedValues}
 * @param context the translation context used to register the step and look up inputs/outputs
 */
private <K, InputT, OutputT> void translateHelper(
    final CombineGroupedValues<K, InputT, OutputT> primitiveTransform,
    TranslationContext context) {
  Combine.GroupedValues<K, InputT, OutputT> combine = primitiveTransform.getOriginalCombine();
  StepTranslationContext step = context.addStep(primitiveTransform, "CombineValues");

  translateInputs(
      step, context.getInput(primitiveTransform), combine.getSideInputs(), context);

  AppliedCombineFn<? super K, ? super InputT, ?, OutputT> appliedFn =
      combine.getAppliedFn(
          context.getInput(primitiveTransform).getPipeline().getCoderRegistry(),
          context.getInput(primitiveTransform).getCoder(),
          context.getInput(primitiveTransform).getWindowingStrategy());
  step.addEncodingInput(appliedFn.getAccumulatorCoder());

  List<String> enabledExperiments = context.getPipelineOptions().getExperiments();
  boolean usesFnApi = enabledExperiments != null && enabledExperiments.contains("beam_fn_api");

  if (!usesFnApi) {
    // Without the FnAPI experiment the combine function itself is shipped, serialized.
    step.addInput(
        PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeToByteArray(appliedFn)));
  } else {
    // With the FnAPI experiment only the id of the enclosing PTransform is recorded.
    String parentTransformId =
        context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentParent());
    step.addInput(PropertyNames.SERIALIZED_FN, parentTransformId);
  }

  step.addOutput(PropertyNames.OUTPUT, context.getOutput(primitiveTransform));
}
Example 4
Source File: CombineTranslation.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the URN used for {@code Combine.GroupedValues} transforms.
 *
 * @param transform the transform being translated; not consulted, the URN is a constant
 * @return {@code COMBINE_GROUPED_VALUES_TRANSFORM_URN}
 */
@Override
public String getUrn(Combine.GroupedValues<?, ?, ?> transform) {
  return COMBINE_GROUPED_VALUES_TRANSFORM_URN;
}
Example 5
Source File: TransformTranslator.java From beam with Apache License 2.0 | 4 votes |
/**
 * Creates the evaluator for {@code Combine.GroupedValues} over a bounded dataset.
 *
 * <p>The evaluator maps every windowed, grouped element of the borrowed RDD to a windowed
 * {@code KV} of the same key and the result of applying the combine function to the grouped
 * values; timestamp, windows and pane are carried over from the input element.
 *
 * @return a new evaluator whose native string is {@code "map(new <fn>())"}
 */
private static <K, InputT, OutputT>
    TransformEvaluator<Combine.GroupedValues<KV<K, InputT>, InputT, OutputT>> combineGrouped() {
  return new TransformEvaluator<Combine.GroupedValues<KV<K, InputT>, InputT, OutputT>>() {
    @Override
    public void evaluate(
        Combine.GroupedValues<KV<K, InputT>, InputT, OutputT> transform,
        EvaluationContext context) {
      @SuppressWarnings("unchecked")
      CombineWithContext.CombineFnWithContext<InputT, ?, OutputT> combineFn =
          (CombineWithContext.CombineFnWithContext<InputT, ?, OutputT>)
              CombineFnUtil.toFnWithContext(transform.getFn());

      // Used below only to obtain the per-element context passed to combineFn.apply.
      final SparkCombineFn<KV<K, InputT>, InputT, ?, OutputT> sparkCombineFn =
          SparkCombineFn.keyed(
              combineFn,
              context.getSerializableOptions(),
              TranslationUtils.getSideInputs(transform.getSideInputs(), context),
              context.getInput(transform).getWindowingStrategy());

      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<KV<K, Iterable<InputT>>>> groupedRdd =
          ((BoundedDataset<KV<K, Iterable<InputT>>>) context.borrowDataset(transform)).getRDD();

      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<KV<K, OutputT>>> combinedRdd =
          groupedRdd.map(
              element -> {
                KV<K, Iterable<InputT>> grouped = element.getValue();
                K key = grouped.getKey();
                OutputT combined =
                    combineFn.apply(grouped.getValue(), sparkCombineFn.ctxtForValue(element));
                return WindowedValue.of(
                    KV.of(key, combined),
                    element.getTimestamp(),
                    element.getWindows(),
                    element.getPane());
              });

      context.putDataset(transform, new BoundedDataset<>(combinedRdd));
    }

    @Override
    public String toNativeString() {
      return "map(new <fn>())";
    }
  };
}
Example 6
Source File: StreamingTransformTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <K, InputT, OutputT> TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>> combineGrouped() { return new TransformEvaluator<Combine.GroupedValues<K, InputT, OutputT>>() { @Override public void evaluate( final Combine.GroupedValues<K, InputT, OutputT> transform, EvaluationContext context) { // get the applied combine function. PCollection<? extends KV<K, ? extends Iterable<InputT>>> input = context.getInput(transform); final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy(); @SuppressWarnings("unchecked") final CombineWithContext.CombineFnWithContext<InputT, ?, OutputT> fn = (CombineWithContext.CombineFnWithContext<InputT, ?, OutputT>) CombineFnUtil.toFnWithContext(transform.getFn()); @SuppressWarnings("unchecked") UnboundedDataset<KV<K, Iterable<InputT>>> unboundedDataset = (UnboundedDataset<KV<K, Iterable<InputT>>>) context.borrowDataset(transform); JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> dStream = unboundedDataset.getDStream(); final SerializablePipelineOptions options = context.getSerializableOptions(); final SparkPCollectionView pviews = context.getPViews(); JavaDStream<WindowedValue<KV<K, OutputT>>> outStream = dStream.transform( rdd -> { SparkCombineFn<KV<K, InputT>, InputT, ?, OutputT> combineFnWithContext = SparkCombineFn.keyed( fn, options, TranslationUtils.getSideInputs( transform.getSideInputs(), new JavaSparkContext(rdd.context()), pviews), windowingStrategy); return rdd.map(new TranslationUtils.CombineGroupedValues<>(combineFnWithContext)); }); context.putDataset( transform, new UnboundedDataset<>(outStream, unboundedDataset.getStreamSources())); } @Override public String toNativeString() { return "map(new <fn>())"; } }; }
Example 7
Source File: DataflowRunner.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the {@code Combine.GroupedValues} held in the {@code original} field.
 *
 * @return the value of {@code original}
 */
public Combine.GroupedValues<K, InputT, OutputT> getOriginalCombine() {
  return original;
}