Java Code Examples for org.apache.beam.sdk.transforms.Combine#Globally
The following examples show how to use
org.apache.beam.sdk.transforms.Combine#Globally.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CombineTranslation.java From beam with Apache License 2.0 | 6 votes |
@Override public FunctionSpec translate( AppliedPTransform<?, ?, Combine.Globally<?, ?>> transform, SdkComponents components) throws IOException { if (transform.getTransform().getSideInputs().isEmpty()) { return FunctionSpec.newBuilder() .setUrn(getUrn(transform.getTransform())) .setPayload( payloadForCombineGlobally((AppliedPTransform) transform, components).toByteString()) .build(); } else { // Combines with side inputs are translated as generic composites, which have a blank // FunctionSpec. return null; } }
Example 2
Source File: CombineTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Looks up the accumulator coder of {@code combineFn} for the single main input of {@code
 * transform}.
 *
 * @param combineFn the combine function whose accumulator coder is requested
 * @param transform the applied global combine; its only non-additional input supplies the input
 *     coder
 * @return the accumulator coder reported by {@code combineFn}
 * @throws IOException wrapping the {@link CannotProvideCoderException} when no coder can be
 *     provided
 */
private static <InputT, AccumT> Coder<AccumT> extractAccumulatorCoder(
    GlobalCombineFn<InputT, AccumT, ?> combineFn,
    AppliedPTransform<PCollection<InputT>, ?, Combine.Globally<InputT, ?>> transform)
    throws IOException {
  try {
    @SuppressWarnings("unchecked")
    PCollection<InputT> onlyInput =
        (PCollection<InputT>)
            Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(transform));
    Coder<AccumT> accumulatorCoder =
        combineFn.getAccumulatorCoder(
            transform.getPipeline().getCoderRegistry(), onlyInput.getCoder());
    return accumulatorCoder;
  } catch (CannotProvideCoderException cause) {
    throw new IOException("Could not obtain a Coder for the accumulator", cause);
  }
}
Example 3
Source File: CombineTranslation.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the {@link RunnerApi.CombinePayload} describing a {@link Combine.Globally}.
 *
 * @param transform the applied global combine to describe
 * @param components the components handed on to {@code combinePayload}
 * @return the payload built from the transform's combine function and its accumulator coder
 * @throws IOException if the accumulator coder cannot be obtained or the payload cannot be built
 */
@VisibleForTesting
static <InputT, OutputT> CombinePayload payloadForCombineGlobally(
    final AppliedPTransform<
            PCollection<InputT>, PCollection<OutputT>, Combine.Globally<InputT, OutputT>>
        transform,
    final SdkComponents components)
    throws IOException {
  GlobalCombineFn<?, ?, ?> fn = transform.getTransform().getFn();
  // The raw cast lets the wildcard-typed fn be paired with the applied transform.
  Coder<?> accumCoder = extractAccumulatorCoder(fn, (AppliedPTransform) transform);
  return combinePayload(fn, accumCoder, components);
}
Example 4
Source File: CombineTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Applies a global combine, locates it in the pipeline graph, converts it to a {@link
 * CombinePayload}, and checks that both the accumulator coder and the combine function recovered
 * from the payload equal the originals.
 */
@Test
public void testToProto() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  numbers.apply(Combine.globally(combineFn));

  final AtomicReference<AppliedPTransform<?, ?, Combine.Globally<?, ?>>> located =
      new AtomicReference<>();
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void leaveCompositeTransform(Node node) {
          if (!(node.getTransform() instanceof Combine.Globally)) {
            return;
          }
          // Exactly one global combine is expected in the pipeline.
          checkState(located.get() == null);
          located.set((AppliedPTransform) node.toAppliedPTransform(getPipeline()));
        }
      });
  checkState(located.get() != null);
  assertEquals(combineFn, located.get().getTransform().getFn());

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  CombinePayload payload =
      CombineTranslation.CombineGloballyPayloadTranslator.payloadForCombineGlobally(
          (AppliedPTransform) located.get(), components);
  RunnerApi.Components serialized = components.toComponents();

  assertEquals(
      combineFn.getAccumulatorCoder(pipeline.getCoderRegistry(), numbers.getCoder()),
      getAccumulatorCoder(payload, RehydratedComponents.forComponents(serialized)));
  assertEquals(
      combineFn,
      SerializableUtils.deserializeFromByteArray(
          payload.getCombineFn().getPayload().toByteArray(), "CombineFn"));
}
Example 5
Source File: CombineTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Same round-trip check as the plain proto test, but using a {@link CombineFnWithContext}
 * applied with {@code withoutDefaults()} and no side inputs.
 */
@Test
public void testToProtoWithoutSideInputs() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  CombineFnWithContext<Integer, int[], Integer> fn = new TestCombineFnWithContext();
  numbers.apply(Combine.globally(fn).withoutDefaults());

  final AtomicReference<AppliedPTransform<?, ?, Combine.Globally<?, ?>>> located =
      new AtomicReference<>();
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void leaveCompositeTransform(Node node) {
          if (!(node.getTransform() instanceof Combine.Globally)) {
            return;
          }
          // Exactly one global combine is expected in the pipeline.
          checkState(located.get() == null);
          located.set((AppliedPTransform) node.toAppliedPTransform(getPipeline()));
        }
      });
  checkState(located.get() != null);
  assertEquals(fn, located.get().getTransform().getFn());

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  CombinePayload payload =
      CombineTranslation.CombineGloballyPayloadTranslator.payloadForCombineGlobally(
          (AppliedPTransform) located.get(), components);
  RunnerApi.Components serialized = components.toComponents();

  assertEquals(
      fn.getAccumulatorCoder(pipeline.getCoderRegistry(), numbers.getCoder()),
      getAccumulatorCoder(payload, RehydratedComponents.forComponents(serialized)));
  assertEquals(
      fn,
      SerializableUtils.deserializeFromByteArray(
          payload.getCombineFn().getPayload().toByteArray(), "CombineFn"));
}
Example 6
Source File: CombineTranslationTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Expects an {@link IllegalArgumentException} when a global combine that was applied with side
 * inputs is passed to {@code payloadForCombineGlobally}.
 */
@Test
public void testToProtoWithSideInputsFails() throws Exception {
  exception.expect(IllegalArgumentException.class);

  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3));
  final PCollectionView<Iterable<String>> view =
      pipeline.apply(Create.of("foo")).apply(View.asIterable());

  CombineFnWithContext<Integer, int[], Integer> fn =
      new TestCombineFnWithContext() {
        @Override
        public Integer extractOutput(int[] accumulator, Context c) {
          // The value is not used; the read only makes the fn touch the side input.
          Iterable<String> sideInputValue = c.sideInput(view);
          return accumulator[0];
        }
      };

  numbers.apply(Combine.globally(fn).withSideInputs(view).withoutDefaults());

  final AtomicReference<AppliedPTransform<?, ?, Combine.Globally<?, ?>>> located =
      new AtomicReference<>();
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void leaveCompositeTransform(Node node) {
          if (!(node.getTransform() instanceof Combine.Globally)) {
            return;
          }
          // Exactly one global combine is expected in the pipeline.
          checkState(located.get() == null);
          located.set((AppliedPTransform) node.toAppliedPTransform(getPipeline()));
        }
      });

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  // The result is never inspected; the expected exception set up at the top is the assertion.
  CombinePayload unusedPayload =
      CombineTranslation.CombineGloballyPayloadTranslator.payloadForCombineGlobally(
          (AppliedPTransform) located.get(), components);
}
Example 7
Source File: CombineTranslation.java From beam with Apache License 2.0 | 4 votes |
/**
 * Returns the URN used for global combines.
 *
 * @param transform the global combine; not consulted, every instance maps to the same URN
 * @return {@code COMBINE_GLOBALLY_TRANSFORM_URN}
 */
@Override
public String getUrn(Combine.Globally<?, ?> transform) {
  return COMBINE_GLOBALLY_TRANSFORM_URN;
}
Example 8
Source File: TransformTranslator.java From beam with Apache License 2.0 | 4 votes |
private static <InputT, AccumT, OutputT> TransformEvaluator<Combine.Globally<InputT, OutputT>> combineGlobally() { return new TransformEvaluator<Combine.Globally<InputT, OutputT>>() { @Override public void evaluate(Combine.Globally<InputT, OutputT> transform, EvaluationContext context) { final PCollection<InputT> input = context.getInput(transform); final Coder<InputT> iCoder = context.getInput(transform).getCoder(); final Coder<OutputT> oCoder = context.getOutput(transform).getCoder(); final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy(); @SuppressWarnings("unchecked") final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn = (CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>) CombineFnUtil.toFnWithContext(transform.getFn()); final WindowedValue.FullWindowedValueCoder<OutputT> wvoCoder = WindowedValue.FullWindowedValueCoder.of( oCoder, windowingStrategy.getWindowFn().windowCoder()); final boolean hasDefault = transform.isInsertDefault(); final SparkCombineFn<InputT, InputT, AccumT, OutputT> sparkCombineFn = SparkCombineFn.globally( combineFn, context.getSerializableOptions(), TranslationUtils.getSideInputs(transform.getSideInputs(), context), windowingStrategy); final Coder<AccumT> aCoder; try { aCoder = combineFn.getAccumulatorCoder(context.getPipeline().getCoderRegistry(), iCoder); } catch (CannotProvideCoderException e) { throw new IllegalStateException("Could not determine coder for accumulator", e); } @SuppressWarnings("unchecked") JavaRDD<WindowedValue<InputT>> inRdd = ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD(); JavaRDD<WindowedValue<OutputT>> outRdd; SparkCombineFn.WindowedAccumulator<InputT, InputT, AccumT, ?> accumulated = GroupCombineFunctions.combineGlobally(inRdd, sparkCombineFn, aCoder, windowingStrategy); if (!accumulated.isEmpty()) { Iterable<WindowedValue<OutputT>> output = sparkCombineFn.extractOutput(accumulated); outRdd = context .getSparkContext() 
.parallelize(CoderHelpers.toByteArrays(output, wvoCoder)) .map(CoderHelpers.fromByteFunction(wvoCoder)); } else { // handle empty input RDD, which will naturally skip the entire execution // as Spark will not run on empty RDDs. JavaSparkContext jsc = new JavaSparkContext(inRdd.context()); if (hasDefault) { OutputT defaultValue = combineFn.defaultValue(); outRdd = jsc.parallelize(Lists.newArrayList(CoderHelpers.toByteArray(defaultValue, oCoder))) .map(CoderHelpers.fromByteFunction(oCoder)) .map(WindowedValue::valueInGlobalWindow); } else { outRdd = jsc.emptyRDD(); } } context.putDataset(transform, new BoundedDataset<>(outRdd)); } @Override public String toNativeString() { return "aggregate(..., new <fn>(), ...)"; } }; }
Example 9
Source File: HllCount.java From beam with Apache License 2.0 | 2 votes |
/**
 * Builds a {@link Combine.Globally} {@code PTransform} that consumes a {@code
 * PCollection<InputT>} and produces a {@code PCollection<byte[]>} holding the HLL++ sketch
 * computed over the input elements.
 *
 * <p>For an empty input {@code PCollection} the result is a singleton {@code PCollection}
 * containing an "empty sketch" (byte array of length 0).
 *
 * @return the global combine wrapping this builder's init function
 */
public Combine.Globally<InputT, byte[]> globally() {
  Combine.Globally<InputT, byte[]> sketchCombine = Combine.globally(initFn);
  return sketchCombine;
}
Example 10
Source File: HllCount.java From beam with Apache License 2.0 | 2 votes |
/**
 * Builds a {@link Combine.Globally} {@code PTransform} that consumes a {@code
 * PCollection<byte[]>} of HLL++ sketches and produces a {@code PCollection<byte[]>} holding a
 * new sketch merged from all of them.
 *
 * <p>Only sketches of the same type can be merged together. If incompatible sketches are
 * provided, a runtime error will occur.
 *
 * <p>If sketches of different {@code precision}s are merged, the merged sketch will get the
 * minimum precision encountered among all the input sketches.
 *
 * <p>For an empty input {@code PCollection} the result is a singleton {@code PCollection}
 * containing an "empty sketch" (byte array of length 0).
 *
 * @return the global combine wrapping a newly created merge function
 */
public static Combine.Globally<byte[], byte[]> globally() {
  Combine.Globally<byte[], byte[]> mergeCombine =
      Combine.globally(HllCountMergePartialFn.create());
  return mergeCombine;
}