org.apache.beam.sdk.transforms.windowing.WindowFn Java Examples
The following examples show how to use
org.apache.beam.sdk.transforms.windowing.WindowFn.
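Before the examples, it helps to see how a WindowFn usually enters a pipeline: it is not invoked directly, but attached to a PCollection through the Window.into transform. The sketch below is a minimal illustration using the built-in FixedWindows; the pipeline, element values, and window size are arbitrary choices for demonstration, not taken from any of the examples.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.Duration;

public class WindowFnUsageSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

    // FixedWindows is one concrete WindowFn; Window.into accepts any WindowFn implementation.
    PCollection<String> windowed =
        pipeline
            .apply(Create.of("a", "b", "c"))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))));

    pipeline.run().waitUntilFinish();
  }
}

The examples that follow exercise WindowFn more directly: constructing windowing strategies, assigning and merging windows, checking compatibility, and translating window functions for different runners.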
Example #1
Source File: WindowingStrategy.java From beam with Apache License 2.0
private WindowingStrategy(
    WindowFn<T, W> windowFn,
    Trigger trigger,
    boolean triggerSpecified,
    AccumulationMode mode,
    boolean modeSpecified,
    Duration allowedLateness,
    boolean allowedLatenessSpecified,
    TimestampCombiner timestampCombiner,
    boolean timestampCombinerSpecified,
    ClosingBehavior closingBehavior,
    OnTimeBehavior onTimeBehavior) {
  this.windowFn = windowFn;
  this.trigger = trigger;
  this.triggerSpecified = triggerSpecified;
  this.mode = mode;
  this.modeSpecified = modeSpecified;
  this.allowedLateness = allowedLateness;
  this.allowedLatenessSpecified = allowedLatenessSpecified;
  this.closingBehavior = closingBehavior;
  this.onTimeBehavior = onTimeBehavior;
  this.timestampCombiner = timestampCombiner;
  this.timestampCombinerSpecified = timestampCombinerSpecified;
}
Example #2
Source File: BeamAggregationRel.java From beam with Apache License 2.0
private NodeStats computeWindowingCostEffect(NodeStats inputStat) {
  if (windowFn == null) {
    return inputStat;
  }
  WindowFn w = windowFn;
  double multiplicationFactor = 1;
  // If the window is SlidingWindow, the number of tuples will increase. (Because, some of the
  // tuples repeat in multiple windows).
  if (w instanceof SlidingWindows) {
    multiplicationFactor =
        ((double) ((SlidingWindows) w).getSize().getStandardSeconds())
            / ((SlidingWindows) w).getPeriod().getStandardSeconds();
  }

  return NodeStats.create(
      inputStat.getRowCount() * multiplicationFactor,
      inputStat.getRate() * multiplicationFactor,
      BeamIOSourceRel.CONSTANT_WINDOW_SIZE);
}
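The cost estimate above scales the input statistics by size/period when the WindowFn is a SlidingWindows, since each element lands in several overlapping windows. As a standalone illustration (the 60-second size and 20-second period are invented for this sketch), the same ratio works out to a factor of 3:

import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
import org.joda.time.Duration;

public class SlidingWindowFactorSketch {
  public static void main(String[] args) {
    SlidingWindows w =
        SlidingWindows.of(Duration.standardSeconds(60)).every(Duration.standardSeconds(20));

    // Same ratio the cost model above computes: window size divided by sliding period.
    double multiplicationFactor =
        ((double) w.getSize().getStandardSeconds()) / w.getPeriod().getStandardSeconds();

    System.out.println(multiplicationFactor); // 3.0 -- each element belongs to three windows
  }
}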
Example #3
Source File: PipelineTranslator.java From incubator-nemo with Apache License 2.0
/**
 * @param ctx provides translation context
 * @param beamNode the beam node to be translated
 * @param transform transform which can be obtained from {@code beamNode}
 */
@PrimitiveTransformTranslator({Window.class, Window.Assign.class})
private static void windowTranslator(final PipelineTranslationContext ctx,
                                     final TransformHierarchy.Node beamNode,
                                     final PTransform<?, ?> transform) {
  final WindowFn windowFn;
  if (transform instanceof Window) {
    windowFn = ((Window) transform).getWindowFn();
  } else if (transform instanceof Window.Assign) {
    windowFn = ((Window.Assign) transform).getWindowFn();
  } else {
    throw new UnsupportedOperationException(String.format("%s is not supported", transform));
  }
  final IRVertex vertex = new OperatorVertex(
    new WindowFnTransform(windowFn, DisplayData.from(beamNode.getTransform())));
  ctx.addVertex(vertex);
  beamNode.getInputs().values().forEach(input -> ctx.addEdgeTo(vertex, input));
  beamNode.getOutputs().values().forEach(output -> ctx.registerMainOutputFrom(beamNode, vertex, output));
}
Example #4
Source File: WindowIntoTranslationTest.java From beam with Apache License 2.0
@Test
public void testToFromProto() throws InvalidProtocolBufferException {
  pipeline.apply(GenerateSequence.from(0)).apply(Window.<Long>into((WindowFn) windowFn));

  final AtomicReference<AppliedPTransform<?, ?, Assign<?>>> assign = new AtomicReference<>(null);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          if (node.getTransform() instanceof Window.Assign) {
            checkState(assign.get() == null);
            assign.set(
                (AppliedPTransform<?, ?, Assign<?>>) node.toAppliedPTransform(getPipeline()));
          }
        }
      });
  checkState(assign.get() != null);

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  WindowIntoPayload payload =
      WindowIntoTranslation.toProto(assign.get().getTransform(), components);

  assertEquals(windowFn, WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()));
}
Example #5
Source File: WindowFnTestUtils.java From beam with Apache License 2.0
/**
 * Verifies that later-ending merged windows from any of the timestamps hold up output of
 * earlier-ending windows, using the provided {@link WindowFn} and {@link TimestampCombiner}.
 *
 * <p>Given a list of lists of timestamps, where each list is expected to merge into a single
 * window with end times in ascending order, assigns and merges windows for each list (as though
 * each were a separate key/user session). Then combines each timestamp in the list according to
 * the provided {@link TimestampCombiner}.
 *
 * <p>Verifies that overlapping windows do not hold each other up via the watermark.
 */
public static <T, W extends IntervalWindow> void validateGetOutputTimestamps(
    WindowFn<T, W> windowFn,
    TimestampCombiner timestampCombiner,
    List<List<Long>> timestampsPerWindow)
    throws Exception {
  List<List<TimestampedValue<T>>> timestampValuesPerWindow = new ArrayList<>();
  for (List<Long> timestamps : timestampsPerWindow) {
    List<TimestampedValue<T>> timestampedValues = new ArrayList<>();
    for (Long timestamp : timestamps) {
      TimestampedValue<T> tv = TimestampedValue.of(null, new Instant(timestamp));
      timestampedValues.add(tv);
    }
    timestampValuesPerWindow.add(timestampedValues);
  }
  validateGetOutputTimestampsWithValue(windowFn, timestampCombiner, timestampValuesPerWindow);
}
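A plausible way to invoke this helper is with a merging WindowFn such as Sessions, passing one list of timestamps per expected merged session. The sketch below is illustrative only: the gap duration and timestamp lists are invented, and it assumes the utility is available as org.apache.beam.sdk.testing.WindowFnTestUtils.

import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.testing.WindowFnTestUtils;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.Sessions;
import org.apache.beam.sdk.transforms.windowing.TimestampCombiner;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.joda.time.Duration;

public class SessionsOutputTimestampsSketch {
  public static void main(String[] args) throws Exception {
    WindowFn<Object, IntervalWindow> windowFn = Sessions.withGapDuration(Duration.millis(10));

    // Each inner list should merge into one session; the sessions end in ascending order.
    List<List<Long>> timestampsPerWindow =
        Arrays.asList(Arrays.asList(0L, 5L, 9L), Arrays.asList(20L, 22L));

    WindowFnTestUtils.validateGetOutputTimestamps(
        windowFn, TimestampCombiner.END_OF_WINDOW, timestampsPerWindow);
  }
}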
Example #6
Source File: StreamingSideInputDoFnRunnerTest.java From beam with Apache License 2.0
@SuppressWarnings("unchecked")
private <ReceiverT> StreamingSideInputDoFnRunner<String, String, IntervalWindow> createRunner(
    WindowFn<?, ?> windowFn,
    DoFnRunners.OutputManager outputManager,
    List<PCollectionView<String>> views,
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher)
    throws Exception {
  DoFnRunner<String, String> simpleDoFnRunner =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(),
          new SideInputFn(views),
          mockSideInputReader,
          outputManager,
          mainOutputTag,
          Arrays.<TupleTag<?>>asList(),
          stepContext,
          null,
          Collections.emptyMap(),
          WindowingStrategy.of(windowFn),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  return new StreamingSideInputDoFnRunner<>(simpleDoFnRunner, sideInputFetcher);
}
Example #7
Source File: TriggerStateMachineTester.java From beam with Apache License 2.0
protected TriggerStateMachineTester(
    ExecutableTriggerStateMachine executableTriggerStateMachine,
    WindowFn<Object, W> windowFn,
    Duration allowedLateness)
    throws Exception {
  this.windowFn = windowFn;
  this.executableTrigger = executableTriggerStateMachine;
  this.finishedSets = new HashMap<>();
  this.activeWindows =
      windowFn.isNonMerging()
          ? new NonMergingActiveWindowSet<>()
          : new MergingActiveWindowSet<>(windowFn, stateInternals);
  this.windowToMergeResult = new HashMap<>();
  this.contextFactory =
      new TriggerStateMachineContextFactory<>(windowFn, stateInternals, activeWindows);
}
Example #8
Source File: WindowingStrategy.java From beam with Apache License 2.0
/**
 * Returns a {@link WindowingStrategy} identical to {@code this} but with the window function set
 * to {@code wildcardWindowFn}.
 */
public WindowingStrategy<T, W> withWindowFn(WindowFn<?, ?> wildcardWindowFn) {
  @SuppressWarnings("unchecked")
  WindowFn<T, W> typedWindowFn = (WindowFn<T, W>) wildcardWindowFn;

  return new WindowingStrategy<>(
      typedWindowFn,
      trigger,
      triggerSpecified,
      mode,
      modeSpecified,
      allowedLateness,
      allowedLatenessSpecified,
      timestampCombiner,
      timestampCombinerSpecified,
      closingBehavior,
      onTimeBehavior);
}
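In other words, withWindowFn swaps only the window function and carries every other setting of the strategy over unchanged. A minimal sketch of calling it (the particular window functions and durations are arbitrary choices for illustration):

import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Sessions;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.joda.time.Duration;

public class WithWindowFnSketch {
  public static void main(String[] args) {
    WindowingStrategy<Object, ?> original =
        WindowingStrategy.of(FixedWindows.of(Duration.standardMinutes(5)));

    // Replace only the window function; trigger, accumulation mode, allowed lateness, and
    // closing/on-time behavior all carry over from the original strategy.
    WindowingStrategy<Object, ?> updated =
        original.withWindowFn(Sessions.withGapDuration(Duration.standardMinutes(1)));

    System.out.println(updated.getWindowFn());
  }
}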
Example #9
Source File: ReduceFnTester.java From beam with Apache License 2.0
public static <W extends BoundedWindow> ReduceFnTester<Integer, Iterable<Integer>, W> nonCombining(
    WindowFn<?, W> windowFn,
    TriggerStateMachine triggerStateMachine,
    AccumulationMode mode,
    Duration allowedDataLateness,
    ClosingBehavior closingBehavior)
    throws Exception {
  WindowingStrategy<?, W> strategy =
      WindowingStrategy.of(windowFn)
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(mode)
          .withAllowedLateness(allowedDataLateness)
          .withClosingBehavior(closingBehavior);
  return nonCombining(strategy, triggerStateMachine);
}
Example #10
Source File: StaticWindows.java From beam with Apache License 2.0
@Override
public boolean isCompatible(WindowFn<?, ?> other) {
  if (!(other instanceof StaticWindows)) {
    return false;
  }
  StaticWindows that = (StaticWindows) other;
  return Objects.equals(this.windows.get(), that.windows.get());
}
Example #11
Source File: TriggerStateMachineTester.java From beam with Apache License 2.0
private SimpleTriggerStateMachineTester(
    ExecutableTriggerStateMachine executableTriggerStateMachine,
    WindowFn<Object, W> windowFn,
    Duration allowedLateness)
    throws Exception {
  super(executableTriggerStateMachine, windowFn, allowedLateness);
}
Example #12
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0
/**
 * Test that it fires an empty on-time isFinished pane when OnTimeBehavior is FIRE_ALWAYS and
 * ClosingBehavior is FIRE_IF_NON_EMPTY.
 *
 * <p>This is a test just for backward compatibility.
 */
@Test
public void testEmptyOnTimeWithOnTimeBehaviorBackwardCompatibility() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(10)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withTrigger(
              AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(1)))
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withClosingBehavior(ClosingBehavior.FIRE_IF_NON_EMPTY);

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceInputWatermark(new Instant(0));
  tester.advanceProcessingTime(new Instant(0));

  tester.injectElements(TimestampedValue.of(1, new Instant(1)));

  // Should fire empty on time isFinished pane
  tester.advanceInputWatermark(new Instant(11));

  List<WindowedValue<Integer>> output = tester.extractOutput();
  assertEquals(2, output.size());

  assertThat(
      output.get(0),
      WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.EARLY, 0, -1)));
  assertThat(
      output.get(1),
      WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, true, Timing.ON_TIME, 1, 0)));
}
Example #13
Source File: StaticWindowsTest.java From beam with Apache License 2.0
@Test
public void getSideInputWindowIdentity() {
  WindowFn<Object, BoundedWindow> fn =
      StaticWindows.of(IntervalWindow.getCoder(), ImmutableList.of(first, second));

  assertThat(fn.getDefaultWindowMappingFn().getSideInputWindow(first), Matchers.equalTo(first));
  assertThat(fn.getDefaultWindowMappingFn().getSideInputWindow(second), Matchers.equalTo(second));
}
Example #14
Source File: SparkBatchPortablePipelineTranslator.java From beam with Apache License 2.0
private static <K, V> void translateGroupByKey(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {

  RunnerApi.Components components = pipeline.getComponents();
  String inputId = getInputId(transformNode);
  Dataset inputDataset = context.popDataset(inputId);
  JavaRDD<WindowedValue<KV<K, V>>> inputRdd = ((BoundedDataset<KV<K, V>>) inputDataset).getRDD();
  WindowedValueCoder<KV<K, V>> inputCoder = getWindowedValueCoder(inputId, components);
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
  Coder<K> inputKeyCoder = inputKvCoder.getKeyCoder();
  Coder<V> inputValueCoder = inputKvCoder.getValueCoder();
  WindowingStrategy windowingStrategy = getWindowingStrategy(inputId, components);
  WindowFn<Object, BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(inputValueCoder, windowFn.windowCoder());

  JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupedByKeyAndWindow;
  Partitioner partitioner = getPartitioner(context);
  if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) {
    // we can have a memory sensitive translation for non-merging windows
    groupedByKeyAndWindow =
        GroupNonMergingWindowsFunctions.groupByKeyAndWindow(
            inputRdd, inputKeyCoder, inputValueCoder, windowingStrategy, partitioner);
  } else {
    JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupedByKeyOnly =
        GroupCombineFunctions.groupByKeyOnly(inputRdd, inputKeyCoder, wvCoder, partitioner);
    // for batch, GroupAlsoByWindow uses an in-memory StateInternals.
    groupedByKeyAndWindow =
        groupedByKeyOnly.flatMap(
            new SparkGroupAlsoByWindowViaOutputBufferFn<>(
                windowingStrategy,
                new TranslationUtils.InMemoryStateInternalsFactory<>(),
                SystemReduceFn.buffering(inputValueCoder),
                context.serializablePipelineOptions));
  }
  context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(groupedByKeyAndWindow));
}
Example #15
Source File: StaticWindows.java From beam with Apache License 2.0
@Override
public void verifyCompatibility(WindowFn<?, ?> other) throws IncompatibleWindowException {
  if (!this.isCompatible(other)) {
    throw new IncompatibleWindowException(
        other,
        String.format(
            "Only %s objects with the same window supplier are compatible.",
            StaticWindows.class.getSimpleName()));
  }
}
Example #16
Source File: SamzaAssignContext.java From beam with Apache License 2.0
public SamzaAssignContext(WindowFn<InT, W> fn, WindowedValue<InT> value) {
  fn.super();
  this.value = value;

  if (value.getWindows().size() != 1) {
    throw new IllegalArgumentException(
        String.format(
            "Only single windowed value allowed for assignment. Windows: %s",
            value.getWindows()));
  }
}
Example #17
Source File: WindowFnTestUtils.java From beam with Apache License 2.0
/**
 * Assigns the given {@code timestampedValue} to windows using the specified {@code windowFn}, and
 * verifies that the result of {@link WindowFn#getOutputTime windowFn.getOutputTime} for later
 * windows (as defined by {@code maxTimestamp}) won't prevent the watermark from passing the end
 * of earlier windows.
 *
 * <p>This verifies that overlapping windows don't interfere at all. Depending on the {@code
 * windowFn} this may be stricter than desired. This version allows passing a {@link
 * TimestampedValue} in case the value is needed to assign windows.
 */
public static <T, W extends BoundedWindow> void validateGetOutputTimestampWithValue(
    WindowFn<T, W> windowFn, TimestampedValue<T> timestampedValue) throws Exception {
  Collection<W> windows = assignedWindowsWithValue(windowFn, timestampedValue);
  List<W> sortedWindows = new ArrayList<>(windows);
  sortedWindows.sort(Comparator.comparing(BoundedWindow::maxTimestamp));

  Instant instant = timestampedValue.getTimestamp();
  Instant endOfPrevious = null;
  for (W window : sortedWindows) {
    Instant outputTimestamp = windowFn.getOutputTime(instant, window);
    if (endOfPrevious == null) {
      // If this is the first window, the output timestamp can be anything, as long as it is in
      // the valid range.
      assertFalse(
          "getOutputTime must be greater than or equal to input timestamp",
          outputTimestamp.isBefore(instant));
      assertFalse(
          "getOutputTime must be less than or equal to the max timestamp",
          outputTimestamp.isAfter(window.maxTimestamp()));
    } else {
      // If this is a later window, the output timestamp must be after the end of the previous
      // window
      assertTrue(
          "getOutputTime must be greater than the end of the previous window",
          outputTimestamp.isAfter(endOfPrevious));
      assertFalse(
          "getOutputTime must be less than or equal to the max timestamp",
          outputTimestamp.isAfter(window.maxTimestamp()));
    }
    endOfPrevious = window.maxTimestamp();
  }
}
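This helper is most interesting for overlapping window functions such as SlidingWindows, where each timestamp lands in several windows. The sketch below is illustrative only: the window size, period, timestamp, and element value are invented, and it assumes the utility is available as org.apache.beam.sdk.testing.WindowFnTestUtils.

import org.apache.beam.sdk.testing.WindowFnTestUtils;
import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
import org.apache.beam.sdk.values.TimestampedValue;
import org.joda.time.Duration;
import org.joda.time.Instant;

public class ValidateOutputTimestampSketch {
  public static void main(String[] args) throws Exception {
    SlidingWindows windowFn =
        SlidingWindows.of(Duration.millis(100)).every(Duration.millis(25));

    // The element value only matters if the WindowFn inspects it; a placeholder works here
    // because SlidingWindows assigns windows from the timestamp alone.
    TimestampedValue<Object> value = TimestampedValue.of("element", new Instant(130L));

    WindowFnTestUtils.validateGetOutputTimestampWithValue(windowFn, value);
  }
}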
Example #18
Source File: SparkAssignWindowFn.java From beam with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public WindowedValue<T> call(WindowedValue<T> windowedValue) throws Exception {
  final BoundedWindow boundedWindow = Iterables.getOnlyElement(windowedValue.getWindows());
  final T element = windowedValue.getValue();
  final Instant timestamp = windowedValue.getTimestamp();
  Collection<W> windows =
      ((WindowFn<T, W>) fn)
          .assignWindows(
              ((WindowFn<T, W>) fn).new AssignContext() {
                @Override
                public T element() {
                  return element;
                }

                @Override
                public Instant timestamp() {
                  return timestamp;
                }

                @Override
                public BoundedWindow window() {
                  return boundedWindow;
                }
              });
  return WindowedValue.of(element, timestamp, windows, PaneInfo.NO_FIRING);
}
Example #19
Source File: IdentityWindowFn.java From beam with Apache License 2.0
@Override
public void verifyCompatibility(WindowFn<?, ?> other) throws IncompatibleWindowException {
  throw new UnsupportedOperationException(
      String.format(
          "%s.verifyCompatibility() should never be called."
              + " It is a private implementation detail of sdk utilities."
              + " This message indicates a bug in the Beam SDK.",
          getClass().getCanonicalName()));
}
Example #20
Source File: MergingActiveWindowSet.java From beam with Apache License 2.0
public MergingActiveWindowSet(WindowFn<Object, W> windowFn, StateInternals state) {
  this.windowFn = windowFn;

  StateTag<ValueState<Map<W, Set<W>>>> tag =
      StateTags.makeSystemTagInternal(
          StateTags.value(
              "tree",
              MapCoder.of(windowFn.windowCoder(), SetCoder.of(windowFn.windowCoder()))));
  valueState = state.state(StateNamespaces.global(), tag);
  // Little use trying to prefetch this state since the ReduceFnRunner
  // is stymied until it is available.
  activeWindowToStateAddressWindows = emptyIfNull(valueState.read());
  originalActiveWindowToStateAddressWindows = deepCopy(activeWindowToStateAddressWindows);
}
Example #21
Source File: WindowEvaluatorFactoryTest.java From beam with Apache License 2.0
@Override
public void verifyCompatibility(WindowFn<?, ?> other) throws IncompatibleWindowException {
  throw new IncompatibleWindowException(
      other,
      String.format(
          "%s is not compatible with any other %s.",
          EvaluatorTestWindowFn.class.getSimpleName(), WindowFn.class.getSimpleName()));
}
Example #22
Source File: ReduceFnRunnerTest.java From beam with Apache License 2.0
/** Tests that a processing time timer does not cause window GC. */
@Test
public void testProcessingTimeTimerDoesNotGc() throws Exception {
  WindowingStrategy<?, IntervalWindow> strategy =
      WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
          .withTimestampCombiner(TimestampCombiner.EARLIEST)
          .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
          .withAllowedLateness(Duration.ZERO)
          .withTrigger(
              Repeatedly.forever(
                  AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));

  ReduceFnTester<Integer, Integer, IntervalWindow> tester =
      ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());

  tester.advanceProcessingTime(new Instant(5000));
  injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
  injectElement(tester, 5);

  tester.advanceProcessingTime(new Instant(10000));

  tester.assertHasOnlyGlobalAndStateFor(new IntervalWindow(new Instant(0), new Instant(100)));

  assertThat(
      tester.extractOutput(),
      contains(
          isSingleWindowedValue(
              equalTo(7), 2, 0, 100, PaneInfo.createPane(true, false, Timing.EARLY, 0, 0))));
}
Example #23
Source File: PCollectionTranslationTest.java From beam with Apache License 2.0
@Override
public void verifyCompatibility(WindowFn<?, ?> other) throws IncompatibleWindowException {
  if (!this.isCompatible(other)) {
    throw new IncompatibleWindowException(
        other,
        String.format(
            "%s is only compatible with %s.",
            CustomWindows.class.getSimpleName(), CustomWindows.class.getSimpleName()));
  }
}
Example #24
Source File: WatermarkCallbackExecutorTest.java From beam with Apache License 2.0
@Test
public void multipleCallbacksShouldFireFires() throws Exception {
  CountDownLatch latch = new CountDownLatch(2);
  WindowFn<Object, IntervalWindow> windowFn = FixedWindows.of(Duration.standardMinutes(10));
  IntervalWindow window =
      new IntervalWindow(new Instant(0L), new Instant(0L).plus(Duration.standardMinutes(10)));
  executor.callOnGuaranteedFiring(
      create, window, WindowingStrategy.of(windowFn), new CountDownLatchCallback(latch));
  executor.callOnGuaranteedFiring(
      create, window, WindowingStrategy.of(windowFn), new CountDownLatchCallback(latch));

  executor.fireForWatermark(create, new Instant(0L).plus(Duration.standardMinutes(10)));
  assertThat(latch.await(500, TimeUnit.MILLISECONDS), equalTo(true));
}
Example #25
Source File: WindowMergingFnRunner.java From beam with Apache License 2.0
static <T, W extends BoundedWindow>
    ThrowingFunction<KV<T, Iterable<W>>, KV<T, KV<Iterable<W>, Iterable<KV<W, Iterable<W>>>>>>
        createMapFunctionForPTransform(String ptransformId, PTransform ptransform)
            throws IOException {
  RunnerApi.FunctionSpec payload =
      RunnerApi.FunctionSpec.parseFrom(ptransform.getSpec().getPayload());

  WindowFn<?, W> windowFn =
      (WindowFn<?, W>) WindowingStrategyTranslation.windowFnFromProto(payload);
  return WindowMergingFnRunner.<T, W>create(windowFn)::mergeWindows;
}
Example #26
Source File: AssignWindowsFunction.java From beam with Apache License 2.0
public AssignWindowsFunction(WindowFn<T, BoundedWindow> windowFn, PipelineOptions options) {
  this.windowFn = windowFn;
  SdkComponents components = SdkComponents.create();
  this.options = options;
  this.serializedOptions = new SerializablePipelineOptions(options).toString();
  components.registerEnvironment(
      Environments.createOrGetDefaultEnvironment(options.as(PortablePipelineOptions.class)));
  RunnerApi.FunctionSpec windowFnProto =
      WindowingStrategyTranslation.toProto(windowFn, components);
  windowFnBytes = windowFnProto.toByteArray();
}
Example #27
Source File: FlinkAssignContext.java From beam with Apache License 2.0
FlinkAssignContext(WindowFn<InputT, W> fn, WindowedValue<InputT> value) {
  fn.super();
  if (Iterables.size(value.getWindows()) != 1) {
    throw new IllegalArgumentException(
        String.format(
            "%s passed to window assignment must be in a single window, but it was in %s: %s",
            WindowedValue.class.getSimpleName(),
            Iterables.size(value.getWindows()),
            value.getWindows()));
  }
  this.value = value;
}
Example #28
Source File: BeamAggregationRel.java From beam with Apache License 2.0
private Transform(
    WindowFn<Row, IntervalWindow> windowFn,
    int windowFieldIndex,
    ImmutableBitSet groupSet,
    List<FieldAggregation> fieldAggregations,
    Schema outputSchema) {
  this.windowFn = windowFn;
  this.windowFieldIndex = windowFieldIndex;
  this.fieldAggregations = fieldAggregations;
  this.outputSchema = outputSchema;
  this.keyFieldsIds =
      groupSet.asList().stream().filter(i -> i != windowFieldIndex).collect(toList());
}
Example #29
Source File: BeamAggregationRel.java From beam with Apache License 2.0
public BeamAggregationRel(
    RelOptCluster cluster,
    RelTraitSet traits,
    RelNode child,
    ImmutableBitSet groupSet,
    List<ImmutableBitSet> groupSets,
    List<AggregateCall> aggCalls,
    @Nullable WindowFn<Row, IntervalWindow> windowFn,
    int windowFieldIndex) {
  super(cluster, traits, child, groupSet, groupSets, aggCalls);

  this.windowFn = windowFn;
  this.windowFieldIndex = windowFieldIndex;
}
Example #30
Source File: BeamSetOperatorRelBase.java From beam with Apache License 2.0
@Override
public PCollection<Row> expand(PCollectionList<Row> inputs) {
  checkArgument(
      inputs.size() == 2,
      "Wrong number of arguments to %s: %s",
      beamRelNode.getClass().getSimpleName(),
      inputs);
  PCollection<Row> leftRows = inputs.get(0);
  PCollection<Row> rightRows = inputs.get(1);

  WindowFn leftWindow = leftRows.getWindowingStrategy().getWindowFn();
  WindowFn rightWindow = rightRows.getWindowingStrategy().getWindowFn();
  if (!leftWindow.isCompatible(rightWindow)) {
    throw new IllegalArgumentException(
        "inputs of "
            + opType
            + " have different window strategy: "
            + leftWindow
            + " VS "
            + rightWindow);
  }

  // TODO: We may want to preaggregate the counts first using Group instead of calling CoGroup
  // and measuring the iterable size. If on average there are duplicates in the input, this will
  // be faster.
  final String lhsTag = "lhs";
  final String rhsTag = "rhs";
  PCollection<Row> joined =
      PCollectionTuple.of(lhsTag, leftRows, rhsTag, rightRows)
          .apply("CoGroup", CoGroup.join(By.fieldNames("*")));
  return joined
      .apply(
          "FilterResults",
          ParDo.of(
              new BeamSetOperatorsTransforms.SetOperatorFilteringDoFn(
                  lhsTag, rhsTag, opType, all)))
      .setRowSchema(joined.getSchema().getField("key").getType().getRowSchema());
}
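The guard at the top of this expand method relies on WindowFn.isCompatible to reject set operations over inputs with mismatched windowing. As a standalone sketch (the particular window functions and durations are arbitrary), the boolean isCompatible check and the exception-throwing verifyCompatibility variant behave like this:

import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.IncompatibleWindowException;
import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.joda.time.Duration;

public class WindowFnCompatibilitySketch {
  public static void main(String[] args) {
    WindowFn<?, ?> fixed = FixedWindows.of(Duration.standardMinutes(1));
    WindowFn<?, ?> sameFixed = FixedWindows.of(Duration.standardMinutes(1));
    WindowFn<?, ?> sliding =
        SlidingWindows.of(Duration.standardMinutes(1)).every(Duration.standardSeconds(30));

    System.out.println(fixed.isCompatible(sameFixed)); // true: same window function and size
    System.out.println(fixed.isCompatible(sliding));   // false: different windowing

    try {
      // verifyCompatibility throws instead of returning a boolean, carrying a descriptive message.
      fixed.verifyCompatibility(sliding);
    } catch (IncompatibleWindowException e) {
      System.out.println(e.getMessage());
    }
  }
}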