Java Code Examples for org.apache.beam.runners.core.SystemReduceFn#buffering()

The following examples show how to use org.apache.beam.runners.core.SystemReduceFn#buffering(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PipelineTranslator.java    From incubator-nemo with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the transform that implements a group-by-key node.
 *
 * <p>If {@code isGlobalWindow} reports {@code true} for the node, a plain
 * {@code GroupByKeyTransform} is returned. Otherwise a {@code GroupByKeyAndWindowDoFnTransform}
 * is created from the main input's windowing strategy and a buffering {@code SystemReduceFn}
 * built from the main input's coder.
 *
 * @param ctx      translation context
 * @param beamNode the beam node to be translated
 * @return group by key transform
 */
private static Transform createGBKTransform(
  final PipelineTranslationContext ctx,
  final TransformHierarchy.Node beamNode) {
  final AppliedPTransform appliedTransform = beamNode.toAppliedPTransform(ctx.getPipeline());
  // Resolved before the window check, exactly like the original: a node without a single
  // non-additional input fails here regardless of which transform would be chosen.
  final PCollection<?> primaryInput = (PCollection<?>)
    Iterables.getOnlyElement(TransformInputs.nonAdditionalInputs(appliedTransform));
  final TupleTag outputTag = new TupleTag<>();

  // Globally-windowed input needs no window-aware grouping.
  if (isGlobalWindow(beamNode, ctx.getPipeline())) {
    return new GroupByKeyTransform();
  }

  return new GroupByKeyAndWindowDoFnTransform(
    getOutputCoders(appliedTransform),
    outputTag,
    primaryInput.getWindowingStrategy(),
    ctx.getPipelineOptions(),
    SystemReduceFn.buffering(primaryInput.getCoder()),
    DisplayData.from(beamNode.getTransform()));
}
 
Example 2
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Chooses the {@code SystemReduceFn} matching the given transform.
 *
 * <p>A {@code GroupByKey} gets a buffering reduce fn over the value coder; a
 * {@code Combine.PerKey} gets a combining reduce fn built from its combine fn, the pipeline's
 * coder registry and the KV input coder.
 *
 * @param transform the transform being translated
 * @param pipeline pipeline supplying the coder registry for the combining case
 * @param kvInputCoder coder of the keyed input elements
 * @return the reduce fn for {@code transform}
 * @throws RuntimeException if {@code transform} is neither a {@code GroupByKey} nor a
 *     {@code Combine.PerKey}
 */
@SuppressWarnings("unchecked")
private static <K, InputT, OutputT>
    SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> getSystemReduceFn(
        PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
        Pipeline pipeline,
        KvCoder<K, InputT> kvInputCoder) {
  final boolean isGroupByKey = transform instanceof GroupByKey;

  // Reject anything that is not one of the two supported transform kinds up front.
  if (!isGroupByKey && !(transform instanceof Combine.PerKey)) {
    throw new RuntimeException("Transform " + transform + " cannot be translated as GroupByKey.");
  }

  if (isGroupByKey) {
    return (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
        SystemReduceFn.buffering(kvInputCoder.getValueCoder());
  }

  final CombineFnBase.GlobalCombineFn<? super InputT, ?, OutputT> perKeyFn =
      ((Combine.PerKey) transform).getFn();
  return SystemReduceFn.combining(
      kvInputCoder.getKeyCoder(),
      AppliedCombineFn.withInputCoder(perKeyFn, pipeline.getCoderRegistry(), kvInputCoder));
}
 
Example 3
Source File: BatchGroupAlsoByWindowsDoFns.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link BatchGroupAlsoByWindowFn} that groups values into iterables, without a
 * combine function.
 *
 * <p>The implementation is selected from the windowing strategy, in this order: the reshuffle
 * special path, the iterator-based path when it supports the strategy, and otherwise the
 * output-buffer path backed by a buffering {@code SystemReduceFn}.
 *
 * @param windowingStrategy The window function and trigger to use for grouping
 * @param stateInternalsFactory factory handed to the output-buffer implementation
 * @param inputCoder the input coder to use
 * @return the grouping function chosen for {@code windowingStrategy}
 */
public static <K, V, W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, V, Iterable<V>> createForIterable(
        WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory,
        Coder<V> inputCoder) {
  // Reshuffle takes precedence over every other implementation.
  if (BatchGroupAlsoByWindowReshuffleFn.isReshuffle(windowingStrategy)) {
    return new BatchGroupAlsoByWindowReshuffleFn<>();
  }

  if (BatchGroupAlsoByWindowViaIteratorsFn.isSupported(windowingStrategy)) {
    return new BatchGroupAlsoByWindowViaIteratorsFn<K, V, W>(windowingStrategy);
  }

  // General fallback: buffer the values through a reduce fn.
  final SystemReduceFn<K, V, Iterable<V>, Iterable<V>, W> bufferingFn =
      SystemReduceFn.buffering(inputCoder);
  return new BatchGroupAlsoByWindowViaOutputBufferFn<>(
      windowingStrategy, stateInternalsFactory, bufferingFn);
}
 
Example 4
Source File: GroupAlsoByWindowViaOutputBufferDoFnTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns an output-buffer based group-also-by-window function for the given strategy, using a
 * buffering reduce fn over this factory's {@code inputCoder}.
 */
@Override
public <W extends BoundedWindow>
    BatchGroupAlsoByWindowFn<K, InputT, Iterable<InputT>> forStrategy(
        WindowingStrategy<?, W> windowingStrategy,
        StateInternalsFactory<K> stateInternalsFactory) {
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> bufferingFn =
      SystemReduceFn.buffering(inputCoder);

  return new BatchGroupAlsoByWindowViaOutputBufferFn<K, InputT, Iterable<InputT>, W>(
      windowingStrategy, stateInternalsFactory, bufferingFn);
}
 
Example 5
Source File: GroupAlsoByWindowEvaluatorFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up an evaluator for one input bundle of a {@code DirectGroupAlsoByWindow} application.
 *
 * <p>Captures the bundle's key, resolves the step context for that key, takes the input
 * windowing strategy and value coder from the transform, and prepares a buffering reduce fn
 * together with the counter for elements dropped due to lateness.
 *
 * @param evaluationContext context used to look up the execution context and step name
 * @param options pipeline options kept for later use
 * @param inputBundle the bundle whose key and coder configure this evaluator
 * @param application the applied transform being evaluated
 */
public GroupAlsoByWindowEvaluator(
    final EvaluationContext evaluationContext,
    PipelineOptions options,
    CommittedBundle<KeyedWorkItem<K, V>> inputBundle,
    final AppliedPTransform<
            PCollection<KeyedWorkItem<K, V>>,
            PCollection<KV<K, Iterable<V>>>,
            DirectGroupAlsoByWindow<K, V>>
        application) {
  this.evaluationContext = evaluationContext;
  this.options = options;
  this.application = application;

  // Result collectors start out empty.
  outputBundles = new ArrayList<>();
  unprocessedElements = ImmutableList.builder();

  // Per-key execution state.
  structuralKey = inputBundle.getKey();
  stepContext =
      evaluationContext
          .getExecutionContext(application, inputBundle.getKey())
          .getStepContext(evaluationContext.getStepName(application));

  // Windowing and coder information come from the transform itself.
  final DirectGroupAlsoByWindow<K, V> groupAlsoByWindow = application.getTransform();
  windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) groupAlsoByWindow.getInputWindowingStrategy();

  final Coder<V> elementValueCoder =
      groupAlsoByWindow.getValueCoder(inputBundle.getPCollection().getCoder());
  reduceFn = SystemReduceFn.buffering(elementValueCoder);

  droppedDueToLateness =
      Metrics.counter(
          GroupAlsoByWindowEvaluator.class,
          GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER);
}
 
Example 6
Source File: WindowGroupP.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the per-key state: in-memory timer and state internals plus a {@code ReduceFnRunner}
 * that buffers values, then advances the watermark to {@code latestWatermark}.
 *
 * <p>Main-output results are wrapped in a {@code WindowedValue}, encoded with
 * {@code outputCoder} and appended to {@code appendableTraverser}; tagged (additional) outputs
 * are rejected.
 *
 * @param key the key this manager is responsible for
 */
KeyManager(K key) {
  this.timerInternals = new InMemoryTimerInternals();
  this.stateInternals = new InMemoryStateInternalsImpl(key);

  // The trigger is round-tripped through its proto form to obtain a state machine.
  final ExecutableTriggerStateMachine triggerStateMachine =
      ExecutableTriggerStateMachine.create(
          TriggerStateMachines.stateMachineForTrigger(
              TriggerTranslation.toProto(windowingStrategy.getTrigger())));

  final OutputWindowedValue<KV<K, Iterable<V>>> groupedOutput =
      new OutputWindowedValue<KV<K, Iterable<V>>>() {
        @Override
        public void outputWindowedValue(
            KV<K, Iterable<V>> output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          WindowedValue<KV<K, Iterable<V>>> emitted =
              WindowedValue.of(output, timestamp, windows, pane);
          byte[] payload = Utils.encode(emitted, outputCoder);
          //noinspection ResultOfMethodCallIgnored
          appendableTraverser.append(payload);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          throw new UnsupportedOperationException("Grouping should not use side outputs");
        }
      };

  this.reduceFnRunner =
      new ReduceFnRunner<>(
          key,
          windowingStrategy,
          triggerStateMachine,
          stateInternals,
          timerInternals,
          groupedOutput,
          NullSideInputReader.empty(),
          SystemReduceFn.buffering(inputValueValueCoder),
          pipelineOptions.get());

  advanceWatermark(latestWatermark, Instant.now());
}
 
Example 7
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Handles one partition of the co-grouped (key, new values, previous state) input.
 *
 * <p>Builds a buffering {@code SystemReduceFn} from {@code wvCoder}'s value coder, obtains two
 * "dropped" counter cells from a metrics container created inside this call, logs and resets
 * each counter whose cumulative value is positive, and returns an
 * {@code UpdateStateByKeyOutputIterator} over the input converted with
 * {@code JavaConversions.asScalaIterator}.
 *
 * @param input iterator of (encoded key, encoded windowed values, optional previous state and
 *     timers with previous encoded output) tuples
 * @return iterator of (encoded key, (state and timers, encoded output values)) tuples
 */
@Override
public Iterator<
        Tuple2</*K*/ ByteArray, Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>
    apply(
        final Iterator<
                Tuple3<
                    /*K*/ ByteArray,
                    Seq</*WV<I>*/ byte[]>,
                    Option<Tuple2<StateAndTimers, /*WV<KV<K, Itr<I>>>*/ List<byte[]>>>>>
            input) {
  // --- ACTUAL STATEFUL OPERATION:
  //
  // Input Iterator: the partition (~bundle) of a co-grouping of the input
  // and the previous state (if exists).
  //
  // Output Iterator: the output key, and the updated state.
  //
  // possible input scenarios for (K, Seq, Option<S>):
  // (1) Option<S>.isEmpty: new data with no previous state.
  // (2) Seq.isEmpty: no new data, but evaluating previous state (timer-like behaviour).
  // (3) Seq.nonEmpty && Option<S>.isDefined: new data with previous state.

  // Reduce fn that buffers raw values into an Iterable (no combining).
  final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn =
      SystemReduceFn.buffering(wvCoder.getValueCoder());

  // Metrics container local to this invocation; both counters below are obtained from it.
  final MetricsContainerImpl cellProvider = new MetricsContainerImpl("cellProvider");

  final CounterCell droppedDueToClosedWindow =
      cellProvider.getCounter(
          MetricName.named(
              SparkGroupAlsoByWindowViaWindowSet.class,
              GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER));

  final CounterCell droppedDueToLateness =
      cellProvider.getCounter(
          MetricName.named(
              SparkGroupAlsoByWindowViaWindowSet.class,
              GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER));

  // NOTE(review): nothing in this method increments either counter between obtaining it from
  // the freshly created container and the checks below, so these branches presumably never
  // fire here - confirm whether the logging was meant to run after the returned iterator has
  // been consumed.
  // log if there's something to log.
  final long lateDropped = droppedDueToLateness.getCumulative();
  if (lateDropped > 0) {
    LOG.info(String.format("Dropped %d elements due to lateness.", lateDropped));
    // Reset by subtracting the value as re-read at this point.
    droppedDueToLateness.inc(-droppedDueToLateness.getCumulative());
  }
  final long closedWindowDropped = droppedDueToClosedWindow.getCumulative();
  if (closedWindowDropped > 0) {
    LOG.info(String.format("Dropped %d elements due to closed window.", closedWindowDropped));
    droppedDueToClosedWindow.inc(-droppedDueToClosedWindow.getCumulative());
  }

  // Only the lateness counter is handed to the iterator; droppedDueToClosedWindow is not
  // passed on from here.
  return scala.collection.JavaConversions.asScalaIterator(
      new UpdateStateByKeyOutputIterator(input, reduceFn, droppedDueToLateness));
}
 
Example 8
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a group-by-key transform into a Flink streaming operator.
 *
 * <p>The input stream is converted to singleton keyed work items, keyed with a
 * {@code WorkItemKeySelector}, and run through a {@code WindowDoFnOperator} driven by a
 * buffering {@code SystemReduceFn}. The resulting stream is registered as the transform's
 * output.
 *
 * @param transform the grouping transform to translate
 * @param context translation context providing inputs, outputs, coders and options
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> input = context.getInput(transform);

  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();

  // Split the KV coder once; both halves are reused several times below.
  KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputKvCoder.getKeyCoder();
  Coder<InputT> valueCoder = inputKvCoder.getValueCoder();

  SingletonKeyedWorkItemCoder<K, InputT> itemCoder =
      SingletonKeyedWorkItemCoder.of(
          keyCoder, valueCoder, input.getWindowingStrategy().getWindowFn().windowCoder());

  DataStream<WindowedValue<KV<K, InputT>>> sourceStream = context.getInputDataStream(input);

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedItemCoder =
          WindowedValue.getFullCoder(
              itemCoder, input.getWindowingStrategy().getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemTypeInfo =
      new CoderTypeInformation<>(windowedItemCoder);

  // Step 1: wrap every element into a singleton keyed work item.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> itemStream =
      sourceStream
          .flatMap(new ToKeyedWorkItem<>(context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  // Step 2: key the stream.
  // NOTE(review): two selectors are built from the same key coder - this one is handed to the
  // operator, a second one is created for keyBy. Kept as in the original; consider sharing one.
  WorkItemKeySelector operatorKeySelector = new WorkItemKeySelector<>(keyCoder);

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer>
      keyedItemStream = itemStream.keyBy(new WorkItemKeySelector<>(keyCoder));

  // Step 3: assemble the windowing operator around a buffering reduce fn.
  SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> bufferingFn =
      SystemReduceFn.buffering(valueCoder);

  Coder<WindowedValue<KV<K, Iterable<InputT>>>> groupedCoder =
      context.getWindowedInputCoder(context.getOutput(transform));
  TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> groupedTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  TupleTag<KV<K, Iterable<InputT>>> mainOutputTag = new TupleTag<>("main output");

  String operatorName = getCurrentTransformName(context);
  WindowDoFnOperator<K, InputT, Iterable<InputT>> windowOperator =
      new WindowDoFnOperator<>(
          bufferingFn,
          operatorName,
          (Coder) windowedItemCoder,
          mainOutputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutputTag, groupedCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          keyCoder,
          operatorKeySelector);

  // The operator is declared over WindowedValue<KeyedWorkItem> whereas the stream carries
  // WindowedValue<SingletonKeyedWorkItem>; the raw cast bridges the two for the compiler.
  @SuppressWarnings("unchecked")
  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> groupedStream =
      keyedItemStream
          .transform(operatorName, groupedTypeInfo, (OneInputStreamOperator) windowOperator)
          .uid(operatorName);

  context.setOutputDataStream(context.getOutput(transform), groupedStream);
}
 
Example 9
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a group-by-key stage to the given stream.
 *
 * <p>Elements are wrapped into singleton keyed work items, keyed with a
 * {@code WorkItemKeySelector}, and passed through a {@code WindowDoFnOperator} that buffers
 * values per key and window using a buffering {@code SystemReduceFn}.
 *
 * @param inputDataStream stream of windowed key/value pairs to group
 * @param windowingStrategy windowing strategy supplying the window coder and driving the
 *     operator
 * @param windowedInputCoder coder of the input elements; its value coder must be a
 *     {@code KvCoder}
 * @param operatorName name given to the windowing operator
 * @param context translation context supplying the pipeline options
 * @return the stream of grouped, windowed key/iterable pairs
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {
  // Split the KV coder once; both halves are reused several times below.
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
  Coder<K> keyCoder = kvCoder.getKeyCoder();
  Coder<V> valueCoder = kvCoder.getValueCoder();

  SingletonKeyedWorkItemCoder<K, V> itemCoder =
      SingletonKeyedWorkItemCoder.of(
          keyCoder, valueCoder, windowingStrategy.getWindowFn().windowCoder());

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedItemCoder =
      WindowedValue.getFullCoder(itemCoder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> itemTypeInfo =
      new CoderTypeInformation<>(windowedItemCoder);

  // Step 1: wrap every element into a singleton keyed work item.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> itemStream =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(itemTypeInfo)
          .name("ToKeyedWorkItem");

  // Step 2: key the stream; the same selector is later handed to the operator.
  WorkItemKeySelector<K, V> workItemKeySelector = new WorkItemKeySelector<>(keyCoder);

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedItemStream =
      itemStream.keyBy(workItemKeySelector);

  // Step 3: assemble the windowing operator around a buffering reduce fn.
  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> bufferingFn =
      SystemReduceFn.buffering(valueCoder);

  Coder<Iterable<V>> groupedValuesCoder = IterableCoder.of(valueCoder);

  Coder<WindowedValue<KV<K, Iterable<V>>>> groupedCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(keyCoder, groupedValuesCoder),
          windowingStrategy.getWindowFn().windowCoder());

  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> groupedTypeInfo =
      new CoderTypeInformation<>(groupedCoder);

  TupleTag<KV<K, Iterable<V>>> mainOutputTag = new TupleTag<>("main output");

  WindowDoFnOperator<K, V, Iterable<V>> windowOperator =
      new WindowDoFnOperator<>(
          bufferingFn,
          operatorName,
          (Coder) windowedItemCoder,
          mainOutputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory(mainOutputTag, groupedCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          keyCoder,
          (KeySelector) workItemKeySelector /* key selector */);

  return keyedItemStream.transform(
      operatorName, groupedTypeInfo, (OneInputStreamOperator) windowOperator);
}
 
Example 10
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a portable group-by-key transform node and registers its output stream.
 *
 * <p>Resolves the input stream, windowing strategy and coders from the translation context,
 * builds a buffering {@code SystemReduceFn} over the value coder, delegates to
 * {@code doTranslateGBK}, and registers the resulting stream under the transform's output id.
 *
 * @param transform the portable transform node to translate
 * @param pipeline the queryable pipeline supplying components
 * @param ctx the portable translation context
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final MessageStream<OpMessage<KV<K, InputT>>> sourceStream =
      ctx.getOneInputMessageStream(transform);
  // Repartitioning is requested whenever more than one source partition may be used.
  final boolean needRepartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;
  final WindowingStrategy<?, BoundedWindow> strategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> windowCoder = strategy.getWindowFn().windowCoder();

  // Coders: the windowed input coder yields the KV coder, from which the element coder is
  // rebuilt with the strategy's window coder.
  final String inputId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedKvCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());
  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) windowedKvCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> windowedElementCoder =
      WindowedValue.FullWindowedValueCoder.of(kvCoder, windowCoder);

  // The transform is expected to have exactly one output; its id becomes the output tag.
  final TupleTag<KV<K, OutputT>> mainOutputTag =
      new TupleTag<>(Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet()));

  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> bufferingFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvCoder.getValueCoder());

  final PCollection.IsBounded boundedness =
      SamzaPipelineTranslatorUtils.isBounded(
          pipeline.getComponents().getPcollectionsOrThrow(inputId));

  final MessageStream<OpMessage<KV<K, OutputT>>> groupedStream =
      doTranslateGBK(
          sourceStream,
          needRepartition,
          bufferingFn,
          strategy,
          kvCoder,
          windowedElementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          mainOutputTag,
          boundedness);
  ctx.registerMessageStream(ctx.getOutputId(transform), groupedStream);
}