Java Code Examples for org.apache.beam.sdk.util.WindowedValue#FullWindowedValueCoder

The following examples show how to use org.apache.beam.sdk.util.WindowedValue#FullWindowedValueCoder . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ValueAndCoderLazySerializableTest.java    From beam with Apache License 2.0 6 votes vote down vote up
@Test
public void serializableAccumulatorSerializationTest()
    throws IOException, ClassNotFoundException {
  // Round-trips a windowed-value accumulator through Java serialization and
  // verifies that it decodes back to the original values.
  Iterable<WindowedValue<Integer>> accumulatedValue =
      Arrays.asList(winVal(0), winVal(1), winVal(3), winVal(4));

  final WindowedValue.FullWindowedValueCoder<Integer> wvaCoder =
      WindowedValue.FullWindowedValueCoder.of(
          BigEndianIntegerCoder.of(), GlobalWindow.Coder.INSTANCE);

  final IterableCoder<WindowedValue<Integer>> iterAccumCoder = IterableCoder.of(wvaCoder);

  ValueAndCoderLazySerializable<Iterable<WindowedValue<Integer>>> accUnderTest =
      ValueAndCoderLazySerializable.of(accumulatedValue, iterAccumCoder);

  ByteArrayOutputStream inMemOut = new ByteArrayOutputStream();
  // Close (and thereby flush) the ObjectOutputStream before reading the buffer:
  // ObjectOutputStream buffers block data internally, so toByteArray() could
  // otherwise observe a truncated stream.
  try (ObjectOutputStream oos = new ObjectOutputStream(inMemOut)) {
    oos.writeObject(accUnderTest);
  }

  try (ObjectInputStream ois =
      new ObjectInputStream(new ByteArrayInputStream(inMemOut.toByteArray()))) {
    @SuppressWarnings("unchecked")
    ValueAndCoderLazySerializable<Iterable<WindowedValue<Integer>>> materialized =
        (ValueAndCoderLazySerializable<Iterable<WindowedValue<Integer>>>) ois.readObject();
    assertEquals(accumulatedValue, materialized.getOrDecode(iterAccumCoder));
  }
}
 
Example 2
Source File: GroupNonMergingWindowsFunctionsTest.java    From beam with Apache License 2.0 6 votes vote down vote up
private <W extends BoundedWindow> GroupByKeyIterator<String, Integer, W> createGbkIterator(
    W window, Coder<W> winCoder, WindowingStrategy<Object, W> winStrategy)
    throws Coder.NonDeterministicException {

  // Coders for the key alone and for the full windowed KV element.
  final StringUtf8Coder keyCoder = StringUtf8Coder.of();
  final WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedKvCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
          winStrategy.getWindowFn().windowCoder());

  // Pre-serialize a small fixture: two values under "k1", three under "k2".
  final ItemFactory<String, Integer, W> itemFactory =
      ItemFactory.forWindow(keyCoder, windowedKvCoder, winCoder, window);
  final List<Tuple2<ByteArray, byte[]>> serializedItems =
      Arrays.asList(
          itemFactory.create("k1", 1),
          itemFactory.create("k1", 2),
          itemFactory.create("k2", 3),
          itemFactory.create("k2", 4),
          itemFactory.create("k2", 5));

  return new GroupByKeyIterator<>(
      serializedItems.iterator(), keyCoder, winStrategy, windowedKvCoder);
}
 
Example 3
Source File: RowHelpers.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a {@code windowedValue} to bytes using a {@link
 * WindowedValue.FullWindowedValueCoder} and stores it in an {@code InternalRow}.
 */
public static <T> InternalRow storeWindowedValueInRow(
    WindowedValue<T> windowedValue, Coder<T> coder) {
  // The dataset schema is a single binary column, so the windowed value is
  // encoded to a byte array first.
  WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(coder, GlobalWindow.Coder.INSTANCE);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  try {
    windowedValueCoder.encode(windowedValue, out);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  List<Object> columns = new ArrayList<>();
  columns.add(out.toByteArray());
  return InternalRow.apply(asScalaBuffer(columns).toList());
}
 
Example 4
Source File: WindowAssignTranslatorBatch.java    From beam with Apache License 2.0 6 votes vote down vote up
@Override
public void translateTransform(
    PTransform<PCollection<T>, PCollection<T>> transform, TranslationContext context) {

  Window.Assign<T> windowAssign = (Window.Assign<T>) transform;
  @SuppressWarnings("unchecked")
  final PCollection<T> input = (PCollection<T>) context.getInput();
  @SuppressWarnings("unchecked")
  final PCollection<T> output = (PCollection<T>) context.getOutput();

  Dataset<WindowedValue<T>> inputDataset = context.getDataset(input);

  // When the window assignment is a no-op, reuse the input dataset untouched.
  if (WindowingHelpers.skipAssignWindows(windowAssign, context)) {
    context.putDataset(output, inputDataset);
    return;
  }

  // Otherwise map every element through the window-assignment function, encoding
  // the result with a coder that carries both value and window.
  WindowFn<T, ?> windowFn = windowAssign.getWindowFn();
  WindowedValue.FullWindowedValueCoder<T> outputCoder =
      WindowedValue.FullWindowedValueCoder.of(input.getCoder(), windowFn.windowCoder());
  context.putDataset(
      output,
      inputDataset.map(
          WindowingHelpers.assignWindowsMapFunction(windowFn),
          EncoderHelpers.fromBeamCoder(outputCoder)));
}
 
Example 5
Source File: AbstractPythonStatelessFunctionRunner.java    From flink with Apache License 2.0 5 votes vote down vote up
private RunnerApi.WireCoderSetting createValueOnlyWireCoderSetting() throws IOException {
	// Encode an empty byte[] in the global window; the encoded bytes become the
	// payload of the PARAM_WINDOWED_VALUE wire coder setting.
	final WindowedValue.FullWindowedValueCoder<byte[]> coder =
		WindowedValue.FullWindowedValueCoder.of(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE);
	final ByteArrayOutputStream out = new ByteArrayOutputStream();
	coder.encode(WindowedValue.valueInGlobalWindow(new byte[0]), out);
	return RunnerApi.WireCoderSetting.newBuilder()
		.setUrn(getUrn(RunnerApi.StandardCoders.Enum.PARAM_WINDOWED_VALUE))
		.setPayload(
			org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString.copyFrom(out.toByteArray()))
		.build();
}
 
Example 6
Source File: FlinkBatchTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
<T> TypeInformation<WindowedValue<T>> getTypeInfo(
    Coder<T> coder, WindowingStrategy<?, ?> windowingStrategy) {
  // Flink type information is derived directly from the full windowed-value
  // coder (element coder + the strategy's window coder).
  return new CoderTypeInformation<>(
      WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder()));
}
 
Example 7
Source File: GroupNonMergingWindowsFunctions.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an iterator that groups serialized (key, windowed value) pairs by key.
 *
 * @param inner source iterator of (serialized key, serialized windowed value) tuples
 * @param keyCoder coder used to decode keys
 * @param windowingStrategy windowing strategy of the grouped input
 * @param windowedValueCoder coder used to decode the windowed KV payloads
 * @throws Coder.NonDeterministicException declared by this constructor; presumably key
 *     coder determinism is verified somewhere in construction — confirm against the class
 */
GroupByKeyIterator(
    Iterator<Tuple2<ByteArray, byte[]>> inner,
    Coder<K> keyCoder,
    WindowingStrategy<?, W> windowingStrategy,
    WindowedValue.FullWindowedValueCoder<KV<K, V>> windowedValueCoder)
    throws Coder.NonDeterministicException {

  // Peeking iterator lets grouping logic inspect the next key without consuming it.
  this.inner = Iterators.peekingIterator(inner);
  this.keyCoder = keyCoder;
  this.windowingStrategy = windowingStrategy;
  this.windowedValueCoder = windowedValueCoder;
}
 
Example 8
Source File: PipelineTranslationContext.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Say the dstIRVertex consumes three views: view0, view1, and view2.
 * <p>
 * We translate that as the following:
 * view0 -> SideInputTransform(index=0) ->
 * view1 -> SideInputTransform(index=1) -> dstIRVertex(with a map from indices to PCollectionViews)
 * view2 -> SideInputTransform(index=2) ->
 *
 * @param dstVertex  vertex.
 * @param sideInputs of the vertex.
 */
void addSideInputEdges(final IRVertex dstVertex, final Map<Integer, PCollectionView<?>> sideInputs) {
  for (final Map.Entry<Integer, PCollectionView<?>> entry : sideInputs.entrySet()) {
    final int index = entry.getKey();
    final PCollectionView view = entry.getValue();

    // Locate the vertex producing this view and insert a SideInputTransform,
    // keyed by the side-input index, between producer and consumer.
    final IRVertex srcVertex = pValueToProducerVertex.get(view);
    final IRVertex sideInputTransformVertex = new OperatorVertex(new SideInputTransform(index));
    addVertex(sideInputTransformVertex);
    final Coder viewCoder = getCoderForView(view, this);
    final Coder windowCoder = view.getPCollection().getWindowingStrategy().getWindowFn().windowCoder();

    // First edge: view to transform
    final IREdge firstEdge =
      new IREdge(CommunicationPatternProperty.Value.ONE_TO_ONE, srcVertex, sideInputTransformVertex);
    addEdge(firstEdge, viewCoder, windowCoder);

    // Second edge: transform to the dstIRVertex
    // (broadcast, since every consumer task needs the whole side input).
    final IREdge secondEdge =
      new IREdge(CommunicationPatternProperty.Value.BROADCAST, sideInputTransformVertex, dstVertex);
    // Side-input elements are wrapped in a SideInputCoder before being windowed.
    final WindowedValue.FullWindowedValueCoder sideInputElementCoder =
      WindowedValue.getFullCoder(SideInputCoder.of(viewCoder), windowCoder);

    // The vertices should be Parallelism=1
    srcVertex.setPropertyPermanently(ParallelismProperty.of(1));
    sideInputTransformVertex.setPropertyPermanently(ParallelismProperty.of(1));

    secondEdge.setProperty(EncoderProperty.of(new BeamEncoderFactory(sideInputElementCoder)));
    secondEdge.setProperty(DecoderProperty.of(new BeamDecoderFactory(sideInputElementCoder)));
    builder.connectVertices(secondEdge);
  }
}
 
Example 9
Source File: FlinkStreamingTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Builds Flink type information for the windowed values of {@code collection}. */
@SuppressWarnings("unchecked")
public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
  // Pair the collection's element coder with its window coder.
  final WindowedValue.FullWindowedValueCoder<T> fullCoder =
      WindowedValue.getFullCoder(
          collection.getCoder(),
          collection.getWindowingStrategy().getWindowFn().windowCoder());
  return new CoderTypeInformation<>(fullCoder);
}
 
Example 10
Source File: MultiOuputCoder.java    From beam with Apache License 2.0 5 votes vote down vote up
@Override
public Tuple2<TupleTag<T>, WindowedValue<T>> decode(InputStream inStream)
    throws CoderException, IOException {
  // The tuple tag comes first on the wire; it selects the per-output value coder.
  TupleTag<T> tag = (TupleTag<T>) tupleTagCoder.decode(inStream);
  Coder<T> elementCoder = (Coder<T>) coderMap.get(tag);
  WindowedValue<T> windowedValue =
      WindowedValue.FullWindowedValueCoder.of(elementCoder, windowCoder).decode(inStream);
  return Tuple2.apply(tag, windowedValue);
}
 
Example 11
Source File: MultiOuputCoder.java    From beam with Apache License 2.0 5 votes vote down vote up
@Override
public void encode(Tuple2<TupleTag<T>, WindowedValue<T>> tuple2, OutputStream outStream)
    throws IOException {
  // Write the tag first so decode() can pick the matching value coder.
  TupleTag<T> tag = tuple2._1();
  tupleTagCoder.encode(tag, outStream);
  Coder<T> elementCoder = (Coder<T>) coderMap.get(tag);
  WindowedValue.FullWindowedValueCoder.of(elementCoder, windowCoder)
      .encode(tuple2._2(), outStream);
}
 
Example 12
Source File: PipelineTranslationContext.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Attaches key-extraction, encoder, and decoder properties to an IR edge and
 * connects it into the DAG being built.
 *
 * @param edge         IR edge to add.
 * @param elementCoder element coder.
 * @param windowCoder  window coder.
 */
void addEdge(final IREdge edge, final Coder elementCoder, final Coder windowCoder) {
  edge.setProperty(KeyExtractorProperty.of(new BeamKeyExtractor()));
  // KV elements additionally get key-only (en/de)coders so keys can be handled
  // separately from full elements.
  if (elementCoder instanceof KvCoder) {
    Coder keyCoder = ((KvCoder) elementCoder).getKeyCoder();
    edge.setProperty(KeyEncoderProperty.of(new BeamEncoderFactory(keyCoder)));
    edge.setProperty(KeyDecoderProperty.of(new BeamDecoderFactory(keyCoder)));
  }

  // Full elements travel as WindowedValues: element coder + window coder combined.
  final WindowedValue.FullWindowedValueCoder coder = WindowedValue.getFullCoder(elementCoder, windowCoder);
  edge.setProperty(EncoderProperty.of(new BeamEncoderFactory<>(coder)));
  edge.setProperty(DecoderProperty.of(new BeamDecoderFactory<>(coder)));

  builder.connectVertices(edge);
}
 
Example 13
Source File: ReadSourceTranslatorStreaming.java    From beam with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("unchecked")
@Override
public void translateTransform(
    PTransform<PBegin, PCollection<T>> transform, TranslationContext context) {
  AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>> rootTransform =
      (AppliedPTransform<PBegin, PCollection<T>, PTransform<PBegin, PCollection<T>>>)
          context.getCurrentTransform();

  UnboundedSource<T, UnboundedSource.CheckpointMark> source;
  try {
    source = ReadTranslation.unboundedSourceFromTransform(rootTransform);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  SparkSession sparkSession = context.getSparkSession();

  String serializedSource = Base64Serializer.serializeUnchecked(source);
  Dataset<Row> rowDataset =
      sparkSession
          .readStream()
          .format(sourceProviderClass)
          .option(DatasetSourceStreaming.BEAM_SOURCE_OPTION, serializedSource)
          .option(
              DatasetSourceStreaming.DEFAULT_PARALLELISM,
              String.valueOf(context.getSparkSession().sparkContext().defaultParallelism()))
          .option(
              DatasetSourceStreaming.PIPELINE_OPTIONS,
              context.getSerializableOptions().toString())
          .load();

  // extract windowedValue from Row
  WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(
          source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
  Dataset<WindowedValue<T>> dataset =
      rowDataset.map(
          RowHelpers.extractWindowedValueFromRowMapFunction(windowedValueCoder),
          EncoderHelpers.fromBeamCoder(windowedValueCoder));

  PCollection<T> output = (PCollection<T>) context.getOutput();
  context.putDataset(output, dataset);
}
 
Example 14
Source File: TransformTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
private static <InputT, AccumT, OutputT>
    TransformEvaluator<Combine.Globally<InputT, OutputT>> combineGlobally() {
  return new TransformEvaluator<Combine.Globally<InputT, OutputT>>() {

    @Override
    public void evaluate(Combine.Globally<InputT, OutputT> transform, EvaluationContext context) {
      final PCollection<InputT> input = context.getInput(transform);
      final Coder<InputT> iCoder = context.getInput(transform).getCoder();
      final Coder<OutputT> oCoder = context.getOutput(transform).getCoder();
      final WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT> combineFn =
          (CombineWithContext.CombineFnWithContext<InputT, AccumT, OutputT>)
              CombineFnUtil.toFnWithContext(transform.getFn());
      final WindowedValue.FullWindowedValueCoder<OutputT> wvoCoder =
          WindowedValue.FullWindowedValueCoder.of(
              oCoder, windowingStrategy.getWindowFn().windowCoder());
      final boolean hasDefault = transform.isInsertDefault();

      final SparkCombineFn<InputT, InputT, AccumT, OutputT> sparkCombineFn =
          SparkCombineFn.globally(
              combineFn,
              context.getSerializableOptions(),
              TranslationUtils.getSideInputs(transform.getSideInputs(), context),
              windowingStrategy);
      final Coder<AccumT> aCoder;
      try {
        aCoder = combineFn.getAccumulatorCoder(context.getPipeline().getCoderRegistry(), iCoder);
      } catch (CannotProvideCoderException e) {
        throw new IllegalStateException("Could not determine coder for accumulator", e);
      }

      @SuppressWarnings("unchecked")
      JavaRDD<WindowedValue<InputT>> inRdd =
          ((BoundedDataset<InputT>) context.borrowDataset(transform)).getRDD();

      JavaRDD<WindowedValue<OutputT>> outRdd;

      SparkCombineFn.WindowedAccumulator<InputT, InputT, AccumT, ?> accumulated =
          GroupCombineFunctions.combineGlobally(inRdd, sparkCombineFn, aCoder, windowingStrategy);

      if (!accumulated.isEmpty()) {
        Iterable<WindowedValue<OutputT>> output = sparkCombineFn.extractOutput(accumulated);
        outRdd =
            context
                .getSparkContext()
                .parallelize(CoderHelpers.toByteArrays(output, wvoCoder))
                .map(CoderHelpers.fromByteFunction(wvoCoder));
      } else {
        // handle empty input RDD, which will naturally skip the entire execution
        // as Spark will not run on empty RDDs.
        JavaSparkContext jsc = new JavaSparkContext(inRdd.context());
        if (hasDefault) {
          OutputT defaultValue = combineFn.defaultValue();
          outRdd =
              jsc.parallelize(Lists.newArrayList(CoderHelpers.toByteArray(defaultValue, oCoder)))
                  .map(CoderHelpers.fromByteFunction(oCoder))
                  .map(WindowedValue::valueInGlobalWindow);
        } else {
          outRdd = jsc.emptyRDD();
        }
      }

      context.putDataset(transform, new BoundedDataset<>(outRdd));
    }

    @Override
    public String toNativeString() {
      return "aggregate(..., new <fn>(), ...)";
    }
  };
}
 
Example 15
Source File: Utils.java    From beam with Apache License 2.0 4 votes vote down vote up
public static WindowedValue.FullWindowedValueCoder deriveIterableValueCoder(
    WindowedValue.FullWindowedValueCoder elementCoder) {
  return WindowedValue.FullWindowedValueCoder.of(
      ListCoder.of(elementCoder.getValueCoder()), elementCoder.getWindowCoder());
}
 
Example 16
Source File: SparkUnboundedSource.java    From beam with Apache License 2.0 4 votes vote down vote up
public static <T, CheckpointMarkT extends CheckpointMark> UnboundedDataset<T> read(
    JavaStreamingContext jssc,
    SerializablePipelineOptions rc,
    UnboundedSource<T, CheckpointMarkT> source,
    String stepName) {

  SparkPipelineOptions options = rc.get().as(SparkPipelineOptions.class);
  Long maxRecordsPerBatch = options.getMaxRecordsPerBatch();
  SourceDStream<T, CheckpointMarkT> sourceDStream =
      new SourceDStream<>(jssc.ssc(), source, rc, maxRecordsPerBatch);

  JavaPairInputDStream<Source<T>, CheckpointMarkT> inputDStream =
      JavaPairInputDStream$.MODULE$.fromInputDStream(
          sourceDStream,
          JavaSparkContext$.MODULE$.fakeClassTag(),
          JavaSparkContext$.MODULE$.fakeClassTag());

  // call mapWithState to read from a checkpointable sources.
  JavaMapWithStateDStream<
          Source<T>, CheckpointMarkT, Tuple2<byte[], Instant>, Tuple2<Iterable<byte[]>, Metadata>>
      mapWithStateDStream =
          inputDStream.mapWithState(
              StateSpec.function(
                      StateSpecFunctions.<T, CheckpointMarkT>mapSourceFunction(rc, stepName))
                  .numPartitions(sourceDStream.getNumPartitions()));

  // set checkpoint duration for read stream, if set.
  checkpointStream(mapWithStateDStream, options);

  // report the number of input elements for this InputDStream to the InputInfoTracker.
  int id = inputDStream.inputDStream().id();
  JavaDStream<Metadata> metadataDStream = mapWithStateDStream.map(new Tuple2MetadataFunction());

  // register ReadReportDStream to report information related to this read.
  new ReadReportDStream(metadataDStream.dstream(), id, getSourceName(source, id), stepName)
      .register();

  // output the actual (deserialized) stream.
  WindowedValue.FullWindowedValueCoder<T> coder =
      WindowedValue.FullWindowedValueCoder.of(
          source.getOutputCoder(), GlobalWindow.Coder.INSTANCE);
  JavaDStream<WindowedValue<T>> readUnboundedStream =
      mapWithStateDStream
          .flatMap(new Tuple2byteFlatMapFunction())
          .map(CoderHelpers.fromByteFunction(coder));
  return new UnboundedDataset<>(readUnboundedStream, Collections.singletonList(id));
}
 
Example 17
Source File: JetTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the full windowed-value coder combining {@code coder} with the
 * window coder of {@code windowingStrategy}.
 */
<T> WindowedValue.FullWindowedValueCoder<T> getTypeInfo(
    Coder<T> coder, WindowingStrategy<?, ?> windowingStrategy) {
  return WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder());
}
 
Example 18
Source File: JetTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Convenience overload: derives the full coder from a {@code PCollection}'s own coders. */
<T> WindowedValue.FullWindowedValueCoder<T> getTypeInfo(PCollection<T> collection) {
  return getTypeInfo(collection.getCoder(), collection.getWindowingStrategy());
}
 
Example 19
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {
  KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();

  SingletonKeyedWorkItemCoder<K, V> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          inputElementCoder.getKeyCoder(),
          inputElementCoder.getValueCoder(),
          windowingStrategy.getWindowFn().windowCoder());

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, V>> windowedWorkItemCoder =
      WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  DataStream<WindowedValue<SingletonKeyedWorkItem<K, V>>> workItemStream =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  WorkItemKeySelector<K, V> keySelector =
      new WorkItemKeySelector<>(inputElementCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream =
      workItemStream.keyBy(keySelector);

  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn =
      SystemReduceFn.buffering(inputElementCoder.getValueCoder());

  Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(inputElementCoder.getValueCoder());

  Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder),
          windowingStrategy.getWindowFn().windowCoder());

  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
      new CoderTypeInformation<>(outputCoder);

  TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");

  WindowDoFnOperator<K, V, Iterable<V>> doFnOperator =
      new WindowDoFnOperator<>(
          reduceFn,
          operatorName,
          (Coder) windowedWorkItemCoder,
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory(mainTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          inputElementCoder.getKeyCoder(),
          (KeySelector) keySelector /* key selector */);

  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream =
      keyedWorkItemStream.transform(
          operatorName, outputTypeInfo, (OneInputStreamOperator) doFnOperator);

  return outputDataStream;
}
 
Example 20
Source File: FlinkStreamingTransformTranslators.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a GroupByKey-style transform into the Flink streaming job: wraps
 * each KV into a SingletonKeyedWorkItem, keys the stream by the encoded key,
 * and applies a buffering {@code SystemReduceFn} inside a WindowDoFnOperator.
 */
@Override
public void translateNode(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, Iterable<InputT>>>> transform,
    FlinkStreamingTranslationContext context) {

  PCollection<KV<K, InputT>> input = context.getInput(transform);

  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> windowingStrategy =
      (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();

  KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();

  // Coder for the keyed work items consumed by the WindowDoFnOperator.
  SingletonKeyedWorkItemCoder<K, InputT> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          inputKvCoder.getKeyCoder(),
          inputKvCoder.getValueCoder(),
          input.getWindowingStrategy().getWindowFn().windowCoder());

  DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);

  WindowedValue.FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>>
      windowedWorkItemCoder =
          WindowedValue.getFullCoder(
              workItemCoder, input.getWindowingStrategy().getWindowFn().windowCoder());

  CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder);

  // Wrap each KV element into a keyed work item.
  DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
      inputDataStream
          .flatMap(new ToKeyedWorkItem<>(context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");

  // Build the key selector once and reuse it both for keying the stream and for
  // the WindowDoFnOperator below (previously a second identical instance was
  // constructed inside keyBy).
  WorkItemKeySelector keySelector = new WorkItemKeySelector<>(inputKvCoder.getKeyCoder());

  KeyedStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>, ByteBuffer>
      keyedWorkItemStream = workItemStream.keyBy(keySelector);

  // Buffering reduce fn: collects all values per key/window into an Iterable.
  SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> reduceFn =
      SystemReduceFn.buffering(inputKvCoder.getValueCoder());

  Coder<WindowedValue<KV<K, Iterable<InputT>>>> outputCoder =
      context.getWindowedInputCoder(context.getOutput(transform));
  TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  TupleTag<KV<K, Iterable<InputT>>> mainTag = new TupleTag<>("main output");

  String fullName = getCurrentTransformName(context);
  // No side inputs for a plain GBK, hence the empty collections/map below.
  WindowDoFnOperator<K, InputT, Iterable<InputT>> doFnOperator =
      new WindowDoFnOperator<>(
          reduceFn,
          fullName,
          (Coder) windowedWorkItemCoder,
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, outputCoder),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          inputKvCoder.getKeyCoder(),
          keySelector);

  // our operator expects WindowedValue<KeyedWorkItem> while our input stream
  // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ...
  @SuppressWarnings("unchecked")
  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> outDataStream =
      keyedWorkItemStream
          .transform(fullName, outputTypeInfo, (OneInputStreamOperator) doFnOperator)
          .uid(fullName);

  context.setOutputDataStream(context.getOutput(transform), outDataStream);
}