Java Code Examples for org.apache.beam.sdk.util.WindowedValue#WindowedValueCoder

The following examples show how to use org.apache.beam.sdk.util.WindowedValue#WindowedValueCoder. You can vote up the examples you like or vote down the ones you don't like, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: BoundedDataset.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Lazily materializes this dataset's windowed values on the driver and caches the result.
 *
 * <p>Elements are encoded on the cluster with the appropriate {@link
 * WindowedValue.WindowedValueCoder}, collected as bytes, and decoded locally.
 */
Iterable<WindowedValue<T>> getValues(PCollection<T> pcollection) {
  if (windowedValues != null) {
    return windowedValues;
  }
  WindowFn<?, ?> windowFn = pcollection.getWindowingStrategy().getWindowFn();
  // Values in the global window need no window metadata encoded; all others do.
  final WindowedValue.WindowedValueCoder<T> coder =
      windowFn instanceof GlobalWindows
          ? WindowedValue.ValueOnlyWindowedValueCoder.of(pcollection.getCoder())
          : WindowedValue.FullWindowedValueCoder.of(
              pcollection.getCoder(), windowFn.windowCoder());
  // Serialize remotely, collect to the driver, then deserialize locally.
  List<byte[]> serialized = rdd.map(CoderHelpers.toByteFunction(coder)).collect();
  windowedValues =
      serialized.stream()
          .map(bytes -> CoderHelpers.fromByteArray(bytes, coder))
          .collect(Collectors.toList());
  return windowedValues;
}
 
Example 2
Source File: JetTransformTranslators.java    From beam with Apache License 2.0 5 votes vote down vote up
@Override
public Vertex translate(
    Pipeline pipeline,
    AppliedPTransform<?, ?, ?> appliedTransform,
    Node node,
    JetTranslationContext context) {
  // Translates the applied transform into a single Jet DAG vertex backed by
  // WindowGroupP, then wires up its input and output edges.
  String transformName = appliedTransform.getFullName();

  PCollection<KV<K, InputT>> input =
      (PCollection<KV<K, InputT>>) Utils.getInput(appliedTransform);
  // Coder for the windowed input elements; the processor uses it for (de)serialization.
  WindowedValue.WindowedValueCoder<KV<K, InputT>> inputCoder =
      Utils.getWindowedValueCoder(input);
  Map.Entry<TupleTag<?>, PValue> output = Utils.getOutput(appliedTransform);
  Coder outputCoder = Utils.getCoder((PCollection) output.getValue());

  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  DAGBuilder dagBuilder = context.getDagBuilder();
  String vertexId = dagBuilder.newVertexId(transformName);
  Vertex vertex =
      dagBuilder.addVertex(
          vertexId,
          WindowGroupP.supplier(
              context.getOptions(), inputCoder, outputCoder, windowingStrategy, vertexId));

  // Incoming edge: the input PCollection's stream terminates at this vertex.
  dagBuilder.registerEdgeEndPoint(Utils.getTupleTagId(input), vertex);

  // Outgoing edge: publish this vertex's output under the output tuple tag.
  String outputEdgeId = Utils.getTupleTagId(output.getValue());
  dagBuilder.registerCollectionOfEdge(outputEdgeId, output.getKey().getId());
  dagBuilder.registerEdgeStartPoint(outputEdgeId, vertex, outputCoder);
  return vertex;
}
 
Example 3
Source File: StreamingTransformTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Spark streaming evaluator for Beam's {@code GroupByKey}: groups the input
 * DStream by key and window via {@code SparkGroupAlsoByWindowViaWindowSet}.
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
  return new TransformEvaluator<GroupByKey<K, V>>() {
    @Override
    public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> inputDataset =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> streamSources = inputDataset.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> dStream = inputDataset.getDStream();
      final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> windowingStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn();

      // --- coders.
      // Only the values are windowed-value-encoded; keys are encoded separately below.
      final WindowedValue.WindowedValueCoder<V> wvCoder =
          WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

      JavaDStream<WindowedValue<KV<K, Iterable<V>>>> outStream =
          SparkGroupAlsoByWindowViaWindowSet.groupByKeyAndWindow(
              dStream,
              coder.getKeyCoder(),
              wvCoder,
              windowingStrategy,
              context.getSerializableOptions(),
              streamSources,
              context.getCurrentTransform().getFullName());

      context.putDataset(transform, new UnboundedDataset<>(outStream, streamSources));
    }

    @Override
    public String toNativeString() {
      return "groupByKey()";
    }
  };
}
 
Example 4
Source File: TransformTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a stateful {@code ParDo} over a keyed RDD.
 *
 * <p>When sorted input is not required, elements are grouped by key only and the DoFn is
 * applied per key. When sorted input is required, elements are serialized with a
 * timestamp-aware byte function, repartitioned and sorted within partitions so the DoFn
 * sees each key's values in timestamp order.
 */
private static <K, V, OutputT> JavaPairRDD<TupleTag<?>, WindowedValue<?>> statefulParDoTransform(
    KvCoder<K, V> kvCoder,
    Coder<? extends BoundedWindow> windowCoder,
    JavaRDD<WindowedValue<KV<K, V>>> kvInRDD,
    Partitioner partitioner,
    MultiDoFnFunction<KV<K, V>, OutputT> doFnFunction,
    boolean requiresSortedInput) {
  Coder<K> keyCoder = kvCoder.getKeyCoder();

  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), windowCoder);

  if (!requiresSortedInput) {
    return GroupCombineFunctions.groupByKeyOnly(kvInRDD, keyCoder, wvCoder, partitioner)
        .map(
            input -> {
              final K key = input.getKey();
              Iterable<WindowedValue<V>> value = input.getValue();
              // Re-attach the key to each windowed value before handing off to the DoFn.
              return FluentIterable.from(value)
                  .transform(
                      windowedValue ->
                          windowedValue.withValue(KV.of(key, windowedValue.getValue())))
                  .iterator();
            })
        .flatMapToPair(doFnFunction);
  }

  // Sorted-input path: encode (key, windowed value) pairs with the element timestamp so a
  // partition-local sort yields per-key timestamp order.
  JavaPairRDD<ByteArray, byte[]> pairRDD =
      kvInRDD
          .map(new ReifyTimestampsAndWindowsFunction<>())
          .mapToPair(TranslationUtils.toPairFunction())
          .mapToPair(
              CoderHelpers.toByteFunctionWithTs(keyCoder, wvCoder, in -> in._2().getTimestamp()));

  JavaPairRDD<ByteArray, byte[]> sorted =
      pairRDD.repartitionAndSortWithinPartitions(keyPrefixPartitionerFrom(partitioner));

  return sorted.mapPartitionsToPair(wrapDoFnFromSortedRDD(doFnFunction, keyCoder, wvCoder));
}
 
Example 5
Source File: RowHelpers.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Spark {@link MapFunction} that extracts a {@link WindowedValue} from a {@link
 * Row} whose single column holds the value serialized with the given {@link
 * WindowedValue.WindowedValueCoder}.
 *
 * @param <T> element type carried by the windowed value
 * @param windowedValueCoder coder used to decode the serialized bytes
 * @return a {@link MapFunction} mapping a {@link Row} to its decoded {@link WindowedValue}
 */
public static <T> MapFunction<Row, WindowedValue<T>> extractWindowedValueFromRowMapFunction(
    WindowedValue.WindowedValueCoder<T> windowedValueCoder) {
  return (MapFunction<Row, WindowedValue<T>>)
      row -> {
        // The InputPartitionReader stores exactly one serialized value per Row.
        byte[] serialized = (byte[]) row.get(0);
        return windowedValueCoder.decode(new ByteArrayInputStream(serialized));
      };
}
 
Example 6
Source File: Utils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns whether the given coder encodes keyed values: either a {@link KvCoder} directly,
 * or a {@link WindowedValue.WindowedValueCoder} whose value coder is a {@link KvCoder}.
 */
static boolean isKeyedValueCoder(Coder coder) {
  if (coder instanceof KvCoder) {
    return true;
  }
  if (!(coder instanceof WindowedValue.WindowedValueCoder)) {
    return false;
  }
  return ((WindowedValue.WindowedValueCoder) coder).getValueCoder() instanceof KvCoder;
}
 
Example 7
Source File: GroupByKeyTranslatorBatch.java    From twister2 with Apache License 2.0 5 votes vote down vote up
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  // Translates GroupByKey into Twister2 TSets: first a keyed gather groups by key over
  // serialized bytes, then a flatmap groups each key's values by window.
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);
  final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
  Coder<K> inputKeyCoder = ((KvCoder<K, V>) input.getCoder()).getKeyCoder();
  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  // Only the values are windowed-value-encoded; keys are encoded with inputKeyCoder.
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedbyKeyTset =
      keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      outputTset =
      groupedbyKeyTset
          .direct()
          .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
              new GroupByWindowFunction(
                  windowingStrategy,
                  SystemReduceFn.buffering(coder.getValueCoder())));
  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
 
Example 8
Source File: BoundedDataset.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes this dataset's windowed values with the given coder and collects the bytes to
 * the driver, caching the result for subsequent calls.
 */
List<byte[]> getBytes(WindowedValue.WindowedValueCoder<T> wvCoder) {
  if (clientBytes == null) {
    clientBytes = rdd.map(CoderHelpers.toByteFunction(wvCoder)).collect();
  }
  return clientBytes;
}
 
Example 9
Source File: BufferedElementsTest.java    From beam with Apache License 2.0 5 votes vote down vote up
@Test
public void testCoder() throws IOException {
  // Verifies that BufferedElements.Coder round-trips both element and timer buffered
  // elements, in various orderings and multiplicities.

  StringUtf8Coder elementCoder = StringUtf8Coder.of();
  // Generics fail to see here that this is Coder<BoundedWindow>
  org.apache.beam.sdk.coders.Coder windowCoder = GlobalWindow.Coder.INSTANCE;
  WindowedValue.WindowedValueCoder windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(elementCoder, windowCoder);
  KV<String, Integer> key = KV.of("one", 1);
  BufferedElements.Coder coder = new BufferedElements.Coder(windowedValueCoder, windowCoder, key);

  // One plain buffered element and one buffered timer as representative inputs.
  BufferedElement element =
      new BufferedElements.Element(
          WindowedValue.of("test", new Instant(2), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING));
  BufferedElement timerElement =
      new BufferedElements.Timer(
          "timerId",
          "timerId",
          key,
          GlobalWindow.INSTANCE,
          new Instant(1),
          new Instant(1),
          TimeDomain.EVENT_TIME);

  testRoundTrip(ImmutableList.of(element), coder);
  testRoundTrip(ImmutableList.of(timerElement), coder);
  testRoundTrip(ImmutableList.of(element, timerElement), coder);
  testRoundTrip(ImmutableList.of(element, timerElement, element), coder);
  testRoundTrip(ImmutableList.of(element, element, element, timerElement, timerElement), coder);
}
 
Example 10
Source File: TranslationUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Utility method for deserializing a byte array using the specified coder. (From spark code)
 *
 * @param <T> Type of object to be returned.
 * @param serialized bytearray to be deserialized.
 * @param coder Coder to deserialize with.
 * @return Deserialized object, or {@code null} if decoding fails (the {@link CoderException}
 *     is logged at SEVERE level rather than propagated — callers must handle {@code null}).
 */
public static <T> WindowedValue<T> fromByteArray(
    byte[] serialized, WindowedValue.WindowedValueCoder<T> coder) {
  try {
    return CoderUtils.decodeFromByteArray(coder, serialized);
  } catch (CoderException e) {
    // Deliberate best-effort: log and fall through to the null return below.
    LOG.log(Level.SEVERE, "Error while decoding message", e);
  }
  return null;
}
 
Example 11
Source File: MapToTupleFunction.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Method used to initialize the transient variables that were sent over as byte arrays or proto
 * buffers.
 */
private void initTransient() {
  if (isInitialized) {
    return;
  }
  keyCoder = (Coder<K>) SerializableUtils.deserializeFromByteArray(keyCoderBytes, "Coder");
  wvCoder =
      (WindowedValue.WindowedValueCoder<V>)
          SerializableUtils.deserializeFromByteArray(wvCoderBytes, "Coder");
  this.isInitialized = true;
}
 
Example 12
Source File: PCollectionViewTranslatorBatch.java    From beam with Apache License 2.0 5 votes vote down vote up
@Override
public void translateNode(
    View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
  // Translates CreatePCollectionView: gathers all windowed elements to every worker
  // (allGather) and registers the result as a side-input dataset under the view's tag.
  BatchTSet<WindowedValue<ElemT>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));
  @SuppressWarnings("unchecked")
  AppliedPTransform<
          PCollection<ElemT>,
          PCollection<ElemT>,
          PTransform<PCollection<ElemT>, PCollection<ElemT>>>
      application =
          (AppliedPTransform<
                  PCollection<ElemT>,
                  PCollection<ElemT>,
                  PTransform<PCollection<ElemT>, PCollection<ElemT>>>)
              context.getCurrentTransform();
  org.apache.beam.sdk.values.PCollectionView<ViewT> input;
  PCollection<ElemT> inputPCol = context.getInput(transform);
  // NOTE(review): the input coder is assumed to be a KvCoder here — confirm this holds
  // for all views routed through this translator.
  final KvCoder coder = (KvCoder) inputPCol.getCoder();
  Coder inputKeyCoder = coder.getKeyCoder();
  WindowingStrategy windowingStrategy = inputPCol.getWindowingStrategy();
  WindowFn windowFn = windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  // Round-trip through bytes so elements can be gathered across workers.
  BatchTSet<WindowedValue<ElemT>> inputGathered =
      inputDataSet
          .direct()
          .map(new MapToTupleFunction<>(inputKeyCoder, wvCoder))
          .allGather()
          .map(new ByteToWindowFunctionPrimitive(inputKeyCoder, wvCoder));
  try {
    input = CreatePCollectionViewTranslation.getView(application);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  context.setSideInputDataSet(input.getTagInternal().getId(), inputGathered);
}
 
Example 13
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a portable GroupByKey into Spark RDD operations.
 *
 * <p>Non-merging windows take a memory-sensitive group-by-key-and-window path; merging
 * windows fall back to group-by-key-only followed by GroupAlsoByWindow with in-memory state.
 */
private static <K, V> void translateGroupByKey(
    PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkTranslationContext context) {

  RunnerApi.Components components = pipeline.getComponents();
  String inputId = getInputId(transformNode);
  Dataset inputDataset = context.popDataset(inputId);
  JavaRDD<WindowedValue<KV<K, V>>> inputRdd = ((BoundedDataset<KV<K, V>>) inputDataset).getRDD();
  WindowedValueCoder<KV<K, V>> inputCoder = getWindowedValueCoder(inputId, components);
  KvCoder<K, V> inputKvCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
  Coder<K> inputKeyCoder = inputKvCoder.getKeyCoder();
  Coder<V> inputValueCoder = inputKvCoder.getValueCoder();
  WindowingStrategy windowingStrategy = getWindowingStrategy(inputId, components);
  WindowFn<Object, BoundedWindow> windowFn = windowingStrategy.getWindowFn();
  // Coder for windowed values only (keys are handled separately by the grouping functions).
  WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(inputValueCoder, windowFn.windowCoder());

  JavaRDD<WindowedValue<KV<K, Iterable<V>>>> groupedByKeyAndWindow;
  Partitioner partitioner = getPartitioner(context);
  if (GroupNonMergingWindowsFunctions.isEligibleForGroupByWindow(windowingStrategy)) {
    // we can have a memory sensitive translation for non-merging windows
    groupedByKeyAndWindow =
        GroupNonMergingWindowsFunctions.groupByKeyAndWindow(
            inputRdd, inputKeyCoder, inputValueCoder, windowingStrategy, partitioner);
  } else {
    JavaRDD<KV<K, Iterable<WindowedValue<V>>>> groupedByKeyOnly =
        GroupCombineFunctions.groupByKeyOnly(inputRdd, inputKeyCoder, wvCoder, partitioner);
    // for batch, GroupAlsoByWindow uses an in-memory StateInternals.
    groupedByKeyAndWindow =
        groupedByKeyOnly.flatMap(
            new SparkGroupAlsoByWindowViaOutputBufferFn<>(
                windowingStrategy,
                new TranslationUtils.InMemoryStateInternalsFactory<>(),
                SystemReduceFn.buffering(inputValueCoder),
                context.serializablePipelineOptions));
  }
  context.pushDataset(getOutputId(transformNode), new BoundedDataset<>(groupedByKeyAndWindow));
}
 
Example 14
Source File: StreamingTransformTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the Spark streaming evaluator for Beam's {@code Reshuffle}: redistributes the
 * input DStream via {@code GroupCombineFunctions.reshuffle} without changing element values.
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<Reshuffle<K, V>> reshuffle() {
  return new TransformEvaluator<Reshuffle<K, V>>() {
    @Override
    public void evaluate(Reshuffle<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> inputDataset =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> streamSources = inputDataset.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> dStream = inputDataset.getDStream();
      final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> windowingStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn();

      // Unlike groupByKey above, the whole KV is windowed-value-encoded here since
      // reshuffle moves complete elements rather than grouping values per key.
      final WindowedValue.WindowedValueCoder<KV<K, V>> wvCoder =
          WindowedValue.FullWindowedValueCoder.of(coder, windowFn.windowCoder());

      JavaDStream<WindowedValue<KV<K, V>>> reshuffledStream =
          dStream.transform(rdd -> GroupCombineFunctions.reshuffle(rdd, wvCoder));

      context.putDataset(transform, new UnboundedDataset<>(reshuffledStream, streamSources));
    }

    @Override
    public String toNativeString() {
      return "repartition(...)";
    }
  };
}
 
Example 15
Source File: DAGBuilder.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Captures the windowed KV coder when the supplied coder encodes keyed values; otherwise
 * stores {@code null} so key extraction is skipped downstream.
 */
PartitionedKeyExtractor(Coder coder) {
  if (Utils.isKeyedValueCoder(coder)) {
    this.coder = (WindowedValue.WindowedValueCoder<KV<K, V>>) coder;
  } else {
    this.coder = null;
  }
}
 
Example 16
Source File: MapToTupleFunction.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the function, keeping live coder references for immediate use and serialized
 * copies so the coders can be shipped to remote workers and rehydrated there.
 */
public MapToTupleFunction(Coder<K> inputKeyCoder, WindowedValue.WindowedValueCoder<V> wvCoder) {
  this.keyCoder = inputKeyCoder;
  this.wvCoder = wvCoder;
  this.keyCoderBytes = SerializableUtils.serializeToByteArray(inputKeyCoder);
  this.wvCoderBytes = SerializableUtils.serializeToByteArray(wvCoder);
}
 
Example 17
Source File: Utils.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a full windowed-value coder for the given PCollection from its element coder and
 * its windowing strategy's window coder.
 */
static <T> WindowedValue.WindowedValueCoder<T> getWindowedValueCoder(PCollection<T> pCollection) {
  Coder<T> elementCoder = pCollection.getCoder();
  Coder<? extends BoundedWindow> windowCoder =
      pCollection.getWindowingStrategy().getWindowFn().windowCoder();
  return WindowedValue.FullWindowedValueCoder.of(elementCoder, windowCoder);
}
 
Example 18
Source File: CombinePerKeyTranslatorBatch.java    From beam with Apache License 2.0 4 votes vote down vote up
@Override
public void translateTransform(
    PTransform<PCollection<KV<K, InputT>>, PCollection<KV<K, OutputT>>> transform,
    TranslationContext context) {
  // Translates Combine.PerKey into a Spark SQL aggregation: group by key, combine via a
  // custom Aggregator, then flatten the per-key results back into windowed KVs.

  Combine.PerKey combineTransform = (Combine.PerKey) transform;
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, InputT>> input = (PCollection<KV<K, InputT>>) context.getInput();
  @SuppressWarnings("unchecked")
  final PCollection<KV<K, OutputT>> output = (PCollection<KV<K, OutputT>>) context.getOutput();
  @SuppressWarnings("unchecked")
  final Combine.CombineFn<InputT, AccumT, OutputT> combineFn =
      (Combine.CombineFn<InputT, AccumT, OutputT>) combineTransform.getFn();
  WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();

  Dataset<WindowedValue<KV<K, InputT>>> inputDataset = context.getDataset(input);

  KvCoder<K, InputT> inputCoder = (KvCoder<K, InputT>) input.getCoder();
  Coder<K> keyCoder = inputCoder.getKeyCoder();
  KvCoder<K, OutputT> outputKVCoder = (KvCoder<K, OutputT>) output.getCoder();
  Coder<OutputT> outputCoder = outputKVCoder.getValueCoder();

  KeyValueGroupedDataset<K, WindowedValue<KV<K, InputT>>> groupedDataset =
      inputDataset.groupByKey(KVHelpers.extractKey(), EncoderHelpers.fromBeamCoder(keyCoder));

  // The accumulator coder is inferred from the CombineFn; failure to provide one is fatal.
  Coder<AccumT> accumulatorCoder = null;
  try {
    accumulatorCoder =
        combineFn.getAccumulatorCoder(
            input.getPipeline().getCoderRegistry(), inputCoder.getValueCoder());
  } catch (CannotProvideCoderException e) {
    throw new RuntimeException(e);
  }

  Dataset<Tuple2<K, Iterable<WindowedValue<OutputT>>>> combinedDataset =
      groupedDataset.agg(
          new AggregatorCombiner<K, InputT, AccumT, OutputT, BoundedWindow>(
                  combineFn, windowingStrategy, accumulatorCoder, outputCoder)
              .toColumn());

  // expand the list into separate elements and put the key back into the elements
  WindowedValue.WindowedValueCoder<KV<K, OutputT>> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(
          outputKVCoder, input.getWindowingStrategy().getWindowFn().windowCoder());
  Dataset<WindowedValue<KV<K, OutputT>>> outputDataset =
      combinedDataset.flatMap(
          (FlatMapFunction<
                  Tuple2<K, Iterable<WindowedValue<OutputT>>>, WindowedValue<KV<K, OutputT>>>)
              tuple2 -> {
                K key = tuple2._1();
                Iterable<WindowedValue<OutputT>> windowedValues = tuple2._2();
                List<WindowedValue<KV<K, OutputT>>> result = new ArrayList<>();
                // Re-attach the key, preserving each value's timestamp, windows, and pane.
                for (WindowedValue<OutputT> windowedValue : windowedValues) {
                  KV<K, OutputT> kv = KV.of(key, windowedValue.getValue());
                  result.add(
                      WindowedValue.of(
                          kv,
                          windowedValue.getTimestamp(),
                          windowedValue.getWindows(),
                          windowedValue.getPane()));
                }
                return result.iterator();
              },
          EncoderHelpers.fromBeamCoder(wvCoder));
  context.putDataset(output, outputDataset);
}
 
Example 19
Source File: ParDoBoundMultiTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a portable executable-stage ParDo into Samza message streams: builds a DoFnOp
 * around the stage payload, merges side-input streams (currently none), runs the op, and
 * splits the tagged union output back into one stream per output tag.
 */
private static <InT, OutT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  Map<String, String> outputs = transform.getTransform().getOutputsMap();

  final RunnerApi.ExecutableStagePayload stagePayload;
  try {
    stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(
            transform.getTransform().getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  String inputId = stagePayload.getInput();
  final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId);
  // TODO: support side input
  final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList();

  final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
  final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();

  // first output as the main output
  final TupleTag<OutT> mainOutputTag =
      outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());

  // Assign each output tag a dense union index and remember the collection-id mapping.
  AtomicInteger index = new AtomicInteger(0);
  outputs
      .keySet()
      .iterator()
      .forEachRemaining(
          outputName -> {
            TupleTag<?> tupleTag = new TupleTag<>(outputName);
            tagToIndexMap.put(tupleTag, index.get());
            index.incrementAndGet();
            String collectionId = outputs.get(outputName);
            idToTupleTagMap.put(collectionId, tupleTag);
          });

  WindowedValue.WindowedValueCoder<InT> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());

  final DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(transform.getTransform());

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(transform.getTransform());

  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);

  final DoFnOp<InT, OutT, RawUnionValue> op =
      new DoFnOp<>(
          mainOutputTag,
          new NoOpDoFn<>(),
          null, // key coder not in use
          windowedInputCoder.getValueCoder(), // input coder not in use
          windowedInputCoder,
          Collections.emptyMap(), // output coders not in use
          Collections.emptyList(), // sideInputs not in use until side input support
          new ArrayList<>(idToTupleTagMap.values()), // used by java runner only
          SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline),
          Collections.emptyMap(), // idToViewMap not in use until side input support
          new DoFnOp.MultiOutputManagerFactory(tagToIndexMap),
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          isBounded,
          true,
          stagePayload,
          idToTupleTagMap,
          doFnSchemaInformation,
          sideInputMapping);

  final MessageStream<OpMessage<InT>> mergedStreams;
  if (sideInputStreams.isEmpty()) {
    mergedStreams = inputStream;
  } else {
    MessageStream<OpMessage<InT>> mergedSideInputStreams =
        MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
    mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
  }

  final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream =
      mergedStreams.flatMap(OpAdapter.adapt(op));

  // Split the tagged union output into a filtered stream per output index.
  // NOTE(review): every iteration registers its stream under the same id
  // (ctx.getOutputId(transform)) — confirm this is intended for multi-output stages.
  for (int outputIndex : tagToIndexMap.values()) {
    final MessageStream<OpMessage<OutT>> outputStream =
        taggedOutputStream
            .filter(
                message ->
                    message.getType() != OpMessage.Type.ELEMENT
                        || message.getElement().getValue().getUnionTag() == outputIndex)
            .flatMap(OpAdapter.adapt(new RawUnionValueToValue()));

    ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
  }
}
 
Example 20
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a portable GroupByKey into Samza message streams: extracts the input KV and
 * window coders, builds a buffering SystemReduceFn, and delegates to doTranslateGBK.
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final MessageStream<OpMessage<KV<K, InputT>>> inputStream =
      ctx.getOneInputMessageStream(transform);
  // With multiple sources, elements must be repartitioned so equal keys co-locate.
  final boolean needRepartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;
  final WindowingStrategy<?, BoundedWindow> windowingStrategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

  final String inputId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());
  final KvCoder<K, InputT> kvInputCoder = (KvCoder<K, InputT>) windowedInputCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> elementCoder =
      WindowedValue.FullWindowedValueCoder.of(kvInputCoder, windowCoder);

  // GBK has exactly one output; its tag names the output collection.
  final TupleTag<KV<K, OutputT>> outputTag =
      new TupleTag<>(Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet()));

  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvInputCoder.getValueCoder());

  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);

  final MessageStream<OpMessage<KV<K, OutputT>>> outputStream =
      doTranslateGBK(
          inputStream,
          needRepartition,
          reduceFn,
          windowingStrategy,
          kvInputCoder,
          elementCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          outputTag,
          isBounded);
  ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
}