Java Code Examples for org.apache.beam.runners.core.construction.graph.PipelineNode#PTransformNode

The following examples show how to use org.apache.beam.runners.core.construction.graph.PipelineNode#PTransformNode. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PortablePipelineDotRenderer.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the DOT node for a single transform, followed by one edge per input.
 *
 * <p>Every output of the transform is recorded in {@code valueToProducerNodeId} under the id
 * allocated for this node. Each input is then looked up in that same map to find the node the
 * edge starts from; the edge label is the part of the input id after its last underscore.
 *
 * <p>Assumes each input was already recorded as the output of a previously visited transform;
 * the lookup result is unboxed directly, so a missing entry is not handled here.
 *
 * @param node the transform node to render
 */
private void visitTransform(PipelineNode.PTransformNode node) {
  final int currentId = nextNodeId++;
  final RunnerApi.PTransform proto = node.getTransform();

  // Node line: "<id> [label="<unique name>\n<urn>"]".
  writeLine(
      "%d [label=\"%s\\n%s\"]",
      currentId,
      escapeString(proto.getUniqueName()),
      escapeString(proto.getSpec().getUrn()));

  // Remember this node as the producer of each of its outputs.
  for (String producedId : proto.getOutputsMap().values()) {
    valueToProducerNodeId.put(producedId, currentId);
  }

  // One edge per input, from the node that produced it to this node.
  for (String consumedId : proto.getInputsMap().values()) {
    final int sourceId = valueToProducerNodeId.get(consumedId);
    final String edgeStyle = "solid";
    final String shortLabel = consumedId.substring(consumedId.lastIndexOf('_') + 1);
    writeLine(
        "%d -> %d [style=%s label=\"%s\"]",
        sourceId, currentId, edgeStyle, escapeString(shortLabel));
  }
}
 
Example 2
Source File: FlinkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates every transform of the pipeline, in topological order, using the translator
 * registered for its URN, then attaches a discarding sink to each dangling data set.
 *
 * <p>Transforms whose URN has no registered translator are handled by {@code urnNotFound}.
 *
 * @param context the batch translation context; also the value returned
 * @param pipeline the pipeline proto to translate
 * @return the {@code context} that was passed in
 */
@Override
public FlinkPortablePipelineTranslator.Executor translate(
    BatchTranslationContext context, RunnerApi.Pipeline pipeline) {
  // A QueryablePipeline gives us the transforms in topological order.
  QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    String urn = node.getTransform().getSpec().getUrn();
    urnToTransformTranslator
        .getOrDefault(urn, FlinkBatchPortablePipelineTranslator::urnNotFound)
        .translate(node, pipeline, context);
  }

  // Unconsumed DataSets still need a sink so that their side effects are performed.
  for (DataSet<?> dangling : context.getDanglingDataSets()) {
    dangling.output(new DiscardingOutputFormat<>()).name("DiscardingOutput");
  }

  return context;
}
 
Example 3
Source File: ImpulseTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an input message stream for the transform's output, backed by a generic system
 * descriptor that names {@code SamzaImpulseSystemFactory} as its factory class.
 *
 * @param transform the transform node being translated
 * @param pipeline the pipeline being translated (not read by this method)
 * @param ctx the context the input stream is registered with
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {

  final String streamId = ctx.getOutputId(transform);

  // Samza needs a KV serde here so that the key is not cropped.
  final Serde<KV<?, OpMessage<byte[]>>> noOpKvSerde =
      KVSerde.of(new NoOpSerde(), new NoOpSerde<>());

  final String factoryClassName = SamzaImpulseSystemFactory.class.getName();
  final GenericSystemDescriptor impulseSystem =
      new GenericSystemDescriptor(streamId, factoryClassName);
  final GenericInputDescriptor<KV<?, OpMessage<byte[]>>> impulseInput =
      impulseSystem.getInputDescriptor(streamId, noOpKvSerde);

  ctx.registerInputMessageStream(streamId, impulseInput);
}
 
Example 4
Source File: WindowAssignTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Applies the window function carried in the transform's {@code WindowIntoPayload} to the
 * transform's single input stream and registers the result under the transform's output id.
 *
 * @param transform the transform node being translated
 * @param pipeline the pipeline being translated (not read by this method)
 * @param ctx the context supplying the input stream and receiving the output stream
 * @throws IllegalArgumentException if the spec payload cannot be parsed as a
 *     {@code WindowIntoPayload}
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final RunnerApi.WindowIntoPayload windowInto;
  try {
    windowInto =
        RunnerApi.WindowIntoPayload.parseFrom(transform.getTransform().getSpec().getPayload());
  } catch (InvalidProtocolBufferException parseFailure) {
    final String message =
        String.format("failed to parse WindowIntoPayload: %s", transform.getId());
    throw new IllegalArgumentException(message, parseFailure);
  }

  // The proto carries no static element type, hence the unchecked cast.
  @SuppressWarnings("unchecked")
  final WindowFn<T, ?> fn =
      (WindowFn<T, ?>) WindowingStrategyTranslation.windowFnFromProto(windowInto.getWindowFn());

  final MessageStream<OpMessage<T>> source = ctx.getOneInputMessageStream(transform);
  final MessageStream<OpMessage<T>> windowed =
      source.flatMap(OpAdapter.adapt(new WindowAssignOp<>(fn)));

  ctx.registerMessageStream(ctx.getOutputId(transform), windowed);
}
 
Example 5
Source File: PortablePipelineDotRenderer.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Renders the pipeline as DOT text by visiting every transform in topological order between
 * the {@code begin()} and {@code end()} calls.
 *
 * @param pipeline the pipeline proto to render
 * @return the contents of {@code dotBuilder} after all transforms have been visited
 */
private String toDot(RunnerApi.Pipeline pipeline) {
  final QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  begin();
  queryable.getTopologicallyOrderedTransforms().forEach(this::visitTransform);
  end();

  return dotBuilder.toString();
}
 
Example 6
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates every transform of the pipeline, in topological order, using the translator
 * registered for its URN.
 *
 * <p>Translators are invoked with the transform's id. Transforms whose URN has no registered
 * translator are handled by {@code urnNotFound}.
 *
 * @param context the streaming translation context; also the value returned
 * @param pipeline the pipeline proto to translate
 * @return the {@code context} that was passed in
 */
@Override
public FlinkPortablePipelineTranslator.Executor translate(
    StreamingTranslationContext context, RunnerApi.Pipeline pipeline) {
  QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    String urn = node.getTransform().getSpec().getUrn();
    urnToTransformTranslator
        .getOrDefault(urn, this::urnNotFound)
        .translate(node.getId(), pipeline, context);
  }

  return context;
}
 
Example 7
Source File: ParDoBoundMultiTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Portable translation entry point; forwards all three arguments unchanged to
 * {@code doTranslatePortable}.
 *
 * @param transform the transform node being translated
 * @param pipeline the pipeline the transform belongs to
 * @param ctx the portable translation context
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  doTranslatePortable(transform, pipeline, ctx);
}
 
Example 8
Source File: SamzaPipelineTranslatorUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the windowing strategy of the transform's single input PCollection.
 *
 * <p>The strategy proto is looked up in the pipeline components through the input
 * PCollection's windowing strategy id and then converted with
 * {@code WindowingStrategyTranslation.fromProto}.
 *
 * @param transform the transform whose only input is inspected
 * @param pipeline the pipeline supplying the components
 * @return the converted strategy, cast (unchecked) to a {@code BoundedWindow} strategy
 * @throws IllegalStateException if the strategy proto cannot be converted
 */
public static WindowingStrategy<?, BoundedWindow> getPortableWindowStrategy(
    PipelineNode.PTransformNode transform, QueryablePipeline pipeline) {
  final String onlyInputId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());

  final RunnerApi.Components components = pipeline.getComponents();
  final RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);

  final String strategyId = components.getPcollectionsOrThrow(onlyInputId).getWindowingStrategyId();
  final RunnerApi.WindowingStrategy strategyProto =
      components.getWindowingStrategiesOrThrow(strategyId);

  try {
    // The proto does not carry the window type statically, hence the unchecked cast.
    @SuppressWarnings("unchecked")
    final WindowingStrategy<?, BoundedWindow> hydrated =
        (WindowingStrategy<?, BoundedWindow>)
            WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
    return hydrated;
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalStateException(
        String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto), e);
  }
}
 
Example 9
Source File: FlattenPCollectionsTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Merges all input streams of a Flatten transform and registers the merged stream under the
 * transform's output id.
 *
 * @param transform the Flatten transform node
 * @param pipeline the pipeline being translated (not read by this method)
 * @param ctx the context supplying the input streams and receiving the merged stream
 * @throws IllegalStateException if the transform has no input streams
 */
private static <T> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final List<MessageStream<OpMessage<T>>> sources = ctx.getAllInputMessageStreams(transform);
  final String targetId = ctx.getOutputId(transform);

  // The portable API is expected to supply at least the impulse as a dummy input; this will
  // be confirmed once validateRunner tests exist for portable runners.
  final boolean hasSources = !sources.isEmpty();
  checkState(hasSources, "no input streams defined for Flatten: %s", transform.getId());

  final MessageStream<OpMessage<T>> merged = mergeInputStreams(sources);
  ctx.registerMessageStream(targetId, merged);
}
 
Example 10
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Portable translation entry point; forwards all three arguments unchanged to
 * {@code doTranslatePortable}.
 *
 * @param transform the transform node being translated
 * @param pipeline the pipeline the transform belongs to
 * @param ctx the portable translation context
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  doTranslatePortable(transform, pipeline, ctx);
}
 
Example 11
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the windowing strategy of the transform's single input PCollection.
 *
 * <p>The strategy proto is looked up in the pipeline components through the input
 * PCollection's windowing strategy id and then converted with
 * {@code WindowingStrategyTranslation.fromProto}.
 *
 * @param transform the transform whose only input is inspected
 * @param pipeline the pipeline supplying the components
 * @return the converted strategy, cast (unchecked) to a {@code BoundedWindow} strategy
 * @throws IllegalStateException if the strategy proto cannot be parsed
 */
public WindowingStrategy<?, BoundedWindow> getPortableWindowStrategy(
    PipelineNode.PTransformNode transform, QueryablePipeline pipeline) {
  String inputId = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(pipeline.getComponents());

  RunnerApi.WindowingStrategy windowingStrategyProto =
      pipeline
          .getComponents()
          .getWindowingStrategiesOrThrow(
              pipeline.getComponents().getPcollectionsOrThrow(inputId).getWindowingStrategyId());

  WindowingStrategy<?, ?> windowingStrategy;
  try {
    windowingStrategy =
        WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
  } catch (InvalidProtocolBufferException e) {
    // Only the proto-parsing failure is translated. Unrelated runtime exceptions propagate
    // unchanged instead of being relabelled as a hydration problem.
    throw new IllegalStateException(
        String.format(
            "Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto),
        e);
  }

  // The proto does not carry the window type statically, hence the unchecked cast.
  @SuppressWarnings("unchecked")
  WindowingStrategy<?, BoundedWindow> ret =
      (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
  return ret;
}
 
Example 12
Source File: FlattenPCollectionsTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Portable translation entry point; forwards all three arguments unchanged to
 * {@code doTranslatePortable}.
 *
 * @param transform the transform node being translated
 * @param pipeline the pipeline the transform belongs to
 * @param ctx the portable translation context
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  doTranslatePortable(transform, pipeline, ctx);
}
 
Example 13
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given node in the {@code currentTransform} field, replacing any previous value.
 *
 * @param currentTransform the transform node to store
 */
public void setCurrentTransform(PipelineNode.PTransformNode currentTransform) {
  this.currentTransform = currentTransform;
}
 
Example 14
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the message stream registered for the transform's single input.
 *
 * <p>The input id is obtained with {@code Iterables.getOnlyElement}, so the transform is
 * expected to have exactly one input.
 *
 * @param transform the transform whose only input is looked up
 * @return the stream returned by {@code getMessageStreamById} for that input id
 */
public <T> MessageStream<OpMessage<T>> getOneInputMessageStream(
    PipelineNode.PTransformNode transform) {
  return getMessageStreamById(
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()));
}
 
Example 15
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the message streams for all inputs of the transform, in the iteration order of the
 * transform's input map values.
 *
 * @param transform the transform whose inputs are looked up
 * @return one stream per input id, each obtained through {@code getMessageStreamById}
 */
public <T> List<MessageStream<OpMessage<T>>> getAllInputMessageStreams(
    PipelineNode.PTransformNode transform) {
  return transform.getTransform().getInputsMap().values().stream()
      .map(this::<T>getMessageStreamById)
      .collect(Collectors.toList());
}
 
Example 16
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the id of the transform's single output.
 *
 * <p>The id is obtained with {@code Iterables.getOnlyElement}, so the transform is expected
 * to have exactly one output.
 *
 * @param transform the transform whose only output id is wanted
 * @return the sole value of the transform's outputs map
 */
public String getOutputId(PipelineNode.PTransformNode transform) {
  final String onlyOutputId =
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values());
  return onlyOutputId;
}
 
Example 17
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the id of the transform's single input.
 *
 * <p>The id is obtained with {@code Iterables.getOnlyElement}, so the transform is expected
 * to have exactly one input.
 *
 * @param transform the transform whose only input id is wanted
 * @return the sole value of the transform's inputs map
 */
public String getInputId(PipelineNode.PTransformNode transform) {
  final String onlyInputId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  return onlyInputId;
}
 
Example 18
Source File: Twister2BatchPortablePipelineTranslator.java    From twister2 with Apache License 2.0 4 votes vote down vote up
/**
 * Translates transformNode from Beam into the Twister2 batch translation context.
 *
 * @param transformNode the Beam transform node to translate
 * @param pipeline the pipeline proto the node belongs to
 * @param context the Twister2 batch translation context to translate into
 */
void translate(
    PipelineNode.PTransformNode transformNode,
    RunnerApi.Pipeline pipeline,
    Twister2BatchTranslationContext context);
 
Example 19
Source File: ParDoBoundMultiTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Translates an executable-stage transform into a {@code DoFnOp} applied to the stage's input
 * stream, then registers a filtered output stream per output index.
 *
 * <p>Steps, in order: parse the {@code ExecutableStagePayload} from the transform spec, look
 * up the input stream by the payload's input id, assign each output name a tuple tag and a
 * sequential index, build the {@code DoFnOp}, apply it, and split the tagged result by index.
 *
 * @param transform the transform node whose spec payload is an {@code ExecutableStagePayload}
 * @param pipeline the pipeline supplying the components (coders, PCollections)
 * @param ctx the context supplying the input stream and receiving the output streams
 * @throws RuntimeException wrapping the {@code IOException} if the payload cannot be parsed
 */
private static <InT, OutT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  // Output name -> output PCollection id, as declared on the transform.
  Map<String, String> outputs = transform.getTransform().getOutputsMap();

  final RunnerApi.ExecutableStagePayload stagePayload;
  try {
    stagePayload =
        RunnerApi.ExecutableStagePayload.parseFrom(
            transform.getTransform().getSpec().getPayload());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  // The main input is taken from the stage payload, not from the transform's inputs map.
  String inputId = stagePayload.getInput();
  final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId);
  // TODO: support side input
  final List<MessageStream<OpMessage<InT>>> sideInputStreams = Collections.emptyList();

  final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
  final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();

  // first output as the main output
  // (null when the transform declares no outputs)
  final TupleTag<OutT> mainOutputTag =
      outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());

  // Give every output name a tag and a sequential index (in keySet iteration order), and
  // remember which tag belongs to which output PCollection id.
  AtomicInteger index = new AtomicInteger(0);
  outputs
      .keySet()
      .iterator()
      .forEachRemaining(
          outputName -> {
            TupleTag<?> tupleTag = new TupleTag<>(outputName);
            tagToIndexMap.put(tupleTag, index.get());
            index.incrementAndGet();
            String collectionId = outputs.get(outputName);
            idToTupleTagMap.put(collectionId, tupleTag);
          });

  WindowedValue.WindowedValueCoder<InT> windowedInputCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());

  final DoFnSchemaInformation doFnSchemaInformation;
  doFnSchemaInformation = ParDoTranslation.getSchemaInformation(transform.getTransform());

  Map<String, PCollectionView<?>> sideInputMapping =
      ParDoTranslation.getSideInputMapping(transform.getTransform());

  final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);

  // Several arguments are placeholders; see the per-argument notes.
  final DoFnOp<InT, OutT, RawUnionValue> op =
      new DoFnOp<>(
          mainOutputTag,
          new NoOpDoFn<>(),
          null, // key coder not in use
          windowedInputCoder.getValueCoder(), // input coder not in use
          windowedInputCoder,
          Collections.emptyMap(), // output coders not in use
          Collections.emptyList(), // sideInputs not in use until side input support
          new ArrayList<>(idToTupleTagMap.values()), // used by java runner only
          SamzaPipelineTranslatorUtils.getPortableWindowStrategy(transform, pipeline),
          Collections.emptyMap(), // idToViewMap not in use until side input support
          new DoFnOp.MultiOutputManagerFactory(tagToIndexMap),
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          isBounded,
          true,
          stagePayload,
          idToTupleTagMap,
          doFnSchemaInformation,
          sideInputMapping);

  // sideInputStreams is always empty above, so only the first branch is taken today; the
  // else branch is kept for when side inputs are supported.
  final MessageStream<OpMessage<InT>> mergedStreams;
  if (sideInputStreams.isEmpty()) {
    mergedStreams = inputStream;
  } else {
    MessageStream<OpMessage<InT>> mergedSideInputStreams =
        MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
    mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
  }

  final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream =
      mergedStreams.flatMap(OpAdapter.adapt(op));

  // Split the tagged stream by output index: ELEMENT messages are kept only when their union
  // tag equals the index, while every non-ELEMENT message is kept for all indices.
  // NOTE(review): each iteration registers under ctx.getOutputId(transform) rather than under
  // the PCollection id belonging to outputIndex - confirm this is intended for transforms
  // with more than one output.
  for (int outputIndex : tagToIndexMap.values()) {
    final MessageStream<OpMessage<OutT>> outputStream =
        taggedOutputStream
            .filter(
                message ->
                    message.getType() != OpMessage.Type.ELEMENT
                        || message.getElement().getValue().getUnionTag() == outputIndex)
            .flatMap(OpAdapter.adapt(new RawUnionValueToValue()));

    ctx.registerMessageStream(ctx.getOutputId(transform), outputStream);
  }
}
 
Example 20
Source File: Twister2StreamingPortablePipelineTranslator.java    From twister2 with Apache License 2.0 4 votes vote down vote up
/**
 * Translates transformNode from Beam into the Twister2 streaming translation context.
 *
 * @param transformNode the Beam transform node to translate
 * @param pipeline the pipeline proto the node belongs to
 * @param context the Twister2 streaming translation context to translate into
 */
void translate(
    PipelineNode.PTransformNode transformNode,
    RunnerApi.Pipeline pipeline,
    Twister2StreamingTranslationContext context);