org.apache.beam.runners.core.construction.graph.PipelineNode Java Examples

The following examples show how to use org.apache.beam.runners.core.construction.graph.PipelineNode. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PortablePipelineDotRenderer.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Emits the DOT node for one transform together with one edge per input.
 *
 * <p>The node is labelled with the transform's unique name and spec URN. Every output of the
 * transform is recorded as being produced by this node so that later transforms can draw an edge
 * back to it.
 */
private void visitTransform(PipelineNode.PTransformNode node) {
  final RunnerApi.PTransform proto = node.getTransform();
  final int currentId = nextNodeId++;

  writeLine(
      "%d [label=\"%s\\n%s\"]",
      currentId,
      escapeString(proto.getUniqueName()),
      escapeString(proto.getSpec().getUrn()));

  // Remember this node as the producer of each of its outputs.
  for (String outputId : proto.getOutputsMap().values()) {
    valueToProducerNodeId.put(outputId, currentId);
  }

  // Draw an edge from the producer of every input to this node; the edge label is the part of
  // the input id that follows its last underscore.
  for (String inputId : proto.getInputsMap().values()) {
    final int sourceId = valueToProducerNodeId.get(inputId);
    final String edgeLabel = inputId.substring(inputId.lastIndexOf('_') + 1);
    writeLine(
        "%d -> %d [style=%s label=\"%s\"]", sourceId, currentId, "solid", escapeString(edgeLabel));
  }
}
 
Example #2
Source File: FlinkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates each transform of the pipeline, in topological order, with the translator registered
 * for its URN (falling back to {@code urnNotFound}), then attaches a discarding sink to every
 * dangling data set reported by the context.
 *
 * @return the translation context that was passed in
 */
@Override
public FlinkPortablePipelineTranslator.Executor translate(
    BatchTranslationContext context, RunnerApi.Pipeline pipeline) {
  // A QueryablePipeline provides the topological ordering of the transforms.
  final QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    final String urn = node.getTransform().getSpec().getUrn();
    urnToTransformTranslator
        .getOrDefault(urn, FlinkBatchPortablePipelineTranslator::urnNotFound)
        .translate(node, pipeline, context);
  }

  // Data sets nobody consumes still need a sink so that their side effects are performed.
  for (DataSet<?> dangling : context.getDanglingDataSets()) {
    dangling.output(new DiscardingOutputFormat<>()).name("DiscardingOutput");
  }

  return context;
}
 
Example #3
Source File: BatchSideInputHandlerFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link ExecutableStage} from default component and environment protos, a fixed
 * {@code "collection-id"} input collection and the given side inputs; the four remaining
 * collection arguments are empty lists.
 */
private static ExecutableStage createExecutableStage(Collection<SideInputReference> sideInputs) {
  final PCollectionNode input =
      PipelineNode.pCollection("collection-id", RunnerApi.PCollection.getDefaultInstance());
  final Components defaultComponents = Components.getDefaultInstance();
  final Environment defaultEnvironment = Environment.getDefaultInstance();

  return ImmutableExecutableStage.of(
      defaultComponents,
      defaultEnvironment,
      input,
      sideInputs,
      Collections.emptyList(),
      Collections.emptyList(),
      Collections.emptyList(),
      Collections.emptyList(),
      DEFAULT_WIRE_CODER_SETTINGS);
}
 
Example #4
Source File: ImpulseTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an input message stream for the transform's output id, backed by a generic system
 * descriptor that names {@link SamzaImpulseSystemFactory} as its factory class.
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {

  final String streamId = ctx.getOutputId(transform);
  final GenericSystemDescriptor impulseSystem =
      new GenericSystemDescriptor(streamId, SamzaImpulseSystemFactory.class.getName());

  // A KV serde is required here so that Samza does not crop the key.
  final Serde<KV<?, OpMessage<byte[]>>> serde = KVSerde.of(new NoOpSerde(), new NoOpSerde<>());
  final GenericInputDescriptor<KV<?, OpMessage<byte[]>>> impulseInput =
      impulseSystem.getInputDescriptor(streamId, serde);

  ctx.registerInputMessageStream(streamId, impulseInput);
}
 
Example #5
Source File: SamzaPortablePipelineTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Translates every transform of the portable pipeline, in topological order, using the translator
 * registered in {@code TRANSLATORS} for the transform's URN.
 *
 * <p>The context's current transform is set before each translation and cleared afterwards.
 *
 * @param pipeline the portable pipeline proto to translate
 * @param ctx the translation context handed to every translator
 * @throws UnsupportedOperationException if no translator is registered for a transform's URN
 */
public static void translate(RunnerApi.Pipeline pipeline, PortableTranslationContext ctx) {
  QueryablePipeline queryablePipeline =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  for (PipelineNode.PTransformNode transform :
      queryablePipeline.getTopologicallyOrderedTransforms()) {
    ctx.setCurrentTransform(transform);

    final String urn = transform.getTransform().getSpec().getUrn();
    LOG.info("Translating transform urn: {}", urn);

    // Map.get returns null for an unregistered URN; fail with a message that names the
    // offending transform instead of an anonymous NullPointerException.
    final TransformTranslator<?> translator = TRANSLATORS.get(urn);
    if (translator == null) {
      throw new UnsupportedOperationException(
          String.format(
              "No translator registered for transform %s with urn %s", transform.getId(), urn));
    }
    translator.translatePortable(transform, queryablePipeline, ctx);

    ctx.clearCurrentTransform();
  }
}
 
Example #6
Source File: WindowAssignTranslator.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the transform's payload as a {@code WindowIntoPayload}, rebuilds the window function
 * from it and registers, under the transform's output id, the input stream flat-mapped through a
 * {@link WindowAssignOp}.
 *
 * @throws IllegalArgumentException if the payload cannot be parsed
 */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final RunnerApi.WindowIntoPayload windowIntoPayload;
  try {
    windowIntoPayload =
        RunnerApi.WindowIntoPayload.parseFrom(transform.getTransform().getSpec().getPayload());
  } catch (InvalidProtocolBufferException parseFailure) {
    final String message =
        String.format("failed to parse WindowIntoPayload: %s", transform.getId());
    throw new IllegalArgumentException(message, parseFailure);
  }

  @SuppressWarnings("unchecked")
  final WindowFn<T, ?> fn =
      (WindowFn<T, ?>)
          WindowingStrategyTranslation.windowFnFromProto(windowIntoPayload.getWindowFn());

  final MessageStream<OpMessage<T>> source = ctx.getOneInputMessageStream(transform);
  final MessageStream<OpMessage<T>> windowed =
      source.flatMap(OpAdapter.adapt(new WindowAssignOp<>(fn)));

  final String outputId = ctx.getOutputId(transform);
  ctx.registerMessageStream(outputId, windowed);
}
 
Example #7
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Portable translation entry point; forwards all three arguments to {@code doTranslatePortable}. */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  doTranslatePortable(transform, pipeline, ctx);
}
 
Example #8
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the runner wire coder for the PCollection with the given id.
 *
 * @param collectionId id of the PCollection, looked up in {@code components}
 * @param components pipeline components containing the PCollection
 * @return the wire coder, cast to {@link WindowedValue.WindowedValueCoder}
 * @throws RuntimeException wrapping any {@link IOException} raised while instantiating the coder
 */
public WindowedValue.WindowedValueCoder instantiateCoder(
    String collectionId, RunnerApi.Components components) {
  final RunnerApi.PCollection proto = components.getPcollectionsOrThrow(collectionId);
  final PipelineNode.PCollectionNode node = PipelineNode.pCollection(collectionId, proto);
  try {
    return (WindowedValue.WindowedValueCoder)
        WireCoders.instantiateRunnerWireCoder(node, components);
  } catch (IOException ioFailure) {
    throw new RuntimeException(ioFailure);
  }
}
 
Example #9
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the windowing strategy of the transform's single input PCollection and converts it
 * from its proto form.
 *
 * @param transform transform whose only input determines the windowing strategy
 * @param pipeline pipeline supplying the components used for the lookup
 * @return the converted strategy, cast to a {@link BoundedWindow} strategy
 * @throws IllegalStateException if converting the proto fails for any reason
 */
public WindowingStrategy<?, BoundedWindow> getPortableWindowStrategy(
    PipelineNode.PTransformNode transform, QueryablePipeline pipeline) {
  final String onlyInputId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  final RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(pipeline.getComponents());

  // Two-step lookup: input PCollection -> windowing strategy id -> windowing strategy proto.
  final String strategyId =
      pipeline.getComponents().getPcollectionsOrThrow(onlyInputId).getWindowingStrategyId();
  final RunnerApi.WindowingStrategy strategyProto =
      pipeline.getComponents().getWindowingStrategiesOrThrow(strategyId);

  final WindowingStrategy<?, ?> hydrated;
  try {
    hydrated = WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
  } catch (Exception failure) {
    final String message =
        String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto);
    throw new IllegalStateException(message, failure);
  }

  @SuppressWarnings("unchecked")
  final WindowingStrategy<?, BoundedWindow> bounded =
      (WindowingStrategy<?, BoundedWindow>) hydrated;
  return bounded;
}
 
Example #10
Source File: SamzaPipelineTranslatorUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the runner wire coder for the PCollection with the given id.
 *
 * @param collectionId id of the PCollection, looked up in {@code components}
 * @param components pipeline components containing the PCollection
 * @return the wire coder, cast to {@link WindowedValue.WindowedValueCoder}
 * @throws RuntimeException wrapping any {@link IOException} raised while instantiating the coder
 */
public static WindowedValue.WindowedValueCoder instantiateCoder(
    String collectionId, RunnerApi.Components components) {
  final RunnerApi.PCollection collectionProto = components.getPcollectionsOrThrow(collectionId);
  final PipelineNode.PCollectionNode pCollectionNode =
      PipelineNode.pCollection(collectionId, collectionProto);
  try {
    return (WindowedValue.WindowedValueCoder)
        WireCoders.instantiateRunnerWireCoder(pCollectionNode, components);
  } catch (IOException cause) {
    throw new RuntimeException(cause);
  }
}
 
Example #11
Source File: SamzaPipelineTranslatorUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the windowing strategy of the transform's single input PCollection and converts it
 * from its proto form.
 *
 * @param transform transform whose only input determines the windowing strategy
 * @param pipeline pipeline supplying the components used for the lookup
 * @return the converted strategy, cast to a {@link BoundedWindow} strategy
 * @throws IllegalStateException if the strategy proto cannot be converted because of an
 *     {@link InvalidProtocolBufferException}
 */
public static WindowingStrategy<?, BoundedWindow> getPortableWindowStrategy(
    PipelineNode.PTransformNode transform, QueryablePipeline pipeline) {
  final String soleInputId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  final RehydratedComponents rehydrated =
      RehydratedComponents.forComponents(pipeline.getComponents());

  // Two-step lookup: input PCollection -> windowing strategy id -> windowing strategy proto.
  final String strategyId =
      pipeline.getComponents().getPcollectionsOrThrow(soleInputId).getWindowingStrategyId();
  final RunnerApi.WindowingStrategy proto =
      pipeline.getComponents().getWindowingStrategiesOrThrow(strategyId);

  final WindowingStrategy<?, ?> converted;
  try {
    converted = WindowingStrategyTranslation.fromProto(proto, rehydrated);
  } catch (InvalidProtocolBufferException invalidProto) {
    final String message =
        String.format("Unable to hydrate GroupByKey windowing strategy %s.", proto);
    throw new IllegalStateException(message, invalidProto);
  }

  @SuppressWarnings("unchecked")
  final WindowingStrategy<?, BoundedWindow> result =
      (WindowingStrategy<?, BoundedWindow>) converted;
  return result;
}
 
Example #12
Source File: FlattenPCollectionsTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Merges all input message streams of the transform and registers the merged stream under the
 * transform's output id.
 *
 * <p>Fails the state check if the transform has no input streams.
 */
private static <T> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final List<MessageStream<OpMessage<T>>> sources = ctx.getAllInputMessageStreams(transform);
  final String targetId = ctx.getOutputId(transform);

  // With the portable API there should be at least the impulse as a dummy input.
  // This can be confirmed once validateRunner tests are available for portable runners.
  final boolean hasInputs = !sources.isEmpty();
  checkState(hasInputs, "no input streams defined for Flatten: %s", transform.getId());

  ctx.registerMessageStream(targetId, mergeInputStreams(sources));
}
 
Example #13
Source File: FlattenPCollectionsTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Portable translation entry point; forwards all three arguments to {@code doTranslatePortable}. */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  doTranslatePortable(transform, pipeline, ctx);
}
 
Example #14
Source File: RegisterNodeFunction.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an artificial {@link PCollectionView} that satisfies the API requirements of a {@link
 * SideInputReader} inside the Dataflow runner harness.
 *
 * <p>The view uses the runner wire coder of the side input PCollection, so that encoding and
 * decoding values matches the length prefixing that occurred when the side input was
 * materialized.
 *
 * <p>Only multimap materializations are accepted; any other access pattern URN fails the
 * argument check. An {@link IOException} while building the coder is rethrown as an {@link
 * IllegalStateException}.
 */
public static final PCollectionView<?> transformSideInputForRunner(
    RunnerApi.Pipeline pipeline,
    RunnerApi.PTransform parDoPTransform,
    String sideInputTag,
    RunnerApi.SideInput sideInput) {
  final String accessPatternUrn = sideInput.getAccessPattern().getUrn();
  checkArgument(
      Materializations.MULTIMAP_MATERIALIZATION_URN.equals(accessPatternUrn),
      "This handler is only capable of dealing with %s materializations "
          + "but was asked to handle %s for PCollectionView with tag %s.",
      Materializations.MULTIMAP_MATERIALIZATION_URN,
      sideInput.getAccessPattern().getUrn(),
      sideInputTag);

  final String collectionId = parDoPTransform.getInputsOrThrow(sideInputTag);
  final RunnerApi.PCollection collectionProto =
      pipeline.getComponents().getPcollectionsOrThrow(collectionId);

  try {
    final FullWindowedValueCoder<KV<Object, Object>> wireCoder =
        (FullWindowedValueCoder)
            WireCoders.instantiateRunnerWireCoder(
                PipelineNode.pCollection(collectionId, collectionProto),
                pipeline.getComponents());

    return DataflowPortabilityPCollectionView.with(new TupleTag<>(sideInputTag), wireCoder);
  } catch (IOException ioFailure) {
    throw new IllegalStateException("Unable to translate proto to coder", ioFailure);
  }
}
 
Example #15
Source File: TransformTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates the portable {@link org.apache.beam.model.pipeline.v1.RunnerApi.PTransform} into
 * Samza API.
 *
 * <p>This default implementation always throws, naming the implementing class in the message.
 *
 * @throws UnsupportedOperationException always
 */
default void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final String translatorName = this.getClass().getSimpleName();
  throw new UnsupportedOperationException(
      "Portable translation is not supported for " + translatorName);
}
 
Example #16
Source File: PipelineTranslatorUtils.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the runner wire coder for the PCollection with the given id, based on its proto
 * definition in {@code components}.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while instantiating the coder
 */
public static <T> Coder<WindowedValue<T>> instantiateCoder(
    String collectionId, RunnerApi.Components components) {
  final RunnerApi.PCollection proto = components.getPcollectionsOrThrow(collectionId);
  final PipelineNode.PCollectionNode node = PipelineNode.pCollection(collectionId, proto);
  try {
    return WireCoders.instantiateRunnerWireCoder(node, components);
  } catch (IOException ioFailure) {
    throw new RuntimeException("Could not instantiate Coder", ioFailure);
  }
}
 
Example #17
Source File: SamzaPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the pipeline's transforms in topological order and, for every transform whose registered
 * translator is a {@link TransformConfigGenerator}, adds that generator's portable config to
 * {@code configBuilder}.
 */
public static void createConfig(
    RunnerApi.Pipeline pipeline, ConfigBuilder configBuilder, SamzaPipelineOptions options) {
  final QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    final String urn = node.getTransform().getSpec().getUrn();
    final TransformTranslator<?> candidate = TRANSLATORS.get(urn);

    // Translators that do not generate config (or are absent) contribute nothing.
    if (!(candidate instanceof TransformConfigGenerator)) {
      continue;
    }
    final TransformConfigGenerator generator = (TransformConfigGenerator) candidate;
    configBuilder.putAll(generator.createPortableConfig(node, options));
  }
}
 
Example #18
Source File: ParDoBoundMultiTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Portable translation entry point; forwards all three arguments to {@code doTranslatePortable}. */
@Override
public void translatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  doTranslatePortable(transform, pipeline, ctx);
}
 
Example #19
Source File: FlinkStreamingPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Translates each transform of the pipeline, in topological order, with the translator registered
 * for its URN, falling back to {@code urnNotFound} when none is registered.
 *
 * @return the translation context that was passed in
 */
@Override
public FlinkPortablePipelineTranslator.Executor translate(
    StreamingTranslationContext context, RunnerApi.Pipeline pipeline) {
  final QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  for (PipelineNode.PTransformNode node : queryable.getTopologicallyOrderedTransforms()) {
    final String urn = node.getTransform().getSpec().getUrn();
    urnToTransformTranslator
        .getOrDefault(urn, this::urnNotFound)
        .translate(node.getId(), pipeline, context);
  }

  return context;
}
 
Example #20
Source File: SparkBatchPortablePipelineTranslator.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the runner wire coder for the PCollection with the given id and returns it as a
 * {@link WindowedValueCoder}.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while instantiating the coder
 */
private static <T> WindowedValueCoder<T> getWindowedValueCoder(
    String pCollectionId, RunnerApi.Components components) {
  final PCollectionNode node =
      PipelineNode.pCollection(pCollectionId, components.getPcollectionsOrThrow(pCollectionId));
  try {
    return (WindowedValueCoder) WireCoders.instantiateRunnerWireCoder(node, components);
  } catch (IOException ioFailure) {
    throw new RuntimeException(ioFailure);
  }
}
 
Example #21
Source File: PortablePipelineDotRenderer.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Renders the pipeline: calls {@code begin()}, visits every transform in topological order, calls
 * {@code end()} and returns the accumulated contents of {@code dotBuilder}.
 */
private String toDot(RunnerApi.Pipeline pipeline) {
  final QueryablePipeline queryable =
      QueryablePipeline.forTransforms(
          pipeline.getRootTransformIdsList(), pipeline.getComponents());

  begin();
  queryable.getTopologicallyOrderedTransforms().forEach(this::visitTransform);
  end();

  return dotBuilder.toString();
}
 
Example #22
Source File: AbstractPythonStatelessFunctionRunner.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an {@link ExecutableStage} containing a single transform that reads {@code INPUT_ID} and
 * writes {@code OUTPUT_ID}.
 *
 * <p>The components register the input and output PCollections, the transform (whose spec carries
 * {@code functionUrn} and the serialized user-defined functions proto), one windowing strategy
 * and the input, output and window coders. The stage has no side inputs, user states or timers.
 */
@Override
@SuppressWarnings("unchecked")
public ExecutableStage createExecutableStage() throws Exception {
	RunnerApi.Components components =
		RunnerApi.Components.newBuilder()
			.putPcollections(
				INPUT_ID,
				RunnerApi.PCollection.newBuilder()
					.setWindowingStrategyId(WINDOW_STRATEGY)
					.setCoderId(INPUT_CODER_ID)
					.build())
			.putPcollections(
				OUTPUT_ID,
				RunnerApi.PCollection.newBuilder()
					.setWindowingStrategyId(WINDOW_STRATEGY)
					.setCoderId(OUTPUT_CODER_ID)
					.build())
			.putTransforms(
				TRANSFORM_ID,
				RunnerApi.PTransform.newBuilder()
					.setUniqueName(TRANSFORM_ID)
					.setSpec(RunnerApi.FunctionSpec.newBuilder()
						.setUrn(functionUrn)
						.setPayload(
							org.apache.beam.vendor.grpc.v1p21p0.com.google.protobuf.ByteString.copyFrom(
								getUserDefinedFunctionsProto().toByteArray()))
						.build())
					.putInputs(MAIN_INPUT_NAME, INPUT_ID)
					.putOutputs(MAIN_OUTPUT_NAME, OUTPUT_ID)
					.build())
			.putWindowingStrategies(
				WINDOW_STRATEGY,
				RunnerApi.WindowingStrategy.newBuilder()
					.setWindowCoderId(WINDOW_CODER_ID)
					.build())
			.putCoders(
				INPUT_CODER_ID,
				getInputCoderProto())
			.putCoders(
				OUTPUT_CODER_ID,
				getOutputCoderProto())
			.putCoders(
				WINDOW_CODER_ID,
				getWindowCoderProto())
			.build();

	PipelineNode.PCollectionNode input =
		PipelineNode.pCollection(INPUT_ID, components.getPcollectionsOrThrow(INPUT_ID));
	// Collections.emptyList() is the type-safe equivalent of the raw Collections.EMPTY_LIST.
	List<SideInputReference> sideInputs = Collections.emptyList();
	List<UserStateReference> userStates = Collections.emptyList();
	List<TimerReference> timers = Collections.emptyList();
	List<PipelineNode.PTransformNode> transforms =
		Collections.singletonList(
			PipelineNode.pTransform(TRANSFORM_ID, components.getTransformsOrThrow(TRANSFORM_ID)));
	List<PipelineNode.PCollectionNode> outputs =
		Collections.singletonList(
			PipelineNode.pCollection(OUTPUT_ID, components.getPcollectionsOrThrow(OUTPUT_ID)));
	return ImmutableExecutableStage.of(
		components,
		createPythonExecutionEnvironment(),
		input,
		sideInputs,
		userStates,
		timers,
		transforms,
		outputs,
		createValueOnlyWireCoderSetting());
}
 
Example #23
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Stores the given transform node in the {@code currentTransform} field. */
public void setCurrentTransform(PipelineNode.PTransformNode currentTransform) {
  this.currentTransform = currentTransform;
}
 
Example #24
Source File: Twister2BatchPortablePipelineTranslator.java    From twister2 with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a Beam transform node using the supplied Twister2 batch translation context.
 *
 * @param transformNode the Beam transform node to translate
 * @param pipeline the pipeline proto passed along with the transform
 * @param context the Twister2 batch translation context handed to the translation
 */
void translate(
    PipelineNode.PTransformNode transformNode,
    RunnerApi.Pipeline pipeline,
    Twister2BatchTranslationContext context);
 
Example #25
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the id of the transform's only output; {@link Iterables#getOnlyElement} rejects a
 * transform whose output map does not hold exactly one value.
 */
public String getOutputId(PipelineNode.PTransformNode transform) {
  final Collection<String> outputIds = transform.getTransform().getOutputsMap().values();
  return Iterables.getOnlyElement(outputIds);
}
 
Example #26
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the id of the transform's only input; {@link Iterables#getOnlyElement} rejects a
 * transform whose input map does not hold exactly one value.
 */
public String getInputId(PipelineNode.PTransformNode transform) {
  final Collection<String> inputIds = transform.getTransform().getInputsMap().values();
  return Iterables.getOnlyElement(inputIds);
}
 
Example #27
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Looks up the message stream registered for the transform's only input id. */
public <T> MessageStream<OpMessage<T>> getOneInputMessageStream(
    PipelineNode.PTransformNode transform) {
  return getMessageStreamById(
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()));
}
 
Example #28
Source File: PortableTranslationContext.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the message stream for each of the transform's input ids and returns the streams as a
 * list, in the iteration order of the input map's values.
 */
public <T> List<MessageStream<OpMessage<T>>> getAllInputMessageStreams(
    PipelineNode.PTransformNode transform) {
  return transform.getTransform().getInputsMap().values().stream()
      .map(streamId -> this.<T>getMessageStreamById(streamId))
      .collect(Collectors.toList());
}
 
Example #29
Source File: TransformConfigGenerator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Generates config for a portable API PTransform.
 *
 * <p>This default implementation contributes nothing: it returns an empty map.
 *
 * @param transform the portable transform node to generate config for
 * @param options the Samza pipeline options
 * @return the config entries for the transform; empty by default
 */
default Map<String, String> createPortableConfig(
    PipelineNode.PTransformNode transform, SamzaPipelineOptions options) {
  return Collections.emptyMap();
}
 
Example #30
Source File: GroupByKeyTranslator.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Portable GroupByKey translation: gathers the input stream, windowing strategy, coders, output
 * tag, reduce function and boundedness of the transform's input, hands them to {@code
 * doTranslateGBK}, and registers the resulting stream under the transform's output id.
 */
private static <K, InputT, OutputT> void doTranslatePortable(
    PipelineNode.PTransformNode transform,
    QueryablePipeline pipeline,
    PortableTranslationContext ctx) {
  final MessageStream<OpMessage<KV<K, InputT>>> source = ctx.getOneInputMessageStream(transform);
  // Repartitioning is requested whenever the configured max source parallelism exceeds one.
  final boolean repartition = ctx.getSamzaPipelineOptions().getMaxSourceParallelism() > 1;
  final WindowingStrategy<?, BoundedWindow> strategy =
      ctx.getPortableWindowStrategy(transform, pipeline);
  final Coder<BoundedWindow> windowCoder = strategy.getWindowFn().windowCoder();

  // Derive the KV coder from the input's wire coder and pair it with the window coder.
  final String inputId = ctx.getInputId(transform);
  final WindowedValue.WindowedValueCoder<KV<K, InputT>> wireCoder =
      ctx.instantiateCoder(inputId, pipeline.getComponents());
  final KvCoder<K, InputT> kvCoder = (KvCoder<K, InputT>) wireCoder.getValueCoder();
  final Coder<WindowedValue<KV<K, InputT>>> windowedKvCoder =
      WindowedValue.FullWindowedValueCoder.of(kvCoder, windowCoder);

  final String onlyOutputTag =
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().keySet());
  final TupleTag<KV<K, OutputT>> mainOutputTag = new TupleTag<>(onlyOutputTag);

  @SuppressWarnings("unchecked")
  final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> bufferingFn =
      (SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow>)
          SystemReduceFn.buffering(kvCoder.getValueCoder());

  final RunnerApi.PCollection inputProto =
      pipeline.getComponents().getPcollectionsOrThrow(inputId);
  final PCollection.IsBounded boundedness = SamzaPipelineTranslatorUtils.isBounded(inputProto);

  final MessageStream<OpMessage<KV<K, OutputT>>> grouped =
      doTranslateGBK(
          source,
          repartition,
          bufferingFn,
          strategy,
          kvCoder,
          windowedKvCoder,
          ctx.getTransformFullName(),
          ctx.getTransformId(),
          mainOutputTag,
          boundedness);
  ctx.registerMessageStream(ctx.getOutputId(transform), grouped);
}